AArch64ISelDAGToDAG.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp]

1	//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines an instruction selector for the AArch64 target.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AArch64MachineFunctionInfo.h"
14	#include "AArch64TargetMachine.h"
15	#include "MCTargetDesc/AArch64AddressingModes.h"
16	#include "llvm/ADT/APSInt.h"
17	#include "llvm/CodeGen/ISDOpcodes.h"
18	#include "llvm/CodeGen/SelectionDAGISel.h"
19	#include "llvm/IR/Function.h" // To access function attributes.
20	#include "llvm/IR/GlobalValue.h"
21	#include "llvm/IR/Intrinsics.h"
22	#include "llvm/IR/IntrinsicsAArch64.h"
23	#include "llvm/Support/Debug.h"
24	#include "llvm/Support/ErrorHandling.h"
25	#include "llvm/Support/KnownBits.h"
26	#include "llvm/Support/MathExtras.h"
27	#include "llvm/Support/raw_ostream.h"
28
29	using namespace llvm;
30
31	#define DEBUG_TYPE "aarch64-isel"
32	#define PASS_NAME "AArch64 Instruction Selection"
33
34	//===--------------------------------------------------------------------===//
35	/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36	/// instructions for SelectionDAG operations.
37	///
38	namespace {
39
40	class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42	/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43	/// make the right decision when generating code for different targets.
44	const AArch64Subtarget *Subtarget;
45
46	public:
47	AArch64DAGToDAGISel() = delete;
48
49	explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50	CodeGenOptLevel OptLevel)
51	: SelectionDAGISel (tm, OptLevel), Subtarget(nullptr) {}
52
53	bool runOnMachineFunction(MachineFunction &MF) override {
54	Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55	return SelectionDAGISel::runOnMachineFunction(mf&: MF);
56	}
57
58	void Select(SDNode *Node) override;
59
60	/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61	/// inline asm expressions.
62	bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63	InlineAsm::ConstraintCode ConstraintID,
64	std::vector<SDValue> &OutOps) override;
65
66	template <signed Low, signed High, signed Scale>
67	bool SelectRDVLImm(SDValue N, SDValue &Imm);
68
69	bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70	bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71	bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72	bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73	bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74	return SelectShiftedRegister(N, AllowROR: false, Reg, Shift);
75	}
76	bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77	return SelectShiftedRegister(N, AllowROR: true, Reg, Shift);
78	}
79	bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80	return SelectAddrModeIndexed7S(N, Size: `1`, Base, OffImm);
81	}
82	bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83	return SelectAddrModeIndexed7S(N, Size: `2`, Base, OffImm);
84	}
85	bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86	return SelectAddrModeIndexed7S(N, Size: `4`, Base, OffImm);
87	}
88	bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89	return SelectAddrModeIndexed7S(N, Size: `8`, Base, OffImm);
90	}
91	bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92	return SelectAddrModeIndexed7S(N, Size: `16`, Base, OffImm);
93	}
94	bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95	return SelectAddrModeIndexedBitWidth(N, IsSignedImm: true, BW: `9`, Size: `16`, Base, OffImm);
96	}
97	bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98	return SelectAddrModeIndexedBitWidth(N, IsSignedImm: false, BW: `6`, Size: `16`, Base, OffImm);
99	}
100	bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101	return SelectAddrModeIndexed(N, Size: `1`, Base, OffImm);
102	}
103	bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104	return SelectAddrModeIndexed(N, Size: `2`, Base, OffImm);
105	}
106	bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107	return SelectAddrModeIndexed(N, Size: `4`, Base, OffImm);
108	}
109	bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110	return SelectAddrModeIndexed(N, Size: `8`, Base, OffImm);
111	}
112	bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113	return SelectAddrModeIndexed(N, Size: `16`, Base, OffImm);
114	}
115	bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116	return SelectAddrModeUnscaled(N, Size: `1`, Base, OffImm);
117	}
118	bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119	return SelectAddrModeUnscaled(N, Size: `2`, Base, OffImm);
120	}
121	bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122	return SelectAddrModeUnscaled(N, Size: `4`, Base, OffImm);
123	}
124	bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125	return SelectAddrModeUnscaled(N, Size: `8`, Base, OffImm);
126	}
127	bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128	return SelectAddrModeUnscaled(N, Size: `16`, Base, OffImm);
129	}
130	template <unsigned Size, unsigned Max>
131	bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132	// Test if there is an appropriate addressing mode and check if the
133	// immediate fits.
134	bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135	if (Found) {
136	if (auto *CI = dyn_cast<ConstantSDNode>(Val&: OffImm)) {
137	int64_t C = CI->getSExtValue();
138	if (C <= Max)
139	return true;
140	}
141	}
142
143	// Otherwise, base only, materialize address in register.
144	Base = N;
145	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i64);
146	return true;
147	}
148
149	template<int Width>
150	bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151	SDValue &SignExtend, SDValue &DoShift) {
152	return SelectAddrModeWRO(N, Size: Width / `8`, Base, Offset, SignExtend, DoShift);
153	}
154
155	template<int Width>
156	bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157	SDValue &SignExtend, SDValue &DoShift) {
158	return SelectAddrModeXRO(N, Size: Width / `8`, Base, Offset, SignExtend, DoShift);
159	}
160
161	bool SelectExtractHigh(SDValue N, SDValue &Res) {
162	if (Subtarget->isLittleEndian() && N ->getOpcode() == ISD::BITCAST)
163	N = N ->getOperand(Num: `0`);
164	if (N ->getOpcode() != ISD::EXTRACT_SUBVECTOR \|\|
165	!isa<ConstantSDNode>(Val: N ->getOperand(Num: `1`)))
166	return false;
167	EVT VT = N ->getValueType(ResNo: `0`);
168	EVT LVT = N ->getOperand(Num: `0`).getValueType();
169	unsigned Index = N ->getConstantOperandVal(Num: `1`);
170	if (!VT.is64BitVector() \|\| !LVT.is128BitVector() \|\|
171	Index != VT.getVectorNumElements())
172	return false;
173	Res = N ->getOperand(Num: `0`);
174	return true;
175	}
176
177	bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178	if (N.getOpcode() != AArch64ISD::VLSHR)
179	return false;
180	SDValue Op = N ->getOperand(Num: `0`);
181	EVT VT = Op.getValueType();
182	unsigned ShtAmt = N ->getConstantOperandVal(Num: `1`);
183	if (ShtAmt > VT.getScalarSizeInBits() / `2` \|\| Op.getOpcode() != ISD::ADD)
184	return false;
185
186	APInt Imm;
187	if (Op.getOperand(i: `1`).getOpcode() == AArch64ISD::MOVIshift)
188	Imm = APInt (VT.getScalarSizeInBits(),
189	Op.getOperand(i: `1`).getConstantOperandVal(i: `0`)
190	<< Op.getOperand(i: `1`).getConstantOperandVal(i: `1`));
191	else if (Op.getOperand(i: `1`).getOpcode() == AArch64ISD::DUP &&
192	isa<ConstantSDNode>(Val: Op.getOperand(i: `1`).getOperand(i: `0`)))
193	Imm = APInt (VT.getScalarSizeInBits(),
194	Op.getOperand(i: `1`).getConstantOperandVal(i: `0`));
195	else
196	return false;
197
198	if (Imm != `1ULL` << (ShtAmt - `1`))
199	return false;
200
201	Res1 = Op.getOperand(i: `0`);
202	Res2 = CurDAG->getTargetConstant(Val: ShtAmt, DL: SDLoc (N), VT: MVT::i32);
203	return true;
204	}
205
206	bool SelectDupZeroOrUndef(SDValue N) {
207	switch(N ->getOpcode()) {
208	case ISD::UNDEF:
209	return true;
210	case AArch64ISD::DUP:
211	case ISD::SPLAT_VECTOR: {
212	auto Opnd0 = N ->getOperand(Num: `0`);
213	if (isNullConstant(V: Opnd0))
214	return true;
215	if (isNullFPConstant(V: Opnd0))
216	return true;
217	break;
218	}
219	default:
220	break;
221	}
222
223	return false;
224	}
225
226	bool SelectDupZero(SDValue N) {
227	switch(N ->getOpcode()) {
228	case AArch64ISD::DUP:
229	case ISD::SPLAT_VECTOR: {
230	auto Opnd0 = N ->getOperand(Num: `0`);
231	if (isNullConstant(V: Opnd0))
232	return true;
233	if (isNullFPConstant(V: Opnd0))
234	return true;
235	break;
236	}
237	}
238
239	return false;
240	}
241
242	bool SelectDupNegativeZero(SDValue N) {
243	switch(N ->getOpcode()) {
244	case AArch64ISD::DUP:
245	case ISD::SPLAT_VECTOR: {
246	ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(Val: N ->getOperand(Num: `0`));
247	return Const && Const->isZero() && Const->isNegative();
248	}
249	}
250
251	return false;
252	}
253
254	template<MVT::SimpleValueType VT>
255	bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256	return SelectSVEAddSubImm(N, VT, Imm, Shift);
257	}
258
259	template <MVT::SimpleValueType VT, bool Negate>
260	bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261	return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262	}
263
264	template <MVT::SimpleValueType VT>
265	bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266	return SelectSVECpyDupImm(N, VT, Imm, Shift);
267	}
268
269	template <MVT::SimpleValueType VT, bool Invert = false>
270	bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271	return SelectSVELogicalImm(N, VT, Imm, Invert);
272	}
273
274	template <MVT::SimpleValueType VT>
275	bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276	return SelectSVEArithImm(N, VT, Imm);
277	}
278
279	template <unsigned Low, unsigned High, bool AllowSaturation = false>
280	bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281	return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282	}
283
284	bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285	if (N ->getOpcode() != ISD::SPLAT_VECTOR)
286	return false;
287
288	EVT EltVT = N ->getValueType(ResNo: `0`).getVectorElementType();
289	return SelectSVEShiftImm(N: N ->getOperand(Num: `0`), / Low / `1`,
290	/ High / EltVT.getFixedSizeInBits(),
291	/ AllowSaturation / true, Imm);
292	}
293
294	// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALEN.*
295	template<signed Min, signed Max, signed Scale, bool Shift>
296	bool SelectCntImm(SDValue N, SDValue &Imm) {
297	if (!isa<ConstantSDNode>(Val: N))
298	return false;
299
300	int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
301	if (Shift)
302	MulImm = `1LL` << MulImm;
303
304	if ((MulImm % std::abs(x: Scale)) != `0`)
305	return false;
306
307	MulImm /= Scale;
308	if ((MulImm >= Min) && (MulImm <= Max)) {
309	Imm = CurDAG->getTargetConstant(Val: MulImm, DL: SDLoc (N), VT: MVT::i32);
310	return true;
311	}
312
313	return false;
314	}
315
316	template <signed Max, signed Scale>
317	bool SelectEXTImm(SDValue N, SDValue &Imm) {
318	if (!isa<ConstantSDNode>(Val: N))
319	return false;
320
321	int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
322
323	if (MulImm >= `0` && MulImm <= Max) {
324	MulImm *= Scale;
325	Imm = CurDAG->getTargetConstant(Val: MulImm, DL: SDLoc (N), VT: MVT::i32);
326	return true;
327	}
328
329	return false;
330	}
331
332	template <unsigned BaseReg, unsigned Max>
333	bool ImmToReg(SDValue N, SDValue &Imm) {
334	if (auto *CI = dyn_cast<ConstantSDNode>(Val&: N)) {
335	uint64_t C = CI->getZExtValue();
336
337	if (C > Max)
338	return false;
339
340	Imm = CurDAG->getRegister(Reg: BaseReg + C, VT: MVT::Other);
341	return true;
342	}
343	return false;
344	}
345
346	/// Form sequences of consecutive 64/128-bit registers for use in NEON
347	/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348	/// between 1 and 4 elements. If it contains a single element that is returned
349	/// unchanged; otherwise a REG_SEQUENCE value is returned.
350	SDValue createDTuple(ArrayRef<SDValue> Vecs);
351	SDValue createQTuple(ArrayRef<SDValue> Vecs);
352	// Form a sequence of SVE registers for instructions using list of vectors,
353	// e.g. structured loads and stores (ldN, stN).
354	SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356	// Similar to above, except the register must start at a multiple of the
357	// tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358	SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360	/// Generic helper for the createDTuple/createQTuple
361	/// functions. Those should almost always be called instead.
362	SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363	const unsigned SubRegs[]);
364
365	void SelectTable(SDNode N, unsigned* NumVecs, unsigned Opc, bool isExt);
366
367	bool tryIndexedLoad(SDNode *N);
368
369	void SelectPtrauthAuth(SDNode *N);
370	void SelectPtrauthResign(SDNode *N);
371
372	bool trySelectStackSlotTagP(SDNode *N);
373	void SelectTagP(SDNode *N);
374
375	void SelectLoad(SDNode N, unsigned* NumVecs, unsigned Opc,
376	unsigned SubRegIdx);
377	void SelectPostLoad(SDNode N, unsigned* NumVecs, unsigned Opc,
378	unsigned SubRegIdx);
379	void SelectLoadLane(SDNode N, unsigned* NumVecs, unsigned Opc);
380	void SelectPostLoadLane(SDNode N, unsigned* NumVecs, unsigned Opc);
381	void SelectPredicatedLoad(SDNode N, unsigned* NumVecs, unsigned Scale,
382	unsigned Opc_rr, unsigned Opc_ri,
383	bool IsIntr = false);
384	void SelectContiguousMultiVectorLoad(SDNode N, unsigned* NumVecs,
385	unsigned Scale, unsigned Opc_ri,
386	unsigned Opc_rr);
387	void SelectDestructiveMultiIntrinsic(SDNode N, unsigned* NumVecs,
388	bool IsZmMulti, unsigned Opcode,
389	bool HasPred = false);
390	void SelectPExtPair(SDNode N, unsigned* Opc);
391	void SelectWhilePair(SDNode N, unsigned* Opc);
392	void SelectCVTIntrinsic(SDNode N, unsigned* NumVecs, unsigned Opcode);
393	void SelectClamp(SDNode N, unsigned* NumVecs, unsigned Opcode);
394	void SelectUnaryMultiIntrinsic(SDNode N, unsigned* NumOutVecs,
395	bool IsTupleInput, unsigned Opc);
396	void SelectFrintFromVT(SDNode N, unsigned* NumVecs, unsigned Opcode);
397
398	template <unsigned MaxIdx, unsigned Scale>
399	void SelectMultiVectorMove(SDNode N, unsigned* NumVecs, unsigned BaseReg,
400	unsigned Op);
401	void SelectMultiVectorMoveZ(SDNode N, unsigned* NumVecs,
402	unsigned Op, unsigned MaxIdx, unsigned Scale,
403	unsigned BaseReg = `0`);
404	bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405	/// SVE Reg+Imm addressing mode.
406	template <int64_t Min, int64_t Max>
407	bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408	SDValue &OffImm);
409	/// SVE Reg+Reg address mode.
410	template <unsigned Scale>
411	bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412	return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413	}
414
415	void SelectMultiVectorLuti(SDNode Node, unsigned* NumOutVecs, unsigned Opc,
416	uint32_t MaxImm);
417
418	template <unsigned MaxIdx, unsigned Scale>
419	bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
420	return SelectSMETileSlice(N, MaxSize: MaxIdx, Vector, Offset, Scale);
421	}
422
423	void SelectStore(SDNode N, unsigned* NumVecs, unsigned Opc);
424	void SelectPostStore(SDNode N, unsigned* NumVecs, unsigned Opc);
425	void SelectStoreLane(SDNode N, unsigned* NumVecs, unsigned Opc);
426	void SelectPostStoreLane(SDNode N, unsigned* NumVecs, unsigned Opc);
427	void SelectPredicatedStore(SDNode N, unsigned* NumVecs, unsigned Scale,
428	unsigned Opc_rr, unsigned Opc_ri);
429	std::tuple<unsigned, SDValue, SDValue>
430	findAddrModeSVELoadStore(SDNode N, unsigned* Opc_rr, unsigned Opc_ri,
431	const SDValue &OldBase, const SDValue &OldOffset,
432	unsigned Scale);
433
434	bool tryBitfieldExtractOp(SDNode *N);
435	bool tryBitfieldExtractOpFromSExt(SDNode *N);
436	bool tryBitfieldInsertOp(SDNode *N);
437	bool tryBitfieldInsertInZeroOp(SDNode *N);
438	bool tryShiftAmountMod(SDNode *N);
439
440	bool tryReadRegister(SDNode *N);
441	bool tryWriteRegister(SDNode *N);
442
443	bool trySelectCastFixedLengthToScalableVector(SDNode *N);
444	bool trySelectCastScalableToFixedLengthVector(SDNode *N);
445
446	bool trySelectXAR(SDNode *N);
447
448	// Include the pieces autogenerated from the target description.
449	#include "AArch64GenDAGISel.inc"
450
451	private:
452	bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
453	SDValue &Shift);
454	bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
455	bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
456	SDValue &OffImm) {
457	return SelectAddrModeIndexedBitWidth(N, IsSignedImm: true, BW: `7`, Size, Base, OffImm);
458	}
459	bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
460	unsigned Size, SDValue &Base,
461	SDValue &OffImm);
462	bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
463	SDValue &OffImm);
464	bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
465	SDValue &OffImm);
466	bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
467	SDValue &Offset, SDValue &SignExtend,
468	SDValue &DoShift);
469	bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
470	SDValue &Offset, SDValue &SignExtend,
471	SDValue &DoShift);
472	bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
473	bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
474	bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
475	SDValue &Offset, SDValue &SignExtend);
476
477	template<unsigned RegWidth>
478	bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
479	return SelectCVTFixedPosOperand(N, FixedPos, Width: RegWidth);
480	}
481
482	bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
483
484	template<unsigned RegWidth>
485	bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
486	return SelectCVTFixedPosRecipOperand(N, FixedPos, Width: RegWidth);
487	}
488
489	bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
490	unsigned Width);
491
492	bool SelectCMP_SWAP(SDNode *N);
493
494	bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495	bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496	bool Negate);
497	bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
498	bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
499
500	bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
501	bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
502	bool AllowSaturation, SDValue &Imm);
503
504	bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
505	bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
506	SDValue &Offset);
507	bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
508	SDValue &Offset, unsigned Scale = `1`);
509
510	bool SelectAllActivePredicate(SDValue N);
511	bool SelectAnyPredicate(SDValue N);
512	};
513
514	class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
515	public:
516	static char ID;
517	explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
518	CodeGenOptLevel OptLevel)
519	: SelectionDAGISelLegacy (
520	ID, std::make_unique<AArch64DAGToDAGISel>(args&: tm, args&: OptLevel)) {}
521	};
522	} // end anonymous namespace
523
524	char AArch64DAGToDAGISelLegacy::ID = `0`;
525
526	INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
527
528	/// isIntImmediate - This method tests to see if the node is a constant
529	/// operand. If so Imm will receive the 32-bit value.
530	static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
531	if (const ConstantSDNode C = dyn_cast<const* ConstantSDNode>(Val: N)) {
532	Imm = C->getZExtValue();
533	return true;
534	}
535	return false;
536	}
537
538	// isIntImmediate - This method tests to see if a constant operand.
539	// If so Imm will receive the value.
540	static bool isIntImmediate(SDValue N, uint64_t &Imm) {
541	return isIntImmediate(N: N.getNode(), Imm);
542	}
543
544	// isOpcWithIntImmediate - This method tests to see if the node is a specific
545	// opcode and that it has a immediate integer right operand.
546	// If so Imm will receive the 32 bit value.
547	static bool isOpcWithIntImmediate(const SDNode N, unsigned* Opc,
548	uint64_t &Imm) {
549	return N->getOpcode() == Opc &&
550	isIntImmediate(N: N->getOperand(Num: `1`).getNode(), Imm);
551	}
552
553	// isIntImmediateEq - This method tests to see if N is a constant operand that
554	// is equivalent to 'ImmExpected'.
555	#ifndef NDEBUG
556	static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
557	uint64_t Imm;
558	if (!isIntImmediate(N.getNode(), Imm))
559	return false;
560	return Imm == ImmExpected;
561	}
562	#endif
563
564	bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
565	const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
566	std::vector<SDValue> &OutOps) {
567	switch(ConstraintID) {
568	default:
569	llvm_unreachable("Unexpected asm memory constraint");
570	case InlineAsm::ConstraintCode::m:
571	case InlineAsm::ConstraintCode::o:
572	case InlineAsm::ConstraintCode::Q:
573	// We need to make sure that this one operand does not end up in XZR, thus
574	// require the address to be in a PointerRegClass register.
575	const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
576	const TargetRegisterClass TRC = TRI->getPointerRegClass(MF: MF);
577	SDLoc dl(Op);
578	SDValue RC = CurDAG->getTargetConstant(Val: TRC->getID(), DL: dl, VT: MVT::i64);
579	SDValue NewOp =
580	SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
581	dl, VT: Op.getValueType(),
582	Op1: Op, Op2: RC), `0`);
583	OutOps.push_back(x: NewOp);
584	return false;
585	}
586	return true;
587	}
588
589	/// SelectArithImmed - Select an immediate value that can be represented as
590	/// a 12-bit value shifted left by either 0 or 12. If so, return true with
591	/// Val set to the 12-bit value and Shift set to the shifter operand.
592	bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
593	SDValue &Shift) {
594	// This function is called from the addsub_shifted_imm ComplexPattern,
595	// which lists [imm] as the list of opcode it's interested in, however
596	// we still need to check whether the operand is actually an immediate
597	// here because the ComplexPattern opcode list is only used in
598	// root-level opcode matching.
599	if (!isa<ConstantSDNode>(Val: N.getNode()))
600	return false;
601
602	uint64_t Immed = N.getNode()->getAsZExtVal();
603	unsigned ShiftAmt;
604
605	if (Immed >> `12` == `0`) {
606	ShiftAmt = `0`;
607	} else if ((Immed & `0xfff`) == `0` && Immed >> `24` == `0`) {
608	ShiftAmt = `12`;
609	Immed = Immed >> `12`;
610	} else
611	return false;
612
613	unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
614	SDLoc dl(N);
615	Val = CurDAG->getTargetConstant(Val: Immed, DL: dl, VT: MVT::i32);
616	Shift = CurDAG->getTargetConstant(Val: ShVal, DL: dl, VT: MVT::i32);
617	return true;
618	}
619
620	/// SelectNegArithImmed - As above, but negates the value before trying to
621	/// select it.
622	bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
623	SDValue &Shift) {
624	// This function is called from the addsub_shifted_imm ComplexPattern,
625	// which lists [imm] as the list of opcode it's interested in, however
626	// we still need to check whether the operand is actually an immediate
627	// here because the ComplexPattern opcode list is only used in
628	// root-level opcode matching.
629	if (!isa<ConstantSDNode>(Val: N.getNode()))
630	return false;
631
632	// The immediate operand must be a 24-bit zero-extended immediate.
633	uint64_t Immed = N.getNode()->getAsZExtVal();
634
635	// This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
636	// have the opposite effect on the C flag, so this pattern mustn't match under
637	// those circumstances.
638	if (Immed == `0`)
639	return false;
640
641	if (N.getValueType() == MVT::i32)
642	Immed = ~((uint32_t)Immed) + `1`;
643	else
644	Immed = ~Immed + `1ULL`;
645	if (Immed & `0xFFFFFFFFFF000000ULL`)
646	return false;
647
648	Immed &= `0xFFFFFFULL`;
649	return SelectArithImmed(N: CurDAG->getConstant(Val: Immed, DL: SDLoc (N), VT: MVT::i32), Val,
650	Shift);
651	}
652
653	/// getShiftTypeForNode - Translate a shift node to the corresponding
654	/// ShiftType value.
655	static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
656	switch (N.getOpcode()) {
657	default:
658	return AArch64_AM::InvalidShiftExtend;
659	case ISD::SHL:
660	return AArch64_AM::LSL;
661	case ISD::SRL:
662	return AArch64_AM::LSR;
663	case ISD::SRA:
664	return AArch64_AM::ASR;
665	case ISD::ROTR:
666	return AArch64_AM::ROR;
667	}
668	}
669
670	/// Determine whether it is worth it to fold SHL into the addressing
671	/// mode.
672	static bool isWorthFoldingSHL(SDValue V) {
673	assert(V.getOpcode() == ISD::SHL && "invalid opcode");
674	// It is worth folding logical shift of up to three places.
675	auto *CSD = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: `1`));
676	if (!CSD)
677	return false;
678	unsigned ShiftVal = CSD->getZExtValue();
679	if (ShiftVal > `3`)
680	return false;
681
682	// Check if this particular node is reused in any non-memory related
683	// operation. If yes, do not try to fold this node into the address
684	// computation, since the computation will be kept.
685	const SDNode *Node = V.getNode();
686	for (SDNode *UI : Node->uses())
687	if (!isa<MemSDNode>(Val: *UI))
688	for (SDNode *UII : UI->uses())
689	if (!isa<MemSDNode>(Val: *UII))
690	return false;
691	return true;
692	}
693
694	/// Determine whether it is worth to fold V into an extended register addressing
695	/// mode.
696	bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
697	// Trivial if we are optimizing for code size or if there is only
698	// one use of the value.
699	if (CurDAG->shouldOptForSize() \|\| V.hasOneUse())
700	return true;
701
702	// If a subtarget has a slow shift, folding a shift into multiple loads
703	// costs additional micro-ops.
704	if (Subtarget->hasAddrLSLSlow14() && (Size == `2` \|\| Size == `16`))
705	return false;
706
707	// Check whether we're going to emit the address arithmetic anyway because
708	// it's used by a non-address operation.
709	if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
710	return true;
711	if (V.getOpcode() == ISD::ADD) {
712	const SDValue LHS = V.getOperand(i: `0`);
713	const SDValue RHS = V.getOperand(i: `1`);
714	if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: LHS))
715	return true;
716	if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: RHS))
717	return true;
718	}
719
720	// It hurts otherwise, since the value will be reused.
721	return false;
722	}
723
724	/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
725	/// to select more shifted register
726	bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
727	SDValue &Shift) {
728	EVT VT = N.getValueType();
729	if (VT != MVT::i32 && VT != MVT::i64)
730	return false;
731
732	if (N ->getOpcode() != ISD::AND \|\| !N ->hasOneUse())
733	return false;
734	SDValue LHS = N.getOperand(i: `0`);
735	if (!LHS ->hasOneUse())
736	return false;
737
738	unsigned LHSOpcode = LHS ->getOpcode();
739	if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
740	return false;
741
742	ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: LHS.getOperand(i: `1`));
743	if (!ShiftAmtNode)
744	return false;
745
746	uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
747	ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
748	if (!RHSC)
749	return false;
750
751	APInt AndMask = RHSC->getAPIntValue();
752	unsigned LowZBits, MaskLen;
753	if (!AndMask.isShiftedMask(MaskIdx&: LowZBits, MaskLen))
754	return false;
755
756	unsigned BitWidth = N.getValueSizeInBits();
757	SDLoc DL(LHS);
758	uint64_t NewShiftC;
759	unsigned NewShiftOp;
760	if (LHSOpcode == ISD::SHL) {
761	// LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
762	// BitWidth != LowZBits + MaskLen doesn't match the pattern
763	if (LowZBits <= ShiftAmtC \|\| (BitWidth != LowZBits + MaskLen))
764	return false;
765
766	NewShiftC = LowZBits - ShiftAmtC;
767	NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
768	} else {
769	if (LowZBits == `0`)
770	return false;
771
772	// NewShiftC >= BitWidth will fall into isBitfieldExtractOp
773	NewShiftC = LowZBits + ShiftAmtC;
774	if (NewShiftC >= BitWidth)
775	return false;
776
777	// SRA need all high bits
778	if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
779	return false;
780
781	// SRL high bits can be 0 or 1
782	if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
783	return false;
784
785	if (LHSOpcode == ISD::SRL)
786	NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
787	else
788	NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
789	}
790
791	assert(NewShiftC < BitWidth && "Invalid shift amount");
792	SDValue NewShiftAmt = CurDAG->getTargetConstant(Val: NewShiftC, DL, VT);
793	SDValue BitWidthMinus1 = CurDAG->getTargetConstant(Val: BitWidth - `1`, DL, VT);
794	Reg = SDValue (CurDAG->getMachineNode(Opcode: NewShiftOp, dl: DL, VT, Op1: LHS ->getOperand(Num: `0`),
795	Op2: NewShiftAmt, Op3: BitWidthMinus1),
796	`0`);
797	unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LowZBits);
798	Shift = CurDAG->getTargetConstant(Val: ShVal, DL, VT: MVT::i32);
799	return true;
800	}
801
802	/// getExtendTypeForNode - Translate an extend node to the corresponding
803	/// ExtendType value.
804	static AArch64_AM::ShiftExtendType
805	getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
806	if (N.getOpcode() == ISD::SIGN_EXTEND \|\|
807	N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
808	EVT SrcVT;
809	if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
810	SrcVT = cast<VTSDNode>(Val: N.getOperand(i: `1`))->getVT();
811	else
812	SrcVT = N.getOperand(i: `0`).getValueType();
813
814	if (!IsLoadStore && SrcVT == MVT::i8)
815	return AArch64_AM::SXTB;
816	else if (!IsLoadStore && SrcVT == MVT::i16)
817	return AArch64_AM::SXTH;
818	else if (SrcVT == MVT::i32)
819	return AArch64_AM::SXTW;
820	assert(SrcVT != MVT::i64 && "extend from 64-bits?");
821
822	return AArch64_AM::InvalidShiftExtend;
823	} else if (N.getOpcode() == ISD::ZERO_EXTEND \|\|
824	N.getOpcode() == ISD::ANY_EXTEND) {
825	EVT SrcVT = N.getOperand(i: `0`).getValueType();
826	if (!IsLoadStore && SrcVT == MVT::i8)
827	return AArch64_AM::UXTB;
828	else if (!IsLoadStore && SrcVT == MVT::i16)
829	return AArch64_AM::UXTH;
830	else if (SrcVT == MVT::i32)
831	return AArch64_AM::UXTW;
832	assert(SrcVT != MVT::i64 && "extend from 64-bits?");
833
834	return AArch64_AM::InvalidShiftExtend;
835	} else if (N.getOpcode() == ISD::AND) {
836	ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
837	if (!CSD)
838	return AArch64_AM::InvalidShiftExtend;
839	uint64_t AndMask = CSD->getZExtValue();
840
841	switch (AndMask) {
842	default:
843	return AArch64_AM::InvalidShiftExtend;
844	case `0xFF`:
845	return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
846	case `0xFFFF`:
847	return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
848	case `0xFFFFFFFF`:
849	return AArch64_AM::UXTW;
850	}
851	}
852
853	return AArch64_AM::InvalidShiftExtend;
854	}
855
856	/// Determine whether it is worth to fold V into an extended register of an
857	/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
858	/// instruction, and the shift should be treated as worth folding even if has
859	/// multiple uses.
860	bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
861	// Trivial if we are optimizing for code size or if there is only
862	// one use of the value.
863	if (CurDAG->shouldOptForSize() \|\| V.hasOneUse())
864	return true;
865
866	// If a subtarget has a fastpath LSL we can fold a logical shift into
867	// the add/sub and save a cycle.
868	if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
869	V.getConstantOperandVal(i: `1`) <= `4` &&
870	getExtendTypeForNode(N: V.getOperand(i: `0`)) == AArch64_AM::InvalidShiftExtend)
871	return true;
872
873	// It hurts otherwise, since the value will be reused.
874	return false;
875	}
876
877	/// SelectShiftedRegister - Select a "shifted register" operand. If the value
878	/// is not shifted, set the Shift operand to default of "LSL 0". The logical
879	/// instructions allow the shifted register to be rotated, but the arithmetic
880	/// instructions do not. The AllowROR parameter specifies whether ROR is
881	/// supported.
882	bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
883	SDValue &Reg, SDValue &Shift) {
884	if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
885	return true;
886
887	AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
888	if (ShType == AArch64_AM::InvalidShiftExtend)
889	return false;
890	if (!AllowROR && ShType == AArch64_AM::ROR)
891	return false;
892
893	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
894	unsigned BitSize = N.getValueSizeInBits();
895	unsigned Val = RHS->getZExtValue() & (BitSize - `1`);
896	unsigned ShVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
897
898	Reg = N.getOperand(i: `0`);
899	Shift = CurDAG->getTargetConstant(Val: ShVal, DL: SDLoc (N), VT: MVT::i32);
900	return isWorthFoldingALU(V: N, LSL: true);
901	}
902
903	return false;
904	}
905
906	/// Instructions that accept extend modifiers like UXTW expect the register
907	/// being extended to be a GPR32, but the incoming DAG might be acting on a
908	/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
909	/// this is the case.
910	static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
911	if (N.getValueType() == MVT::i32)
912	return N;
913
914	SDLoc dl(N);
915	return CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i32, Operand: N);
916	}
917
918	// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALEN.*
919	template<signed Low, signed High, signed Scale>
920	bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
921	if (!isa<ConstantSDNode>(Val: N))
922	return false;
923
924	int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
925	if ((MulImm % std::abs(x: Scale)) == `0`) {
926	int64_t RDVLImm = MulImm / Scale;
927	if ((RDVLImm >= Low) && (RDVLImm <= High)) {
928	Imm = CurDAG->getTargetConstant(Val: RDVLImm, DL: SDLoc (N), VT: MVT::i32);
929	return true;
930	}
931	}
932
933	return false;
934	}
935
936	/// SelectArithExtendedRegister - Select a "extended register" operand. This
937	/// operand folds in an extend followed by an optional left shift.
938	bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
939	SDValue &Shift) {
940	unsigned ShiftVal = `0`;
941	AArch64_AM::ShiftExtendType Ext;
942
943	if (N.getOpcode() == ISD::SHL) {
944	ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
945	if (!CSD)
946	return false;
947	ShiftVal = CSD->getZExtValue();
948	if (ShiftVal > `4`)
949	return false;
950
951	Ext = getExtendTypeForNode(N: N.getOperand(i: `0`));
952	if (Ext == AArch64_AM::InvalidShiftExtend)
953	return false;
954
955	Reg = N.getOperand(i: `0`).getOperand(i: `0`);
956	} else {
957	Ext = getExtendTypeForNode(N);
958	if (Ext == AArch64_AM::InvalidShiftExtend)
959	return false;
960
961	Reg = N.getOperand(i: `0`);
962
963	// Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
964	// isDef32 as a heuristic for when the operand is likely to be a 32bit def.
965	auto isDef32 = [](SDValue N) {
966	unsigned Opc = N.getOpcode();
967	return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
968	Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
969	Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
970	Opc != ISD::FREEZE;
971	};
972	if (Ext == AArch64_AM::UXTW && Reg ->getValueType(ResNo: `0`).getSizeInBits() == `32` &&
973	isDef32 (Reg))
974	return false;
975	}
976
977	// AArch64 mandates that the RHS of the operation must use the smallest
978	// register class that could contain the size being extended from. Thus,
979	// if we're folding a (sext i8), we need the RHS to be a GPR32, even though
980	// there might not be an actual 32-bit value in the program. We can
981	// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
982	assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
983	Reg = narrowIfNeeded(CurDAG, N: Reg);
984	Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc (N),
985	VT: MVT::i32);
986	return isWorthFoldingALU(V: N);
987	}
988
989	/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
990	/// operand is refered by the instructions have SP operand
991	bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
992	SDValue &Shift) {
993	unsigned ShiftVal = `0`;
994	AArch64_AM::ShiftExtendType Ext;
995
996	if (N.getOpcode() != ISD::SHL)
997	return false;
998
999	ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
1000	if (!CSD)
1001	return false;
1002	ShiftVal = CSD->getZExtValue();
1003	if (ShiftVal > `4`)
1004	return false;
1005
1006	Ext = AArch64_AM::UXTX;
1007	Reg = N.getOperand(i: `0`);
1008	Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc (N),
1009	VT: MVT::i32);
1010	return isWorthFoldingALU(V: N);
1011	}
1012
1013	/// If there's a use of this ADDlow that's not itself a load/store then we'll
1014	/// need to create a real ADD instruction from it anyway and there's no point in
1015	/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1016	/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1017	/// leads to duplicated ADRP instructions.
1018	static bool isWorthFoldingADDlow(SDValue N) {
1019	for (auto *Use : N ->uses()) {
1020	if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1021	Use->getOpcode() != ISD::ATOMIC_LOAD &&
1022	Use->getOpcode() != ISD::ATOMIC_STORE)
1023	return false;
1024
1025	// ldar and stlr have much more restrictive addressing modes (just a
1026	// register).
1027	if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: Use)->getSuccessOrdering()))
1028	return false;
1029	}
1030
1031	return true;
1032	}
1033
1034	/// Check if the immediate offset is valid as a scaled immediate.
1035	static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1036	unsigned Size) {
1037	if ((Offset & (Size - `1`)) == `0` && Offset >= `0` &&
1038	Offset < (Range << Log2_32(Value: Size)))
1039	return true;
1040	return false;
1041	}
1042
1043	/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1044	/// immediate" address. The "Size" argument is the size in bytes of the memory
1045	/// reference, which determines the scale.
1046	bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1047	unsigned BW, unsigned Size,
1048	SDValue &Base,
1049	SDValue &OffImm) {
1050	SDLoc dl(N);
1051	const DataLayout &DL = CurDAG->getDataLayout();
1052	const TargetLowering *TLI = getTargetLowering();
1053	if (N.getOpcode() == ISD::FrameIndex) {
1054	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1055	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1056	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i64);
1057	return true;
1058	}
1059
1060	// As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1061	// selected here doesn't support labels/immediates, only base+offset.
1062	if (CurDAG->isBaseWithConstantOffset(Op: N)) {
1063	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1064	if (IsSignedImm) {
1065	int64_t RHSC = RHS->getSExtValue();
1066	unsigned Scale = Log2_32(Value: Size);
1067	int64_t Range = `0x1LL` << (BW - `1`);
1068
1069	if ((RHSC & (Size - `1`)) == `0` && RHSC >= -(Range << Scale) &&
1070	RHSC < (Range << Scale)) {
1071	Base = N.getOperand(i: `0`);
1072	if (Base.getOpcode() == ISD::FrameIndex) {
1073	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1074	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1075	}
1076	OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
1077	return true;
1078	}
1079	} else {
1080	// unsigned Immediate
1081	uint64_t RHSC = RHS->getZExtValue();
1082	unsigned Scale = Log2_32(Value: Size);
1083	uint64_t Range = `0x1ULL` << BW;
1084
1085	if ((RHSC & (Size - `1`)) == `0` && RHSC < (Range << Scale)) {
1086	Base = N.getOperand(i: `0`);
1087	if (Base.getOpcode() == ISD::FrameIndex) {
1088	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1089	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1090	}
1091	OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
1092	return true;
1093	}
1094	}
1095	}
1096	}
1097	// Base only. The address will be materialized into a register before
1098	// the memory is accessed.
1099	// add x0, Xbase, #offset
1100	// stp x1, x2, [x0]
1101	Base = N;
1102	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i64);
1103	return true;
1104	}
1105
1106	/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1107	/// immediate" address. The "Size" argument is the size in bytes of the memory
1108	/// reference, which determines the scale.
1109	bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1110	SDValue &Base, SDValue &OffImm) {
1111	SDLoc dl(N);
1112	const DataLayout &DL = CurDAG->getDataLayout();
1113	const TargetLowering *TLI = getTargetLowering();
1114	if (N.getOpcode() == ISD::FrameIndex) {
1115	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1116	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1117	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i64);
1118	return true;
1119	}
1120
1121	if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1122	GlobalAddressSDNode *GAN =
1123	dyn_cast<GlobalAddressSDNode>(Val: N.getOperand(i: `1`).getNode());
1124	Base = N.getOperand(i: `0`);
1125	OffImm = N.getOperand(i: `1`);
1126	if (!GAN)
1127	return true;
1128
1129	if (GAN->getOffset() % Size == `0` &&
1130	GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1131	return true;
1132	}
1133
1134	if (CurDAG->isBaseWithConstantOffset(Op: N)) {
1135	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1136	int64_t RHSC = (int64_t)RHS->getZExtValue();
1137	unsigned Scale = Log2_32(Value: Size);
1138	if (isValidAsScaledImmediate(Offset: RHSC, Range: `0x1000`, Size)) {
1139	Base = N.getOperand(i: `0`);
1140	if (Base.getOpcode() == ISD::FrameIndex) {
1141	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1142	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1143	}
1144	OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
1145	return true;
1146	}
1147	}
1148	}
1149
1150	// Before falling back to our general case, check if the unscaled
1151	// instructions can handle this. If so, that's preferable.
1152	if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1153	return false;
1154
1155	// Base only. The address will be materialized into a register before
1156	// the memory is accessed.
1157	// add x0, Xbase, #offset
1158	// ldr x0, [x0]
1159	Base = N;
1160	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i64);
1161	return true;
1162	}
1163
1164	/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1165	/// immediate" address. This should only match when there is an offset that
1166	/// is not valid for a scaled immediate addressing mode. The "Size" argument
1167	/// is the size in bytes of the memory reference, which is needed here to know
1168	/// what is valid for a scaled immediate.
1169	bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1170	SDValue &Base,
1171	SDValue &OffImm) {
1172	if (!CurDAG->isBaseWithConstantOffset(Op: N))
1173	return false;
1174	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1175	int64_t RHSC = RHS->getSExtValue();
1176	if (RHSC >= -`256` && RHSC < `256`) {
1177	Base = N.getOperand(i: `0`);
1178	if (Base.getOpcode() == ISD::FrameIndex) {
1179	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1180	const TargetLowering *TLI = getTargetLowering();
1181	Base = CurDAG->getTargetFrameIndex(
1182	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1183	}
1184	OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i64);
1185	return true;
1186	}
1187	}
1188	return false;
1189	}
1190
1191	static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1192	SDLoc dl(N);
1193	SDValue ImpDef = SDValue (
1194	CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: MVT::i64), `0`);
1195	return CurDAG->getTargetInsertSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i64, Operand: ImpDef,
1196	Subreg: N);
1197	}
1198
1199	/// Check if the given SHL node (\p N), can be used to form an
1200	/// extended register for an addressing mode.
1201	bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1202	bool WantExtend, SDValue &Offset,
1203	SDValue &SignExtend) {
1204	assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1205	ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
1206	if (!CSD \|\| (CSD->getZExtValue() & `0x7`) != CSD->getZExtValue())
1207	return false;
1208
1209	SDLoc dl(N);
1210	if (WantExtend) {
1211	AArch64_AM::ShiftExtendType Ext =
1212	getExtendTypeForNode(N: N.getOperand(i: `0`), IsLoadStore: true);
1213	if (Ext == AArch64_AM::InvalidShiftExtend)
1214	return false;
1215
1216	Offset = narrowIfNeeded(CurDAG, N: N.getOperand(i: `0`).getOperand(i: `0`));
1217	SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
1218	VT: MVT::i32);
1219	} else {
1220	Offset = N.getOperand(i: `0`);
1221	SignExtend = CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32);
1222	}
1223
1224	unsigned LegalShiftVal = Log2_32(Value: Size);
1225	unsigned ShiftVal = CSD->getZExtValue();
1226
1227	if (ShiftVal != `0` && ShiftVal != LegalShiftVal)
1228	return false;
1229
1230	return isWorthFoldingAddr(V: N, Size);
1231	}
1232
1233	bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1234	SDValue &Base, SDValue &Offset,
1235	SDValue &SignExtend,
1236	SDValue &DoShift) {
1237	if (N.getOpcode() != ISD::ADD)
1238	return false;
1239	SDValue LHS = N.getOperand(i: `0`);
1240	SDValue RHS = N.getOperand(i: `1`);
1241	SDLoc dl(N);
1242
1243	// We don't want to match immediate adds here, because they are better lowered
1244	// to the register-immediate addressing modes.
1245	if (isa<ConstantSDNode>(Val: LHS) \|\| isa<ConstantSDNode>(Val: RHS))
1246	return false;
1247
1248	// Check if this particular node is reused in any non-memory related
1249	// operation. If yes, do not try to fold this node into the address
1250	// computation, since the computation will be kept.
1251	const SDNode *Node = N.getNode();
1252	for (SDNode *UI : Node->uses()) {
1253	if (!isa<MemSDNode>(Val: *UI))
1254	return false;
1255	}
1256
1257	// Remember if it is worth folding N when it produces extended register.
1258	bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1259
1260	// Try to match a shifted extend on the RHS.
1261	if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1262	SelectExtendedSHL(N: RHS, Size, WantExtend: true, Offset, SignExtend)) {
1263	Base = LHS;
1264	DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
1265	return true;
1266	}
1267
1268	// Try to match a shifted extend on the LHS.
1269	if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1270	SelectExtendedSHL(N: LHS, Size, WantExtend: true, Offset, SignExtend)) {
1271	Base = RHS;
1272	DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
1273	return true;
1274	}
1275
1276	// There was no shift, whatever else we find.
1277	DoShift = CurDAG->getTargetConstant(Val: false, DL: dl, VT: MVT::i32);
1278
1279	AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1280	// Try to match an unshifted extend on the LHS.
1281	if (IsExtendedRegisterWorthFolding &&
1282	(Ext = getExtendTypeForNode(N: LHS, IsLoadStore: true)) !=
1283	AArch64_AM::InvalidShiftExtend) {
1284	Base = RHS;
1285	Offset = narrowIfNeeded(CurDAG, N: LHS.getOperand(i: `0`));
1286	SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
1287	VT: MVT::i32);
1288	if (isWorthFoldingAddr(V: LHS, Size))
1289	return true;
1290	}
1291
1292	// Try to match an unshifted extend on the RHS.
1293	if (IsExtendedRegisterWorthFolding &&
1294	(Ext = getExtendTypeForNode(N: RHS, IsLoadStore: true)) !=
1295	AArch64_AM::InvalidShiftExtend) {
1296	Base = LHS;
1297	Offset = narrowIfNeeded(CurDAG, N: RHS.getOperand(i: `0`));
1298	SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
1299	VT: MVT::i32);
1300	if (isWorthFoldingAddr(V: RHS, Size))
1301	return true;
1302	}
1303
1304	return false;
1305	}
1306
// Check if the given immediate is preferred by ADD. Returns true if the
// immediate can be encoded in a plain ADD, or if it can be encoded in an
// "ADD LSL #12" and cannot be encoded by one MOVZ.
static bool isPreferredADD(int64_t ImmOff) {
  // Bits that must be clear for a 12-bit ADD immediate ([0x0, 0xfff]).
  const bool FitsPlainAdd = (ImmOff & 0xfffffffffffff000LL) == 0x0LL;
  if (FitsPlainAdd)
    return true;

  // Bits that must be clear for a 12-bit immediate shifted left by 12.
  const bool FitsShiftedAdd = (ImmOff & 0xffffffffff000fffLL) == 0x0LL;
  if (!FitsShiftedAdd)
    return false;

  // As a single MOVZ is faster than an "ADD of LSL #12", ignore constants
  // whose set bits all lie in bits [16, 23] or all lie in bits [12, 15].
  const bool OnlyBits16To23 = (ImmOff & 0xffffffffff00ffffLL) == 0x0LL;
  const bool OnlyBits12To15 = (ImmOff & 0xffffffffffff0fffLL) == 0x0LL;
  return !OnlyBits16To23 && !OnlyBits12To15;
}
1321
1322	bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1323	SDValue &Base, SDValue &Offset,
1324	SDValue &SignExtend,
1325	SDValue &DoShift) {
1326	if (N.getOpcode() != ISD::ADD)
1327	return false;
1328	SDValue LHS = N.getOperand(i: `0`);
1329	SDValue RHS = N.getOperand(i: `1`);
1330	SDLoc DL(N);
1331
1332	// Check if this particular node is reused in any non-memory related
1333	// operation. If yes, do not try to fold this node into the address
1334	// computation, since the computation will be kept.
1335	const SDNode *Node = N.getNode();
1336	for (SDNode *UI : Node->uses()) {
1337	if (!isa<MemSDNode>(Val: *UI))
1338	return false;
1339	}
1340
1341	// Watch out if RHS is a wide immediate, it can not be selected into
1342	// [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1343	// ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1344	// instructions like:
1345	// MOV X0, WideImmediate
1346	// ADD X1, BaseReg, X0
1347	// LDR X2, [X1, 0]
1348	// For such situation, using [BaseReg, XReg] addressing mode can save one
1349	// ADD/SUB:
1350	// MOV X0, WideImmediate
1351	// LDR X2, [BaseReg, X0]
1352	if (isa<ConstantSDNode>(Val: RHS)) {
1353	int64_t ImmOff = (int64_t)RHS ->getAsZExtVal();
1354	// Skip the immediate can be selected by load/store addressing mode.
1355	// Also skip the immediate can be encoded by a single ADD (SUB is also
1356	// checked by using -ImmOff).
1357	if (isValidAsScaledImmediate(Offset: ImmOff, Range: `0x1000`, Size) \|\|
1358	isPreferredADD(ImmOff) \|\| isPreferredADD(ImmOff: -ImmOff))
1359	return false;
1360
1361	SDValue Ops[] = { RHS };
1362	SDNode *MOVI =
1363	CurDAG->getMachineNode(Opcode: AArch64::MOVi64imm, dl: DL, VT: MVT::i64, Ops);
1364	SDValue MOVIV = SDValue (MOVI, `0`);
1365	// This ADD of two X register will be selected into [Reg+Reg] mode.
1366	N = CurDAG->getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: LHS, N2: MOVIV);
1367	}
1368
1369	// Remember if it is worth folding N when it produces extended register.
1370	bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1371
1372	// Try to match a shifted extend on the RHS.
1373	if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1374	SelectExtendedSHL(N: RHS, Size, WantExtend: false, Offset, SignExtend)) {
1375	Base = LHS;
1376	DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1377	return true;
1378	}
1379
1380	// Try to match a shifted extend on the LHS.
1381	if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1382	SelectExtendedSHL(N: LHS, Size, WantExtend: false, Offset, SignExtend)) {
1383	Base = RHS;
1384	DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1385	return true;
1386	}
1387
1388	// Match any non-shifted, non-extend, non-immediate add expression.
1389	Base = LHS;
1390	Offset = RHS;
1391	SignExtend = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1392	DoShift = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1393	// Reg1 + Reg2 is free: no check needed.
1394	return true;
1395	}
1396
1397	SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1398	static const unsigned RegClassIDs[] = {
1399	AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1400	static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1401	AArch64::dsub2, AArch64::dsub3};
1402
1403	return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1404	}
1405
1406	SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1407	static const unsigned RegClassIDs[] = {
1408	AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1409	static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1410	AArch64::qsub2, AArch64::qsub3};
1411
1412	return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1413	}
1414
1415	SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1416	static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1417	AArch64::ZPR3RegClassID,
1418	AArch64::ZPR4RegClassID};
1419	static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1420	AArch64::zsub2, AArch64::zsub3};
1421
1422	return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1423	}
1424
1425	SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1426	assert(Regs.size() == `2` \|\| Regs.size() == `4`);
1427
1428	// The createTuple interface requires 3 RegClassIDs for each possible
1429	// tuple type even though we only have them for ZPR2 and ZPR4.
1430	static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, `0`,
1431	AArch64::ZPR4Mul4RegClassID};
1432	static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1433	AArch64::zsub2, AArch64::zsub3};
1434	return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1435	}
1436
1437	SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1438	const unsigned RegClassIDs[],
1439	const unsigned SubRegs[]) {
1440	// There's no special register-class for a vector-list of 1 element: it's just
1441	// a vector.
1442	if (Regs.size() == `1`)
1443	return Regs [`0`];
1444
1445	assert(Regs.size() >= `2` && Regs.size() <= `4`);
1446
1447	SDLoc DL(Regs [`0`]);
1448
1449	SmallVector<SDValue, `4`> Ops;
1450
1451	// First operand of REG_SEQUENCE is the desired RegClass.
1452	Ops.push_back(
1453	Elt: CurDAG->getTargetConstant(Val: RegClassIDs[Regs.size() - `2`], DL, VT: MVT::i32));
1454
1455	// Then we get pairs of source & subregister-position for the components.
1456	for (unsigned i = `0`; i < Regs.size(); ++i) {
1457	Ops.push_back(Elt: Regs [i]);
1458	Ops.push_back(Elt: CurDAG->getTargetConstant(Val: SubRegs[i], DL, VT: MVT::i32));
1459	}
1460
1461	SDNode *N =
1462	CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped, Ops);
1463	return SDValue (N, `0`);
1464	}
1465
1466	void AArch64DAGToDAGISel::SelectTable(SDNode N, unsigned* NumVecs, unsigned Opc,
1467	bool isExt) {
1468	SDLoc dl(N);
1469	EVT VT = N->getValueType(ResNo: `0`);
1470
1471	unsigned ExtOff = isExt;
1472
1473	// Form a REG_SEQUENCE to force register allocation.
1474	unsigned Vec0Off = ExtOff + `1`;
1475	SmallVector<SDValue, `4`> Regs(N->op_begin() + Vec0Off,
1476	N->op_begin() + Vec0Off + NumVecs);
1477	SDValue RegSeq = createQTuple(Regs);
1478
1479	SmallVector<SDValue, `6`> Ops;
1480	if (isExt)
1481	Ops.push_back(Elt: N->getOperand(Num: `1`));
1482	Ops.push_back(Elt: RegSeq);
1483	Ops.push_back(Elt: N->getOperand(Num: NumVecs + ExtOff + `1`));
1484	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
1485	}
1486
1487	static std::tuple<SDValue, SDValue>
1488	extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1489	SDLoc DL(Disc);
1490	SDValue AddrDisc;
1491	SDValue ConstDisc;
1492
1493	// If this is a blend, remember the constant and address discriminators.
1494	// Otherwise, it's either a constant discriminator, or a non-blended
1495	// address discriminator.
1496	if (Disc ->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1497	Disc ->getConstantOperandVal(Num: `0`) == Intrinsic::ptrauth_blend) {
1498	AddrDisc = Disc ->getOperand(Num: `1`);
1499	ConstDisc = Disc ->getOperand(Num: `2`);
1500	} else {
1501	ConstDisc = Disc;
1502	}
1503
1504	// If the constant discriminator (either the blend RHS, or the entire
1505	// discriminator value) isn't a 16-bit constant, bail out, and let the
1506	// discriminator be computed separately.
1507	auto *ConstDiscN = dyn_cast<ConstantSDNode>(Val&: ConstDisc);
1508	if (!ConstDiscN \|\| !isUInt<`16`>(x: ConstDiscN->getZExtValue()))
1509	return std::make_tuple(args: DAG->getTargetConstant(Val: `0`, DL, VT: MVT::i64), args&: Disc);
1510
1511	// If there's no address discriminator, use XZR directly.
1512	if (!AddrDisc)
1513	AddrDisc = DAG->getRegister(Reg: AArch64::XZR, VT: MVT::i64);
1514
1515	return std::make_tuple(
1516	args: DAG->getTargetConstant(Val: ConstDiscN->getZExtValue(), DL, VT: MVT::i64),
1517	args&: AddrDisc);
1518	}
1519
1520	void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1521	SDLoc DL(N);
1522	// IntrinsicID is operand #0
1523	SDValue Val = N->getOperand(Num: `1`);
1524	SDValue AUTKey = N->getOperand(Num: `2`);
1525	SDValue AUTDisc = N->getOperand(Num: `3`);
1526
1527	unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
1528	AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);
1529
1530	SDValue AUTAddrDisc, AUTConstDisc;
1531	std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
1532	extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);
1533
1534	SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
1535	Reg: AArch64::X16, N: Val, Glue: SDValue ());
1536	SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(R: `1`)};
1537
1538	SDNode *AUT = CurDAG->getMachineNode(Opcode: AArch64::AUT, dl: DL, VT: MVT::i64, Ops);
1539	ReplaceNode(F: N, T: AUT);
1540	return;
1541	}
1542
1543	void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1544	SDLoc DL(N);
1545	// IntrinsicID is operand #0
1546	SDValue Val = N->getOperand(Num: `1`);
1547	SDValue AUTKey = N->getOperand(Num: `2`);
1548	SDValue AUTDisc = N->getOperand(Num: `3`);
1549	SDValue PACKey = N->getOperand(Num: `4`);
1550	SDValue PACDisc = N->getOperand(Num: `5`);
1551
1552	unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
1553	unsigned PACKeyC = cast<ConstantSDNode>(Val&: PACKey)->getZExtValue();
1554
1555	AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);
1556	PACKey = CurDAG->getTargetConstant(Val: PACKeyC, DL, VT: MVT::i64);
1557
1558	SDValue AUTAddrDisc, AUTConstDisc;
1559	std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
1560	extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);
1561
1562	SDValue PACAddrDisc, PACConstDisc;
1563	std::tie(args&: PACConstDisc, args&: PACAddrDisc) =
1564	extractPtrauthBlendDiscriminators(Disc: PACDisc, DAG: CurDAG);
1565
1566	SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
1567	Reg: AArch64::X16, N: Val, Glue: SDValue ());
1568
1569	SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1570	PACConstDisc, PACAddrDisc, X16Copy.getValue(R: `1`)};
1571
1572	SDNode *AUTPAC = CurDAG->getMachineNode(Opcode: AArch64::AUTPAC, dl: DL, VT: MVT::i64, Ops);
1573	ReplaceNode(F: N, T: AUTPAC);
1574	return;
1575	}
1576
1577	bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1578	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1579	if (LD->isUnindexed())
1580	return false;
1581	EVT VT = LD->getMemoryVT();
1582	EVT DstVT = N->getValueType(ResNo: `0`);
1583	ISD::MemIndexedMode AM = LD->getAddressingMode();
1584	bool IsPre = AM == ISD::PRE_INC \|\| AM == ISD::PRE_DEC;
1585
1586	// We're not doing validity checking here. That was done when checking
1587	// if we should mark the load as indexed or not. We're just selecting
1588	// the right instruction.
1589	unsigned Opcode = `0`;
1590
1591	ISD::LoadExtType ExtType = LD->getExtensionType();
1592	bool InsertTo64 = false;
1593	if (VT == MVT::i64)
1594	Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1595	else if (VT == MVT::i32) {
1596	if (ExtType == ISD::NON_EXTLOAD)
1597	Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1598	else if (ExtType == ISD::SEXTLOAD)
1599	Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1600	else {
1601	Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1602	InsertTo64 = true;
1603	// The result of the load is only i32. It's the subreg_to_reg that makes
1604	// it into an i64.
1605	DstVT = MVT::i32;
1606	}
1607	} else if (VT == MVT::i16) {
1608	if (ExtType == ISD::SEXTLOAD) {
1609	if (DstVT == MVT::i64)
1610	Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1611	else
1612	Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1613	} else {
1614	Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1615	InsertTo64 = DstVT == MVT::i64;
1616	// The result of the load is only i32. It's the subreg_to_reg that makes
1617	// it into an i64.
1618	DstVT = MVT::i32;
1619	}
1620	} else if (VT == MVT::i8) {
1621	if (ExtType == ISD::SEXTLOAD) {
1622	if (DstVT == MVT::i64)
1623	Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1624	else
1625	Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1626	} else {
1627	Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1628	InsertTo64 = DstVT == MVT::i64;
1629	// The result of the load is only i32. It's the subreg_to_reg that makes
1630	// it into an i64.
1631	DstVT = MVT::i32;
1632	}
1633	} else if (VT == MVT::f16) {
1634	Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1635	} else if (VT == MVT::bf16) {
1636	Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1637	} else if (VT == MVT::f32) {
1638	Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1639	} else if (VT == MVT::f64 \|\| VT.is64BitVector()) {
1640	Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1641	} else if (VT.is128BitVector()) {
1642	Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1643	} else
1644	return false;
1645	SDValue Chain = LD->getChain();
1646	SDValue Base = LD->getBasePtr();
1647	ConstantSDNode *OffsetOp = cast<ConstantSDNode>(Val: LD->getOffset());
1648	int OffsetVal = (int)OffsetOp->getZExtValue();
1649	SDLoc dl(N);
1650	SDValue Offset = CurDAG->getTargetConstant(Val: OffsetVal, DL: dl, VT: MVT::i64);
1651	SDValue Ops[] = { Base, Offset, Chain };
1652	SDNode *Res = CurDAG->getMachineNode(Opcode, dl, VT1: MVT::i64, VT2: DstVT,
1653	VT3: MVT::Other, Ops);
1654
1655	// Transfer memoperands.
1656	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1657	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Res), NewMemRefs: {MemOp});
1658
1659	// Either way, we're replacing the node, so tell the caller that.
1660	SDValue LoadedVal = SDValue (Res, `1`);
1661	if (InsertTo64) {
1662	SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL: dl, VT: MVT::i32);
1663	LoadedVal =
1664	SDValue (CurDAG->getMachineNode(
1665	Opcode: AArch64::SUBREG_TO_REG, dl, VT: MVT::i64,
1666	Op1: CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i64), Op2: LoadedVal,
1667	Op3: SubReg),
1668	`0`);
1669	}
1670
1671	ReplaceUses(F: SDValue (N, `0`), T: LoadedVal);
1672	ReplaceUses(F: SDValue (N, `1`), T: SDValue (Res, `0`));
1673	ReplaceUses(F: SDValue (N, `2`), T: SDValue (Res, `2`));
1674	CurDAG->RemoveDeadNode(N);
1675	return true;
1676	}
1677
1678	void AArch64DAGToDAGISel::SelectLoad(SDNode N, unsigned* NumVecs, unsigned Opc,
1679	unsigned SubRegIdx) {
1680	SDLoc dl(N);
1681	EVT VT = N->getValueType(ResNo: `0`);
1682	SDValue Chain = N->getOperand(Num: `0`);
1683
1684	SDValue Ops[] = {N->getOperand(Num: `2`), // Mem operand;
1685	Chain};
1686
1687	const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1688
1689	SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
1690	SDValue SuperReg = SDValue (Ld, `0`);
1691	for (unsigned i = `0`; i < NumVecs; ++i)
1692	ReplaceUses(F: SDValue (N, i),
1693	T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));
1694
1695	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (Ld, `1`));
1696
1697	// Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1698	// because it's too simple to have needed special treatment during lowering.
1699	if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: N)) {
1700	MachineMemOperand *MemOp = MemIntr->getMemOperand();
1701	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
1702	}
1703
1704	CurDAG->RemoveDeadNode(N);
1705	}
1706
1707	void AArch64DAGToDAGISel::SelectPostLoad(SDNode N, unsigned* NumVecs,
1708	unsigned Opc, unsigned SubRegIdx) {
1709	SDLoc dl(N);
1710	EVT VT = N->getValueType(ResNo: `0`);
1711	SDValue Chain = N->getOperand(Num: `0`);
1712
1713	SDValue Ops[] = {N->getOperand(Num: `1`), // Mem operand
1714	N->getOperand(Num: `2`), // Incremental
1715	Chain};
1716
1717	const EVT ResTys[] = {MVT::i64, // Type of the write back register
1718	MVT::Untyped, MVT::Other};
1719
1720	SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
1721
1722	// Update uses of write back register
1723	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (Ld, `0`));
1724
1725	// Update uses of vector list
1726	SDValue SuperReg = SDValue (Ld, `1`);
1727	if (NumVecs == `1`)
1728	ReplaceUses(F: SDValue (N, `0`), T: SuperReg);
1729	else
1730	for (unsigned i = `0`; i < NumVecs; ++i)
1731	ReplaceUses(F: SDValue (N, i),
1732	T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));
1733
1734	// Update the chain
1735	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (Ld, `2`));
1736	CurDAG->RemoveDeadNode(N);
1737	}
1738
1739	/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1740	/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1741	/// new Base and an SDValue representing the new offset.
1742	std::tuple<unsigned, SDValue, SDValue>
1743	AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode N, unsigned* Opc_rr,
1744	unsigned Opc_ri,
1745	const SDValue &OldBase,
1746	const SDValue &OldOffset,
1747	unsigned Scale) {
1748	SDValue NewBase = OldBase;
1749	SDValue NewOffset = OldOffset;
1750	// Detect a possible Reg+Imm addressing mode.
1751	const bool IsRegImm = SelectAddrModeIndexedSVE</Min=/-`8`, /Max=/`7`>(
1752	Root: N, N: OldBase, Base&: NewBase, OffImm&: NewOffset);
1753
1754	// Detect a possible reg+reg addressing mode, but only if we haven't already
1755	// detected a Reg+Imm one.
1756	const bool IsRegReg =
1757	!IsRegImm && SelectSVERegRegAddrMode(N: OldBase, Scale, Base&: NewBase, Offset&: NewOffset);
1758
1759	// Select the instruction.
1760	return std::make_tuple(args&: IsRegReg ? Opc_rr : Opc_ri, args&: NewBase, args&: NewOffset);
1761	}
1762
/// Element-type filter used as the template argument of SelectOpcodeFromVT.
/// Enumerators are numbered consecutively from 0.
enum class SelectTypeKind {
  Int1 = 0, // Element type must be i1.
  Int,      // Element type must be i8, i16, i32 or i64.
  FP,       // Element type must be bf16, f16, f32 or f64.
  AnyType,  // No element-type restriction.
};
1769
1770	/// This function selects an opcode from a list of opcodes, which is
1771	/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1772	/// element types, in this order.
1773	template <SelectTypeKind Kind>
1774	static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1775	// Only match scalable vector VTs
1776	if (!VT.isScalableVector())
1777	return `0`;
1778
1779	EVT EltVT = VT.getVectorElementType();
1780	unsigned Key = VT.getVectorMinNumElements();
1781	switch (Kind) {
1782	case SelectTypeKind::AnyType:
1783	break;
1784	case SelectTypeKind::Int:
1785	if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1786	EltVT != MVT::i64)
1787	return `0`;
1788	break;
1789	case SelectTypeKind::Int1:
1790	if (EltVT != MVT::i1)
1791	return `0`;
1792	break;
1793	case SelectTypeKind::FP:
1794	if (EltVT == MVT::bf16)
1795	Key = `16`;
1796	else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1797	EltVT != MVT::f64)
1798	return `0`;
1799	break;
1800	}
1801
1802	unsigned Offset;
1803	switch (Key) {
1804	case `16`: // 8-bit or bf16
1805	Offset = `0`;
1806	break;
1807	case `8`: // 16-bit
1808	Offset = `1`;
1809	break;
1810	case `4`: // 32-bit
1811	Offset = `2`;
1812	break;
1813	case `2`: // 64-bit
1814	Offset = `3`;
1815	break;
1816	default:
1817	return `0`;
1818	}
1819
1820	return (Opcodes.size() <= Offset) ? `0` : Opcodes [Offset];
1821	}
1822
1823	// This function is almost identical to SelectWhilePair, but has an
1824	// extra check on the range of the immediate operand.
1825	// TODO: Merge these two functions together at some point?
1826	void AArch64DAGToDAGISel::SelectPExtPair(SDNode N, unsigned* Opc) {
1827	// Immediate can be either 0 or 1.
1828	if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `2`)))
1829	if (Imm->getZExtValue() > `1`)
1830	return;
1831
1832	SDLoc DL(N);
1833	EVT VT = N->getValueType(ResNo: `0`);
1834	SDValue Ops[] = {N->getOperand(Num: `1`), N->getOperand(Num: `2`)};
1835	SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
1836	SDValue SuperReg = SDValue (WhilePair, `0`);
1837
1838	for (unsigned I = `0`; I < `2`; ++I)
1839	ReplaceUses(F: SDValue (N, I), T: CurDAG->getTargetExtractSubreg(
1840	SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));
1841
1842	CurDAG->RemoveDeadNode(N);
1843	}
1844
1845	void AArch64DAGToDAGISel::SelectWhilePair(SDNode N, unsigned* Opc) {
1846	SDLoc DL(N);
1847	EVT VT = N->getValueType(ResNo: `0`);
1848
1849	SDValue Ops[] = {N->getOperand(Num: `1`), N->getOperand(Num: `2`)};
1850
1851	SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
1852	SDValue SuperReg = SDValue (WhilePair, `0`);
1853
1854	for (unsigned I = `0`; I < `2`; ++I)
1855	ReplaceUses(F: SDValue (N, I), T: CurDAG->getTargetExtractSubreg(
1856	SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));
1857
1858	CurDAG->RemoveDeadNode(N);
1859	}
1860
1861	void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode N, unsigned* NumVecs,
1862	unsigned Opcode) {
1863	EVT VT = N->getValueType(ResNo: `0`);
1864	SmallVector<SDValue, `4`> Regs(N->op_begin() + `1`, N->op_begin() + `1` + NumVecs);
1865	SDValue Ops = createZTuple(Regs);
1866	SDLoc DL(N);
1867	SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Op1: Ops);
1868	SDValue SuperReg = SDValue (Intrinsic, `0`);
1869	for (unsigned i = `0`; i < NumVecs; ++i)
1870	ReplaceUses(F: SDValue (N, i), T: CurDAG->getTargetExtractSubreg(
1871	SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
1872
1873	CurDAG->RemoveDeadNode(N);
1874	}
1875
1876	void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1877	unsigned NumVecs,
1878	bool IsZmMulti,
1879	unsigned Opcode,
1880	bool HasPred) {
1881	assert(Opcode != `0` && "Unexpected opcode");
1882
1883	SDLoc DL(N);
1884	EVT VT = N->getValueType(ResNo: `0`);
1885	unsigned FirstVecIdx = HasPred ? `2` : `1`;
1886
1887	auto GetMultiVecOperand = [=](unsigned StartIdx) {
1888	SmallVector<SDValue, `4`> Regs(N->op_begin() + StartIdx,
1889	N->op_begin() + StartIdx + NumVecs);
1890	return createZMulTuple(Regs);
1891	};
1892
1893	SDValue Zdn = GetMultiVecOperand (FirstVecIdx);
1894
1895	SDValue Zm;
1896	if (IsZmMulti)
1897	Zm = GetMultiVecOperand (NumVecs + FirstVecIdx);
1898	else
1899	Zm = N->getOperand(Num: NumVecs + FirstVecIdx);
1900
1901	SDNode *Intrinsic;
1902	if (HasPred)
1903	Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped,
1904	Op1: N->getOperand(Num: `1`), Op2: Zdn, Op3: Zm);
1905	else
1906	Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Op1: Zdn, Op2: Zm);
1907	SDValue SuperReg = SDValue (Intrinsic, `0`);
1908	for (unsigned i = `0`; i < NumVecs; ++i)
1909	ReplaceUses(F: SDValue (N, i), T: CurDAG->getTargetExtractSubreg(
1910	SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
1911
1912	CurDAG->RemoveDeadNode(N);
1913	}
1914
1915	void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode N, unsigned* NumVecs,
1916	unsigned Scale, unsigned Opc_ri,
1917	unsigned Opc_rr, bool IsIntr) {
1918	assert(Scale < `5` && "Invalid scaling value.");
1919	SDLoc DL(N);
1920	EVT VT = N->getValueType(ResNo: `0`);
1921	SDValue Chain = N->getOperand(Num: `0`);
1922
1923	// Optimize addressing mode.
1924	SDValue Base, Offset;
1925	unsigned Opc;
1926	std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
1927	N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: IsIntr ? `3` : `2`),
1928	OldOffset: CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i64), Scale);
1929
1930	SDValue Ops[] = {N->getOperand(Num: IsIntr ? `2` : `1`), // Predicate
1931	Base, // Memory operand
1932	Offset, Chain};
1933
1934	const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1935
1936	SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
1937	SDValue SuperReg = SDValue (Load, `0`);
1938	for (unsigned i = `0`; i < NumVecs; ++i)
1939	ReplaceUses(F: SDValue (N, i), T: CurDAG->getTargetExtractSubreg(
1940	SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
1941
1942	// Copy chain
1943	unsigned ChainIdx = NumVecs;
1944	ReplaceUses(F: SDValue (N, ChainIdx), T: SDValue (Load, `1`));
1945	CurDAG->RemoveDeadNode(N);
1946	}
1947
1948	void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1949	unsigned NumVecs,
1950	unsigned Scale,
1951	unsigned Opc_ri,
1952	unsigned Opc_rr) {
1953	assert(Scale < `4` && "Invalid scaling value.");
1954	SDLoc DL(N);
1955	EVT VT = N->getValueType(ResNo: `0`);
1956	SDValue Chain = N->getOperand(Num: `0`);
1957
1958	SDValue PNg = N->getOperand(Num: `2`);
1959	SDValue Base = N->getOperand(Num: `3`);
1960	SDValue Offset = CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i64);
1961	unsigned Opc;
1962	std::tie(args&: Opc, args&: Base, args&: Offset) =
1963	findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, OldBase: Base, OldOffset: Offset, Scale);
1964
1965	SDValue Ops[] = {PNg, // Predicate-as-counter
1966	Base, // Memory operand
1967	Offset, Chain};
1968
1969	const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1970
1971	SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
1972	SDValue SuperReg = SDValue (Load, `0`);
1973	for (unsigned i = `0`; i < NumVecs; ++i)
1974	ReplaceUses(F: SDValue (N, i), T: CurDAG->getTargetExtractSubreg(
1975	SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
1976
1977	// Copy chain
1978	unsigned ChainIdx = NumVecs;
1979	ReplaceUses(F: SDValue (N, ChainIdx), T: SDValue (Load, `1`));
1980	CurDAG->RemoveDeadNode(N);
1981	}
1982
1983	void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode N, unsigned* NumVecs,
1984	unsigned Opcode) {
1985	if (N->getValueType(ResNo: `0`) != MVT::nxv4f32)
1986	return;
1987	SelectUnaryMultiIntrinsic(N, NumOutVecs: NumVecs, IsTupleInput: true, Opc: Opcode);
1988	}
1989
1990	void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1991	unsigned NumOutVecs,
1992	unsigned Opc, uint32_t MaxImm) {
1993	if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: `4`)))
1994	if (Imm->getZExtValue() > MaxImm)
1995	return;
1996
1997	SDValue ZtValue;
1998	if (!ImmToReg<AArch64::ZT0, `0`>(N: Node->getOperand(Num: `2`), Imm&: ZtValue))
1999	return;
2000	SDValue Ops[] = {ZtValue, Node->getOperand(Num: `3`), Node->getOperand(Num: `4`)};
2001	SDLoc DL(Node);
2002	EVT VT = Node->getValueType(ResNo: `0`);
2003
2004	SDNode *Instruction =
2005	CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2006	SDValue SuperReg = SDValue (Instruction, `0`);
2007
2008	for (unsigned I = `0`; I < NumOutVecs; ++I)
2009	ReplaceUses(F: SDValue (Node, I), T: CurDAG->getTargetExtractSubreg(
2010	SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));
2011
2012	// Copy chain
2013	unsigned ChainIdx = NumOutVecs;
2014	ReplaceUses(F: SDValue (Node, ChainIdx), T: SDValue (Instruction, `1`));
2015	CurDAG->RemoveDeadNode(N: Node);
2016	}
2017
2018	void AArch64DAGToDAGISel::SelectClamp(SDNode N, unsigned* NumVecs,
2019	unsigned Op) {
2020	SDLoc DL(N);
2021	EVT VT = N->getValueType(ResNo: `0`);
2022
2023	SmallVector<SDValue, `4`> Regs(N->op_begin() + `1`, N->op_begin() + `1` + NumVecs);
2024	SDValue Zd = createZMulTuple(Regs);
2025	SDValue Zn = N->getOperand(Num: `1` + NumVecs);
2026	SDValue Zm = N->getOperand(Num: `2` + NumVecs);
2027
2028	SDValue Ops[] = {Zd, Zn, Zm};
2029
2030	SDNode *Intrinsic = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT: MVT::Untyped, Ops);
2031	SDValue SuperReg = SDValue (Intrinsic, `0`);
2032	for (unsigned i = `0`; i < NumVecs; ++i)
2033	ReplaceUses(F: SDValue (N, i), T: CurDAG->getTargetExtractSubreg(
2034	SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2035
2036	CurDAG->RemoveDeadNode(N);
2037	}
2038
2039	bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2040	switch (BaseReg) {
2041	default:
2042	return false;
2043	case AArch64::ZA:
2044	case AArch64::ZAB0:
2045	if (TileNum == `0`)
2046	break;
2047	return false;
2048	case AArch64::ZAH0:
2049	if (TileNum <= `1`)
2050	break;
2051	return false;
2052	case AArch64::ZAS0:
2053	if (TileNum <= `3`)
2054	break;
2055	return false;
2056	case AArch64::ZAD0:
2057	if (TileNum <= `7`)
2058	break;
2059	return false;
2060	}
2061
2062	BaseReg += TileNum;
2063	return true;
2064	}
2065
2066	template <unsigned MaxIdx, unsigned Scale>
2067	void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode N, unsigned* NumVecs,
2068	unsigned BaseReg, unsigned Op) {
2069	unsigned TileNum = `0`;
2070	if (BaseReg != AArch64::ZA)
2071	TileNum = N->getConstantOperandVal(Num: `2`);
2072
2073	if (!SelectSMETile(BaseReg, TileNum))
2074	return;
2075
2076	SDValue SliceBase, Base, Offset;
2077	if (BaseReg == AArch64::ZA)
2078	SliceBase = N->getOperand(Num: `2`);
2079	else
2080	SliceBase = N->getOperand(Num: `3`);
2081
2082	if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
2083	return;
2084
2085	SDLoc DL(N);
2086	SDValue SubReg = CurDAG->getRegister(Reg: BaseReg, VT: MVT::Other);
2087	SDValue Ops[] = {SubReg, Base, Offset, /Chain/ N->getOperand(Num: `0`)};
2088	SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2089
2090	EVT VT = N->getValueType(ResNo: `0`);
2091	for (unsigned I = `0`; I < NumVecs; ++I)
2092	ReplaceUses(F: SDValue (N, I),
2093	T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
2094	Operand: SDValue (Mov, `0`)));
2095	// Copy chain
2096	unsigned ChainIdx = NumVecs;
2097	ReplaceUses(F: SDValue (N, ChainIdx), T: SDValue (Mov, `1`));
2098	CurDAG->RemoveDeadNode(N);
2099	}
2100
2101	void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode N, unsigned* NumVecs,
2102	unsigned Op, unsigned MaxIdx,
2103	unsigned Scale, unsigned BaseReg) {
2104	// Slice can be in different positions
2105	// The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2106	// The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2107	SDValue SliceBase = N->getOperand(Num: `2`);
2108	if (BaseReg != AArch64::ZA)
2109	SliceBase = N->getOperand(Num: `3`);
2110
2111	SDValue Base, Offset;
2112	if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
2113	return;
2114	// The correct Za tile number is computed in Machine Instruction
2115	// See EmitZAInstr
2116	// DAG cannot select Za tile as an output register with ZReg
2117	SDLoc DL(N);
2118	SmallVector<SDValue, `6`> Ops;
2119	if (BaseReg != AArch64::ZA )
2120	Ops.push_back(Elt: N->getOperand(Num: `2`));
2121	Ops.push_back(Elt: Base);
2122	Ops.push_back(Elt: Offset);
2123	Ops.push_back(Elt: N->getOperand(Num: `0`)); //Chain
2124	SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2125
2126	EVT VT = N->getValueType(ResNo: `0`);
2127	for (unsigned I = `0`; I < NumVecs; ++I)
2128	ReplaceUses(F: SDValue (N, I),
2129	T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
2130	Operand: SDValue (Mov, `0`)));
2131
2132	// Copy chain
2133	unsigned ChainIdx = NumVecs;
2134	ReplaceUses(F: SDValue (N, ChainIdx), T: SDValue (Mov, `1`));
2135	CurDAG->RemoveDeadNode(N);
2136	}
2137
2138	void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2139	unsigned NumOutVecs,
2140	bool IsTupleInput,
2141	unsigned Opc) {
2142	SDLoc DL(N);
2143	EVT VT = N->getValueType(ResNo: `0`);
2144	unsigned NumInVecs = N->getNumOperands() - `1`;
2145
2146	SmallVector<SDValue, `6`> Ops;
2147	if (IsTupleInput) {
2148	assert((NumInVecs == `2` \|\| NumInVecs == `4`) &&
2149	"Don't know how to handle multi-register input!");
2150	SmallVector<SDValue, `4`> Regs(N->op_begin() + `1`,
2151	N->op_begin() + `1` + NumInVecs);
2152	Ops.push_back(Elt: createZMulTuple(Regs));
2153	} else {
2154	// All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2155	for (unsigned I = `0`; I < NumInVecs; I++)
2156	Ops.push_back(Elt: N->getOperand(Num: `1` + I));
2157	}
2158
2159	SDNode *Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
2160	SDValue SuperReg = SDValue (Res, `0`);
2161
2162	for (unsigned I = `0`; I < NumOutVecs; I++)
2163	ReplaceUses(F: SDValue (N, I), T: CurDAG->getTargetExtractSubreg(
2164	SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));
2165	CurDAG->RemoveDeadNode(N);
2166	}
2167
2168	void AArch64DAGToDAGISel::SelectStore(SDNode N, unsigned* NumVecs,
2169	unsigned Opc) {
2170	SDLoc dl(N);
2171	EVT VT = N->getOperand(Num: `2`)->getValueType(ResNo: `0`);
2172
2173	// Form a REG_SEQUENCE to force register allocation.
2174	bool Is128Bit = VT.getSizeInBits() == `128`;
2175	SmallVector<SDValue, `4`> Regs(N->op_begin() + `2`, N->op_begin() + `2` + NumVecs);
2176	SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2177
2178	SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + `2`), N->getOperand(Num: `0`)};
2179	SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: `0`), Ops);
2180
2181	// Transfer memoperands.
2182	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2183	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2184
2185	ReplaceNode(F: N, T: St);
2186	}
2187
2188	void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode N, unsigned* NumVecs,
2189	unsigned Scale, unsigned Opc_rr,
2190	unsigned Opc_ri) {
2191	SDLoc dl(N);
2192
2193	// Form a REG_SEQUENCE to force register allocation.
2194	SmallVector<SDValue, `4`> Regs(N->op_begin() + `2`, N->op_begin() + `2` + NumVecs);
2195	SDValue RegSeq = createZTuple(Regs);
2196
2197	// Optimize addressing mode.
2198	unsigned Opc;
2199	SDValue Offset, Base;
2200	std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
2201	N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: NumVecs + `3`),
2202	OldOffset: CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i64), Scale);
2203
2204	SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + `2`), // predicate
2205	Base, // address
2206	Offset, // offset
2207	N->getOperand(Num: `0`)}; // chain
2208	SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: `0`), Ops);
2209
2210	ReplaceNode(F: N, T: St);
2211	}
2212
2213	bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2214	SDValue &OffImm) {
2215	SDLoc dl(N);
2216	const DataLayout &DL = CurDAG->getDataLayout();
2217	const TargetLowering *TLI = getTargetLowering();
2218
2219	// Try to match it for the frame address
2220	if (auto FINode = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2221	int FI = FINode->getIndex();
2222	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
2223	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i64);
2224	return true;
2225	}
2226
2227	return false;
2228	}
2229
2230	void AArch64DAGToDAGISel::SelectPostStore(SDNode N, unsigned* NumVecs,
2231	unsigned Opc) {
2232	SDLoc dl(N);
2233	EVT VT = N->getOperand(Num: `2`)->getValueType(ResNo: `0`);
2234	const EVT ResTys[] = {MVT::i64, // Type of the write back register
2235	MVT::Other}; // Type for the Chain
2236
2237	// Form a REG_SEQUENCE to force register allocation.
2238	bool Is128Bit = VT.getSizeInBits() == `128`;
2239	SmallVector<SDValue, `4`> Regs(N->op_begin() + `1`, N->op_begin() + `1` + NumVecs);
2240	SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2241
2242	SDValue Ops[] = {RegSeq,
2243	N->getOperand(Num: NumVecs + `1`), // base register
2244	N->getOperand(Num: NumVecs + `2`), // Incremental
2245	N->getOperand(Num: `0`)}; // Chain
2246	SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2247
2248	ReplaceNode(F: N, T: St);
2249	}
2250
2251	namespace {
2252	/// WidenVector - Given a value in the V64 register class, produce the
2253	/// equivalent value in the V128 register class.
2254	class WidenVector {
2255	SelectionDAG &DAG;
2256
2257	public:
2258	WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2259
2260	SDValue operator()(SDValue V64Reg) {
2261	EVT VT = V64Reg.getValueType();
2262	unsigned NarrowSize = VT.getVectorNumElements();
2263	MVT EltTy = VT.getVectorElementType().getSimpleVT();
2264	MVT WideTy = MVT::getVectorVT(VT: EltTy, NumElements: `2` * NarrowSize);
2265	SDLoc DL(V64Reg);
2266
2267	SDValue Undef =
2268	SDValue (DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: WideTy), `0`);
2269	return DAG.getTargetInsertSubreg(SRIdx: AArch64::dsub, DL, VT: WideTy, Operand: Undef, Subreg: V64Reg);
2270	}
2271	};
2272	} // namespace
2273
2274	/// NarrowVector - Given a value in the V128 register class, produce the
2275	/// equivalent value in the V64 register class.
2276	static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2277	EVT VT = V128Reg.getValueType();
2278	unsigned WideSize = VT.getVectorNumElements();
2279	MVT EltTy = VT.getVectorElementType().getSimpleVT();
2280	MVT NarrowTy = MVT::getVectorVT(VT: EltTy, NumElements: WideSize / `2`);
2281
2282	return DAG.getTargetExtractSubreg(SRIdx: AArch64::dsub, DL: SDLoc (V128Reg), VT: NarrowTy,
2283	Operand: V128Reg);
2284	}
2285
2286	void AArch64DAGToDAGISel::SelectLoadLane(SDNode N, unsigned* NumVecs,
2287	unsigned Opc) {
2288	SDLoc dl(N);
2289	EVT VT = N->getValueType(ResNo: `0`);
2290	bool Narrow = VT.getSizeInBits() == `64`;
2291
2292	// Form a REG_SEQUENCE to force register allocation.
2293	SmallVector<SDValue, `4`> Regs(N->op_begin() + `2`, N->op_begin() + `2` + NumVecs);
2294
2295	if (Narrow)
2296	transform(Range&: Regs, d_first: Regs.begin(),
2297	F: WidenVector (*CurDAG));
2298
2299	SDValue RegSeq = createQTuple(Regs);
2300
2301	const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2302
2303	unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + `2`);
2304
2305	SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
2306	N->getOperand(Num: NumVecs + `3`), N->getOperand(Num: `0`)};
2307	SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2308	SDValue SuperReg = SDValue (Ld, `0`);
2309
2310	EVT WideVT = RegSeq.getOperand(i: `1`)->getValueType(ResNo: `0`);
2311	static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2312	AArch64::qsub2, AArch64::qsub3 };
2313	for (unsigned i = `0`; i < NumVecs; ++i) {
2314	SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT, Operand: SuperReg);
2315	if (Narrow)
2316	NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
2317	ReplaceUses(F: SDValue (N, i), T: NV);
2318	}
2319
2320	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (Ld, `1`));
2321	CurDAG->RemoveDeadNode(N);
2322	}
2323
2324	void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode N, unsigned* NumVecs,
2325	unsigned Opc) {
2326	SDLoc dl(N);
2327	EVT VT = N->getValueType(ResNo: `0`);
2328	bool Narrow = VT.getSizeInBits() == `64`;
2329
2330	// Form a REG_SEQUENCE to force register allocation.
2331	SmallVector<SDValue, `4`> Regs(N->op_begin() + `1`, N->op_begin() + `1` + NumVecs);
2332
2333	if (Narrow)
2334	transform(Range&: Regs, d_first: Regs.begin(),
2335	F: WidenVector (*CurDAG));
2336
2337	SDValue RegSeq = createQTuple(Regs);
2338
2339	const EVT ResTys[] = {MVT::i64, // Type of the write back register
2340	RegSeq ->getValueType(ResNo: `0`), MVT::Other};
2341
2342	unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + `1`);
2343
2344	SDValue Ops[] = {RegSeq,
2345	CurDAG->getTargetConstant(Val: LaneNo, DL: dl,
2346	VT: MVT::i64), // Lane Number
2347	N->getOperand(Num: NumVecs + `2`), // Base register
2348	N->getOperand(Num: NumVecs + `3`), // Incremental
2349	N->getOperand(Num: `0`)};
2350	SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2351
2352	// Update uses of the write back register
2353	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (Ld, `0`));
2354
2355	// Update uses of the vector list
2356	SDValue SuperReg = SDValue (Ld, `1`);
2357	if (NumVecs == `1`) {
2358	ReplaceUses(F: SDValue (N, `0`),
2359	T: Narrow ? NarrowVector(V128Reg: SuperReg, DAG&: *CurDAG) : SuperReg);
2360	} else {
2361	EVT WideVT = RegSeq.getOperand(i: `1`)->getValueType(ResNo: `0`);
2362	static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2363	AArch64::qsub2, AArch64::qsub3 };
2364	for (unsigned i = `0`; i < NumVecs; ++i) {
2365	SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT,
2366	Operand: SuperReg);
2367	if (Narrow)
2368	NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
2369	ReplaceUses(F: SDValue (N, i), T: NV);
2370	}
2371	}
2372
2373	// Update the Chain
2374	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (Ld, `2`));
2375	CurDAG->RemoveDeadNode(N);
2376	}
2377
2378	void AArch64DAGToDAGISel::SelectStoreLane(SDNode N, unsigned* NumVecs,
2379	unsigned Opc) {
2380	SDLoc dl(N);
2381	EVT VT = N->getOperand(Num: `2`)->getValueType(ResNo: `0`);
2382	bool Narrow = VT.getSizeInBits() == `64`;
2383
2384	// Form a REG_SEQUENCE to force register allocation.
2385	SmallVector<SDValue, `4`> Regs(N->op_begin() + `2`, N->op_begin() + `2` + NumVecs);
2386
2387	if (Narrow)
2388	transform(Range&: Regs, d_first: Regs.begin(),
2389	F: WidenVector (*CurDAG));
2390
2391	SDValue RegSeq = createQTuple(Regs);
2392
2393	unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + `2`);
2394
2395	SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
2396	N->getOperand(Num: NumVecs + `3`), N->getOperand(Num: `0`)};
2397	SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops);
2398
2399	// Transfer memoperands.
2400	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2401	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2402
2403	ReplaceNode(F: N, T: St);
2404	}
2405
2406	void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode N, unsigned* NumVecs,
2407	unsigned Opc) {
2408	SDLoc dl(N);
2409	EVT VT = N->getOperand(Num: `2`)->getValueType(ResNo: `0`);
2410	bool Narrow = VT.getSizeInBits() == `64`;
2411
2412	// Form a REG_SEQUENCE to force register allocation.
2413	SmallVector<SDValue, `4`> Regs(N->op_begin() + `1`, N->op_begin() + `1` + NumVecs);
2414
2415	if (Narrow)
2416	transform(Range&: Regs, d_first: Regs.begin(),
2417	F: WidenVector (*CurDAG));
2418
2419	SDValue RegSeq = createQTuple(Regs);
2420
2421	const EVT ResTys[] = {MVT::i64, // Type of the write back register
2422	MVT::Other};
2423
2424	unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + `1`);
2425
2426	SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
2427	N->getOperand(Num: NumVecs + `2`), // Base Register
2428	N->getOperand(Num: NumVecs + `3`), // Incremental
2429	N->getOperand(Num: `0`)};
2430	SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2431
2432	// Transfer memoperands.
2433	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2434	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2435
2436	ReplaceNode(F: N, T: St);
2437	}
2438
2439	static bool isBitfieldExtractOpFromAnd(SelectionDAG CurDAG, SDNode N,
2440	unsigned &Opc, SDValue &Opd0,
2441	unsigned &LSB, unsigned &MSB,
2442	unsigned NumberOfIgnoredLowBits,
2443	bool BiggerPattern) {
2444	assert(N->getOpcode() == ISD::AND &&
2445	"N must be a AND operation to call this function");
2446
2447	EVT VT = N->getValueType(ResNo: `0`);
2448
2449	// Here we can test the type of VT and return false when the type does not
2450	// match, but since it is done prior to that call in the current context
2451	// we turned that into an assert to avoid redundant code.
2452	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
2453	"Type checking must have been done before calling this function");
2454
2455	// FIXME: simplify-demanded-bits in DAGCombine will probably have
2456	// changed the AND node to a 32-bit mask operation. We'll have to
2457	// undo that as part of the transform here if we want to catch all
2458	// the opportunities.
2459	// Currently the NumberOfIgnoredLowBits argument helps to recover
2460	// from these situations when matching bigger pattern (bitfield insert).
2461
2462	// For unsigned extracts, check for a shift right and mask
2463	uint64_t AndImm = `0`;
2464	if (!isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: AndImm))
2465	return false;
2466
2467	const SDNode *Op0 = N->getOperand(Num: `0`).getNode();
2468
2469	// Because of simplify-demanded-bits in DAGCombine, the mask may have been
2470	// simplified. Try to undo that
2471	AndImm \|= maskTrailingOnes<uint64_t>(N: NumberOfIgnoredLowBits);
2472
2473	// The immediate is a mask of the low bits iff imm & (imm+1) == 0
2474	if (AndImm & (AndImm + `1`))
2475	return false;
2476
2477	bool ClampMSB = false;
2478	uint64_t SrlImm = `0`;
2479	// Handle the SRL + ANY_EXTEND case.
2480	if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2481	isOpcWithIntImmediate(N: Op0->getOperand(Num: `0`).getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
2482	// Extend the incoming operand of the SRL to 64-bit.
2483	Opd0 = Widen(CurDAG, N: Op0->getOperand(Num: `0`).getOperand(i: `0`));
2484	// Make sure to clamp the MSB so that we preserve the semantics of the
2485	// original operations.
2486	ClampMSB = true;
2487	} else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2488	isOpcWithIntImmediate(N: Op0->getOperand(Num: `0`).getNode(), Opc: ISD::SRL,
2489	Imm&: SrlImm)) {
2490	// If the shift result was truncated, we can still combine them.
2491	Opd0 = Op0->getOperand(Num: `0`).getOperand(i: `0`);
2492
2493	// Use the type of SRL node.
2494	VT = Opd0 ->getValueType(ResNo: `0`);
2495	} else if (isOpcWithIntImmediate(N: Op0, Opc: ISD::SRL, Imm&: SrlImm)) {
2496	Opd0 = Op0->getOperand(Num: `0`);
2497	ClampMSB = (VT == MVT::i32);
2498	} else if (BiggerPattern) {
2499	// Let's pretend a 0 shift right has been performed.
2500	// The resulting code will be at least as good as the original one
2501	// plus it may expose more opportunities for bitfield insert pattern.
2502	// FIXME: Currently we limit this to the bigger pattern, because
2503	// some optimizations expect AND and not UBFM.
2504	Opd0 = N->getOperand(Num: `0`);
2505	} else
2506	return false;
2507
2508	// Bail out on large immediates. This happens when no proper
2509	// combining/constant folding was performed.
2510	if (!BiggerPattern && (SrlImm <= `0` \|\| SrlImm >= VT.getSizeInBits())) {
2511	LLVM_DEBUG(
2512	(dbgs() << N
2513	<< ": Found large shift immediate, this should not happen\n"));
2514	return false;
2515	}
2516
2517	LSB = SrlImm;
2518	MSB = SrlImm +
2519	(VT == MVT::i32 ? llvm::countr_one<uint32_t>(Value: AndImm)
2520	: llvm::countr_one<uint64_t>(Value: AndImm)) -
2521	`1`;
2522	if (ClampMSB)
2523	// Since we're moving the extend before the right shift operation, we need
2524	// to clamp the MSB to make sure we don't shift in undefined bits instead of
2525	// the zeros which would get shifted in with the original right shift
2526	// operation.
2527	MSB = MSB > `31` ? `31` : MSB;
2528
2529	Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2530	return true;
2531	}
2532
2533	static bool isBitfieldExtractOpFromSExtInReg(SDNode N, unsigned* &Opc,
2534	SDValue &Opd0, unsigned &Immr,
2535	unsigned &Imms) {
2536	assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2537
2538	EVT VT = N->getValueType(ResNo: `0`);
2539	unsigned BitWidth = VT.getSizeInBits();
2540	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
2541	"Type checking must have been done before calling this function");
2542
2543	SDValue Op = N->getOperand(Num: `0`);
2544	if (Op ->getOpcode() == ISD::TRUNCATE) {
2545	Op = Op ->getOperand(Num: `0`);
2546	VT = Op ->getValueType(ResNo: `0`);
2547	BitWidth = VT.getSizeInBits();
2548	}
2549
2550	uint64_t ShiftImm;
2551	if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRL, Imm&: ShiftImm) &&
2552	!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
2553	return false;
2554
2555	unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: `1`))->getVT().getSizeInBits();
2556	if (ShiftImm + Width > BitWidth)
2557	return false;
2558
2559	Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2560	Opd0 = Op.getOperand(i: `0`);
2561	Immr = ShiftImm;
2562	Imms = ShiftImm + Width - `1`;
2563	return true;
2564	}
2565
2566	static bool isSeveralBitsExtractOpFromShr(SDNode N, unsigned* &Opc,
2567	SDValue &Opd0, unsigned &LSB,
2568	unsigned &MSB) {
2569	// We are looking for the following pattern which basically extracts several
2570	// continuous bits from the source value and places it from the LSB of the
2571	// destination value, all other bits of the destination value or set to zero:
2572	//
2573	// Value2 = AND Value, MaskImm
2574	// SRL Value2, ShiftImm
2575	//
2576	// with MaskImm >> ShiftImm to search for the bit width.
2577	//
2578	// This gets selected into a single UBFM:
2579	//
2580	// UBFM Value, ShiftImm, Log2_64(MaskImm)
2581	//
2582
2583	if (N->getOpcode() != ISD::SRL)
2584	return false;
2585
2586	uint64_t AndMask = `0`;
2587	if (!isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::AND, Imm&: AndMask))
2588	return false;
2589
2590	Opd0 = N->getOperand(Num: `0`).getOperand(i: `0`);
2591
2592	uint64_t SrlImm = `0`;
2593	if (!isIntImmediate(N: N->getOperand(Num: `1`), Imm&: SrlImm))
2594	return false;
2595
2596	// Check whether we really have several bits extract here.
2597	if (!isMask_64(Value: AndMask >> SrlImm))
2598	return false;
2599
2600	Opc = N->getValueType(ResNo: `0`) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2601	LSB = SrlImm;
2602	MSB = llvm::Log2_64(Value: AndMask);
2603	return true;
2604	}
2605
2606	static bool isBitfieldExtractOpFromShr(SDNode N, unsigned* &Opc, SDValue &Opd0,
2607	unsigned &Immr, unsigned &Imms,
2608	bool BiggerPattern) {
2609	assert((N->getOpcode() == ISD::SRA \|\| N->getOpcode() == ISD::SRL) &&
2610	"N must be a SHR/SRA operation to call this function");
2611
2612	EVT VT = N->getValueType(ResNo: `0`);
2613
2614	// Here we can test the type of VT and return false when the type does not
2615	// match, but since it is done prior to that call in the current context
2616	// we turned that into an assert to avoid redundant code.
2617	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
2618	"Type checking must have been done before calling this function");
2619
2620	// Check for AND + SRL doing several bits extract.
2621	if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB&: Immr, MSB&: Imms))
2622	return true;
2623
2624	// We're looking for a shift of a shift.
2625	uint64_t ShlImm = `0`;
2626	uint64_t TruncBits = `0`;
2627	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
2628	Opd0 = N->getOperand(Num: `0`).getOperand(i: `0`);
2629	} else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2630	N->getOperand(Num: `0`).getNode()->getOpcode() == ISD::TRUNCATE) {
2631	// We are looking for a shift of truncate. Truncate from i64 to i32 could
2632	// be considered as setting high 32 bits as zero. Our strategy here is to
2633	// always generate 64bit UBFM. This consistency will help the CSE pass
2634	// later find more redundancy.
2635	Opd0 = N->getOperand(Num: `0`).getOperand(i: `0`);
2636	TruncBits = Opd0 ->getValueType(ResNo: `0`).getSizeInBits() - VT.getSizeInBits();
2637	VT = Opd0.getValueType();
2638	assert(VT == MVT::i64 && "the promoted type should be i64");
2639	} else if (BiggerPattern) {
2640	// Let's pretend a 0 shift left has been performed.
2641	// FIXME: Currently we limit this to the bigger pattern case,
2642	// because some optimizations expect AND and not UBFM
2643	Opd0 = N->getOperand(Num: `0`);
2644	} else
2645	return false;
2646
2647	// Missing combines/constant folding may have left us with strange
2648	// constants.
2649	if (ShlImm >= VT.getSizeInBits()) {
2650	LLVM_DEBUG(
2651	(dbgs() << N
2652	<< ": Found large shift immediate, this should not happen\n"));
2653	return false;
2654	}
2655
2656	uint64_t SrlImm = `0`;
2657	if (!isIntImmediate(N: N->getOperand(Num: `1`), Imm&: SrlImm))
2658	return false;
2659
2660	assert(SrlImm > `0` && SrlImm < VT.getSizeInBits() &&
2661	"bad amount in shift node!");
2662	int immr = SrlImm - ShlImm;
2663	Immr = immr < `0` ? immr + VT.getSizeInBits() : immr;
2664	Imms = VT.getSizeInBits() - ShlImm - TruncBits - `1`;
2665	// SRA requires a signed extraction
2666	if (VT == MVT::i32)
2667	Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2668	else
2669	Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2670	return true;
2671	}
2672
2673	bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2674	assert(N->getOpcode() == ISD::SIGN_EXTEND);
2675
2676	EVT VT = N->getValueType(ResNo: `0`);
2677	EVT NarrowVT = N->getOperand(Num: `0`)->getValueType(ResNo: `0`);
2678	if (VT != MVT::i64 \|\| NarrowVT != MVT::i32)
2679	return false;
2680
2681	uint64_t ShiftImm;
2682	SDValue Op = N->getOperand(Num: `0`);
2683	if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
2684	return false;
2685
2686	SDLoc dl(N);
2687	// Extend the incoming operand of the shift to 64-bits.
2688	SDValue Opd0 = Widen(CurDAG, N: Op.getOperand(i: `0`));
2689	unsigned Immr = ShiftImm;
2690	unsigned Imms = NarrowVT.getSizeInBits() - `1`;
2691	SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
2692	CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
2693	CurDAG->SelectNodeTo(N, MachineOpc: AArch64::SBFMXri, VT, Ops);
2694	return true;
2695	}
2696
2697	static bool isBitfieldExtractOp(SelectionDAG CurDAG, SDNode N, unsigned &Opc,
2698	SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2699	unsigned NumberOfIgnoredLowBits = `0`,
2700	bool BiggerPattern = false) {
2701	if (N->getValueType(ResNo: `0`) != MVT::i32 && N->getValueType(ResNo: `0`) != MVT::i64)
2702	return false;
2703
2704	switch (N->getOpcode()) {
2705	default:
2706	if (!N->isMachineOpcode())
2707	return false;
2708	break;
2709	case ISD::AND:
2710	return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB&: Immr, MSB&: Imms,
2711	NumberOfIgnoredLowBits, BiggerPattern);
2712	case ISD::SRL:
2713	case ISD::SRA:
2714	return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2715
2716	case ISD::SIGN_EXTEND_INREG:
2717	return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2718	}
2719
2720	unsigned NOpc = N->getMachineOpcode();
2721	switch (NOpc) {
2722	default:
2723	return false;
2724	case AArch64::SBFMWri:
2725	case AArch64::UBFMWri:
2726	case AArch64::SBFMXri:
2727	case AArch64::UBFMXri:
2728	Opc = NOpc;
2729	Opd0 = N->getOperand(Num: `0`);
2730	Immr = N->getConstantOperandVal(Num: `1`);
2731	Imms = N->getConstantOperandVal(Num: `2`);
2732	return true;
2733	}
2734	// Unreachable
2735	return false;
2736	}
2737
2738	bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2739	unsigned Opc, Immr, Imms;
2740	SDValue Opd0;
2741	if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2742	return false;
2743
2744	EVT VT = N->getValueType(ResNo: `0`);
2745	SDLoc dl(N);
2746
2747	// If the bit extract operation is 64bit but the original type is 32bit, we
2748	// need to add one EXTRACT_SUBREG.
2749	if ((Opc == AArch64::SBFMXri \|\| Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2750	SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT: MVT::i64),
2751	CurDAG->getTargetConstant(Val: Imms, DL: dl, VT: MVT::i64)};
2752
2753	SDNode *BFM = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i64, Ops: Ops64);
2754	SDValue Inner = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl,
2755	VT: MVT::i32, Operand: SDValue (BFM, `0`));
2756	ReplaceNode(F: N, T: Inner.getNode());
2757	return true;
2758	}
2759
2760	SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
2761	CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
2762	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
2763	return true;
2764	}
2765
2766	/// Does DstMask form a complementary pair with the mask provided by
2767	/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2768	/// this asks whether DstMask zeroes precisely those bits that will be set by
2769	/// the other half.
2770	static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2771	unsigned NumberOfIgnoredHighBits, EVT VT) {
2772	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
2773	"i32 or i64 mask type expected!");
2774	unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2775
2776	APInt SignificantDstMask = APInt (BitWidth, DstMask);
2777	APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(width: BitWidth);
2778
2779	return (SignificantDstMask & SignificantBitsToBeInserted) == `0` &&
2780	(SignificantDstMask \| SignificantBitsToBeInserted).isAllOnes();
2781	}
2782
2783	// Look for bits that will be useful for later uses.
2784	// A bit is consider useless as soon as it is dropped and never used
2785	// before it as been dropped.
2786	// E.g., looking for useful bit of x
2787	// 1. y = x & 0x7
2788	// 2. z = y >> 2
2789	// After #1, x useful bits are 0x7, then the useful bits of x, live through
2790	// y.
2791	// After #2, the useful bits of x are 0x4.
2792	// However, if x is used on an unpredicatable instruction, then all its bits
2793	// are useful.
2794	// E.g.
2795	// 1. y = x & 0x7
2796	// 2. z = y >> 2
2797	// 3. str x, [@x]
2798	static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = `0`);
2799
2800	static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2801	unsigned Depth) {
2802	uint64_t Imm =
2803	cast<const ConstantSDNode>(Val: Op.getOperand(i: `1`).getNode())->getZExtValue();
2804	Imm = AArch64_AM::decodeLogicalImmediate(val: Imm, regSize: UsefulBits.getBitWidth());
2805	UsefulBits &= APInt (UsefulBits.getBitWidth(), Imm);
2806	getUsefulBits(Op, UsefulBits, Depth: Depth + `1`);
2807	}
2808
2809	static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2810	uint64_t Imm, uint64_t MSB,
2811	unsigned Depth) {
2812	// inherit the bitwidth value
2813	APInt OpUsefulBits(UsefulBits);
2814	OpUsefulBits = `1`;
2815
2816	if (MSB >= Imm) {
2817	OpUsefulBits <<= MSB - Imm + `1`;
2818	--OpUsefulBits;
2819	// The interesting part will be in the lower part of the result
2820	getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + `1`);
2821	// The interesting part was starting at Imm in the argument
2822	OpUsefulBits <<= Imm;
2823	} else {
2824	OpUsefulBits <<= MSB + `1`;
2825	--OpUsefulBits;
2826	// The interesting part will be shifted in the result
2827	OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2828	getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + `1`);
2829	// The interesting part was at zero in the argument
2830	OpUsefulBits.lshrInPlace(ShiftAmt: OpUsefulBits.getBitWidth() - Imm);
2831	}
2832
2833	UsefulBits &= OpUsefulBits;
2834	}
2835
2836	static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2837	unsigned Depth) {
2838	uint64_t Imm =
2839	cast<const ConstantSDNode>(Val: Op.getOperand(i: `1`).getNode())->getZExtValue();
2840	uint64_t MSB =
2841	cast<const ConstantSDNode>(Val: Op.getOperand(i: `2`).getNode())->getZExtValue();
2842
2843	getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2844	}
2845
2846	static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2847	unsigned Depth) {
2848	uint64_t ShiftTypeAndValue =
2849	cast<const ConstantSDNode>(Val: Op.getOperand(i: `2`).getNode())->getZExtValue();
2850	APInt Mask(UsefulBits);
2851	Mask.clearAllBits();
2852	Mask.flipAllBits();
2853
2854	if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSL) {
2855	// Shift Left
2856	uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
2857	Mask <<= ShiftAmt;
2858	getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + `1`);
2859	Mask.lshrInPlace(ShiftAmt);
2860	} else if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSR) {
2861	// Shift Right
2862	// We do not handle AArch64_AM::ASR, because the sign will change the
2863	// number of useful bits
2864	uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
2865	Mask.lshrInPlace(ShiftAmt);
2866	getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + `1`);
2867	Mask <<= ShiftAmt;
2868	} else
2869	return;
2870
2871	UsefulBits &= Mask;
2872	}
2873
2874	static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2875	unsigned Depth) {
2876	uint64_t Imm =
2877	cast<const ConstantSDNode>(Val: Op.getOperand(i: `2`).getNode())->getZExtValue();
2878	uint64_t MSB =
2879	cast<const ConstantSDNode>(Val: Op.getOperand(i: `3`).getNode())->getZExtValue();
2880
2881	APInt OpUsefulBits(UsefulBits);
2882	OpUsefulBits = `1`;
2883
2884	APInt ResultUsefulBits(UsefulBits.getBitWidth(), `0`);
2885	ResultUsefulBits.flipAllBits();
2886	APInt Mask(UsefulBits.getBitWidth(), `0`);
2887
2888	getUsefulBits(Op, UsefulBits&: ResultUsefulBits, Depth: Depth + `1`);
2889
2890	if (MSB >= Imm) {
2891	// The instruction is a BFXIL.
2892	uint64_t Width = MSB - Imm + `1`;
2893	uint64_t LSB = Imm;
2894
2895	OpUsefulBits <<= Width;
2896	--OpUsefulBits;
2897
2898	if (Op.getOperand(i: `1`) == Orig) {
2899	// Copy the low bits from the result to bits starting from LSB.
2900	Mask = ResultUsefulBits & OpUsefulBits;
2901	Mask <<= LSB;
2902	}
2903
2904	if (Op.getOperand(i: `0`) == Orig)
2905	// Bits starting from LSB in the input contribute to the result.
2906	Mask \|= (ResultUsefulBits & ~OpUsefulBits);
2907	} else {
2908	// The instruction is a BFI.
2909	uint64_t Width = MSB + `1`;
2910	uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2911
2912	OpUsefulBits <<= Width;
2913	--OpUsefulBits;
2914	OpUsefulBits <<= LSB;
2915
2916	if (Op.getOperand(i: `1`) == Orig) {
2917	// Copy the bits from the result to the zero bits.
2918	Mask = ResultUsefulBits & OpUsefulBits;
2919	Mask.lshrInPlace(ShiftAmt: LSB);
2920	}
2921
2922	if (Op.getOperand(i: `0`) == Orig)
2923	Mask \|= (ResultUsefulBits & ~OpUsefulBits);
2924	}
2925
2926	UsefulBits &= Mask;
2927	}
2928
2929	static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2930	SDValue Orig, unsigned Depth) {
2931
2932	// Users of this node should have already been instruction selected
2933	// FIXME: Can we turn that into an assert?
2934	if (!UserNode->isMachineOpcode())
2935	return;
2936
2937	switch (UserNode->getMachineOpcode()) {
2938	default:
2939	return;
2940	case AArch64::ANDSWri:
2941	case AArch64::ANDSXri:
2942	case AArch64::ANDWri:
2943	case AArch64::ANDXri:
2944	// We increment Depth only when we call the getUsefulBits
2945	return getUsefulBitsFromAndWithImmediate(Op: SDValue (UserNode, `0`), UsefulBits,
2946	Depth);
2947	case AArch64::UBFMWri:
2948	case AArch64::UBFMXri:
2949	return getUsefulBitsFromUBFM(Op: SDValue (UserNode, `0`), UsefulBits, Depth);
2950
2951	case AArch64::ORRWrs:
2952	case AArch64::ORRXrs:
2953	if (UserNode->getOperand(Num: `0`) != Orig && UserNode->getOperand(Num: `1`) == Orig)
2954	getUsefulBitsFromOrWithShiftedReg(Op: SDValue (UserNode, `0`), UsefulBits,
2955	Depth);
2956	return;
2957	case AArch64::BFMWri:
2958	case AArch64::BFMXri:
2959	return getUsefulBitsFromBFM(Op: SDValue (UserNode, `0`), Orig, UsefulBits, Depth);
2960
2961	case AArch64::STRBBui:
2962	case AArch64::STURBBi:
2963	if (UserNode->getOperand(Num: `0`) != Orig)
2964	return;
2965	UsefulBits &= APInt (UsefulBits.getBitWidth(), `0xff`);
2966	return;
2967
2968	case AArch64::STRHHui:
2969	case AArch64::STURHHi:
2970	if (UserNode->getOperand(Num: `0`) != Orig)
2971	return;
2972	UsefulBits &= APInt (UsefulBits.getBitWidth(), `0xffff`);
2973	return;
2974	}
2975	}
2976
2977	static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2978	if (Depth >= SelectionDAG::MaxRecursionDepth)
2979	return;
2980	// Initialize UsefulBits
2981	if (!Depth) {
2982	unsigned Bitwidth = Op.getScalarValueSizeInBits();
2983	// At the beginning, assume every produced bits is useful
2984	UsefulBits = APInt (Bitwidth, `0`);
2985	UsefulBits.flipAllBits();
2986	}
2987	APInt UsersUsefulBits(UsefulBits.getBitWidth(), `0`);
2988
2989	for (SDNode *Node : Op.getNode()->uses()) {
2990	// A use cannot produce useful bits
2991	APInt UsefulBitsForUse = APInt (UsefulBits);
2992	getUsefulBitsForUse(UserNode: Node, UsefulBits&: UsefulBitsForUse, Orig: Op, Depth);
2993	UsersUsefulBits \|= UsefulBitsForUse;
2994	}
2995	// UsefulBits contains the produced bits that are meaningful for the
2996	// current definition, thus a user cannot make a bit meaningful at
2997	// this point
2998	UsefulBits &= UsersUsefulBits;
2999	}
3000
3001	/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3002	/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3003	/// 0, return Op unchanged.
3004	static SDValue getLeftShift(SelectionDAG CurDAG, SDValue Op, int* ShlAmount) {
3005	if (ShlAmount == `0`)
3006	return Op;
3007
3008	EVT VT = Op.getValueType();
3009	SDLoc dl(Op);
3010	unsigned BitWidth = VT.getSizeInBits();
3011	unsigned UBFMOpc = BitWidth == `32` ? AArch64::UBFMWri : AArch64::UBFMXri;
3012
3013	SDNode *ShiftNode;
3014	if (ShlAmount > `0`) {
3015	// LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3016	ShiftNode = CurDAG->getMachineNode(
3017	Opcode: UBFMOpc, dl, VT, Op1: Op,
3018	Op2: CurDAG->getTargetConstant(Val: BitWidth - ShlAmount, DL: dl, VT),
3019	Op3: CurDAG->getTargetConstant(Val: BitWidth - `1` - ShlAmount, DL: dl, VT));
3020	} else {
3021	// LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3022	assert(ShlAmount < `0` && "expected right shift");
3023	int ShrAmount = -ShlAmount;
3024	ShiftNode = CurDAG->getMachineNode(
3025	Opcode: UBFMOpc, dl, VT, Op1: Op, Op2: CurDAG->getTargetConstant(Val: ShrAmount, DL: dl, VT),
3026	Op3: CurDAG->getTargetConstant(Val: BitWidth - `1`, DL: dl, VT));
3027	}
3028
3029	return SDValue (ShiftNode, `0`);
3030	}
3031
3032	// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3033	static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3034	bool BiggerPattern,
3035	const uint64_t NonZeroBits,
3036	SDValue &Src, int &DstLSB,
3037	int &Width);
3038
3039	// For bit-field-positioning pattern "shl VAL, N)".
3040	static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3041	bool BiggerPattern,
3042	const uint64_t NonZeroBits,
3043	SDValue &Src, int &DstLSB,
3044	int &Width);
3045
3046	/// Does this tree qualify as an attempt to move a bitfield into position,
3047	/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3048	static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3049	bool BiggerPattern, SDValue &Src,
3050	int &DstLSB, int &Width) {
3051	EVT VT = Op.getValueType();
3052	unsigned BitWidth = VT.getSizeInBits();
3053	(void)BitWidth;
3054	assert(BitWidth == `32` \|\| BitWidth == `64`);
3055
3056	KnownBits Known = CurDAG->computeKnownBits(Op);
3057
3058	// Non-zero in the sense that they're not provably zero, which is the key
3059	// point if we want to use this value
3060	const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3061	if (!isShiftedMask_64(Value: NonZeroBits))
3062	return false;
3063
3064	switch (Op.getOpcode()) {
3065	default:
3066	break;
3067	case ISD::AND:
3068	return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3069	NonZeroBits, Src, DstLSB, Width);
3070	case ISD::SHL:
3071	return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3072	NonZeroBits, Src, DstLSB, Width);
3073	}
3074
3075	return false;
3076	}
3077
3078	static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3079	bool BiggerPattern,
3080	const uint64_t NonZeroBits,
3081	SDValue &Src, int &DstLSB,
3082	int &Width) {
3083	assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3084
3085	EVT VT = Op.getValueType();
3086	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
3087	"Caller guarantees VT is one of i32 or i64");
3088	(void)VT;
3089
3090	uint64_t AndImm;
3091	if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::AND, Imm&: AndImm))
3092	return false;
3093
3094	// If (~AndImm & NonZeroBits) is not zero at POS, we know that
3095	// 1) (AndImm & (1 << POS) == 0)
3096	// 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3097	//
3098	// 1) and 2) don't agree so something must be wrong (e.g., in
3099	// 'SelectionDAG::computeKnownBits')
3100	assert((~AndImm & NonZeroBits) == `0` &&
3101	"Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3102
3103	SDValue AndOp0 = Op.getOperand(i: `0`);
3104
3105	uint64_t ShlImm;
3106	SDValue ShlOp0;
3107	if (isOpcWithIntImmediate(N: AndOp0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3108	// For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3109	ShlOp0 = AndOp0.getOperand(i: `0`);
3110	} else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3111	isOpcWithIntImmediate(N: AndOp0.getOperand(i: `0`).getNode(), Opc: ISD::SHL,
3112	Imm&: ShlImm)) {
3113	// For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3114
3115	// ShlVal == shl(val, N), which is a left shift on a smaller type.
3116	SDValue ShlVal = AndOp0.getOperand(i: `0`);
3117
3118	// Since this is after type legalization and ShlVal is extended to MVT::i64,
3119	// expect VT to be MVT::i32.
3120	assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3121
3122	// Widens 'val' to MVT::i64 as the source of bit field positioning.
3123	ShlOp0 = Widen(CurDAG, N: ShlVal.getOperand(i: `0`));
3124	} else
3125	return false;
3126
3127	// For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3128	// then we'll end up generating AndOp0+UBFIZ instead of just keeping
3129	// AndOp0+AND.
3130	if (!BiggerPattern && !AndOp0.hasOneUse())
3131	return false;
3132
3133	DstLSB = llvm::countr_zero(Val: NonZeroBits);
3134	Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);
3135
3136	// Bail out on large Width. This happens when no proper combining / constant
3137	// folding was performed.
3138	if (Width >= (int)VT.getSizeInBits()) {
3139	// If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3140	// Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3141	// "val".
3142	// If VT is i32, what Width >= 32 means:
3143	// - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3144	// demands at least 'Width' bits (after dag-combiner). This together with
3145	// `any_extend` Op (undefined higher bits) indicates missed combination
3146	// when lowering the 'and' IR instruction to an machine IR instruction.
3147	LLVM_DEBUG(
3148	dbgs()
3149	<< "Found large Width in bit-field-positioning -- this indicates no "
3150	"proper combining / constant folding was performed\n");
3151	return false;
3152	}
3153
3154	// BFI encompasses sufficiently many nodes that it's worth inserting an extra
3155	// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3156	// amount. BiggerPattern is true when this pattern is being matched for BFI,
3157	// BiggerPattern is false when this pattern is being matched for UBFIZ, in
3158	// which case it is not profitable to insert an extra shift.
3159	if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3160	return false;
3161
3162	Src = getLeftShift(CurDAG, Op: ShlOp0, ShlAmount: ShlImm - DstLSB);
3163	return true;
3164	}
3165
3166	// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3167	// UBFIZ.
3168	static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3169	SDValue &Src, int &DstLSB,
3170	int &Width) {
3171	// Caller should have verified that N is a left shift with constant shift
3172	// amount; asserts that.
3173	assert(Op.getOpcode() == ISD::SHL &&
3174	"Op.getNode() should be a SHL node to call this function");
3175	assert(isIntImmediateEq(Op.getOperand(`1`), ShlImm) &&
3176	"Op.getNode() should shift ShlImm to call this function");
3177
3178	uint64_t AndImm = `0`;
3179	SDValue Op0 = Op.getOperand(i: `0`);
3180	if (!isOpcWithIntImmediate(N: Op0.getNode(), Opc: ISD::AND, Imm&: AndImm))
3181	return false;
3182
3183	const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3184	if (isMask_64(Value: ShiftedAndImm)) {
3185	// AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3186	// should end with Mask, and could be prefixed with random bits if those
3187	// bits are shifted out.
3188	//
3189	// For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3190	// the AND result corresponding to those bits are shifted out, so it's fine
3191	// to not extract them.
3192	Width = llvm::countr_one(Value: ShiftedAndImm);
3193	DstLSB = ShlImm;
3194	Src = Op0.getOperand(i: `0`);
3195	return true;
3196	}
3197	return false;
3198	}
3199
3200	static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3201	bool BiggerPattern,
3202	const uint64_t NonZeroBits,
3203	SDValue &Src, int &DstLSB,
3204	int &Width) {
3205	assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3206
3207	EVT VT = Op.getValueType();
3208	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
3209	"Caller guarantees that type is i32 or i64");
3210	(void)VT;
3211
3212	uint64_t ShlImm;
3213	if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SHL, Imm&: ShlImm))
3214	return false;
3215
3216	if (!BiggerPattern && !Op.hasOneUse())
3217	return false;
3218
3219	if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3220	return true;
3221
3222	DstLSB = llvm::countr_zero(Val: NonZeroBits);
3223	Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);
3224
3225	if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3226	return false;
3227
3228	Src = getLeftShift(CurDAG, Op: Op.getOperand(i: `0`), ShlAmount: ShlImm - DstLSB);
3229	return true;
3230	}
3231
3232	static bool isShiftedMask(uint64_t Mask, EVT VT) {
3233	assert(VT == MVT::i32 \|\| VT == MVT::i64);
3234	if (VT == MVT::i32)
3235	return isShiftedMask_32(Value: Mask);
3236	return isShiftedMask_64(Value: Mask);
3237	}
3238
3239	// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3240	// inserted only sets known zero bits.
3241	static bool tryBitfieldInsertOpFromOrAndImm(SDNode N, SelectionDAG CurDAG) {
3242	assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3243
3244	EVT VT = N->getValueType(ResNo: `0`);
3245	if (VT != MVT::i32 && VT != MVT::i64)
3246	return false;
3247
3248	unsigned BitWidth = VT.getSizeInBits();
3249
3250	uint64_t OrImm;
3251	if (!isOpcWithIntImmediate(N, Opc: ISD::OR, Imm&: OrImm))
3252	return false;
3253
3254	// Skip this transformation if the ORR immediate can be encoded in the ORR.
3255	// Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3256	// performance neutral.
3257	if (AArch64_AM::isLogicalImmediate(imm: OrImm, regSize: BitWidth))
3258	return false;
3259
3260	uint64_t MaskImm;
3261	SDValue And = N->getOperand(Num: `0`);
3262	// Must be a single use AND with an immediate operand.
3263	if (!And.hasOneUse() \|\|
3264	!isOpcWithIntImmediate(N: And.getNode(), Opc: ISD::AND, Imm&: MaskImm))
3265	return false;
3266
3267	// Compute the Known Zero for the AND as this allows us to catch more general
3268	// cases than just looking for AND with imm.
3269	KnownBits Known = CurDAG->computeKnownBits(Op: And);
3270
3271	// Non-zero in the sense that they're not provably zero, which is the key
3272	// point if we want to use this value.
3273	uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3274
3275	// The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3276	if (!isShiftedMask(Mask: Known.Zero.getZExtValue(), VT))
3277	return false;
3278
3279	// The bits being inserted must only set those bits that are known to be zero.
3280	if ((OrImm & NotKnownZero) != `0`) {
3281	// FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3282	// currently handle this case.
3283	return false;
3284	}
3285
3286	// BFI/BFXIL dst, src, #lsb, #width.
3287	int LSB = llvm::countr_one(Value: NotKnownZero);
3288	int Width = BitWidth - APInt (BitWidth, NotKnownZero).popcount();
3289
3290	// BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3291	unsigned ImmR = (BitWidth - LSB) % BitWidth;
3292	unsigned ImmS = Width - `1`;
3293
3294	// If we're creating a BFI instruction avoid cases where we need more
3295	// instructions to materialize the BFI constant as compared to the original
3296	// ORR. A BFXIL will use the same constant as the original ORR, so the code
3297	// should be no worse in this case.
3298	bool IsBFI = LSB != `0`;
3299	uint64_t BFIImm = OrImm >> LSB;
3300	if (IsBFI && !AArch64_AM::isLogicalImmediate(imm: BFIImm, regSize: BitWidth)) {
3301	// We have a BFI instruction and we know the constant can't be materialized
3302	// with a ORR-immediate with the zero register.
3303	unsigned OrChunks = `0`, BFIChunks = `0`;
3304	for (unsigned Shift = `0`; Shift < BitWidth; Shift += `16`) {
3305	if (((OrImm >> Shift) & `0xFFFF`) != `0`)
3306	++OrChunks;
3307	if (((BFIImm >> Shift) & `0xFFFF`) != `0`)
3308	++BFIChunks;
3309	}
3310	if (BFIChunks > OrChunks)
3311	return false;
3312	}
3313
3314	// Materialize the constant to be inserted.
3315	SDLoc DL(N);
3316	unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3317	SDNode *MOVI = CurDAG->getMachineNode(
3318	Opcode: MOVIOpc, dl: DL, VT, Op1: CurDAG->getTargetConstant(Val: BFIImm, DL, VT));
3319
3320	// Create the BFI/BFXIL instruction.
3321	SDValue Ops[] = {And.getOperand(i: `0`), SDValue (MOVI, `0`),
3322	CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3323	CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3324	unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3325	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3326	return true;
3327	}
3328
3329	static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3330	SDValue &ShiftedOperand,
3331	uint64_t &EncodedShiftImm) {
3332	// Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3333	if (!Dst.hasOneUse())
3334	return false;
3335
3336	EVT VT = Dst.getValueType();
3337	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
3338	"Caller should guarantee that VT is one of i32 or i64");
3339	const unsigned SizeInBits = VT.getSizeInBits();
3340
3341	SDLoc DL(Dst.getNode());
3342	uint64_t AndImm, ShlImm;
3343	if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::AND, Imm&: AndImm) &&
3344	isShiftedMask_64(Value: AndImm)) {
3345	// Avoid transforming 'DstOp0' if it has other uses than the AND node.
3346	SDValue DstOp0 = Dst.getOperand(i: `0`);
3347	if (!DstOp0.hasOneUse())
3348	return false;
3349
3350	// An example to illustrate the transformation
3351	// From:
3352	// lsr x8, x1, #1
3353	// and x8, x8, #0x3f80
3354	// bfxil x8, x1, #0, #7
3355	// To:
3356	// and x8, x23, #0x7f
3357	// ubfx x9, x23, #8, #7
3358	// orr x23, x8, x9, lsl #7
3359	//
3360	// The number of instructions remains the same, but ORR is faster than BFXIL
3361	// on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3362	// the dependency chain is improved after the transformation.
3363	uint64_t SrlImm;
3364	if (isOpcWithIntImmediate(N: DstOp0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3365	uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(Val: AndImm);
3366	if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3367	unsigned MaskWidth =
3368	llvm::countr_one(Value: AndImm >> NumTrailingZeroInShiftedMask);
3369	unsigned UBFMOpc =
3370	(VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3371	SDNode *UBFMNode = CurDAG->getMachineNode(
3372	Opcode: UBFMOpc, dl: DL, VT, Op1: DstOp0.getOperand(i: `0`),
3373	Op2: CurDAG->getTargetConstant(Val: SrlImm + NumTrailingZeroInShiftedMask, DL,
3374	VT),
3375	Op3: CurDAG->getTargetConstant(
3376	Val: SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - `1`, DL, VT));
3377	ShiftedOperand = SDValue (UBFMNode, `0`);
3378	EncodedShiftImm = AArch64_AM::getShifterImm(
3379	ST: AArch64_AM::LSL, Imm: NumTrailingZeroInShiftedMask);
3380	return true;
3381	}
3382	}
3383	return false;
3384	}
3385
3386	if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3387	ShiftedOperand = Dst.getOperand(i: `0`);
3388	EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm);
3389	return true;
3390	}
3391
3392	uint64_t SrlImm;
3393	if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3394	ShiftedOperand = Dst.getOperand(i: `0`);
3395	EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm);
3396	return true;
3397	}
3398	return false;
3399	}
3400
3401	// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3402	// the operands and select it to AArch64::ORR with shifted registers if
3403	// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3404	static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3405	SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3406	const bool BiggerPattern) {
3407	EVT VT = N->getValueType(ResNo: `0`);
3408	assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3409	assert(((N->getOperand(`0`) == OrOpd0 && N->getOperand(`1`) == OrOpd1) \|\|
3410	(N->getOperand(`1`) == OrOpd0 && N->getOperand(`0`) == OrOpd1)) &&
3411	"Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3412	assert((VT == MVT::i32 \|\| VT == MVT::i64) &&
3413	"Expect result type to be i32 or i64 since N is combinable to BFM");
3414	SDLoc DL(N);
3415
3416	// Bail out if BFM simplifies away one node in BFM Dst.
3417	if (OrOpd1 != Dst)
3418	return false;
3419
3420	const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3421	// For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3422	// nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3423	if (BiggerPattern) {
3424	uint64_t SrcAndImm;
3425	if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::AND, Imm&: SrcAndImm) &&
3426	isMask_64(Value: SrcAndImm) && OrOpd0.getOperand(i: `0`) == Src) {
3427	// OrOpd0 = AND Src, #Mask
3428	// So BFM simplifies away one AND node from Src and doesn't simplify away
3429	// nodes from Dst. If ORR with left-shifted operand also simplifies away
3430	// one node (from Rd), ORR is better since it has higher throughput and
3431	// smaller latency than BFM on many AArch64 processors (and for the rest
3432	// ORR is at least as good as BFM).
3433	SDValue ShiftedOperand;
3434	uint64_t EncodedShiftImm;
3435	if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3436	EncodedShiftImm)) {
3437	SDValue Ops[] = {OrOpd0, ShiftedOperand,
3438	CurDAG->getTargetConstant(Val: EncodedShiftImm, DL, VT)};
3439	CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3440	return true;
3441	}
3442	}
3443	return false;
3444	}
3445
3446	assert((!BiggerPattern) && "BiggerPattern should be handled above");
3447
3448	uint64_t ShlImm;
3449	if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3450	if (OrOpd0.getOperand(i: `0`) == Src && OrOpd0.hasOneUse()) {
3451	SDValue Ops[] = {
3452	Dst, Src,
3453	CurDAG->getTargetConstant(
3454	Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
3455	CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3456	return true;
3457	}
3458
3459	// Select the following pattern to left-shifted operand rather than BFI.
3460	// %val1 = op ..
3461	// %val2 = shl %val1, #imm
3462	// %res = or %val1, %val2
3463	//
3464	// If N is selected to be BFI, we know that
3465	// 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3466	// BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3467	//
3468	// Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3469	if (OrOpd0.getOperand(i: `0`) == OrOpd1) {
3470	SDValue Ops[] = {
3471	OrOpd1, OrOpd1,
3472	CurDAG->getTargetConstant(
3473	Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
3474	CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3475	return true;
3476	}
3477	}
3478
3479	uint64_t SrlImm;
3480	if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3481	// Select the following pattern to right-shifted operand rather than BFXIL.
3482	// %val1 = op ..
3483	// %val2 = lshr %val1, #imm
3484	// %res = or %val1, %val2
3485	//
3486	// If N is selected to be BFXIL, we know that
3487	// 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3488	// BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3489	//
3490	// Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3491	if (OrOpd0.getOperand(i: `0`) == OrOpd1) {
3492	SDValue Ops[] = {
3493	OrOpd1, OrOpd1,
3494	CurDAG->getTargetConstant(
3495	Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm), DL, VT)};
3496	CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3497	return true;
3498	}
3499	}
3500
3501	return false;
3502	}
3503
3504	static bool tryBitfieldInsertOpFromOr(SDNode N, const* APInt &UsefulBits,
3505	SelectionDAG *CurDAG) {
3506	assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3507
3508	EVT VT = N->getValueType(ResNo: `0`);
3509	if (VT != MVT::i32 && VT != MVT::i64)
3510	return false;
3511
3512	unsigned BitWidth = VT.getSizeInBits();
3513
3514	// Because of simplify-demanded-bits in DAGCombine, involved masks may not
3515	// have the expected shape. Try to undo that.
3516
3517	unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3518	unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3519
3520	// Given a OR operation, check if we have the following pattern
3521	// ubfm c, b, imm, imm2 (or something that does the same jobs, see
3522	// isBitfieldExtractOp)
3523	// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3524	// countTrailingZeros(mask2) == imm2 - imm + 1
3525	// f = d \| c
3526	// if yes, replace the OR instruction with:
3527	// f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3528
3529	// OR is commutative, check all combinations of operand order and values of
3530	// BiggerPattern, i.e.
3531	// Opd0, Opd1, BiggerPattern=false
3532	// Opd1, Opd0, BiggerPattern=false
3533	// Opd0, Opd1, BiggerPattern=true
3534	// Opd1, Opd0, BiggerPattern=true
3535	// Several of these combinations may match, so check with BiggerPattern=false
3536	// first since that will produce better results by matching more instructions
3537	// and/or inserting fewer extra instructions.
3538	for (int I = `0`; I < `4`; ++I) {
3539
3540	SDValue Dst, Src;
3541	unsigned ImmR, ImmS;
3542	bool BiggerPattern = I / `2`;
3543	SDValue OrOpd0Val = N->getOperand(Num: I % `2`);
3544	SDNode *OrOpd0 = OrOpd0Val.getNode();
3545	SDValue OrOpd1Val = N->getOperand(Num: (I + `1`) % `2`);
3546	SDNode *OrOpd1 = OrOpd1Val.getNode();
3547
3548	unsigned BFXOpc;
3549	int DstLSB, Width;
3550	if (isBitfieldExtractOp(CurDAG, N: OrOpd0, Opc&: BFXOpc, Opd0&: Src, Immr&: ImmR, Imms&: ImmS,
3551	NumberOfIgnoredLowBits, BiggerPattern)) {
3552	// Check that the returned opcode is compatible with the pattern,
3553	// i.e., same type and zero extended (U and not S)
3554	if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) \|\|
3555	(BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3556	continue;
3557
3558	// Compute the width of the bitfield insertion
3559	DstLSB = `0`;
3560	Width = ImmS - ImmR + `1`;
3561	// FIXME: This constraint is to catch bitfield insertion we may
3562	// want to widen the pattern if we want to grab general bitfied
3563	// move case
3564	if (Width <= `0`)
3565	continue;
3566
3567	// If the mask on the insertee is correct, we have a BFXIL operation. We
3568	// can share the ImmR and ImmS values from the already-computed UBFM.
3569	} else if (isBitfieldPositioningOp(CurDAG, Op: OrOpd0Val,
3570	BiggerPattern,
3571	Src, DstLSB, Width)) {
3572	ImmR = (BitWidth - DstLSB) % BitWidth;
3573	ImmS = Width - `1`;
3574	} else
3575	continue;
3576
3577	// Check the second part of the pattern
3578	EVT VT = OrOpd1Val.getValueType();
3579	assert((VT == MVT::i32 \|\| VT == MVT::i64) && "unexpected OR operand");
3580
3581	// Compute the Known Zero for the candidate of the first operand.
3582	// This allows to catch more general case than just looking for
3583	// AND with imm. Indeed, simplify-demanded-bits may have removed
3584	// the AND instruction because it proves it was useless.
3585	KnownBits Known = CurDAG->computeKnownBits(Op: OrOpd1Val);
3586
3587	// Check if there is enough room for the second operand to appear
3588	// in the first one
3589	APInt BitsToBeInserted =
3590	APInt::getBitsSet(numBits: Known.getBitWidth(), loBit: DstLSB, hiBit: DstLSB + Width);
3591
3592	if ((BitsToBeInserted & ~Known.Zero) != `0`)
3593	continue;
3594
3595	// Set the first operand
3596	uint64_t Imm;
3597	if (isOpcWithIntImmediate(N: OrOpd1, Opc: ISD::AND, Imm) &&
3598	isBitfieldDstMask(DstMask: Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3599	// In that case, we can eliminate the AND
3600	Dst = OrOpd1->getOperand(Num: `0`);
3601	else
3602	// Maybe the AND has been removed by simplify-demanded-bits
3603	// or is useful because it discards more bits
3604	Dst = OrOpd1Val;
3605
3606	// Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3607	// with shifted operand is more efficient.
3608	if (tryOrrWithShift(N, OrOpd0: OrOpd0Val, OrOpd1: OrOpd1Val, Src, Dst, CurDAG,
3609	BiggerPattern))
3610	return true;
3611
3612	// both parts match
3613	SDLoc DL(N);
3614	SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3615	CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3616	unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3617	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3618	return true;
3619	}
3620
3621	// Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3622	// Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3623	// mask (e.g., 0x000ffff0).
3624	uint64_t Mask0Imm, Mask1Imm;
3625	SDValue And0 = N->getOperand(Num: `0`);
3626	SDValue And1 = N->getOperand(Num: `1`);
3627	if (And0.hasOneUse() && And1.hasOneUse() &&
3628	isOpcWithIntImmediate(N: And0.getNode(), Opc: ISD::AND, Imm&: Mask0Imm) &&
3629	isOpcWithIntImmediate(N: And1.getNode(), Opc: ISD::AND, Imm&: Mask1Imm) &&
3630	APInt (BitWidth, Mask0Imm) == ~APInt (BitWidth, Mask1Imm) &&
3631	(isShiftedMask(Mask: Mask0Imm, VT) \|\| isShiftedMask(Mask: Mask1Imm, VT))) {
3632
3633	// ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3634	// (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3635	// bits to be inserted.
3636	if (isShiftedMask(Mask: Mask0Imm, VT)) {
3637	std::swap(a&: And0, b&: And1);
3638	std::swap(a&: Mask0Imm, b&: Mask1Imm);
3639	}
3640
3641	SDValue Src = And1 ->getOperand(Num: `0`);
3642	SDValue Dst = And0 ->getOperand(Num: `0`);
3643	unsigned LSB = llvm::countr_zero(Val: Mask1Imm);
3644	int Width = BitWidth - APInt (BitWidth, Mask0Imm).popcount();
3645
3646	// The BFXIL inserts the low-order bits from a source register, so right
3647	// shift the needed bits into place.
3648	SDLoc DL(N);
3649	unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3650	uint64_t LsrImm = LSB;
3651	if (Src ->hasOneUse() &&
3652	isOpcWithIntImmediate(N: Src.getNode(), Opc: ISD::SRL, Imm&: LsrImm) &&
3653	(LsrImm + LSB) < BitWidth) {
3654	Src = Src ->getOperand(Num: `0`);
3655	LsrImm += LSB;
3656	}
3657
3658	SDNode *LSR = CurDAG->getMachineNode(
3659	Opcode: ShiftOpc, dl: DL, VT, Op1: Src, Op2: CurDAG->getTargetConstant(Val: LsrImm, DL, VT),
3660	Op3: CurDAG->getTargetConstant(Val: BitWidth - `1`, DL, VT));
3661
3662	// BFXIL is an alias of BFM, so translate to BFM operands.
3663	unsigned ImmR = (BitWidth - LSB) % BitWidth;
3664	unsigned ImmS = Width - `1`;
3665
3666	// Create the BFXIL instruction.
3667	SDValue Ops[] = {Dst, SDValue (LSR, `0`),
3668	CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3669	CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3670	unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3671	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3672	return true;
3673	}
3674
3675	return false;
3676	}
3677
3678	bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3679	if (N->getOpcode() != ISD::OR)
3680	return false;
3681
3682	APInt NUsefulBits;
3683	getUsefulBits(Op: SDValue (N, `0`), UsefulBits&: NUsefulBits);
3684
3685	// If all bits are not useful, just return UNDEF.
3686	if (!NUsefulBits) {
3687	CurDAG->SelectNodeTo(N, MachineOpc: TargetOpcode::IMPLICIT_DEF, VT: N->getValueType(ResNo: `0`));
3688	return true;
3689	}
3690
3691	if (tryBitfieldInsertOpFromOr(N, UsefulBits: NUsefulBits, CurDAG))
3692	return true;
3693
3694	return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3695	}
3696
3697	/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3698	/// equivalent of a left shift by a constant amount followed by an and masking
3699	/// out a contiguous set of bits.
3700	bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3701	if (N->getOpcode() != ISD::AND)
3702	return false;
3703
3704	EVT VT = N->getValueType(ResNo: `0`);
3705	if (VT != MVT::i32 && VT != MVT::i64)
3706	return false;
3707
3708	SDValue Op0;
3709	int DstLSB, Width;
3710	if (!isBitfieldPositioningOp(CurDAG, Op: SDValue (N, `0`), /BiggerPattern=/false,
3711	Src&: Op0, DstLSB, Width))
3712	return false;
3713
3714	// ImmR is the rotate right amount.
3715	unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3716	// ImmS is the most significant bit of the source to be moved.
3717	unsigned ImmS = Width - `1`;
3718
3719	SDLoc DL(N);
3720	SDValue Ops[] = {Op0, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3721	CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3722	unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3723	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3724	return true;
3725	}
3726
3727	/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3728	/// variable shift/rotate instructions.
3729	bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3730	EVT VT = N->getValueType(ResNo: `0`);
3731
3732	unsigned Opc;
3733	switch (N->getOpcode()) {
3734	case ISD::ROTR:
3735	Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3736	break;
3737	case ISD::SHL:
3738	Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3739	break;
3740	case ISD::SRL:
3741	Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3742	break;
3743	case ISD::SRA:
3744	Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3745	break;
3746	default:
3747	return false;
3748	}
3749
3750	uint64_t Size;
3751	uint64_t Bits;
3752	if (VT == MVT::i32) {
3753	Bits = `5`;
3754	Size = `32`;
3755	} else if (VT == MVT::i64) {
3756	Bits = `6`;
3757	Size = `64`;
3758	} else
3759	return false;
3760
3761	SDValue ShiftAmt = N->getOperand(Num: `1`);
3762	SDLoc DL(N);
3763	SDValue NewShiftAmt;
3764
3765	// Skip over an extend of the shift amount.
3766	if (ShiftAmt ->getOpcode() == ISD::ZERO_EXTEND \|\|
3767	ShiftAmt ->getOpcode() == ISD::ANY_EXTEND)
3768	ShiftAmt = ShiftAmt ->getOperand(Num: `0`);
3769
3770	if (ShiftAmt ->getOpcode() == ISD::ADD \|\| ShiftAmt ->getOpcode() == ISD::SUB) {
3771	SDValue Add0 = ShiftAmt ->getOperand(Num: `0`);
3772	SDValue Add1 = ShiftAmt ->getOperand(Num: `1`);
3773	uint64_t Add0Imm;
3774	uint64_t Add1Imm;
3775	if (isIntImmediate(N: Add1, Imm&: Add1Imm) && (Add1Imm % Size == `0`)) {
3776	// If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3777	// to avoid the ADD/SUB.
3778	NewShiftAmt = Add0;
3779	} else if (ShiftAmt ->getOpcode() == ISD::SUB &&
3780	isIntImmediate(N: Add0, Imm&: Add0Imm) && Add0Imm != `0` &&
3781	(Add0Imm % Size == `0`)) {
3782	// If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3783	// to generate a NEG instead of a SUB from a constant.
3784	unsigned NegOpc;
3785	unsigned ZeroReg;
3786	EVT SubVT = ShiftAmt ->getValueType(ResNo: `0`);
3787	if (SubVT == MVT::i32) {
3788	NegOpc = AArch64::SUBWrr;
3789	ZeroReg = AArch64::WZR;
3790	} else {
3791	assert(SubVT == MVT::i64);
3792	NegOpc = AArch64::SUBXrr;
3793	ZeroReg = AArch64::XZR;
3794	}
3795	SDValue Zero =
3796	CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
3797	MachineSDNode *Neg =
3798	CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
3799	NewShiftAmt = SDValue (Neg, `0`);
3800	} else if (ShiftAmt ->getOpcode() == ISD::SUB &&
3801	isIntImmediate(N: Add0, Imm&: Add0Imm) && (Add0Imm % Size == Size - `1`)) {
3802	// If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3803	// to generate a NOT instead of a SUB from a constant.
3804	unsigned NotOpc;
3805	unsigned ZeroReg;
3806	EVT SubVT = ShiftAmt ->getValueType(ResNo: `0`);
3807	if (SubVT == MVT::i32) {
3808	NotOpc = AArch64::ORNWrr;
3809	ZeroReg = AArch64::WZR;
3810	} else {
3811	assert(SubVT == MVT::i64);
3812	NotOpc = AArch64::ORNXrr;
3813	ZeroReg = AArch64::XZR;
3814	}
3815	SDValue Zero =
3816	CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
3817	MachineSDNode *Not =
3818	CurDAG->getMachineNode(Opcode: NotOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
3819	NewShiftAmt = SDValue (Not, `0`);
3820	} else
3821	return false;
3822	} else {
3823	// If the shift amount is masked with an AND, check that the mask covers the
3824	// bits that are implicitly ANDed off by the above opcodes and if so, skip
3825	// the AND.
3826	uint64_t MaskImm;
3827	if (!isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: ISD::AND, Imm&: MaskImm) &&
3828	!isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: AArch64ISD::ANDS, Imm&: MaskImm))
3829	return false;
3830
3831	if ((unsigned)llvm::countr_one(Value: MaskImm) < Bits)
3832	return false;
3833
3834	NewShiftAmt = ShiftAmt ->getOperand(Num: `0`);
3835	}
3836
3837	// Narrow/widen the shift amount to match the size of the shift operation.
3838	if (VT == MVT::i32)
3839	NewShiftAmt = narrowIfNeeded(CurDAG, N: NewShiftAmt);
3840	else if (VT == MVT::i64 && NewShiftAmt ->getValueType(ResNo: `0`) == MVT::i32) {
3841	SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL, VT: MVT::i32);
3842	MachineSDNode *Ext = CurDAG->getMachineNode(
3843	Opcode: AArch64::SUBREG_TO_REG, dl: DL, VT,
3844	Op1: CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i64), Op2: NewShiftAmt, Op3: SubReg);
3845	NewShiftAmt = SDValue (Ext, `0`);
3846	}
3847
3848	SDValue Ops[] = {N->getOperand(Num: `0`), NewShiftAmt};
3849	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3850	return true;
3851	}
3852
3853	static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3854	SDValue &FixedPos,
3855	unsigned RegWidth,
3856	bool isReciprocal) {
3857	APFloat FVal(`0.0`);
3858	if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: N))
3859	FVal = CN->getValueAPF();
3860	else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(Val&: N)) {
3861	// Some otherwise illegal constants are allowed in this case.
3862	if (LN->getOperand(Num: `1`).getOpcode() != AArch64ISD::ADDlow \|\|
3863	!isa<ConstantPoolSDNode>(Val: LN->getOperand(Num: `1`)->getOperand(Num: `1`)))
3864	return false;
3865
3866	ConstantPoolSDNode *CN =
3867	dyn_cast<ConstantPoolSDNode>(Val: LN->getOperand(Num: `1`)->getOperand(Num: `1`));
3868	FVal = cast<ConstantFP>(Val: CN->getConstVal())->getValueAPF();
3869	} else
3870	return false;
3871
3872	// An FCVT[SU] instruction performs: convertToInt(Val 2^fbits) where fbits*
3873	// is between 1 and 32 for a destination w-register, or 1 and 64 for an
3874	// x-register.
3875	//
3876	// By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3877	// want THIS_NODE to be 2^fbits. This is much easier to deal with using
3878	// integers.
3879	bool IsExact;
3880
3881	if (isReciprocal)
3882	if (!FVal.getExactInverse(inv: &FVal))
3883	return false;
3884
3885	// fbits is between 1 and 64 in the worst-case, which means the fmul
3886	// could have 2^64 as an actual operand. Need 65 bits of precision.
3887	APSInt IntVal(`65`, true);
3888	FVal.convertToInteger(Result&: IntVal, RM: APFloat::rmTowardZero, IsExact: &IsExact);
3889
3890	// N.b. isPowerOf2 also checks for > 0.
3891	if (!IsExact \|\| !IntVal.isPowerOf2())
3892	return false;
3893	unsigned FBits = IntVal.logBase2();
3894
3895	// Checks above should have guaranteed that we haven't lost information in
3896	// finding FBits, but it must still be in range.
3897	if (FBits == `0` \|\| FBits > RegWidth) return false;
3898
3899	FixedPos = CurDAG->getTargetConstant(Val: FBits, DL: SDLoc (N), VT: MVT::i32);
3900	return true;
3901	}
3902
3903	bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3904	unsigned RegWidth) {
3905	return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3906	isReciprocal: false);
3907	}
3908
3909	bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3910	SDValue &FixedPos,
3911	unsigned RegWidth) {
3912	return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3913	isReciprocal: true);
3914	}
3915
3916	// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
3917	// of the string and obtains the integer values from them and combines these
3918	// into a single value to be used in the MRS/MSR instruction.
3919	static int getIntOperandFromRegisterString(StringRef RegString) {
3920	SmallVector<StringRef, `5`> Fields;
3921	RegString.split(A&: Fields, Separator: `':'`);
3922
3923	if (Fields.size() == `1`)
3924	return -`1`;
3925
3926	assert(Fields.size() == `5`
3927	&& "Invalid number of fields in read register string");
3928
3929	SmallVector<int, `5`> Ops;
3930	bool AllIntFields = true;
3931
3932	for (StringRef Field : Fields) {
3933	unsigned IntField;
3934	AllIntFields &= !Field.getAsInteger(Radix: `10`, Result&: IntField);
3935	Ops.push_back(Elt: IntField);
3936	}
3937
3938	assert(AllIntFields &&
3939	"Unexpected non-integer value in special register string.");
3940	(void)AllIntFields;
3941
3942	// Need to combine the integer fields of the string into a single value
3943	// based on the bit encoding of MRS/MSR instruction.
3944	return (Ops [`0`] << `14`) \| (Ops [`1`] << `11`) \| (Ops [`2`] << `7`) \|
3945	(Ops [`3`] << `3`) \| (Ops [`4`]);
3946	}
3947
3948	// Lower the read_register intrinsic to an MRS instruction node if the special
3949	// register string argument is either of the form detailed in the ALCE (the
3950	// form described in getIntOperandsFromRegsterString) or is a named register
3951	// known by the MRS SysReg mapper.
3952	bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3953	const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: `1`));
3954	const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: `0`));
3955	SDLoc DL(N);
3956
3957	bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3958
3959	unsigned Opcode64Bit = AArch64::MRS;
3960	int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
3961	if (Imm == -`1`) {
3962	// No match, Use the sysreg mapper to map the remaining possible strings to
3963	// the value for the register to be used for the instruction operand.
3964	const auto *TheReg =
3965	AArch64SysReg::lookupSysRegByName(RegString->getString());
3966	if (TheReg && TheReg->Readable &&
3967	TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
3968	Imm = TheReg->Encoding;
3969	else
3970	Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
3971
3972	if (Imm == -`1`) {
3973	// Still no match, see if this is "pc" or give up.
3974	if (!ReadIs128Bit && RegString->getString() == "pc") {
3975	Opcode64Bit = AArch64::ADR;
3976	Imm = `0`;
3977	} else {
3978	return false;
3979	}
3980	}
3981	}
3982
3983	SDValue InChain = N->getOperand(Num: `0`);
3984	SDValue SysRegImm = CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32);
3985	if (!ReadIs128Bit) {
3986	CurDAG->SelectNodeTo(N, MachineOpc: Opcode64Bit, VT1: MVT::i64, VT2: MVT::Other / Chain /,
3987	Ops: {SysRegImm, InChain});
3988	} else {
3989	SDNode *MRRS = CurDAG->getMachineNode(
3990	Opcode: AArch64::MRRS, dl: DL,
3991	ResultTys: {MVT::Untyped / XSeqPair /, MVT::Other / Chain /},
3992	Ops: {SysRegImm, InChain});
3993
3994	// Sysregs are not endian. The even register always contains the low half
3995	// of the register.
3996	SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sube64, DL, VT: MVT::i64,
3997	Operand: SDValue (MRRS, `0`));
3998	SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::subo64, DL, VT: MVT::i64,
3999	Operand: SDValue (MRRS, `0`));
4000	SDValue OutChain = SDValue (MRRS, `1`);
4001
4002	ReplaceUses(F: SDValue (N, `0`), T: Lo);
4003	ReplaceUses(F: SDValue (N, `1`), T: Hi);
4004	ReplaceUses(F: SDValue (N, `2`), T: OutChain);
4005	};
4006	return true;
4007	}
4008
4009	// Lower the write_register intrinsic to an MSR instruction node if the special
4010	// register string argument is either of the form detailed in the ALCE (the
4011	// form described in getIntOperandsFromRegsterString) or is a named register
4012	// known by the MSR SysReg mapper.
4013	bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4014	const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: `1`));
4015	const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: `0`));
4016	SDLoc DL(N);
4017
4018	bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4019
4020	if (!WriteIs128Bit) {
4021	// Check if the register was one of those allowed as the pstatefield value
4022	// in the MSR (immediate) instruction. To accept the values allowed in the
4023	// pstatefield for the MSR (immediate) instruction, we also require that an
4024	// immediate value has been provided as an argument, we know that this is
4025	// the case as it has been ensured by semantic checking.
4026	auto trySelectPState = [&](auto PMapper, unsigned State) {
4027	if (PMapper) {
4028	assert(isa<ConstantSDNode>(N->getOperand(`2`)) &&
4029	"Expected a constant integer expression.");
4030	unsigned Reg = PMapper->Encoding;
4031	uint64_t Immed = N->getConstantOperandVal(Num: `2`);
4032	CurDAG->SelectNodeTo(
4033	N, MachineOpc: State, VT: MVT::Other, Op1: CurDAG->getTargetConstant(Val: Reg, DL, VT: MVT::i32),
4034	Op2: CurDAG->getTargetConstant(Val: Immed, DL, VT: MVT::i16), Op3: N->getOperand(Num: `0`));
4035	return true;
4036	}
4037	return false;
4038	};
4039
4040	if (trySelectPState (
4041	AArch64PState::lookupPStateImm0_15ByName(Name: RegString->getString()),
4042	AArch64::MSRpstateImm4))
4043	return true;
4044	if (trySelectPState (
4045	AArch64PState::lookupPStateImm0_1ByName(Name: RegString->getString()),
4046	AArch64::MSRpstateImm1))
4047	return true;
4048	}
4049
4050	int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
4051	if (Imm == -`1`) {
4052	// Use the sysreg mapper to attempt to map the remaining possible strings
4053	// to the value for the register to be used for the MSR (register)
4054	// instruction operand.
4055	auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4056	if (TheReg && TheReg->Writeable &&
4057	TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
4058	Imm = TheReg->Encoding;
4059	else
4060	Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
4061
4062	if (Imm == -`1`)
4063	return false;
4064	}
4065
4066	SDValue InChain = N->getOperand(Num: `0`);
4067	if (!WriteIs128Bit) {
4068	CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSR, VT: MVT::Other,
4069	Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
4070	Op2: N->getOperand(Num: `2`), Op3: InChain);
4071	} else {
4072	// No endian swap. The lower half always goes into the even subreg, and the
4073	// higher half always into the odd supreg.
4074	SDNode *Pair = CurDAG->getMachineNode(
4075	Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped / XSeqPair /,
4076	Ops: {CurDAG->getTargetConstant(Val: AArch64::XSeqPairsClassRegClass.getID(), DL,
4077	VT: MVT::i32),
4078	N->getOperand(Num: `2`),
4079	CurDAG->getTargetConstant(Val: AArch64::sube64, DL, VT: MVT::i32),
4080	N->getOperand(Num: `3`),
4081	CurDAG->getTargetConstant(Val: AArch64::subo64, DL, VT: MVT::i32)});
4082
4083	CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSRR, VT: MVT::Other,
4084	Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
4085	Op2: SDValue (Pair, `0`), Op3: InChain);
4086	}
4087
4088	return true;
4089	}
4090
4091	/// We've got special pseudo-instructions for these
4092	bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4093	unsigned Opcode;
4094	EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
4095
4096	// Leave IR for LSE if subtarget supports it.
4097	if (Subtarget->hasLSE()) return false;
4098
4099	if (MemTy == MVT::i8)
4100	Opcode = AArch64::CMP_SWAP_8;
4101	else if (MemTy == MVT::i16)
4102	Opcode = AArch64::CMP_SWAP_16;
4103	else if (MemTy == MVT::i32)
4104	Opcode = AArch64::CMP_SWAP_32;
4105	else if (MemTy == MVT::i64)
4106	Opcode = AArch64::CMP_SWAP_64;
4107	else
4108	llvm_unreachable("Unknown AtomicCmpSwap type");
4109
4110	MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4111	SDValue Ops[] = {N->getOperand(Num: `1`), N->getOperand(Num: `2`), N->getOperand(Num: `3`),
4112	N->getOperand(Num: `0`)};
4113	SDNode *CmpSwap = CurDAG->getMachineNode(
4114	Opcode, dl: SDLoc (N),
4115	VTs: CurDAG->getVTList(VT1: RegTy, VT2: MVT::i32, VT3: MVT::Other), Ops);
4116
4117	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
4118	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
4119
4120	ReplaceUses(F: SDValue (N, `0`), T: SDValue (CmpSwap, `0`));
4121	ReplaceUses(F: SDValue (N, `1`), T: SDValue (CmpSwap, `2`));
4122	CurDAG->RemoveDeadNode(N);
4123
4124	return true;
4125	}
4126
4127	bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4128	SDValue &Shift) {
4129	if (!isa<ConstantSDNode>(Val: N))
4130	return false;
4131
4132	SDLoc DL(N);
4133	uint64_t Val = cast<ConstantSDNode>(Val&: N)
4134	->getAPIntValue()
4135	.trunc(width: VT.getFixedSizeInBits())
4136	.getZExtValue();
4137
4138	switch (VT.SimpleTy) {
4139	case MVT::i8:
4140	// All immediates are supported.
4141	Shift = CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32);
4142	Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4143	return true;
4144	case MVT::i16:
4145	case MVT::i32:
4146	case MVT::i64:
4147	// Support 8bit unsigned immediates.
4148	if (Val <= `255`) {
4149	Shift = CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32);
4150	Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4151	return true;
4152	}
4153	// Support 16bit unsigned immediates that are a multiple of 256.
4154	if (Val <= `65280` && Val % `256` == `0`) {
4155	Shift = CurDAG->getTargetConstant(Val: `8`, DL, VT: MVT::i32);
4156	Imm = CurDAG->getTargetConstant(Val: Val >> `8`, DL, VT: MVT::i32);
4157	return true;
4158	}
4159	break;
4160	default:
4161	break;
4162	}
4163
4164	return false;
4165	}
4166
4167	bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4168	SDValue &Imm, SDValue &Shift,
4169	bool Negate) {
4170	if (!isa<ConstantSDNode>(Val: N))
4171	return false;
4172
4173	SDLoc DL(N);
4174	int64_t Val = cast<ConstantSDNode>(Val&: N)
4175	->getAPIntValue()
4176	.trunc(width: VT.getFixedSizeInBits())
4177	.getSExtValue();
4178
4179	if (Negate)
4180	Val = -Val;
4181
4182	// Signed saturating instructions treat their immediate operand as unsigned,
4183	// whereas the related intrinsics define their operands to be signed. This
4184	// means we can only use the immediate form when the operand is non-negative.
4185	if (Val < `0`)
4186	return false;
4187
4188	switch (VT.SimpleTy) {
4189	case MVT::i8:
4190	// All positive immediates are supported.
4191	Shift = CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32);
4192	Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4193	return true;
4194	case MVT::i16:
4195	case MVT::i32:
4196	case MVT::i64:
4197	// Support 8bit positive immediates.
4198	if (Val <= `255`) {
4199	Shift = CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32);
4200	Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4201	return true;
4202	}
4203	// Support 16bit positive immediates that are a multiple of 256.
4204	if (Val <= `65280` && Val % `256` == `0`) {
4205	Shift = CurDAG->getTargetConstant(Val: `8`, DL, VT: MVT::i32);
4206	Imm = CurDAG->getTargetConstant(Val: Val >> `8`, DL, VT: MVT::i32);
4207	return true;
4208	}
4209	break;
4210	default:
4211	break;
4212	}
4213
4214	return false;
4215	}
4216
4217	bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4218	SDValue &Shift) {
4219	if (!isa<ConstantSDNode>(Val: N))
4220	return false;
4221
4222	SDLoc DL(N);
4223	int64_t Val = cast<ConstantSDNode>(Val&: N)
4224	->getAPIntValue()
4225	.trunc(width: VT.getFixedSizeInBits())
4226	.getSExtValue();
4227
4228	switch (VT.SimpleTy) {
4229	case MVT::i8:
4230	// All immediates are supported.
4231	Shift = CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32);
4232	Imm = CurDAG->getTargetConstant(Val: Val & `0xFF`, DL, VT: MVT::i32);
4233	return true;
4234	case MVT::i16:
4235	case MVT::i32:
4236	case MVT::i64:
4237	// Support 8bit signed immediates.
4238	if (Val >= -`128` && Val <= `127`) {
4239	Shift = CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32);
4240	Imm = CurDAG->getTargetConstant(Val: Val & `0xFF`, DL, VT: MVT::i32);
4241	return true;
4242	}
4243	// Support 16bit signed immediates that are a multiple of 256.
4244	if (Val >= -`32768` && Val <= `32512` && Val % `256` == `0`) {
4245	Shift = CurDAG->getTargetConstant(Val: `8`, DL, VT: MVT::i32);
4246	Imm = CurDAG->getTargetConstant(Val: (Val >> `8`) & `0xFF`, DL, VT: MVT::i32);
4247	return true;
4248	}
4249	break;
4250	default:
4251	break;
4252	}
4253
4254	return false;
4255	}
4256
4257	bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4258	if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4259	int64_t ImmVal = CNode->getSExtValue();
4260	SDLoc DL(N);
4261	if (ImmVal >= -`128` && ImmVal < `128`) {
4262	Imm = CurDAG->getTargetConstant(Val: ImmVal, DL, VT: MVT::i32);
4263	return true;
4264	}
4265	}
4266	return false;
4267	}
4268
4269	bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4270	if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4271	uint64_t ImmVal = CNode->getZExtValue();
4272
4273	switch (VT.SimpleTy) {
4274	case MVT::i8:
4275	ImmVal &= `0xFF`;
4276	break;
4277	case MVT::i16:
4278	ImmVal &= `0xFFFF`;
4279	break;
4280	case MVT::i32:
4281	ImmVal &= `0xFFFFFFFF`;
4282	break;
4283	case MVT::i64:
4284	break;
4285	default:
4286	llvm_unreachable("Unexpected type");
4287	}
4288
4289	if (ImmVal < `256`) {
4290	Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc (N), VT: MVT::i32);
4291	return true;
4292	}
4293	}
4294	return false;
4295	}
4296
4297	bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4298	bool Invert) {
4299	if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4300	uint64_t ImmVal = CNode->getZExtValue();
4301	SDLoc DL(N);
4302
4303	if (Invert)
4304	ImmVal = ~ImmVal;
4305
4306	// Shift mask depending on type size.
4307	switch (VT.SimpleTy) {
4308	case MVT::i8:
4309	ImmVal &= `0xFF`;
4310	ImmVal \|= ImmVal << `8`;
4311	ImmVal \|= ImmVal << `16`;
4312	ImmVal \|= ImmVal << `32`;
4313	break;
4314	case MVT::i16:
4315	ImmVal &= `0xFFFF`;
4316	ImmVal \|= ImmVal << `16`;
4317	ImmVal \|= ImmVal << `32`;
4318	break;
4319	case MVT::i32:
4320	ImmVal &= `0xFFFFFFFF`;
4321	ImmVal \|= ImmVal << `32`;
4322	break;
4323	case MVT::i64:
4324	break;
4325	default:
4326	llvm_unreachable("Unexpected type");
4327	}
4328
4329	uint64_t encoding;
4330	if (AArch64_AM::processLogicalImmediate(Imm: ImmVal, RegSize: `64`, Encoding&: encoding)) {
4331	Imm = CurDAG->getTargetConstant(Val: encoding, DL, VT: MVT::i64);
4332	return true;
4333	}
4334	}
4335	return false;
4336	}
4337
4338	// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4339	// Rather than attempt to normalise everything we can sometimes saturate the
4340	// shift amount during selection. This function also allows for consistent
4341	// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4342	// required by the instructions.
4343	bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4344	uint64_t High, bool AllowSaturation,
4345	SDValue &Imm) {
4346	if (auto *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
4347	uint64_t ImmVal = CN->getZExtValue();
4348
4349	// Reject shift amounts that are too small.
4350	if (ImmVal < Low)
4351	return false;
4352
4353	// Reject or saturate shift amounts that are too big.
4354	if (ImmVal > High) {
4355	if (!AllowSaturation)
4356	return false;
4357	ImmVal = High;
4358	}
4359
4360	Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc (N), VT: MVT::i32);
4361	return true;
4362	}
4363
4364	return false;
4365	}
4366
4367	bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4368	// tagp(FrameIndex, IRGstack, tag_offset):
4369	// since the offset between FrameIndex and IRGstack is a compile-time
4370	// constant, this can be lowered to a single ADDG instruction.
4371	if (!(isa<FrameIndexSDNode>(Val: N->getOperand(Num: `1`)))) {
4372	return false;
4373	}
4374
4375	SDValue IRG_SP = N->getOperand(Num: `2`);
4376	if (IRG_SP ->getOpcode() != ISD::INTRINSIC_W_CHAIN \|\|
4377	IRG_SP ->getConstantOperandVal(Num: `1`) != Intrinsic::aarch64_irg_sp) {
4378	return false;
4379	}
4380
4381	const TargetLowering *TLI = getTargetLowering();
4382	SDLoc DL(N);
4383	int FI = cast<FrameIndexSDNode>(Val: N->getOperand(Num: `1`))->getIndex();
4384	SDValue FiOp = CurDAG->getTargetFrameIndex(
4385	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
4386	int TagOffset = N->getConstantOperandVal(Num: `3`);
4387
4388	SDNode *Out = CurDAG->getMachineNode(
4389	Opcode: AArch64::TAGPstack, dl: DL, VT: MVT::i64,
4390	Ops: {FiOp, CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i64), N->getOperand(Num: `2`),
4391	CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
4392	ReplaceNode(F: N, T: Out);
4393	return true;
4394	}
4395
4396	void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4397	assert(isa<ConstantSDNode>(N->getOperand(`3`)) &&
4398	"llvm.aarch64.tagp third argument must be an immediate");
4399	if (trySelectStackSlotTagP(N))
4400	return;
4401	// FIXME: above applies in any case when offset between Op1 and Op2 is a
4402	// compile-time constant, not just for stack allocations.
4403
4404	// General case for unrelated pointers in Op1 and Op2.
4405	SDLoc DL(N);
4406	int TagOffset = N->getConstantOperandVal(Num: `3`);
4407	SDNode *N1 = CurDAG->getMachineNode(Opcode: AArch64::SUBP, dl: DL, VT: MVT::i64,
4408	Ops: {N->getOperand(Num: `1`), N->getOperand(Num: `2`)});
4409	SDNode *N2 = CurDAG->getMachineNode(Opcode: AArch64::ADDXrr, dl: DL, VT: MVT::i64,
4410	Ops: {SDValue (N1, `0`), N->getOperand(Num: `2`)});
4411	SDNode *N3 = CurDAG->getMachineNode(
4412	Opcode: AArch64::ADDG, dl: DL, VT: MVT::i64,
4413	Ops: {SDValue (N2, `0`), CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i64),
4414	CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
4415	ReplaceNode(F: N, T: N3);
4416	}
4417
4418	bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4419	assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4420
4421	// Bail when not a "cast" like insert_subvector.
4422	if (N->getConstantOperandVal(Num: `2`) != `0`)
4423	return false;
4424	if (!N->getOperand(Num: `0`).isUndef())
4425	return false;
4426
4427	// Bail when normal isel should do the job.
4428	EVT VT = N->getValueType(ResNo: `0`);
4429	EVT InVT = N->getOperand(Num: `1`).getValueType();
4430	if (VT.isFixedLengthVector() \|\| InVT.isScalableVector())
4431	return false;
4432	if (InVT.getSizeInBits() <= `128`)
4433	return false;
4434
4435	// NOTE: We can only get here when doing fixed length SVE code generation.
4436	// We do manual selection because the types involved are not linked to real
4437	// registers (despite being legal) and must be coerced into SVE registers.
4438
4439	assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4440	"Expected to insert into a packed scalable vector!");
4441
4442	SDLoc DL(N);
4443	auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
4444	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
4445	Op1: N->getOperand(Num: `1`), Op2: RC));
4446	return true;
4447	}
4448
4449	bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4450	assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4451
4452	// Bail when not a "cast" like extract_subvector.
4453	if (N->getConstantOperandVal(Num: `1`) != `0`)
4454	return false;
4455
4456	// Bail when normal isel can do the job.
4457	EVT VT = N->getValueType(ResNo: `0`);
4458	EVT InVT = N->getOperand(Num: `0`).getValueType();
4459	if (VT.isScalableVector() \|\| InVT.isFixedLengthVector())
4460	return false;
4461	if (VT.getSizeInBits() <= `128`)
4462	return false;
4463
4464	// NOTE: We can only get here when doing fixed length SVE code generation.
4465	// We do manual selection because the types involved are not linked to real
4466	// registers (despite being legal) and must be coerced into SVE registers.
4467
4468	assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4469	"Expected to extract from a packed scalable vector!");
4470
4471	SDLoc DL(N);
4472	auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
4473	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
4474	Op1: N->getOperand(Num: `0`), Op2: RC));
4475	return true;
4476	}
4477
4478	bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4479	assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4480
4481	SDValue N0 = N->getOperand(Num: `0`);
4482	SDValue N1 = N->getOperand(Num: `1`);
4483	EVT VT = N->getValueType(ResNo: `0`);
4484
4485	// Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4486	// Rotate by a constant is a funnel shift in IR which is exanded to
4487	// an OR with shifted operands.
4488	// We do the following transform:
4489	// OR N0, N1 -> xar (x, y, imm)
4490	// Where:
4491	// N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4492	// N0 = SHL_PRED true, V, splat(bits-imm)
4493	// V = (xor x, y)
4494	if (VT.isScalableVector() &&
4495	(Subtarget->hasSVE2() \|\|
4496	(Subtarget->hasSME() && Subtarget->isStreaming()))) {
4497	if (N0.getOpcode() != AArch64ISD::SHL_PRED \|\|
4498	N1.getOpcode() != AArch64ISD::SRL_PRED)
4499	std::swap(a&: N0, b&: N1);
4500	if (N0.getOpcode() != AArch64ISD::SHL_PRED \|\|
4501	N1.getOpcode() != AArch64ISD::SRL_PRED)
4502	return false;
4503
4504	auto TLI = static_cast<const* AArch64TargetLowering *>(getTargetLowering());
4505	if (!TLI->isAllActivePredicate(DAG&: *CurDAG, N: N0.getOperand(i: `0`)) \|\|
4506	!TLI->isAllActivePredicate(DAG&: *CurDAG, N: N1.getOperand(i: `0`)))
4507	return false;
4508
4509	SDValue XOR = N0.getOperand(i: `1`);
4510	if (XOR.getOpcode() != ISD::XOR \|\| XOR != N1.getOperand(i: `1`))
4511	return false;
4512
4513	APInt ShlAmt, ShrAmt;
4514	if (!ISD::isConstantSplatVector(N: N0.getOperand(i: `2`).getNode(), SplatValue&: ShlAmt) \|\|
4515	!ISD::isConstantSplatVector(N: N1.getOperand(i: `2`).getNode(), SplatValue&: ShrAmt))
4516	return false;
4517
4518	if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4519	return false;
4520
4521	SDLoc DL(N);
4522	SDValue Imm =
4523	CurDAG->getTargetConstant(Val: ShrAmt.getZExtValue(), DL, VT: MVT::i32);
4524
4525	SDValue Ops[] = {XOR.getOperand(i: `0`), XOR.getOperand(i: `1`), Imm};
4526	if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4527	VT, Opcodes: {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4528	AArch64::XAR_ZZZI_D})) {
4529	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
4530	return true;
4531	}
4532	return false;
4533	}
4534
4535	if (!Subtarget->hasSHA3())
4536	return false;
4537
4538	if (N0 ->getOpcode() != AArch64ISD::VSHL \|\|
4539	N1 ->getOpcode() != AArch64ISD::VLSHR)
4540	return false;
4541
4542	if (N0 ->getOperand(Num: `0`) != N1 ->getOperand(Num: `0`) \|\|
4543	N1 ->getOperand(Num: `0`)->getOpcode() != ISD::XOR)
4544	return false;
4545
4546	SDValue XOR = N0.getOperand(i: `0`);
4547	SDValue R1 = XOR.getOperand(i: `0`);
4548	SDValue R2 = XOR.getOperand(i: `1`);
4549
4550	unsigned HsAmt = N0.getConstantOperandVal(i: `1`);
4551	unsigned ShAmt = N1.getConstantOperandVal(i: `1`);
4552
4553	SDLoc DL = SDLoc (N0.getOperand(i: `1`));
4554	SDValue Imm = CurDAG->getTargetConstant(
4555	Val: ShAmt, DL, VT: N0.getOperand(i: `1`).getValueType(), isOpaque: false);
4556
4557	if (ShAmt + HsAmt != `64`)
4558	return false;
4559
4560	SDValue Ops[] = {R1, R2, Imm};
4561	CurDAG->SelectNodeTo(N, MachineOpc: AArch64::XAR, VT: N0.getValueType(), Ops);
4562
4563	return true;
4564	}
4565
4566	void AArch64DAGToDAGISel::Select(SDNode *Node) {
4567	// If we have a custom node, we already have selected!
4568	if (Node->isMachineOpcode()) {
4569	LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4570	Node->setNodeId(-`1`);
4571	return;
4572	}
4573
4574	// Few custom selection stuff.
4575	EVT VT = Node->getValueType(ResNo: `0`);
4576
4577	switch (Node->getOpcode()) {
4578	default:
4579	break;
4580
4581	case ISD::ATOMIC_CMP_SWAP:
4582	if (SelectCMP_SWAP(N: Node))
4583	return;
4584	break;
4585
4586	case ISD::READ_REGISTER:
4587	case AArch64ISD::MRRS:
4588	if (tryReadRegister(N: Node))
4589	return;
4590	break;
4591
4592	case ISD::WRITE_REGISTER:
4593	case AArch64ISD::MSRR:
4594	if (tryWriteRegister(N: Node))
4595	return;
4596	break;
4597
4598	case ISD::LOAD: {
4599	// Try to select as an indexed load. Fall through to normal processing
4600	// if we can't.
4601	if (tryIndexedLoad(N: Node))
4602	return;
4603	break;
4604	}
4605
4606	case ISD::SRL:
4607	case ISD::AND:
4608	case ISD::SRA:
4609	case ISD::SIGN_EXTEND_INREG:
4610	if (tryBitfieldExtractOp(N: Node))
4611	return;
4612	if (tryBitfieldInsertInZeroOp(N: Node))
4613	return;
4614	[[fallthrough]];
4615	case ISD::ROTR:
4616	case ISD::SHL:
4617	if (tryShiftAmountMod(N: Node))
4618	return;
4619	break;
4620
4621	case ISD::SIGN_EXTEND:
4622	if (tryBitfieldExtractOpFromSExt(N: Node))
4623	return;
4624	break;
4625
4626	case ISD::OR:
4627	if (tryBitfieldInsertOp(N: Node))
4628	return;
4629	if (trySelectXAR(N: Node))
4630	return;
4631	break;
4632
4633	case ISD::EXTRACT_SUBVECTOR: {
4634	if (trySelectCastScalableToFixedLengthVector(N: Node))
4635	return;
4636	break;
4637	}
4638
4639	case ISD::INSERT_SUBVECTOR: {
4640	if (trySelectCastFixedLengthToScalableVector(N: Node))
4641	return;
4642	break;
4643	}
4644
4645	case ISD::Constant: {
4646	// Materialize zero constants as copies from WZR/XZR. This allows
4647	// the coalescer to propagate these into other instructions.
4648	ConstantSDNode *ConstNode = cast<ConstantSDNode>(Val: Node);
4649	if (ConstNode->isZero()) {
4650	if (VT == MVT::i32) {
4651	SDValue New = CurDAG->getCopyFromReg(
4652	Chain: CurDAG->getEntryNode(), dl: SDLoc (Node), Reg: AArch64::WZR, VT: MVT::i32);
4653	ReplaceNode(F: Node, T: New.getNode());
4654	return;
4655	} else if (VT == MVT::i64) {
4656	SDValue New = CurDAG->getCopyFromReg(
4657	Chain: CurDAG->getEntryNode(), dl: SDLoc (Node), Reg: AArch64::XZR, VT: MVT::i64);
4658	ReplaceNode(F: Node, T: New.getNode());
4659	return;
4660	}
4661	}
4662	break;
4663	}
4664
4665	case ISD::FrameIndex: {
4666	// Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4667	int FI = cast<FrameIndexSDNode>(Val: Node)->getIndex();
4668	unsigned Shifter = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: `0`);
4669	const TargetLowering *TLI = getTargetLowering();
4670	SDValue TFI = CurDAG->getTargetFrameIndex(
4671	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
4672	SDLoc DL(Node);
4673	SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32),
4674	CurDAG->getTargetConstant(Val: Shifter, DL, VT: MVT::i32) };
4675	CurDAG->SelectNodeTo(N: Node, MachineOpc: AArch64::ADDXri, VT: MVT::i64, Ops);
4676	return;
4677	}
4678	case ISD::INTRINSIC_W_CHAIN: {
4679	unsigned IntNo = Node->getConstantOperandVal(Num: `1`);
4680	switch (IntNo) {
4681	default:
4682	break;
4683	case Intrinsic::aarch64_gcsss: {
4684	SDLoc DL(Node);
4685	SDValue Chain = Node->getOperand(Num: `0`);
4686	SDValue Val = Node->getOperand(Num: `2`);
4687	SDValue Zero = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::XZR, VT: MVT::i64);
4688	SDNode *SS1 =
4689	CurDAG->getMachineNode(Opcode: AArch64::GCSSS1, dl: DL, VT: MVT::Other, Op1: Val, Op2: Chain);
4690	SDNode *SS2 = CurDAG->getMachineNode(Opcode: AArch64::GCSSS2, dl: DL, VT1: MVT::i64,
4691	VT2: MVT::Other, Op1: Zero, Op2: SDValue (SS1, `0`));
4692	ReplaceNode(F: Node, T: SS2);
4693	return;
4694	}
4695	case Intrinsic::aarch64_ldaxp:
4696	case Intrinsic::aarch64_ldxp: {
4697	unsigned Op =
4698	IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4699	SDValue MemAddr = Node->getOperand(Num: `2`);
4700	SDLoc DL(Node);
4701	SDValue Chain = Node->getOperand(Num: `0`);
4702
4703	SDNode *Ld = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i64, VT2: MVT::i64,
4704	VT3: MVT::Other, Op1: MemAddr, Op2: Chain);
4705
4706	// Transfer memoperands.
4707	MachineMemOperand *MemOp =
4708	cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
4709	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
4710	ReplaceNode(F: Node, T: Ld);
4711	return;
4712	}
4713	case Intrinsic::aarch64_stlxp:
4714	case Intrinsic::aarch64_stxp: {
4715	unsigned Op =
4716	IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4717	SDLoc DL(Node);
4718	SDValue Chain = Node->getOperand(Num: `0`);
4719	SDValue ValLo = Node->getOperand(Num: `2`);
4720	SDValue ValHi = Node->getOperand(Num: `3`);
4721	SDValue MemAddr = Node->getOperand(Num: `4`);
4722
4723	// Place arguments in the right order.
4724	SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4725
4726	SDNode *St = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops);
4727	// Transfer memoperands.
4728	MachineMemOperand *MemOp =
4729	cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
4730	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
4731
4732	ReplaceNode(F: Node, T: St);
4733	return;
4734	}
4735	case Intrinsic::aarch64_neon_ld1x2:
4736	if (VT == MVT::v8i8) {
4737	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov8b, SubRegIdx: AArch64::dsub0);
4738	return;
4739	} else if (VT == MVT::v16i8) {
4740	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov16b, SubRegIdx: AArch64::qsub0);
4741	return;
4742	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4743	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov4h, SubRegIdx: AArch64::dsub0);
4744	return;
4745	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4746	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov8h, SubRegIdx: AArch64::qsub0);
4747	return;
4748	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4749	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov2s, SubRegIdx: AArch64::dsub0);
4750	return;
4751	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4752	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov4s, SubRegIdx: AArch64::qsub0);
4753	return;
4754	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4755	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
4756	return;
4757	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4758	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov2d, SubRegIdx: AArch64::qsub0);
4759	return;
4760	}
4761	break;
4762	case Intrinsic::aarch64_neon_ld1x3:
4763	if (VT == MVT::v8i8) {
4764	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev8b, SubRegIdx: AArch64::dsub0);
4765	return;
4766	} else if (VT == MVT::v16i8) {
4767	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev16b, SubRegIdx: AArch64::qsub0);
4768	return;
4769	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4770	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev4h, SubRegIdx: AArch64::dsub0);
4771	return;
4772	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4773	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev8h, SubRegIdx: AArch64::qsub0);
4774	return;
4775	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4776	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev2s, SubRegIdx: AArch64::dsub0);
4777	return;
4778	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4779	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev4s, SubRegIdx: AArch64::qsub0);
4780	return;
4781	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4782	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
4783	return;
4784	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4785	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev2d, SubRegIdx: AArch64::qsub0);
4786	return;
4787	}
4788	break;
4789	case Intrinsic::aarch64_neon_ld1x4:
4790	if (VT == MVT::v8i8) {
4791	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv8b, SubRegIdx: AArch64::dsub0);
4792	return;
4793	} else if (VT == MVT::v16i8) {
4794	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv16b, SubRegIdx: AArch64::qsub0);
4795	return;
4796	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4797	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv4h, SubRegIdx: AArch64::dsub0);
4798	return;
4799	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4800	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv8h, SubRegIdx: AArch64::qsub0);
4801	return;
4802	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4803	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv2s, SubRegIdx: AArch64::dsub0);
4804	return;
4805	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4806	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv4s, SubRegIdx: AArch64::qsub0);
4807	return;
4808	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4809	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
4810	return;
4811	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4812	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv2d, SubRegIdx: AArch64::qsub0);
4813	return;
4814	}
4815	break;
4816	case Intrinsic::aarch64_neon_ld2:
4817	if (VT == MVT::v8i8) {
4818	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov8b, SubRegIdx: AArch64::dsub0);
4819	return;
4820	} else if (VT == MVT::v16i8) {
4821	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov16b, SubRegIdx: AArch64::qsub0);
4822	return;
4823	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4824	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov4h, SubRegIdx: AArch64::dsub0);
4825	return;
4826	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4827	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov8h, SubRegIdx: AArch64::qsub0);
4828	return;
4829	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4830	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov2s, SubRegIdx: AArch64::dsub0);
4831	return;
4832	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4833	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov4s, SubRegIdx: AArch64::qsub0);
4834	return;
4835	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4836	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
4837	return;
4838	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4839	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov2d, SubRegIdx: AArch64::qsub0);
4840	return;
4841	}
4842	break;
4843	case Intrinsic::aarch64_neon_ld3:
4844	if (VT == MVT::v8i8) {
4845	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev8b, SubRegIdx: AArch64::dsub0);
4846	return;
4847	} else if (VT == MVT::v16i8) {
4848	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev16b, SubRegIdx: AArch64::qsub0);
4849	return;
4850	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4851	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev4h, SubRegIdx: AArch64::dsub0);
4852	return;
4853	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4854	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev8h, SubRegIdx: AArch64::qsub0);
4855	return;
4856	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4857	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev2s, SubRegIdx: AArch64::dsub0);
4858	return;
4859	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4860	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev4s, SubRegIdx: AArch64::qsub0);
4861	return;
4862	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4863	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
4864	return;
4865	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4866	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev2d, SubRegIdx: AArch64::qsub0);
4867	return;
4868	}
4869	break;
4870	case Intrinsic::aarch64_neon_ld4:
4871	if (VT == MVT::v8i8) {
4872	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv8b, SubRegIdx: AArch64::dsub0);
4873	return;
4874	} else if (VT == MVT::v16i8) {
4875	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv16b, SubRegIdx: AArch64::qsub0);
4876	return;
4877	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4878	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv4h, SubRegIdx: AArch64::dsub0);
4879	return;
4880	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4881	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv8h, SubRegIdx: AArch64::qsub0);
4882	return;
4883	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4884	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv2s, SubRegIdx: AArch64::dsub0);
4885	return;
4886	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4887	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv4s, SubRegIdx: AArch64::qsub0);
4888	return;
4889	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4890	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
4891	return;
4892	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4893	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv2d, SubRegIdx: AArch64::qsub0);
4894	return;
4895	}
4896	break;
4897	case Intrinsic::aarch64_neon_ld2r:
4898	if (VT == MVT::v8i8) {
4899	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv8b, SubRegIdx: AArch64::dsub0);
4900	return;
4901	} else if (VT == MVT::v16i8) {
4902	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv16b, SubRegIdx: AArch64::qsub0);
4903	return;
4904	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4905	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv4h, SubRegIdx: AArch64::dsub0);
4906	return;
4907	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4908	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv8h, SubRegIdx: AArch64::qsub0);
4909	return;
4910	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4911	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv2s, SubRegIdx: AArch64::dsub0);
4912	return;
4913	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4914	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv4s, SubRegIdx: AArch64::qsub0);
4915	return;
4916	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4917	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv1d, SubRegIdx: AArch64::dsub0);
4918	return;
4919	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4920	SelectLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv2d, SubRegIdx: AArch64::qsub0);
4921	return;
4922	}
4923	break;
4924	case Intrinsic::aarch64_neon_ld3r:
4925	if (VT == MVT::v8i8) {
4926	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv8b, SubRegIdx: AArch64::dsub0);
4927	return;
4928	} else if (VT == MVT::v16i8) {
4929	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv16b, SubRegIdx: AArch64::qsub0);
4930	return;
4931	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4932	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv4h, SubRegIdx: AArch64::dsub0);
4933	return;
4934	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4935	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv8h, SubRegIdx: AArch64::qsub0);
4936	return;
4937	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4938	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv2s, SubRegIdx: AArch64::dsub0);
4939	return;
4940	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4941	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv4s, SubRegIdx: AArch64::qsub0);
4942	return;
4943	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4944	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv1d, SubRegIdx: AArch64::dsub0);
4945	return;
4946	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4947	SelectLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv2d, SubRegIdx: AArch64::qsub0);
4948	return;
4949	}
4950	break;
4951	case Intrinsic::aarch64_neon_ld4r:
4952	if (VT == MVT::v8i8) {
4953	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv8b, SubRegIdx: AArch64::dsub0);
4954	return;
4955	} else if (VT == MVT::v16i8) {
4956	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv16b, SubRegIdx: AArch64::qsub0);
4957	return;
4958	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
4959	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv4h, SubRegIdx: AArch64::dsub0);
4960	return;
4961	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
4962	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv8h, SubRegIdx: AArch64::qsub0);
4963	return;
4964	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
4965	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv2s, SubRegIdx: AArch64::dsub0);
4966	return;
4967	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
4968	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv4s, SubRegIdx: AArch64::qsub0);
4969	return;
4970	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
4971	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv1d, SubRegIdx: AArch64::dsub0);
4972	return;
4973	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
4974	SelectLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv2d, SubRegIdx: AArch64::qsub0);
4975	return;
4976	}
4977	break;
4978	case Intrinsic::aarch64_neon_ld2lane:
4979	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
4980	SelectLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i8);
4981	return;
4982	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
4983	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
4984	SelectLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i16);
4985	return;
4986	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
4987	VT == MVT::v2f32) {
4988	SelectLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i32);
4989	return;
4990	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
4991	VT == MVT::v1f64) {
4992	SelectLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i64);
4993	return;
4994	}
4995	break;
4996	case Intrinsic::aarch64_neon_ld3lane:
4997	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
4998	SelectLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i8);
4999	return;
5000	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
5001	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
5002	SelectLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i16);
5003	return;
5004	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
5005	VT == MVT::v2f32) {
5006	SelectLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i32);
5007	return;
5008	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
5009	VT == MVT::v1f64) {
5010	SelectLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i64);
5011	return;
5012	}
5013	break;
5014	case Intrinsic::aarch64_neon_ld4lane:
5015	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
5016	SelectLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i8);
5017	return;
5018	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
5019	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
5020	SelectLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i16);
5021	return;
5022	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
5023	VT == MVT::v2f32) {
5024	SelectLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i32);
5025	return;
5026	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
5027	VT == MVT::v1f64) {
5028	SelectLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i64);
5029	return;
5030	}
5031	break;
5032	case Intrinsic::aarch64_ld64b:
5033	SelectLoad(N: Node, NumVecs: `8`, Opc: AArch64::LD64B, SubRegIdx: AArch64::x8sub_0);
5034	return;
5035	case Intrinsic::aarch64_sve_ld2q_sret: {
5036	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `4`, Opc_ri: AArch64::LD2Q_IMM, Opc_rr: AArch64::LD2Q, IsIntr: true);
5037	return;
5038	}
5039	case Intrinsic::aarch64_sve_ld3q_sret: {
5040	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `4`, Opc_ri: AArch64::LD3Q_IMM, Opc_rr: AArch64::LD3Q, IsIntr: true);
5041	return;
5042	}
5043	case Intrinsic::aarch64_sve_ld4q_sret: {
5044	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `4`, Opc_ri: AArch64::LD4Q_IMM, Opc_rr: AArch64::LD4Q, IsIntr: true);
5045	return;
5046	}
5047	case Intrinsic::aarch64_sve_ld2_sret: {
5048	if (VT == MVT::nxv16i8) {
5049	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `0`, Opc_ri: AArch64::LD2B_IMM, Opc_rr: AArch64::LD2B,
5050	IsIntr: true);
5051	return;
5052	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5053	VT == MVT::nxv8bf16) {
5054	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `1`, Opc_ri: AArch64::LD2H_IMM, Opc_rr: AArch64::LD2H,
5055	IsIntr: true);
5056	return;
5057	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5058	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `2`, Opc_ri: AArch64::LD2W_IMM, Opc_rr: AArch64::LD2W,
5059	IsIntr: true);
5060	return;
5061	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5062	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `3`, Opc_ri: AArch64::LD2D_IMM, Opc_rr: AArch64::LD2D,
5063	IsIntr: true);
5064	return;
5065	}
5066	break;
5067	}
5068	case Intrinsic::aarch64_sve_ld1_pn_x2: {
5069	if (VT == MVT::nxv16i8) {
5070	if (Subtarget->hasSME2())
5071	SelectContiguousMultiVectorLoad(
5072	N: Node, NumVecs: `2`, Scale: `0`, Opc_ri: AArch64::LD1B_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_2Z_PSEUDO);
5073	else if (Subtarget->hasSVE2p1())
5074	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `0`, Opc_ri: AArch64::LD1B_2Z_IMM,
5075	Opc_rr: AArch64::LD1B_2Z);
5076	else
5077	break;
5078	return;
5079	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5080	VT == MVT::nxv8bf16) {
5081	if (Subtarget->hasSME2())
5082	SelectContiguousMultiVectorLoad(
5083	N: Node, NumVecs: `2`, Scale: `1`, Opc_ri: AArch64::LD1H_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_2Z_PSEUDO);
5084	else if (Subtarget->hasSVE2p1())
5085	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `1`, Opc_ri: AArch64::LD1H_2Z_IMM,
5086	Opc_rr: AArch64::LD1H_2Z);
5087	else
5088	break;
5089	return;
5090	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5091	if (Subtarget->hasSME2())
5092	SelectContiguousMultiVectorLoad(
5093	N: Node, NumVecs: `2`, Scale: `2`, Opc_ri: AArch64::LD1W_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_2Z_PSEUDO);
5094	else if (Subtarget->hasSVE2p1())
5095	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `2`, Opc_ri: AArch64::LD1W_2Z_IMM,
5096	Opc_rr: AArch64::LD1W_2Z);
5097	else
5098	break;
5099	return;
5100	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5101	if (Subtarget->hasSME2())
5102	SelectContiguousMultiVectorLoad(
5103	N: Node, NumVecs: `2`, Scale: `3`, Opc_ri: AArch64::LD1D_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_2Z_PSEUDO);
5104	else if (Subtarget->hasSVE2p1())
5105	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `3`, Opc_ri: AArch64::LD1D_2Z_IMM,
5106	Opc_rr: AArch64::LD1D_2Z);
5107	else
5108	break;
5109	return;
5110	}
5111	break;
5112	}
5113	case Intrinsic::aarch64_sve_ld1_pn_x4: {
5114	if (VT == MVT::nxv16i8) {
5115	if (Subtarget->hasSME2())
5116	SelectContiguousMultiVectorLoad(
5117	N: Node, NumVecs: `4`, Scale: `0`, Opc_ri: AArch64::LD1B_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_4Z_PSEUDO);
5118	else if (Subtarget->hasSVE2p1())
5119	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `0`, Opc_ri: AArch64::LD1B_4Z_IMM,
5120	Opc_rr: AArch64::LD1B_4Z);
5121	else
5122	break;
5123	return;
5124	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5125	VT == MVT::nxv8bf16) {
5126	if (Subtarget->hasSME2())
5127	SelectContiguousMultiVectorLoad(
5128	N: Node, NumVecs: `4`, Scale: `1`, Opc_ri: AArch64::LD1H_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_4Z_PSEUDO);
5129	else if (Subtarget->hasSVE2p1())
5130	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `1`, Opc_ri: AArch64::LD1H_4Z_IMM,
5131	Opc_rr: AArch64::LD1H_4Z);
5132	else
5133	break;
5134	return;
5135	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5136	if (Subtarget->hasSME2())
5137	SelectContiguousMultiVectorLoad(
5138	N: Node, NumVecs: `4`, Scale: `2`, Opc_ri: AArch64::LD1W_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_4Z_PSEUDO);
5139	else if (Subtarget->hasSVE2p1())
5140	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `2`, Opc_ri: AArch64::LD1W_4Z_IMM,
5141	Opc_rr: AArch64::LD1W_4Z);
5142	else
5143	break;
5144	return;
5145	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5146	if (Subtarget->hasSME2())
5147	SelectContiguousMultiVectorLoad(
5148	N: Node, NumVecs: `4`, Scale: `3`, Opc_ri: AArch64::LD1D_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_4Z_PSEUDO);
5149	else if (Subtarget->hasSVE2p1())
5150	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `3`, Opc_ri: AArch64::LD1D_4Z_IMM,
5151	Opc_rr: AArch64::LD1D_4Z);
5152	else
5153	break;
5154	return;
5155	}
5156	break;
5157	}
5158	case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5159	if (VT == MVT::nxv16i8) {
5160	if (Subtarget->hasSME2())
5161	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `0`,
5162	Opc_ri: AArch64::LDNT1B_2Z_IMM_PSEUDO,
5163	Opc_rr: AArch64::LDNT1B_2Z_PSEUDO);
5164	else if (Subtarget->hasSVE2p1())
5165	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `0`, Opc_ri: AArch64::LDNT1B_2Z_IMM,
5166	Opc_rr: AArch64::LDNT1B_2Z);
5167	else
5168	break;
5169	return;
5170	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5171	VT == MVT::nxv8bf16) {
5172	if (Subtarget->hasSME2())
5173	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `1`,
5174	Opc_ri: AArch64::LDNT1H_2Z_IMM_PSEUDO,
5175	Opc_rr: AArch64::LDNT1H_2Z_PSEUDO);
5176	else if (Subtarget->hasSVE2p1())
5177	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `1`, Opc_ri: AArch64::LDNT1H_2Z_IMM,
5178	Opc_rr: AArch64::LDNT1H_2Z);
5179	else
5180	break;
5181	return;
5182	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5183	if (Subtarget->hasSME2())
5184	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `2`,
5185	Opc_ri: AArch64::LDNT1W_2Z_IMM_PSEUDO,
5186	Opc_rr: AArch64::LDNT1W_2Z_PSEUDO);
5187	else if (Subtarget->hasSVE2p1())
5188	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `2`, Opc_ri: AArch64::LDNT1W_2Z_IMM,
5189	Opc_rr: AArch64::LDNT1W_2Z);
5190	else
5191	break;
5192	return;
5193	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5194	if (Subtarget->hasSME2())
5195	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `3`,
5196	Opc_ri: AArch64::LDNT1D_2Z_IMM_PSEUDO,
5197	Opc_rr: AArch64::LDNT1D_2Z_PSEUDO);
5198	else if (Subtarget->hasSVE2p1())
5199	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `2`, Scale: `3`, Opc_ri: AArch64::LDNT1D_2Z_IMM,
5200	Opc_rr: AArch64::LDNT1D_2Z);
5201	else
5202	break;
5203	return;
5204	}
5205	break;
5206	}
5207	case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5208	if (VT == MVT::nxv16i8) {
5209	if (Subtarget->hasSME2())
5210	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `0`,
5211	Opc_ri: AArch64::LDNT1B_4Z_IMM_PSEUDO,
5212	Opc_rr: AArch64::LDNT1B_4Z_PSEUDO);
5213	else if (Subtarget->hasSVE2p1())
5214	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `0`, Opc_ri: AArch64::LDNT1B_4Z_IMM,
5215	Opc_rr: AArch64::LDNT1B_4Z);
5216	else
5217	break;
5218	return;
5219	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5220	VT == MVT::nxv8bf16) {
5221	if (Subtarget->hasSME2())
5222	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `1`,
5223	Opc_ri: AArch64::LDNT1H_4Z_IMM_PSEUDO,
5224	Opc_rr: AArch64::LDNT1H_4Z_PSEUDO);
5225	else if (Subtarget->hasSVE2p1())
5226	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `1`, Opc_ri: AArch64::LDNT1H_4Z_IMM,
5227	Opc_rr: AArch64::LDNT1H_4Z);
5228	else
5229	break;
5230	return;
5231	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5232	if (Subtarget->hasSME2())
5233	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `2`,
5234	Opc_ri: AArch64::LDNT1W_4Z_IMM_PSEUDO,
5235	Opc_rr: AArch64::LDNT1W_4Z_PSEUDO);
5236	else if (Subtarget->hasSVE2p1())
5237	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `2`, Opc_ri: AArch64::LDNT1W_4Z_IMM,
5238	Opc_rr: AArch64::LDNT1W_4Z);
5239	else
5240	break;
5241	return;
5242	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5243	if (Subtarget->hasSME2())
5244	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `3`,
5245	Opc_ri: AArch64::LDNT1D_4Z_IMM_PSEUDO,
5246	Opc_rr: AArch64::LDNT1D_4Z_PSEUDO);
5247	else if (Subtarget->hasSVE2p1())
5248	SelectContiguousMultiVectorLoad(N: Node, NumVecs: `4`, Scale: `3`, Opc_ri: AArch64::LDNT1D_4Z_IMM,
5249	Opc_rr: AArch64::LDNT1D_4Z);
5250	else
5251	break;
5252	return;
5253	}
5254	break;
5255	}
5256	case Intrinsic::aarch64_sve_ld3_sret: {
5257	if (VT == MVT::nxv16i8) {
5258	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `0`, Opc_ri: AArch64::LD3B_IMM, Opc_rr: AArch64::LD3B,
5259	IsIntr: true);
5260	return;
5261	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5262	VT == MVT::nxv8bf16) {
5263	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `1`, Opc_ri: AArch64::LD3H_IMM, Opc_rr: AArch64::LD3H,
5264	IsIntr: true);
5265	return;
5266	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5267	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `2`, Opc_ri: AArch64::LD3W_IMM, Opc_rr: AArch64::LD3W,
5268	IsIntr: true);
5269	return;
5270	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5271	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `3`, Opc_ri: AArch64::LD3D_IMM, Opc_rr: AArch64::LD3D,
5272	IsIntr: true);
5273	return;
5274	}
5275	break;
5276	}
5277	case Intrinsic::aarch64_sve_ld4_sret: {
5278	if (VT == MVT::nxv16i8) {
5279	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `0`, Opc_ri: AArch64::LD4B_IMM, Opc_rr: AArch64::LD4B,
5280	IsIntr: true);
5281	return;
5282	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5283	VT == MVT::nxv8bf16) {
5284	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `1`, Opc_ri: AArch64::LD4H_IMM, Opc_rr: AArch64::LD4H,
5285	IsIntr: true);
5286	return;
5287	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5288	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `2`, Opc_ri: AArch64::LD4W_IMM, Opc_rr: AArch64::LD4W,
5289	IsIntr: true);
5290	return;
5291	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5292	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `3`, Opc_ri: AArch64::LD4D_IMM, Opc_rr: AArch64::LD4D,
5293	IsIntr: true);
5294	return;
5295	}
5296	break;
5297	}
5298	case Intrinsic::aarch64_sme_read_hor_vg2: {
5299	if (VT == MVT::nxv16i8) {
5300	SelectMultiVectorMove<`14`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAB0,
5301	Op: AArch64::MOVA_2ZMXI_H_B);
5302	return;
5303	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5304	VT == MVT::nxv8bf16) {
5305	SelectMultiVectorMove<`6`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAH0,
5306	Op: AArch64::MOVA_2ZMXI_H_H);
5307	return;
5308	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5309	SelectMultiVectorMove<`2`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAS0,
5310	Op: AArch64::MOVA_2ZMXI_H_S);
5311	return;
5312	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5313	SelectMultiVectorMove<`0`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAD0,
5314	Op: AArch64::MOVA_2ZMXI_H_D);
5315	return;
5316	}
5317	break;
5318	}
5319	case Intrinsic::aarch64_sme_read_ver_vg2: {
5320	if (VT == MVT::nxv16i8) {
5321	SelectMultiVectorMove<`14`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAB0,
5322	Op: AArch64::MOVA_2ZMXI_V_B);
5323	return;
5324	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5325	VT == MVT::nxv8bf16) {
5326	SelectMultiVectorMove<`6`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAH0,
5327	Op: AArch64::MOVA_2ZMXI_V_H);
5328	return;
5329	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5330	SelectMultiVectorMove<`2`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAS0,
5331	Op: AArch64::MOVA_2ZMXI_V_S);
5332	return;
5333	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5334	SelectMultiVectorMove<`0`, `2`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZAD0,
5335	Op: AArch64::MOVA_2ZMXI_V_D);
5336	return;
5337	}
5338	break;
5339	}
5340	case Intrinsic::aarch64_sme_read_hor_vg4: {
5341	if (VT == MVT::nxv16i8) {
5342	SelectMultiVectorMove<`12`, `4`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAB0,
5343	Op: AArch64::MOVA_4ZMXI_H_B);
5344	return;
5345	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5346	VT == MVT::nxv8bf16) {
5347	SelectMultiVectorMove<`4`, `4`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAH0,
5348	Op: AArch64::MOVA_4ZMXI_H_H);
5349	return;
5350	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5351	SelectMultiVectorMove<`0`, `2`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAS0,
5352	Op: AArch64::MOVA_4ZMXI_H_S);
5353	return;
5354	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5355	SelectMultiVectorMove<`0`, `2`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAD0,
5356	Op: AArch64::MOVA_4ZMXI_H_D);
5357	return;
5358	}
5359	break;
5360	}
5361	case Intrinsic::aarch64_sme_read_ver_vg4: {
5362	if (VT == MVT::nxv16i8) {
5363	SelectMultiVectorMove<`12`, `4`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAB0,
5364	Op: AArch64::MOVA_4ZMXI_V_B);
5365	return;
5366	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5367	VT == MVT::nxv8bf16) {
5368	SelectMultiVectorMove<`4`, `4`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAH0,
5369	Op: AArch64::MOVA_4ZMXI_V_H);
5370	return;
5371	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5372	SelectMultiVectorMove<`0`, `4`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAS0,
5373	Op: AArch64::MOVA_4ZMXI_V_S);
5374	return;
5375	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5376	SelectMultiVectorMove<`0`, `4`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZAD0,
5377	Op: AArch64::MOVA_4ZMXI_V_D);
5378	return;
5379	}
5380	break;
5381	}
5382	case Intrinsic::aarch64_sme_read_vg1x2: {
5383	SelectMultiVectorMove<`7`, `1`>(N: Node, NumVecs: `2`, BaseReg: AArch64::ZA,
5384	Op: AArch64::MOVA_VG2_2ZMXI);
5385	return;
5386	}
5387	case Intrinsic::aarch64_sme_read_vg1x4: {
5388	SelectMultiVectorMove<`7`, `1`>(N: Node, NumVecs: `4`, BaseReg: AArch64::ZA,
5389	Op: AArch64::MOVA_VG4_4ZMXI);
5390	return;
5391	}
5392	case Intrinsic::aarch64_sme_readz_horiz_x2: {
5393	if (VT == MVT::nxv16i8) {
5394	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_H_B_PSEUDO, MaxIdx: `14`, Scale: `2`);
5395	return;
5396	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5397	VT == MVT::nxv8bf16) {
5398	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_H_H_PSEUDO, MaxIdx: `6`, Scale: `2`);
5399	return;
5400	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5401	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_H_S_PSEUDO, MaxIdx: `2`, Scale: `2`);
5402	return;
5403	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5404	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_H_D_PSEUDO, MaxIdx: `0`, Scale: `2`);
5405	return;
5406	}
5407	break;
5408	}
5409	case Intrinsic::aarch64_sme_readz_vert_x2: {
5410	if (VT == MVT::nxv16i8) {
5411	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_V_B_PSEUDO, MaxIdx: `14`, Scale: `2`);
5412	return;
5413	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5414	VT == MVT::nxv8bf16) {
5415	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_V_H_PSEUDO, MaxIdx: `6`, Scale: `2`);
5416	return;
5417	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5418	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_V_S_PSEUDO, MaxIdx: `2`, Scale: `2`);
5419	return;
5420	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5421	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_2ZMI_V_D_PSEUDO, MaxIdx: `0`, Scale: `2`);
5422	return;
5423	}
5424	break;
5425	}
5426	case Intrinsic::aarch64_sme_readz_horiz_x4: {
5427	if (VT == MVT::nxv16i8) {
5428	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_H_B_PSEUDO, MaxIdx: `12`, Scale: `4`);
5429	return;
5430	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5431	VT == MVT::nxv8bf16) {
5432	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_H_H_PSEUDO, MaxIdx: `4`, Scale: `4`);
5433	return;
5434	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5435	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_H_S_PSEUDO, MaxIdx: `0`, Scale: `4`);
5436	return;
5437	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5438	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_H_D_PSEUDO, MaxIdx: `0`, Scale: `4`);
5439	return;
5440	}
5441	break;
5442	}
5443	case Intrinsic::aarch64_sme_readz_vert_x4: {
5444	if (VT == MVT::nxv16i8) {
5445	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_V_B_PSEUDO, MaxIdx: `12`, Scale: `4`);
5446	return;
5447	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
5448	VT == MVT::nxv8bf16) {
5449	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_V_H_PSEUDO, MaxIdx: `4`, Scale: `4`);
5450	return;
5451	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
5452	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_V_S_PSEUDO, MaxIdx: `0`, Scale: `4`);
5453	return;
5454	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
5455	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_4ZMI_V_D_PSEUDO, MaxIdx: `0`, Scale: `4`);
5456	return;
5457	}
5458	break;
5459	}
5460	case Intrinsic::aarch64_sme_readz_x2: {
5461	SelectMultiVectorMoveZ(N: Node, NumVecs: `2`, Op: AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, MaxIdx: `7`, Scale: `1`,
5462	BaseReg: AArch64::ZA);
5463	return;
5464	}
5465	case Intrinsic::aarch64_sme_readz_x4: {
5466	SelectMultiVectorMoveZ(N: Node, NumVecs: `4`, Op: AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, MaxIdx: `7`, Scale: `1`,
5467	BaseReg: AArch64::ZA);
5468	return;
5469	}
5470	case Intrinsic::swift_async_context_addr: {
5471	SDLoc DL(Node);
5472	SDValue Chain = Node->getOperand(Num: `0`);
5473	SDValue CopyFP = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::FP, VT: MVT::i64);
5474	SDValue Res = SDValue (
5475	CurDAG->getMachineNode(Opcode: AArch64::SUBXri, dl: DL, VT: MVT::i64, Op1: CopyFP,
5476	Op2: CurDAG->getTargetConstant(Val: `8`, DL, VT: MVT::i32),
5477	Op3: CurDAG->getTargetConstant(Val: `0`, DL, VT: MVT::i32)),
5478	`0`);
5479	ReplaceUses(F: SDValue (Node, `0`), T: Res);
5480	ReplaceUses(F: SDValue (Node, `1`), T: CopyFP.getValue(R: `1`));
5481	CurDAG->RemoveDeadNode(N: Node);
5482
5483	auto &MF = CurDAG->getMachineFunction();
5484	MF.getFrameInfo().setFrameAddressIsTaken(true);
5485	MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5486	return;
5487	}
5488	case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5489	if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5490	VT: Node->getValueType(ResNo: `0`),
5491	Opcodes: {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5492	AArch64::LUTI2_4ZTZI_S}))
5493	// Second Immediate must be <= 3:
5494	SelectMultiVectorLuti(Node, NumOutVecs: `4`, Opc, MaxImm: `3`);
5495	return;
5496	}
5497	case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5498	if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5499	VT: Node->getValueType(ResNo: `0`),
5500	Opcodes: {`0`, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5501	// Second Immediate must be <= 1:
5502	SelectMultiVectorLuti(Node, NumOutVecs: `4`, Opc, MaxImm: `1`);
5503	return;
5504	}
5505	case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5506	if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5507	VT: Node->getValueType(ResNo: `0`),
5508	Opcodes: {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5509	AArch64::LUTI2_2ZTZI_S}))
5510	// Second Immediate must be <= 7:
5511	SelectMultiVectorLuti(Node, NumOutVecs: `2`, Opc, MaxImm: `7`);
5512	return;
5513	}
5514	case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5515	if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5516	VT: Node->getValueType(ResNo: `0`),
5517	Opcodes: {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5518	AArch64::LUTI4_2ZTZI_S}))
5519	// Second Immediate must be <= 3:
5520	SelectMultiVectorLuti(Node, NumOutVecs: `2`, Opc, MaxImm: `3`);
5521	return;
5522	}
5523	}
5524	} break;
5525	case ISD::INTRINSIC_WO_CHAIN: {
5526	unsigned IntNo = Node->getConstantOperandVal(Num: `0`);
5527	switch (IntNo) {
5528	default:
5529	break;
5530	case Intrinsic::aarch64_tagp:
5531	SelectTagP(N: Node);
5532	return;
5533
5534	case Intrinsic::ptrauth_auth:
5535	SelectPtrauthAuth(N: Node);
5536	return;
5537
5538	case Intrinsic::ptrauth_resign:
5539	SelectPtrauthResign(N: Node);
5540	return;
5541
5542	case Intrinsic::aarch64_neon_tbl2:
5543	SelectTable(N: Node, NumVecs: `2`,
5544	Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5545	isExt: false);
5546	return;
5547	case Intrinsic::aarch64_neon_tbl3:
5548	SelectTable(N: Node, NumVecs: `3`, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5549	: AArch64::TBLv16i8Three,
5550	isExt: false);
5551	return;
5552	case Intrinsic::aarch64_neon_tbl4:
5553	SelectTable(N: Node, NumVecs: `4`, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5554	: AArch64::TBLv16i8Four,
5555	isExt: false);
5556	return;
5557	case Intrinsic::aarch64_neon_tbx2:
5558	SelectTable(N: Node, NumVecs: `2`,
5559	Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5560	isExt: true);
5561	return;
5562	case Intrinsic::aarch64_neon_tbx3:
5563	SelectTable(N: Node, NumVecs: `3`, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5564	: AArch64::TBXv16i8Three,
5565	isExt: true);
5566	return;
5567	case Intrinsic::aarch64_neon_tbx4:
5568	SelectTable(N: Node, NumVecs: `4`, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5569	: AArch64::TBXv16i8Four,
5570	isExt: true);
5571	return;
5572	case Intrinsic::aarch64_sve_srshl_single_x2:
5573	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5574	VT: Node->getValueType(ResNo: `0`),
5575	Opcodes: {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5576	AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5577	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5578	return;
5579	case Intrinsic::aarch64_sve_srshl_single_x4:
5580	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5581	VT: Node->getValueType(ResNo: `0`),
5582	Opcodes: {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5583	AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5584	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5585	return;
5586	case Intrinsic::aarch64_sve_urshl_single_x2:
5587	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5588	VT: Node->getValueType(ResNo: `0`),
5589	Opcodes: {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5590	AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5591	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5592	return;
5593	case Intrinsic::aarch64_sve_urshl_single_x4:
5594	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5595	VT: Node->getValueType(ResNo: `0`),
5596	Opcodes: {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5597	AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5598	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5599	return;
5600	case Intrinsic::aarch64_sve_srshl_x2:
5601	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5602	VT: Node->getValueType(ResNo: `0`),
5603	Opcodes: {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5604	AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5605	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5606	return;
5607	case Intrinsic::aarch64_sve_srshl_x4:
5608	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5609	VT: Node->getValueType(ResNo: `0`),
5610	Opcodes: {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5611	AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5612	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5613	return;
5614	case Intrinsic::aarch64_sve_urshl_x2:
5615	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5616	VT: Node->getValueType(ResNo: `0`),
5617	Opcodes: {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5618	AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5619	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5620	return;
5621	case Intrinsic::aarch64_sve_urshl_x4:
5622	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5623	VT: Node->getValueType(ResNo: `0`),
5624	Opcodes: {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5625	AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5626	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5627	return;
5628	case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5629	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5630	VT: Node->getValueType(ResNo: `0`),
5631	Opcodes: {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5632	AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5633	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5634	return;
5635	case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5636	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5637	VT: Node->getValueType(ResNo: `0`),
5638	Opcodes: {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5639	AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5640	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5641	return;
5642	case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5643	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5644	VT: Node->getValueType(ResNo: `0`),
5645	Opcodes: {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5646	AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5647	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5648	return;
5649	case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5650	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5651	VT: Node->getValueType(ResNo: `0`),
5652	Opcodes: {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5653	AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5654	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5655	return;
5656	case Intrinsic::aarch64_sve_whilege_x2:
5657	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5658	VT: Node->getValueType(ResNo: `0`),
5659	Opcodes: {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5660	AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5661	SelectWhilePair(N: Node, Opc: Op);
5662	return;
5663	case Intrinsic::aarch64_sve_whilegt_x2:
5664	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5665	VT: Node->getValueType(ResNo: `0`),
5666	Opcodes: {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5667	AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5668	SelectWhilePair(N: Node, Opc: Op);
5669	return;
5670	case Intrinsic::aarch64_sve_whilehi_x2:
5671	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5672	VT: Node->getValueType(ResNo: `0`),
5673	Opcodes: {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5674	AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5675	SelectWhilePair(N: Node, Opc: Op);
5676	return;
5677	case Intrinsic::aarch64_sve_whilehs_x2:
5678	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5679	VT: Node->getValueType(ResNo: `0`),
5680	Opcodes: {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5681	AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5682	SelectWhilePair(N: Node, Opc: Op);
5683	return;
5684	case Intrinsic::aarch64_sve_whilele_x2:
5685	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5686	VT: Node->getValueType(ResNo: `0`),
5687	Opcodes: {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5688	AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5689	SelectWhilePair(N: Node, Opc: Op);
5690	return;
5691	case Intrinsic::aarch64_sve_whilelo_x2:
5692	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5693	VT: Node->getValueType(ResNo: `0`),
5694	Opcodes: {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5695	AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5696	SelectWhilePair(N: Node, Opc: Op);
5697	return;
5698	case Intrinsic::aarch64_sve_whilels_x2:
5699	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5700	VT: Node->getValueType(ResNo: `0`),
5701	Opcodes: {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5702	AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5703	SelectWhilePair(N: Node, Opc: Op);
5704	return;
5705	case Intrinsic::aarch64_sve_whilelt_x2:
5706	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5707	VT: Node->getValueType(ResNo: `0`),
5708	Opcodes: {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5709	AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5710	SelectWhilePair(N: Node, Opc: Op);
5711	return;
5712	case Intrinsic::aarch64_sve_smax_single_x2:
5713	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5714	VT: Node->getValueType(ResNo: `0`),
5715	Opcodes: {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5716	AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5717	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5718	return;
5719	case Intrinsic::aarch64_sve_umax_single_x2:
5720	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5721	VT: Node->getValueType(ResNo: `0`),
5722	Opcodes: {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5723	AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5724	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5725	return;
5726	case Intrinsic::aarch64_sve_fmax_single_x2:
5727	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5728	VT: Node->getValueType(ResNo: `0`),
5729	Opcodes: {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5730	AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5731	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5732	return;
5733	case Intrinsic::aarch64_sve_smax_single_x4:
5734	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5735	VT: Node->getValueType(ResNo: `0`),
5736	Opcodes: {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5737	AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5738	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5739	return;
5740	case Intrinsic::aarch64_sve_umax_single_x4:
5741	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5742	VT: Node->getValueType(ResNo: `0`),
5743	Opcodes: {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5744	AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5745	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5746	return;
5747	case Intrinsic::aarch64_sve_fmax_single_x4:
5748	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5749	VT: Node->getValueType(ResNo: `0`),
5750	Opcodes: {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5751	AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5752	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5753	return;
5754	case Intrinsic::aarch64_sve_smin_single_x2:
5755	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5756	VT: Node->getValueType(ResNo: `0`),
5757	Opcodes: {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5758	AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5759	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5760	return;
5761	case Intrinsic::aarch64_sve_umin_single_x2:
5762	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5763	VT: Node->getValueType(ResNo: `0`),
5764	Opcodes: {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5765	AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5766	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5767	return;
5768	case Intrinsic::aarch64_sve_fmin_single_x2:
5769	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5770	VT: Node->getValueType(ResNo: `0`),
5771	Opcodes: {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5772	AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5773	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5774	return;
5775	case Intrinsic::aarch64_sve_smin_single_x4:
5776	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5777	VT: Node->getValueType(ResNo: `0`),
5778	Opcodes: {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5779	AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5780	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5781	return;
5782	case Intrinsic::aarch64_sve_umin_single_x4:
5783	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5784	VT: Node->getValueType(ResNo: `0`),
5785	Opcodes: {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5786	AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5787	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5788	return;
5789	case Intrinsic::aarch64_sve_fmin_single_x4:
5790	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5791	VT: Node->getValueType(ResNo: `0`),
5792	Opcodes: {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5793	AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5794	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5795	return;
5796	case Intrinsic::aarch64_sve_smax_x2:
5797	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5798	VT: Node->getValueType(ResNo: `0`),
5799	Opcodes: {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5800	AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5801	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5802	return;
5803	case Intrinsic::aarch64_sve_umax_x2:
5804	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5805	VT: Node->getValueType(ResNo: `0`),
5806	Opcodes: {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5807	AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5808	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5809	return;
5810	case Intrinsic::aarch64_sve_fmax_x2:
5811	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5812	VT: Node->getValueType(ResNo: `0`),
5813	Opcodes: {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5814	AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5815	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5816	return;
5817	case Intrinsic::aarch64_sve_smax_x4:
5818	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5819	VT: Node->getValueType(ResNo: `0`),
5820	Opcodes: {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5821	AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5822	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5823	return;
5824	case Intrinsic::aarch64_sve_umax_x4:
5825	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5826	VT: Node->getValueType(ResNo: `0`),
5827	Opcodes: {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5828	AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5829	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5830	return;
5831	case Intrinsic::aarch64_sve_fmax_x4:
5832	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5833	VT: Node->getValueType(ResNo: `0`),
5834	Opcodes: {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5835	AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5836	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5837	return;
5838	case Intrinsic::aarch64_sve_smin_x2:
5839	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5840	VT: Node->getValueType(ResNo: `0`),
5841	Opcodes: {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5842	AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5843	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5844	return;
5845	case Intrinsic::aarch64_sve_umin_x2:
5846	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5847	VT: Node->getValueType(ResNo: `0`),
5848	Opcodes: {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5849	AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5850	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5851	return;
5852	case Intrinsic::aarch64_sve_fmin_x2:
5853	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5854	VT: Node->getValueType(ResNo: `0`),
5855	Opcodes: {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5856	AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5857	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5858	return;
5859	case Intrinsic::aarch64_sve_smin_x4:
5860	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5861	VT: Node->getValueType(ResNo: `0`),
5862	Opcodes: {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5863	AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5864	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5865	return;
5866	case Intrinsic::aarch64_sve_umin_x4:
5867	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5868	VT: Node->getValueType(ResNo: `0`),
5869	Opcodes: {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5870	AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5871	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5872	return;
5873	case Intrinsic::aarch64_sve_fmin_x4:
5874	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5875	VT: Node->getValueType(ResNo: `0`),
5876	Opcodes: {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5877	AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5878	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5879	return;
5880	case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
5881	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5882	VT: Node->getValueType(ResNo: `0`),
5883	Opcodes: {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5884	AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5885	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5886	return;
5887	case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
5888	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5889	VT: Node->getValueType(ResNo: `0`),
5890	Opcodes: {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5891	AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5892	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5893	return;
5894	case Intrinsic::aarch64_sve_fminnm_single_x2:
5895	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5896	VT: Node->getValueType(ResNo: `0`),
5897	Opcodes: {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5898	AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5899	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
5900	return;
5901	case Intrinsic::aarch64_sve_fminnm_single_x4:
5902	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5903	VT: Node->getValueType(ResNo: `0`),
5904	Opcodes: {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5905	AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5906	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
5907	return;
5908	case Intrinsic::aarch64_sve_fmaxnm_x2:
5909	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5910	VT: Node->getValueType(ResNo: `0`),
5911	Opcodes: {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5912	AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5913	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5914	return;
5915	case Intrinsic::aarch64_sve_fmaxnm_x4:
5916	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5917	VT: Node->getValueType(ResNo: `0`),
5918	Opcodes: {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5919	AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5920	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5921	return;
5922	case Intrinsic::aarch64_sve_fminnm_x2:
5923	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5924	VT: Node->getValueType(ResNo: `0`),
5925	Opcodes: {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5926	AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5927	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op);
5928	return;
5929	case Intrinsic::aarch64_sve_fminnm_x4:
5930	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5931	VT: Node->getValueType(ResNo: `0`),
5932	Opcodes: {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5933	AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5934	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op);
5935	return;
5936	case Intrinsic::aarch64_sve_fcvtzs_x2:
5937	SelectCVTIntrinsic(N: Node, NumVecs: `2`, Opcode: AArch64::FCVTZS_2Z2Z_StoS);
5938	return;
5939	case Intrinsic::aarch64_sve_scvtf_x2:
5940	SelectCVTIntrinsic(N: Node, NumVecs: `2`, Opcode: AArch64::SCVTF_2Z2Z_StoS);
5941	return;
5942	case Intrinsic::aarch64_sve_fcvtzu_x2:
5943	SelectCVTIntrinsic(N: Node, NumVecs: `2`, Opcode: AArch64::FCVTZU_2Z2Z_StoS);
5944	return;
5945	case Intrinsic::aarch64_sve_ucvtf_x2:
5946	SelectCVTIntrinsic(N: Node, NumVecs: `2`, Opcode: AArch64::UCVTF_2Z2Z_StoS);
5947	return;
5948	case Intrinsic::aarch64_sve_fcvtzs_x4:
5949	SelectCVTIntrinsic(N: Node, NumVecs: `4`, Opcode: AArch64::FCVTZS_4Z4Z_StoS);
5950	return;
5951	case Intrinsic::aarch64_sve_scvtf_x4:
5952	SelectCVTIntrinsic(N: Node, NumVecs: `4`, Opcode: AArch64::SCVTF_4Z4Z_StoS);
5953	return;
5954	case Intrinsic::aarch64_sve_fcvtzu_x4:
5955	SelectCVTIntrinsic(N: Node, NumVecs: `4`, Opcode: AArch64::FCVTZU_4Z4Z_StoS);
5956	return;
5957	case Intrinsic::aarch64_sve_ucvtf_x4:
5958	SelectCVTIntrinsic(N: Node, NumVecs: `4`, Opcode: AArch64::UCVTF_4Z4Z_StoS);
5959	return;
5960	case Intrinsic::aarch64_sve_fcvt_widen_x2:
5961	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, IsTupleInput: false, Opc: AArch64::FCVT_2ZZ_H_S);
5962	return;
5963	case Intrinsic::aarch64_sve_fcvtl_widen_x2:
5964	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, IsTupleInput: false, Opc: AArch64::FCVTL_2ZZ_H_S);
5965	return;
5966	case Intrinsic::aarch64_sve_sclamp_single_x2:
5967	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5968	VT: Node->getValueType(ResNo: `0`),
5969	Opcodes: {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5970	AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5971	SelectClamp(N: Node, NumVecs: `2`, Op);
5972	return;
5973	case Intrinsic::aarch64_sve_uclamp_single_x2:
5974	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5975	VT: Node->getValueType(ResNo: `0`),
5976	Opcodes: {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5977	AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5978	SelectClamp(N: Node, NumVecs: `2`, Op);
5979	return;
5980	case Intrinsic::aarch64_sve_fclamp_single_x2:
5981	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5982	VT: Node->getValueType(ResNo: `0`),
5983	Opcodes: {`0`, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5984	AArch64::FCLAMP_VG2_2Z2Z_D}))
5985	SelectClamp(N: Node, NumVecs: `2`, Op);
5986	return;
5987	case Intrinsic::aarch64_sve_bfclamp_single_x2:
5988	SelectClamp(N: Node, NumVecs: `2`, Op: AArch64::BFCLAMP_VG2_2ZZZ_H);
5989	return;
5990	case Intrinsic::aarch64_sve_sclamp_single_x4:
5991	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5992	VT: Node->getValueType(ResNo: `0`),
5993	Opcodes: {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5994	AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5995	SelectClamp(N: Node, NumVecs: `4`, Op);
5996	return;
5997	case Intrinsic::aarch64_sve_uclamp_single_x4:
5998	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5999	VT: Node->getValueType(ResNo: `0`),
6000	Opcodes: {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6001	AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6002	SelectClamp(N: Node, NumVecs: `4`, Op);
6003	return;
6004	case Intrinsic::aarch64_sve_fclamp_single_x4:
6005	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6006	VT: Node->getValueType(ResNo: `0`),
6007	Opcodes: {`0`, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6008	AArch64::FCLAMP_VG4_4Z4Z_D}))
6009	SelectClamp(N: Node, NumVecs: `4`, Op);
6010	return;
6011	case Intrinsic::aarch64_sve_bfclamp_single_x4:
6012	SelectClamp(N: Node, NumVecs: `4`, Op: AArch64::BFCLAMP_VG4_4ZZZ_H);
6013	return;
6014	case Intrinsic::aarch64_sve_add_single_x2:
6015	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6016	VT: Node->getValueType(ResNo: `0`),
6017	Opcodes: {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6018	AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6019	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: false, Opcode: Op);
6020	return;
6021	case Intrinsic::aarch64_sve_add_single_x4:
6022	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6023	VT: Node->getValueType(ResNo: `0`),
6024	Opcodes: {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6025	AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6026	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: false, Opcode: Op);
6027	return;
6028	case Intrinsic::aarch64_sve_zip_x2:
6029	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6030	VT: Node->getValueType(ResNo: `0`),
6031	Opcodes: {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6032	AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6033	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, /IsTupleInput=/false, Opc: Op);
6034	return;
6035	case Intrinsic::aarch64_sve_zipq_x2:
6036	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, /IsTupleInput=/false,
6037	Opc: AArch64::ZIP_VG2_2ZZZ_Q);
6038	return;
6039	case Intrinsic::aarch64_sve_zip_x4:
6040	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6041	VT: Node->getValueType(ResNo: `0`),
6042	Opcodes: {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6043	AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6044	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `4`, /IsTupleInput=/true, Opc: Op);
6045	return;
6046	case Intrinsic::aarch64_sve_zipq_x4:
6047	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `4`, /IsTupleInput=/true,
6048	Opc: AArch64::ZIP_VG4_4Z4Z_Q);
6049	return;
6050	case Intrinsic::aarch64_sve_uzp_x2:
6051	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6052	VT: Node->getValueType(ResNo: `0`),
6053	Opcodes: {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6054	AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6055	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, /IsTupleInput=/false, Opc: Op);
6056	return;
6057	case Intrinsic::aarch64_sve_uzpq_x2:
6058	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, /IsTupleInput=/false,
6059	Opc: AArch64::UZP_VG2_2ZZZ_Q);
6060	return;
6061	case Intrinsic::aarch64_sve_uzp_x4:
6062	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6063	VT: Node->getValueType(ResNo: `0`),
6064	Opcodes: {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6065	AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6066	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `4`, /IsTupleInput=/true, Opc: Op);
6067	return;
6068	case Intrinsic::aarch64_sve_uzpq_x4:
6069	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `4`, /IsTupleInput=/true,
6070	Opc: AArch64::UZP_VG4_4Z4Z_Q);
6071	return;
6072	case Intrinsic::aarch64_sve_sel_x2:
6073	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6074	VT: Node->getValueType(ResNo: `0`),
6075	Opcodes: {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6076	AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6077	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `2`, IsZmMulti: true, Opcode: Op, /HasPred=/true);
6078	return;
6079	case Intrinsic::aarch64_sve_sel_x4:
6080	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6081	VT: Node->getValueType(ResNo: `0`),
6082	Opcodes: {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6083	AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6084	SelectDestructiveMultiIntrinsic(N: Node, NumVecs: `4`, IsZmMulti: true, Opcode: Op, /HasPred=/true);
6085	return;
6086	case Intrinsic::aarch64_sve_frinta_x2:
6087	SelectFrintFromVT(N: Node, NumVecs: `2`, Opcode: AArch64::FRINTA_2Z2Z_S);
6088	return;
6089	case Intrinsic::aarch64_sve_frinta_x4:
6090	SelectFrintFromVT(N: Node, NumVecs: `4`, Opcode: AArch64::FRINTA_4Z4Z_S);
6091	return;
6092	case Intrinsic::aarch64_sve_frintm_x2:
6093	SelectFrintFromVT(N: Node, NumVecs: `2`, Opcode: AArch64::FRINTM_2Z2Z_S);
6094	return;
6095	case Intrinsic::aarch64_sve_frintm_x4:
6096	SelectFrintFromVT(N: Node, NumVecs: `4`, Opcode: AArch64::FRINTM_4Z4Z_S);
6097	return;
6098	case Intrinsic::aarch64_sve_frintn_x2:
6099	SelectFrintFromVT(N: Node, NumVecs: `2`, Opcode: AArch64::FRINTN_2Z2Z_S);
6100	return;
6101	case Intrinsic::aarch64_sve_frintn_x4:
6102	SelectFrintFromVT(N: Node, NumVecs: `4`, Opcode: AArch64::FRINTN_4Z4Z_S);
6103	return;
6104	case Intrinsic::aarch64_sve_frintp_x2:
6105	SelectFrintFromVT(N: Node, NumVecs: `2`, Opcode: AArch64::FRINTP_2Z2Z_S);
6106	return;
6107	case Intrinsic::aarch64_sve_frintp_x4:
6108	SelectFrintFromVT(N: Node, NumVecs: `4`, Opcode: AArch64::FRINTP_4Z4Z_S);
6109	return;
6110	case Intrinsic::aarch64_sve_sunpk_x2:
6111	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6112	VT: Node->getValueType(ResNo: `0`),
6113	Opcodes: {`0`, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6114	AArch64::SUNPK_VG2_2ZZ_D}))
6115	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, /IsTupleInput=/false, Opc: Op);
6116	return;
6117	case Intrinsic::aarch64_sve_uunpk_x2:
6118	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6119	VT: Node->getValueType(ResNo: `0`),
6120	Opcodes: {`0`, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6121	AArch64::UUNPK_VG2_2ZZ_D}))
6122	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `2`, /IsTupleInput=/false, Opc: Op);
6123	return;
6124	case Intrinsic::aarch64_sve_sunpk_x4:
6125	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6126	VT: Node->getValueType(ResNo: `0`),
6127	Opcodes: {`0`, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6128	AArch64::SUNPK_VG4_4Z2Z_D}))
6129	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `4`, /IsTupleInput=/true, Opc: Op);
6130	return;
6131	case Intrinsic::aarch64_sve_uunpk_x4:
6132	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6133	VT: Node->getValueType(ResNo: `0`),
6134	Opcodes: {`0`, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6135	AArch64::UUNPK_VG4_4Z2Z_D}))
6136	SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: `4`, /IsTupleInput=/true, Opc: Op);
6137	return;
6138	case Intrinsic::aarch64_sve_pext_x2: {
6139	if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6140	VT: Node->getValueType(ResNo: `0`),
6141	Opcodes: {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6142	AArch64::PEXT_2PCI_D}))
6143	SelectPExtPair(N: Node, Opc: Op);
6144	return;
6145	}
6146	}
6147	break;
6148	}
6149	case ISD::INTRINSIC_VOID: {
6150	unsigned IntNo = Node->getConstantOperandVal(Num: `1`);
6151	if (Node->getNumOperands() >= `3`)
6152	VT = Node->getOperand(Num: `2`)->getValueType(ResNo: `0`);
6153	switch (IntNo) {
6154	default:
6155	break;
6156	case Intrinsic::aarch64_neon_st1x2: {
6157	if (VT == MVT::v8i8) {
6158	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov8b);
6159	return;
6160	} else if (VT == MVT::v16i8) {
6161	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov16b);
6162	return;
6163	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6164	VT == MVT::v4bf16) {
6165	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov4h);
6166	return;
6167	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\|
6168	VT == MVT::v8bf16) {
6169	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov8h);
6170	return;
6171	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6172	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov2s);
6173	return;
6174	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6175	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov4s);
6176	return;
6177	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6178	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov2d);
6179	return;
6180	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6181	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov1d);
6182	return;
6183	}
6184	break;
6185	}
6186	case Intrinsic::aarch64_neon_st1x3: {
6187	if (VT == MVT::v8i8) {
6188	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev8b);
6189	return;
6190	} else if (VT == MVT::v16i8) {
6191	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev16b);
6192	return;
6193	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6194	VT == MVT::v4bf16) {
6195	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev4h);
6196	return;
6197	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\|
6198	VT == MVT::v8bf16) {
6199	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev8h);
6200	return;
6201	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6202	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev2s);
6203	return;
6204	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6205	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev4s);
6206	return;
6207	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6208	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev2d);
6209	return;
6210	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6211	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev1d);
6212	return;
6213	}
6214	break;
6215	}
6216	case Intrinsic::aarch64_neon_st1x4: {
6217	if (VT == MVT::v8i8) {
6218	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv8b);
6219	return;
6220	} else if (VT == MVT::v16i8) {
6221	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv16b);
6222	return;
6223	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6224	VT == MVT::v4bf16) {
6225	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv4h);
6226	return;
6227	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\|
6228	VT == MVT::v8bf16) {
6229	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv8h);
6230	return;
6231	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6232	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv2s);
6233	return;
6234	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6235	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv4s);
6236	return;
6237	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6238	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv2d);
6239	return;
6240	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6241	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv1d);
6242	return;
6243	}
6244	break;
6245	}
6246	case Intrinsic::aarch64_neon_st2: {
6247	if (VT == MVT::v8i8) {
6248	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov8b);
6249	return;
6250	} else if (VT == MVT::v16i8) {
6251	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov16b);
6252	return;
6253	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6254	VT == MVT::v4bf16) {
6255	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov4h);
6256	return;
6257	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\|
6258	VT == MVT::v8bf16) {
6259	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov8h);
6260	return;
6261	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6262	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov2s);
6263	return;
6264	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6265	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov4s);
6266	return;
6267	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6268	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov2d);
6269	return;
6270	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6271	SelectStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov1d);
6272	return;
6273	}
6274	break;
6275	}
6276	case Intrinsic::aarch64_neon_st3: {
6277	if (VT == MVT::v8i8) {
6278	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev8b);
6279	return;
6280	} else if (VT == MVT::v16i8) {
6281	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev16b);
6282	return;
6283	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6284	VT == MVT::v4bf16) {
6285	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev4h);
6286	return;
6287	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\|
6288	VT == MVT::v8bf16) {
6289	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev8h);
6290	return;
6291	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6292	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev2s);
6293	return;
6294	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6295	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev4s);
6296	return;
6297	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6298	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev2d);
6299	return;
6300	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6301	SelectStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev1d);
6302	return;
6303	}
6304	break;
6305	}
6306	case Intrinsic::aarch64_neon_st4: {
6307	if (VT == MVT::v8i8) {
6308	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv8b);
6309	return;
6310	} else if (VT == MVT::v16i8) {
6311	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv16b);
6312	return;
6313	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6314	VT == MVT::v4bf16) {
6315	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv4h);
6316	return;
6317	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\|
6318	VT == MVT::v8bf16) {
6319	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv8h);
6320	return;
6321	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6322	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv2s);
6323	return;
6324	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6325	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv4s);
6326	return;
6327	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6328	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv2d);
6329	return;
6330	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6331	SelectStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv1d);
6332	return;
6333	}
6334	break;
6335	}
6336	case Intrinsic::aarch64_neon_st2lane: {
6337	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6338	SelectStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i8);
6339	return;
6340	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6341	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6342	SelectStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i16);
6343	return;
6344	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6345	VT == MVT::v2f32) {
6346	SelectStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i32);
6347	return;
6348	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
6349	VT == MVT::v1f64) {
6350	SelectStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i64);
6351	return;
6352	}
6353	break;
6354	}
6355	case Intrinsic::aarch64_neon_st3lane: {
6356	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6357	SelectStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i8);
6358	return;
6359	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6360	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6361	SelectStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i16);
6362	return;
6363	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6364	VT == MVT::v2f32) {
6365	SelectStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i32);
6366	return;
6367	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
6368	VT == MVT::v1f64) {
6369	SelectStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i64);
6370	return;
6371	}
6372	break;
6373	}
6374	case Intrinsic::aarch64_neon_st4lane: {
6375	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6376	SelectStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i8);
6377	return;
6378	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6379	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6380	SelectStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i16);
6381	return;
6382	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6383	VT == MVT::v2f32) {
6384	SelectStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i32);
6385	return;
6386	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
6387	VT == MVT::v1f64) {
6388	SelectStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i64);
6389	return;
6390	}
6391	break;
6392	}
6393	case Intrinsic::aarch64_sve_st2q: {
6394	SelectPredicatedStore(N: Node, NumVecs: `2`, Scale: `4`, Opc_rr: AArch64::ST2Q, Opc_ri: AArch64::ST2Q_IMM);
6395	return;
6396	}
6397	case Intrinsic::aarch64_sve_st3q: {
6398	SelectPredicatedStore(N: Node, NumVecs: `3`, Scale: `4`, Opc_rr: AArch64::ST3Q, Opc_ri: AArch64::ST3Q_IMM);
6399	return;
6400	}
6401	case Intrinsic::aarch64_sve_st4q: {
6402	SelectPredicatedStore(N: Node, NumVecs: `4`, Scale: `4`, Opc_rr: AArch64::ST4Q, Opc_ri: AArch64::ST4Q_IMM);
6403	return;
6404	}
6405	case Intrinsic::aarch64_sve_st2: {
6406	if (VT == MVT::nxv16i8) {
6407	SelectPredicatedStore(N: Node, NumVecs: `2`, Scale: `0`, Opc_rr: AArch64::ST2B, Opc_ri: AArch64::ST2B_IMM);
6408	return;
6409	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
6410	VT == MVT::nxv8bf16) {
6411	SelectPredicatedStore(N: Node, NumVecs: `2`, Scale: `1`, Opc_rr: AArch64::ST2H, Opc_ri: AArch64::ST2H_IMM);
6412	return;
6413	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
6414	SelectPredicatedStore(N: Node, NumVecs: `2`, Scale: `2`, Opc_rr: AArch64::ST2W, Opc_ri: AArch64::ST2W_IMM);
6415	return;
6416	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
6417	SelectPredicatedStore(N: Node, NumVecs: `2`, Scale: `3`, Opc_rr: AArch64::ST2D, Opc_ri: AArch64::ST2D_IMM);
6418	return;
6419	}
6420	break;
6421	}
6422	case Intrinsic::aarch64_sve_st3: {
6423	if (VT == MVT::nxv16i8) {
6424	SelectPredicatedStore(N: Node, NumVecs: `3`, Scale: `0`, Opc_rr: AArch64::ST3B, Opc_ri: AArch64::ST3B_IMM);
6425	return;
6426	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
6427	VT == MVT::nxv8bf16) {
6428	SelectPredicatedStore(N: Node, NumVecs: `3`, Scale: `1`, Opc_rr: AArch64::ST3H, Opc_ri: AArch64::ST3H_IMM);
6429	return;
6430	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
6431	SelectPredicatedStore(N: Node, NumVecs: `3`, Scale: `2`, Opc_rr: AArch64::ST3W, Opc_ri: AArch64::ST3W_IMM);
6432	return;
6433	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
6434	SelectPredicatedStore(N: Node, NumVecs: `3`, Scale: `3`, Opc_rr: AArch64::ST3D, Opc_ri: AArch64::ST3D_IMM);
6435	return;
6436	}
6437	break;
6438	}
6439	case Intrinsic::aarch64_sve_st4: {
6440	if (VT == MVT::nxv16i8) {
6441	SelectPredicatedStore(N: Node, NumVecs: `4`, Scale: `0`, Opc_rr: AArch64::ST4B, Opc_ri: AArch64::ST4B_IMM);
6442	return;
6443	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
6444	VT == MVT::nxv8bf16) {
6445	SelectPredicatedStore(N: Node, NumVecs: `4`, Scale: `1`, Opc_rr: AArch64::ST4H, Opc_ri: AArch64::ST4H_IMM);
6446	return;
6447	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
6448	SelectPredicatedStore(N: Node, NumVecs: `4`, Scale: `2`, Opc_rr: AArch64::ST4W, Opc_ri: AArch64::ST4W_IMM);
6449	return;
6450	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
6451	SelectPredicatedStore(N: Node, NumVecs: `4`, Scale: `3`, Opc_rr: AArch64::ST4D, Opc_ri: AArch64::ST4D_IMM);
6452	return;
6453	}
6454	break;
6455	}
6456	}
6457	break;
6458	}
6459	case AArch64ISD::LD2post: {
6460	if (VT == MVT::v8i8) {
6461	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov8b_POST, SubRegIdx: AArch64::dsub0);
6462	return;
6463	} else if (VT == MVT::v16i8) {
6464	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov16b_POST, SubRegIdx: AArch64::qsub0);
6465	return;
6466	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6467	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov4h_POST, SubRegIdx: AArch64::dsub0);
6468	return;
6469	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6470	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov8h_POST, SubRegIdx: AArch64::qsub0);
6471	return;
6472	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6473	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov2s_POST, SubRegIdx: AArch64::dsub0);
6474	return;
6475	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6476	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov4s_POST, SubRegIdx: AArch64::qsub0);
6477	return;
6478	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6479	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov1d_POST, SubRegIdx: AArch64::dsub0);
6480	return;
6481	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6482	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Twov2d_POST, SubRegIdx: AArch64::qsub0);
6483	return;
6484	}
6485	break;
6486	}
6487	case AArch64ISD::LD3post: {
6488	if (VT == MVT::v8i8) {
6489	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev8b_POST, SubRegIdx: AArch64::dsub0);
6490	return;
6491	} else if (VT == MVT::v16i8) {
6492	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev16b_POST, SubRegIdx: AArch64::qsub0);
6493	return;
6494	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6495	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev4h_POST, SubRegIdx: AArch64::dsub0);
6496	return;
6497	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6498	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev8h_POST, SubRegIdx: AArch64::qsub0);
6499	return;
6500	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6501	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev2s_POST, SubRegIdx: AArch64::dsub0);
6502	return;
6503	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6504	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev4s_POST, SubRegIdx: AArch64::qsub0);
6505	return;
6506	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6507	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev1d_POST, SubRegIdx: AArch64::dsub0);
6508	return;
6509	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6510	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Threev2d_POST, SubRegIdx: AArch64::qsub0);
6511	return;
6512	}
6513	break;
6514	}
6515	case AArch64ISD::LD4post: {
6516	if (VT == MVT::v8i8) {
6517	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv8b_POST, SubRegIdx: AArch64::dsub0);
6518	return;
6519	} else if (VT == MVT::v16i8) {
6520	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv16b_POST, SubRegIdx: AArch64::qsub0);
6521	return;
6522	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6523	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv4h_POST, SubRegIdx: AArch64::dsub0);
6524	return;
6525	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6526	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv8h_POST, SubRegIdx: AArch64::qsub0);
6527	return;
6528	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6529	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv2s_POST, SubRegIdx: AArch64::dsub0);
6530	return;
6531	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6532	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv4s_POST, SubRegIdx: AArch64::qsub0);
6533	return;
6534	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6535	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv1d_POST, SubRegIdx: AArch64::dsub0);
6536	return;
6537	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6538	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Fourv2d_POST, SubRegIdx: AArch64::qsub0);
6539	return;
6540	}
6541	break;
6542	}
6543	case AArch64ISD::LD1x2post: {
6544	if (VT == MVT::v8i8) {
6545	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov8b_POST, SubRegIdx: AArch64::dsub0);
6546	return;
6547	} else if (VT == MVT::v16i8) {
6548	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov16b_POST, SubRegIdx: AArch64::qsub0);
6549	return;
6550	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6551	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov4h_POST, SubRegIdx: AArch64::dsub0);
6552	return;
6553	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6554	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov8h_POST, SubRegIdx: AArch64::qsub0);
6555	return;
6556	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6557	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov2s_POST, SubRegIdx: AArch64::dsub0);
6558	return;
6559	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6560	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov4s_POST, SubRegIdx: AArch64::qsub0);
6561	return;
6562	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6563	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov1d_POST, SubRegIdx: AArch64::dsub0);
6564	return;
6565	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6566	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD1Twov2d_POST, SubRegIdx: AArch64::qsub0);
6567	return;
6568	}
6569	break;
6570	}
6571	case AArch64ISD::LD1x3post: {
6572	if (VT == MVT::v8i8) {
6573	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev8b_POST, SubRegIdx: AArch64::dsub0);
6574	return;
6575	} else if (VT == MVT::v16i8) {
6576	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev16b_POST, SubRegIdx: AArch64::qsub0);
6577	return;
6578	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6579	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev4h_POST, SubRegIdx: AArch64::dsub0);
6580	return;
6581	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6582	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev8h_POST, SubRegIdx: AArch64::qsub0);
6583	return;
6584	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6585	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev2s_POST, SubRegIdx: AArch64::dsub0);
6586	return;
6587	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6588	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev4s_POST, SubRegIdx: AArch64::qsub0);
6589	return;
6590	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6591	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev1d_POST, SubRegIdx: AArch64::dsub0);
6592	return;
6593	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6594	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD1Threev2d_POST, SubRegIdx: AArch64::qsub0);
6595	return;
6596	}
6597	break;
6598	}
6599	case AArch64ISD::LD1x4post: {
6600	if (VT == MVT::v8i8) {
6601	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv8b_POST, SubRegIdx: AArch64::dsub0);
6602	return;
6603	} else if (VT == MVT::v16i8) {
6604	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv16b_POST, SubRegIdx: AArch64::qsub0);
6605	return;
6606	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6607	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv4h_POST, SubRegIdx: AArch64::dsub0);
6608	return;
6609	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6610	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv8h_POST, SubRegIdx: AArch64::qsub0);
6611	return;
6612	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6613	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv2s_POST, SubRegIdx: AArch64::dsub0);
6614	return;
6615	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6616	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv4s_POST, SubRegIdx: AArch64::qsub0);
6617	return;
6618	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6619	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv1d_POST, SubRegIdx: AArch64::dsub0);
6620	return;
6621	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6622	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD1Fourv2d_POST, SubRegIdx: AArch64::qsub0);
6623	return;
6624	}
6625	break;
6626	}
6627	case AArch64ISD::LD1DUPpost: {
6628	if (VT == MVT::v8i8) {
6629	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv8b_POST, SubRegIdx: AArch64::dsub0);
6630	return;
6631	} else if (VT == MVT::v16i8) {
6632	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv16b_POST, SubRegIdx: AArch64::qsub0);
6633	return;
6634	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6635	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv4h_POST, SubRegIdx: AArch64::dsub0);
6636	return;
6637	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6638	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv8h_POST, SubRegIdx: AArch64::qsub0);
6639	return;
6640	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6641	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv2s_POST, SubRegIdx: AArch64::dsub0);
6642	return;
6643	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6644	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv4s_POST, SubRegIdx: AArch64::qsub0);
6645	return;
6646	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6647	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv1d_POST, SubRegIdx: AArch64::dsub0);
6648	return;
6649	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6650	SelectPostLoad(N: Node, NumVecs: `1`, Opc: AArch64::LD1Rv2d_POST, SubRegIdx: AArch64::qsub0);
6651	return;
6652	}
6653	break;
6654	}
6655	case AArch64ISD::LD2DUPpost: {
6656	if (VT == MVT::v8i8) {
6657	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv8b_POST, SubRegIdx: AArch64::dsub0);
6658	return;
6659	} else if (VT == MVT::v16i8) {
6660	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv16b_POST, SubRegIdx: AArch64::qsub0);
6661	return;
6662	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6663	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv4h_POST, SubRegIdx: AArch64::dsub0);
6664	return;
6665	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6666	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv8h_POST, SubRegIdx: AArch64::qsub0);
6667	return;
6668	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6669	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv2s_POST, SubRegIdx: AArch64::dsub0);
6670	return;
6671	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6672	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv4s_POST, SubRegIdx: AArch64::qsub0);
6673	return;
6674	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6675	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv1d_POST, SubRegIdx: AArch64::dsub0);
6676	return;
6677	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6678	SelectPostLoad(N: Node, NumVecs: `2`, Opc: AArch64::LD2Rv2d_POST, SubRegIdx: AArch64::qsub0);
6679	return;
6680	}
6681	break;
6682	}
6683	case AArch64ISD::LD3DUPpost: {
6684	if (VT == MVT::v8i8) {
6685	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv8b_POST, SubRegIdx: AArch64::dsub0);
6686	return;
6687	} else if (VT == MVT::v16i8) {
6688	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv16b_POST, SubRegIdx: AArch64::qsub0);
6689	return;
6690	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6691	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv4h_POST, SubRegIdx: AArch64::dsub0);
6692	return;
6693	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6694	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv8h_POST, SubRegIdx: AArch64::qsub0);
6695	return;
6696	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6697	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv2s_POST, SubRegIdx: AArch64::dsub0);
6698	return;
6699	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6700	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv4s_POST, SubRegIdx: AArch64::qsub0);
6701	return;
6702	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6703	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv1d_POST, SubRegIdx: AArch64::dsub0);
6704	return;
6705	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6706	SelectPostLoad(N: Node, NumVecs: `3`, Opc: AArch64::LD3Rv2d_POST, SubRegIdx: AArch64::qsub0);
6707	return;
6708	}
6709	break;
6710	}
6711	case AArch64ISD::LD4DUPpost: {
6712	if (VT == MVT::v8i8) {
6713	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv8b_POST, SubRegIdx: AArch64::dsub0);
6714	return;
6715	} else if (VT == MVT::v16i8) {
6716	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv16b_POST, SubRegIdx: AArch64::qsub0);
6717	return;
6718	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6719	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv4h_POST, SubRegIdx: AArch64::dsub0);
6720	return;
6721	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6722	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv8h_POST, SubRegIdx: AArch64::qsub0);
6723	return;
6724	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6725	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv2s_POST, SubRegIdx: AArch64::dsub0);
6726	return;
6727	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6728	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv4s_POST, SubRegIdx: AArch64::qsub0);
6729	return;
6730	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6731	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv1d_POST, SubRegIdx: AArch64::dsub0);
6732	return;
6733	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6734	SelectPostLoad(N: Node, NumVecs: `4`, Opc: AArch64::LD4Rv2d_POST, SubRegIdx: AArch64::qsub0);
6735	return;
6736	}
6737	break;
6738	}
6739	case AArch64ISD::LD1LANEpost: {
6740	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6741	SelectPostLoadLane(N: Node, NumVecs: `1`, Opc: AArch64::LD1i8_POST);
6742	return;
6743	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6744	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6745	SelectPostLoadLane(N: Node, NumVecs: `1`, Opc: AArch64::LD1i16_POST);
6746	return;
6747	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6748	VT == MVT::v2f32) {
6749	SelectPostLoadLane(N: Node, NumVecs: `1`, Opc: AArch64::LD1i32_POST);
6750	return;
6751	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
6752	VT == MVT::v1f64) {
6753	SelectPostLoadLane(N: Node, NumVecs: `1`, Opc: AArch64::LD1i64_POST);
6754	return;
6755	}
6756	break;
6757	}
6758	case AArch64ISD::LD2LANEpost: {
6759	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6760	SelectPostLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i8_POST);
6761	return;
6762	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6763	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6764	SelectPostLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i16_POST);
6765	return;
6766	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6767	VT == MVT::v2f32) {
6768	SelectPostLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i32_POST);
6769	return;
6770	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
6771	VT == MVT::v1f64) {
6772	SelectPostLoadLane(N: Node, NumVecs: `2`, Opc: AArch64::LD2i64_POST);
6773	return;
6774	}
6775	break;
6776	}
6777	case AArch64ISD::LD3LANEpost: {
6778	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6779	SelectPostLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i8_POST);
6780	return;
6781	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6782	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6783	SelectPostLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i16_POST);
6784	return;
6785	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6786	VT == MVT::v2f32) {
6787	SelectPostLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i32_POST);
6788	return;
6789	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
6790	VT == MVT::v1f64) {
6791	SelectPostLoadLane(N: Node, NumVecs: `3`, Opc: AArch64::LD3i64_POST);
6792	return;
6793	}
6794	break;
6795	}
6796	case AArch64ISD::LD4LANEpost: {
6797	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6798	SelectPostLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i8_POST);
6799	return;
6800	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6801	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6802	SelectPostLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i16_POST);
6803	return;
6804	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6805	VT == MVT::v2f32) {
6806	SelectPostLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i32_POST);
6807	return;
6808	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
6809	VT == MVT::v1f64) {
6810	SelectPostLoadLane(N: Node, NumVecs: `4`, Opc: AArch64::LD4i64_POST);
6811	return;
6812	}
6813	break;
6814	}
6815	case AArch64ISD::ST2post: {
6816	VT = Node->getOperand(Num: `1`).getValueType();
6817	if (VT == MVT::v8i8) {
6818	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov8b_POST);
6819	return;
6820	} else if (VT == MVT::v16i8) {
6821	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov16b_POST);
6822	return;
6823	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6824	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov4h_POST);
6825	return;
6826	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6827	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov8h_POST);
6828	return;
6829	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6830	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov2s_POST);
6831	return;
6832	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6833	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov4s_POST);
6834	return;
6835	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6836	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST2Twov2d_POST);
6837	return;
6838	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6839	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov1d_POST);
6840	return;
6841	}
6842	break;
6843	}
6844	case AArch64ISD::ST3post: {
6845	VT = Node->getOperand(Num: `1`).getValueType();
6846	if (VT == MVT::v8i8) {
6847	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev8b_POST);
6848	return;
6849	} else if (VT == MVT::v16i8) {
6850	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev16b_POST);
6851	return;
6852	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6853	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev4h_POST);
6854	return;
6855	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6856	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev8h_POST);
6857	return;
6858	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6859	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev2s_POST);
6860	return;
6861	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6862	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev4s_POST);
6863	return;
6864	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6865	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST3Threev2d_POST);
6866	return;
6867	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6868	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev1d_POST);
6869	return;
6870	}
6871	break;
6872	}
6873	case AArch64ISD::ST4post: {
6874	VT = Node->getOperand(Num: `1`).getValueType();
6875	if (VT == MVT::v8i8) {
6876	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv8b_POST);
6877	return;
6878	} else if (VT == MVT::v16i8) {
6879	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv16b_POST);
6880	return;
6881	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6882	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv4h_POST);
6883	return;
6884	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6885	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv8h_POST);
6886	return;
6887	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6888	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv2s_POST);
6889	return;
6890	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6891	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv4s_POST);
6892	return;
6893	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6894	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST4Fourv2d_POST);
6895	return;
6896	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6897	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv1d_POST);
6898	return;
6899	}
6900	break;
6901	}
6902	case AArch64ISD::ST1x2post: {
6903	VT = Node->getOperand(Num: `1`).getValueType();
6904	if (VT == MVT::v8i8) {
6905	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov8b_POST);
6906	return;
6907	} else if (VT == MVT::v16i8) {
6908	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov16b_POST);
6909	return;
6910	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6911	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov4h_POST);
6912	return;
6913	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6914	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov8h_POST);
6915	return;
6916	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6917	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov2s_POST);
6918	return;
6919	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6920	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov4s_POST);
6921	return;
6922	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6923	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov1d_POST);
6924	return;
6925	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6926	SelectPostStore(N: Node, NumVecs: `2`, Opc: AArch64::ST1Twov2d_POST);
6927	return;
6928	}
6929	break;
6930	}
6931	case AArch64ISD::ST1x3post: {
6932	VT = Node->getOperand(Num: `1`).getValueType();
6933	if (VT == MVT::v8i8) {
6934	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev8b_POST);
6935	return;
6936	} else if (VT == MVT::v16i8) {
6937	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev16b_POST);
6938	return;
6939	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6940	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev4h_POST);
6941	return;
6942	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16 ) {
6943	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev8h_POST);
6944	return;
6945	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6946	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev2s_POST);
6947	return;
6948	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6949	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev4s_POST);
6950	return;
6951	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6952	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev1d_POST);
6953	return;
6954	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6955	SelectPostStore(N: Node, NumVecs: `3`, Opc: AArch64::ST1Threev2d_POST);
6956	return;
6957	}
6958	break;
6959	}
6960	case AArch64ISD::ST1x4post: {
6961	VT = Node->getOperand(Num: `1`).getValueType();
6962	if (VT == MVT::v8i8) {
6963	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv8b_POST);
6964	return;
6965	} else if (VT == MVT::v16i8) {
6966	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv16b_POST);
6967	return;
6968	} else if (VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\| VT == MVT::v4bf16) {
6969	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv4h_POST);
6970	return;
6971	} else if (VT == MVT::v8i16 \|\| VT == MVT::v8f16 \|\| VT == MVT::v8bf16) {
6972	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv8h_POST);
6973	return;
6974	} else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32) {
6975	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv2s_POST);
6976	return;
6977	} else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32) {
6978	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv4s_POST);
6979	return;
6980	} else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64) {
6981	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv1d_POST);
6982	return;
6983	} else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64) {
6984	SelectPostStore(N: Node, NumVecs: `4`, Opc: AArch64::ST1Fourv2d_POST);
6985	return;
6986	}
6987	break;
6988	}
6989	case AArch64ISD::ST2LANEpost: {
6990	VT = Node->getOperand(Num: `1`).getValueType();
6991	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
6992	SelectPostStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i8_POST);
6993	return;
6994	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
6995	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
6996	SelectPostStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i16_POST);
6997	return;
6998	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
6999	VT == MVT::v2f32) {
7000	SelectPostStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i32_POST);
7001	return;
7002	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
7003	VT == MVT::v1f64) {
7004	SelectPostStoreLane(N: Node, NumVecs: `2`, Opc: AArch64::ST2i64_POST);
7005	return;
7006	}
7007	break;
7008	}
7009	case AArch64ISD::ST3LANEpost: {
7010	VT = Node->getOperand(Num: `1`).getValueType();
7011	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
7012	SelectPostStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i8_POST);
7013	return;
7014	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
7015	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
7016	SelectPostStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i16_POST);
7017	return;
7018	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
7019	VT == MVT::v2f32) {
7020	SelectPostStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i32_POST);
7021	return;
7022	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
7023	VT == MVT::v1f64) {
7024	SelectPostStoreLane(N: Node, NumVecs: `3`, Opc: AArch64::ST3i64_POST);
7025	return;
7026	}
7027	break;
7028	}
7029	case AArch64ISD::ST4LANEpost: {
7030	VT = Node->getOperand(Num: `1`).getValueType();
7031	if (VT == MVT::v16i8 \|\| VT == MVT::v8i8) {
7032	SelectPostStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i8_POST);
7033	return;
7034	} else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16 \|\| VT == MVT::v4f16 \|\|
7035	VT == MVT::v8f16 \|\| VT == MVT::v4bf16 \|\| VT == MVT::v8bf16) {
7036	SelectPostStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i16_POST);
7037	return;
7038	} else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
7039	VT == MVT::v2f32) {
7040	SelectPostStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i32_POST);
7041	return;
7042	} else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
7043	VT == MVT::v1f64) {
7044	SelectPostStoreLane(N: Node, NumVecs: `4`, Opc: AArch64::ST4i64_POST);
7045	return;
7046	}
7047	break;
7048	}
7049	case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7050	if (VT == MVT::nxv16i8) {
7051	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `0`, Opc_ri: AArch64::LD2B_IMM, Opc_rr: AArch64::LD2B);
7052	return;
7053	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
7054	VT == MVT::nxv8bf16) {
7055	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `1`, Opc_ri: AArch64::LD2H_IMM, Opc_rr: AArch64::LD2H);
7056	return;
7057	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
7058	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `2`, Opc_ri: AArch64::LD2W_IMM, Opc_rr: AArch64::LD2W);
7059	return;
7060	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
7061	SelectPredicatedLoad(N: Node, NumVecs: `2`, Scale: `3`, Opc_ri: AArch64::LD2D_IMM, Opc_rr: AArch64::LD2D);
7062	return;
7063	}
7064	break;
7065	}
7066	case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7067	if (VT == MVT::nxv16i8) {
7068	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `0`, Opc_ri: AArch64::LD3B_IMM, Opc_rr: AArch64::LD3B);
7069	return;
7070	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
7071	VT == MVT::nxv8bf16) {
7072	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `1`, Opc_ri: AArch64::LD3H_IMM, Opc_rr: AArch64::LD3H);
7073	return;
7074	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
7075	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `2`, Opc_ri: AArch64::LD3W_IMM, Opc_rr: AArch64::LD3W);
7076	return;
7077	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
7078	SelectPredicatedLoad(N: Node, NumVecs: `3`, Scale: `3`, Opc_ri: AArch64::LD3D_IMM, Opc_rr: AArch64::LD3D);
7079	return;
7080	}
7081	break;
7082	}
7083	case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7084	if (VT == MVT::nxv16i8) {
7085	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `0`, Opc_ri: AArch64::LD4B_IMM, Opc_rr: AArch64::LD4B);
7086	return;
7087	} else if (VT == MVT::nxv8i16 \|\| VT == MVT::nxv8f16 \|\|
7088	VT == MVT::nxv8bf16) {
7089	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `1`, Opc_ri: AArch64::LD4H_IMM, Opc_rr: AArch64::LD4H);
7090	return;
7091	} else if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv4f32) {
7092	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `2`, Opc_ri: AArch64::LD4W_IMM, Opc_rr: AArch64::LD4W);
7093	return;
7094	} else if (VT == MVT::nxv2i64 \|\| VT == MVT::nxv2f64) {
7095	SelectPredicatedLoad(N: Node, NumVecs: `4`, Scale: `3`, Opc_ri: AArch64::LD4D_IMM, Opc_rr: AArch64::LD4D);
7096	return;
7097	}
7098	break;
7099	}
7100	}
7101
7102	// Select the default instruction
7103	SelectCode(N: Node);
7104	}
7105
7106	/// createAArch64ISelDag - This pass converts a legalized DAG into a
7107	/// AArch64-specific DAG, ready for instruction scheduling.
7108	FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7109	CodeGenOptLevel OptLevel) {
7110	return new AArch64DAGToDAGISelLegacy (TM, OptLevel);
7111	}
7112
7113	/// When \p PredVT is a scalable vector predicate in the form
7114	/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7115	/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7116	/// structured vectors (NumVec >1), the output data type is
7117	/// MVT::nx<MNumVec>xi<bits> s.t. M x bits = 128. If the input*
7118	/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7119	/// EVT.
7120	static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7121	unsigned NumVec) {
7122	assert(NumVec > `0` && NumVec < `5` && "Invalid number of vectors.");
7123	if (!PredVT.isScalableVector() \|\| PredVT.getVectorElementType() != MVT::i1)
7124	return EVT ();
7125
7126	if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7127	PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7128	return EVT ();
7129
7130	ElementCount EC = PredVT.getVectorElementCount();
7131	EVT ScalarVT =
7132	EVT::getIntegerVT(Context&: Ctx, BitWidth: AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7133	EVT MemVT = EVT::getVectorVT(Context&: Ctx, VT: ScalarVT, EC: EC * NumVec);
7134
7135	return MemVT;
7136	}
7137
7138	/// Return the EVT of the data associated to a memory operation in \p
7139	/// Root. If such EVT cannot be retrived, it returns an invalid EVT.
7140	static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7141	if (isa<MemSDNode>(Val: Root))
7142	return cast<MemSDNode>(Val: Root)->getMemoryVT();
7143
7144	if (isa<MemIntrinsicSDNode>(Val: Root))
7145	return cast<MemIntrinsicSDNode>(Val: Root)->getMemoryVT();
7146
7147	const unsigned Opcode = Root->getOpcode();
7148	// For custom ISD nodes, we have to look at them individually to extract the
7149	// type of the data moved to/from memory.
7150	switch (Opcode) {
7151	case AArch64ISD::LD1_MERGE_ZERO:
7152	case AArch64ISD::LD1S_MERGE_ZERO:
7153	case AArch64ISD::LDNF1_MERGE_ZERO:
7154	case AArch64ISD::LDNF1S_MERGE_ZERO:
7155	return cast<VTSDNode>(Val: Root->getOperand(Num: `3`))->getVT();
7156	case AArch64ISD::ST1_PRED:
7157	return cast<VTSDNode>(Val: Root->getOperand(Num: `4`))->getVT();
7158	case AArch64ISD::SVE_LD2_MERGE_ZERO:
7159	return getPackedVectorTypeFromPredicateType(
7160	Ctx, PredVT: Root->getOperand(Num: `1`)->getValueType(ResNo: `0`), /NumVec=/`2`);
7161	case AArch64ISD::SVE_LD3_MERGE_ZERO:
7162	return getPackedVectorTypeFromPredicateType(
7163	Ctx, PredVT: Root->getOperand(Num: `1`)->getValueType(ResNo: `0`), /NumVec=/`3`);
7164	case AArch64ISD::SVE_LD4_MERGE_ZERO:
7165	return getPackedVectorTypeFromPredicateType(
7166	Ctx, PredVT: Root->getOperand(Num: `1`)->getValueType(ResNo: `0`), /NumVec=/`4`);
7167	default:
7168	break;
7169	}
7170
7171	if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7172	return EVT ();
7173
7174	switch (Root->getConstantOperandVal(Num: `1`)) {
7175	default:
7176	return EVT ();
7177	case Intrinsic::aarch64_sme_ldr:
7178	case Intrinsic::aarch64_sme_str:
7179	return MVT::nxv16i8;
7180	case Intrinsic::aarch64_sve_prf:
7181	// We are using an SVE prefetch intrinsic. Type must be inferred from the
7182	// width of the predicate.
7183	return getPackedVectorTypeFromPredicateType(
7184	Ctx, PredVT: Root->getOperand(Num: `2`)->getValueType(ResNo: `0`), /NumVec=/`1`);
7185	case Intrinsic::aarch64_sve_ld2_sret:
7186	case Intrinsic::aarch64_sve_ld2q_sret:
7187	return getPackedVectorTypeFromPredicateType(
7188	Ctx, PredVT: Root->getOperand(Num: `2`)->getValueType(ResNo: `0`), /NumVec=/`2`);
7189	case Intrinsic::aarch64_sve_st2q:
7190	return getPackedVectorTypeFromPredicateType(
7191	Ctx, PredVT: Root->getOperand(Num: `4`)->getValueType(ResNo: `0`), /NumVec=/`2`);
7192	case Intrinsic::aarch64_sve_ld3_sret:
7193	case Intrinsic::aarch64_sve_ld3q_sret:
7194	return getPackedVectorTypeFromPredicateType(
7195	Ctx, PredVT: Root->getOperand(Num: `2`)->getValueType(ResNo: `0`), /NumVec=/`3`);
7196	case Intrinsic::aarch64_sve_st3q:
7197	return getPackedVectorTypeFromPredicateType(
7198	Ctx, PredVT: Root->getOperand(Num: `5`)->getValueType(ResNo: `0`), /NumVec=/`3`);
7199	case Intrinsic::aarch64_sve_ld4_sret:
7200	case Intrinsic::aarch64_sve_ld4q_sret:
7201	return getPackedVectorTypeFromPredicateType(
7202	Ctx, PredVT: Root->getOperand(Num: `2`)->getValueType(ResNo: `0`), /NumVec=/`4`);
7203	case Intrinsic::aarch64_sve_st4q:
7204	return getPackedVectorTypeFromPredicateType(
7205	Ctx, PredVT: Root->getOperand(Num: `6`)->getValueType(ResNo: `0`), /NumVec=/`4`);
7206	case Intrinsic::aarch64_sve_ld1udq:
7207	case Intrinsic::aarch64_sve_st1dq:
7208	return EVT (MVT::nxv1i64);
7209	case Intrinsic::aarch64_sve_ld1uwq:
7210	case Intrinsic::aarch64_sve_st1wq:
7211	return EVT (MVT::nxv1i32);
7212	}
7213	}
7214
7215	/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7216	/// Base + OffImm sizeof(MemVT) for Min >= OffImm <= Max*
7217	/// where Root is the memory access using N for its address.
7218	template <int64_t Min, int64_t Max>
7219	bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7220	SDValue &Base,
7221	SDValue &OffImm) {
7222	const EVT MemVT = getMemVTFromNode(Ctx&: *(CurDAG->getContext()), Root);
7223	const DataLayout &DL = CurDAG->getDataLayout();
7224	const MachineFrameInfo &MFI = MF->getFrameInfo();
7225
7226	if (N.getOpcode() == ISD::FrameIndex) {
7227	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
7228	// We can only encode VL scaled offsets, so only fold in frame indexes
7229	// referencing SVE objects.
7230	if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector) {
7231	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
7232	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i64);
7233	return true;
7234	}
7235
7236	return false;
7237	}
7238
7239	if (MemVT == EVT ())
7240	return false;
7241
7242	if (N.getOpcode() != ISD::ADD)
7243	return false;
7244
7245	SDValue VScale = N.getOperand(i: `1`);
7246	if (VScale.getOpcode() != ISD::VSCALE)
7247	return false;
7248
7249	TypeSize TS = MemVT.getSizeInBits();
7250	int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / `8`;
7251	int64_t MulImm = cast<ConstantSDNode>(Val: VScale.getOperand(i: `0`))->getSExtValue();
7252
7253	if ((MulImm % MemWidthBytes) != `0`)
7254	return false;
7255
7256	int64_t Offset = MulImm / MemWidthBytes;
7257	if (Offset < Min \|\| Offset > Max)
7258	return false;
7259
7260	Base = N.getOperand(i: `0`);
7261	if (Base.getOpcode() == ISD::FrameIndex) {
7262	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
7263	// We can only encode VL scaled offsets, so only fold in frame indexes
7264	// referencing SVE objects.
7265	if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector)
7266	Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
7267	}
7268
7269	OffImm = CurDAG->getTargetConstant(Val: Offset, DL: SDLoc (N), VT: MVT::i64);
7270	return true;
7271	}
7272
7273	/// Select register plus register addressing mode for SVE, with scaled
7274	/// offset.
7275	bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7276	SDValue &Base,
7277	SDValue &Offset) {
7278	if (N.getOpcode() != ISD::ADD)
7279	return false;
7280
7281	// Process an ADD node.
7282	const SDValue LHS = N.getOperand(i: `0`);
7283	const SDValue RHS = N.getOperand(i: `1`);
7284
7285	// 8 bit data does not come with the SHL node, so it is treated
7286	// separately.
7287	if (Scale == `0`) {
7288	Base = LHS;
7289	Offset = RHS;
7290	return true;
7291	}
7292
7293	if (auto C = dyn_cast<ConstantSDNode>(Val: RHS)) {
7294	int64_t ImmOff = C->getSExtValue();
7295	unsigned Size = `1` << Scale;
7296
7297	// To use the reg+reg addressing mode, the immediate must be a multiple of
7298	// the vector element's byte size.
7299	if (ImmOff % Size)
7300	return false;
7301
7302	SDLoc DL(N);
7303	Base = LHS;
7304	Offset = CurDAG->getTargetConstant(Val: ImmOff >> Scale, DL, VT: MVT::i64);
7305	SDValue Ops[] = {Offset};
7306	SDNode *MI = CurDAG->getMachineNode(Opcode: AArch64::MOVi64imm, dl: DL, VT: MVT::i64, Ops);
7307	Offset = SDValue (MI, `0`);
7308	return true;
7309	}
7310
7311	// Check if the RHS is a shift node with a constant.
7312	if (RHS.getOpcode() != ISD::SHL)
7313	return false;
7314
7315	const SDValue ShiftRHS = RHS.getOperand(i: `1`);
7316	if (auto *C = dyn_cast<ConstantSDNode>(Val: ShiftRHS))
7317	if (C->getZExtValue() == Scale) {
7318	Base = LHS;
7319	Offset = RHS.getOperand(i: `0`);
7320	return true;
7321	}
7322
7323	return false;
7324	}
7325
7326	bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7327	const AArch64TargetLowering *TLI =
7328	static_cast<const AArch64TargetLowering *>(getTargetLowering());
7329
7330	return TLI->isAllActivePredicate(DAG&: *CurDAG, N);
7331	}
7332
7333	bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7334	EVT VT = N.getValueType();
7335	return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7336	}
7337
7338	bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7339	SDValue &Base, SDValue &Offset,
7340	unsigned Scale) {
7341	// Try to untangle an ADD node into a 'reg + offset'
7342	if (N.getOpcode() == ISD::ADD)
7343	if (auto C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
7344	int64_t ImmOff = C->getSExtValue();
7345	if ((ImmOff > `0` && ImmOff <= MaxSize && (ImmOff % Scale == `0`))) {
7346	Base = N.getOperand(i: `0`);
7347	Offset = CurDAG->getTargetConstant(Val: ImmOff / Scale, DL: SDLoc (N), VT: MVT::i64);
7348	return true;
7349	}
7350	}
7351
7352	// By default, just match reg + 0.
7353	Base = N;
7354	Offset = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i64);
7355	return true;
7356	}
7357

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp