ARMISelDAGToDAG.cpp source code [llvm_projects/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp]

1	//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines an instruction selector for the ARM target.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "ARM.h"
14	#include "ARMBaseInstrInfo.h"
15	#include "ARMTargetMachine.h"
16	#include "MCTargetDesc/ARMAddressingModes.h"
17	#include "Utils/ARMBaseInfo.h"
18	#include "llvm/ADT/APSInt.h"
19	#include "llvm/ADT/StringSwitch.h"
20	#include "llvm/CodeGen/MachineFrameInfo.h"
21	#include "llvm/CodeGen/MachineFunction.h"
22	#include "llvm/CodeGen/MachineInstrBuilder.h"
23	#include "llvm/CodeGen/MachineRegisterInfo.h"
24	#include "llvm/CodeGen/SelectionDAG.h"
25	#include "llvm/CodeGen/SelectionDAGISel.h"
26	#include "llvm/CodeGen/TargetLowering.h"
27	#include "llvm/IR/CallingConv.h"
28	#include "llvm/IR/Constants.h"
29	#include "llvm/IR/DerivedTypes.h"
30	#include "llvm/IR/Function.h"
31	#include "llvm/IR/Intrinsics.h"
32	#include "llvm/IR/IntrinsicsARM.h"
33	#include "llvm/IR/LLVMContext.h"
34	#include "llvm/Support/CommandLine.h"
35	#include "llvm/Support/Debug.h"
36	#include "llvm/Support/ErrorHandling.h"
37	#include "llvm/Target/TargetOptions.h"
38	#include <optional>
39
40	using namespace llvm;
41
42	#define DEBUG_TYPE "arm-isel"
43	#define PASS_NAME "ARM Instruction Selection"
44
45	static cl::opt<bool>
46	DisableShifterOp("disable-shifter-op", cl::Hidden,
47	cl::desc ("Disable isel of shifter-op"),
48	cl::init(Val: false));
49
50	//===--------------------------------------------------------------------===//
51	/// ARMDAGToDAGISel - ARM specific code to select ARM machine
52	/// instructions for SelectionDAG operations.
53	///
54	namespace {
55
56	class ARMDAGToDAGISel : public SelectionDAGISel {
57	/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
58	/// make the right decision when generating code for different targets.
59	const ARMSubtarget *Subtarget;
60
61	public:
62	ARMDAGToDAGISel() = delete;
63
64	explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
65	: SelectionDAGISel (tm, OptLevel) {}
66
67	bool runOnMachineFunction(MachineFunction &MF) override {
68	// Reset the subtarget each time through.
69	Subtarget = &MF.getSubtarget<ARMSubtarget>();
70	SelectionDAGISel::runOnMachineFunction(mf&: MF);
71	return true;
72	}
73
74	void PreprocessISelDAG() override;
75
76	/// getI32Imm - Return a target constant of type i32 with the specified
77	/// value.
78	inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
79	return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
80	}
81
82	void Select(SDNode *N) override;
83
84	/// Return true as some complex patterns, like those that call
85	/// canExtractShiftFromMul can modify the DAG inplace.
86	bool ComplexPatternFuncMutatesDAG() const override { return true; }
87
88	bool hasNoVMLxHazardUse(SDNode N) const*;
89	bool isShifterOpProfitable(const SDValue &Shift,
90	ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
91	bool SelectRegShifterOperand(SDValue N, SDValue &A,
92	SDValue &B, SDValue &C,
93	bool CheckProfitability = true);
94	bool SelectImmShifterOperand(SDValue N, SDValue &A,
95	SDValue &B, bool CheckProfitability = true);
96	bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
97	SDValue &C) {
98	// Don't apply the profitability check
99	return SelectRegShifterOperand(N, A, B, C, CheckProfitability: false);
100	}
101	bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
102	// Don't apply the profitability check
103	return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
104	}
105	bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
106	if (!N.hasOneUse())
107	return false;
108	return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
109	}
110
111	bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
112
113	bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
114	bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
115
116	bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
117	const ConstantSDNode *CN = cast<ConstantSDNode>(Val&: N);
118	Pred = CurDAG->getTargetConstant(Val: CN->getZExtValue(), DL: SDLoc (N), VT: MVT::i32);
119	Reg = CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32);
120	return true;
121	}
122
123	bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
124	SDValue &Offset, SDValue &Opc);
125	bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
126	SDValue &Offset, SDValue &Opc);
127	bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
128	SDValue &Offset, SDValue &Opc);
129	bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
130	bool SelectAddrMode3(SDValue N, SDValue &Base,
131	SDValue &Offset, SDValue &Opc);
132	bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
133	SDValue &Offset, SDValue &Opc);
134	bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
135	bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
136	bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
137	bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
138	bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
139
140	bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
141
142	// Thumb Addressing Modes:
143	bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
144	bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
145	bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
146	SDValue &OffImm);
147	bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
148	SDValue &OffImm);
149	bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
150	SDValue &OffImm);
151	bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
152	SDValue &OffImm);
153	bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
154	template <unsigned Shift>
155	bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
156
157	// Thumb 2 Addressing Modes:
158	bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
159	template <unsigned Shift>
160	bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
161	bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
162	SDValue &OffImm);
163	bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
164	SDValue &OffImm);
165	template <unsigned Shift>
166	bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
167	bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
168	unsigned Shift);
169	template <unsigned Shift>
170	bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
171	bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
172	SDValue &OffReg, SDValue &ShImm);
173	bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
174
175	template<int Min, int Max>
176	bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
177
178	inline bool is_so_imm(unsigned Imm) const {
179	return ARM_AM::getSOImmVal(Arg: Imm) != -`1`;
180	}
181
182	inline bool is_so_imm_not(unsigned Imm) const {
183	return ARM_AM::getSOImmVal(Arg: ~Imm) != -`1`;
184	}
185
186	inline bool is_t2_so_imm(unsigned Imm) const {
187	return ARM_AM::getT2SOImmVal(Arg: Imm) != -`1`;
188	}
189
190	inline bool is_t2_so_imm_not(unsigned Imm) const {
191	return ARM_AM::getT2SOImmVal(Arg: ~Imm) != -`1`;
192	}
193
194	// Include the pieces autogenerated from the target description.
195	#include "ARMGenDAGISel.inc"
196
197	private:
198	void transferMemOperands(SDNode Src, SDNode Dst);
199
200	/// Indexed (pre/post inc/dec) load matching code for ARM.
201	bool tryARMIndexedLoad(SDNode *N);
202	bool tryT1IndexedLoad(SDNode *N);
203	bool tryT2IndexedLoad(SDNode *N);
204	bool tryMVEIndexedLoad(SDNode *N);
205	bool tryFMULFixed(SDNode *N, SDLoc dl);
206	bool tryFP_TO_INT(SDNode *N, SDLoc dl);
207	bool transformFixedFloatingPointConversion(SDNode N, SDNode FMul,
208	bool IsUnsigned,
209	bool FixedToFloat);
210
211	/// SelectVLD - Select NEON load intrinsics. NumVecs should be
212	/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
213	/// loads of D registers and even subregs and odd subregs of Q registers.
214	/// For NumVecs <= 2, QOpcodes1 is not used.
215	void SelectVLD(SDNode N, bool* isUpdating, unsigned NumVecs,
216	const uint16_t DOpcodes, const* uint16_t *QOpcodes0,
217	const uint16_t *QOpcodes1);
218
219	/// SelectVST - Select NEON store intrinsics. NumVecs should
220	/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
221	/// stores of D registers and even subregs and odd subregs of Q registers.
222	/// For NumVecs <= 2, QOpcodes1 is not used.
223	void SelectVST(SDNode N, bool* isUpdating, unsigned NumVecs,
224	const uint16_t DOpcodes, const* uint16_t *QOpcodes0,
225	const uint16_t *QOpcodes1);
226
227	/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
228	/// be 2, 3 or 4. The opcode arrays specify the instructions used for
229	/// load/store of D registers and Q registers.
230	void SelectVLDSTLane(SDNode N, bool* IsLoad, bool isUpdating,
231	unsigned NumVecs, const uint16_t *DOpcodes,
232	const uint16_t *QOpcodes);
233
234	/// Helper functions for setting up clusters of MVE predication operands.
235	template <typename SDValueVector>
236	void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
237	SDValue PredicateMask);
238	template <typename SDValueVector>
239	void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
240	SDValue PredicateMask, SDValue Inactive);
241
242	template <typename SDValueVector>
243	void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
244	template <typename SDValueVector>
245	void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
246
247	/// SelectMVE_WB - Select MVE writeback load/store intrinsics.
248	void SelectMVE_WB(SDNode N, const* uint16_t Opcodes, bool* Predicated);
249
250	/// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
251	void SelectMVE_LongShift(SDNode N, uint16_t Opcode, bool* Immediate,
252	bool HasSaturationOperand);
253
254	/// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
255	void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
256	uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
257
258	/// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
259	/// vector lanes.
260	void SelectMVE_VSHLC(SDNode N, bool* Predicated);
261
262	/// Select long MVE vector reductions with two vector operands
263	/// Stride is the number of vector element widths the instruction can operate
264	/// on:
265	/// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
266	/// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
267	/// Stride is used when addressing the OpcodesS array which contains multiple
268	/// opcodes for each element width.
269	/// TySize is the index into the list of element types listed above
270	void SelectBaseMVE_VMLLDAV(SDNode N, bool* Predicated,
271	const uint16_t OpcodesS, const* uint16_t *OpcodesU,
272	size_t Stride, size_t TySize);
273
274	/// Select a 64-bit MVE vector reduction with two vector operands
275	/// arm_mve_vmlldava_[predicated]
276	void SelectMVE_VMLLDAV(SDNode N, bool* Predicated, const uint16_t *OpcodesS,
277	const uint16_t *OpcodesU);
278	/// Select a 72-bit MVE vector rounding reduction with two vector operands
279	/// int_arm_mve_vrmlldavha[_predicated]
280	void SelectMVE_VRMLLDAVH(SDNode N, bool* Predicated, const uint16_t *OpcodesS,
281	const uint16_t *OpcodesU);
282
283	/// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
284	/// should be 2 or 4. The opcode array specifies the instructions
285	/// used for 8, 16 and 32-bit lane sizes respectively, and each
286	/// pointer points to a set of NumVecs sub-opcodes used for the
287	/// different stages (e.g. VLD20 versus VLD21) of each load family.
288	void SelectMVE_VLD(SDNode N, unsigned* NumVecs,
289	const uint16_t *const Opcodes, bool* HasWriteback);
290
291	/// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
292	/// array of 3 elements for the 8, 16 and 32-bit lane sizes.
293	void SelectMVE_VxDUP(SDNode N, const* uint16_t *Opcodes,
294	bool Wrapping, bool Predicated);
295
296	/// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
297	/// CX1DA, CX2D, CX2DA, CX3, CX3DA).
298	/// \arg \c NumExtraOps number of extra operands besides the coprocossor,
299	/// the accumulator and the immediate operand, i.e. 0
300	/// for CX1, 1 for CX2, 2 for CX3*
301	/// \arg \c HasAccum whether the instruction has an accumulator operand
302	void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
303	bool HasAccum);
304
305	/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
306	/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
307	/// for loading D registers.
308	void SelectVLDDup(SDNode N, bool* IsIntrinsic, bool isUpdating,
309	unsigned NumVecs, const uint16_t *DOpcodes,
310	const uint16_t QOpcodes0 = nullptr*,
311	const uint16_t QOpcodes1 = nullptr*);
312
313	/// Try to select SBFX/UBFX instructions for ARM.
314	bool tryV6T2BitfieldExtractOp(SDNode N, bool* isSigned);
315
316	bool tryInsertVectorElt(SDNode *N);
317
318	// Select special operations if node forms integer ABS pattern
319	bool tryABSOp(SDNode *N);
320
321	bool tryReadRegister(SDNode *N);
322	bool tryWriteRegister(SDNode *N);
323
324	bool tryInlineAsm(SDNode *N);
325
326	void SelectCMPZ(SDNode N, bool* &SwitchEQNEToPLMI);
327
328	void SelectCMP_SWAP(SDNode *N);
329
330	/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
331	/// inline asm expressions.
332	bool SelectInlineAsmMemoryOperand(const SDValue &Op,
333	InlineAsm::ConstraintCode ConstraintID,
334	std::vector<SDValue> &OutOps) override;
335
336	// Form pairs of consecutive R, S, D, or Q registers.
337	SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
338	SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
339	SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
340	SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
341
342	// Form sequences of 4 consecutive S, D, or Q registers.
343	SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
344	SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
345	SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
346
347	// Get the alignment operand for a NEON VLD or VST instruction.
348	SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
349	bool is64BitVector);
350
351	/// Checks if N is a multiplication by a constant where we can extract out a
352	/// power of two from the constant so that it can be used in a shift, but only
353	/// if it simplifies the materialization of the constant. Returns true if it
354	/// is, and assigns to PowerOfTwo the power of two that should be extracted
355	/// out and to NewMulConst the new constant to be multiplied by.
356	bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
357	unsigned &PowerOfTwo, SDValue &NewMulConst) const;
358
359	/// Replace N with M in CurDAG, in a way that also ensures that M gets
360	/// selected when N would have been selected.
361	void replaceDAGValue(const SDValue &N, SDValue M);
362	};
363
364	class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
365	public:
366	static char ID;
367	ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
368	: SelectionDAGISelLegacy (
369	ID, std::make_unique<ARMDAGToDAGISel>(args&: tm, args&: OptLevel)) {}
370	};
371	}
372
373	char ARMDAGToDAGISelLegacy::ID = `0`;
374
375	INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
376
377	/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
378	/// operand. If so Imm will receive the 32-bit value.
379	static bool isInt32Immediate(SDNode N, unsigned* &Imm) {
380	if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: `0`) == MVT::i32) {
381	Imm = N->getAsZExtVal();
382	return true;
383	}
384	return false;
385	}
386
387	// isInt32Immediate - This method tests to see if a constant operand.
388	// If so Imm will receive the 32 bit value.
389	static bool isInt32Immediate(SDValue N, unsigned &Imm) {
390	return isInt32Immediate(N: N.getNode(), Imm);
391	}
392
393	// isOpcWithIntImmediate - This method tests to see if the node is a specific
394	// opcode and that it has a immediate integer right operand.
395	// If so Imm will receive the 32 bit value.
396	static bool isOpcWithIntImmediate(SDNode N, unsigned* Opc, unsigned& Imm) {
397	return N->getOpcode() == Opc &&
398	isInt32Immediate(N: N->getOperand(Num: `1`).getNode(), Imm);
399	}
400
401	/// Check whether a particular node is a constant value representable as
402	/// (N Scale) where (N in [\p RangeMin, \p RangeMax).*
403	///
404	/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
405	static bool isScaledConstantInRange(SDValue Node, int Scale,
406	int RangeMin, int RangeMax,
407	int &ScaledConstant) {
408	assert(Scale > `0` && "Invalid scale!");
409
410	// Check that this is a constant.
411	const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Node);
412	if (!C)
413	return false;
414
415	ScaledConstant = (int) C->getZExtValue();
416	if ((ScaledConstant % Scale) != `0`)
417	return false;
418
419	ScaledConstant /= Scale;
420	return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
421	}
422
423	void ARMDAGToDAGISel::PreprocessISelDAG() {
424	if (!Subtarget->hasV6T2Ops())
425	return;
426
427	bool isThumb2 = Subtarget->isThumb();
428	// We use make_early_inc_range to avoid invalidation issues.
429	for (SDNode &N : llvm::make_early_inc_range(Range: CurDAG->allnodes())) {
430	if (N.getOpcode() != ISD::ADD)
431	continue;
432
433	// Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
434	// leading zeros, followed by consecutive set bits, followed by 1 or 2
435	// trailing zeros, e.g. 1020.
436	// Transform the expression to
437	// (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
438	// of trailing zeros of c2. The left shift would be folded as an shifter
439	// operand of 'add' and the 'and' and 'srl' would become a bits extraction
440	// node (UBFX).
441
442	SDValue N0 = N.getOperand(Num: `0`);
443	SDValue N1 = N.getOperand(Num: `1`);
444	unsigned And_imm = `0`;
445	if (!isOpcWithIntImmediate(N: N1.getNode(), Opc: ISD::AND, Imm&: And_imm)) {
446	if (isOpcWithIntImmediate(N: N0.getNode(), Opc: ISD::AND, Imm&: And_imm))
447	std::swap(a&: N0, b&: N1);
448	}
449	if (!And_imm)
450	continue;
451
452	// Check if the AND mask is an immediate of the form: 000.....1111111100
453	unsigned TZ = llvm::countr_zero(Val: And_imm);
454	if (TZ != `1` && TZ != `2`)
455	// Be conservative here. Shifter operands aren't always free. e.g. On
456	// Swift, left shifter operand of 1 / 2 for free but others are not.
457	// e.g.
458	// ubfx r3, r1, #16, #8
459	// ldr.w r3, [r0, r3, lsl #2]
460	// vs.
461	// mov.w r9, #1020
462	// and.w r2, r9, r1, lsr #14
463	// ldr r2, [r0, r2]
464	continue;
465	And_imm >>= TZ;
466	if (And_imm & (And_imm + `1`))
467	continue;
468
469	// Look for (and (srl X, c1), c2).
470	SDValue Srl = N1.getOperand(i: `0`);
471	unsigned Srl_imm = `0`;
472	if (!isOpcWithIntImmediate(N: Srl.getNode(), Opc: ISD::SRL, Imm&: Srl_imm) \|\|
473	(Srl_imm <= `2`))
474	continue;
475
476	// Make sure first operand is not a shifter operand which would prevent
477	// folding of the left shift.
478	SDValue CPTmp0;
479	SDValue CPTmp1;
480	SDValue CPTmp2;
481	if (isThumb2) {
482	if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1))
483	continue;
484	} else {
485	if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1) \|\|
486	SelectRegShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1, C&: CPTmp2))
487	continue;
488	}
489
490	// Now make the transformation.
491	Srl = CurDAG->getNode(Opcode: ISD::SRL, DL: SDLoc (Srl), VT: MVT::i32,
492	N1: Srl.getOperand(i: `0`),
493	N2: CurDAG->getConstant(Val: Srl_imm + TZ, DL: SDLoc (Srl),
494	VT: MVT::i32));
495	N1 = CurDAG->getNode(Opcode: ISD::AND, DL: SDLoc (N1), VT: MVT::i32,
496	N1: Srl,
497	N2: CurDAG->getConstant(Val: And_imm, DL: SDLoc (Srl), VT: MVT::i32));
498	N1 = CurDAG->getNode(Opcode: ISD::SHL, DL: SDLoc (N1), VT: MVT::i32,
499	N1, N2: CurDAG->getConstant(Val: TZ, DL: SDLoc (Srl), VT: MVT::i32));
500	CurDAG->UpdateNodeOperands(N: &N, Op1: N0, Op2: N1);
501	}
502	}
503
504	/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
505	/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
506	/// least on current ARM implementations) which should be avoidded.
507	bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode N) const* {
508	if (OptLevel == CodeGenOptLevel::None)
509	return true;
510
511	if (!Subtarget->hasVMLxHazards())
512	return true;
513
514	if (!N->hasOneUse())
515	return false;
516
517	SDNode Use = N->use_begin();
518	if (Use->getOpcode() == ISD::CopyToReg)
519	return true;
520	if (Use->isMachineOpcode()) {
521	const ARMBaseInstrInfo TII = static_cast<const* ARMBaseInstrInfo *>(
522	CurDAG->getSubtarget().getInstrInfo());
523
524	const MCInstrDesc &MCID = TII->get(Opcode: Use->getMachineOpcode());
525	if (MCID.mayStore())
526	return true;
527	unsigned Opcode = MCID.getOpcode();
528	if (Opcode == ARM::VMOVRS \|\| Opcode == ARM::VMOVRRD)
529	return true;
530	// vmlx feeding into another vmlx. We actually want to unfold
531	// the use later in the MLxExpansion pass. e.g.
532	// vmla
533	// vmla (stall 8 cycles)
534	//
535	// vmul (5 cycles)
536	// vadd (5 cycles)
537	// vmla
538	// This adds up to about 18 - 19 cycles.
539	//
540	// vmla
541	// vmul (stall 4 cycles)
542	// vadd adds up to about 14 cycles.
543	return TII->isFpMLxInstruction(Opcode);
544	}
545
546	return false;
547	}
548
549	bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
550	ARM_AM::ShiftOpc ShOpcVal,
551	unsigned ShAmt) {
552	if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
553	return true;
554	if (Shift.hasOneUse())
555	return true;
556	// R << 2 is free.
557	return ShOpcVal == ARM_AM::lsl &&
558	(ShAmt == `2` \|\| (Subtarget->isSwift() && ShAmt == `1`));
559	}
560
561	bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
562	unsigned MaxShift,
563	unsigned &PowerOfTwo,
564	SDValue &NewMulConst) const {
565	assert(N.getOpcode() == ISD::MUL);
566	assert(MaxShift > `0`);
567
568	// If the multiply is used in more than one place then changing the constant
569	// will make other uses incorrect, so don't.
570	if (!N.hasOneUse()) return false;
571	// Check if the multiply is by a constant
572	ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
573	if (!MulConst) return false;
574	// If the constant is used in more than one place then modifying it will mean
575	// we need to materialize two constants instead of one, which is a bad idea.
576	if (!MulConst->hasOneUse()) return false;
577	unsigned MulConstVal = MulConst->getZExtValue();
578	if (MulConstVal == `0`) return false;
579
580	// Find the largest power of 2 that MulConstVal is a multiple of
581	PowerOfTwo = MaxShift;
582	while ((MulConstVal % (`1` << PowerOfTwo)) != `0`) {
583	--PowerOfTwo;
584	if (PowerOfTwo == `0`) return false;
585	}
586
587	// Only optimise if the new cost is better
588	unsigned NewMulConstVal = MulConstVal / (`1` << PowerOfTwo);
589	NewMulConst = CurDAG->getConstant(Val: NewMulConstVal, DL: SDLoc (N), VT: MVT::i32);
590	unsigned OldCost = ConstantMaterializationCost(Val: MulConstVal, Subtarget);
591	unsigned NewCost = ConstantMaterializationCost(Val: NewMulConstVal, Subtarget);
592	return NewCost < OldCost;
593	}
594
595	void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
596	CurDAG->RepositionNode(Position: N.getNode()->getIterator(), N: M.getNode());
597	ReplaceUses(F: N, T: M);
598	}
599
600	bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
601	SDValue &BaseReg,
602	SDValue &Opc,
603	bool CheckProfitability) {
604	if (DisableShifterOp)
605	return false;
606
607	// If N is a multiply-by-constant and it's profitable to extract a shift and
608	// use it in a shifted operand do so.
609	if (N.getOpcode() == ISD::MUL) {
610	unsigned PowerOfTwo = `0`;
611	SDValue NewMulConst;
612	if (canExtractShiftFromMul(N, MaxShift: `31`, PowerOfTwo, NewMulConst)) {
613	HandleSDNode Handle(N);
614	SDLoc Loc(N);
615	replaceDAGValue(N: N.getOperand(i: `1`), M: NewMulConst);
616	BaseReg = Handle.getValue();
617	Opc = CurDAG->getTargetConstant(
618	Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: PowerOfTwo), DL: Loc, VT: MVT::i32);
619	return true;
620	}
621	}
622
623	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
624
625	// Don't match base register only case. That is matched to a separate
626	// lower complexity pattern with explicit register operand.
627	if (ShOpcVal == ARM_AM::no_shift) return false;
628
629	BaseReg = N.getOperand(i: `0`);
630	unsigned ShImmVal = `0`;
631	ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
632	if (!RHS) return false;
633	ShImmVal = RHS->getZExtValue() & `31`;
634	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
635	DL: SDLoc (N), VT: MVT::i32);
636	return true;
637	}
638
639	bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
640	SDValue &BaseReg,
641	SDValue &ShReg,
642	SDValue &Opc,
643	bool CheckProfitability) {
644	if (DisableShifterOp)
645	return false;
646
647	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
648
649	// Don't match base register only case. That is matched to a separate
650	// lower complexity pattern with explicit register operand.
651	if (ShOpcVal == ARM_AM::no_shift) return false;
652
653	BaseReg = N.getOperand(i: `0`);
654	unsigned ShImmVal = `0`;
655	ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
656	if (RHS) return false;
657
658	ShReg = N.getOperand(i: `1`);
659	if (CheckProfitability && !isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt: ShImmVal))
660	return false;
661	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
662	DL: SDLoc (N), VT: MVT::i32);
663	return true;
664	}
665
666	// Determine whether an ISD::OR's operands are suitable to turn the operation
667	// into an addition, which often has more compact encodings.
668	bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
669	assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
670	Out = N;
671	return CurDAG->haveNoCommonBitsSet(A: N, B: Parent->getOperand(Num: `1`));
672	}
673
674
675	bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
676	SDValue &Base,
677	SDValue &OffImm) {
678	// Match simple R + imm12 operands.
679
680	// Base only.
681	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
682	!CurDAG->isBaseWithConstantOffset(Op: N)) {
683	if (N.getOpcode() == ISD::FrameIndex) {
684	// Match frame index.
685	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
686	Base = CurDAG->getTargetFrameIndex(
687	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
688	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
689	return true;
690	}
691
692	if (N.getOpcode() == ARMISD::Wrapper &&
693	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
694	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
695	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
696	Base = N.getOperand(i: `0`);
697	} else
698	Base = N;
699	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
700	return true;
701	}
702
703	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
704	int RHSC = (int)RHS->getSExtValue();
705	if (N.getOpcode() == ISD::SUB)
706	RHSC = -RHSC;
707
708	if (RHSC > -`0x1000` && RHSC < `0x1000`) { // 12 bits
709	Base = N.getOperand(i: `0`);
710	if (Base.getOpcode() == ISD::FrameIndex) {
711	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
712	Base = CurDAG->getTargetFrameIndex(
713	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
714	}
715	OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
716	return true;
717	}
718	}
719
720	// Base only.
721	Base = N;
722	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
723	return true;
724	}
725
726
727
728	bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
729	SDValue &Opc) {
730	if (N.getOpcode() == ISD::MUL &&
731	((!Subtarget->isLikeA9() && !Subtarget->isSwift()) \|\| N.hasOneUse())) {
732	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
733	// X [3,5,9] -> X + X * [2,4,8] etc.*
734	int RHSC = (int)RHS->getZExtValue();
735	if (RHSC & `1`) {
736	RHSC = RHSC & ~`1`;
737	ARM_AM::AddrOpc AddSub = ARM_AM::add;
738	if (RHSC < `0`) {
739	AddSub = ARM_AM::sub;
740	RHSC = - RHSC;
741	}
742	if (isPowerOf2_32(Value: RHSC)) {
743	unsigned ShAmt = Log2_32(Value: RHSC);
744	Base = Offset = N.getOperand(i: `0`);
745	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt,
746	SO: ARM_AM::lsl),
747	DL: SDLoc (N), VT: MVT::i32);
748	return true;
749	}
750	}
751	}
752	}
753
754	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
755	// ISD::OR that is equivalent to an ISD::ADD.
756	!CurDAG->isBaseWithConstantOffset(Op: N))
757	return false;
758
759	// Leave simple R +/- imm12 operands for LDRi12
760	if (N.getOpcode() == ISD::ADD \|\| N.getOpcode() == ISD::OR) {
761	int RHSC;
762	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), /Scale=/`1`,
763	RangeMin: -`0x1000`+`1`, RangeMax: `0x1000`, ScaledConstant&: RHSC)) // 12 bits.
764	return false;
765	}
766
767	// Otherwise this is R +/- [possibly shifted] R.
768	ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
769	ARM_AM::ShiftOpc ShOpcVal =
770	ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: `1`).getOpcode());
771	unsigned ShAmt = `0`;
772
773	Base = N.getOperand(i: `0`);
774	Offset = N.getOperand(i: `1`);
775
776	if (ShOpcVal != ARM_AM::no_shift) {
777	// Check to see if the RHS of the shift is a constant, if not, we can't fold
778	// it.
779	if (ConstantSDNode *Sh =
780	dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`).getOperand(i: `1`))) {
781	ShAmt = Sh->getZExtValue();
782	if (isShifterOpProfitable(Shift: Offset, ShOpcVal, ShAmt))
783	Offset = N.getOperand(i: `1`).getOperand(i: `0`);
784	else {
785	ShAmt = `0`;
786	ShOpcVal = ARM_AM::no_shift;
787	}
788	} else {
789	ShOpcVal = ARM_AM::no_shift;
790	}
791	}
792
793	// Try matching (R shl C) + (R).
794	if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
795	!(Subtarget->isLikeA9() \|\| Subtarget->isSwift() \|\|
796	N.getOperand(i: `0`).hasOneUse())) {
797	ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: `0`).getOpcode());
798	if (ShOpcVal != ARM_AM::no_shift) {
799	// Check to see if the RHS of the shift is a constant, if not, we can't
800	// fold it.
801	if (ConstantSDNode *Sh =
802	dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `0`).getOperand(i: `1`))) {
803	ShAmt = Sh->getZExtValue();
804	if (isShifterOpProfitable(Shift: N.getOperand(i: `0`), ShOpcVal, ShAmt)) {
805	Offset = N.getOperand(i: `0`).getOperand(i: `0`);
806	Base = N.getOperand(i: `1`);
807	} else {
808	ShAmt = `0`;
809	ShOpcVal = ARM_AM::no_shift;
810	}
811	} else {
812	ShOpcVal = ARM_AM::no_shift;
813	}
814	}
815	}
816
817	// If Offset is a multiply-by-constant and it's profitable to extract a shift
818	// and use it in a shifted operand do so.
819	if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
820	unsigned PowerOfTwo = `0`;
821	SDValue NewMulConst;
822	if (canExtractShiftFromMul(N: Offset, MaxShift: `31`, PowerOfTwo, NewMulConst)) {
823	HandleSDNode Handle(Offset);
824	replaceDAGValue(N: Offset.getOperand(i: `1`), M: NewMulConst);
825	Offset = Handle.getValue();
826	ShAmt = PowerOfTwo;
827	ShOpcVal = ARM_AM::lsl;
828	}
829	}
830
831	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
832	DL: SDLoc (N), VT: MVT::i32);
833	return true;
834	}
835
836	bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
837	SDValue &Offset, SDValue &Opc) {
838	unsigned Opcode = Op->getOpcode();
839	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
840	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
841	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
842	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
843	? ARM_AM::add : ARM_AM::sub;
844	int Val;
845	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x1000`, ScaledConstant&: Val))
846	return false;
847
848	Offset = N;
849	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
850	unsigned ShAmt = `0`;
851	if (ShOpcVal != ARM_AM::no_shift) {
852	// Check to see if the RHS of the shift is a constant, if not, we can't fold
853	// it.
854	if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
855	ShAmt = Sh->getZExtValue();
856	if (isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt))
857	Offset = N.getOperand(i: `0`);
858	else {
859	ShAmt = `0`;
860	ShOpcVal = ARM_AM::no_shift;
861	}
862	} else {
863	ShOpcVal = ARM_AM::no_shift;
864	}
865	}
866
867	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
868	DL: SDLoc (N), VT: MVT::i32);
869	return true;
870	}
871
872	bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
873	SDValue &Offset, SDValue &Opc) {
874	unsigned Opcode = Op->getOpcode();
875	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
876	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
877	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
878	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
879	? ARM_AM::add : ARM_AM::sub;
880	int Val;
881	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x1000`, ScaledConstant&: Val)) { // 12 bits.
882	if (AddSub == ARM_AM::sub) Val *= -`1`;
883	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
884	Opc = CurDAG->getTargetConstant(Val, DL: SDLoc (Op), VT: MVT::i32);
885	return true;
886	}
887
888	return false;
889	}
890
891
892	bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
893	SDValue &Offset, SDValue &Opc) {
894	unsigned Opcode = Op->getOpcode();
895	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
896	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
897	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
898	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
899	? ARM_AM::add : ARM_AM::sub;
900	int Val;
901	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x1000`, ScaledConstant&: Val)) { // 12 bits.
902	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
903	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: Val,
904	SO: ARM_AM::no_shift),
905	DL: SDLoc (Op), VT: MVT::i32);
906	return true;
907	}
908
909	return false;
910	}
911
912	bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
913	Base = N;
914	return true;
915	}
916
917	bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
918	SDValue &Base, SDValue &Offset,
919	SDValue &Opc) {
920	if (N.getOpcode() == ISD::SUB) {
921	// X - C is canonicalize to X + -C, no need to handle it here.
922	Base = N.getOperand(i: `0`);
923	Offset = N.getOperand(i: `1`);
924	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::sub, Offset: `0`), DL: SDLoc (N),
925	VT: MVT::i32);
926	return true;
927	}
928
929	if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
930	Base = N;
931	if (N.getOpcode() == ISD::FrameIndex) {
932	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
933	Base = CurDAG->getTargetFrameIndex(
934	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
935	}
936	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
937	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: `0`), DL: SDLoc (N),
938	VT: MVT::i32);
939	return true;
940	}
941
942	// If the RHS is +/- imm8, fold into addr mode.
943	int RHSC;
944	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), /Scale=/`1`,
945	RangeMin: -`256` + `1`, RangeMax: `256`, ScaledConstant&: RHSC)) { // 8 bits.
946	Base = N.getOperand(i: `0`);
947	if (Base.getOpcode() == ISD::FrameIndex) {
948	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
949	Base = CurDAG->getTargetFrameIndex(
950	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
951	}
952	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
953
954	ARM_AM::AddrOpc AddSub = ARM_AM::add;
955	if (RHSC < `0`) {
956	AddSub = ARM_AM::sub;
957	RHSC = -RHSC;
958	}
959	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: RHSC), DL: SDLoc (N),
960	VT: MVT::i32);
961	return true;
962	}
963
964	Base = N.getOperand(i: `0`);
965	Offset = N.getOperand(i: `1`);
966	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: `0`), DL: SDLoc (N),
967	VT: MVT::i32);
968	return true;
969	}
970
971	bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
972	SDValue &Offset, SDValue &Opc) {
973	unsigned Opcode = Op->getOpcode();
974	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
975	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
976	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
977	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
978	? ARM_AM::add : ARM_AM::sub;
979	int Val;
980	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `256`, ScaledConstant&: Val)) { // 12 bits.
981	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
982	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: Val), DL: SDLoc (Op),
983	VT: MVT::i32);
984	return true;
985	}
986
987	Offset = N;
988	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: `0`), DL: SDLoc (Op),
989	VT: MVT::i32);
990	return true;
991	}
992
993	bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
994	bool FP16) {
995	if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
996	Base = N;
997	if (N.getOpcode() == ISD::FrameIndex) {
998	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
999	Base = CurDAG->getTargetFrameIndex(
1000	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1001	} else if (N.getOpcode() == ARMISD::Wrapper &&
1002	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
1003	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
1004	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
1005	Base = N.getOperand(i: `0`);
1006	}
1007	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: `0`),
1008	DL: SDLoc (N), VT: MVT::i32);
1009	return true;
1010	}
1011
1012	// If the RHS is +/- imm8, fold into addr mode.
1013	int RHSC;
1014	const int Scale = FP16 ? `2` : `4`;
1015
1016	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale, RangeMin: -`255`, RangeMax: `256`, ScaledConstant&: RHSC)) {
1017	Base = N.getOperand(i: `0`);
1018	if (Base.getOpcode() == ISD::FrameIndex) {
1019	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1020	Base = CurDAG->getTargetFrameIndex(
1021	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1022	}
1023
1024	ARM_AM::AddrOpc AddSub = ARM_AM::add;
1025	if (RHSC < `0`) {
1026	AddSub = ARM_AM::sub;
1027	RHSC = -RHSC;
1028	}
1029
1030	if (FP16)
1031	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: AddSub, Offset: RHSC),
1032	DL: SDLoc (N), VT: MVT::i32);
1033	else
1034	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: AddSub, Offset: RHSC),
1035	DL: SDLoc (N), VT: MVT::i32);
1036
1037	return true;
1038	}
1039
1040	Base = N;
1041
1042	if (FP16)
1043	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: ARM_AM::add, Offset: `0`),
1044	DL: SDLoc (N), VT: MVT::i32);
1045	else
1046	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: `0`),
1047	DL: SDLoc (N), VT: MVT::i32);
1048
1049	return true;
1050	}
1051
1052	bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1053	SDValue &Base, SDValue &Offset) {
1054	return IsAddressingMode5(N, Base, Offset, /FP16=/ false);
1055	}
1056
1057	bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1058	SDValue &Base, SDValue &Offset) {
1059	return IsAddressingMode5(N, Base, Offset, /FP16=/ true);
1060	}
1061
1062	bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1063	SDValue &Align) {
1064	Addr = N;
1065
1066	unsigned Alignment = `0`;
1067
1068	MemSDNode *MemN = cast<MemSDNode>(Val: Parent);
1069
1070	if (isa<LSBaseSDNode>(Val: MemN) \|\|
1071	((MemN->getOpcode() == ARMISD::VST1_UPD \|\|
1072	MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1073	MemN->getConstantOperandVal(Num: MemN->getNumOperands() - `1`) == `1`)) {
1074	// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1075	// The maximum alignment is equal to the memory size being referenced.
1076	llvm::Align MMOAlign = MemN->getAlign();
1077	unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / `8`;
1078	if (MMOAlign.value() >= MemSize && MemSize > `1`)
1079	Alignment = MemSize;
1080	} else {
1081	// All other uses of addrmode6 are for intrinsics. For now just record
1082	// the raw alignment value; it will be refined later based on the legal
1083	// alignment operands for the intrinsic.
1084	Alignment = MemN->getAlign().value();
1085	}
1086
1087	Align = CurDAG->getTargetConstant(Val: Alignment, DL: SDLoc (N), VT: MVT::i32);
1088	return true;
1089	}
1090
1091	bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1092	SDValue &Offset) {
1093	LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Val: Op);
1094	ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1095	if (AM != ISD::POST_INC)
1096	return false;
1097	Offset = N;
1098	if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(Val&: N)) {
1099	if (NC->getZExtValue() * `8` == LdSt->getMemoryVT().getSizeInBits())
1100	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
1101	}
1102	return true;
1103	}
1104
1105	bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1106	SDValue &Offset, SDValue &Label) {
1107	if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1108	Offset = N.getOperand(i: `0`);
1109	SDValue N1 = N.getOperand(i: `1`);
1110	Label = CurDAG->getTargetConstant(Val: N1 ->getAsZExtVal(), DL: SDLoc (N), VT: MVT::i32);
1111	return true;
1112	}
1113
1114	return false;
1115	}
1116
1117
1118	//===----------------------------------------------------------------------===//
1119	// Thumb Addressing Modes
1120	//===----------------------------------------------------------------------===//
1121
1122	static bool shouldUseZeroOffsetLdSt(SDValue N) {
1123	// Negative numbers are difficult to materialise in thumb1. If we are
1124	// selecting the add of a negative, instead try to select ri with a zero
1125	// offset, so create the add node directly which will become a sub.
1126	if (N.getOpcode() != ISD::ADD)
1127	return false;
1128
1129	// Look for an imm which is not legal for ld/st, but is legal for sub.
1130	if (auto C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`)))
1131	return C->getSExtValue() < `0` && C->getSExtValue() >= -`255`;
1132
1133	return false;
1134	}
1135
1136	bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1137	SDValue &Offset) {
1138	if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N)) {
1139	if (!isNullConstant(V: N))
1140	return false;
1141
1142	Base = Offset = N;
1143	return true;
1144	}
1145
1146	Base = N.getOperand(i: `0`);
1147	Offset = N.getOperand(i: `1`);
1148	return true;
1149	}
1150
1151	bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1152	SDValue &Offset) {
1153	if (shouldUseZeroOffsetLdSt(N))
1154	return false; // Select ri instead
1155	return SelectThumbAddrModeRRSext(N, Base, Offset);
1156	}
1157
1158	bool
1159	ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1160	SDValue &Base, SDValue &OffImm) {
1161	if (shouldUseZeroOffsetLdSt(N)) {
1162	Base = N;
1163	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1164	return true;
1165	}
1166
1167	if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
1168	if (N.getOpcode() == ISD::ADD) {
1169	return false; // We want to select register offset instead
1170	} else if (N.getOpcode() == ARMISD::Wrapper &&
1171	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
1172	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
1173	N.getOperand(i: `0`).getOpcode() != ISD::TargetConstantPool &&
1174	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
1175	Base = N.getOperand(i: `0`);
1176	} else {
1177	Base = N;
1178	}
1179
1180	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1181	return true;
1182	}
1183
1184	// If the RHS is + imm5 scale, fold into addr mode.*
1185	int RHSC;
1186	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale, RangeMin: `0`, RangeMax: `32`, ScaledConstant&: RHSC)) {
1187	Base = N.getOperand(i: `0`);
1188	OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1189	return true;
1190	}
1191
1192	// Offset is too large, so use register offset instead.
1193	return false;
1194	}
1195
1196	bool
1197	ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1198	SDValue &OffImm) {
1199	return SelectThumbAddrModeImm5S(N, Scale: `4`, Base, OffImm);
1200	}
1201
1202	bool
1203	ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1204	SDValue &OffImm) {
1205	return SelectThumbAddrModeImm5S(N, Scale: `2`, Base, OffImm);
1206	}
1207
1208	bool
1209	ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1210	SDValue &OffImm) {
1211	return SelectThumbAddrModeImm5S(N, Scale: `1`, Base, OffImm);
1212	}
1213
1214	bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1215	SDValue &Base, SDValue &OffImm) {
1216	if (N.getOpcode() == ISD::FrameIndex) {
1217	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1218	// Only multiples of 4 are allowed for the offset, so the frame object
1219	// alignment must be at least 4.
1220	MachineFrameInfo &MFI = MF->getFrameInfo();
1221	if (MFI.getObjectAlign(ObjectIdx: FI) < Align (`4`))
1222	MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align (`4`));
1223	Base = CurDAG->getTargetFrameIndex(
1224	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1225	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1226	return true;
1227	}
1228
1229	if (!CurDAG->isBaseWithConstantOffset(Op: N))
1230	return false;
1231
1232	if (N.getOperand(i: `0`).getOpcode() == ISD::FrameIndex) {
1233	// If the RHS is + imm8 scale, fold into addr mode.*
1234	int RHSC;
1235	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), /Scale=/`4`, RangeMin: `0`, RangeMax: `256`, ScaledConstant&: RHSC)) {
1236	Base = N.getOperand(i: `0`);
1237	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1238	// Make sure the offset is inside the object, or we might fail to
1239	// allocate an emergency spill slot. (An out-of-range access is UB, but
1240	// it could show up anyway.)
1241	MachineFrameInfo &MFI = MF->getFrameInfo();
1242	if (RHSC * `4` < MFI.getObjectSize(ObjectIdx: FI)) {
1243	// For LHS+RHS to result in an offset that's a multiple of 4 the object
1244	// indexed by the LHS must be 4-byte aligned.
1245	if (!MFI.isFixedObjectIndex(ObjectIdx: FI) && MFI.getObjectAlign(ObjectIdx: FI) < Align (`4`))
1246	MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align (`4`));
1247	if (MFI.getObjectAlign(ObjectIdx: FI) >= Align (`4`)) {
1248	Base = CurDAG->getTargetFrameIndex(
1249	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1250	OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1251	return true;
1252	}
1253	}
1254	}
1255	}
1256
1257	return false;
1258	}
1259
1260	template <unsigned Shift>
1261	bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1262	SDValue &OffImm) {
1263	if (N.getOpcode() == ISD::SUB \|\| CurDAG->isBaseWithConstantOffset(Op: N)) {
1264	int RHSC;
1265	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale: `1` << Shift, RangeMin: -`0x7f`, RangeMax: `0x80`,
1266	ScaledConstant&: RHSC)) {
1267	Base = N.getOperand(i: `0`);
1268	if (N.getOpcode() == ISD::SUB)
1269	RHSC = -RHSC;
1270	OffImm =
1271	CurDAG->getTargetConstant(Val: RHSC * (`1` << Shift), DL: SDLoc (N), VT: MVT::i32);
1272	return true;
1273	}
1274	}
1275
1276	// Base only.
1277	Base = N;
1278	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1279	return true;
1280	}
1281
1282
1283	//===----------------------------------------------------------------------===//
1284	// Thumb 2 Addressing Modes
1285	//===----------------------------------------------------------------------===//
1286
1287
1288	bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1289	SDValue &Base, SDValue &OffImm) {
1290	// Match simple R + imm12 operands.
1291
1292	// Base only.
1293	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1294	!CurDAG->isBaseWithConstantOffset(Op: N)) {
1295	if (N.getOpcode() == ISD::FrameIndex) {
1296	// Match frame index.
1297	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1298	Base = CurDAG->getTargetFrameIndex(
1299	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1300	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1301	return true;
1302	}
1303
1304	if (N.getOpcode() == ARMISD::Wrapper &&
1305	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
1306	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
1307	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
1308	Base = N.getOperand(i: `0`);
1309	if (Base.getOpcode() == ISD::TargetConstantPool)
1310	return false; // We want to select t2LDRpci instead.
1311	} else
1312	Base = N;
1313	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1314	return true;
1315	}
1316
1317	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1318	if (SelectT2AddrModeImm8(N, Base, OffImm))
1319	// Let t2LDRi8 handle (R - imm8).
1320	return false;
1321
1322	int RHSC = (int)RHS->getZExtValue();
1323	if (N.getOpcode() == ISD::SUB)
1324	RHSC = -RHSC;
1325
1326	if (RHSC >= `0` && RHSC < `0x1000`) { // 12 bits (unsigned)
1327	Base = N.getOperand(i: `0`);
1328	if (Base.getOpcode() == ISD::FrameIndex) {
1329	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1330	Base = CurDAG->getTargetFrameIndex(
1331	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1332	}
1333	OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1334	return true;
1335	}
1336	}
1337
1338	// Base only.
1339	Base = N;
1340	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1341	return true;
1342	}
1343
1344	template <unsigned Shift>
1345	bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1346	SDValue &OffImm) {
1347	if (N.getOpcode() == ISD::SUB \|\| CurDAG->isBaseWithConstantOffset(Op: N)) {
1348	int RHSC;
1349	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale: `1` << Shift, RangeMin: -`255`, RangeMax: `256`, ScaledConstant&: RHSC)) {
1350	Base = N.getOperand(i: `0`);
1351	if (Base.getOpcode() == ISD::FrameIndex) {
1352	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1353	Base = CurDAG->getTargetFrameIndex(
1354	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1355	}
1356
1357	if (N.getOpcode() == ISD::SUB)
1358	RHSC = -RHSC;
1359	OffImm =
1360	CurDAG->getTargetConstant(Val: RHSC * (`1` << Shift), DL: SDLoc (N), VT: MVT::i32);
1361	return true;
1362	}
1363	}
1364
1365	// Base only.
1366	Base = N;
1367	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1368	return true;
1369	}
1370
1371	bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1372	SDValue &Base, SDValue &OffImm) {
1373	// Match simple R - imm8 operands.
1374	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1375	!CurDAG->isBaseWithConstantOffset(Op: N))
1376	return false;
1377
1378	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1379	int RHSC = (int)RHS->getSExtValue();
1380	if (N.getOpcode() == ISD::SUB)
1381	RHSC = -RHSC;
1382
1383	if ((RHSC >= -`255`) && (RHSC < `0`)) { // 8 bits (always negative)
1384	Base = N.getOperand(i: `0`);
1385	if (Base.getOpcode() == ISD::FrameIndex) {
1386	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1387	Base = CurDAG->getTargetFrameIndex(
1388	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1389	}
1390	OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1391	return true;
1392	}
1393	}
1394
1395	return false;
1396	}
1397
1398	bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1399	SDValue &OffImm){
1400	unsigned Opcode = Op->getOpcode();
1401	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1402	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
1403	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
1404	int RHSC;
1405	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x100`, ScaledConstant&: RHSC)) { // 8 bits.
1406	OffImm = ((AM == ISD::PRE_INC) \|\| (AM == ISD::POST_INC))
1407	? CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32)
1408	: CurDAG->getTargetConstant(Val: -RHSC, DL: SDLoc (N), VT: MVT::i32);
1409	return true;
1410	}
1411
1412	return false;
1413	}
1414
1415	template <unsigned Shift>
1416	bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1417	SDValue &OffImm) {
1418	if (N.getOpcode() == ISD::SUB \|\| CurDAG->isBaseWithConstantOffset(Op: N)) {
1419	int RHSC;
1420	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale: `1` << Shift, RangeMin: -`0x7f`, RangeMax: `0x80`,
1421	ScaledConstant&: RHSC)) {
1422	Base = N.getOperand(i: `0`);
1423	if (Base.getOpcode() == ISD::FrameIndex) {
1424	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1425	Base = CurDAG->getTargetFrameIndex(
1426	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1427	}
1428
1429	if (N.getOpcode() == ISD::SUB)
1430	RHSC = -RHSC;
1431	OffImm =
1432	CurDAG->getTargetConstant(Val: RHSC * (`1` << Shift), DL: SDLoc (N), VT: MVT::i32);
1433	return true;
1434	}
1435	}
1436
1437	// Base only.
1438	Base = N;
1439	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1440	return true;
1441	}
1442
1443	template <unsigned Shift>
1444	bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1445	SDValue &OffImm) {
1446	return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1447	}
1448
1449	bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1450	SDValue &OffImm,
1451	unsigned Shift) {
1452	unsigned Opcode = Op->getOpcode();
1453	ISD::MemIndexedMode AM;
1454	switch (Opcode) {
1455	case ISD::LOAD:
1456	AM = cast<LoadSDNode>(Val: Op)->getAddressingMode();
1457	break;
1458	case ISD::STORE:
1459	AM = cast<StoreSDNode>(Val: Op)->getAddressingMode();
1460	break;
1461	case ISD::MLOAD:
1462	AM = cast<MaskedLoadSDNode>(Val: Op)->getAddressingMode();
1463	break;
1464	case ISD::MSTORE:
1465	AM = cast<MaskedStoreSDNode>(Val: Op)->getAddressingMode();
1466	break;
1467	default:
1468	llvm_unreachable("Unexpected Opcode for Imm7Offset");
1469	}
1470
1471	int RHSC;
1472	// 7 bit constant, shifted by Shift.
1473	if (isScaledConstantInRange(Node: N, Scale: `1` << Shift, RangeMin: `0`, RangeMax: `0x80`, ScaledConstant&: RHSC)) {
1474	OffImm =
1475	((AM == ISD::PRE_INC) \|\| (AM == ISD::POST_INC))
1476	? CurDAG->getTargetConstant(Val: RHSC * (`1` << Shift), DL: SDLoc (N), VT: MVT::i32)
1477	: CurDAG->getTargetConstant(Val: -RHSC * (`1` << Shift), DL: SDLoc (N),
1478	VT: MVT::i32);
1479	return true;
1480	}
1481	return false;
1482	}
1483
1484	template <int Min, int Max>
1485	bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1486	int Val;
1487	if (isScaledConstantInRange(Node: N, Scale: `1`, RangeMin: Min, RangeMax: Max, ScaledConstant&: Val)) {
1488	OffImm = CurDAG->getTargetConstant(Val, DL: SDLoc (N), VT: MVT::i32);
1489	return true;
1490	}
1491	return false;
1492	}
1493
1494	bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1495	SDValue &Base,
1496	SDValue &OffReg, SDValue &ShImm) {
1497	// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1498	if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N))
1499	return false;
1500
1501	// Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1502	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1503	int RHSC = (int)RHS->getZExtValue();
1504	if (RHSC >= `0` && RHSC < `0x1000`) // 12 bits (unsigned)
1505	return false;
1506	else if (RHSC < `0` && RHSC >= -`255`) // 8 bits
1507	return false;
1508	}
1509
1510	// Look for (R + R) or (R + (R << [1,2,3])).
1511	unsigned ShAmt = `0`;
1512	Base = N.getOperand(i: `0`);
1513	OffReg = N.getOperand(i: `1`);
1514
1515	// Swap if it is ((R << c) + R).
1516	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: OffReg.getOpcode());
1517	if (ShOpcVal != ARM_AM::lsl) {
1518	ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: Base.getOpcode());
1519	if (ShOpcVal == ARM_AM::lsl)
1520	std::swap(a&: Base, b&: OffReg);
1521	}
1522
1523	if (ShOpcVal == ARM_AM::lsl) {
1524	// Check to see if the RHS of the shift is a constant, if not, we can't fold
1525	// it.
1526	if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: OffReg.getOperand(i: `1`))) {
1527	ShAmt = Sh->getZExtValue();
1528	if (ShAmt < `4` && isShifterOpProfitable(Shift: OffReg, ShOpcVal, ShAmt))
1529	OffReg = OffReg.getOperand(i: `0`);
1530	else {
1531	ShAmt = `0`;
1532	}
1533	}
1534	}
1535
1536	// If OffReg is a multiply-by-constant and it's profitable to extract a shift
1537	// and use it in a shifted operand do so.
1538	if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1539	unsigned PowerOfTwo = `0`;
1540	SDValue NewMulConst;
1541	if (canExtractShiftFromMul(N: OffReg, MaxShift: `3`, PowerOfTwo, NewMulConst)) {
1542	HandleSDNode Handle(OffReg);
1543	replaceDAGValue(N: OffReg.getOperand(i: `1`), M: NewMulConst);
1544	OffReg = Handle.getValue();
1545	ShAmt = PowerOfTwo;
1546	}
1547	}
1548
1549	ShImm = CurDAG->getTargetConstant(Val: ShAmt, DL: SDLoc (N), VT: MVT::i32);
1550
1551	return true;
1552	}
1553
1554	bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1555	SDValue &OffImm) {
1556	// This must* succeed since it's used for the irreplaceable ldrex and strex*
1557	// instructions.
1558	Base = N;
1559	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1560
1561	if (N.getOpcode() != ISD::ADD \|\| !CurDAG->isBaseWithConstantOffset(Op: N))
1562	return true;
1563
1564	ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
1565	if (!RHS)
1566	return true;
1567
1568	uint32_t RHSC = (int)RHS->getZExtValue();
1569	if (RHSC > `1020` \|\| RHSC % `4` != `0`)
1570	return true;
1571
1572	Base = N.getOperand(i: `0`);
1573	if (Base.getOpcode() == ISD::FrameIndex) {
1574	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1575	Base = CurDAG->getTargetFrameIndex(
1576	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1577	}
1578
1579	OffImm = CurDAG->getTargetConstant(Val: RHSC/`4`, DL: SDLoc (N), VT: MVT::i32);
1580	return true;
1581	}
1582
1583	//===--------------------------------------------------------------------===//
1584
1585	/// getAL - Returns a ARMCC::AL immediate node.
1586	static inline SDValue getAL(SelectionDAG CurDAG, const* SDLoc &dl) {
1587	return CurDAG->getTargetConstant(Val: (uint64_t)ARMCC::AL, DL: dl, VT: MVT::i32);
1588	}
1589
1590	void ARMDAGToDAGISel::transferMemOperands(SDNode N, SDNode Result) {
1591	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1592	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
1593	}
1594
1595	bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1596	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1597	ISD::MemIndexedMode AM = LD->getAddressingMode();
1598	if (AM == ISD::UNINDEXED)
1599	return false;
1600
1601	EVT LoadedVT = LD->getMemoryVT();
1602	SDValue Offset, AMOpc;
1603	bool isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1604	unsigned Opcode = `0`;
1605	bool Match = false;
1606	if (LoadedVT == MVT::i32 && isPre &&
1607	SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1608	Opcode = ARM::LDR_PRE_IMM;
1609	Match = true;
1610	} else if (LoadedVT == MVT::i32 && !isPre &&
1611	SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1612	Opcode = ARM::LDR_POST_IMM;
1613	Match = true;
1614	} else if (LoadedVT == MVT::i32 &&
1615	SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1616	Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1617	Match = true;
1618
1619	} else if (LoadedVT == MVT::i16 &&
1620	SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1621	Match = true;
1622	Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1623	? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1624	: (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1625	} else if (LoadedVT == MVT::i8 \|\| LoadedVT == MVT::i1) {
1626	if (LD->getExtensionType() == ISD::SEXTLOAD) {
1627	if (SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1628	Match = true;
1629	Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1630	}
1631	} else {
1632	if (isPre &&
1633	SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1634	Match = true;
1635	Opcode = ARM::LDRB_PRE_IMM;
1636	} else if (!isPre &&
1637	SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1638	Match = true;
1639	Opcode = ARM::LDRB_POST_IMM;
1640	} else if (SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1641	Match = true;
1642	Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1643	}
1644	}
1645	}
1646
1647	if (Match) {
1648	if (Opcode == ARM::LDR_PRE_IMM \|\| Opcode == ARM::LDRB_PRE_IMM) {
1649	SDValue Chain = LD->getChain();
1650	SDValue Base = LD->getBasePtr();
1651	SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, dl: SDLoc (N)),
1652	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1653	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32, VT2: MVT::i32,
1654	VT3: MVT::Other, Ops);
1655	transferMemOperands(N, Result: New);
1656	ReplaceNode(F: N, T: New);
1657	return true;
1658	} else {
1659	SDValue Chain = LD->getChain();
1660	SDValue Base = LD->getBasePtr();
1661	SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, dl: SDLoc (N)),
1662	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1663	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32, VT2: MVT::i32,
1664	VT3: MVT::Other, Ops);
1665	transferMemOperands(N, Result: New);
1666	ReplaceNode(F: N, T: New);
1667	return true;
1668	}
1669	}
1670
1671	return false;
1672	}
1673
1674	bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1675	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1676	EVT LoadedVT = LD->getMemoryVT();
1677	ISD::MemIndexedMode AM = LD->getAddressingMode();
1678	if (AM != ISD::POST_INC \|\| LD->getExtensionType() != ISD::NON_EXTLOAD \|\|
1679	LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1680	return false;
1681
1682	auto *COffs = dyn_cast<ConstantSDNode>(Val: LD->getOffset());
1683	if (!COffs \|\| COffs->getZExtValue() != `4`)
1684	return false;
1685
1686	// A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1687	// The encoding of LDM is not how the rest of ISel expects a post-inc load to
1688	// look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1689	// ISel.
1690	SDValue Chain = LD->getChain();
1691	SDValue Base = LD->getBasePtr();
1692	SDValue Ops[]= { Base, getAL(CurDAG, dl: SDLoc (N)),
1693	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1694	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::tLDR_postidx, dl: SDLoc (N), VT1: MVT::i32,
1695	VT2: MVT::i32, VT3: MVT::Other, Ops);
1696	transferMemOperands(N, Result: New);
1697	ReplaceNode(F: N, T: New);
1698	return true;
1699	}
1700
1701	bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1702	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1703	ISD::MemIndexedMode AM = LD->getAddressingMode();
1704	if (AM == ISD::UNINDEXED)
1705	return false;
1706
1707	EVT LoadedVT = LD->getMemoryVT();
1708	bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1709	SDValue Offset;
1710	bool isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1711	unsigned Opcode = `0`;
1712	bool Match = false;
1713	if (SelectT2AddrModeImm8Offset(Op: N, N: LD->getOffset(), OffImm&: Offset)) {
1714	switch (LoadedVT.getSimpleVT().SimpleTy) {
1715	case MVT::i32:
1716	Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1717	break;
1718	case MVT::i16:
1719	if (isSExtLd)
1720	Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1721	else
1722	Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1723	break;
1724	case MVT::i8:
1725	case MVT::i1:
1726	if (isSExtLd)
1727	Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1728	else
1729	Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1730	break;
1731	default:
1732	return false;
1733	}
1734	Match = true;
1735	}
1736
1737	if (Match) {
1738	SDValue Chain = LD->getChain();
1739	SDValue Base = LD->getBasePtr();
1740	SDValue Ops[]= { Base, Offset, getAL(CurDAG, dl: SDLoc (N)),
1741	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1742	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32, VT2: MVT::i32,
1743	VT3: MVT::Other, Ops);
1744	transferMemOperands(N, Result: New);
1745	ReplaceNode(F: N, T: New);
1746	return true;
1747	}
1748
1749	return false;
1750	}
1751
1752	bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1753	EVT LoadedVT;
1754	unsigned Opcode = `0`;
1755	bool isSExtLd, isPre;
1756	Align Alignment;
1757	ARMVCC::VPTCodes Pred;
1758	SDValue PredReg;
1759	SDValue Chain, Base, Offset;
1760
1761	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
1762	ISD::MemIndexedMode AM = LD->getAddressingMode();
1763	if (AM == ISD::UNINDEXED)
1764	return false;
1765	LoadedVT = LD->getMemoryVT();
1766	if (!LoadedVT.isVector())
1767	return false;
1768
1769	Chain = LD->getChain();
1770	Base = LD->getBasePtr();
1771	Offset = LD->getOffset();
1772	Alignment = LD->getAlign();
1773	isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1774	isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1775	Pred = ARMVCC::None;
1776	PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
1777	} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
1778	ISD::MemIndexedMode AM = LD->getAddressingMode();
1779	if (AM == ISD::UNINDEXED)
1780	return false;
1781	LoadedVT = LD->getMemoryVT();
1782	if (!LoadedVT.isVector())
1783	return false;
1784
1785	Chain = LD->getChain();
1786	Base = LD->getBasePtr();
1787	Offset = LD->getOffset();
1788	Alignment = LD->getAlign();
1789	isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1790	isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1791	Pred = ARMVCC::Then;
1792	PredReg = LD->getMask();
1793	} else
1794	llvm_unreachable("Expected a Load or a Masked Load!");
1795
1796	// We allow LE non-masked loads to change the type (for example use a vldrb.8
1797	// as opposed to a vldrw.32). This can allow extra addressing modes or
1798	// alignments for what is otherwise an equivalent instruction.
1799	bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(Val: N);
1800
1801	SDValue NewOffset;
1802	if (Alignment >= Align (`2`) && LoadedVT == MVT::v4i16 &&
1803	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `1`)) {
1804	if (isSExtLd)
1805	Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1806	else
1807	Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1808	} else if (LoadedVT == MVT::v8i8 &&
1809	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `0`)) {
1810	if (isSExtLd)
1811	Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1812	else
1813	Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1814	} else if (LoadedVT == MVT::v4i8 &&
1815	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `0`)) {
1816	if (isSExtLd)
1817	Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1818	else
1819	Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1820	} else if (Alignment >= Align (`4`) &&
1821	(CanChangeType \|\| LoadedVT == MVT::v4i32 \|\|
1822	LoadedVT == MVT::v4f32) &&
1823	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `2`))
1824	Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1825	else if (Alignment >= Align (`2`) &&
1826	(CanChangeType \|\| LoadedVT == MVT::v8i16 \|\|
1827	LoadedVT == MVT::v8f16) &&
1828	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `1`))
1829	Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1830	else if ((CanChangeType \|\| LoadedVT == MVT::v16i8) &&
1831	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `0`))
1832	Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1833	else
1834	return false;
1835
1836	SDValue Ops[] = {Base,
1837	NewOffset,
1838	CurDAG->getTargetConstant(Val: Pred, DL: SDLoc (N), VT: MVT::i32),
1839	PredReg,
1840	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), // tp_reg
1841	Chain};
1842	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32,
1843	VT2: N->getValueType(ResNo: `0`), VT3: MVT::Other, Ops);
1844	transferMemOperands(N, Result: New);
1845	ReplaceUses(F: SDValue (N, `0`), T: SDValue (New, `1`));
1846	ReplaceUses(F: SDValue (N, `1`), T: SDValue (New, `0`));
1847	ReplaceUses(F: SDValue (N, `2`), T: SDValue (New, `2`));
1848	CurDAG->RemoveDeadNode(N);
1849	return true;
1850	}
1851
1852	/// Form a GPRPair pseudo register from a pair of GPR regs.
1853	SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1854	SDLoc dl(V0.getNode());
1855	SDValue RegClass =
1856	CurDAG->getTargetConstant(Val: ARM::GPRPairRegClassID, DL: dl, VT: MVT::i32);
1857	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
1858	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
1859	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1860	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1861	}
1862
1863	/// Form a D register from a pair of S registers.
1864	SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1865	SDLoc dl(V0.getNode());
1866	SDValue RegClass =
1867	CurDAG->getTargetConstant(Val: ARM::DPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1868	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1869	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1870	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1871	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1872	}
1873
1874	/// Form a quad register from a pair of D registers.
1875	SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1876	SDLoc dl(V0.getNode());
1877	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QPRRegClassID, DL: dl,
1878	VT: MVT::i32);
1879	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1880	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1881	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1882	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1883	}
1884
1885	/// Form 4 consecutive D registers from a pair of Q registers.
1886	SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1887	SDLoc dl(V0.getNode());
1888	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1889	VT: MVT::i32);
1890	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1891	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1892	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1893	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1894	}
1895
1896	/// Form 4 consecutive S registers.
1897	SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1898	SDValue V2, SDValue V3) {
1899	SDLoc dl(V0.getNode());
1900	SDValue RegClass =
1901	CurDAG->getTargetConstant(Val: ARM::QPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1902	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1903	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1904	SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::ssub_2, DL: dl, VT: MVT::i32);
1905	SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::ssub_3, DL: dl, VT: MVT::i32);
1906	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1907	V2, SubReg2, V3, SubReg3 };
1908	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1909	}
1910
1911	/// Form 4 consecutive D registers.
1912	SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1913	SDValue V2, SDValue V3) {
1914	SDLoc dl(V0.getNode());
1915	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1916	VT: MVT::i32);
1917	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1918	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1919	SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::dsub_2, DL: dl, VT: MVT::i32);
1920	SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::dsub_3, DL: dl, VT: MVT::i32);
1921	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1922	V2, SubReg2, V3, SubReg3 };
1923	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1924	}
1925
1926	/// Form 4 consecutive Q registers.
1927	SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1928	SDValue V2, SDValue V3) {
1929	SDLoc dl(V0.getNode());
1930	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQQQPRRegClassID, DL: dl,
1931	VT: MVT::i32);
1932	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1933	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1934	SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::qsub_2, DL: dl, VT: MVT::i32);
1935	SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::qsub_3, DL: dl, VT: MVT::i32);
1936	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1937	V2, SubReg2, V3, SubReg3 };
1938	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1939	}
1940
1941	/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1942	/// of a NEON VLD or VST instruction. The supported values depend on the
1943	/// number of registers being loaded.
1944	SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1945	unsigned NumVecs, bool is64BitVector) {
1946	unsigned NumRegs = NumVecs;
1947	if (!is64BitVector && NumVecs < `3`)
1948	NumRegs *= `2`;
1949
1950	unsigned Alignment = Align ->getAsZExtVal();
1951	if (Alignment >= `32` && NumRegs == `4`)
1952	Alignment = `32`;
1953	else if (Alignment >= `16` && (NumRegs == `2` \|\| NumRegs == `4`))
1954	Alignment = `16`;
1955	else if (Alignment >= `8`)
1956	Alignment = `8`;
1957	else
1958	Alignment = `0`;
1959
1960	return CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
1961	}
1962
1963	static bool isVLDfixed(unsigned Opc)
1964	{
1965	switch (Opc) {
1966	default: return false;
1967	case ARM::VLD1d8wb_fixed : return true;
1968	case ARM::VLD1d16wb_fixed : return true;
1969	case ARM::VLD1d64Qwb_fixed : return true;
1970	case ARM::VLD1d32wb_fixed : return true;
1971	case ARM::VLD1d64wb_fixed : return true;
1972	case ARM::VLD1d8TPseudoWB_fixed : return true;
1973	case ARM::VLD1d16TPseudoWB_fixed : return true;
1974	case ARM::VLD1d32TPseudoWB_fixed : return true;
1975	case ARM::VLD1d64TPseudoWB_fixed : return true;
1976	case ARM::VLD1d8QPseudoWB_fixed : return true;
1977	case ARM::VLD1d16QPseudoWB_fixed : return true;
1978	case ARM::VLD1d32QPseudoWB_fixed : return true;
1979	case ARM::VLD1d64QPseudoWB_fixed : return true;
1980	case ARM::VLD1q8wb_fixed : return true;
1981	case ARM::VLD1q16wb_fixed : return true;
1982	case ARM::VLD1q32wb_fixed : return true;
1983	case ARM::VLD1q64wb_fixed : return true;
1984	case ARM::VLD1DUPd8wb_fixed : return true;
1985	case ARM::VLD1DUPd16wb_fixed : return true;
1986	case ARM::VLD1DUPd32wb_fixed : return true;
1987	case ARM::VLD1DUPq8wb_fixed : return true;
1988	case ARM::VLD1DUPq16wb_fixed : return true;
1989	case ARM::VLD1DUPq32wb_fixed : return true;
1990	case ARM::VLD2d8wb_fixed : return true;
1991	case ARM::VLD2d16wb_fixed : return true;
1992	case ARM::VLD2d32wb_fixed : return true;
1993	case ARM::VLD2q8PseudoWB_fixed : return true;
1994	case ARM::VLD2q16PseudoWB_fixed : return true;
1995	case ARM::VLD2q32PseudoWB_fixed : return true;
1996	case ARM::VLD2DUPd8wb_fixed : return true;
1997	case ARM::VLD2DUPd16wb_fixed : return true;
1998	case ARM::VLD2DUPd32wb_fixed : return true;
1999	case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
2000	case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
2001	case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
2002	}
2003	}
2004
2005	static bool isVSTfixed(unsigned Opc)
2006	{
2007	switch (Opc) {
2008	default: return false;
2009	case ARM::VST1d8wb_fixed : return true;
2010	case ARM::VST1d16wb_fixed : return true;
2011	case ARM::VST1d32wb_fixed : return true;
2012	case ARM::VST1d64wb_fixed : return true;
2013	case ARM::VST1q8wb_fixed : return true;
2014	case ARM::VST1q16wb_fixed : return true;
2015	case ARM::VST1q32wb_fixed : return true;
2016	case ARM::VST1q64wb_fixed : return true;
2017	case ARM::VST1d8TPseudoWB_fixed : return true;
2018	case ARM::VST1d16TPseudoWB_fixed : return true;
2019	case ARM::VST1d32TPseudoWB_fixed : return true;
2020	case ARM::VST1d64TPseudoWB_fixed : return true;
2021	case ARM::VST1d8QPseudoWB_fixed : return true;
2022	case ARM::VST1d16QPseudoWB_fixed : return true;
2023	case ARM::VST1d32QPseudoWB_fixed : return true;
2024	case ARM::VST1d64QPseudoWB_fixed : return true;
2025	case ARM::VST2d8wb_fixed : return true;
2026	case ARM::VST2d16wb_fixed : return true;
2027	case ARM::VST2d32wb_fixed : return true;
2028	case ARM::VST2q8PseudoWB_fixed : return true;
2029	case ARM::VST2q16PseudoWB_fixed : return true;
2030	case ARM::VST2q32PseudoWB_fixed : return true;
2031	}
2032	}
2033
2034	// Get the register stride update opcode of a VLD/VST instruction that
2035	// is otherwise equivalent to the given fixed stride updating instruction.
2036	static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2037	assert((isVLDfixed(Opc) \|\| isVSTfixed(Opc))
2038	&& "Incorrect fixed stride updating instruction.");
2039	switch (Opc) {
2040	default: break;
2041	case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2042	case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2043	case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2044	case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2045	case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2046	case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2047	case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2048	case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2049	case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2050	case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2051	case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2052	case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2053	case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2054	case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2055	case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2056	case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2057	case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2058	case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2059	case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2060	case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2061	case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2062	case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2063	case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2064	case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2065	case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2066	case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2067	case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2068
2069	case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2070	case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2071	case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2072	case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2073	case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2074	case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2075	case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2076	case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2077	case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2078	case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2079	case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2080	case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2081	case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2082	case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2083	case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2084	case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2085
2086	case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2087	case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2088	case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2089	case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2090	case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2091	case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2092
2093	case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2094	case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2095	case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2096	case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2097	case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2098	case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2099
2100	case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2101	case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2102	case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2103	}
2104	return Opc; // If not one we handle, return it unchanged.
2105	}
2106
2107	/// Returns true if the given increment is a Constant known to be equal to the
2108	/// access size performed by a NEON load/store. This means the "[rN]!" form can
2109	/// be used.
2110	static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2111	auto C = dyn_cast<ConstantSDNode>(Val&: Inc);
2112	return C && C->getZExtValue() == VecTy.getSizeInBits() / `8` * NumVecs;
2113	}
2114
2115	void ARMDAGToDAGISel::SelectVLD(SDNode N, bool* isUpdating, unsigned NumVecs,
2116	const uint16_t *DOpcodes,
2117	const uint16_t *QOpcodes0,
2118	const uint16_t *QOpcodes1) {
2119	assert(Subtarget->hasNEON());
2120	assert(NumVecs >= `1` && NumVecs <= `4` && "VLD NumVecs out-of-range");
2121	SDLoc dl(N);
2122
2123	SDValue MemAddr, Align;
2124	bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2125	// nodes are not intrinsics.
2126	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2127	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2128	return;
2129
2130	SDValue Chain = N->getOperand(Num: `0`);
2131	EVT VT = N->getValueType(ResNo: `0`);
2132	bool is64BitVector = VT.is64BitVector();
2133	Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2134
2135	unsigned OpcodeIndex;
2136	switch (VT.getSimpleVT().SimpleTy) {
2137	default: llvm_unreachable("unhandled vld type");
2138	// Double-register operations:
2139	case MVT::v8i8: OpcodeIndex = `0`; break;
2140	case MVT::v4f16:
2141	case MVT::v4bf16:
2142	case MVT::v4i16: OpcodeIndex = `1`; break;
2143	case MVT::v2f32:
2144	case MVT::v2i32: OpcodeIndex = `2`; break;
2145	case MVT::v1i64: OpcodeIndex = `3`; break;
2146	// Quad-register operations:
2147	case MVT::v16i8: OpcodeIndex = `0`; break;
2148	case MVT::v8f16:
2149	case MVT::v8bf16:
2150	case MVT::v8i16: OpcodeIndex = `1`; break;
2151	case MVT::v4f32:
2152	case MVT::v4i32: OpcodeIndex = `2`; break;
2153	case MVT::v2f64:
2154	case MVT::v2i64: OpcodeIndex = `3`; break;
2155	}
2156
2157	EVT ResTy;
2158	if (NumVecs == `1`)
2159	ResTy = VT;
2160	else {
2161	unsigned ResTyElts = (NumVecs == `3`) ? `4` : NumVecs;
2162	if (!is64BitVector)
2163	ResTyElts *= `2`;
2164	ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
2165	}
2166	std::vector<EVT> ResTys;
2167	ResTys.push_back(x: ResTy);
2168	if (isUpdating)
2169	ResTys.push_back(x: MVT::i32);
2170	ResTys.push_back(x: MVT::Other);
2171
2172	SDValue Pred = getAL(CurDAG, dl);
2173	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2174	SDNode *VLd;
2175	SmallVector<SDValue, `7`> Ops;
2176
2177	// Double registers and VLD1/VLD2 quad registers are directly supported.
2178	if (is64BitVector \|\| NumVecs <= `2`) {
2179	unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2180	QOpcodes0[OpcodeIndex]);
2181	Ops.push_back(Elt: MemAddr);
2182	Ops.push_back(Elt: Align);
2183	if (isUpdating) {
2184	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2185	bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2186	if (!IsImmUpdate) {
2187	// We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2188	// check for the opcode rather than the number of vector elements.
2189	if (isVLDfixed(Opc))
2190	Opc = getVLDSTRegisterUpdateOpcode(Opc);
2191	Ops.push_back(Elt: Inc);
2192	// VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2193	// the operands if not such an opcode.
2194	} else if (!isVLDfixed(Opc))
2195	Ops.push_back(Elt: Reg0);
2196	}
2197	Ops.push_back(Elt: Pred);
2198	Ops.push_back(Elt: Reg0);
2199	Ops.push_back(Elt: Chain);
2200	VLd = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2201
2202	} else {
2203	// Otherwise, quad registers are loaded with two separate instructions,
2204	// where one loads the even registers and the other loads the odd registers.
2205	EVT AddrTy = MemAddr.getValueType();
2206
2207	// Load the even subregs. This is always an updating load, so that it
2208	// provides the address to the second load for the odd subregs.
2209	SDValue ImplDef =
2210	SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), `0`);
2211	const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2212	SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2213	VT1: ResTy, VT2: AddrTy, VT3: MVT::Other, Ops: OpsA);
2214	Chain = SDValue (VLdA, `2`);
2215
2216	// Load the odd subregs.
2217	Ops.push_back(Elt: SDValue (VLdA, `1`));
2218	Ops.push_back(Elt: Align);
2219	if (isUpdating) {
2220	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2221	assert(isa<ConstantSDNode>(Inc.getNode()) &&
2222	"only constant post-increment update allowed for VLD3/4");
2223	(void)Inc;
2224	Ops.push_back(Elt: Reg0);
2225	}
2226	Ops.push_back(Elt: SDValue (VLdA, `0`));
2227	Ops.push_back(Elt: Pred);
2228	Ops.push_back(Elt: Reg0);
2229	Ops.push_back(Elt: Chain);
2230	VLd = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys, Ops);
2231	}
2232
2233	// Transfer memoperands.
2234	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2235	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLd), NewMemRefs: {MemOp});
2236
2237	if (NumVecs == `1`) {
2238	ReplaceNode(F: N, T: VLd);
2239	return;
2240	}
2241
2242	// Extract out the subregisters.
2243	SDValue SuperReg = SDValue (VLd, `0`);
2244	static_assert(ARM::dsub_7 == ARM::dsub_0 + `7` &&
2245	ARM::qsub_3 == ARM::qsub_0 + `3`,
2246	"Unexpected subreg numbering");
2247	unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2248	for (unsigned Vec = `0`; Vec < NumVecs; ++Vec)
2249	ReplaceUses(F: SDValue (N, Vec),
2250	T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2251	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (VLd, `1`));
2252	if (isUpdating)
2253	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (VLd, `2`));
2254	CurDAG->RemoveDeadNode(N);
2255	}
2256
2257	void ARMDAGToDAGISel::SelectVST(SDNode N, bool* isUpdating, unsigned NumVecs,
2258	const uint16_t *DOpcodes,
2259	const uint16_t *QOpcodes0,
2260	const uint16_t *QOpcodes1) {
2261	assert(Subtarget->hasNEON());
2262	assert(NumVecs >= `1` && NumVecs <= `4` && "VST NumVecs out-of-range");
2263	SDLoc dl(N);
2264
2265	SDValue MemAddr, Align;
2266	bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2267	// nodes are not intrinsics.
2268	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2269	unsigned Vec0Idx = `3`; // AddrOpIdx + (isUpdating ? 2 : 1)
2270	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2271	return;
2272
2273	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2274
2275	SDValue Chain = N->getOperand(Num: `0`);
2276	EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2277	bool is64BitVector = VT.is64BitVector();
2278	Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2279
2280	unsigned OpcodeIndex;
2281	switch (VT.getSimpleVT().SimpleTy) {
2282	default: llvm_unreachable("unhandled vst type");
2283	// Double-register operations:
2284	case MVT::v8i8: OpcodeIndex = `0`; break;
2285	case MVT::v4f16:
2286	case MVT::v4bf16:
2287	case MVT::v4i16: OpcodeIndex = `1`; break;
2288	case MVT::v2f32:
2289	case MVT::v2i32: OpcodeIndex = `2`; break;
2290	case MVT::v1i64: OpcodeIndex = `3`; break;
2291	// Quad-register operations:
2292	case MVT::v16i8: OpcodeIndex = `0`; break;
2293	case MVT::v8f16:
2294	case MVT::v8bf16:
2295	case MVT::v8i16: OpcodeIndex = `1`; break;
2296	case MVT::v4f32:
2297	case MVT::v4i32: OpcodeIndex = `2`; break;
2298	case MVT::v2f64:
2299	case MVT::v2i64: OpcodeIndex = `3`; break;
2300	}
2301
2302	std::vector<EVT> ResTys;
2303	if (isUpdating)
2304	ResTys.push_back(x: MVT::i32);
2305	ResTys.push_back(x: MVT::Other);
2306
2307	SDValue Pred = getAL(CurDAG, dl);
2308	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2309	SmallVector<SDValue, `7`> Ops;
2310
2311	// Double registers and VST1/VST2 quad registers are directly supported.
2312	if (is64BitVector \|\| NumVecs <= `2`) {
2313	SDValue SrcReg;
2314	if (NumVecs == `1`) {
2315	SrcReg = N->getOperand(Num: Vec0Idx);
2316	} else if (is64BitVector) {
2317	// Form a REG_SEQUENCE to force register allocation.
2318	SDValue V0 = N->getOperand(Num: Vec0Idx + `0`);
2319	SDValue V1 = N->getOperand(Num: Vec0Idx + `1`);
2320	if (NumVecs == `2`)
2321	SrcReg = SDValue (createDRegPairNode(VT: MVT::v2i64, V0, V1), `0`);
2322	else {
2323	SDValue V2 = N->getOperand(Num: Vec0Idx + `2`);
2324	// If it's a vst3, form a quad D-register and leave the last part as
2325	// an undef.
2326	SDValue V3 = (NumVecs == `3`)
2327	? SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF,dl,VT), `0`)
2328	: N->getOperand(Num: Vec0Idx + `3`);
2329	SrcReg = SDValue (createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), `0`);
2330	}
2331	} else {
2332	// Form a QQ register.
2333	SDValue Q0 = N->getOperand(Num: Vec0Idx);
2334	SDValue Q1 = N->getOperand(Num: Vec0Idx + `1`);
2335	SrcReg = SDValue (createQRegPairNode(VT: MVT::v4i64, V0: Q0, V1: Q1), `0`);
2336	}
2337
2338	unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2339	QOpcodes0[OpcodeIndex]);
2340	Ops.push_back(Elt: MemAddr);
2341	Ops.push_back(Elt: Align);
2342	if (isUpdating) {
2343	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2344	bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2345	if (!IsImmUpdate) {
2346	// We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2347	// check for the opcode rather than the number of vector elements.
2348	if (isVSTfixed(Opc))
2349	Opc = getVLDSTRegisterUpdateOpcode(Opc);
2350	Ops.push_back(Elt: Inc);
2351	}
2352	// VST1/VST2 fixed increment does not need Reg0 so only include it in
2353	// the operands if not such an opcode.
2354	else if (!isVSTfixed(Opc))
2355	Ops.push_back(Elt: Reg0);
2356	}
2357	Ops.push_back(Elt: SrcReg);
2358	Ops.push_back(Elt: Pred);
2359	Ops.push_back(Elt: Reg0);
2360	Ops.push_back(Elt: Chain);
2361	SDNode *VSt = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2362
2363	// Transfer memoperands.
2364	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VSt), NewMemRefs: {MemOp});
2365
2366	ReplaceNode(F: N, T: VSt);
2367	return;
2368	}
2369
2370	// Otherwise, quad registers are stored with two separate instructions,
2371	// where one stores the even registers and the other stores the odd registers.
2372
2373	// Form the QQQQ REG_SEQUENCE.
2374	SDValue V0 = N->getOperand(Num: Vec0Idx + `0`);
2375	SDValue V1 = N->getOperand(Num: Vec0Idx + `1`);
2376	SDValue V2 = N->getOperand(Num: Vec0Idx + `2`);
2377	SDValue V3 = (NumVecs == `3`)
2378	? SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), `0`)
2379	: N->getOperand(Num: Vec0Idx + `3`);
2380	SDValue RegSeq = SDValue (createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), `0`);
2381
2382	// Store the even D registers. This is always an updating store, so that it
2383	// provides the address to the second store for the odd subregs.
2384	const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2385	SDNode *VStA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2386	VT1: MemAddr.getValueType(),
2387	VT2: MVT::Other, Ops: OpsA);
2388	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStA), NewMemRefs: {MemOp});
2389	Chain = SDValue (VStA, `1`);
2390
2391	// Store the odd D registers.
2392	Ops.push_back(Elt: SDValue (VStA, `0`));
2393	Ops.push_back(Elt: Align);
2394	if (isUpdating) {
2395	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2396	assert(isa<ConstantSDNode>(Inc.getNode()) &&
2397	"only constant post-increment update allowed for VST3/4");
2398	(void)Inc;
2399	Ops.push_back(Elt: Reg0);
2400	}
2401	Ops.push_back(Elt: RegSeq);
2402	Ops.push_back(Elt: Pred);
2403	Ops.push_back(Elt: Reg0);
2404	Ops.push_back(Elt: Chain);
2405	SDNode *VStB = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys,
2406	Ops);
2407	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStB), NewMemRefs: {MemOp});
2408	ReplaceNode(F: N, T: VStB);
2409	}
2410
2411	void ARMDAGToDAGISel::SelectVLDSTLane(SDNode N, bool* IsLoad, bool isUpdating,
2412	unsigned NumVecs,
2413	const uint16_t *DOpcodes,
2414	const uint16_t *QOpcodes) {
2415	assert(Subtarget->hasNEON());
2416	assert(NumVecs >=`2` && NumVecs <= `4` && "VLDSTLane NumVecs out-of-range");
2417	SDLoc dl(N);
2418
2419	SDValue MemAddr, Align;
2420	bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2421	// nodes are not intrinsics.
2422	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2423	unsigned Vec0Idx = `3`; // AddrOpIdx + (isUpdating ? 2 : 1)
2424	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2425	return;
2426
2427	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2428
2429	SDValue Chain = N->getOperand(Num: `0`);
2430	unsigned Lane = N->getConstantOperandVal(Num: Vec0Idx + NumVecs);
2431	EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2432	bool is64BitVector = VT.is64BitVector();
2433
2434	unsigned Alignment = `0`;
2435	if (NumVecs != `3`) {
2436	Alignment = Align ->getAsZExtVal();
2437	unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / `8`;
2438	if (Alignment > NumBytes)
2439	Alignment = NumBytes;
2440	if (Alignment < `8` && Alignment < NumBytes)
2441	Alignment = `0`;
2442	// Alignment must be a power of two; make sure of that.
2443	Alignment = (Alignment & -Alignment);
2444	if (Alignment == `1`)
2445	Alignment = `0`;
2446	}
2447	Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
2448
2449	unsigned OpcodeIndex;
2450	switch (VT.getSimpleVT().SimpleTy) {
2451	default: llvm_unreachable("unhandled vld/vst lane type");
2452	// Double-register operations:
2453	case MVT::v8i8: OpcodeIndex = `0`; break;
2454	case MVT::v4f16:
2455	case MVT::v4bf16:
2456	case MVT::v4i16: OpcodeIndex = `1`; break;
2457	case MVT::v2f32:
2458	case MVT::v2i32: OpcodeIndex = `2`; break;
2459	// Quad-register operations:
2460	case MVT::v8f16:
2461	case MVT::v8bf16:
2462	case MVT::v8i16: OpcodeIndex = `0`; break;
2463	case MVT::v4f32:
2464	case MVT::v4i32: OpcodeIndex = `1`; break;
2465	}
2466
2467	std::vector<EVT> ResTys;
2468	if (IsLoad) {
2469	unsigned ResTyElts = (NumVecs == `3`) ? `4` : NumVecs;
2470	if (!is64BitVector)
2471	ResTyElts *= `2`;
2472	ResTys.push_back(x: EVT::getVectorVT(Context&: *CurDAG->getContext(),
2473	VT: MVT::i64, NumElements: ResTyElts));
2474	}
2475	if (isUpdating)
2476	ResTys.push_back(x: MVT::i32);
2477	ResTys.push_back(x: MVT::Other);
2478
2479	SDValue Pred = getAL(CurDAG, dl);
2480	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2481
2482	SmallVector<SDValue, `8`> Ops;
2483	Ops.push_back(Elt: MemAddr);
2484	Ops.push_back(Elt: Align);
2485	if (isUpdating) {
2486	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2487	bool IsImmUpdate =
2488	isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
2489	Ops.push_back(Elt: IsImmUpdate ? Reg0 : Inc);
2490	}
2491
2492	SDValue SuperReg;
2493	SDValue V0 = N->getOperand(Num: Vec0Idx + `0`);
2494	SDValue V1 = N->getOperand(Num: Vec0Idx + `1`);
2495	if (NumVecs == `2`) {
2496	if (is64BitVector)
2497	SuperReg = SDValue (createDRegPairNode(VT: MVT::v2i64, V0, V1), `0`);
2498	else
2499	SuperReg = SDValue (createQRegPairNode(VT: MVT::v4i64, V0, V1), `0`);
2500	} else {
2501	SDValue V2 = N->getOperand(Num: Vec0Idx + `2`);
2502	SDValue V3 = (NumVecs == `3`)
2503	? SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), `0`)
2504	: N->getOperand(Num: Vec0Idx + `3`);
2505	if (is64BitVector)
2506	SuperReg = SDValue (createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), `0`);
2507	else
2508	SuperReg = SDValue (createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), `0`);
2509	}
2510	Ops.push_back(Elt: SuperReg);
2511	Ops.push_back(Elt: getI32Imm(Imm: Lane, dl));
2512	Ops.push_back(Elt: Pred);
2513	Ops.push_back(Elt: Reg0);
2514	Ops.push_back(Elt: Chain);
2515
2516	unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2517	QOpcodes[OpcodeIndex]);
2518	SDNode *VLdLn = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2519	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdLn), NewMemRefs: {MemOp});
2520	if (!IsLoad) {
2521	ReplaceNode(F: N, T: VLdLn);
2522	return;
2523	}
2524
2525	// Extract the subregisters.
2526	SuperReg = SDValue (VLdLn, `0`);
2527	static_assert(ARM::dsub_7 == ARM::dsub_0 + `7` &&
2528	ARM::qsub_3 == ARM::qsub_0 + `3`,
2529	"Unexpected subreg numbering");
2530	unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2531	for (unsigned Vec = `0`; Vec < NumVecs; ++Vec)
2532	ReplaceUses(F: SDValue (N, Vec),
2533	T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2534	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (VLdLn, `1`));
2535	if (isUpdating)
2536	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (VLdLn, `2`));
2537	CurDAG->RemoveDeadNode(N);
2538	}
2539
2540	template <typename SDValueVector>
2541	void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2542	SDValue PredicateMask) {
2543	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2544	Ops.push_back(PredicateMask);
2545	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2546	}
2547
2548	template <typename SDValueVector>
2549	void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2550	SDValue PredicateMask,
2551	SDValue Inactive) {
2552	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2553	Ops.push_back(PredicateMask);
2554	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2555	Ops.push_back(Inactive);
2556	}
2557
2558	template <typename SDValueVector>
2559	void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2560	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2561	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
2562	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2563	}
2564
2565	template <typename SDValueVector>
2566	void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2567	EVT InactiveTy) {
2568	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2569	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
2570	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2571	Ops.push_back(SDValue (
2572	CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: InactiveTy), `0`));
2573	}
2574
2575	void ARMDAGToDAGISel::SelectMVE_WB(SDNode N, const* uint16_t *Opcodes,
2576	bool Predicated) {
2577	SDLoc Loc(N);
2578	SmallVector<SDValue, `8`> Ops;
2579
2580	uint16_t Opcode;
2581	switch (N->getValueType(ResNo: `1`).getVectorElementType().getSizeInBits()) {
2582	case `32`:
2583	Opcode = Opcodes[`0`];
2584	break;
2585	case `64`:
2586	Opcode = Opcodes[`1`];
2587	break;
2588	default:
2589	llvm_unreachable("bad vector element size in SelectMVE_WB");
2590	}
2591
2592	Ops.push_back(Elt: N->getOperand(Num: `2`)); // vector of base addresses
2593
2594	int32_t ImmValue = N->getConstantOperandVal(Num: `3`);
2595	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate offset
2596
2597	if (Predicated)
2598	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: `4`));
2599	else
2600	AddEmptyMVEPredicateToOps(Ops, Loc);
2601
2602	Ops.push_back(Elt: N->getOperand(Num: `0`)); // chain
2603
2604	SmallVector<EVT, `8`> VTs;
2605	VTs.push_back(Elt: N->getValueType(ResNo: `1`));
2606	VTs.push_back(Elt: N->getValueType(ResNo: `0`));
2607	VTs.push_back(Elt: N->getValueType(ResNo: `2`));
2608
2609	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), ResultTys: VTs, Ops);
2610	ReplaceUses(F: SDValue (N, `0`), T: SDValue (New, `1`));
2611	ReplaceUses(F: SDValue (N, `1`), T: SDValue (New, `0`));
2612	ReplaceUses(F: SDValue (N, `2`), T: SDValue (New, `2`));
2613	transferMemOperands(N, Result: New);
2614	CurDAG->RemoveDeadNode(N);
2615	}
2616
2617	void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2618	bool Immediate,
2619	bool HasSaturationOperand) {
2620	SDLoc Loc(N);
2621	SmallVector<SDValue, `8`> Ops;
2622
2623	// Two 32-bit halves of the value to be shifted
2624	Ops.push_back(Elt: N->getOperand(Num: `1`));
2625	Ops.push_back(Elt: N->getOperand(Num: `2`));
2626
2627	// The shift count
2628	if (Immediate) {
2629	int32_t ImmValue = N->getConstantOperandVal(Num: `3`);
2630	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2631	} else {
2632	Ops.push_back(Elt: N->getOperand(Num: `3`));
2633	}
2634
2635	// The immediate saturation operand, if any
2636	if (HasSaturationOperand) {
2637	int32_t SatOp = N->getConstantOperandVal(Num: `4`);
2638	int SatBit = (SatOp == `64` ? `0` : `1`);
2639	Ops.push_back(Elt: getI32Imm(Imm: SatBit, dl: Loc));
2640	}
2641
2642	// MVE scalar shifts are IT-predicable, so include the standard
2643	// predicate arguments.
2644	Ops.push_back(Elt: getAL(CurDAG, dl: Loc));
2645	Ops.push_back(Elt: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
2646
2647	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2648	}
2649
2650	void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2651	uint16_t OpcodeWithNoCarry,
2652	bool Add, bool Predicated) {
2653	SDLoc Loc(N);
2654	SmallVector<SDValue, `8`> Ops;
2655	uint16_t Opcode;
2656
2657	unsigned FirstInputOp = Predicated ? `2` : `1`;
2658
2659	// Two input vectors and the input carry flag
2660	Ops.push_back(Elt: N->getOperand(Num: FirstInputOp));
2661	Ops.push_back(Elt: N->getOperand(Num: FirstInputOp + `1`));
2662	SDValue CarryIn = N->getOperand(Num: FirstInputOp + `2`);
2663	ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(Val&: CarryIn);
2664	uint32_t CarryMask = `1` << `29`;
2665	uint32_t CarryExpected = Add ? `0` : CarryMask;
2666	if (CarryInConstant &&
2667	(CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2668	Opcode = OpcodeWithNoCarry;
2669	} else {
2670	Ops.push_back(Elt: CarryIn);
2671	Opcode = OpcodeWithCarry;
2672	}
2673
2674	if (Predicated)
2675	AddMVEPredicateToOps(Ops, Loc,
2676	PredicateMask: N->getOperand(Num: FirstInputOp + `3`), // predicate
2677	Inactive: N->getOperand(Num: FirstInputOp - `1`)); // inactive
2678	else
2679	AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: `0`));
2680
2681	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2682	}
2683
2684	void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode N, bool* Predicated) {
2685	SDLoc Loc(N);
2686	SmallVector<SDValue, `8`> Ops;
2687
2688	// One vector input, followed by a 32-bit word of bits to shift in
2689	// and then an immediate shift count
2690	Ops.push_back(Elt: N->getOperand(Num: `1`));
2691	Ops.push_back(Elt: N->getOperand(Num: `2`));
2692	int32_t ImmValue = N->getConstantOperandVal(Num: `3`);
2693	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2694
2695	if (Predicated)
2696	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: `4`));
2697	else
2698	AddEmptyMVEPredicateToOps(Ops, Loc);
2699
2700	CurDAG->SelectNodeTo(N, MachineOpc: ARM::MVE_VSHLC, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2701	}
2702
2703	static bool SDValueToConstBool(SDValue SDVal) {
2704	assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2705	ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(Val&: SDVal);
2706	uint64_t Value = SDValConstant->getZExtValue();
2707	assert((Value == `0` \|\| Value == `1`) && "expected value 0 or 1");
2708	return Value;
2709	}
2710
2711	void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode N, bool* Predicated,
2712	const uint16_t *OpcodesS,
2713	const uint16_t *OpcodesU,
2714	size_t Stride, size_t TySize) {
2715	assert(TySize < Stride && "Invalid TySize");
2716	bool IsUnsigned = SDValueToConstBool(SDVal: N->getOperand(Num: `1`));
2717	bool IsSub = SDValueToConstBool(SDVal: N->getOperand(Num: `2`));
2718	bool IsExchange = SDValueToConstBool(SDVal: N->getOperand(Num: `3`));
2719	if (IsUnsigned) {
2720	assert(!IsSub &&
2721	"Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2722	assert(!IsExchange &&
2723	"Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2724	}
2725
2726	auto OpIsZero = [N](size_t OpNo) {
2727	return isNullConstant(V: N->getOperand(Num: OpNo));
2728	};
2729
2730	// If the input accumulator value is not zero, select an instruction with
2731	// accumulator, otherwise select an instruction without accumulator
2732	bool IsAccum = !(OpIsZero (`4`) && OpIsZero (`5`));
2733
2734	const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2735	if (IsSub)
2736	Opcodes += `4` * Stride;
2737	if (IsExchange)
2738	Opcodes += `2` * Stride;
2739	if (IsAccum)
2740	Opcodes += Stride;
2741	uint16_t Opcode = Opcodes[TySize];
2742
2743	SDLoc Loc(N);
2744	SmallVector<SDValue, `8`> Ops;
2745	// Push the accumulator operands, if they are used
2746	if (IsAccum) {
2747	Ops.push_back(Elt: N->getOperand(Num: `4`));
2748	Ops.push_back(Elt: N->getOperand(Num: `5`));
2749	}
2750	// Push the two vector operands
2751	Ops.push_back(Elt: N->getOperand(Num: `6`));
2752	Ops.push_back(Elt: N->getOperand(Num: `7`));
2753
2754	if (Predicated)
2755	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: `8`));
2756	else
2757	AddEmptyMVEPredicateToOps(Ops, Loc);
2758
2759	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2760	}
2761
2762	void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode N, bool* Predicated,
2763	const uint16_t *OpcodesS,
2764	const uint16_t *OpcodesU) {
2765	EVT VecTy = N->getOperand(Num: `6`).getValueType();
2766	size_t SizeIndex;
2767	switch (VecTy.getVectorElementType().getSizeInBits()) {
2768	case `16`:
2769	SizeIndex = `0`;
2770	break;
2771	case `32`:
2772	SizeIndex = `1`;
2773	break;
2774	default:
2775	llvm_unreachable("bad vector element size");
2776	}
2777
2778	SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: `2`, TySize: SizeIndex);
2779	}
2780
2781	void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode N, bool* Predicated,
2782	const uint16_t *OpcodesS,
2783	const uint16_t *OpcodesU) {
2784	assert(
2785	N->getOperand(`6`).getValueType().getVectorElementType().getSizeInBits() ==
2786	`32` &&
2787	"bad vector element size");
2788	SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: `1`, TySize: `0`);
2789	}
2790
2791	void ARMDAGToDAGISel::SelectMVE_VLD(SDNode N, unsigned* NumVecs,
2792	const uint16_t *const *Opcodes,
2793	bool HasWriteback) {
2794	EVT VT = N->getValueType(ResNo: `0`);
2795	SDLoc Loc(N);
2796
2797	const uint16_t *OurOpcodes;
2798	switch (VT.getVectorElementType().getSizeInBits()) {
2799	case `8`:
2800	OurOpcodes = Opcodes[`0`];
2801	break;
2802	case `16`:
2803	OurOpcodes = Opcodes[`1`];
2804	break;
2805	case `32`:
2806	OurOpcodes = Opcodes[`2`];
2807	break;
2808	default:
2809	llvm_unreachable("bad vector element size in SelectMVE_VLD");
2810	}
2811
2812	EVT DataTy = EVT::getVectorVT(Context&: CurDAG->getContext(), VT: MVT::i64, NumElements: NumVecs `2`);
2813	SmallVector<EVT, `4`> ResultTys = {DataTy, MVT::Other};
2814	unsigned PtrOperand = HasWriteback ? `1` : `2`;
2815
2816	auto Data = SDValue (
2817	CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: DataTy), `0`);
2818	SDValue Chain = N->getOperand(Num: `0`);
2819	// Add a MVE_VLDn instruction for each Vec, except the last
2820	for (unsigned Stage = `0`; Stage < NumVecs - `1`; ++Stage) {
2821	SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2822	auto LoadInst =
2823	CurDAG->getMachineNode(Opcode: OurOpcodes[Stage], dl: Loc, ResultTys, Ops);
2824	Data = SDValue (LoadInst, `0`);
2825	Chain = SDValue (LoadInst, `1`);
2826	transferMemOperands(N, Result: LoadInst);
2827	}
2828	// The last may need a writeback on it
2829	if (HasWriteback)
2830	ResultTys = {DataTy, MVT::i32, MVT::Other};
2831	SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2832	auto LoadInst =
2833	CurDAG->getMachineNode(Opcode: OurOpcodes[NumVecs - `1`], dl: Loc, ResultTys, Ops);
2834	transferMemOperands(N, Result: LoadInst);
2835
2836	unsigned i;
2837	for (i = `0`; i < NumVecs; i++)
2838	ReplaceUses(F: SDValue (N, i),
2839	T: CurDAG->getTargetExtractSubreg(SRIdx: ARM::qsub_0 + i, DL: Loc, VT,
2840	Operand: SDValue (LoadInst, `0`)));
2841	if (HasWriteback)
2842	ReplaceUses(F: SDValue (N, i++), T: SDValue (LoadInst, `1`));
2843	ReplaceUses(F: SDValue (N, i), T: SDValue (LoadInst, HasWriteback ? `2` : `1`));
2844	CurDAG->RemoveDeadNode(N);
2845	}
2846
2847	void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode N, const* uint16_t *Opcodes,
2848	bool Wrapping, bool Predicated) {
2849	EVT VT = N->getValueType(ResNo: `0`);
2850	SDLoc Loc(N);
2851
2852	uint16_t Opcode;
2853	switch (VT.getScalarSizeInBits()) {
2854	case `8`:
2855	Opcode = Opcodes[`0`];
2856	break;
2857	case `16`:
2858	Opcode = Opcodes[`1`];
2859	break;
2860	case `32`:
2861	Opcode = Opcodes[`2`];
2862	break;
2863	default:
2864	llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2865	}
2866
2867	SmallVector<SDValue, `8`> Ops;
2868	unsigned OpIdx = `1`;
2869
2870	SDValue Inactive;
2871	if (Predicated)
2872	Inactive = N->getOperand(Num: OpIdx++);
2873
2874	Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // base
2875	if (Wrapping)
2876	Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // limit
2877
2878	SDValue ImmOp = N->getOperand(Num: OpIdx++); // step
2879	int ImmValue = ImmOp ->getAsZExtVal();
2880	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc));
2881
2882	if (Predicated)
2883	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: OpIdx), Inactive);
2884	else
2885	AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: `0`));
2886
2887	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2888	}
2889
2890	void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2891	size_t NumExtraOps, bool HasAccum) {
2892	bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2893	SDLoc Loc(N);
2894	SmallVector<SDValue, `8`> Ops;
2895
2896	unsigned OpIdx = `1`;
2897
2898	// Convert and append the immediate operand designating the coprocessor.
2899	SDValue ImmCorpoc = N->getOperand(Num: OpIdx++);
2900	uint32_t ImmCoprocVal = ImmCorpoc ->getAsZExtVal();
2901	Ops.push_back(Elt: getI32Imm(Imm: ImmCoprocVal, dl: Loc));
2902
2903	// For accumulating variants copy the low and high order parts of the
2904	// accumulator into a register pair and add it to the operand vector.
2905	if (HasAccum) {
2906	SDValue AccLo = N->getOperand(Num: OpIdx++);
2907	SDValue AccHi = N->getOperand(Num: OpIdx++);
2908	if (IsBigEndian)
2909	std::swap(a&: AccLo, b&: AccHi);
2910	Ops.push_back(Elt: SDValue (createGPRPairNode(VT: MVT::Untyped, V0: AccLo, V1: AccHi), `0`));
2911	}
2912
2913	// Copy extra operands as-is.
2914	for (size_t I = `0`; I < NumExtraOps; I++)
2915	Ops.push_back(Elt: N->getOperand(Num: OpIdx++));
2916
2917	// Convert and append the immediate operand
2918	SDValue Imm = N->getOperand(Num: OpIdx);
2919	uint32_t ImmVal = Imm ->getAsZExtVal();
2920	Ops.push_back(Elt: getI32Imm(Imm: ImmVal, dl: Loc));
2921
2922	// Accumulating variants are IT-predicable, add predicate operands.
2923	if (HasAccum) {
2924	SDValue Pred = getAL(CurDAG, dl: Loc);
2925	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2926	Ops.push_back(Elt: Pred);
2927	Ops.push_back(Elt: PredReg);
2928	}
2929
2930	// Create the CDE intruction
2931	SDNode *InstrNode = CurDAG->getMachineNode(Opcode, dl: Loc, VT: MVT::Untyped, Ops);
2932	SDValue ResultPair = SDValue (InstrNode, `0`);
2933
2934	// The original intrinsic had two outputs, and the output of the dual-register
2935	// CDE instruction is a register pair. We need to extract the two subregisters
2936	// and replace all uses of the original outputs with the extracted
2937	// subregisters.
2938	uint16_t SubRegs[`2`] = {ARM::gsub_0, ARM::gsub_1};
2939	if (IsBigEndian)
2940	std::swap(a&: SubRegs[`0`], b&: SubRegs[`1`]);
2941
2942	for (size_t ResIdx = `0`; ResIdx < `2`; ResIdx++) {
2943	if (SDValue (N, ResIdx).use_empty())
2944	continue;
2945	SDValue SubReg = CurDAG->getTargetExtractSubreg(SRIdx: SubRegs[ResIdx], DL: Loc,
2946	VT: MVT::i32, Operand: ResultPair);
2947	ReplaceUses(F: SDValue (N, ResIdx), T: SubReg);
2948	}
2949
2950	CurDAG->RemoveDeadNode(N);
2951	}
2952
2953	void ARMDAGToDAGISel::SelectVLDDup(SDNode N, bool* IsIntrinsic,
2954	bool isUpdating, unsigned NumVecs,
2955	const uint16_t *DOpcodes,
2956	const uint16_t *QOpcodes0,
2957	const uint16_t *QOpcodes1) {
2958	assert(Subtarget->hasNEON());
2959	assert(NumVecs >= `1` && NumVecs <= `4` && "VLDDup NumVecs out-of-range");
2960	SDLoc dl(N);
2961
2962	SDValue MemAddr, Align;
2963	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2964	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2965	return;
2966
2967	SDValue Chain = N->getOperand(Num: `0`);
2968	EVT VT = N->getValueType(ResNo: `0`);
2969	bool is64BitVector = VT.is64BitVector();
2970
2971	unsigned Alignment = `0`;
2972	if (NumVecs != `3`) {
2973	Alignment = Align ->getAsZExtVal();
2974	unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / `8`;
2975	if (Alignment > NumBytes)
2976	Alignment = NumBytes;
2977	if (Alignment < `8` && Alignment < NumBytes)
2978	Alignment = `0`;
2979	// Alignment must be a power of two; make sure of that.
2980	Alignment = (Alignment & -Alignment);
2981	if (Alignment == `1`)
2982	Alignment = `0`;
2983	}
2984	Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
2985
2986	unsigned OpcodeIndex;
2987	switch (VT.getSimpleVT().SimpleTy) {
2988	default: llvm_unreachable("unhandled vld-dup type");
2989	case MVT::v8i8:
2990	case MVT::v16i8: OpcodeIndex = `0`; break;
2991	case MVT::v4i16:
2992	case MVT::v8i16:
2993	case MVT::v4f16:
2994	case MVT::v8f16:
2995	case MVT::v4bf16:
2996	case MVT::v8bf16:
2997	OpcodeIndex = `1`; break;
2998	case MVT::v2f32:
2999	case MVT::v2i32:
3000	case MVT::v4f32:
3001	case MVT::v4i32: OpcodeIndex = `2`; break;
3002	case MVT::v1f64:
3003	case MVT::v1i64: OpcodeIndex = `3`; break;
3004	}
3005
3006	unsigned ResTyElts = (NumVecs == `3`) ? `4` : NumVecs;
3007	if (!is64BitVector)
3008	ResTyElts *= `2`;
3009	EVT ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
3010
3011	std::vector<EVT> ResTys;
3012	ResTys.push_back(x: ResTy);
3013	if (isUpdating)
3014	ResTys.push_back(x: MVT::i32);
3015	ResTys.push_back(x: MVT::Other);
3016
3017	SDValue Pred = getAL(CurDAG, dl);
3018	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3019
3020	SmallVector<SDValue, `6`> Ops;
3021	Ops.push_back(Elt: MemAddr);
3022	Ops.push_back(Elt: Align);
3023	unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3024	: (NumVecs == `1`) ? QOpcodes0[OpcodeIndex]
3025	: QOpcodes1[OpcodeIndex];
3026	if (isUpdating) {
3027	SDValue Inc = N->getOperand(Num: `2`);
3028	bool IsImmUpdate =
3029	isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
3030	if (IsImmUpdate) {
3031	if (!isVLDfixed(Opc))
3032	Ops.push_back(Elt: Reg0);
3033	} else {
3034	if (isVLDfixed(Opc))
3035	Opc = getVLDSTRegisterUpdateOpcode(Opc);
3036	Ops.push_back(Elt: Inc);
3037	}
3038	}
3039	if (is64BitVector \|\| NumVecs == `1`) {
3040	// Double registers and VLD1 quad registers are directly supported.
3041	} else {
3042	SDValue ImplDef = SDValue (
3043	CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), `0`);
3044	const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3045	SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl, VT1: ResTy,
3046	VT2: MVT::Other, Ops: OpsA);
3047	Ops.push_back(Elt: SDValue (VLdA, `0`));
3048	Chain = SDValue (VLdA, `1`);
3049	}
3050
3051	Ops.push_back(Elt: Pred);
3052	Ops.push_back(Elt: Reg0);
3053	Ops.push_back(Elt: Chain);
3054
3055	SDNode *VLdDup = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
3056
3057	// Transfer memoperands.
3058	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
3059	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdDup), NewMemRefs: {MemOp});
3060
3061	// Extract the subregisters.
3062	if (NumVecs == `1`) {
3063	ReplaceUses(F: SDValue (N, `0`), T: SDValue (VLdDup, `0`));
3064	} else {
3065	SDValue SuperReg = SDValue (VLdDup, `0`);
3066	static_assert(ARM::dsub_7 == ARM::dsub_0 + `7`, "Unexpected subreg numbering");
3067	unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3068	for (unsigned Vec = `0`; Vec != NumVecs; ++Vec) {
3069	ReplaceUses(F: SDValue (N, Vec),
3070	T: CurDAG->getTargetExtractSubreg(SRIdx: SubIdx+Vec, DL: dl, VT, Operand: SuperReg));
3071	}
3072	}
3073	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (VLdDup, `1`));
3074	if (isUpdating)
3075	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (VLdDup, `2`));
3076	CurDAG->RemoveDeadNode(N);
3077	}
3078
3079	bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3080	if (!Subtarget->hasMVEIntegerOps())
3081	return false;
3082
3083	SDLoc dl(N);
3084
3085	// We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3086	// extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3087	// inserts of the correct type:
3088	SDValue Ins1 = SDValue (N, `0`);
3089	SDValue Ins2 = N->getOperand(Num: `0`);
3090	EVT VT = Ins1.getValueType();
3091	if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT \|\| !Ins2.hasOneUse() \|\|
3092	!isa<ConstantSDNode>(Val: Ins1.getOperand(i: `2`)) \|\|
3093	!isa<ConstantSDNode>(Val: Ins2.getOperand(i: `2`)) \|\|
3094	(VT != MVT::v8f16 && VT != MVT::v8i16) \|\| (Ins2.getValueType() != VT))
3095	return false;
3096
3097	unsigned Lane1 = Ins1.getConstantOperandVal(i: `2`);
3098	unsigned Lane2 = Ins2.getConstantOperandVal(i: `2`);
3099	if (Lane2 % `2` != `0` \|\| Lane1 != Lane2 + `1`)
3100	return false;
3101
3102	// If the inserted values will be able to use T/B already, leave it to the
3103	// existing tablegen patterns. For example VCVTT/VCVTB.
3104	SDValue Val1 = Ins1.getOperand(i: `1`);
3105	SDValue Val2 = Ins2.getOperand(i: `1`);
3106	if (Val1.getOpcode() == ISD::FP_ROUND \|\| Val2.getOpcode() == ISD::FP_ROUND)
3107	return false;
3108
3109	// Check if the inserted values are both extracts.
3110	if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT \|\|
3111	Val1.getOpcode() == ARMISD::VGETLANEu) &&
3112	(Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT \|\|
3113	Val2.getOpcode() == ARMISD::VGETLANEu) &&
3114	isa<ConstantSDNode>(Val: Val1.getOperand(i: `1`)) &&
3115	isa<ConstantSDNode>(Val: Val2.getOperand(i: `1`)) &&
3116	(Val1.getOperand(i: `0`).getValueType() == MVT::v8f16 \|\|
3117	Val1.getOperand(i: `0`).getValueType() == MVT::v8i16) &&
3118	(Val2.getOperand(i: `0`).getValueType() == MVT::v8f16 \|\|
3119	Val2.getOperand(i: `0`).getValueType() == MVT::v8i16)) {
3120	unsigned ExtractLane1 = Val1.getConstantOperandVal(i: `1`);
3121	unsigned ExtractLane2 = Val2.getConstantOperandVal(i: `1`);
3122
3123	// If the two extracted lanes are from the same place and adjacent, this
3124	// simplifies into a f32 lane move.
3125	if (Val1.getOperand(i: `0`) == Val2.getOperand(i: `0`) && ExtractLane2 % `2` == `0` &&
3126	ExtractLane1 == ExtractLane2 + `1`) {
3127	SDValue NewExt = CurDAG->getTargetExtractSubreg(
3128	SRIdx: ARM::ssub_0 + ExtractLane2 / `2`, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: `0`));
3129	SDValue NewIns = CurDAG->getTargetInsertSubreg(
3130	SRIdx: ARM::ssub_0 + Lane2 / `2`, DL: dl, VT, Operand: Ins2.getOperand(i: `0`),
3131	Subreg: NewExt);
3132	ReplaceUses(F: Ins1, T: NewIns);
3133	return true;
3134	}
3135
3136	// Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3137	// extracting odd lanes.
3138	if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3139	SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3140	SRIdx: ARM::ssub_0 + ExtractLane1 / `2`, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: `0`));
3141	SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3142	SRIdx: ARM::ssub_0 + ExtractLane2 / `2`, DL: dl, VT: MVT::f32, Operand: Val2.getOperand(i: `0`));
3143	if (ExtractLane1 % `2` != `0`)
3144	Inp1 = SDValue (CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp1), `0`);
3145	if (ExtractLane2 % `2` != `0`)
3146	Inp2 = SDValue (CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp2), `0`);
3147	SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Inp2, Op2: Inp1);
3148	SDValue NewIns =
3149	CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / `2`, DL: dl, VT: MVT::v4f32,
3150	Operand: Ins2.getOperand(i: `0`), Subreg: SDValue (VINS, `0`));
3151	ReplaceUses(F: Ins1, T: NewIns);
3152	return true;
3153	}
3154	}
3155
3156	// The inserted values are not extracted - if they are f16 then insert them
3157	// directly using a VINS.
3158	if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3159	SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Val2, Op2: Val1);
3160	SDValue NewIns =
3161	CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / `2`, DL: dl, VT: MVT::v4f32,
3162	Operand: Ins2.getOperand(i: `0`), Subreg: SDValue (VINS, `0`));
3163	ReplaceUses(F: Ins1, T: NewIns);
3164	return true;
3165	}
3166
3167	return false;
3168	}
3169
3170	bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3171	SDNode *FMul,
3172	bool IsUnsigned,
3173	bool FixedToFloat) {
3174	auto Type = N->getValueType(ResNo: `0`);
3175	unsigned ScalarBits = Type.getScalarSizeInBits();
3176	if (ScalarBits > `32`)
3177	return false;
3178
3179	SDNodeFlags FMulFlags = FMul->getFlags();
3180	// The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3181	// allowed in 16 bit unsigned floats
3182	if (ScalarBits == `16` && !FMulFlags.hasNoInfs() && IsUnsigned)
3183	return false;
3184
3185	SDValue ImmNode = FMul->getOperand(Num: `1`);
3186	SDValue VecVal = FMul->getOperand(Num: `0`);
3187	if (VecVal ->getOpcode() == ISD::UINT_TO_FP \|\|
3188	VecVal ->getOpcode() == ISD::SINT_TO_FP)
3189	VecVal = VecVal ->getOperand(Num: `0`);
3190
3191	if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3192	return false;
3193
3194	if (ImmNode.getOpcode() == ISD::BITCAST) {
3195	if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3196	return false;
3197	ImmNode = ImmNode.getOperand(i: `0`);
3198	}
3199
3200	if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3201	return false;
3202
3203	APFloat ImmAPF(`0.0f`);
3204	switch (ImmNode.getOpcode()) {
3205	case ARMISD::VMOVIMM:
3206	case ARMISD::VDUP: {
3207	if (!isa<ConstantSDNode>(Val: ImmNode.getOperand(i: `0`)))
3208	return false;
3209	unsigned Imm = ImmNode.getConstantOperandVal(i: `0`);
3210	if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3211	Imm = ARM_AM::decodeVMOVModImm(ModImm: Imm, EltBits&: ScalarBits);
3212	ImmAPF =
3213	APFloat (ScalarBits == `32` ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3214	APInt (ScalarBits, Imm));
3215	break;
3216	}
3217	case ARMISD::VMOVFPIMM: {
3218	ImmAPF = APFloat (ARM_AM::getFPImmFloat(Imm: ImmNode.getConstantOperandVal(i: `0`)));
3219	break;
3220	}
3221	default:
3222	return false;
3223	}
3224
3225	// Where n is the number of fractional bits, multiplying by 2^n will convert
3226	// from float to fixed and multiplying by 2^-n will convert from fixed to
3227	// float. Taking log2 of the factor (after taking the inverse in the case of
3228	// float to fixed) will give n.
3229	APFloat ToConvert = ImmAPF;
3230	if (FixedToFloat) {
3231	if (!ImmAPF.getExactInverse(inv: &ToConvert))
3232	return false;
3233	}
3234	APSInt Converted(`64`, false);
3235	bool IsExact;
3236	ToConvert.convertToInteger(Result&: Converted, RM: llvm::RoundingMode::NearestTiesToEven,
3237	IsExact: &IsExact);
3238	if (!IsExact \|\| !Converted.isPowerOf2())
3239	return false;
3240
3241	unsigned FracBits = Converted.logBase2();
3242	if (FracBits > ScalarBits)
3243	return false;
3244
3245	SmallVector<SDValue, `3`> Ops{
3246	VecVal, CurDAG->getConstant(Val: FracBits, DL: SDLoc (N), VT: MVT::i32)};
3247	AddEmptyMVEPredicateToOps(Ops, Loc: SDLoc (N), InactiveTy: Type);
3248
3249	unsigned int Opcode;
3250	switch (ScalarBits) {
3251	case `16`:
3252	if (FixedToFloat)
3253	Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3254	else
3255	Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3256	break;
3257	case `32`:
3258	if (FixedToFloat)
3259	Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3260	else
3261	Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3262	break;
3263	default:
3264	llvm_unreachable("unexpected number of scalar bits");
3265	break;
3266	}
3267
3268	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT: Type, Ops));
3269	return true;
3270	}
3271
3272	bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3273	// Transform a floating-point to fixed-point conversion to a VCVT
3274	if (!Subtarget->hasMVEFloatOps())
3275	return false;
3276	EVT Type = N->getValueType(ResNo: `0`);
3277	if (!Type.isVector())
3278	return false;
3279	unsigned int ScalarBits = Type.getScalarSizeInBits();
3280
3281	bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT \|\|
3282	N->getOpcode() == ISD::FP_TO_UINT_SAT;
3283	SDNode *Node = N->getOperand(Num: `0`).getNode();
3284
3285	// floating-point to fixed-point with one fractional bit gets turned into an
3286	// FP_TO_[U\|S]INT(FADD (x, x)) rather than an FP_TO_[U\|S]INT(FMUL (x, y))
3287	if (Node->getOpcode() == ISD::FADD) {
3288	if (Node->getOperand(Num: `0`) != Node->getOperand(Num: `1`))
3289	return false;
3290	SDNodeFlags Flags = Node->getFlags();
3291	// The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3292	// allowed in 16 bit unsigned floats
3293	if (ScalarBits == `16` && !Flags.hasNoInfs() && IsUnsigned)
3294	return false;
3295
3296	unsigned Opcode;
3297	switch (ScalarBits) {
3298	case `16`:
3299	Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3300	break;
3301	case `32`:
3302	Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3303	break;
3304	}
3305	SmallVector<SDValue, `3`> Ops{Node->getOperand(Num: `0`),
3306	CurDAG->getConstant(Val: `1`, DL: dl, VT: MVT::i32)};
3307	AddEmptyMVEPredicateToOps(Ops, Loc: dl, InactiveTy: Type);
3308
3309	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl, VT: Type, Ops));
3310	return true;
3311	}
3312
3313	if (Node->getOpcode() != ISD::FMUL)
3314	return false;
3315
3316	return transformFixedFloatingPointConversion(N, FMul: Node, IsUnsigned, FixedToFloat: false);
3317	}
3318
3319	bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3320	// Transform a fixed-point to floating-point conversion to a VCVT
3321	if (!Subtarget->hasMVEFloatOps())
3322	return false;
3323	auto Type = N->getValueType(ResNo: `0`);
3324	if (!Type.isVector())
3325	return false;
3326
3327	auto LHS = N->getOperand(Num: `0`);
3328	if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3329	return false;
3330
3331	return transformFixedFloatingPointConversion(
3332	N, FMul: N, IsUnsigned: LHS.getOpcode() == ISD::UINT_TO_FP, FixedToFloat: true);
3333	}
3334
3335	bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode N, bool* isSigned) {
3336	if (!Subtarget->hasV6T2Ops())
3337	return false;
3338
3339	unsigned Opc = isSigned
3340	? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3341	: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3342	SDLoc dl(N);
3343
3344	// For unsigned extracts, check for a shift right and mask
3345	unsigned And_imm = `0`;
3346	if (N->getOpcode() == ISD::AND) {
3347	if (isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: And_imm)) {
3348
3349	// The immediate is a mask of the low bits iff imm & (imm+1) == 0
3350	if (And_imm & (And_imm + `1`))
3351	return false;
3352
3353	unsigned Srl_imm = `0`;
3354	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SRL,
3355	Imm&: Srl_imm)) {
3356	assert(Srl_imm > `0` && Srl_imm < `32` && "bad amount in shift node!");
3357
3358	// Mask off the unnecessary bits of the AND immediate; normally
3359	// DAGCombine will do this, but that might not happen if
3360	// targetShrinkDemandedConstant chooses a different immediate.
3361	And_imm &= -`1U` >> Srl_imm;
3362
3363	// Note: The width operand is encoded as width-1.
3364	unsigned Width = llvm::countr_one(Value: And_imm) - `1`;
3365	unsigned LSB = Srl_imm;
3366
3367	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3368
3369	if ((LSB + Width + `1`) == N->getValueType(ResNo: `0`).getSizeInBits()) {
3370	// It's cheaper to use a right shift to extract the top bits.
3371	if (Subtarget->isThumb()) {
3372	Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3373	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3374	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3375	getAL(CurDAG, dl), Reg0, Reg0 };
3376	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3377	return true;
3378	}
3379
3380	// ARM models shift instructions as MOVsi with shifter operand.
3381	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: ISD::SRL);
3382	SDValue ShOpc =
3383	CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: LSB), DL: dl,
3384	VT: MVT::i32);
3385	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`), ShOpc,
3386	getAL(CurDAG, dl), Reg0, Reg0 };
3387	CurDAG->SelectNodeTo(N, MachineOpc: ARM::MOVsi, VT: MVT::i32, Ops);
3388	return true;
3389	}
3390
3391	assert(LSB + Width + `1` <= `32` && "Shouldn't create an invalid ubfx");
3392	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3393	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3394	CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3395	getAL(CurDAG, dl), Reg0 };
3396	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3397	return true;
3398	}
3399	}
3400	return false;
3401	}
3402
3403	// Otherwise, we're looking for a shift of a shift
3404	unsigned Shl_imm = `0`;
3405	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SHL, Imm&: Shl_imm)) {
3406	assert(Shl_imm > `0` && Shl_imm < `32` && "bad amount in shift node!");
3407	unsigned Srl_imm = `0`;
3408	if (isInt32Immediate(N: N->getOperand(Num: `1`), Imm&: Srl_imm)) {
3409	assert(Srl_imm > `0` && Srl_imm < `32` && "bad amount in shift node!");
3410	// Note: The width operand is encoded as width-1.
3411	unsigned Width = `32` - Srl_imm - `1`;
3412	int LSB = Srl_imm - Shl_imm;
3413	if (LSB < `0`)
3414	return false;
3415	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3416	assert(LSB + Width + `1` <= `32` && "Shouldn't create an invalid ubfx");
3417	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3418	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3419	CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3420	getAL(CurDAG, dl), Reg0 };
3421	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3422	return true;
3423	}
3424	}
3425
3426	// Or we are looking for a shift of an and, with a mask operand
3427	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::AND, Imm&: And_imm) &&
3428	isShiftedMask_32(Value: And_imm)) {
3429	unsigned Srl_imm = `0`;
3430	unsigned LSB = llvm::countr_zero(Val: And_imm);
3431	// Shift must be the same as the ands lsb
3432	if (isInt32Immediate(N: N->getOperand(Num: `1`), Imm&: Srl_imm) && Srl_imm == LSB) {
3433	assert(Srl_imm > `0` && Srl_imm < `32` && "bad amount in shift node!");
3434	unsigned MSB = llvm::Log2_32(Value: And_imm);
3435	// Note: The width operand is encoded as width-1.
3436	unsigned Width = MSB - LSB;
3437	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3438	assert(Srl_imm + Width + `1` <= `32` && "Shouldn't create an invalid ubfx");
3439	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3440	CurDAG->getTargetConstant(Val: Srl_imm, DL: dl, VT: MVT::i32),
3441	CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3442	getAL(CurDAG, dl), Reg0 };
3443	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3444	return true;
3445	}
3446	}
3447
3448	if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3449	unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: `1`))->getVT().getSizeInBits();
3450	unsigned LSB = `0`;
3451	if (!isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SRL, Imm&: LSB) &&
3452	!isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SRA, Imm&: LSB))
3453	return false;
3454
3455	if (LSB + Width > `32`)
3456	return false;
3457
3458	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3459	assert(LSB + Width <= `32` && "Shouldn't create an invalid ubfx");
3460	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3461	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3462	CurDAG->getTargetConstant(Val: Width - `1`, DL: dl, VT: MVT::i32),
3463	getAL(CurDAG, dl), Reg0 };
3464	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3465	return true;
3466	}
3467
3468	return false;
3469	}
3470
3471	/// Target-specific DAG combining for ISD::SUB.
3472	/// Target-independent combining lowers SELECT_CC nodes of the form
3473	/// select_cc setg[ge] X, 0, X, -X
3474	/// select_cc setgt X, -1, X, -X
3475	/// select_cc setl[te] X, 0, -X, X
3476	/// select_cc setlt X, 1, -X, X
3477	/// which represent Integer ABS into:
3478	/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3479	/// ARM instruction selection detects the latter and matches it to
3480	/// ARM::ABS or ARM::t2ABS machine node.
3481	bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3482	SDValue SUBSrc0 = N->getOperand(Num: `0`);
3483	SDValue SUBSrc1 = N->getOperand(Num: `1`);
3484	EVT VT = N->getValueType(ResNo: `0`);
3485
3486	if (Subtarget->isThumb1Only())
3487	return false;
3488
3489	if (SUBSrc0.getOpcode() != ISD::XOR \|\| SUBSrc1.getOpcode() != ISD::SRA)
3490	return false;
3491
3492	SDValue XORSrc0 = SUBSrc0.getOperand(i: `0`);
3493	SDValue XORSrc1 = SUBSrc0.getOperand(i: `1`);
3494	SDValue SRASrc0 = SUBSrc1.getOperand(i: `0`);
3495	SDValue SRASrc1 = SUBSrc1.getOperand(i: `1`);
3496	ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(Val&: SRASrc1);
3497	EVT XType = SRASrc0.getValueType();
3498	unsigned Size = XType.getSizeInBits() - `1`;
3499
3500	if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3501	SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3502	unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3503	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VT, Op1: XORSrc0);
3504	return true;
3505	}
3506
3507	return false;
3508	}
3509
3510	/// We've got special pseudo-instructions for these
3511	void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3512	unsigned Opcode;
3513	EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3514	if (MemTy == MVT::i8)
3515	Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3516	else if (MemTy == MVT::i16)
3517	Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3518	else if (MemTy == MVT::i32)
3519	Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3520	else
3521	llvm_unreachable("Unknown AtomicCmpSwap type");
3522
3523	SDValue Ops[] = {N->getOperand(Num: `1`), N->getOperand(Num: `2`), N->getOperand(Num: `3`),
3524	N->getOperand(Num: `0`)};
3525	SDNode *CmpSwap = CurDAG->getMachineNode(
3526	Opcode, dl: SDLoc (N),
3527	VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other), Ops);
3528
3529	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3530	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3531
3532	ReplaceUses(F: SDValue (N, `0`), T: SDValue (CmpSwap, `0`));
3533	ReplaceUses(F: SDValue (N, `1`), T: SDValue (CmpSwap, `2`));
3534	CurDAG->RemoveDeadNode(N);
3535	}
3536
3537	static std::optional<std::pair<unsigned, unsigned>>
3538	getContiguousRangeOfSetBits(const APInt &A) {
3539	unsigned FirstOne = A.getBitWidth() - A.countl_zero() - `1`;
3540	unsigned LastOne = A.countr_zero();
3541	if (A.popcount() != (FirstOne - LastOne + `1`))
3542	return std::nullopt;
3543	return std::make_pair(x&: FirstOne, y&: LastOne);
3544	}
3545
3546	void ARMDAGToDAGISel::SelectCMPZ(SDNode N, bool* &SwitchEQNEToPLMI) {
3547	assert(N->getOpcode() == ARMISD::CMPZ);
3548	SwitchEQNEToPLMI = false;
3549
3550	if (!Subtarget->isThumb())
3551	// FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3552	// LSR don't exist as standalone instructions - they need the barrel shifter.
3553	return;
3554
3555	// select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3556	SDValue And = N->getOperand(Num: `0`);
3557	if (!And ->hasOneUse())
3558	return;
3559
3560	SDValue Zero = N->getOperand(Num: `1`);
3561	if (!isNullConstant(V: Zero) \|\| And ->getOpcode() != ISD::AND)
3562	return;
3563	SDValue X = And.getOperand(i: `0`);
3564	auto C = dyn_cast<ConstantSDNode>(Val: And.getOperand(i: `1`));
3565
3566	if (!C)
3567	return;
3568	auto Range = getContiguousRangeOfSetBits(A: C->getAPIntValue());
3569	if (!Range)
3570	return;
3571
3572	// There are several ways to lower this:
3573	SDNode *NewN;
3574	SDLoc dl(N);
3575
3576	auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3577	if (Subtarget->isThumb2()) {
3578	Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3579	SDValue Ops[] = { Src, CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3580	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3581	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3582	return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3583	} else {
3584	SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Src,
3585	CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3586	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3587	return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3588	}
3589	};
3590
3591	if (Range ->second == `0`) {
3592	// 1. Mask includes the LSB -> Simply shift the top N bits off
3593	NewN = EmitShift (ARM::tLSLri, X, `31` - Range ->first);
3594	ReplaceNode(F: And.getNode(), T: NewN);
3595	} else if (Range ->first == `31`) {
3596	// 2. Mask includes the MSB -> Simply shift the bottom N bits off
3597	NewN = EmitShift (ARM::tLSRri, X, Range ->second);
3598	ReplaceNode(F: And.getNode(), T: NewN);
3599	} else if (Range ->first == Range ->second) {
3600	// 3. Only one bit is set. We can shift this into the sign bit and use a
3601	// PL/MI comparison.
3602	NewN = EmitShift (ARM::tLSLri, X, `31` - Range ->first);
3603	ReplaceNode(F: And.getNode(), T: NewN);
3604
3605	SwitchEQNEToPLMI = true;
3606	} else if (!Subtarget->hasV6T2Ops()) {
3607	// 4. Do a double shift to clear bottom and top bits, but only in
3608	// thumb-1 mode as in thumb-2 we can use UBFX.
3609	NewN = EmitShift (ARM::tLSLri, X, `31` - Range ->first);
3610	NewN = EmitShift (ARM::tLSRri, SDValue (NewN, `0`),
3611	Range ->second + (`31` - Range ->first));
3612	ReplaceNode(F: And.getNode(), T: NewN);
3613	}
3614	}
3615
3616	static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[`3`],
3617	unsigned Opc128[`3`]) {
3618	assert((VT.is64BitVector() \|\| VT.is128BitVector()) &&
3619	"Unexpected vector shuffle length");
3620	switch (VT.getScalarSizeInBits()) {
3621	default:
3622	llvm_unreachable("Unexpected vector shuffle element size");
3623	case `8`:
3624	return VT.is64BitVector() ? Opc64[`0`] : Opc128[`0`];
3625	case `16`:
3626	return VT.is64BitVector() ? Opc64[`1`] : Opc128[`1`];
3627	case `32`:
3628	return VT.is64BitVector() ? Opc64[`2`] : Opc128[`2`];
3629	}
3630	}
3631
3632	void ARMDAGToDAGISel::Select(SDNode *N) {
3633	SDLoc dl(N);
3634
3635	if (N->isMachineOpcode()) {
3636	N->setNodeId(-`1`);
3637	return; // Already selected.
3638	}
3639
3640	switch (N->getOpcode()) {
3641	default: break;
3642	case ISD::STORE: {
3643	// For Thumb1, match an sp-relative store in C++. This is a little
3644	// unfortunate, but I don't think I can make the chain check work
3645	// otherwise. (The chain of the store has to be the same as the chain
3646	// of the CopyFromReg, or else we can't replace the CopyFromReg with
3647	// a direct reference to "SP".)
3648	//
3649	// This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3650	// a different addressing mode from other four-byte stores.
3651	//
3652	// This pattern usually comes up with call arguments.
3653	StoreSDNode *ST = cast<StoreSDNode>(Val: N);
3654	SDValue Ptr = ST->getBasePtr();
3655	if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3656	int RHSC = `0`;
3657	if (Ptr.getOpcode() == ISD::ADD &&
3658	isScaledConstantInRange(Node: Ptr.getOperand(i: `1`), /Scale=/`4`, RangeMin: `0`, RangeMax: `256`, ScaledConstant&: RHSC))
3659	Ptr = Ptr.getOperand(i: `0`);
3660
3661	if (Ptr.getOpcode() == ISD::CopyFromReg &&
3662	cast<RegisterSDNode>(Val: Ptr.getOperand(i: `1`))->getReg() == ARM::SP &&
3663	Ptr.getOperand(i: `0`) == ST->getChain()) {
3664	SDValue Ops[] = {ST->getValue(),
3665	CurDAG->getRegister(Reg: ARM::SP, VT: MVT::i32),
3666	CurDAG->getTargetConstant(Val: RHSC, DL: dl, VT: MVT::i32),
3667	getAL(CurDAG, dl),
3668	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3669	ST->getChain()};
3670	MachineSDNode *ResNode =
3671	CurDAG->getMachineNode(Opcode: ARM::tSTRspi, dl, VT: MVT::Other, Ops);
3672	MachineMemOperand *MemOp = ST->getMemOperand();
3673	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3674	ReplaceNode(F: N, T: ResNode);
3675	return;
3676	}
3677	}
3678	break;
3679	}
3680	case ISD::WRITE_REGISTER:
3681	if (tryWriteRegister(N))
3682	return;
3683	break;
3684	case ISD::READ_REGISTER:
3685	if (tryReadRegister(N))
3686	return;
3687	break;
3688	case ISD::INLINEASM:
3689	case ISD::INLINEASM_BR:
3690	if (tryInlineAsm(N))
3691	return;
3692	break;
3693	case ISD::SUB:
3694	// Select special operations if SUB node forms integer ABS pattern
3695	if (tryABSOp(N))
3696	return;
3697	// Other cases are autogenerated.
3698	break;
3699	case ISD::Constant: {
3700	unsigned Val = N->getAsZExtVal();
3701	// If we can't materialize the constant we need to use a literal pool
3702	if (ConstantMaterializationCost(Val, Subtarget) > `2` &&
3703	!Subtarget->genExecuteOnly()) {
3704	SDValue CPIdx = CurDAG->getTargetConstantPool(
3705	C: ConstantInt::get(Ty: Type::getInt32Ty(C&: *CurDAG->getContext()), V: Val),
3706	VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3707
3708	SDNode *ResNode;
3709	if (Subtarget->isThumb()) {
3710	SDValue Ops[] = {
3711	CPIdx,
3712	getAL(CurDAG, dl),
3713	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3714	CurDAG->getEntryNode()
3715	};
3716	ResNode = CurDAG->getMachineNode(Opcode: ARM::tLDRpci, dl, VT1: MVT::i32, VT2: MVT::Other,
3717	Ops);
3718	} else {
3719	SDValue Ops[] = {
3720	CPIdx,
3721	CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32),
3722	getAL(CurDAG, dl),
3723	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3724	CurDAG->getEntryNode()
3725	};
3726	ResNode = CurDAG->getMachineNode(Opcode: ARM::LDRcp, dl, VT1: MVT::i32, VT2: MVT::Other,
3727	Ops);
3728	}
3729	// Annotate the Node with memory operand information so that MachineInstr
3730	// queries work properly. This e.g. gives the register allocation the
3731	// required information for rematerialization.
3732	MachineFunction& MF = CurDAG->getMachineFunction();
3733	MachineMemOperand *MemOp =
3734	MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3735	F: MachineMemOperand::MOLoad, Size: `4`, BaseAlignment: Align (`4`));
3736
3737	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3738
3739	ReplaceNode(F: N, T: ResNode);
3740	return;
3741	}
3742
3743	// Other cases are autogenerated.
3744	break;
3745	}
3746	case ISD::FrameIndex: {
3747	// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3748	int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
3749	SDValue TFI = CurDAG->getTargetFrameIndex(
3750	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3751	if (Subtarget->isThumb1Only()) {
3752	// Set the alignment of the frame object to 4, to avoid having to generate
3753	// more than one ADD
3754	MachineFrameInfo &MFI = MF->getFrameInfo();
3755	if (MFI.getObjectAlign(ObjectIdx: FI) < Align (`4`))
3756	MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align (`4`));
3757	CurDAG->SelectNodeTo(N, MachineOpc: ARM::tADDframe, VT: MVT::i32, Op1: TFI,
3758	Op2: CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32));
3759	return;
3760	} else {
3761	unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3762	ARM::t2ADDri : ARM::ADDri);
3763	SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32),
3764	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3765	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3766	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3767	return;
3768	}
3769	}
3770	case ISD::INSERT_VECTOR_ELT: {
3771	if (tryInsertVectorElt(N))
3772	return;
3773	break;
3774	}
3775	case ISD::SRL:
3776	if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3777	return;
3778	break;
3779	case ISD::SIGN_EXTEND_INREG:
3780	case ISD::SRA:
3781	if (tryV6T2BitfieldExtractOp(N, isSigned: true))
3782	return;
3783	break;
3784	case ISD::FP_TO_UINT:
3785	case ISD::FP_TO_SINT:
3786	case ISD::FP_TO_UINT_SAT:
3787	case ISD::FP_TO_SINT_SAT:
3788	if (tryFP_TO_INT(N, dl))
3789	return;
3790	break;
3791	case ISD::FMUL:
3792	if (tryFMULFixed(N, dl))
3793	return;
3794	break;
3795	case ISD::MUL:
3796	if (Subtarget->isThumb1Only())
3797	break;
3798	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`))) {
3799	unsigned RHSV = C->getZExtValue();
3800	if (!RHSV) break;
3801	if (isPowerOf2_32(Value: RHSV-`1`)) { // 2^n+1?
3802	unsigned ShImm = Log2_32(Value: RHSV-`1`);
3803	if (ShImm >= `32`)
3804	break;
3805	SDValue V = N->getOperand(Num: `0`);
3806	ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3807	SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3808	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3809	if (Subtarget->isThumb()) {
3810	SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3811	CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2ADDrs, VT: MVT::i32, Ops);
3812	return;
3813	} else {
3814	SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3815	Reg0 };
3816	CurDAG->SelectNodeTo(N, MachineOpc: ARM::ADDrsi, VT: MVT::i32, Ops);
3817	return;
3818	}
3819	}
3820	if (isPowerOf2_32(Value: RHSV+`1`)) { // 2^n-1?
3821	unsigned ShImm = Log2_32(Value: RHSV+`1`);
3822	if (ShImm >= `32`)
3823	break;
3824	SDValue V = N->getOperand(Num: `0`);
3825	ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3826	SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3827	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3828	if (Subtarget->isThumb()) {
3829	SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3830	CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2RSBrs, VT: MVT::i32, Ops);
3831	return;
3832	} else {
3833	SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3834	Reg0 };
3835	CurDAG->SelectNodeTo(N, MachineOpc: ARM::RSBrsi, VT: MVT::i32, Ops);
3836	return;
3837	}
3838	}
3839	}
3840	break;
3841	case ISD::AND: {
3842	// Check for unsigned bitfield extract
3843	if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3844	return;
3845
3846	// If an immediate is used in an AND node, it is possible that the immediate
3847	// can be more optimally materialized when negated. If this is the case we
3848	// can negate the immediate and use a BIC instead.
3849	auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
3850	if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3851	uint32_t Imm = (uint32_t) N1C->getZExtValue();
3852
3853	// In Thumb2 mode, an AND can take a 12-bit immediate. If this
3854	// immediate can be negated and fit in the immediate operand of
3855	// a t2BIC, don't do any manual transform here as this can be
3856	// handled by the generic ISel machinery.
3857	bool PreferImmediateEncoding =
3858	Subtarget->hasThumb2() && (is_t2_so_imm(Imm) \|\| is_t2_so_imm_not(Imm));
3859	if (!PreferImmediateEncoding &&
3860	ConstantMaterializationCost(Val: Imm, Subtarget) >
3861	ConstantMaterializationCost(Val: ~Imm, Subtarget)) {
3862	// The current immediate costs more to materialize than a negated
3863	// immediate, so negate the immediate and use a BIC.
3864	SDValue NewImm =
3865	CurDAG->getConstant(Val: ~N1C->getZExtValue(), DL: dl, VT: MVT::i32);
3866	// If the new constant didn't exist before, reposition it in the topological
3867	// ordering so it is just before N. Otherwise, don't touch its location.
3868	if (NewImm ->getNodeId() == -`1`)
3869	CurDAG->RepositionNode(Position: N->getIterator(), N: NewImm.getNode());
3870
3871	if (!Subtarget->hasThumb2()) {
3872	SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32),
3873	N->getOperand(Num: `0`), NewImm, getAL(CurDAG, dl),
3874	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3875	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::tBIC, dl, VT: MVT::i32, Ops));
3876	return;
3877	} else {
3878	SDValue Ops[] = {N->getOperand(Num: `0`), NewImm, getAL(CurDAG, dl),
3879	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3880	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3881	ReplaceNode(F: N,
3882	T: CurDAG->getMachineNode(Opcode: ARM::t2BICrr, dl, VT: MVT::i32, Ops));
3883	return;
3884	}
3885	}
3886	}
3887
3888	// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3889	// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3890	// are entirely contributed by c2 and lower 16-bits are entirely contributed
3891	// by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3892	// Select it to: "movt x, ((c1 & 0xffff) >> 16)
3893	EVT VT = N->getValueType(ResNo: `0`);
3894	if (VT != MVT::i32)
3895	break;
3896	unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3897	? ARM::t2MOVTi16
3898	: (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : `0`);
3899	if (!Opc)
3900	break;
3901	SDValue N0 = N->getOperand(Num: `0`), N1 = N->getOperand(Num: `1`);
3902	N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3903	if (!N1C)
3904	break;
3905	if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3906	SDValue N2 = N0.getOperand(i: `1`);
3907	ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2);
3908	if (!N2C)
3909	break;
3910	unsigned N1CVal = N1C->getZExtValue();
3911	unsigned N2CVal = N2C->getZExtValue();
3912	if ((N1CVal & `0xffff0000U`) == (N2CVal & `0xffff0000U`) &&
3913	(N1CVal & `0xffffU`) == `0xffffU` &&
3914	(N2CVal & `0xffffU`) == `0x0U`) {
3915	SDValue Imm16 = CurDAG->getTargetConstant(Val: (N2CVal & `0xFFFF0000U`) >> `16`,
3916	DL: dl, VT: MVT::i32);
3917	SDValue Ops[] = { N0.getOperand(i: `0`), Imm16,
3918	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3919	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
3920	return;
3921	}
3922	}
3923
3924	break;
3925	}
3926	case ARMISD::UMAAL: {
3927	unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3928	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`),
3929	N->getOperand(Num: `2`), N->getOperand(Num: `3`),
3930	getAL(CurDAG, dl),
3931	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3932	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3933	return;
3934	}
3935	case ARMISD::UMLAL:{
3936	if (Subtarget->isThumb()) {
3937	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3938	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3939	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3940	ReplaceNode(
3941	F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2UMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3942	return;
3943	}else{
3944	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3945	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3946	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3947	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3948	ReplaceNode(F: N, T: CurDAG->getMachineNode(
3949	Opcode: Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3950	VT1: MVT::i32, VT2: MVT::i32, Ops));
3951	return;
3952	}
3953	}
3954	case ARMISD::SMLAL:{
3955	if (Subtarget->isThumb()) {
3956	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3957	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3958	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3959	ReplaceNode(
3960	F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2SMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3961	return;
3962	}else{
3963	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3964	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3965	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3966	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3967	ReplaceNode(F: N, T: CurDAG->getMachineNode(
3968	Opcode: Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3969	VT1: MVT::i32, VT2: MVT::i32, Ops));
3970	return;
3971	}
3972	}
3973	case ARMISD::SUBE: {
3974	if (!Subtarget->hasV6Ops() \|\| !Subtarget->hasDSP())
3975	break;
3976	// Look for a pattern to match SMMLS
3977	// (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3978	if (N->getOperand(Num: `1`).getOpcode() != ISD::SMUL_LOHI \|\|
3979	N->getOperand(Num: `2`).getOpcode() != ARMISD::SUBC \|\|
3980	!SDValue (N, `1`).use_empty())
3981	break;
3982
3983	if (Subtarget->isThumb())
3984	assert(Subtarget->hasThumb2() &&
3985	"This pattern should not be generated for Thumb");
3986
3987	SDValue SmulLoHi = N->getOperand(Num: `1`);
3988	SDValue Subc = N->getOperand(Num: `2`);
3989	SDValue Zero = Subc.getOperand(i: `0`);
3990
3991	if (!isNullConstant(V: Zero) \|\| Subc.getOperand(i: `1`) != SmulLoHi.getValue(R: `0`) \|\|
3992	N->getOperand(Num: `1`) != SmulLoHi.getValue(R: `1`) \|\|
3993	N->getOperand(Num: `2`) != Subc.getValue(R: `1`))
3994	break;
3995
3996	unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3997	SDValue Ops[] = { SmulLoHi.getOperand(i: `0`), SmulLoHi.getOperand(i: `1`),
3998	N->getOperand(Num: `0`), getAL(CurDAG, dl),
3999	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
4000	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops));
4001	return;
4002	}
4003	case ISD::LOAD: {
4004	if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4005	return;
4006	if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4007	if (tryT2IndexedLoad(N))
4008	return;
4009	} else if (Subtarget->isThumb()) {
4010	if (tryT1IndexedLoad(N))
4011	return;
4012	} else if (tryARMIndexedLoad(N))
4013	return;
4014	// Other cases are autogenerated.
4015	break;
4016	}
4017	case ISD::MLOAD:
4018	if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4019	return;
4020	// Other cases are autogenerated.
4021	break;
4022	case ARMISD::WLSSETUP: {
4023	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::t2WhileLoopSetup, dl, VT: MVT::i32,
4024	Op1: N->getOperand(Num: `0`));
4025	ReplaceUses(F: N, T: New);
4026	CurDAG->RemoveDeadNode(N);
4027	return;
4028	}
4029	case ARMISD::WLS: {
4030	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::t2WhileLoopStart, dl, VT: MVT::Other,
4031	Op1: N->getOperand(Num: `1`), Op2: N->getOperand(Num: `2`),
4032	Op3: N->getOperand(Num: `0`));
4033	ReplaceUses(F: N, T: New);
4034	CurDAG->RemoveDeadNode(N);
4035	return;
4036	}
4037	case ARMISD::LE: {
4038	SDValue Ops[] = { N->getOperand(Num: `1`),
4039	N->getOperand(Num: `2`),
4040	N->getOperand(Num: `0`) };
4041	unsigned Opc = ARM::t2LoopEnd;
4042	SDNode *New = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops);
4043	ReplaceUses(F: N, T: New);
4044	CurDAG->RemoveDeadNode(N);
4045	return;
4046	}
4047	case ARMISD::LDRD: {
4048	if (Subtarget->isThumb2())
4049	break; // TableGen handles isel in this case.
4050	SDValue Base, RegOffset, ImmOffset;
4051	const SDValue &Chain = N->getOperand(Num: `0`);
4052	const SDValue &Addr = N->getOperand(Num: `1`);
4053	SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4054	if (RegOffset != CurDAG->getRegister(Reg: `0`, VT: MVT::i32)) {
4055	// The register-offset variant of LDRD mandates that the register
4056	// allocated to RegOffset is not reused in any of the remaining operands.
4057	// This restriction is currently not enforced. Therefore emitting this
4058	// variant is explicitly avoided.
4059	Base = Addr;
4060	RegOffset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4061	}
4062	SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4063	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::LOADDUAL, dl,
4064	ResultTys: {MVT::Untyped, MVT::Other}, Ops);
4065	SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
4066	Operand: SDValue (New, `0`));
4067	SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
4068	Operand: SDValue (New, `0`));
4069	transferMemOperands(N, Result: New);
4070	ReplaceUses(F: SDValue (N, `0`), T: Lo);
4071	ReplaceUses(F: SDValue (N, `1`), T: Hi);
4072	ReplaceUses(F: SDValue (N, `2`), T: SDValue (New, `1`));
4073	CurDAG->RemoveDeadNode(N);
4074	return;
4075	}
4076	case ARMISD::STRD: {
4077	if (Subtarget->isThumb2())
4078	break; // TableGen handles isel in this case.
4079	SDValue Base, RegOffset, ImmOffset;
4080	const SDValue &Chain = N->getOperand(Num: `0`);
4081	const SDValue &Addr = N->getOperand(Num: `3`);
4082	SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4083	if (RegOffset != CurDAG->getRegister(Reg: `0`, VT: MVT::i32)) {
4084	// The register-offset variant of STRD mandates that the register
4085	// allocated to RegOffset is not reused in any of the remaining operands.
4086	// This restriction is currently not enforced. Therefore emitting this
4087	// variant is explicitly avoided.
4088	Base = Addr;
4089	RegOffset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4090	}
4091	SDNode *RegPair =
4092	createGPRPairNode(VT: MVT::Untyped, V0: N->getOperand(Num: `1`), V1: N->getOperand(Num: `2`));
4093	SDValue Ops[] = {SDValue (RegPair, `0`), Base, RegOffset, ImmOffset, Chain};
4094	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::STOREDUAL, dl, VT: MVT::Other, Ops);
4095	transferMemOperands(N, Result: New);
4096	ReplaceUses(F: SDValue (N, `0`), T: SDValue (New, `0`));
4097	CurDAG->RemoveDeadNode(N);
4098	return;
4099	}
4100	case ARMISD::LOOP_DEC: {
4101	SDValue Ops[] = { N->getOperand(Num: `1`),
4102	N->getOperand(Num: `2`),
4103	N->getOperand(Num: `0`) };
4104	SDNode *Dec =
4105	CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4106	VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other), Ops);
4107	ReplaceUses(F: N, T: Dec);
4108	CurDAG->RemoveDeadNode(N);
4109	return;
4110	}
4111	case ARMISD::BRCOND: {
4112	// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4113	// Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4114	// Pattern complexity = 6 cost = 1 size = 0
4115
4116	// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4117	// Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4118	// Pattern complexity = 6 cost = 1 size = 0
4119
4120	// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4121	// Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4122	// Pattern complexity = 6 cost = 1 size = 0
4123
4124	unsigned Opc = Subtarget->isThumb() ?
4125	((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4126	SDValue Chain = N->getOperand(Num: `0`);
4127	SDValue N1 = N->getOperand(Num: `1`);
4128	SDValue N2 = N->getOperand(Num: `2`);
4129	SDValue N3 = N->getOperand(Num: `3`);
4130	SDValue InGlue = N->getOperand(Num: `4`);
4131	assert(N1.getOpcode() == ISD::BasicBlock);
4132	assert(N2.getOpcode() == ISD::Constant);
4133	assert(N3.getOpcode() == ISD::Register);
4134
4135	unsigned CC = (unsigned)N2 ->getAsZExtVal();
4136
4137	if (InGlue.getOpcode() == ARMISD::CMPZ) {
4138	if (InGlue.getOperand(i: `0`).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4139	SDValue Int = InGlue.getOperand(i: `0`);
4140	uint64_t ID = Int ->getConstantOperandVal(Num: `1`);
4141
4142	// Handle low-overhead loops.
4143	if (ID == Intrinsic::loop_decrement_reg) {
4144	SDValue Elements = Int.getOperand(i: `2`);
4145	SDValue Size = CurDAG->getTargetConstant(Val: Int.getConstantOperandVal(i: `3`),
4146	DL: dl, VT: MVT::i32);
4147
4148	SDValue Args[] = { Elements, Size, Int.getOperand(i: `0`) };
4149	SDNode *LoopDec =
4150	CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4151	VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other),
4152	Ops: Args);
4153	ReplaceUses(F: Int.getNode(), T: LoopDec);
4154
4155	SDValue EndArgs[] = { SDValue (LoopDec, `0`), N1, Chain };
4156	SDNode *LoopEnd =
4157	CurDAG->getMachineNode(Opcode: ARM::t2LoopEnd, dl, VT: MVT::Other, Ops: EndArgs);
4158
4159	ReplaceUses(F: N, T: LoopEnd);
4160	CurDAG->RemoveDeadNode(N);
4161	CurDAG->RemoveDeadNode(N: InGlue.getNode());
4162	CurDAG->RemoveDeadNode(N: Int.getNode());
4163	return;
4164	}
4165	}
4166
4167	bool SwitchEQNEToPLMI;
4168	SelectCMPZ(N: InGlue.getNode(), SwitchEQNEToPLMI);
4169	InGlue = N->getOperand(Num: `4`);
4170
4171	if (SwitchEQNEToPLMI) {
4172	switch ((ARMCC::CondCodes)CC) {
4173	default: llvm_unreachable("CMPZ must be either NE or EQ!");
4174	case ARMCC::NE:
4175	CC = (unsigned)ARMCC::MI;
4176	break;
4177	case ARMCC::EQ:
4178	CC = (unsigned)ARMCC::PL;
4179	break;
4180	}
4181	}
4182	}
4183
4184	SDValue Tmp2 = CurDAG->getTargetConstant(Val: CC, DL: dl, VT: MVT::i32);
4185	SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
4186	SDNode *ResNode = CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::Other,
4187	VT2: MVT::Glue, Ops);
4188	Chain = SDValue (ResNode, `0`);
4189	if (N->getNumValues() == `2`) {
4190	InGlue = SDValue (ResNode, `1`);
4191	ReplaceUses(F: SDValue (N, `1`), T: InGlue);
4192	}
4193	ReplaceUses(F: SDValue (N, `0`),
4194	T: SDValue (Chain.getNode(), Chain.getResNo()));
4195	CurDAG->RemoveDeadNode(N);
4196	return;
4197	}
4198
4199	case ARMISD::CMPZ: {
4200	// select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4201	// This allows us to avoid materializing the expensive negative constant.
4202	// The CMPZ #0 is useless and will be peepholed away but we need to keep it
4203	// for its glue output.
4204	SDValue X = N->getOperand(Num: `0`);
4205	auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`).getNode());
4206	if (C && C->getSExtValue() < `0` && Subtarget->isThumb()) {
4207	int64_t Addend = -C->getSExtValue();
4208
4209	SDNode Add = nullptr*;
4210	// ADDS can be better than CMN if the immediate fits in a
4211	// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4212	// Outside that range we can just use a CMN which is 32-bit but has a
4213	// 12-bit immediate range.
4214	if (Addend < `1`<<`8`) {
4215	if (Subtarget->isThumb2()) {
4216	SDValue Ops[] = { X, CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4217	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
4218	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
4219	Add = CurDAG->getMachineNode(Opcode: ARM::t2ADDri, dl, VT: MVT::i32, Ops);
4220	} else {
4221	unsigned Opc = (Addend < `1`<<`3`) ? ARM::tADDi3 : ARM::tADDi8;
4222	SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), X,
4223	CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4224	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
4225	Add = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
4226	}
4227	}
4228	if (Add) {
4229	SDValue Ops2[] = {SDValue (Add, `0`), CurDAG->getConstant(Val: `0`, DL: dl, VT: MVT::i32)};
4230	CurDAG->MorphNodeTo(N, Opc: ARMISD::CMPZ, VTs: CurDAG->getVTList(VT: MVT::Glue), Ops: Ops2);
4231	}
4232	}
4233	// Other cases are autogenerated.
4234	break;
4235	}
4236
4237	case ARMISD::CMOV: {
4238	SDValue InGlue = N->getOperand(Num: `4`);
4239
4240	if (InGlue.getOpcode() == ARMISD::CMPZ) {
4241	bool SwitchEQNEToPLMI;
4242	SelectCMPZ(N: InGlue.getNode(), SwitchEQNEToPLMI);
4243
4244	if (SwitchEQNEToPLMI) {
4245	SDValue ARMcc = N->getOperand(Num: `2`);
4246	ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc ->getAsZExtVal();
4247
4248	switch (CC) {
4249	default: llvm_unreachable("CMPZ must be either NE or EQ!");
4250	case ARMCC::NE:
4251	CC = ARMCC::MI;
4252	break;
4253	case ARMCC::EQ:
4254	CC = ARMCC::PL;
4255	break;
4256	}
4257	SDValue NewARMcc = CurDAG->getConstant(Val: (unsigned)CC, DL: dl, VT: MVT::i32);
4258	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), NewARMcc,
4259	N->getOperand(Num: `3`), N->getOperand(Num: `4`)};
4260	CurDAG->MorphNodeTo(N, Opc: ARMISD::CMOV, VTs: N->getVTList(), Ops);
4261	}
4262
4263	}
4264	// Other cases are autogenerated.
4265	break;
4266	}
4267	case ARMISD::VZIP: {
4268	EVT VT = N->getValueType(ResNo: `0`);
4269	// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4270	unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4271	unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4272	unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4273	SDValue Pred = getAL(CurDAG, dl);
4274	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4275	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), Pred, PredReg};
4276	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4277	return;
4278	}
4279	case ARMISD::VUZP: {
4280	EVT VT = N->getValueType(ResNo: `0`);
4281	// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4282	unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4283	unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4284	unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4285	SDValue Pred = getAL(CurDAG, dl);
4286	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4287	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), Pred, PredReg};
4288	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4289	return;
4290	}
4291	case ARMISD::VTRN: {
4292	EVT VT = N->getValueType(ResNo: `0`);
4293	unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4294	unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4295	unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4296	SDValue Pred = getAL(CurDAG, dl);
4297	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4298	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), Pred, PredReg};
4299	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4300	return;
4301	}
4302	case ARMISD::BUILD_VECTOR: {
4303	EVT VecVT = N->getValueType(ResNo: `0`);
4304	EVT EltVT = VecVT.getVectorElementType();
4305	unsigned NumElts = VecVT.getVectorNumElements();
4306	if (EltVT == MVT::f64) {
4307	assert(NumElts == `2` && "unexpected type for BUILD_VECTOR");
4308	ReplaceNode(
4309	F: N, T: createDRegPairNode(VT: VecVT, V0: N->getOperand(Num: `0`), V1: N->getOperand(Num: `1`)));
4310	return;
4311	}
4312	assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4313	if (NumElts == `2`) {
4314	ReplaceNode(
4315	F: N, T: createSRegPairNode(VT: VecVT, V0: N->getOperand(Num: `0`), V1: N->getOperand(Num: `1`)));
4316	return;
4317	}
4318	assert(NumElts == `4` && "unexpected type for BUILD_VECTOR");
4319	ReplaceNode(F: N,
4320	T: createQuadSRegsNode(VT: VecVT, V0: N->getOperand(Num: `0`), V1: N->getOperand(Num: `1`),
4321	V2: N->getOperand(Num: `2`), V3: N->getOperand(Num: `3`)));
4322	return;
4323	}
4324
4325	case ARMISD::VLD1DUP: {
4326	static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4327	ARM::VLD1DUPd32 };
4328	static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4329	ARM::VLD1DUPq32 };
4330	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes);
4331	return;
4332	}
4333
4334	case ARMISD::VLD2DUP: {
4335	static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4336	ARM::VLD2DUPd32 };
4337	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `2`, DOpcodes: Opcodes);
4338	return;
4339	}
4340
4341	case ARMISD::VLD3DUP: {
4342	static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4343	ARM::VLD3DUPd16Pseudo,
4344	ARM::VLD3DUPd32Pseudo };
4345	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `3`, DOpcodes: Opcodes);
4346	return;
4347	}
4348
4349	case ARMISD::VLD4DUP: {
4350	static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4351	ARM::VLD4DUPd16Pseudo,
4352	ARM::VLD4DUPd32Pseudo };
4353	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `4`, DOpcodes: Opcodes);
4354	return;
4355	}
4356
4357	case ARMISD::VLD1DUP_UPD: {
4358	static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4359	ARM::VLD1DUPd16wb_fixed,
4360	ARM::VLD1DUPd32wb_fixed };
4361	static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4362	ARM::VLD1DUPq16wb_fixed,
4363	ARM::VLD1DUPq32wb_fixed };
4364	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes);
4365	return;
4366	}
4367
4368	case ARMISD::VLD2DUP_UPD: {
4369	static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4370	ARM::VLD2DUPd16wb_fixed,
4371	ARM::VLD2DUPd32wb_fixed,
4372	ARM::VLD1q64wb_fixed };
4373	static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4374	ARM::VLD2DUPq16EvenPseudo,
4375	ARM::VLD2DUPq32EvenPseudo };
4376	static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4377	ARM::VLD2DUPq16OddPseudoWB_fixed,
4378	ARM::VLD2DUPq32OddPseudoWB_fixed };
4379	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0, QOpcodes1);
4380	return;
4381	}
4382
4383	case ARMISD::VLD3DUP_UPD: {
4384	static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4385	ARM::VLD3DUPd16Pseudo_UPD,
4386	ARM::VLD3DUPd32Pseudo_UPD,
4387	ARM::VLD1d64TPseudoWB_fixed };
4388	static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4389	ARM::VLD3DUPq16EvenPseudo,
4390	ARM::VLD3DUPq32EvenPseudo };
4391	static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4392	ARM::VLD3DUPq16OddPseudo_UPD,
4393	ARM::VLD3DUPq32OddPseudo_UPD };
4394	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4395	return;
4396	}
4397
4398	case ARMISD::VLD4DUP_UPD: {
4399	static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4400	ARM::VLD4DUPd16Pseudo_UPD,
4401	ARM::VLD4DUPd32Pseudo_UPD,
4402	ARM::VLD1d64QPseudoWB_fixed };
4403	static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4404	ARM::VLD4DUPq16EvenPseudo,
4405	ARM::VLD4DUPq32EvenPseudo };
4406	static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4407	ARM::VLD4DUPq16OddPseudo_UPD,
4408	ARM::VLD4DUPq32OddPseudo_UPD };
4409	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4410	return;
4411	}
4412
4413	case ARMISD::VLD1_UPD: {
4414	static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4415	ARM::VLD1d16wb_fixed,
4416	ARM::VLD1d32wb_fixed,
4417	ARM::VLD1d64wb_fixed };
4418	static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4419	ARM::VLD1q16wb_fixed,
4420	ARM::VLD1q32wb_fixed,
4421	ARM::VLD1q64wb_fixed };
4422	SelectVLD(N, isUpdating: true, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4423	return;
4424	}
4425
4426	case ARMISD::VLD2_UPD: {
4427	if (Subtarget->hasNEON()) {
4428	static const uint16_t DOpcodes[] = {
4429	ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4430	ARM::VLD1q64wb_fixed};
4431	static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4432	ARM::VLD2q16PseudoWB_fixed,
4433	ARM::VLD2q32PseudoWB_fixed};
4434	SelectVLD(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4435	} else {
4436	static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4437	ARM::MVE_VLD21_8_wb};
4438	static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4439	ARM::MVE_VLD21_16_wb};
4440	static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4441	ARM::MVE_VLD21_32_wb};
4442	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4443	SelectMVE_VLD(N, NumVecs: `2`, Opcodes, HasWriteback: true);
4444	}
4445	return;
4446	}
4447
4448	case ARMISD::VLD3_UPD: {
4449	static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4450	ARM::VLD3d16Pseudo_UPD,
4451	ARM::VLD3d32Pseudo_UPD,
4452	ARM::VLD1d64TPseudoWB_fixed};
4453	static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4454	ARM::VLD3q16Pseudo_UPD,
4455	ARM::VLD3q32Pseudo_UPD };
4456	static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4457	ARM::VLD3q16oddPseudo_UPD,
4458	ARM::VLD3q32oddPseudo_UPD };
4459	SelectVLD(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4460	return;
4461	}
4462
4463	case ARMISD::VLD4_UPD: {
4464	if (Subtarget->hasNEON()) {
4465	static const uint16_t DOpcodes[] = {
4466	ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4467	ARM::VLD1d64QPseudoWB_fixed};
4468	static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4469	ARM::VLD4q16Pseudo_UPD,
4470	ARM::VLD4q32Pseudo_UPD};
4471	static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4472	ARM::VLD4q16oddPseudo_UPD,
4473	ARM::VLD4q32oddPseudo_UPD};
4474	SelectVLD(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4475	} else {
4476	static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4477	ARM::MVE_VLD42_8,
4478	ARM::MVE_VLD43_8_wb};
4479	static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4480	ARM::MVE_VLD42_16,
4481	ARM::MVE_VLD43_16_wb};
4482	static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4483	ARM::MVE_VLD42_32,
4484	ARM::MVE_VLD43_32_wb};
4485	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4486	SelectMVE_VLD(N, NumVecs: `4`, Opcodes, HasWriteback: true);
4487	}
4488	return;
4489	}
4490
4491	case ARMISD::VLD1x2_UPD: {
4492	if (Subtarget->hasNEON()) {
4493	static const uint16_t DOpcodes[] = {
4494	ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4495	ARM::VLD1q64wb_fixed};
4496	static const uint16_t QOpcodes[] = {
4497	ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4498	ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4499	SelectVLD(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4500	return;
4501	}
4502	break;
4503	}
4504
4505	case ARMISD::VLD1x3_UPD: {
4506	if (Subtarget->hasNEON()) {
4507	static const uint16_t DOpcodes[] = {
4508	ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4509	ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4510	static const uint16_t QOpcodes0[] = {
4511	ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4512	ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4513	static const uint16_t QOpcodes1[] = {
4514	ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4515	ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4516	SelectVLD(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4517	return;
4518	}
4519	break;
4520	}
4521
4522	case ARMISD::VLD1x4_UPD: {
4523	if (Subtarget->hasNEON()) {
4524	static const uint16_t DOpcodes[] = {
4525	ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4526	ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4527	static const uint16_t QOpcodes0[] = {
4528	ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4529	ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4530	static const uint16_t QOpcodes1[] = {
4531	ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4532	ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4533	SelectVLD(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4534	return;
4535	}
4536	break;
4537	}
4538
4539	case ARMISD::VLD2LN_UPD: {
4540	static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4541	ARM::VLD2LNd16Pseudo_UPD,
4542	ARM::VLD2LNd32Pseudo_UPD };
4543	static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4544	ARM::VLD2LNq32Pseudo_UPD };
4545	SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes);
4546	return;
4547	}
4548
4549	case ARMISD::VLD3LN_UPD: {
4550	static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4551	ARM::VLD3LNd16Pseudo_UPD,
4552	ARM::VLD3LNd32Pseudo_UPD };
4553	static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4554	ARM::VLD3LNq32Pseudo_UPD };
4555	SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes);
4556	return;
4557	}
4558
4559	case ARMISD::VLD4LN_UPD: {
4560	static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4561	ARM::VLD4LNd16Pseudo_UPD,
4562	ARM::VLD4LNd32Pseudo_UPD };
4563	static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4564	ARM::VLD4LNq32Pseudo_UPD };
4565	SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes);
4566	return;
4567	}
4568
4569	case ARMISD::VST1_UPD: {
4570	static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4571	ARM::VST1d16wb_fixed,
4572	ARM::VST1d32wb_fixed,
4573	ARM::VST1d64wb_fixed };
4574	static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4575	ARM::VST1q16wb_fixed,
4576	ARM::VST1q32wb_fixed,
4577	ARM::VST1q64wb_fixed };
4578	SelectVST(N, isUpdating: true, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4579	return;
4580	}
4581
4582	case ARMISD::VST2_UPD: {
4583	if (Subtarget->hasNEON()) {
4584	static const uint16_t DOpcodes[] = {
4585	ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4586	ARM::VST1q64wb_fixed};
4587	static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4588	ARM::VST2q16PseudoWB_fixed,
4589	ARM::VST2q32PseudoWB_fixed};
4590	SelectVST(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4591	return;
4592	}
4593	break;
4594	}
4595
4596	case ARMISD::VST3_UPD: {
4597	static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4598	ARM::VST3d16Pseudo_UPD,
4599	ARM::VST3d32Pseudo_UPD,
4600	ARM::VST1d64TPseudoWB_fixed};
4601	static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4602	ARM::VST3q16Pseudo_UPD,
4603	ARM::VST3q32Pseudo_UPD };
4604	static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4605	ARM::VST3q16oddPseudo_UPD,
4606	ARM::VST3q32oddPseudo_UPD };
4607	SelectVST(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4608	return;
4609	}
4610
4611	case ARMISD::VST4_UPD: {
4612	if (Subtarget->hasNEON()) {
4613	static const uint16_t DOpcodes[] = {
4614	ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4615	ARM::VST1d64QPseudoWB_fixed};
4616	static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4617	ARM::VST4q16Pseudo_UPD,
4618	ARM::VST4q32Pseudo_UPD};
4619	static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4620	ARM::VST4q16oddPseudo_UPD,
4621	ARM::VST4q32oddPseudo_UPD};
4622	SelectVST(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4623	return;
4624	}
4625	break;
4626	}
4627
4628	case ARMISD::VST1x2_UPD: {
4629	if (Subtarget->hasNEON()) {
4630	static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4631	ARM::VST1q16wb_fixed,
4632	ARM::VST1q32wb_fixed,
4633	ARM::VST1q64wb_fixed};
4634	static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4635	ARM::VST1d16QPseudoWB_fixed,
4636	ARM::VST1d32QPseudoWB_fixed,
4637	ARM::VST1d64QPseudoWB_fixed };
4638	SelectVST(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4639	return;
4640	}
4641	break;
4642	}
4643
4644	case ARMISD::VST1x3_UPD: {
4645	if (Subtarget->hasNEON()) {
4646	static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4647	ARM::VST1d16TPseudoWB_fixed,
4648	ARM::VST1d32TPseudoWB_fixed,
4649	ARM::VST1d64TPseudoWB_fixed };
4650	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4651	ARM::VST1q16LowTPseudo_UPD,
4652	ARM::VST1q32LowTPseudo_UPD,
4653	ARM::VST1q64LowTPseudo_UPD };
4654	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4655	ARM::VST1q16HighTPseudo_UPD,
4656	ARM::VST1q32HighTPseudo_UPD,
4657	ARM::VST1q64HighTPseudo_UPD };
4658	SelectVST(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4659	return;
4660	}
4661	break;
4662	}
4663
4664	case ARMISD::VST1x4_UPD: {
4665	if (Subtarget->hasNEON()) {
4666	static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4667	ARM::VST1d16QPseudoWB_fixed,
4668	ARM::VST1d32QPseudoWB_fixed,
4669	ARM::VST1d64QPseudoWB_fixed };
4670	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4671	ARM::VST1q16LowQPseudo_UPD,
4672	ARM::VST1q32LowQPseudo_UPD,
4673	ARM::VST1q64LowQPseudo_UPD };
4674	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4675	ARM::VST1q16HighQPseudo_UPD,
4676	ARM::VST1q32HighQPseudo_UPD,
4677	ARM::VST1q64HighQPseudo_UPD };
4678	SelectVST(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4679	return;
4680	}
4681	break;
4682	}
4683	case ARMISD::VST2LN_UPD: {
4684	static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4685	ARM::VST2LNd16Pseudo_UPD,
4686	ARM::VST2LNd32Pseudo_UPD };
4687	static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4688	ARM::VST2LNq32Pseudo_UPD };
4689	SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes);
4690	return;
4691	}
4692
4693	case ARMISD::VST3LN_UPD: {
4694	static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4695	ARM::VST3LNd16Pseudo_UPD,
4696	ARM::VST3LNd32Pseudo_UPD };
4697	static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4698	ARM::VST3LNq32Pseudo_UPD };
4699	SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes);
4700	return;
4701	}
4702
4703	case ARMISD::VST4LN_UPD: {
4704	static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4705	ARM::VST4LNd16Pseudo_UPD,
4706	ARM::VST4LNd32Pseudo_UPD };
4707	static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4708	ARM::VST4LNq32Pseudo_UPD };
4709	SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes);
4710	return;
4711	}
4712
4713	case ISD::INTRINSIC_VOID:
4714	case ISD::INTRINSIC_W_CHAIN: {
4715	unsigned IntNo = N->getConstantOperandVal(Num: `1`);
4716	switch (IntNo) {
4717	default:
4718	break;
4719
4720	case Intrinsic::arm_mrrc:
4721	case Intrinsic::arm_mrrc2: {
4722	SDLoc dl(N);
4723	SDValue Chain = N->getOperand(Num: `0`);
4724	unsigned Opc;
4725
4726	if (Subtarget->isThumb())
4727	Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4728	else
4729	Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4730
4731	SmallVector<SDValue, `5`> Ops;
4732	Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: `2`), dl)); / coproc /
4733	Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: `3`), dl)); / opc /
4734	Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: `4`), dl)); / CRm /
4735
4736	// The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of
4737	// the encoded instruction are always '1111'. Assembly language permits
4738	// specifying AL as a predicate to mrrc2, but that does not change the encoding.
4739	if (Opc != ARM::MRRC2) {
4740	Ops.push_back(Elt: getAL(CurDAG, dl));
4741	Ops.push_back(Elt: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
4742	}
4743
4744	Ops.push_back(Elt: Chain);
4745
4746	// Writes to two registers.
4747	const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4748
4749	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: RetType, Ops));
4750	return;
4751	}
4752	case Intrinsic::arm_ldaexd:
4753	case Intrinsic::arm_ldrexd: {
4754	SDLoc dl(N);
4755	SDValue Chain = N->getOperand(Num: `0`);
4756	SDValue MemAddr = N->getOperand(Num: `2`);
4757	bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4758
4759	bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4760	unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4761	: (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4762
4763	// arm_ldrexd returns an i64 value in {i32, i32}
4764	std::vector<EVT> ResTys;
4765	if (isThumb) {
4766	ResTys.push_back(x: MVT::i32);
4767	ResTys.push_back(x: MVT::i32);
4768	} else
4769	ResTys.push_back(x: MVT::Untyped);
4770	ResTys.push_back(x: MVT::Other);
4771
4772	// Place arguments in the right order.
4773	SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4774	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain};
4775	SDNode *Ld = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
4776	// Transfer memoperands.
4777	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
4778	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
4779
4780	// Remap uses.
4781	SDValue OutChain = isThumb ? SDValue (Ld, `2`) : SDValue (Ld, `1`);
4782	if (!SDValue (N, `0`).use_empty()) {
4783	SDValue Result;
4784	if (isThumb)
4785	Result = SDValue (Ld, `0`);
4786	else {
4787	SDValue SubRegIdx =
4788	CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
4789	SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
4790	dl, VT: MVT::i32, Op1: SDValue (Ld, `0`), Op2: SubRegIdx);
4791	Result = SDValue (ResNode,`0`);
4792	}
4793	ReplaceUses(F: SDValue (N, `0`), T: Result);
4794	}
4795	if (!SDValue (N, `1`).use_empty()) {
4796	SDValue Result;
4797	if (isThumb)
4798	Result = SDValue (Ld, `1`);
4799	else {
4800	SDValue SubRegIdx =
4801	CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
4802	SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
4803	dl, VT: MVT::i32, Op1: SDValue (Ld, `0`), Op2: SubRegIdx);
4804	Result = SDValue (ResNode,`0`);
4805	}
4806	ReplaceUses(F: SDValue (N, `1`), T: Result);
4807	}
4808	ReplaceUses(F: SDValue (N, `2`), T: OutChain);
4809	CurDAG->RemoveDeadNode(N);
4810	return;
4811	}
4812	case Intrinsic::arm_stlexd:
4813	case Intrinsic::arm_strexd: {
4814	SDLoc dl(N);
4815	SDValue Chain = N->getOperand(Num: `0`);
4816	SDValue Val0 = N->getOperand(Num: `2`);
4817	SDValue Val1 = N->getOperand(Num: `3`);
4818	SDValue MemAddr = N->getOperand(Num: `4`);
4819
4820	// Store-exclusive-double returns an i32 value, which is the return status
4821	// of the issued store.
4822	const EVT ResTys[] = {MVT::i32, MVT::Other};
4823
4824	bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4825	// Place arguments in the right order.
4826	SmallVector<SDValue, `7`> Ops;
4827	if (isThumb) {
4828	Ops.push_back(Elt: Val0);
4829	Ops.push_back(Elt: Val1);
4830	} else
4831	// arm_strexd uses GPRPair.
4832	Ops.push_back(Elt: SDValue (createGPRPairNode(VT: MVT::Untyped, V0: Val0, V1: Val1), `0`));
4833	Ops.push_back(Elt: MemAddr);
4834	Ops.push_back(Elt: getAL(CurDAG, dl));
4835	Ops.push_back(Elt: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
4836	Ops.push_back(Elt: Chain);
4837
4838	bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4839	unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4840	: (IsRelease ? ARM::STLEXD : ARM::STREXD);
4841
4842	SDNode *St = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
4843	// Transfer memoperands.
4844	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
4845	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
4846
4847	ReplaceNode(F: N, T: St);
4848	return;
4849	}
4850
4851	case Intrinsic::arm_neon_vld1: {
4852	static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4853	ARM::VLD1d32, ARM::VLD1d64 };
4854	static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4855	ARM::VLD1q32, ARM::VLD1q64};
4856	SelectVLD(N, isUpdating: false, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4857	return;
4858	}
4859
4860	case Intrinsic::arm_neon_vld1x2: {
4861	static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4862	ARM::VLD1q32, ARM::VLD1q64 };
4863	static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4864	ARM::VLD1d16QPseudo,
4865	ARM::VLD1d32QPseudo,
4866	ARM::VLD1d64QPseudo };
4867	SelectVLD(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4868	return;
4869	}
4870
4871	case Intrinsic::arm_neon_vld1x3: {
4872	static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4873	ARM::VLD1d16TPseudo,
4874	ARM::VLD1d32TPseudo,
4875	ARM::VLD1d64TPseudo };
4876	static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4877	ARM::VLD1q16LowTPseudo_UPD,
4878	ARM::VLD1q32LowTPseudo_UPD,
4879	ARM::VLD1q64LowTPseudo_UPD };
4880	static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4881	ARM::VLD1q16HighTPseudo,
4882	ARM::VLD1q32HighTPseudo,
4883	ARM::VLD1q64HighTPseudo };
4884	SelectVLD(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4885	return;
4886	}
4887
4888	case Intrinsic::arm_neon_vld1x4: {
4889	static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4890	ARM::VLD1d16QPseudo,
4891	ARM::VLD1d32QPseudo,
4892	ARM::VLD1d64QPseudo };
4893	static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4894	ARM::VLD1q16LowQPseudo_UPD,
4895	ARM::VLD1q32LowQPseudo_UPD,
4896	ARM::VLD1q64LowQPseudo_UPD };
4897	static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4898	ARM::VLD1q16HighQPseudo,
4899	ARM::VLD1q32HighQPseudo,
4900	ARM::VLD1q64HighQPseudo };
4901	SelectVLD(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4902	return;
4903	}
4904
4905	case Intrinsic::arm_neon_vld2: {
4906	static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4907	ARM::VLD2d32, ARM::VLD1q64 };
4908	static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4909	ARM::VLD2q32Pseudo };
4910	SelectVLD(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4911	return;
4912	}
4913
4914	case Intrinsic::arm_neon_vld3: {
4915	static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4916	ARM::VLD3d16Pseudo,
4917	ARM::VLD3d32Pseudo,
4918	ARM::VLD1d64TPseudo };
4919	static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4920	ARM::VLD3q16Pseudo_UPD,
4921	ARM::VLD3q32Pseudo_UPD };
4922	static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4923	ARM::VLD3q16oddPseudo,
4924	ARM::VLD3q32oddPseudo };
4925	SelectVLD(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4926	return;
4927	}
4928
4929	case Intrinsic::arm_neon_vld4: {
4930	static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4931	ARM::VLD4d16Pseudo,
4932	ARM::VLD4d32Pseudo,
4933	ARM::VLD1d64QPseudo };
4934	static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4935	ARM::VLD4q16Pseudo_UPD,
4936	ARM::VLD4q32Pseudo_UPD };
4937	static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4938	ARM::VLD4q16oddPseudo,
4939	ARM::VLD4q32oddPseudo };
4940	SelectVLD(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4941	return;
4942	}
4943
4944	case Intrinsic::arm_neon_vld2dup: {
4945	static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4946	ARM::VLD2DUPd32, ARM::VLD1q64 };
4947	static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4948	ARM::VLD2DUPq16EvenPseudo,
4949	ARM::VLD2DUPq32EvenPseudo };
4950	static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4951	ARM::VLD2DUPq16OddPseudo,
4952	ARM::VLD2DUPq32OddPseudo };
4953	SelectVLDDup(N, / IsIntrinsic= / true, isUpdating: false, NumVecs: `2`,
4954	DOpcodes, QOpcodes0, QOpcodes1);
4955	return;
4956	}
4957
4958	case Intrinsic::arm_neon_vld3dup: {
4959	static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4960	ARM::VLD3DUPd16Pseudo,
4961	ARM::VLD3DUPd32Pseudo,
4962	ARM::VLD1d64TPseudo };
4963	static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4964	ARM::VLD3DUPq16EvenPseudo,
4965	ARM::VLD3DUPq32EvenPseudo };
4966	static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4967	ARM::VLD3DUPq16OddPseudo,
4968	ARM::VLD3DUPq32OddPseudo };
4969	SelectVLDDup(N, / IsIntrinsic= / true, isUpdating: false, NumVecs: `3`,
4970	DOpcodes, QOpcodes0, QOpcodes1);
4971	return;
4972	}
4973
4974	case Intrinsic::arm_neon_vld4dup: {
4975	static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4976	ARM::VLD4DUPd16Pseudo,
4977	ARM::VLD4DUPd32Pseudo,
4978	ARM::VLD1d64QPseudo };
4979	static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4980	ARM::VLD4DUPq16EvenPseudo,
4981	ARM::VLD4DUPq32EvenPseudo };
4982	static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4983	ARM::VLD4DUPq16OddPseudo,
4984	ARM::VLD4DUPq32OddPseudo };
4985	SelectVLDDup(N, / IsIntrinsic= / true, isUpdating: false, NumVecs: `4`,
4986	DOpcodes, QOpcodes0, QOpcodes1);
4987	return;
4988	}
4989
4990	case Intrinsic::arm_neon_vld2lane: {
4991	static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4992	ARM::VLD2LNd16Pseudo,
4993	ARM::VLD2LNd32Pseudo };
4994	static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4995	ARM::VLD2LNq32Pseudo };
4996	SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes);
4997	return;
4998	}
4999
5000	case Intrinsic::arm_neon_vld3lane: {
5001	static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
5002	ARM::VLD3LNd16Pseudo,
5003	ARM::VLD3LNd32Pseudo };
5004	static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
5005	ARM::VLD3LNq32Pseudo };
5006	SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes);
5007	return;
5008	}
5009
5010	case Intrinsic::arm_neon_vld4lane: {
5011	static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5012	ARM::VLD4LNd16Pseudo,
5013	ARM::VLD4LNd32Pseudo };
5014	static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5015	ARM::VLD4LNq32Pseudo };
5016	SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes);
5017	return;
5018	}
5019
5020	case Intrinsic::arm_neon_vst1: {
5021	static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5022	ARM::VST1d32, ARM::VST1d64 };
5023	static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5024	ARM::VST1q32, ARM::VST1q64 };
5025	SelectVST(N, isUpdating: false, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5026	return;
5027	}
5028
5029	case Intrinsic::arm_neon_vst1x2: {
5030	static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5031	ARM::VST1q32, ARM::VST1q64 };
5032	static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5033	ARM::VST1d16QPseudo,
5034	ARM::VST1d32QPseudo,
5035	ARM::VST1d64QPseudo };
5036	SelectVST(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5037	return;
5038	}
5039
5040	case Intrinsic::arm_neon_vst1x3: {
5041	static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5042	ARM::VST1d16TPseudo,
5043	ARM::VST1d32TPseudo,
5044	ARM::VST1d64TPseudo };
5045	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5046	ARM::VST1q16LowTPseudo_UPD,
5047	ARM::VST1q32LowTPseudo_UPD,
5048	ARM::VST1q64LowTPseudo_UPD };
5049	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5050	ARM::VST1q16HighTPseudo,
5051	ARM::VST1q32HighTPseudo,
5052	ARM::VST1q64HighTPseudo };
5053	SelectVST(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
5054	return;
5055	}
5056
5057	case Intrinsic::arm_neon_vst1x4: {
5058	static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5059	ARM::VST1d16QPseudo,
5060	ARM::VST1d32QPseudo,
5061	ARM::VST1d64QPseudo };
5062	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5063	ARM::VST1q16LowQPseudo_UPD,
5064	ARM::VST1q32LowQPseudo_UPD,
5065	ARM::VST1q64LowQPseudo_UPD };
5066	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5067	ARM::VST1q16HighQPseudo,
5068	ARM::VST1q32HighQPseudo,
5069	ARM::VST1q64HighQPseudo };
5070	SelectVST(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
5071	return;
5072	}
5073
5074	case Intrinsic::arm_neon_vst2: {
5075	static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5076	ARM::VST2d32, ARM::VST1q64 };
5077	static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5078	ARM::VST2q32Pseudo };
5079	SelectVST(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5080	return;
5081	}
5082
5083	case Intrinsic::arm_neon_vst3: {
5084	static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5085	ARM::VST3d16Pseudo,
5086	ARM::VST3d32Pseudo,
5087	ARM::VST1d64TPseudo };
5088	static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5089	ARM::VST3q16Pseudo_UPD,
5090	ARM::VST3q32Pseudo_UPD };
5091	static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5092	ARM::VST3q16oddPseudo,
5093	ARM::VST3q32oddPseudo };
5094	SelectVST(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
5095	return;
5096	}
5097
5098	case Intrinsic::arm_neon_vst4: {
5099	static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5100	ARM::VST4d16Pseudo,
5101	ARM::VST4d32Pseudo,
5102	ARM::VST1d64QPseudo };
5103	static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5104	ARM::VST4q16Pseudo_UPD,
5105	ARM::VST4q32Pseudo_UPD };
5106	static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5107	ARM::VST4q16oddPseudo,
5108	ARM::VST4q32oddPseudo };
5109	SelectVST(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
5110	return;
5111	}
5112
5113	case Intrinsic::arm_neon_vst2lane: {
5114	static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5115	ARM::VST2LNd16Pseudo,
5116	ARM::VST2LNd32Pseudo };
5117	static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5118	ARM::VST2LNq32Pseudo };
5119	SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes);
5120	return;
5121	}
5122
5123	case Intrinsic::arm_neon_vst3lane: {
5124	static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5125	ARM::VST3LNd16Pseudo,
5126	ARM::VST3LNd32Pseudo };
5127	static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5128	ARM::VST3LNq32Pseudo };
5129	SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes);
5130	return;
5131	}
5132
5133	case Intrinsic::arm_neon_vst4lane: {
5134	static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5135	ARM::VST4LNd16Pseudo,
5136	ARM::VST4LNd32Pseudo };
5137	static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5138	ARM::VST4LNq32Pseudo };
5139	SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes);
5140	return;
5141	}
5142
5143	case Intrinsic::arm_mve_vldr_gather_base_wb:
5144	case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5145	static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5146	ARM::MVE_VLDRDU64_qi_pre};
5147	SelectMVE_WB(N, Opcodes,
5148	Predicated: IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5149	return;
5150	}
5151
5152	case Intrinsic::arm_mve_vld2q: {
5153	static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5154	static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5155	ARM::MVE_VLD21_16};
5156	static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5157	ARM::MVE_VLD21_32};
5158	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5159	SelectMVE_VLD(N, NumVecs: `2`, Opcodes, HasWriteback: false);
5160	return;
5161	}
5162
5163	case Intrinsic::arm_mve_vld4q: {
5164	static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5165	ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5166	static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5167	ARM::MVE_VLD42_16,
5168	ARM::MVE_VLD43_16};
5169	static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5170	ARM::MVE_VLD42_32,
5171	ARM::MVE_VLD43_32};
5172	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5173	SelectMVE_VLD(N, NumVecs: `4`, Opcodes, HasWriteback: false);
5174	return;
5175	}
5176	}
5177	break;
5178	}
5179
5180	case ISD::INTRINSIC_WO_CHAIN: {
5181	unsigned IntNo = N->getConstantOperandVal(Num: `0`);
5182	switch (IntNo) {
5183	default:
5184	break;
5185
5186	// Scalar f32 -> bf16
5187	case Intrinsic::arm_neon_vcvtbfp2bf: {
5188	SDLoc dl(N);
5189	const SDValue &Src = N->getOperand(Num: `1`);
5190	llvm::EVT DestTy = N->getValueType(ResNo: `0`);
5191	SDValue Pred = getAL(CurDAG, dl);
5192	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
5193	SDValue Ops[] = { Src, Src, Pred, Reg0 };
5194	CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVTB, VT: DestTy, Ops);
5195	return;
5196	}
5197
5198	// Vector v4f32 -> v4bf16
5199	case Intrinsic::arm_neon_vcvtfp2bf: {
5200	SDLoc dl(N);
5201	const SDValue &Src = N->getOperand(Num: `1`);
5202	SDValue Pred = getAL(CurDAG, dl);
5203	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
5204	SDValue Ops[] = { Src, Pred, Reg0 };
5205	CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVT, VT: MVT::v4bf16, Ops);
5206	return;
5207	}
5208
5209	case Intrinsic::arm_mve_urshrl:
5210	SelectMVE_LongShift(N, Opcode: ARM::MVE_URSHRL, Immediate: true, HasSaturationOperand: false);
5211	return;
5212	case Intrinsic::arm_mve_uqshll:
5213	SelectMVE_LongShift(N, Opcode: ARM::MVE_UQSHLL, Immediate: true, HasSaturationOperand: false);
5214	return;
5215	case Intrinsic::arm_mve_srshrl:
5216	SelectMVE_LongShift(N, Opcode: ARM::MVE_SRSHRL, Immediate: true, HasSaturationOperand: false);
5217	return;
5218	case Intrinsic::arm_mve_sqshll:
5219	SelectMVE_LongShift(N, Opcode: ARM::MVE_SQSHLL, Immediate: true, HasSaturationOperand: false);
5220	return;
5221	case Intrinsic::arm_mve_uqrshll:
5222	SelectMVE_LongShift(N, Opcode: ARM::MVE_UQRSHLL, Immediate: false, HasSaturationOperand: true);
5223	return;
5224	case Intrinsic::arm_mve_sqrshrl:
5225	SelectMVE_LongShift(N, Opcode: ARM::MVE_SQRSHRL, Immediate: false, HasSaturationOperand: true);
5226	return;
5227
5228	case Intrinsic::arm_mve_vadc:
5229	case Intrinsic::arm_mve_vadc_predicated:
5230	SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VADC, OpcodeWithNoCarry: ARM::MVE_VADCI, Add: true,
5231	Predicated: IntNo == Intrinsic::arm_mve_vadc_predicated);
5232	return;
5233	case Intrinsic::arm_mve_vsbc:
5234	case Intrinsic::arm_mve_vsbc_predicated:
5235	SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VSBC, OpcodeWithNoCarry: ARM::MVE_VSBCI, Add: true,
5236	Predicated: IntNo == Intrinsic::arm_mve_vsbc_predicated);
5237	return;
5238	case Intrinsic::arm_mve_vshlc:
5239	case Intrinsic::arm_mve_vshlc_predicated:
5240	SelectMVE_VSHLC(N, Predicated: IntNo == Intrinsic::arm_mve_vshlc_predicated);
5241	return;
5242
5243	case Intrinsic::arm_mve_vmlldava:
5244	case Intrinsic::arm_mve_vmlldava_predicated: {
5245	static const uint16_t OpcodesU[] = {
5246	ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5247	ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5248	};
5249	static const uint16_t OpcodesS[] = {
5250	ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5251	ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5252	ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5253	ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5254	ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5255	ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5256	ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5257	ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5258	};
5259	SelectMVE_VMLLDAV(N, Predicated: IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5260	OpcodesS, OpcodesU);
5261	return;
5262	}
5263
5264	case Intrinsic::arm_mve_vrmlldavha:
5265	case Intrinsic::arm_mve_vrmlldavha_predicated: {
5266	static const uint16_t OpcodesU[] = {
5267	ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5268	};
5269	static const uint16_t OpcodesS[] = {
5270	ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5271	ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5272	ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5273	ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5274	};
5275	SelectMVE_VRMLLDAVH(N, Predicated: IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5276	OpcodesS, OpcodesU);
5277	return;
5278	}
5279
5280	case Intrinsic::arm_mve_vidup:
5281	case Intrinsic::arm_mve_vidup_predicated: {
5282	static const uint16_t Opcodes[] = {
5283	ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5284	};
5285	SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5286	Predicated: IntNo == Intrinsic::arm_mve_vidup_predicated);
5287	return;
5288	}
5289
5290	case Intrinsic::arm_mve_vddup:
5291	case Intrinsic::arm_mve_vddup_predicated: {
5292	static const uint16_t Opcodes[] = {
5293	ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5294	};
5295	SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5296	Predicated: IntNo == Intrinsic::arm_mve_vddup_predicated);
5297	return;
5298	}
5299
5300	case Intrinsic::arm_mve_viwdup:
5301	case Intrinsic::arm_mve_viwdup_predicated: {
5302	static const uint16_t Opcodes[] = {
5303	ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5304	};
5305	SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5306	Predicated: IntNo == Intrinsic::arm_mve_viwdup_predicated);
5307	return;
5308	}
5309
5310	case Intrinsic::arm_mve_vdwdup:
5311	case Intrinsic::arm_mve_vdwdup_predicated: {
5312	static const uint16_t Opcodes[] = {
5313	ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5314	};
5315	SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5316	Predicated: IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5317	return;
5318	}
5319
5320	case Intrinsic::arm_cde_cx1d:
5321	case Intrinsic::arm_cde_cx1da:
5322	case Intrinsic::arm_cde_cx2d:
5323	case Intrinsic::arm_cde_cx2da:
5324	case Intrinsic::arm_cde_cx3d:
5325	case Intrinsic::arm_cde_cx3da: {
5326	bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da \|\|
5327	IntNo == Intrinsic::arm_cde_cx2da \|\|
5328	IntNo == Intrinsic::arm_cde_cx3da;
5329	size_t NumExtraOps;
5330	uint16_t Opcode;
5331	switch (IntNo) {
5332	case Intrinsic::arm_cde_cx1d:
5333	case Intrinsic::arm_cde_cx1da:
5334	NumExtraOps = `0`;
5335	Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5336	break;
5337	case Intrinsic::arm_cde_cx2d:
5338	case Intrinsic::arm_cde_cx2da:
5339	NumExtraOps = `1`;
5340	Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5341	break;
5342	case Intrinsic::arm_cde_cx3d:
5343	case Intrinsic::arm_cde_cx3da:
5344	NumExtraOps = `2`;
5345	Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5346	break;
5347	default:
5348	llvm_unreachable("Unexpected opcode");
5349	}
5350	SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5351	return;
5352	}
5353	}
5354	break;
5355	}
5356
5357	case ISD::ATOMIC_CMP_SWAP:
5358	SelectCMP_SWAP(N);
5359	return;
5360	}
5361
5362	SelectCode(N);
5363	}
5364
5365	// Inspect a register string of the form
5366	// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5367	// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5368	// and obtain the integer operands from them, adding these operands to the
5369	// provided vector.
5370	static void getIntOperandsFromRegisterString(StringRef RegString,
5371	SelectionDAG *CurDAG,
5372	const SDLoc &DL,
5373	std::vector<SDValue> &Ops) {
5374	SmallVector<StringRef, `5`> Fields;
5375	RegString.split(A&: Fields, Separator: `':'`);
5376
5377	if (Fields.size() > `1`) {
5378	bool AllIntFields = true;
5379
5380	for (StringRef Field : Fields) {
5381	// Need to trim out leading 'cp' characters and get the integer field.
5382	unsigned IntField;
5383	AllIntFields &= !Field.trim(Chars: "CPcp").getAsInteger(Radix: `10`, Result&: IntField);
5384	Ops.push_back(x: CurDAG->getTargetConstant(Val: IntField, DL, VT: MVT::i32));
5385	}
5386
5387	assert(AllIntFields &&
5388	"Unexpected non-integer value in special register string.");
5389	(void)AllIntFields;
5390	}
5391	}
5392
5393	// Maps a Banked Register string to its mask value. The mask value returned is
5394	// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5395	// mask operand, which expresses which register is to be used, e.g. r8, and in
5396	// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5397	// was invalid.
5398	static inline int getBankedRegisterMask(StringRef RegString) {
5399	auto TheReg = ARMBankedReg::lookupBankedRegByName(Name: RegString.lower());
5400	if (!TheReg)
5401	return -`1`;
5402	return TheReg->Encoding;
5403	}
5404
5405	// The flags here are common to those allowed for apsr in the A class cores and
5406	// those allowed for the special registers in the M class cores. Returns a
5407	// value representing which flags were present, -1 if invalid.
5408	static inline int getMClassFlagsMask(StringRef Flags) {
5409	return StringSwitch<int>(Flags)
5410	.Case(S: "", Value: `0x2`) // no flags means nzcvq for psr registers, and 0x2 is
5411	// correct when flags are not permitted
5412	.Case(S: "g", Value: `0x1`)
5413	.Case(S: "nzcvq", Value: `0x2`)
5414	.Case(S: "nzcvqg", Value: `0x3`)
5415	.Default(Value: -`1`);
5416	}
5417
5418	// Maps MClass special registers string to its value for use in the
5419	// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5420	// Returns -1 to signify that the string was invalid.
5421	static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5422	auto TheReg = ARMSysReg::lookupMClassSysRegByName(Name: Reg);
5423	const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5424	if (!TheReg \|\| !TheReg->hasRequiredFeatures(ActiveFeatures: FeatureBits))
5425	return -`1`;
5426	return (int)(TheReg->Encoding & `0xFFF`); // SYSm value
5427	}
5428
5429	static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5430	// The mask operand contains the special register (R Bit) in bit 4, whether
5431	// the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5432	// bits 3-0 contains the fields to be accessed in the special register, set by
5433	// the flags provided with the register.
5434	int Mask = `0`;
5435	if (Reg == "apsr") {
5436	// The flags permitted for apsr are the same flags that are allowed in
5437	// M class registers. We get the flag value and then shift the flags into
5438	// the correct place to combine with the mask.
5439	Mask = getMClassFlagsMask(Flags);
5440	if (Mask == -`1`)
5441	return -`1`;
5442	return Mask << `2`;
5443	}
5444
5445	if (Reg != "cpsr" && Reg != "spsr") {
5446	return -`1`;
5447	}
5448
5449	// This is the same as if the flags were "fc"
5450	if (Flags.empty() \|\| Flags == "all")
5451	return Mask \| `0x9`;
5452
5453	// Inspect the supplied flags string and set the bits in the mask for
5454	// the relevant and valid flags allowed for cpsr and spsr.
5455	for (char Flag : Flags) {
5456	int FlagVal;
5457	switch (Flag) {
5458	case `'c'`:
5459	FlagVal = `0x1`;
5460	break;
5461	case `'x'`:
5462	FlagVal = `0x2`;
5463	break;
5464	case `'s'`:
5465	FlagVal = `0x4`;
5466	break;
5467	case `'f'`:
5468	FlagVal = `0x8`;
5469	break;
5470	default:
5471	FlagVal = `0`;
5472	}
5473
5474	// This avoids allowing strings where the same flag bit appears twice.
5475	if (!FlagVal \|\| (Mask & FlagVal))
5476	return -`1`;
5477	Mask \|= FlagVal;
5478	}
5479
5480	// If the register is spsr then we need to set the R bit.
5481	if (Reg == "spsr")
5482	Mask \|= `0x10`;
5483
5484	return Mask;
5485	}
5486
5487	// Lower the read_register intrinsic to ARM specific DAG nodes
5488	// using the supplied metadata string to select the instruction node to use
5489	// and the registers/masks to construct as operands for the node.
5490	bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5491	const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: `1`));
5492	const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: `0`));
5493	bool IsThumb2 = Subtarget->isThumb2();
5494	SDLoc DL(N);
5495
5496	std::vector<SDValue> Ops;
5497	getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);
5498
5499	if (!Ops.empty()) {
5500	// If the special register string was constructed of fields (as defined
5501	// in the ACLE) then need to lower to MRC node (32 bit) or
5502	// MRRC node(64 bit), we can make the distinction based on the number of
5503	// operands we have.
5504	unsigned Opcode;
5505	SmallVector<EVT, `3`> ResTypes;
5506	if (Ops.size() == `5`){
5507	Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5508	ResTypes.append(IL: { MVT::i32, MVT::Other });
5509	} else {
5510	assert(Ops.size() == `3` &&
5511	"Invalid number of fields in special register string.");
5512	Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5513	ResTypes.append(IL: { MVT::i32, MVT::i32, MVT::Other });
5514	}
5515
5516	Ops.push_back(x: getAL(CurDAG, dl: DL));
5517	Ops.push_back(x: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
5518	Ops.push_back(x: N->getOperand(Num: `0`));
5519	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: ResTypes, Ops));
5520	return true;
5521	}
5522
5523	std::string SpecialReg = RegString->getString().lower();
5524
5525	int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
5526	if (BankedReg != -`1`) {
5527	Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32),
5528	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5529	N->getOperand(Num: `0`) };
5530	ReplaceNode(
5531	F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5532	dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5533	return true;
5534	}
5535
5536	// The VFP registers are read by creating SelectionDAG nodes with opcodes
5537	// corresponding to the register that is being read from. So we switch on the
5538	// string to find which opcode we need to use.
5539	unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5540	.Case(S: "fpscr", Value: ARM::VMRS)
5541	.Case(S: "fpexc", Value: ARM::VMRS_FPEXC)
5542	.Case(S: "fpsid", Value: ARM::VMRS_FPSID)
5543	.Case(S: "mvfr0", Value: ARM::VMRS_MVFR0)
5544	.Case(S: "mvfr1", Value: ARM::VMRS_MVFR1)
5545	.Case(S: "mvfr2", Value: ARM::VMRS_MVFR2)
5546	.Case(S: "fpinst", Value: ARM::VMRS_FPINST)
5547	.Case(S: "fpinst2", Value: ARM::VMRS_FPINST2)
5548	.Default(Value: `0`);
5549
5550	// If an opcode was found then we can lower the read to a VFP instruction.
5551	if (Opcode) {
5552	if (!Subtarget->hasVFP2Base())
5553	return false;
5554	if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5555	return false;
5556
5557	Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5558	N->getOperand(Num: `0`) };
5559	ReplaceNode(F: N,
5560	T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5561	return true;
5562	}
5563
5564	// If the target is M Class then need to validate that the register string
5565	// is an acceptable value, so check that a mask can be constructed from the
5566	// string.
5567	if (Subtarget->isMClass()) {
5568	int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
5569	if (SYSmValue == -`1`)
5570	return false;
5571
5572	SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
5573	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5574	N->getOperand(Num: `0`) };
5575	ReplaceNode(
5576	F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MRS_M, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5577	return true;
5578	}
5579
5580	// Here we know the target is not M Class so we need to check if it is one
5581	// of the remaining possible values which are apsr, cpsr or spsr.
5582	if (SpecialReg == "apsr" \|\| SpecialReg == "cpsr") {
5583	Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5584	N->getOperand(Num: `0`) };
5585	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5586	dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5587	return true;
5588	}
5589
5590	if (SpecialReg == "spsr") {
5591	Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5592	N->getOperand(Num: `0`) };
5593	ReplaceNode(
5594	F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, dl: DL,
5595	VT1: MVT::i32, VT2: MVT::Other, Ops));
5596	return true;
5597	}
5598
5599	return false;
5600	}
5601
5602	// Lower the write_register intrinsic to ARM specific DAG nodes
5603	// using the supplied metadata string to select the instruction node to use
5604	// and the registers/masks to use in the nodes
5605	bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5606	const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: `1`));
5607	const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: `0`));
5608	bool IsThumb2 = Subtarget->isThumb2();
5609	SDLoc DL(N);
5610
5611	std::vector<SDValue> Ops;
5612	getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);
5613
5614	if (!Ops.empty()) {
5615	// If the special register string was constructed of fields (as defined
5616	// in the ACLE) then need to lower to MCR node (32 bit) or
5617	// MCRR node(64 bit), we can make the distinction based on the number of
5618	// operands we have.
5619	unsigned Opcode;
5620	if (Ops.size() == `5`) {
5621	Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5622	Ops.insert(position: Ops.begin()+`2`, x: N->getOperand(Num: `2`));
5623	} else {
5624	assert(Ops.size() == `3` &&
5625	"Invalid number of fields in special register string.");
5626	Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5627	SDValue WriteValue[] = { N->getOperand(Num: `2`), N->getOperand(Num: `3`) };
5628	Ops.insert(position: Ops.begin()+`2`, first: WriteValue, last: WriteValue+`2`);
5629	}
5630
5631	Ops.push_back(x: getAL(CurDAG, dl: DL));
5632	Ops.push_back(x: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
5633	Ops.push_back(x: N->getOperand(Num: `0`));
5634
5635	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
5636	return true;
5637	}
5638
5639	std::string SpecialReg = RegString->getString().lower();
5640	int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
5641	if (BankedReg != -`1`) {
5642	Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32), N->getOperand(Num: `2`),
5643	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5644	N->getOperand(Num: `0`) };
5645	ReplaceNode(
5646	F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5647	dl: DL, VT: MVT::Other, Ops));
5648	return true;
5649	}
5650
5651	// The VFP registers are written to by creating SelectionDAG nodes with
5652	// opcodes corresponding to the register that is being written. So we switch
5653	// on the string to find which opcode we need to use.
5654	unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5655	.Case(S: "fpscr", Value: ARM::VMSR)
5656	.Case(S: "fpexc", Value: ARM::VMSR_FPEXC)
5657	.Case(S: "fpsid", Value: ARM::VMSR_FPSID)
5658	.Case(S: "fpinst", Value: ARM::VMSR_FPINST)
5659	.Case(S: "fpinst2", Value: ARM::VMSR_FPINST2)
5660	.Default(Value: `0`);
5661
5662	if (Opcode) {
5663	if (!Subtarget->hasVFP2Base())
5664	return false;
5665	Ops = { N->getOperand(Num: `2`), getAL(CurDAG, dl: DL),
5666	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), N->getOperand(Num: `0`) };
5667	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
5668	return true;
5669	}
5670
5671	std::pair<StringRef, StringRef> Fields;
5672	Fields = StringRef (SpecialReg).rsplit(Separator: `'_'`);
5673	std::string Reg = Fields.first.str();
5674	StringRef Flags = Fields.second;
5675
5676	// If the target was M Class then need to validate the special register value
5677	// and retrieve the mask for use in the instruction node.
5678	if (Subtarget->isMClass()) {
5679	int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
5680	if (SYSmValue == -`1`)
5681	return false;
5682
5683	SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
5684	N->getOperand(Num: `2`), getAL(CurDAG, dl: DL),
5685	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), N->getOperand(Num: `0`) };
5686	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MSR_M, dl: DL, VT: MVT::Other, Ops));
5687	return true;
5688	}
5689
5690	// We then check to see if a valid mask can be constructed for one of the
5691	// register string values permitted for the A and R class cores. These values
5692	// are apsr, spsr and cpsr; these are also valid on older cores.
5693	int Mask = getARClassRegisterMask(Reg, Flags);
5694	if (Mask != -`1`) {
5695	Ops = { CurDAG->getTargetConstant(Val: Mask, DL, VT: MVT::i32), N->getOperand(Num: `2`),
5696	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5697	N->getOperand(Num: `0`) };
5698	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5699	dl: DL, VT: MVT::Other, Ops));
5700	return true;
5701	}
5702
5703	return false;
5704	}
5705
5706	bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5707	std::vector<SDValue> AsmNodeOperands;
5708	InlineAsm::Flag Flag;
5709	bool Changed = false;
5710	unsigned NumOps = N->getNumOperands();
5711
5712	// Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5713	// However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5714	// (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5715	// respectively. Since there is no constraint to explicitly specify a
5716	// reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5717	// the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5718	// them into a GPRPair.
5719
5720	SDLoc dl(N);
5721	SDValue Glue = N->getGluedNode() ? N->getOperand(Num: NumOps - `1`) : SDValue ();
5722
5723	SmallVector<bool, `8`> OpChanged;
5724	// Glue node will be appended late.
5725	for(unsigned i = `0`, e = N->getGluedNode() ? NumOps - `1` : NumOps; i < e; ++i) {
5726	SDValue op = N->getOperand(Num: i);
5727	AsmNodeOperands.push_back(x: op);
5728
5729	if (i < InlineAsm::Op_FirstOperand)
5730	continue;
5731
5732	if (const auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: i)))
5733	Flag = InlineAsm::Flag (C->getZExtValue());
5734	else
5735	continue;
5736
5737	// Immediate operands to inline asm in the SelectionDAG are modeled with
5738	// two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5739	// the second is a constant with the value of the immediate. If we get here
5740	// and we have a Kind::Imm, skip the next operand, and continue.
5741	if (Flag.isImmKind()) {
5742	SDValue op = N->getOperand(Num: ++i);
5743	AsmNodeOperands.push_back(x: op);
5744	continue;
5745	}
5746
5747	const unsigned NumRegs = Flag.getNumOperandRegisters();
5748	if (NumRegs)
5749	OpChanged.push_back(Elt: false);
5750
5751	unsigned DefIdx = `0`;
5752	bool IsTiedToChangedOp = false;
5753	// If it's a use that is tied with a previous def, it has no
5754	// reg class constraint.
5755	if (Changed && Flag.isUseOperandTiedToDef(Idx&: DefIdx))
5756	IsTiedToChangedOp = OpChanged [DefIdx];
5757
5758	// Memory operands to inline asm in the SelectionDAG are modeled with two
5759	// operands: a constant of value InlineAsm::Kind::Mem followed by the input
5760	// operand. If we get here and we have a Kind::Mem, skip the next operand
5761	// (so it doesn't get misinterpreted), and continue. We do this here because
5762	// it's important to update the OpChanged array correctly before moving on.
5763	if (Flag.isMemKind()) {
5764	SDValue op = N->getOperand(Num: ++i);
5765	AsmNodeOperands.push_back(x: op);
5766	continue;
5767	}
5768
5769	if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5770	!Flag.isRegDefEarlyClobberKind())
5771	continue;
5772
5773	unsigned RC;
5774	const bool HasRC = Flag.hasRegClassConstraint(RC);
5775	if ((!IsTiedToChangedOp && (!HasRC \|\| RC != ARM::GPRRegClassID))
5776	\|\| NumRegs != `2`)
5777	continue;
5778
5779	assert((i+`2` < NumOps) && "Invalid number of operands in inline asm");
5780	SDValue V0 = N->getOperand(Num: i+`1`);
5781	SDValue V1 = N->getOperand(Num: i+`2`);
5782	Register Reg0 = cast<RegisterSDNode>(Val&: V0)->getReg();
5783	Register Reg1 = cast<RegisterSDNode>(Val&: V1)->getReg();
5784	SDValue PairedReg;
5785	MachineRegisterInfo &MRI = MF->getRegInfo();
5786
5787	if (Flag.isRegDefKind() \|\| Flag.isRegDefEarlyClobberKind()) {
5788	// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5789	// the original GPRs.
5790
5791	Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
5792	PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
5793	SDValue Chain = SDValue (N,`0`);
5794
5795	SDNode *GU = N->getGluedUser();
5796	SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, Reg: GPVR, VT: MVT::Untyped,
5797	Glue: Chain.getValue(R: `1`));
5798
5799	// Extract values from a GPRPair reg and copy to the original GPR reg.
5800	SDValue Sub0 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
5801	Operand: RegCopy);
5802	SDValue Sub1 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
5803	Operand: RegCopy);
5804	SDValue T0 = CurDAG->getCopyToReg(Chain: Sub0, dl, Reg: Reg0, N: Sub0,
5805	Glue: RegCopy.getValue(R: `1`));
5806	SDValue T1 = CurDAG->getCopyToReg(Chain: Sub1, dl, Reg: Reg1, N: Sub1, Glue: T0.getValue(R: `1`));
5807
5808	// Update the original glue user.
5809	std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-`1`);
5810	Ops.push_back(x: T1.getValue(R: `1`));
5811	CurDAG->UpdateNodeOperands(N: GU, Ops);
5812	} else {
5813	// For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5814	// GPRPair and then pass the GPRPair to the inline asm.
5815	SDValue Chain = AsmNodeOperands [InlineAsm::Op_InputChain];
5816
5817	// As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5818	SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg0, VT: MVT::i32,
5819	Glue: Chain.getValue(R: `1`));
5820	SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg1, VT: MVT::i32,
5821	Glue: T0.getValue(R: `1`));
5822	SDValue Pair = SDValue (createGPRPairNode(VT: MVT::Untyped, V0: T0, V1: T1), `0`);
5823
5824	// Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5825	// i32 VRs of inline asm with it.
5826	Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
5827	PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
5828	Chain = CurDAG->getCopyToReg(Chain: T1, dl, Reg: GPVR, N: Pair, Glue: T1.getValue(R: `1`));
5829
5830	AsmNodeOperands [InlineAsm::Op_InputChain] = Chain;
5831	Glue = Chain.getValue(R: `1`);
5832	}
5833
5834	Changed = true;
5835
5836	if(PairedReg.getNode()) {
5837	OpChanged [OpChanged.size() -`1` ] = true;
5838	Flag = InlineAsm::Flag (Flag.getKind(), `1` / RegNum/);
5839	if (IsTiedToChangedOp)
5840	Flag.setMatchingOp(DefIdx);
5841	else
5842	Flag.setRegClass(ARM::GPRPairRegClassID);
5843	// Replace the current flag.
5844	AsmNodeOperands [AsmNodeOperands.size() -`1`] = CurDAG->getTargetConstant(
5845	Val: Flag, DL: dl, VT: MVT::i32);
5846	// Add the new register node and skip the original two GPRs.
5847	AsmNodeOperands.push_back(x: PairedReg);
5848	// Skip the next two GPRs.
5849	i += `2`;
5850	}
5851	}
5852
5853	if (Glue.getNode())
5854	AsmNodeOperands.push_back(x: Glue);
5855	if (!Changed)
5856	return false;
5857
5858	SDValue New = CurDAG->getNode(Opcode: N->getOpcode(), DL: SDLoc (N),
5859	VTList: CurDAG->getVTList(VT1: MVT::Other, VT2: MVT::Glue), Ops: AsmNodeOperands);
5860	New ->setNodeId(-`1`);
5861	ReplaceNode(F: N, T: New.getNode());
5862	return true;
5863	}
5864
5865	bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5866	const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5867	std::vector<SDValue> &OutOps) {
5868	switch(ConstraintID) {
5869	default:
5870	llvm_unreachable("Unexpected asm memory constraint");
5871	case InlineAsm::ConstraintCode::m:
5872	case InlineAsm::ConstraintCode::o:
5873	case InlineAsm::ConstraintCode::Q:
5874	case InlineAsm::ConstraintCode::Um:
5875	case InlineAsm::ConstraintCode::Un:
5876	case InlineAsm::ConstraintCode::Uq:
5877	case InlineAsm::ConstraintCode::Us:
5878	case InlineAsm::ConstraintCode::Ut:
5879	case InlineAsm::ConstraintCode::Uv:
5880	case InlineAsm::ConstraintCode::Uy:
5881	// Require the address to be in a register. That is safe for all ARM
5882	// variants and it is hard to do anything much smarter without knowing
5883	// how the operand is used.
5884	OutOps.push_back(x: Op);
5885	return false;
5886	}
5887	return true;
5888	}
5889
5890	/// createARMISelDag - This pass converts a legalized DAG into a
5891	/// ARM-specific DAG, ready for instruction scheduling.
5892	///
5893	FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5894	CodeGenOptLevel OptLevel) {
5895	return new ARMDAGToDAGISelLegacy (TM, OptLevel);
5896	}
5897

Browse the source code of llvm_projects/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp