// ARMISelDAGToDAG.cpp source code [llvm_projects/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp]

//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//
12
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <memory>
#include <optional>
#include <utility>
#include <vector>
37
38	using namespace llvm;
39
40	#define DEBUG_TYPE "arm-isel"
41	#define PASS_NAME "ARM Instruction Selection"
42
43	static cl::opt<bool>
44	DisableShifterOp("disable-shifter-op", cl::Hidden,
45	cl::desc ("Disable isel of shifter-op"),
46	cl::init(Val: false));
47
48	//===--------------------------------------------------------------------===//
49	/// ARMDAGToDAGISel - ARM specific code to select ARM machine
50	/// instructions for SelectionDAG operations.
51	///
52	namespace {
53
54	class ARMDAGToDAGISel : public SelectionDAGISel {
55	/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
56	/// make the right decision when generating code for different targets.
57	const ARMSubtarget *Subtarget;
58
59	public:
60	ARMDAGToDAGISel() = delete;
61
62	explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
63	: SelectionDAGISel (tm, OptLevel) {}
64
65	bool runOnMachineFunction(MachineFunction &MF) override {
66	// Reset the subtarget each time through.
67	Subtarget = &MF.getSubtarget<ARMSubtarget>();
68	SelectionDAGISel::runOnMachineFunction(mf&: MF);
69	return true;
70	}
71
72	void PreprocessISelDAG() override;
73
74	/// getI32Imm - Return a target constant of type i32 with the specified
75	/// value.
76	inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
77	return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
78	}
79
80	void Select(SDNode *N) override;
81
82	/// Return true as some complex patterns, like those that call
83	/// canExtractShiftFromMul can modify the DAG inplace.
84	bool ComplexPatternFuncMutatesDAG() const override { return true; }
85
86	bool hasNoVMLxHazardUse(SDNode N) const*;
87	bool isShifterOpProfitable(const SDValue &Shift,
88	ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
89	bool SelectRegShifterOperand(SDValue N, SDValue &A,
90	SDValue &B, SDValue &C,
91	bool CheckProfitability = true);
92	bool SelectImmShifterOperand(SDValue N, SDValue &A,
93	SDValue &B, bool CheckProfitability = true);
94	bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
95	SDValue &C) {
96	// Don't apply the profitability check
97	return SelectRegShifterOperand(N, A, B, C, CheckProfitability: false);
98	}
99	bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
100	// Don't apply the profitability check
101	return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
102	}
103	bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
104	if (!N.hasOneUse())
105	return false;
106	return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
107	}
108
109	bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
110
111	bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
112	bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
113
114	bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
115	SDValue &Offset, SDValue &Opc);
116	bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
117	SDValue &Offset, SDValue &Opc);
118	bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
119	SDValue &Offset, SDValue &Opc);
120	bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
121	bool SelectAddrMode3(SDValue N, SDValue &Base,
122	SDValue &Offset, SDValue &Opc);
123	bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
124	SDValue &Offset, SDValue &Opc);
125	bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
126	bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
127	bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
128	bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
129	bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
130
131	bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
132
133	// Thumb Addressing Modes:
134	bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
135	bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
136	bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
137	SDValue &OffImm);
138	bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
139	SDValue &OffImm);
140	bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
141	SDValue &OffImm);
142	bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
143	SDValue &OffImm);
144	bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
145	template <unsigned Shift>
146	bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
147
148	// Thumb 2 Addressing Modes:
149	bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
150	template <unsigned Shift>
151	bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
152	bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
153	SDValue &OffImm);
154	bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
155	SDValue &OffImm);
156	template <unsigned Shift>
157	bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
158	bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
159	unsigned Shift);
160	template <unsigned Shift>
161	bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
162	bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
163	SDValue &OffReg, SDValue &ShImm);
164	bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
165
166	template<int Min, int Max>
167	bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
168
169	inline bool is_so_imm(unsigned Imm) const {
170	return ARM_AM::getSOImmVal(Arg: Imm) != -`1`;
171	}
172
173	inline bool is_so_imm_not(unsigned Imm) const {
174	return ARM_AM::getSOImmVal(Arg: ~Imm) != -`1`;
175	}
176
177	inline bool is_t2_so_imm(unsigned Imm) const {
178	return ARM_AM::getT2SOImmVal(Arg: Imm) != -`1`;
179	}
180
181	inline bool is_t2_so_imm_not(unsigned Imm) const {
182	return ARM_AM::getT2SOImmVal(Arg: ~Imm) != -`1`;
183	}
184
185	// Include the pieces autogenerated from the target description.
186	#include "ARMGenDAGISel.inc"
187
188	private:
189	void transferMemOperands(SDNode Src, SDNode Dst);
190
191	/// Indexed (pre/post inc/dec) load matching code for ARM.
192	bool tryARMIndexedLoad(SDNode *N);
193	bool tryT1IndexedLoad(SDNode *N);
194	bool tryT2IndexedLoad(SDNode *N);
195	bool tryMVEIndexedLoad(SDNode *N);
196	bool tryFMULFixed(SDNode *N, SDLoc dl);
197	bool tryFP_TO_INT(SDNode *N, SDLoc dl);
198	bool transformFixedFloatingPointConversion(SDNode N, SDNode FMul,
199	bool IsUnsigned,
200	bool FixedToFloat);
201
202	/// SelectVLD - Select NEON load intrinsics. NumVecs should be
203	/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
204	/// loads of D registers and even subregs and odd subregs of Q registers.
205	/// For NumVecs <= 2, QOpcodes1 is not used.
206	void SelectVLD(SDNode N, bool* isUpdating, unsigned NumVecs,
207	const uint16_t DOpcodes, const* uint16_t *QOpcodes0,
208	const uint16_t *QOpcodes1);
209
210	/// SelectVST - Select NEON store intrinsics. NumVecs should
211	/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
212	/// stores of D registers and even subregs and odd subregs of Q registers.
213	/// For NumVecs <= 2, QOpcodes1 is not used.
214	void SelectVST(SDNode N, bool* isUpdating, unsigned NumVecs,
215	const uint16_t DOpcodes, const* uint16_t *QOpcodes0,
216	const uint16_t *QOpcodes1);
217
218	/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
219	/// be 2, 3 or 4. The opcode arrays specify the instructions used for
220	/// load/store of D registers and Q registers.
221	void SelectVLDSTLane(SDNode N, bool* IsLoad, bool isUpdating,
222	unsigned NumVecs, const uint16_t *DOpcodes,
223	const uint16_t *QOpcodes);
224
225	/// Helper functions for setting up clusters of MVE predication operands.
226	template <typename SDValueVector>
227	void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
228	SDValue PredicateMask);
229	template <typename SDValueVector>
230	void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
231	SDValue PredicateMask, SDValue Inactive);
232
233	template <typename SDValueVector>
234	void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
235	template <typename SDValueVector>
236	void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
237
238	/// SelectMVE_WB - Select MVE writeback load/store intrinsics.
239	void SelectMVE_WB(SDNode N, const* uint16_t Opcodes, bool* Predicated);
240
241	/// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
242	void SelectMVE_LongShift(SDNode N, uint16_t Opcode, bool* Immediate,
243	bool HasSaturationOperand);
244
245	/// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
246	void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
247	uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
248
249	/// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
250	/// vector lanes.
251	void SelectMVE_VSHLC(SDNode N, bool* Predicated);
252
253	/// Select long MVE vector reductions with two vector operands
254	/// Stride is the number of vector element widths the instruction can operate
255	/// on:
256	/// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
257	/// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
258	/// Stride is used when addressing the OpcodesS array which contains multiple
259	/// opcodes for each element width.
260	/// TySize is the index into the list of element types listed above
261	void SelectBaseMVE_VMLLDAV(SDNode N, bool* Predicated,
262	const uint16_t OpcodesS, const* uint16_t *OpcodesU,
263	size_t Stride, size_t TySize);
264
265	/// Select a 64-bit MVE vector reduction with two vector operands
266	/// arm_mve_vmlldava_[predicated]
267	void SelectMVE_VMLLDAV(SDNode N, bool* Predicated, const uint16_t *OpcodesS,
268	const uint16_t *OpcodesU);
269	/// Select a 72-bit MVE vector rounding reduction with two vector operands
270	/// int_arm_mve_vrmlldavha[_predicated]
271	void SelectMVE_VRMLLDAVH(SDNode N, bool* Predicated, const uint16_t *OpcodesS,
272	const uint16_t *OpcodesU);
273
274	/// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
275	/// should be 2 or 4. The opcode array specifies the instructions
276	/// used for 8, 16 and 32-bit lane sizes respectively, and each
277	/// pointer points to a set of NumVecs sub-opcodes used for the
278	/// different stages (e.g. VLD20 versus VLD21) of each load family.
279	void SelectMVE_VLD(SDNode N, unsigned* NumVecs,
280	const uint16_t *const Opcodes, bool* HasWriteback);
281
282	/// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
283	/// array of 3 elements for the 8, 16 and 32-bit lane sizes.
284	void SelectMVE_VxDUP(SDNode N, const* uint16_t *Opcodes,
285	bool Wrapping, bool Predicated);
286
287	/// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
288	/// CX1DA, CX2D, CX2DA, CX3, CX3DA).
289	/// \arg \c NumExtraOps number of extra operands besides the coprocossor,
290	/// the accumulator and the immediate operand, i.e. 0
291	/// for CX1, 1 for CX2, 2 for CX3*
292	/// \arg \c HasAccum whether the instruction has an accumulator operand
293	void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
294	bool HasAccum);
295
296	/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
297	/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
298	/// for loading D registers.
299	void SelectVLDDup(SDNode N, bool* IsIntrinsic, bool isUpdating,
300	unsigned NumVecs, const uint16_t *DOpcodes,
301	const uint16_t QOpcodes0 = nullptr*,
302	const uint16_t QOpcodes1 = nullptr*);
303
304	/// Try to select SBFX/UBFX instructions for ARM.
305	bool tryV6T2BitfieldExtractOp(SDNode N, bool* isSigned);
306
307	bool tryInsertVectorElt(SDNode *N);
308
309	// Select special operations if node forms integer ABS pattern
310	bool tryABSOp(SDNode *N);
311
312	bool tryReadRegister(SDNode *N);
313	bool tryWriteRegister(SDNode *N);
314
315	bool tryInlineAsm(SDNode *N);
316
317	void SelectCMPZ(SDNode N, bool* &SwitchEQNEToPLMI);
318
319	void SelectCMP_SWAP(SDNode *N);
320
321	/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
322	/// inline asm expressions.
323	bool SelectInlineAsmMemoryOperand(const SDValue &Op,
324	InlineAsm::ConstraintCode ConstraintID,
325	std::vector<SDValue> &OutOps) override;
326
327	// Form pairs of consecutive R, S, D, or Q registers.
328	SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
329	SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
330	SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
331	SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
332
333	// Form sequences of 4 consecutive S, D, or Q registers.
334	SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
335	SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
336	SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
337
338	// Get the alignment operand for a NEON VLD or VST instruction.
339	SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
340	bool is64BitVector);
341
342	/// Checks if N is a multiplication by a constant where we can extract out a
343	/// power of two from the constant so that it can be used in a shift, but only
344	/// if it simplifies the materialization of the constant. Returns true if it
345	/// is, and assigns to PowerOfTwo the power of two that should be extracted
346	/// out and to NewMulConst the new constant to be multiplied by.
347	bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
348	unsigned &PowerOfTwo, SDValue &NewMulConst) const;
349
350	/// Replace N with M in CurDAG, in a way that also ensures that M gets
351	/// selected when N would have been selected.
352	void replaceDAGValue(const SDValue &N, SDValue M);
353	};
354
355	class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
356	public:
357	static char ID;
358	ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
359	: SelectionDAGISelLegacy (
360	ID, std::make_unique<ARMDAGToDAGISel>(args&: tm, args&: OptLevel)) {}
361	};
362	}
363
364	char ARMDAGToDAGISelLegacy::ID = `0`;
365
366	INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
367
368	/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
369	/// operand. If so Imm will receive the 32-bit value.
370	static bool isInt32Immediate(SDNode N, unsigned* &Imm) {
371	if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: `0`) == MVT::i32) {
372	Imm = N->getAsZExtVal();
373	return true;
374	}
375	return false;
376	}
377
378	// isInt32Immediate - This method tests to see if a constant operand.
379	// If so Imm will receive the 32 bit value.
380	static bool isInt32Immediate(SDValue N, unsigned &Imm) {
381	return isInt32Immediate(N: N.getNode(), Imm);
382	}
383
384	// isOpcWithIntImmediate - This method tests to see if the node is a specific
385	// opcode and that it has a immediate integer right operand.
386	// If so Imm will receive the 32 bit value.
387	static bool isOpcWithIntImmediate(SDNode N, unsigned* Opc, unsigned& Imm) {
388	return N->getOpcode() == Opc &&
389	isInt32Immediate(N: N->getOperand(Num: `1`).getNode(), Imm);
390	}
391
392	/// Check whether a particular node is a constant value representable as
393	/// (N Scale) where (N in [\p RangeMin, \p RangeMax).*
394	///
395	/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
396	static bool isScaledConstantInRange(SDValue Node, int Scale,
397	int RangeMin, int RangeMax,
398	int &ScaledConstant) {
399	assert(Scale > `0` && "Invalid scale!");
400
401	// Check that this is a constant.
402	const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Node);
403	if (!C)
404	return false;
405
406	ScaledConstant = (int) C->getZExtValue();
407	if ((ScaledConstant % Scale) != `0`)
408	return false;
409
410	ScaledConstant /= Scale;
411	return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
412	}
413
414	void ARMDAGToDAGISel::PreprocessISelDAG() {
415	if (!Subtarget->hasV6T2Ops())
416	return;
417
418	bool isThumb2 = Subtarget->isThumb();
419	// We use make_early_inc_range to avoid invalidation issues.
420	for (SDNode &N : llvm::make_early_inc_range(Range: CurDAG->allnodes())) {
421	if (N.getOpcode() != ISD::ADD)
422	continue;
423
424	// Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
425	// leading zeros, followed by consecutive set bits, followed by 1 or 2
426	// trailing zeros, e.g. 1020.
427	// Transform the expression to
428	// (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
429	// of trailing zeros of c2. The left shift would be folded as an shifter
430	// operand of 'add' and the 'and' and 'srl' would become a bits extraction
431	// node (UBFX).
432
433	SDValue N0 = N.getOperand(Num: `0`);
434	SDValue N1 = N.getOperand(Num: `1`);
435	unsigned And_imm = `0`;
436	if (!isOpcWithIntImmediate(N: N1.getNode(), Opc: ISD::AND, Imm&: And_imm)) {
437	if (isOpcWithIntImmediate(N: N0.getNode(), Opc: ISD::AND, Imm&: And_imm))
438	std::swap(a&: N0, b&: N1);
439	}
440	if (!And_imm)
441	continue;
442
443	// Check if the AND mask is an immediate of the form: 000.....1111111100
444	unsigned TZ = llvm::countr_zero(Val: And_imm);
445	if (TZ != `1` && TZ != `2`)
446	// Be conservative here. Shifter operands aren't always free. e.g. On
447	// Swift, left shifter operand of 1 / 2 for free but others are not.
448	// e.g.
449	// ubfx r3, r1, #16, #8
450	// ldr.w r3, [r0, r3, lsl #2]
451	// vs.
452	// mov.w r9, #1020
453	// and.w r2, r9, r1, lsr #14
454	// ldr r2, [r0, r2]
455	continue;
456	And_imm >>= TZ;
457	if (And_imm & (And_imm + `1`))
458	continue;
459
460	// Look for (and (srl X, c1), c2).
461	SDValue Srl = N1.getOperand(i: `0`);
462	unsigned Srl_imm = `0`;
463	if (!isOpcWithIntImmediate(N: Srl.getNode(), Opc: ISD::SRL, Imm&: Srl_imm) \|\|
464	(Srl_imm <= `2`))
465	continue;
466
467	// Make sure first operand is not a shifter operand which would prevent
468	// folding of the left shift.
469	SDValue CPTmp0;
470	SDValue CPTmp1;
471	SDValue CPTmp2;
472	if (isThumb2) {
473	if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1))
474	continue;
475	} else {
476	if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1) \|\|
477	SelectRegShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1, C&: CPTmp2))
478	continue;
479	}
480
481	// Now make the transformation.
482	Srl = CurDAG->getNode(Opcode: ISD::SRL, DL: SDLoc (Srl), VT: MVT::i32,
483	N1: Srl.getOperand(i: `0`),
484	N2: CurDAG->getConstant(Val: Srl_imm + TZ, DL: SDLoc (Srl),
485	VT: MVT::i32));
486	N1 = CurDAG->getNode(Opcode: ISD::AND, DL: SDLoc (N1), VT: MVT::i32,
487	N1: Srl,
488	N2: CurDAG->getConstant(Val: And_imm, DL: SDLoc (Srl), VT: MVT::i32));
489	N1 = CurDAG->getNode(Opcode: ISD::SHL, DL: SDLoc (N1), VT: MVT::i32,
490	N1, N2: CurDAG->getConstant(Val: TZ, DL: SDLoc (Srl), VT: MVT::i32));
491	CurDAG->UpdateNodeOperands(N: &N, Op1: N0, Op2: N1);
492	}
493	}
494
495	/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
496	/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
497	/// least on current ARM implementations) which should be avoidded.
498	bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode N) const* {
499	if (OptLevel == CodeGenOptLevel::None)
500	return true;
501
502	if (!Subtarget->hasVMLxHazards())
503	return true;
504
505	if (!N->hasOneUse())
506	return false;
507
508	SDNode User = N->user_begin();
509	if (User->getOpcode() == ISD::CopyToReg)
510	return true;
511	if (User->isMachineOpcode()) {
512	const ARMBaseInstrInfo TII = static_cast<const* ARMBaseInstrInfo *>(
513	CurDAG->getSubtarget().getInstrInfo());
514
515	const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
516	if (MCID.mayStore())
517	return true;
518	unsigned Opcode = MCID.getOpcode();
519	if (Opcode == ARM::VMOVRS \|\| Opcode == ARM::VMOVRRD)
520	return true;
521	// vmlx feeding into another vmlx. We actually want to unfold
522	// the use later in the MLxExpansion pass. e.g.
523	// vmla
524	// vmla (stall 8 cycles)
525	//
526	// vmul (5 cycles)
527	// vadd (5 cycles)
528	// vmla
529	// This adds up to about 18 - 19 cycles.
530	//
531	// vmla
532	// vmul (stall 4 cycles)
533	// vadd adds up to about 14 cycles.
534	return TII->isFpMLxInstruction(Opcode);
535	}
536
537	return false;
538	}
539
540	bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
541	ARM_AM::ShiftOpc ShOpcVal,
542	unsigned ShAmt) {
543	if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
544	return true;
545	if (Shift.hasOneUse())
546	return true;
547	// R << 2 is free.
548	return ShOpcVal == ARM_AM::lsl &&
549	(ShAmt == `2` \|\| (Subtarget->isSwift() && ShAmt == `1`));
550	}
551
552	bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
553	unsigned MaxShift,
554	unsigned &PowerOfTwo,
555	SDValue &NewMulConst) const {
556	assert(N.getOpcode() == ISD::MUL);
557	assert(MaxShift > `0`);
558
559	// If the multiply is used in more than one place then changing the constant
560	// will make other uses incorrect, so don't.
561	if (!N.hasOneUse()) return false;
562	// Check if the multiply is by a constant
563	ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
564	if (!MulConst) return false;
565	// If the constant is used in more than one place then modifying it will mean
566	// we need to materialize two constants instead of one, which is a bad idea.
567	if (!MulConst->hasOneUse()) return false;
568	unsigned MulConstVal = MulConst->getZExtValue();
569	if (MulConstVal == `0`) return false;
570
571	// Find the largest power of 2 that MulConstVal is a multiple of
572	PowerOfTwo = MaxShift;
573	while ((MulConstVal % (`1` << PowerOfTwo)) != `0`) {
574	--PowerOfTwo;
575	if (PowerOfTwo == `0`) return false;
576	}
577
578	// Only optimise if the new cost is better
579	unsigned NewMulConstVal = MulConstVal / (`1` << PowerOfTwo);
580	NewMulConst = CurDAG->getConstant(Val: NewMulConstVal, DL: SDLoc (N), VT: MVT::i32);
581	unsigned OldCost = ConstantMaterializationCost(Val: MulConstVal, Subtarget);
582	unsigned NewCost = ConstantMaterializationCost(Val: NewMulConstVal, Subtarget);
583	return NewCost < OldCost;
584	}
585
586	void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
587	CurDAG->RepositionNode(Position: N.getNode()->getIterator(), N: M.getNode());
588	ReplaceUses(F: N, T: M);
589	}
590
591	bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
592	SDValue &BaseReg,
593	SDValue &Opc,
594	bool CheckProfitability) {
595	if (DisableShifterOp)
596	return false;
597
598	// If N is a multiply-by-constant and it's profitable to extract a shift and
599	// use it in a shifted operand do so.
600	if (N.getOpcode() == ISD::MUL) {
601	unsigned PowerOfTwo = `0`;
602	SDValue NewMulConst;
603	if (canExtractShiftFromMul(N, MaxShift: `31`, PowerOfTwo, NewMulConst)) {
604	HandleSDNode Handle(N);
605	SDLoc Loc(N);
606	replaceDAGValue(N: N.getOperand(i: `1`), M: NewMulConst);
607	BaseReg = Handle.getValue();
608	Opc = CurDAG->getTargetConstant(
609	Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: PowerOfTwo), DL: Loc, VT: MVT::i32);
610	return true;
611	}
612	}
613
614	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
615
616	// Don't match base register only case. That is matched to a separate
617	// lower complexity pattern with explicit register operand.
618	if (ShOpcVal == ARM_AM::no_shift) return false;
619
620	BaseReg = N.getOperand(i: `0`);
621	unsigned ShImmVal = `0`;
622	ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
623	if (!RHS) return false;
624	ShImmVal = RHS->getZExtValue() & `31`;
625	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
626	DL: SDLoc (N), VT: MVT::i32);
627	return true;
628	}
629
630	bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
631	SDValue &BaseReg,
632	SDValue &ShReg,
633	SDValue &Opc,
634	bool CheckProfitability) {
635	if (DisableShifterOp)
636	return false;
637
638	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
639
640	// Don't match base register only case. That is matched to a separate
641	// lower complexity pattern with explicit register operand.
642	if (ShOpcVal == ARM_AM::no_shift) return false;
643
644	BaseReg = N.getOperand(i: `0`);
645	unsigned ShImmVal = `0`;
646	ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
647	if (RHS) return false;
648
649	ShReg = N.getOperand(i: `1`);
650	if (CheckProfitability && !isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt: ShImmVal))
651	return false;
652	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
653	DL: SDLoc (N), VT: MVT::i32);
654	return true;
655	}
656
657	// Determine whether an ISD::OR's operands are suitable to turn the operation
658	// into an addition, which often has more compact encodings.
659	bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
660	assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
661	Out = N;
662	return CurDAG->haveNoCommonBitsSet(A: N, B: Parent->getOperand(Num: `1`));
663	}
664
665
666	bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
667	SDValue &Base,
668	SDValue &OffImm) {
669	// Match simple R + imm12 operands.
670
671	// Base only.
672	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
673	!CurDAG->isBaseWithConstantOffset(Op: N)) {
674	if (N.getOpcode() == ISD::FrameIndex) {
675	// Match frame index.
676	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
677	Base = CurDAG->getTargetFrameIndex(
678	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
679	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
680	return true;
681	}
682
683	if (N.getOpcode() == ARMISD::Wrapper &&
684	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
685	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
686	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
687	Base = N.getOperand(i: `0`);
688	} else
689	Base = N;
690	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
691	return true;
692	}
693
694	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
695	int RHSC = (int)RHS->getSExtValue();
696	if (N.getOpcode() == ISD::SUB)
697	RHSC = -RHSC;
698
699	if (RHSC > -`0x1000` && RHSC < `0x1000`) { // 12 bits
700	Base = N.getOperand(i: `0`);
701	if (Base.getOpcode() == ISD::FrameIndex) {
702	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
703	Base = CurDAG->getTargetFrameIndex(
704	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
705	}
706	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
707	return true;
708	}
709	}
710
711	// Base only.
712	Base = N;
713	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
714	return true;
715	}
716
717
718
719	bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
720	SDValue &Opc) {
721	if (N.getOpcode() == ISD::MUL &&
722	((!Subtarget->isLikeA9() && !Subtarget->isSwift()) \|\| N.hasOneUse())) {
723	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
724	// X [3,5,9] -> X + X * [2,4,8] etc.*
725	int RHSC = (int)RHS->getZExtValue();
726	if (RHSC & `1`) {
727	RHSC = RHSC & ~`1`;
728	ARM_AM::AddrOpc AddSub = ARM_AM::add;
729	if (RHSC < `0`) {
730	AddSub = ARM_AM::sub;
731	RHSC = - RHSC;
732	}
733	if (isPowerOf2_32(Value: RHSC)) {
734	unsigned ShAmt = Log2_32(Value: RHSC);
735	Base = Offset = N.getOperand(i: `0`);
736	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt,
737	SO: ARM_AM::lsl),
738	DL: SDLoc (N), VT: MVT::i32);
739	return true;
740	}
741	}
742	}
743	}
744
745	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
746	// ISD::OR that is equivalent to an ISD::ADD.
747	!CurDAG->isBaseWithConstantOffset(Op: N))
748	return false;
749
750	// Leave simple R +/- imm12 operands for LDRi12
751	if (N.getOpcode() == ISD::ADD \|\| N.getOpcode() == ISD::OR) {
752	int RHSC;
753	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), /Scale=/`1`,
754	RangeMin: -`0x1000`+`1`, RangeMax: `0x1000`, ScaledConstant&: RHSC)) // 12 bits.
755	return false;
756	}
757
758	// Otherwise this is R +/- [possibly shifted] R.
759	ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
760	ARM_AM::ShiftOpc ShOpcVal =
761	ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: `1`).getOpcode());
762	unsigned ShAmt = `0`;
763
764	Base = N.getOperand(i: `0`);
765	Offset = N.getOperand(i: `1`);
766
767	if (ShOpcVal != ARM_AM::no_shift) {
768	// Check to see if the RHS of the shift is a constant, if not, we can't fold
769	// it.
770	if (ConstantSDNode *Sh =
771	dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`).getOperand(i: `1`))) {
772	ShAmt = Sh->getZExtValue();
773	if (isShifterOpProfitable(Shift: Offset, ShOpcVal, ShAmt))
774	Offset = N.getOperand(i: `1`).getOperand(i: `0`);
775	else {
776	ShAmt = `0`;
777	ShOpcVal = ARM_AM::no_shift;
778	}
779	} else {
780	ShOpcVal = ARM_AM::no_shift;
781	}
782	}
783
784	// Try matching (R shl C) + (R).
785	if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
786	!(Subtarget->isLikeA9() \|\| Subtarget->isSwift() \|\|
787	N.getOperand(i: `0`).hasOneUse())) {
788	ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: `0`).getOpcode());
789	if (ShOpcVal != ARM_AM::no_shift) {
790	// Check to see if the RHS of the shift is a constant, if not, we can't
791	// fold it.
792	if (ConstantSDNode *Sh =
793	dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `0`).getOperand(i: `1`))) {
794	ShAmt = Sh->getZExtValue();
795	if (isShifterOpProfitable(Shift: N.getOperand(i: `0`), ShOpcVal, ShAmt)) {
796	Offset = N.getOperand(i: `0`).getOperand(i: `0`);
797	Base = N.getOperand(i: `1`);
798	} else {
799	ShAmt = `0`;
800	ShOpcVal = ARM_AM::no_shift;
801	}
802	} else {
803	ShOpcVal = ARM_AM::no_shift;
804	}
805	}
806	}
807
808	// If Offset is a multiply-by-constant and it's profitable to extract a shift
809	// and use it in a shifted operand do so.
810	if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
811	unsigned PowerOfTwo = `0`;
812	SDValue NewMulConst;
813	if (canExtractShiftFromMul(N: Offset, MaxShift: `31`, PowerOfTwo, NewMulConst)) {
814	HandleSDNode Handle(Offset);
815	replaceDAGValue(N: Offset.getOperand(i: `1`), M: NewMulConst);
816	Offset = Handle.getValue();
817	ShAmt = PowerOfTwo;
818	ShOpcVal = ARM_AM::lsl;
819	}
820	}
821
822	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
823	DL: SDLoc (N), VT: MVT::i32);
824	return true;
825	}
826
827	bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
828	SDValue &Offset, SDValue &Opc) {
829	unsigned Opcode = Op->getOpcode();
830	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
831	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
832	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
833	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
834	? ARM_AM::add : ARM_AM::sub;
835	int Val;
836	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x1000`, ScaledConstant&: Val))
837	return false;
838
839	Offset = N;
840	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
841	unsigned ShAmt = `0`;
842	if (ShOpcVal != ARM_AM::no_shift) {
843	// Check to see if the RHS of the shift is a constant, if not, we can't fold
844	// it.
845	if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
846	ShAmt = Sh->getZExtValue();
847	if (isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt))
848	Offset = N.getOperand(i: `0`);
849	else {
850	ShAmt = `0`;
851	ShOpcVal = ARM_AM::no_shift;
852	}
853	} else {
854	ShOpcVal = ARM_AM::no_shift;
855	}
856	}
857
858	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
859	DL: SDLoc (N), VT: MVT::i32);
860	return true;
861	}
862
863	bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
864	SDValue &Offset, SDValue &Opc) {
865	unsigned Opcode = Op->getOpcode();
866	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
867	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
868	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
869	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
870	? ARM_AM::add : ARM_AM::sub;
871	int Val;
872	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x1000`, ScaledConstant&: Val)) { // 12 bits.
873	if (AddSub == ARM_AM::sub) Val *= -`1`;
874	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
875	Opc = CurDAG->getSignedTargetConstant(Val, DL: SDLoc (Op), VT: MVT::i32);
876	return true;
877	}
878
879	return false;
880	}
881
882
883	bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
884	SDValue &Offset, SDValue &Opc) {
885	unsigned Opcode = Op->getOpcode();
886	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
887	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
888	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
889	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
890	? ARM_AM::add : ARM_AM::sub;
891	int Val;
892	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x1000`, ScaledConstant&: Val)) { // 12 bits.
893	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
894	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: Val,
895	SO: ARM_AM::no_shift),
896	DL: SDLoc (Op), VT: MVT::i32);
897	return true;
898	}
899
900	return false;
901	}
902
903	bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
904	Base = N;
905	return true;
906	}
907
908	bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
909	SDValue &Base, SDValue &Offset,
910	SDValue &Opc) {
911	if (N.getOpcode() == ISD::SUB) {
912	// X - C is canonicalize to X + -C, no need to handle it here.
913	Base = N.getOperand(i: `0`);
914	Offset = N.getOperand(i: `1`);
915	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::sub, Offset: `0`), DL: SDLoc (N),
916	VT: MVT::i32);
917	return true;
918	}
919
920	if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
921	Base = N;
922	if (N.getOpcode() == ISD::FrameIndex) {
923	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
924	Base = CurDAG->getTargetFrameIndex(
925	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
926	}
927	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
928	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: `0`), DL: SDLoc (N),
929	VT: MVT::i32);
930	return true;
931	}
932
933	// If the RHS is +/- imm8, fold into addr mode.
934	int RHSC;
935	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), /Scale=/`1`,
936	RangeMin: -`256` + `1`, RangeMax: `256`, ScaledConstant&: RHSC)) { // 8 bits.
937	Base = N.getOperand(i: `0`);
938	if (Base.getOpcode() == ISD::FrameIndex) {
939	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
940	Base = CurDAG->getTargetFrameIndex(
941	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
942	}
943	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
944
945	ARM_AM::AddrOpc AddSub = ARM_AM::add;
946	if (RHSC < `0`) {
947	AddSub = ARM_AM::sub;
948	RHSC = -RHSC;
949	}
950	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: RHSC), DL: SDLoc (N),
951	VT: MVT::i32);
952	return true;
953	}
954
955	Base = N.getOperand(i: `0`);
956	Offset = N.getOperand(i: `1`);
957	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: `0`), DL: SDLoc (N),
958	VT: MVT::i32);
959	return true;
960	}
961
962	bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
963	SDValue &Offset, SDValue &Opc) {
964	unsigned Opcode = Op->getOpcode();
965	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
966	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
967	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
968	ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC \|\| AM == ISD::POST_INC)
969	? ARM_AM::add : ARM_AM::sub;
970	int Val;
971	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `256`, ScaledConstant&: Val)) { // 12 bits.
972	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
973	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: Val), DL: SDLoc (Op),
974	VT: MVT::i32);
975	return true;
976	}
977
978	Offset = N;
979	Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: `0`), DL: SDLoc (Op),
980	VT: MVT::i32);
981	return true;
982	}
983
984	bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
985	bool FP16) {
986	if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
987	Base = N;
988	if (N.getOpcode() == ISD::FrameIndex) {
989	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
990	Base = CurDAG->getTargetFrameIndex(
991	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
992	} else if (N.getOpcode() == ARMISD::Wrapper &&
993	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
994	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
995	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
996	Base = N.getOperand(i: `0`);
997	}
998	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: `0`),
999	DL: SDLoc (N), VT: MVT::i32);
1000	return true;
1001	}
1002
1003	// If the RHS is +/- imm8, fold into addr mode.
1004	int RHSC;
1005	const int Scale = FP16 ? `2` : `4`;
1006
1007	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale, RangeMin: -`255`, RangeMax: `256`, ScaledConstant&: RHSC)) {
1008	Base = N.getOperand(i: `0`);
1009	if (Base.getOpcode() == ISD::FrameIndex) {
1010	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1011	Base = CurDAG->getTargetFrameIndex(
1012	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1013	}
1014
1015	ARM_AM::AddrOpc AddSub = ARM_AM::add;
1016	if (RHSC < `0`) {
1017	AddSub = ARM_AM::sub;
1018	RHSC = -RHSC;
1019	}
1020
1021	if (FP16)
1022	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: AddSub, Offset: RHSC),
1023	DL: SDLoc (N), VT: MVT::i32);
1024	else
1025	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: AddSub, Offset: RHSC),
1026	DL: SDLoc (N), VT: MVT::i32);
1027
1028	return true;
1029	}
1030
1031	Base = N;
1032
1033	if (FP16)
1034	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: ARM_AM::add, Offset: `0`),
1035	DL: SDLoc (N), VT: MVT::i32);
1036	else
1037	Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: `0`),
1038	DL: SDLoc (N), VT: MVT::i32);
1039
1040	return true;
1041	}
1042
1043	bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1044	SDValue &Base, SDValue &Offset) {
1045	return IsAddressingMode5(N, Base, Offset, /FP16=/ false);
1046	}
1047
1048	bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1049	SDValue &Base, SDValue &Offset) {
1050	return IsAddressingMode5(N, Base, Offset, /FP16=/ true);
1051	}
1052
1053	bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1054	SDValue &Align) {
1055	Addr = N;
1056
1057	unsigned Alignment = `0`;
1058
1059	MemSDNode *MemN = cast<MemSDNode>(Val: Parent);
1060
1061	if (isa<LSBaseSDNode>(Val: MemN) \|\|
1062	((MemN->getOpcode() == ARMISD::VST1_UPD \|\|
1063	MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1064	MemN->getConstantOperandVal(Num: MemN->getNumOperands() - `1`) == `1`)) {
1065	// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1066	// The maximum alignment is equal to the memory size being referenced.
1067	llvm::Align MMOAlign = MemN->getAlign();
1068	unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / `8`;
1069	if (MMOAlign.value() >= MemSize && MemSize > `1`)
1070	Alignment = MemSize;
1071	} else {
1072	// All other uses of addrmode6 are for intrinsics. For now just record
1073	// the raw alignment value; it will be refined later based on the legal
1074	// alignment operands for the intrinsic.
1075	Alignment = MemN->getAlign().value();
1076	}
1077
1078	Align = CurDAG->getTargetConstant(Val: Alignment, DL: SDLoc (N), VT: MVT::i32);
1079	return true;
1080	}
1081
1082	bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1083	SDValue &Offset) {
1084	LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Val: Op);
1085	ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1086	if (AM != ISD::POST_INC)
1087	return false;
1088	Offset = N;
1089	if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(Val&: N)) {
1090	if (NC->getZExtValue() * `8` == LdSt->getMemoryVT().getSizeInBits())
1091	Offset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
1092	}
1093	return true;
1094	}
1095
1096	bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1097	SDValue &Offset, SDValue &Label) {
1098	if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1099	Offset = N.getOperand(i: `0`);
1100	SDValue N1 = N.getOperand(i: `1`);
1101	Label = CurDAG->getTargetConstant(Val: N1 ->getAsZExtVal(), DL: SDLoc (N), VT: MVT::i32);
1102	return true;
1103	}
1104
1105	return false;
1106	}
1107
1108
1109	//===----------------------------------------------------------------------===//
1110	// Thumb Addressing Modes
1111	//===----------------------------------------------------------------------===//
1112
1113	static bool shouldUseZeroOffsetLdSt(SDValue N) {
1114	// Negative numbers are difficult to materialise in thumb1. If we are
1115	// selecting the add of a negative, instead try to select ri with a zero
1116	// offset, so create the add node directly which will become a sub.
1117	if (N.getOpcode() != ISD::ADD)
1118	return false;
1119
1120	// Look for an imm which is not legal for ld/st, but is legal for sub.
1121	if (auto C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`)))
1122	return C->getSExtValue() < `0` && C->getSExtValue() >= -`255`;
1123
1124	return false;
1125	}
1126
1127	bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1128	SDValue &Offset) {
1129	if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N)) {
1130	if (!isNullConstant(V: N))
1131	return false;
1132
1133	Base = Offset = N;
1134	return true;
1135	}
1136
1137	Base = N.getOperand(i: `0`);
1138	Offset = N.getOperand(i: `1`);
1139	return true;
1140	}
1141
1142	bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1143	SDValue &Offset) {
1144	if (shouldUseZeroOffsetLdSt(N))
1145	return false; // Select ri instead
1146	return SelectThumbAddrModeRRSext(N, Base, Offset);
1147	}
1148
1149	bool
1150	ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1151	SDValue &Base, SDValue &OffImm) {
1152	if (shouldUseZeroOffsetLdSt(N)) {
1153	Base = N;
1154	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1155	return true;
1156	}
1157
1158	if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
1159	if (N.getOpcode() == ISD::ADD) {
1160	return false; // We want to select register offset instead
1161	} else if (N.getOpcode() == ARMISD::Wrapper &&
1162	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
1163	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
1164	N.getOperand(i: `0`).getOpcode() != ISD::TargetConstantPool &&
1165	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
1166	Base = N.getOperand(i: `0`);
1167	} else {
1168	Base = N;
1169	}
1170
1171	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1172	return true;
1173	}
1174
1175	// If the RHS is + imm5 scale, fold into addr mode.*
1176	int RHSC;
1177	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale, RangeMin: `0`, RangeMax: `32`, ScaledConstant&: RHSC)) {
1178	Base = N.getOperand(i: `0`);
1179	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1180	return true;
1181	}
1182
1183	// Offset is too large, so use register offset instead.
1184	return false;
1185	}
1186
1187	bool
1188	ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1189	SDValue &OffImm) {
1190	return SelectThumbAddrModeImm5S(N, Scale: `4`, Base, OffImm);
1191	}
1192
1193	bool
1194	ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1195	SDValue &OffImm) {
1196	return SelectThumbAddrModeImm5S(N, Scale: `2`, Base, OffImm);
1197	}
1198
1199	bool
1200	ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1201	SDValue &OffImm) {
1202	return SelectThumbAddrModeImm5S(N, Scale: `1`, Base, OffImm);
1203	}
1204
1205	bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1206	SDValue &Base, SDValue &OffImm) {
1207	if (N.getOpcode() == ISD::FrameIndex) {
1208	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1209	// Only multiples of 4 are allowed for the offset, so the frame object
1210	// alignment must be at least 4.
1211	MachineFrameInfo &MFI = MF->getFrameInfo();
1212	if (MFI.getObjectAlign(ObjectIdx: FI) < Align (`4`))
1213	MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align (`4`));
1214	Base = CurDAG->getTargetFrameIndex(
1215	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1216	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1217	return true;
1218	}
1219
1220	if (!CurDAG->isBaseWithConstantOffset(Op: N))
1221	return false;
1222
1223	if (N.getOperand(i: `0`).getOpcode() == ISD::FrameIndex) {
1224	// If the RHS is + imm8 scale, fold into addr mode.*
1225	int RHSC;
1226	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), /Scale=/`4`, RangeMin: `0`, RangeMax: `256`, ScaledConstant&: RHSC)) {
1227	Base = N.getOperand(i: `0`);
1228	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1229	// Make sure the offset is inside the object, or we might fail to
1230	// allocate an emergency spill slot. (An out-of-range access is UB, but
1231	// it could show up anyway.)
1232	MachineFrameInfo &MFI = MF->getFrameInfo();
1233	if (RHSC * `4` < MFI.getObjectSize(ObjectIdx: FI)) {
1234	// For LHS+RHS to result in an offset that's a multiple of 4 the object
1235	// indexed by the LHS must be 4-byte aligned.
1236	if (!MFI.isFixedObjectIndex(ObjectIdx: FI) && MFI.getObjectAlign(ObjectIdx: FI) < Align (`4`))
1237	MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align (`4`));
1238	if (MFI.getObjectAlign(ObjectIdx: FI) >= Align (`4`)) {
1239	Base = CurDAG->getTargetFrameIndex(
1240	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1241	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1242	return true;
1243	}
1244	}
1245	}
1246	}
1247
1248	return false;
1249	}
1250
1251	template <unsigned Shift>
1252	bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1253	SDValue &OffImm) {
1254	if (N.getOpcode() == ISD::SUB \|\| CurDAG->isBaseWithConstantOffset(Op: N)) {
1255	int RHSC;
1256	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale: `1` << Shift, RangeMin: -`0x7f`, RangeMax: `0x80`,
1257	ScaledConstant&: RHSC)) {
1258	Base = N.getOperand(i: `0`);
1259	if (N.getOpcode() == ISD::SUB)
1260	RHSC = -RHSC;
1261	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (`1` << Shift), DL: SDLoc (N),
1262	VT: MVT::i32);
1263	return true;
1264	}
1265	}
1266
1267	// Base only.
1268	Base = N;
1269	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1270	return true;
1271	}
1272
1273
1274	//===----------------------------------------------------------------------===//
1275	// Thumb 2 Addressing Modes
1276	//===----------------------------------------------------------------------===//
1277
1278
1279	bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1280	SDValue &Base, SDValue &OffImm) {
1281	// Match simple R + imm12 operands.
1282
1283	// Base only.
1284	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1285	!CurDAG->isBaseWithConstantOffset(Op: N)) {
1286	if (N.getOpcode() == ISD::FrameIndex) {
1287	// Match frame index.
1288	int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1289	Base = CurDAG->getTargetFrameIndex(
1290	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1291	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1292	return true;
1293	}
1294
1295	if (N.getOpcode() == ARMISD::Wrapper &&
1296	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalAddress &&
1297	N.getOperand(i: `0`).getOpcode() != ISD::TargetExternalSymbol &&
1298	N.getOperand(i: `0`).getOpcode() != ISD::TargetGlobalTLSAddress) {
1299	Base = N.getOperand(i: `0`);
1300	if (Base.getOpcode() == ISD::TargetConstantPool)
1301	return false; // We want to select t2LDRpci instead.
1302	} else
1303	Base = N;
1304	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1305	return true;
1306	}
1307
1308	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1309	if (SelectT2AddrModeImm8(N, Base, OffImm))
1310	// Let t2LDRi8 handle (R - imm8).
1311	return false;
1312
1313	int RHSC = (int)RHS->getZExtValue();
1314	if (N.getOpcode() == ISD::SUB)
1315	RHSC = -RHSC;
1316
1317	if (RHSC >= `0` && RHSC < `0x1000`) { // 12 bits (unsigned)
1318	Base = N.getOperand(i: `0`);
1319	if (Base.getOpcode() == ISD::FrameIndex) {
1320	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1321	Base = CurDAG->getTargetFrameIndex(
1322	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1323	}
1324	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1325	return true;
1326	}
1327	}
1328
1329	// Base only.
1330	Base = N;
1331	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1332	return true;
1333	}
1334
1335	template <unsigned Shift>
1336	bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1337	SDValue &OffImm) {
1338	if (N.getOpcode() == ISD::SUB \|\| CurDAG->isBaseWithConstantOffset(Op: N)) {
1339	int RHSC;
1340	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale: `1` << Shift, RangeMin: -`255`, RangeMax: `256`, ScaledConstant&: RHSC)) {
1341	Base = N.getOperand(i: `0`);
1342	if (Base.getOpcode() == ISD::FrameIndex) {
1343	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1344	Base = CurDAG->getTargetFrameIndex(
1345	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1346	}
1347
1348	if (N.getOpcode() == ISD::SUB)
1349	RHSC = -RHSC;
1350	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (`1` << Shift), DL: SDLoc (N),
1351	VT: MVT::i32);
1352	return true;
1353	}
1354	}
1355
1356	// Base only.
1357	Base = N;
1358	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1359	return true;
1360	}
1361
1362	bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1363	SDValue &Base, SDValue &OffImm) {
1364	// Match simple R - imm8 operands.
1365	if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1366	!CurDAG->isBaseWithConstantOffset(Op: N))
1367	return false;
1368
1369	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1370	int RHSC = (int)RHS->getSExtValue();
1371	if (N.getOpcode() == ISD::SUB)
1372	RHSC = -RHSC;
1373
1374	if ((RHSC >= -`255`) && (RHSC < `0`)) { // 8 bits (always negative)
1375	Base = N.getOperand(i: `0`);
1376	if (Base.getOpcode() == ISD::FrameIndex) {
1377	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1378	Base = CurDAG->getTargetFrameIndex(
1379	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1380	}
1381	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32);
1382	return true;
1383	}
1384	}
1385
1386	return false;
1387	}
1388
1389	bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1390	SDValue &OffImm){
1391	unsigned Opcode = Op->getOpcode();
1392	ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1393	? cast<LoadSDNode>(Val: Op)->getAddressingMode()
1394	: cast<StoreSDNode>(Val: Op)->getAddressingMode();
1395	int RHSC;
1396	if (isScaledConstantInRange(Node: N, /Scale=/`1`, RangeMin: `0`, RangeMax: `0x100`, ScaledConstant&: RHSC)) { // 8 bits.
1397	OffImm = ((AM == ISD::PRE_INC) \|\| (AM == ISD::POST_INC))
1398	? CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc (N), VT: MVT::i32)
1399	: CurDAG->getSignedTargetConstant(Val: -RHSC, DL: SDLoc (N), VT: MVT::i32);
1400	return true;
1401	}
1402
1403	return false;
1404	}
1405
1406	template <unsigned Shift>
1407	bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1408	SDValue &OffImm) {
1409	if (N.getOpcode() == ISD::SUB \|\| CurDAG->isBaseWithConstantOffset(Op: N)) {
1410	int RHSC;
1411	if (isScaledConstantInRange(Node: N.getOperand(i: `1`), Scale: `1` << Shift, RangeMin: -`0x7f`, RangeMax: `0x80`,
1412	ScaledConstant&: RHSC)) {
1413	Base = N.getOperand(i: `0`);
1414	if (Base.getOpcode() == ISD::FrameIndex) {
1415	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1416	Base = CurDAG->getTargetFrameIndex(
1417	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1418	}
1419
1420	if (N.getOpcode() == ISD::SUB)
1421	RHSC = -RHSC;
1422	OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (`1` << Shift), DL: SDLoc (N),
1423	VT: MVT::i32);
1424	return true;
1425	}
1426	}
1427
1428	// Base only.
1429	Base = N;
1430	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1431	return true;
1432	}
1433
1434	template <unsigned Shift>
1435	bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1436	SDValue &OffImm) {
1437	return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1438	}
1439
1440	bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1441	SDValue &OffImm,
1442	unsigned Shift) {
1443	unsigned Opcode = Op->getOpcode();
1444	ISD::MemIndexedMode AM;
1445	switch (Opcode) {
1446	case ISD::LOAD:
1447	AM = cast<LoadSDNode>(Val: Op)->getAddressingMode();
1448	break;
1449	case ISD::STORE:
1450	AM = cast<StoreSDNode>(Val: Op)->getAddressingMode();
1451	break;
1452	case ISD::MLOAD:
1453	AM = cast<MaskedLoadSDNode>(Val: Op)->getAddressingMode();
1454	break;
1455	case ISD::MSTORE:
1456	AM = cast<MaskedStoreSDNode>(Val: Op)->getAddressingMode();
1457	break;
1458	default:
1459	llvm_unreachable("Unexpected Opcode for Imm7Offset");
1460	}
1461
1462	int RHSC;
1463	// 7 bit constant, shifted by Shift.
1464	if (isScaledConstantInRange(Node: N, Scale: `1` << Shift, RangeMin: `0`, RangeMax: `0x80`, ScaledConstant&: RHSC)) {
1465	OffImm = ((AM == ISD::PRE_INC) \|\| (AM == ISD::POST_INC))
1466	? CurDAG->getSignedTargetConstant(Val: RHSC * (`1` << Shift),
1467	DL: SDLoc (N), VT: MVT::i32)
1468	: CurDAG->getSignedTargetConstant(Val: -RHSC * (`1` << Shift),
1469	DL: SDLoc (N), VT: MVT::i32);
1470	return true;
1471	}
1472	return false;
1473	}
1474
1475	template <int Min, int Max>
1476	bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1477	int Val;
1478	if (isScaledConstantInRange(Node: N, Scale: `1`, RangeMin: Min, RangeMax: Max, ScaledConstant&: Val)) {
1479	OffImm = CurDAG->getSignedTargetConstant(Val, DL: SDLoc (N), VT: MVT::i32);
1480	return true;
1481	}
1482	return false;
1483	}
1484
1485	bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1486	SDValue &Base,
1487	SDValue &OffReg, SDValue &ShImm) {
1488	// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1489	if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N))
1490	return false;
1491
1492	// Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1493	if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`))) {
1494	int RHSC = (int)RHS->getZExtValue();
1495	if (RHSC >= `0` && RHSC < `0x1000`) // 12 bits (unsigned)
1496	return false;
1497	else if (RHSC < `0` && RHSC >= -`255`) // 8 bits
1498	return false;
1499	}
1500
1501	// Look for (R + R) or (R + (R << [1,2,3])).
1502	unsigned ShAmt = `0`;
1503	Base = N.getOperand(i: `0`);
1504	OffReg = N.getOperand(i: `1`);
1505
1506	// Swap if it is ((R << c) + R).
1507	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: OffReg.getOpcode());
1508	if (ShOpcVal != ARM_AM::lsl) {
1509	ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: Base.getOpcode());
1510	if (ShOpcVal == ARM_AM::lsl)
1511	std::swap(a&: Base, b&: OffReg);
1512	}
1513
1514	if (ShOpcVal == ARM_AM::lsl) {
1515	// Check to see if the RHS of the shift is a constant, if not, we can't fold
1516	// it.
1517	if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: OffReg.getOperand(i: `1`))) {
1518	ShAmt = Sh->getZExtValue();
1519	if (ShAmt < `4` && isShifterOpProfitable(Shift: OffReg, ShOpcVal, ShAmt))
1520	OffReg = OffReg.getOperand(i: `0`);
1521	else {
1522	ShAmt = `0`;
1523	}
1524	}
1525	}
1526
1527	// If OffReg is a multiply-by-constant and it's profitable to extract a shift
1528	// and use it in a shifted operand do so.
1529	if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1530	unsigned PowerOfTwo = `0`;
1531	SDValue NewMulConst;
1532	if (canExtractShiftFromMul(N: OffReg, MaxShift: `3`, PowerOfTwo, NewMulConst)) {
1533	HandleSDNode Handle(OffReg);
1534	replaceDAGValue(N: OffReg.getOperand(i: `1`), M: NewMulConst);
1535	OffReg = Handle.getValue();
1536	ShAmt = PowerOfTwo;
1537	}
1538	}
1539
1540	ShImm = CurDAG->getTargetConstant(Val: ShAmt, DL: SDLoc (N), VT: MVT::i32);
1541
1542	return true;
1543	}
1544
1545	bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1546	SDValue &OffImm) {
1547	// This must* succeed since it's used for the irreplaceable ldrex and strex*
1548	// instructions.
1549	Base = N;
1550	OffImm = CurDAG->getTargetConstant(Val: `0`, DL: SDLoc (N), VT: MVT::i32);
1551
1552	if (N.getOpcode() != ISD::ADD \|\| !CurDAG->isBaseWithConstantOffset(Op: N))
1553	return true;
1554
1555	ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: `1`));
1556	if (!RHS)
1557	return true;
1558
1559	uint32_t RHSC = (int)RHS->getZExtValue();
1560	if (RHSC > `1020` \|\| RHSC % `4` != `0`)
1561	return true;
1562
1563	Base = N.getOperand(i: `0`);
1564	if (Base.getOpcode() == ISD::FrameIndex) {
1565	int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1566	Base = CurDAG->getTargetFrameIndex(
1567	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1568	}
1569
1570	OffImm = CurDAG->getTargetConstant(Val: RHSC/`4`, DL: SDLoc (N), VT: MVT::i32);
1571	return true;
1572	}
1573
1574	//===--------------------------------------------------------------------===//
1575
1576	/// getAL - Returns a ARMCC::AL immediate node.
1577	static inline SDValue getAL(SelectionDAG CurDAG, const* SDLoc &dl) {
1578	return CurDAG->getTargetConstant(Val: (uint64_t)ARMCC::AL, DL: dl, VT: MVT::i32);
1579	}
1580
1581	void ARMDAGToDAGISel::transferMemOperands(SDNode N, SDNode Result) {
1582	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1583	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
1584	}
1585
1586	bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1587	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1588	ISD::MemIndexedMode AM = LD->getAddressingMode();
1589	if (AM == ISD::UNINDEXED)
1590	return false;
1591
1592	EVT LoadedVT = LD->getMemoryVT();
1593	SDValue Offset, AMOpc;
1594	bool isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1595	unsigned Opcode = `0`;
1596	bool Match = false;
1597	if (LoadedVT == MVT::i32 && isPre &&
1598	SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1599	Opcode = ARM::LDR_PRE_IMM;
1600	Match = true;
1601	} else if (LoadedVT == MVT::i32 && !isPre &&
1602	SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1603	Opcode = ARM::LDR_POST_IMM;
1604	Match = true;
1605	} else if (LoadedVT == MVT::i32 &&
1606	SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1607	Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1608	Match = true;
1609
1610	} else if (LoadedVT == MVT::i16 &&
1611	SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1612	Match = true;
1613	Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1614	? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1615	: (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1616	} else if (LoadedVT == MVT::i8 \|\| LoadedVT == MVT::i1) {
1617	if (LD->getExtensionType() == ISD::SEXTLOAD) {
1618	if (SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1619	Match = true;
1620	Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1621	}
1622	} else {
1623	if (isPre &&
1624	SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1625	Match = true;
1626	Opcode = ARM::LDRB_PRE_IMM;
1627	} else if (!isPre &&
1628	SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1629	Match = true;
1630	Opcode = ARM::LDRB_POST_IMM;
1631	} else if (SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1632	Match = true;
1633	Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1634	}
1635	}
1636	}
1637
1638	if (Match) {
1639	if (Opcode == ARM::LDR_PRE_IMM \|\| Opcode == ARM::LDRB_PRE_IMM) {
1640	SDValue Chain = LD->getChain();
1641	SDValue Base = LD->getBasePtr();
1642	SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, dl: SDLoc (N)),
1643	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1644	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32, VT2: MVT::i32,
1645	VT3: MVT::Other, Ops);
1646	transferMemOperands(N, Result: New);
1647	ReplaceNode(F: N, T: New);
1648	return true;
1649	} else {
1650	SDValue Chain = LD->getChain();
1651	SDValue Base = LD->getBasePtr();
1652	SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, dl: SDLoc (N)),
1653	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1654	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32, VT2: MVT::i32,
1655	VT3: MVT::Other, Ops);
1656	transferMemOperands(N, Result: New);
1657	ReplaceNode(F: N, T: New);
1658	return true;
1659	}
1660	}
1661
1662	return false;
1663	}
1664
1665	bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1666	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1667	EVT LoadedVT = LD->getMemoryVT();
1668	ISD::MemIndexedMode AM = LD->getAddressingMode();
1669	if (AM != ISD::POST_INC \|\| LD->getExtensionType() != ISD::NON_EXTLOAD \|\|
1670	LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1671	return false;
1672
1673	auto *COffs = dyn_cast<ConstantSDNode>(Val: LD->getOffset());
1674	if (!COffs \|\| COffs->getZExtValue() != `4`)
1675	return false;
1676
1677	// A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1678	// The encoding of LDM is not how the rest of ISel expects a post-inc load to
1679	// look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1680	// ISel.
1681	SDValue Chain = LD->getChain();
1682	SDValue Base = LD->getBasePtr();
1683	SDValue Ops[]= { Base, getAL(CurDAG, dl: SDLoc (N)),
1684	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1685	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::tLDR_postidx, dl: SDLoc (N), VT1: MVT::i32,
1686	VT2: MVT::i32, VT3: MVT::Other, Ops);
1687	transferMemOperands(N, Result: New);
1688	ReplaceNode(F: N, T: New);
1689	return true;
1690	}
1691
1692	bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1693	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1694	ISD::MemIndexedMode AM = LD->getAddressingMode();
1695	if (AM == ISD::UNINDEXED)
1696	return false;
1697
1698	EVT LoadedVT = LD->getMemoryVT();
1699	bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1700	SDValue Offset;
1701	bool isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1702	unsigned Opcode = `0`;
1703	bool Match = false;
1704	if (SelectT2AddrModeImm8Offset(Op: N, N: LD->getOffset(), OffImm&: Offset)) {
1705	switch (LoadedVT.getSimpleVT().SimpleTy) {
1706	case MVT::i32:
1707	Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1708	break;
1709	case MVT::i16:
1710	if (isSExtLd)
1711	Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1712	else
1713	Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1714	break;
1715	case MVT::i8:
1716	case MVT::i1:
1717	if (isSExtLd)
1718	Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1719	else
1720	Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1721	break;
1722	default:
1723	return false;
1724	}
1725	Match = true;
1726	}
1727
1728	if (Match) {
1729	SDValue Chain = LD->getChain();
1730	SDValue Base = LD->getBasePtr();
1731	SDValue Ops[]= { Base, Offset, getAL(CurDAG, dl: SDLoc (N)),
1732	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain };
1733	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32, VT2: MVT::i32,
1734	VT3: MVT::Other, Ops);
1735	transferMemOperands(N, Result: New);
1736	ReplaceNode(F: N, T: New);
1737	return true;
1738	}
1739
1740	return false;
1741	}
1742
1743	bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1744	EVT LoadedVT;
1745	unsigned Opcode = `0`;
1746	bool isSExtLd, isPre;
1747	Align Alignment;
1748	ARMVCC::VPTCodes Pred;
1749	SDValue PredReg;
1750	SDValue Chain, Base, Offset;
1751
1752	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
1753	ISD::MemIndexedMode AM = LD->getAddressingMode();
1754	if (AM == ISD::UNINDEXED)
1755	return false;
1756	LoadedVT = LD->getMemoryVT();
1757	if (!LoadedVT.isVector())
1758	return false;
1759
1760	Chain = LD->getChain();
1761	Base = LD->getBasePtr();
1762	Offset = LD->getOffset();
1763	Alignment = LD->getAlign();
1764	isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1765	isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1766	Pred = ARMVCC::None;
1767	PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
1768	} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
1769	ISD::MemIndexedMode AM = LD->getAddressingMode();
1770	if (AM == ISD::UNINDEXED)
1771	return false;
1772	LoadedVT = LD->getMemoryVT();
1773	if (!LoadedVT.isVector())
1774	return false;
1775
1776	Chain = LD->getChain();
1777	Base = LD->getBasePtr();
1778	Offset = LD->getOffset();
1779	Alignment = LD->getAlign();
1780	isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1781	isPre = (AM == ISD::PRE_INC) \|\| (AM == ISD::PRE_DEC);
1782	Pred = ARMVCC::Then;
1783	PredReg = LD->getMask();
1784	} else
1785	llvm_unreachable("Expected a Load or a Masked Load!");
1786
1787	// We allow LE non-masked loads to change the type (for example use a vldrb.8
1788	// as opposed to a vldrw.32). This can allow extra addressing modes or
1789	// alignments for what is otherwise an equivalent instruction.
1790	bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(Val: N);
1791
1792	SDValue NewOffset;
1793	if (Alignment >= Align (`2`) && LoadedVT == MVT::v4i16 &&
1794	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `1`)) {
1795	if (isSExtLd)
1796	Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1797	else
1798	Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1799	} else if (LoadedVT == MVT::v8i8 &&
1800	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `0`)) {
1801	if (isSExtLd)
1802	Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1803	else
1804	Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1805	} else if (LoadedVT == MVT::v4i8 &&
1806	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `0`)) {
1807	if (isSExtLd)
1808	Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1809	else
1810	Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1811	} else if (Alignment >= Align (`4`) &&
1812	(CanChangeType \|\| LoadedVT == MVT::v4i32 \|\|
1813	LoadedVT == MVT::v4f32) &&
1814	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `2`))
1815	Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1816	else if (Alignment >= Align (`2`) &&
1817	(CanChangeType \|\| LoadedVT == MVT::v8i16 \|\|
1818	LoadedVT == MVT::v8f16) &&
1819	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `1`))
1820	Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1821	else if ((CanChangeType \|\| LoadedVT == MVT::v16i8) &&
1822	SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: `0`))
1823	Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1824	else
1825	return false;
1826
1827	SDValue Ops[] = {Base,
1828	NewOffset,
1829	CurDAG->getTargetConstant(Val: Pred, DL: SDLoc (N), VT: MVT::i32),
1830	PredReg,
1831	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), // tp_reg
1832	Chain};
1833	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT1: MVT::i32,
1834	VT2: N->getValueType(ResNo: `0`), VT3: MVT::Other, Ops);
1835	transferMemOperands(N, Result: New);
1836	ReplaceUses(F: SDValue (N, `0`), T: SDValue (New, `1`));
1837	ReplaceUses(F: SDValue (N, `1`), T: SDValue (New, `0`));
1838	ReplaceUses(F: SDValue (N, `2`), T: SDValue (New, `2`));
1839	CurDAG->RemoveDeadNode(N);
1840	return true;
1841	}
1842
1843	/// Form a GPRPair pseudo register from a pair of GPR regs.
1844	SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1845	SDLoc dl(V0.getNode());
1846	SDValue RegClass =
1847	CurDAG->getTargetConstant(Val: ARM::GPRPairRegClassID, DL: dl, VT: MVT::i32);
1848	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
1849	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
1850	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1851	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1852	}
1853
1854	/// Form a D register from a pair of S registers.
1855	SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1856	SDLoc dl(V0.getNode());
1857	SDValue RegClass =
1858	CurDAG->getTargetConstant(Val: ARM::DPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1859	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1860	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1861	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1862	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1863	}
1864
1865	/// Form a quad register from a pair of D registers.
1866	SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1867	SDLoc dl(V0.getNode());
1868	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QPRRegClassID, DL: dl,
1869	VT: MVT::i32);
1870	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1871	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1872	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1873	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1874	}
1875
1876	/// Form 4 consecutive D registers from a pair of Q registers.
1877	SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1878	SDLoc dl(V0.getNode());
1879	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1880	VT: MVT::i32);
1881	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1882	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1883	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1884	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1885	}
1886
1887	/// Form 4 consecutive S registers.
1888	SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1889	SDValue V2, SDValue V3) {
1890	SDLoc dl(V0.getNode());
1891	SDValue RegClass =
1892	CurDAG->getTargetConstant(Val: ARM::QPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1893	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1894	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1895	SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::ssub_2, DL: dl, VT: MVT::i32);
1896	SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::ssub_3, DL: dl, VT: MVT::i32);
1897	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1898	V2, SubReg2, V3, SubReg3 };
1899	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1900	}
1901
1902	/// Form 4 consecutive D registers.
1903	SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1904	SDValue V2, SDValue V3) {
1905	SDLoc dl(V0.getNode());
1906	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1907	VT: MVT::i32);
1908	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1909	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1910	SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::dsub_2, DL: dl, VT: MVT::i32);
1911	SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::dsub_3, DL: dl, VT: MVT::i32);
1912	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1913	V2, SubReg2, V3, SubReg3 };
1914	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1915	}
1916
1917	/// Form 4 consecutive Q registers.
1918	SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1919	SDValue V2, SDValue V3) {
1920	SDLoc dl(V0.getNode());
1921	SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQQQPRRegClassID, DL: dl,
1922	VT: MVT::i32);
1923	SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1924	SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1925	SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::qsub_2, DL: dl, VT: MVT::i32);
1926	SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::qsub_3, DL: dl, VT: MVT::i32);
1927	const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1928	V2, SubReg2, V3, SubReg3 };
1929	return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1930	}
1931
1932	/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1933	/// of a NEON VLD or VST instruction. The supported values depend on the
1934	/// number of registers being loaded.
1935	SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1936	unsigned NumVecs, bool is64BitVector) {
1937	unsigned NumRegs = NumVecs;
1938	if (!is64BitVector && NumVecs < `3`)
1939	NumRegs *= `2`;
1940
1941	unsigned Alignment = Align ->getAsZExtVal();
1942	if (Alignment >= `32` && NumRegs == `4`)
1943	Alignment = `32`;
1944	else if (Alignment >= `16` && (NumRegs == `2` \|\| NumRegs == `4`))
1945	Alignment = `16`;
1946	else if (Alignment >= `8`)
1947	Alignment = `8`;
1948	else
1949	Alignment = `0`;
1950
1951	return CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
1952	}
1953
1954	static bool isVLDfixed(unsigned Opc)
1955	{
1956	switch (Opc) {
1957	default: return false;
1958	case ARM::VLD1d8wb_fixed : return true;
1959	case ARM::VLD1d16wb_fixed : return true;
1960	case ARM::VLD1d64Qwb_fixed : return true;
1961	case ARM::VLD1d32wb_fixed : return true;
1962	case ARM::VLD1d64wb_fixed : return true;
1963	case ARM::VLD1d8TPseudoWB_fixed : return true;
1964	case ARM::VLD1d16TPseudoWB_fixed : return true;
1965	case ARM::VLD1d32TPseudoWB_fixed : return true;
1966	case ARM::VLD1d64TPseudoWB_fixed : return true;
1967	case ARM::VLD1d8QPseudoWB_fixed : return true;
1968	case ARM::VLD1d16QPseudoWB_fixed : return true;
1969	case ARM::VLD1d32QPseudoWB_fixed : return true;
1970	case ARM::VLD1d64QPseudoWB_fixed : return true;
1971	case ARM::VLD1q8wb_fixed : return true;
1972	case ARM::VLD1q16wb_fixed : return true;
1973	case ARM::VLD1q32wb_fixed : return true;
1974	case ARM::VLD1q64wb_fixed : return true;
1975	case ARM::VLD1DUPd8wb_fixed : return true;
1976	case ARM::VLD1DUPd16wb_fixed : return true;
1977	case ARM::VLD1DUPd32wb_fixed : return true;
1978	case ARM::VLD1DUPq8wb_fixed : return true;
1979	case ARM::VLD1DUPq16wb_fixed : return true;
1980	case ARM::VLD1DUPq32wb_fixed : return true;
1981	case ARM::VLD2d8wb_fixed : return true;
1982	case ARM::VLD2d16wb_fixed : return true;
1983	case ARM::VLD2d32wb_fixed : return true;
1984	case ARM::VLD2q8PseudoWB_fixed : return true;
1985	case ARM::VLD2q16PseudoWB_fixed : return true;
1986	case ARM::VLD2q32PseudoWB_fixed : return true;
1987	case ARM::VLD2DUPd8wb_fixed : return true;
1988	case ARM::VLD2DUPd16wb_fixed : return true;
1989	case ARM::VLD2DUPd32wb_fixed : return true;
1990	case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1991	case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1992	case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1993	}
1994	}
1995
1996	static bool isVSTfixed(unsigned Opc)
1997	{
1998	switch (Opc) {
1999	default: return false;
2000	case ARM::VST1d8wb_fixed : return true;
2001	case ARM::VST1d16wb_fixed : return true;
2002	case ARM::VST1d32wb_fixed : return true;
2003	case ARM::VST1d64wb_fixed : return true;
2004	case ARM::VST1q8wb_fixed : return true;
2005	case ARM::VST1q16wb_fixed : return true;
2006	case ARM::VST1q32wb_fixed : return true;
2007	case ARM::VST1q64wb_fixed : return true;
2008	case ARM::VST1d8TPseudoWB_fixed : return true;
2009	case ARM::VST1d16TPseudoWB_fixed : return true;
2010	case ARM::VST1d32TPseudoWB_fixed : return true;
2011	case ARM::VST1d64TPseudoWB_fixed : return true;
2012	case ARM::VST1d8QPseudoWB_fixed : return true;
2013	case ARM::VST1d16QPseudoWB_fixed : return true;
2014	case ARM::VST1d32QPseudoWB_fixed : return true;
2015	case ARM::VST1d64QPseudoWB_fixed : return true;
2016	case ARM::VST2d8wb_fixed : return true;
2017	case ARM::VST2d16wb_fixed : return true;
2018	case ARM::VST2d32wb_fixed : return true;
2019	case ARM::VST2q8PseudoWB_fixed : return true;
2020	case ARM::VST2q16PseudoWB_fixed : return true;
2021	case ARM::VST2q32PseudoWB_fixed : return true;
2022	}
2023	}
2024
2025	// Get the register stride update opcode of a VLD/VST instruction that
2026	// is otherwise equivalent to the given fixed stride updating instruction.
2027	static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2028	assert((isVLDfixed(Opc) \|\| isVSTfixed(Opc))
2029	&& "Incorrect fixed stride updating instruction.");
2030	switch (Opc) {
2031	default: break;
2032	case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2033	case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2034	case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2035	case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2036	case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2037	case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2038	case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2039	case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2040	case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2041	case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2042	case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2043	case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2044	case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2045	case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2046	case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2047	case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2048	case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2049	case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2050	case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2051	case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2052	case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2053	case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2054	case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2055	case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2056	case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2057	case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2058	case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2059
2060	case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2061	case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2062	case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2063	case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2064	case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2065	case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2066	case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2067	case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2068	case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2069	case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2070	case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2071	case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2072	case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2073	case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2074	case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2075	case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2076
2077	case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2078	case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2079	case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2080	case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2081	case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2082	case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2083
2084	case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2085	case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2086	case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2087	case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2088	case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2089	case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2090
2091	case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2092	case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2093	case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2094	}
2095	return Opc; // If not one we handle, return it unchanged.
2096	}
2097
2098	/// Returns true if the given increment is a Constant known to be equal to the
2099	/// access size performed by a NEON load/store. This means the "[rN]!" form can
2100	/// be used.
2101	static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2102	auto C = dyn_cast<ConstantSDNode>(Val&: Inc);
2103	return C && C->getZExtValue() == VecTy.getSizeInBits() / `8` * NumVecs;
2104	}
2105
2106	void ARMDAGToDAGISel::SelectVLD(SDNode N, bool* isUpdating, unsigned NumVecs,
2107	const uint16_t *DOpcodes,
2108	const uint16_t *QOpcodes0,
2109	const uint16_t *QOpcodes1) {
2110	assert(Subtarget->hasNEON());
2111	assert(NumVecs >= `1` && NumVecs <= `4` && "VLD NumVecs out-of-range");
2112	SDLoc dl(N);
2113
2114	SDValue MemAddr, Align;
2115	bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2116	// nodes are not intrinsics.
2117	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2118	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2119	return;
2120
2121	SDValue Chain = N->getOperand(Num: `0`);
2122	EVT VT = N->getValueType(ResNo: `0`);
2123	bool is64BitVector = VT.is64BitVector();
2124	Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2125
2126	unsigned OpcodeIndex;
2127	switch (VT.getSimpleVT().SimpleTy) {
2128	default: llvm_unreachable("unhandled vld type");
2129	// Double-register operations:
2130	case MVT::v8i8: OpcodeIndex = `0`; break;
2131	case MVT::v4f16:
2132	case MVT::v4bf16:
2133	case MVT::v4i16: OpcodeIndex = `1`; break;
2134	case MVT::v2f32:
2135	case MVT::v2i32: OpcodeIndex = `2`; break;
2136	case MVT::v1i64: OpcodeIndex = `3`; break;
2137	// Quad-register operations:
2138	case MVT::v16i8: OpcodeIndex = `0`; break;
2139	case MVT::v8f16:
2140	case MVT::v8bf16:
2141	case MVT::v8i16: OpcodeIndex = `1`; break;
2142	case MVT::v4f32:
2143	case MVT::v4i32: OpcodeIndex = `2`; break;
2144	case MVT::v2f64:
2145	case MVT::v2i64: OpcodeIndex = `3`; break;
2146	}
2147
2148	EVT ResTy;
2149	if (NumVecs == `1`)
2150	ResTy = VT;
2151	else {
2152	unsigned ResTyElts = (NumVecs == `3`) ? `4` : NumVecs;
2153	if (!is64BitVector)
2154	ResTyElts *= `2`;
2155	ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
2156	}
2157	std::vector<EVT> ResTys;
2158	ResTys.push_back(x: ResTy);
2159	if (isUpdating)
2160	ResTys.push_back(x: MVT::i32);
2161	ResTys.push_back(x: MVT::Other);
2162
2163	SDValue Pred = getAL(CurDAG, dl);
2164	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2165	SDNode *VLd;
2166	SmallVector<SDValue, `7`> Ops;
2167
2168	// Double registers and VLD1/VLD2 quad registers are directly supported.
2169	if (is64BitVector \|\| NumVecs <= `2`) {
2170	unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2171	QOpcodes0[OpcodeIndex]);
2172	Ops.push_back(Elt: MemAddr);
2173	Ops.push_back(Elt: Align);
2174	if (isUpdating) {
2175	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2176	bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2177	if (!IsImmUpdate) {
2178	// We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2179	// check for the opcode rather than the number of vector elements.
2180	if (isVLDfixed(Opc))
2181	Opc = getVLDSTRegisterUpdateOpcode(Opc);
2182	Ops.push_back(Elt: Inc);
2183	// VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2184	// the operands if not such an opcode.
2185	} else if (!isVLDfixed(Opc))
2186	Ops.push_back(Elt: Reg0);
2187	}
2188	Ops.push_back(Elt: Pred);
2189	Ops.push_back(Elt: Reg0);
2190	Ops.push_back(Elt: Chain);
2191	VLd = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2192
2193	} else {
2194	// Otherwise, quad registers are loaded with two separate instructions,
2195	// where one loads the even registers and the other loads the odd registers.
2196	EVT AddrTy = MemAddr.getValueType();
2197
2198	// Load the even subregs. This is always an updating load, so that it
2199	// provides the address to the second load for the odd subregs.
2200	SDValue ImplDef =
2201	SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), `0`);
2202	const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2203	SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2204	VT1: ResTy, VT2: AddrTy, VT3: MVT::Other, Ops: OpsA);
2205	Chain = SDValue (VLdA, `2`);
2206
2207	// Load the odd subregs.
2208	Ops.push_back(Elt: SDValue (VLdA, `1`));
2209	Ops.push_back(Elt: Align);
2210	if (isUpdating) {
2211	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2212	assert(isa<ConstantSDNode>(Inc.getNode()) &&
2213	"only constant post-increment update allowed for VLD3/4");
2214	(void)Inc;
2215	Ops.push_back(Elt: Reg0);
2216	}
2217	Ops.push_back(Elt: SDValue (VLdA, `0`));
2218	Ops.push_back(Elt: Pred);
2219	Ops.push_back(Elt: Reg0);
2220	Ops.push_back(Elt: Chain);
2221	VLd = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys, Ops);
2222	}
2223
2224	// Transfer memoperands.
2225	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2226	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLd), NewMemRefs: {MemOp});
2227
2228	if (NumVecs == `1`) {
2229	ReplaceNode(F: N, T: VLd);
2230	return;
2231	}
2232
2233	// Extract out the subregisters.
2234	SDValue SuperReg = SDValue (VLd, `0`);
2235	static_assert(ARM::dsub_7 == ARM::dsub_0 + `7` &&
2236	ARM::qsub_3 == ARM::qsub_0 + `3`,
2237	"Unexpected subreg numbering");
2238	unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2239	for (unsigned Vec = `0`; Vec < NumVecs; ++Vec)
2240	ReplaceUses(F: SDValue (N, Vec),
2241	T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2242	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (VLd, `1`));
2243	if (isUpdating)
2244	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (VLd, `2`));
2245	CurDAG->RemoveDeadNode(N);
2246	}
2247
2248	void ARMDAGToDAGISel::SelectVST(SDNode N, bool* isUpdating, unsigned NumVecs,
2249	const uint16_t *DOpcodes,
2250	const uint16_t *QOpcodes0,
2251	const uint16_t *QOpcodes1) {
2252	assert(Subtarget->hasNEON());
2253	assert(NumVecs >= `1` && NumVecs <= `4` && "VST NumVecs out-of-range");
2254	SDLoc dl(N);
2255
2256	SDValue MemAddr, Align;
2257	bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2258	// nodes are not intrinsics.
2259	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2260	unsigned Vec0Idx = `3`; // AddrOpIdx + (isUpdating ? 2 : 1)
2261	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2262	return;
2263
2264	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2265
2266	SDValue Chain = N->getOperand(Num: `0`);
2267	EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2268	bool is64BitVector = VT.is64BitVector();
2269	Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2270
2271	unsigned OpcodeIndex;
2272	switch (VT.getSimpleVT().SimpleTy) {
2273	default: llvm_unreachable("unhandled vst type");
2274	// Double-register operations:
2275	case MVT::v8i8: OpcodeIndex = `0`; break;
2276	case MVT::v4f16:
2277	case MVT::v4bf16:
2278	case MVT::v4i16: OpcodeIndex = `1`; break;
2279	case MVT::v2f32:
2280	case MVT::v2i32: OpcodeIndex = `2`; break;
2281	case MVT::v1i64: OpcodeIndex = `3`; break;
2282	// Quad-register operations:
2283	case MVT::v16i8: OpcodeIndex = `0`; break;
2284	case MVT::v8f16:
2285	case MVT::v8bf16:
2286	case MVT::v8i16: OpcodeIndex = `1`; break;
2287	case MVT::v4f32:
2288	case MVT::v4i32: OpcodeIndex = `2`; break;
2289	case MVT::v2f64:
2290	case MVT::v2i64: OpcodeIndex = `3`; break;
2291	}
2292
2293	std::vector<EVT> ResTys;
2294	if (isUpdating)
2295	ResTys.push_back(x: MVT::i32);
2296	ResTys.push_back(x: MVT::Other);
2297
2298	SDValue Pred = getAL(CurDAG, dl);
2299	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2300	SmallVector<SDValue, `7`> Ops;
2301
2302	// Double registers and VST1/VST2 quad registers are directly supported.
2303	if (is64BitVector \|\| NumVecs <= `2`) {
2304	SDValue SrcReg;
2305	if (NumVecs == `1`) {
2306	SrcReg = N->getOperand(Num: Vec0Idx);
2307	} else if (is64BitVector) {
2308	// Form a REG_SEQUENCE to force register allocation.
2309	SDValue V0 = N->getOperand(Num: Vec0Idx + `0`);
2310	SDValue V1 = N->getOperand(Num: Vec0Idx + `1`);
2311	if (NumVecs == `2`)
2312	SrcReg = SDValue (createDRegPairNode(VT: MVT::v2i64, V0, V1), `0`);
2313	else {
2314	SDValue V2 = N->getOperand(Num: Vec0Idx + `2`);
2315	// If it's a vst3, form a quad D-register and leave the last part as
2316	// an undef.
2317	SDValue V3 = (NumVecs == `3`)
2318	? SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF,dl,VT), `0`)
2319	: N->getOperand(Num: Vec0Idx + `3`);
2320	SrcReg = SDValue (createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), `0`);
2321	}
2322	} else {
2323	// Form a QQ register.
2324	SDValue Q0 = N->getOperand(Num: Vec0Idx);
2325	SDValue Q1 = N->getOperand(Num: Vec0Idx + `1`);
2326	SrcReg = SDValue (createQRegPairNode(VT: MVT::v4i64, V0: Q0, V1: Q1), `0`);
2327	}
2328
2329	unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2330	QOpcodes0[OpcodeIndex]);
2331	Ops.push_back(Elt: MemAddr);
2332	Ops.push_back(Elt: Align);
2333	if (isUpdating) {
2334	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2335	bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2336	if (!IsImmUpdate) {
2337	// We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2338	// check for the opcode rather than the number of vector elements.
2339	if (isVSTfixed(Opc))
2340	Opc = getVLDSTRegisterUpdateOpcode(Opc);
2341	Ops.push_back(Elt: Inc);
2342	}
2343	// VST1/VST2 fixed increment does not need Reg0 so only include it in
2344	// the operands if not such an opcode.
2345	else if (!isVSTfixed(Opc))
2346	Ops.push_back(Elt: Reg0);
2347	}
2348	Ops.push_back(Elt: SrcReg);
2349	Ops.push_back(Elt: Pred);
2350	Ops.push_back(Elt: Reg0);
2351	Ops.push_back(Elt: Chain);
2352	SDNode *VSt = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2353
2354	// Transfer memoperands.
2355	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VSt), NewMemRefs: {MemOp});
2356
2357	ReplaceNode(F: N, T: VSt);
2358	return;
2359	}
2360
2361	// Otherwise, quad registers are stored with two separate instructions,
2362	// where one stores the even registers and the other stores the odd registers.
2363
2364	// Form the QQQQ REG_SEQUENCE.
2365	SDValue V0 = N->getOperand(Num: Vec0Idx + `0`);
2366	SDValue V1 = N->getOperand(Num: Vec0Idx + `1`);
2367	SDValue V2 = N->getOperand(Num: Vec0Idx + `2`);
2368	SDValue V3 = (NumVecs == `3`)
2369	? SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), `0`)
2370	: N->getOperand(Num: Vec0Idx + `3`);
2371	SDValue RegSeq = SDValue (createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), `0`);
2372
2373	// Store the even D registers. This is always an updating store, so that it
2374	// provides the address to the second store for the odd subregs.
2375	const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2376	SDNode *VStA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2377	VT1: MemAddr.getValueType(),
2378	VT2: MVT::Other, Ops: OpsA);
2379	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStA), NewMemRefs: {MemOp});
2380	Chain = SDValue (VStA, `1`);
2381
2382	// Store the odd D registers.
2383	Ops.push_back(Elt: SDValue (VStA, `0`));
2384	Ops.push_back(Elt: Align);
2385	if (isUpdating) {
2386	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2387	assert(isa<ConstantSDNode>(Inc.getNode()) &&
2388	"only constant post-increment update allowed for VST3/4");
2389	(void)Inc;
2390	Ops.push_back(Elt: Reg0);
2391	}
2392	Ops.push_back(Elt: RegSeq);
2393	Ops.push_back(Elt: Pred);
2394	Ops.push_back(Elt: Reg0);
2395	Ops.push_back(Elt: Chain);
2396	SDNode *VStB = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys,
2397	Ops);
2398	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStB), NewMemRefs: {MemOp});
2399	ReplaceNode(F: N, T: VStB);
2400	}
2401
2402	void ARMDAGToDAGISel::SelectVLDSTLane(SDNode N, bool* IsLoad, bool isUpdating,
2403	unsigned NumVecs,
2404	const uint16_t *DOpcodes,
2405	const uint16_t *QOpcodes) {
2406	assert(Subtarget->hasNEON());
2407	assert(NumVecs >=`2` && NumVecs <= `4` && "VLDSTLane NumVecs out-of-range");
2408	SDLoc dl(N);
2409
2410	SDValue MemAddr, Align;
2411	bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2412	// nodes are not intrinsics.
2413	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2414	unsigned Vec0Idx = `3`; // AddrOpIdx + (isUpdating ? 2 : 1)
2415	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2416	return;
2417
2418	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2419
2420	SDValue Chain = N->getOperand(Num: `0`);
2421	unsigned Lane = N->getConstantOperandVal(Num: Vec0Idx + NumVecs);
2422	EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2423	bool is64BitVector = VT.is64BitVector();
2424
2425	unsigned Alignment = `0`;
2426	if (NumVecs != `3`) {
2427	Alignment = Align ->getAsZExtVal();
2428	unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / `8`;
2429	if (Alignment > NumBytes)
2430	Alignment = NumBytes;
2431	if (Alignment < `8` && Alignment < NumBytes)
2432	Alignment = `0`;
2433	// Alignment must be a power of two; make sure of that.
2434	Alignment = (Alignment & -Alignment);
2435	if (Alignment == `1`)
2436	Alignment = `0`;
2437	}
2438	Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
2439
2440	unsigned OpcodeIndex;
2441	switch (VT.getSimpleVT().SimpleTy) {
2442	default: llvm_unreachable("unhandled vld/vst lane type");
2443	// Double-register operations:
2444	case MVT::v8i8: OpcodeIndex = `0`; break;
2445	case MVT::v4f16:
2446	case MVT::v4bf16:
2447	case MVT::v4i16: OpcodeIndex = `1`; break;
2448	case MVT::v2f32:
2449	case MVT::v2i32: OpcodeIndex = `2`; break;
2450	// Quad-register operations:
2451	case MVT::v8f16:
2452	case MVT::v8bf16:
2453	case MVT::v8i16: OpcodeIndex = `0`; break;
2454	case MVT::v4f32:
2455	case MVT::v4i32: OpcodeIndex = `1`; break;
2456	}
2457
2458	std::vector<EVT> ResTys;
2459	if (IsLoad) {
2460	unsigned ResTyElts = (NumVecs == `3`) ? `4` : NumVecs;
2461	if (!is64BitVector)
2462	ResTyElts *= `2`;
2463	ResTys.push_back(x: EVT::getVectorVT(Context&: *CurDAG->getContext(),
2464	VT: MVT::i64, NumElements: ResTyElts));
2465	}
2466	if (isUpdating)
2467	ResTys.push_back(x: MVT::i32);
2468	ResTys.push_back(x: MVT::Other);
2469
2470	SDValue Pred = getAL(CurDAG, dl);
2471	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2472
2473	SmallVector<SDValue, `8`> Ops;
2474	Ops.push_back(Elt: MemAddr);
2475	Ops.push_back(Elt: Align);
2476	if (isUpdating) {
2477	SDValue Inc = N->getOperand(Num: AddrOpIdx + `1`);
2478	bool IsImmUpdate =
2479	isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
2480	Ops.push_back(Elt: IsImmUpdate ? Reg0 : Inc);
2481	}
2482
2483	SDValue SuperReg;
2484	SDValue V0 = N->getOperand(Num: Vec0Idx + `0`);
2485	SDValue V1 = N->getOperand(Num: Vec0Idx + `1`);
2486	if (NumVecs == `2`) {
2487	if (is64BitVector)
2488	SuperReg = SDValue (createDRegPairNode(VT: MVT::v2i64, V0, V1), `0`);
2489	else
2490	SuperReg = SDValue (createQRegPairNode(VT: MVT::v4i64, V0, V1), `0`);
2491	} else {
2492	SDValue V2 = N->getOperand(Num: Vec0Idx + `2`);
2493	SDValue V3 = (NumVecs == `3`)
2494	? SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), `0`)
2495	: N->getOperand(Num: Vec0Idx + `3`);
2496	if (is64BitVector)
2497	SuperReg = SDValue (createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), `0`);
2498	else
2499	SuperReg = SDValue (createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), `0`);
2500	}
2501	Ops.push_back(Elt: SuperReg);
2502	Ops.push_back(Elt: getI32Imm(Imm: Lane, dl));
2503	Ops.push_back(Elt: Pred);
2504	Ops.push_back(Elt: Reg0);
2505	Ops.push_back(Elt: Chain);
2506
2507	unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2508	QOpcodes[OpcodeIndex]);
2509	SDNode *VLdLn = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2510	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdLn), NewMemRefs: {MemOp});
2511	if (!IsLoad) {
2512	ReplaceNode(F: N, T: VLdLn);
2513	return;
2514	}
2515
2516	// Extract the subregisters.
2517	SuperReg = SDValue (VLdLn, `0`);
2518	static_assert(ARM::dsub_7 == ARM::dsub_0 + `7` &&
2519	ARM::qsub_3 == ARM::qsub_0 + `3`,
2520	"Unexpected subreg numbering");
2521	unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2522	for (unsigned Vec = `0`; Vec < NumVecs; ++Vec)
2523	ReplaceUses(F: SDValue (N, Vec),
2524	T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2525	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (VLdLn, `1`));
2526	if (isUpdating)
2527	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (VLdLn, `2`));
2528	CurDAG->RemoveDeadNode(N);
2529	}
2530
2531	template <typename SDValueVector>
2532	void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2533	SDValue PredicateMask) {
2534	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2535	Ops.push_back(PredicateMask);
2536	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2537	}
2538
2539	template <typename SDValueVector>
2540	void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2541	SDValue PredicateMask,
2542	SDValue Inactive) {
2543	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2544	Ops.push_back(PredicateMask);
2545	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2546	Ops.push_back(Inactive);
2547	}
2548
2549	template <typename SDValueVector>
2550	void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2551	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2552	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
2553	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2554	}
2555
2556	template <typename SDValueVector>
2557	void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2558	EVT InactiveTy) {
2559	Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2560	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
2561	Ops.push_back(CurDAG->getRegister(Reg: `0`, VT: MVT::i32)); // tp_reg
2562	Ops.push_back(SDValue (
2563	CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: InactiveTy), `0`));
2564	}
2565
2566	void ARMDAGToDAGISel::SelectMVE_WB(SDNode N, const* uint16_t *Opcodes,
2567	bool Predicated) {
2568	SDLoc Loc(N);
2569	SmallVector<SDValue, `8`> Ops;
2570
2571	uint16_t Opcode;
2572	switch (N->getValueType(ResNo: `1`).getVectorElementType().getSizeInBits()) {
2573	case `32`:
2574	Opcode = Opcodes[`0`];
2575	break;
2576	case `64`:
2577	Opcode = Opcodes[`1`];
2578	break;
2579	default:
2580	llvm_unreachable("bad vector element size in SelectMVE_WB");
2581	}
2582
2583	Ops.push_back(Elt: N->getOperand(Num: `2`)); // vector of base addresses
2584
2585	int32_t ImmValue = N->getConstantOperandVal(Num: `3`);
2586	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate offset
2587
2588	if (Predicated)
2589	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: `4`));
2590	else
2591	AddEmptyMVEPredicateToOps(Ops, Loc);
2592
2593	Ops.push_back(Elt: N->getOperand(Num: `0`)); // chain
2594
2595	SmallVector<EVT, `8`> VTs;
2596	VTs.push_back(Elt: N->getValueType(ResNo: `1`));
2597	VTs.push_back(Elt: N->getValueType(ResNo: `0`));
2598	VTs.push_back(Elt: N->getValueType(ResNo: `2`));
2599
2600	SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc (N), ResultTys: VTs, Ops);
2601	ReplaceUses(F: SDValue (N, `0`), T: SDValue (New, `1`));
2602	ReplaceUses(F: SDValue (N, `1`), T: SDValue (New, `0`));
2603	ReplaceUses(F: SDValue (N, `2`), T: SDValue (New, `2`));
2604	transferMemOperands(N, Result: New);
2605	CurDAG->RemoveDeadNode(N);
2606	}
2607
2608	void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2609	bool Immediate,
2610	bool HasSaturationOperand) {
2611	SDLoc Loc(N);
2612	SmallVector<SDValue, `8`> Ops;
2613
2614	// Two 32-bit halves of the value to be shifted
2615	Ops.push_back(Elt: N->getOperand(Num: `1`));
2616	Ops.push_back(Elt: N->getOperand(Num: `2`));
2617
2618	// The shift count
2619	if (Immediate) {
2620	int32_t ImmValue = N->getConstantOperandVal(Num: `3`);
2621	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2622	} else {
2623	Ops.push_back(Elt: N->getOperand(Num: `3`));
2624	}
2625
2626	// The immediate saturation operand, if any
2627	if (HasSaturationOperand) {
2628	int32_t SatOp = N->getConstantOperandVal(Num: `4`);
2629	int SatBit = (SatOp == `64` ? `0` : `1`);
2630	Ops.push_back(Elt: getI32Imm(Imm: SatBit, dl: Loc));
2631	}
2632
2633	// MVE scalar shifts are IT-predicable, so include the standard
2634	// predicate arguments.
2635	Ops.push_back(Elt: getAL(CurDAG, dl: Loc));
2636	Ops.push_back(Elt: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
2637
2638	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2639	}
2640
2641	void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2642	uint16_t OpcodeWithNoCarry,
2643	bool Add, bool Predicated) {
2644	SDLoc Loc(N);
2645	SmallVector<SDValue, `8`> Ops;
2646	uint16_t Opcode;
2647
2648	unsigned FirstInputOp = Predicated ? `2` : `1`;
2649
2650	// Two input vectors and the input carry flag
2651	Ops.push_back(Elt: N->getOperand(Num: FirstInputOp));
2652	Ops.push_back(Elt: N->getOperand(Num: FirstInputOp + `1`));
2653	SDValue CarryIn = N->getOperand(Num: FirstInputOp + `2`);
2654	ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(Val&: CarryIn);
2655	uint32_t CarryMask = `1` << `29`;
2656	uint32_t CarryExpected = Add ? `0` : CarryMask;
2657	if (CarryInConstant &&
2658	(CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2659	Opcode = OpcodeWithNoCarry;
2660	} else {
2661	Ops.push_back(Elt: CarryIn);
2662	Opcode = OpcodeWithCarry;
2663	}
2664
2665	if (Predicated)
2666	AddMVEPredicateToOps(Ops, Loc,
2667	PredicateMask: N->getOperand(Num: FirstInputOp + `3`), // predicate
2668	Inactive: N->getOperand(Num: FirstInputOp - `1`)); // inactive
2669	else
2670	AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: `0`));
2671
2672	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2673	}
2674
2675	void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode N, bool* Predicated) {
2676	SDLoc Loc(N);
2677	SmallVector<SDValue, `8`> Ops;
2678
2679	// One vector input, followed by a 32-bit word of bits to shift in
2680	// and then an immediate shift count
2681	Ops.push_back(Elt: N->getOperand(Num: `1`));
2682	Ops.push_back(Elt: N->getOperand(Num: `2`));
2683	int32_t ImmValue = N->getConstantOperandVal(Num: `3`);
2684	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2685
2686	if (Predicated)
2687	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: `4`));
2688	else
2689	AddEmptyMVEPredicateToOps(Ops, Loc);
2690
2691	CurDAG->SelectNodeTo(N, MachineOpc: ARM::MVE_VSHLC, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2692	}
2693
2694	static bool SDValueToConstBool(SDValue SDVal) {
2695	assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2696	ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(Val&: SDVal);
2697	uint64_t Value = SDValConstant->getZExtValue();
2698	assert((Value == `0` \|\| Value == `1`) && "expected value 0 or 1");
2699	return Value;
2700	}
2701
2702	void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode N, bool* Predicated,
2703	const uint16_t *OpcodesS,
2704	const uint16_t *OpcodesU,
2705	size_t Stride, size_t TySize) {
2706	assert(TySize < Stride && "Invalid TySize");
2707	bool IsUnsigned = SDValueToConstBool(SDVal: N->getOperand(Num: `1`));
2708	bool IsSub = SDValueToConstBool(SDVal: N->getOperand(Num: `2`));
2709	bool IsExchange = SDValueToConstBool(SDVal: N->getOperand(Num: `3`));
2710	if (IsUnsigned) {
2711	assert(!IsSub &&
2712	"Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2713	assert(!IsExchange &&
2714	"Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2715	}
2716
2717	auto OpIsZero = [N](size_t OpNo) {
2718	return isNullConstant(V: N->getOperand(Num: OpNo));
2719	};
2720
2721	// If the input accumulator value is not zero, select an instruction with
2722	// accumulator, otherwise select an instruction without accumulator
2723	bool IsAccum = !(OpIsZero (`4`) && OpIsZero (`5`));
2724
2725	const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2726	if (IsSub)
2727	Opcodes += `4` * Stride;
2728	if (IsExchange)
2729	Opcodes += `2` * Stride;
2730	if (IsAccum)
2731	Opcodes += Stride;
2732	uint16_t Opcode = Opcodes[TySize];
2733
2734	SDLoc Loc(N);
2735	SmallVector<SDValue, `8`> Ops;
2736	// Push the accumulator operands, if they are used
2737	if (IsAccum) {
2738	Ops.push_back(Elt: N->getOperand(Num: `4`));
2739	Ops.push_back(Elt: N->getOperand(Num: `5`));
2740	}
2741	// Push the two vector operands
2742	Ops.push_back(Elt: N->getOperand(Num: `6`));
2743	Ops.push_back(Elt: N->getOperand(Num: `7`));
2744
2745	if (Predicated)
2746	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: `8`));
2747	else
2748	AddEmptyMVEPredicateToOps(Ops, Loc);
2749
2750	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2751	}
2752
2753	void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode N, bool* Predicated,
2754	const uint16_t *OpcodesS,
2755	const uint16_t *OpcodesU) {
2756	EVT VecTy = N->getOperand(Num: `6`).getValueType();
2757	size_t SizeIndex;
2758	switch (VecTy.getVectorElementType().getSizeInBits()) {
2759	case `16`:
2760	SizeIndex = `0`;
2761	break;
2762	case `32`:
2763	SizeIndex = `1`;
2764	break;
2765	default:
2766	llvm_unreachable("bad vector element size");
2767	}
2768
2769	SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: `2`, TySize: SizeIndex);
2770	}
2771
2772	void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode N, bool* Predicated,
2773	const uint16_t *OpcodesS,
2774	const uint16_t *OpcodesU) {
2775	assert(
2776	N->getOperand(`6`).getValueType().getVectorElementType().getSizeInBits() ==
2777	`32` &&
2778	"bad vector element size");
2779	SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: `1`, TySize: `0`);
2780	}
2781
2782	void ARMDAGToDAGISel::SelectMVE_VLD(SDNode N, unsigned* NumVecs,
2783	const uint16_t *const *Opcodes,
2784	bool HasWriteback) {
2785	EVT VT = N->getValueType(ResNo: `0`);
2786	SDLoc Loc(N);
2787
2788	const uint16_t *OurOpcodes;
2789	switch (VT.getVectorElementType().getSizeInBits()) {
2790	case `8`:
2791	OurOpcodes = Opcodes[`0`];
2792	break;
2793	case `16`:
2794	OurOpcodes = Opcodes[`1`];
2795	break;
2796	case `32`:
2797	OurOpcodes = Opcodes[`2`];
2798	break;
2799	default:
2800	llvm_unreachable("bad vector element size in SelectMVE_VLD");
2801	}
2802
2803	EVT DataTy = EVT::getVectorVT(Context&: CurDAG->getContext(), VT: MVT::i64, NumElements: NumVecs `2`);
2804	SmallVector<EVT, `4`> ResultTys = {DataTy, MVT::Other};
2805	unsigned PtrOperand = HasWriteback ? `1` : `2`;
2806
2807	auto Data = SDValue (
2808	CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: DataTy), `0`);
2809	SDValue Chain = N->getOperand(Num: `0`);
2810	// Add a MVE_VLDn instruction for each Vec, except the last
2811	for (unsigned Stage = `0`; Stage < NumVecs - `1`; ++Stage) {
2812	SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2813	auto LoadInst =
2814	CurDAG->getMachineNode(Opcode: OurOpcodes[Stage], dl: Loc, ResultTys, Ops);
2815	Data = SDValue (LoadInst, `0`);
2816	Chain = SDValue (LoadInst, `1`);
2817	transferMemOperands(N, Result: LoadInst);
2818	}
2819	// The last may need a writeback on it
2820	if (HasWriteback)
2821	ResultTys = {DataTy, MVT::i32, MVT::Other};
2822	SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2823	auto LoadInst =
2824	CurDAG->getMachineNode(Opcode: OurOpcodes[NumVecs - `1`], dl: Loc, ResultTys, Ops);
2825	transferMemOperands(N, Result: LoadInst);
2826
2827	unsigned i;
2828	for (i = `0`; i < NumVecs; i++)
2829	ReplaceUses(F: SDValue (N, i),
2830	T: CurDAG->getTargetExtractSubreg(SRIdx: ARM::qsub_0 + i, DL: Loc, VT,
2831	Operand: SDValue (LoadInst, `0`)));
2832	if (HasWriteback)
2833	ReplaceUses(F: SDValue (N, i++), T: SDValue (LoadInst, `1`));
2834	ReplaceUses(F: SDValue (N, i), T: SDValue (LoadInst, HasWriteback ? `2` : `1`));
2835	CurDAG->RemoveDeadNode(N);
2836	}
2837
2838	void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode N, const* uint16_t *Opcodes,
2839	bool Wrapping, bool Predicated) {
2840	EVT VT = N->getValueType(ResNo: `0`);
2841	SDLoc Loc(N);
2842
2843	uint16_t Opcode;
2844	switch (VT.getScalarSizeInBits()) {
2845	case `8`:
2846	Opcode = Opcodes[`0`];
2847	break;
2848	case `16`:
2849	Opcode = Opcodes[`1`];
2850	break;
2851	case `32`:
2852	Opcode = Opcodes[`2`];
2853	break;
2854	default:
2855	llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2856	}
2857
2858	SmallVector<SDValue, `8`> Ops;
2859	unsigned OpIdx = `1`;
2860
2861	SDValue Inactive;
2862	if (Predicated)
2863	Inactive = N->getOperand(Num: OpIdx++);
2864
2865	Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // base
2866	if (Wrapping)
2867	Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // limit
2868
2869	SDValue ImmOp = N->getOperand(Num: OpIdx++); // step
2870	int ImmValue = ImmOp ->getAsZExtVal();
2871	Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc));
2872
2873	if (Predicated)
2874	AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: OpIdx), Inactive);
2875	else
2876	AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: `0`));
2877
2878	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2879	}
2880
2881	void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2882	size_t NumExtraOps, bool HasAccum) {
2883	bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2884	SDLoc Loc(N);
2885	SmallVector<SDValue, `8`> Ops;
2886
2887	unsigned OpIdx = `1`;
2888
2889	// Convert and append the immediate operand designating the coprocessor.
2890	SDValue ImmCorpoc = N->getOperand(Num: OpIdx++);
2891	uint32_t ImmCoprocVal = ImmCorpoc ->getAsZExtVal();
2892	Ops.push_back(Elt: getI32Imm(Imm: ImmCoprocVal, dl: Loc));
2893
2894	// For accumulating variants copy the low and high order parts of the
2895	// accumulator into a register pair and add it to the operand vector.
2896	if (HasAccum) {
2897	SDValue AccLo = N->getOperand(Num: OpIdx++);
2898	SDValue AccHi = N->getOperand(Num: OpIdx++);
2899	if (IsBigEndian)
2900	std::swap(a&: AccLo, b&: AccHi);
2901	Ops.push_back(Elt: SDValue (createGPRPairNode(VT: MVT::Untyped, V0: AccLo, V1: AccHi), `0`));
2902	}
2903
2904	// Copy extra operands as-is.
2905	for (size_t I = `0`; I < NumExtraOps; I++)
2906	Ops.push_back(Elt: N->getOperand(Num: OpIdx++));
2907
2908	// Convert and append the immediate operand
2909	SDValue Imm = N->getOperand(Num: OpIdx);
2910	uint32_t ImmVal = Imm ->getAsZExtVal();
2911	Ops.push_back(Elt: getI32Imm(Imm: ImmVal, dl: Loc));
2912
2913	// Accumulating variants are IT-predicable, add predicate operands.
2914	if (HasAccum) {
2915	SDValue Pred = getAL(CurDAG, dl: Loc);
2916	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
2917	Ops.push_back(Elt: Pred);
2918	Ops.push_back(Elt: PredReg);
2919	}
2920
2921	// Create the CDE intruction
2922	SDNode *InstrNode = CurDAG->getMachineNode(Opcode, dl: Loc, VT: MVT::Untyped, Ops);
2923	SDValue ResultPair = SDValue (InstrNode, `0`);
2924
2925	// The original intrinsic had two outputs, and the output of the dual-register
2926	// CDE instruction is a register pair. We need to extract the two subregisters
2927	// and replace all uses of the original outputs with the extracted
2928	// subregisters.
2929	uint16_t SubRegs[`2`] = {ARM::gsub_0, ARM::gsub_1};
2930	if (IsBigEndian)
2931	std::swap(a&: SubRegs[`0`], b&: SubRegs[`1`]);
2932
2933	for (size_t ResIdx = `0`; ResIdx < `2`; ResIdx++) {
2934	if (SDValue (N, ResIdx).use_empty())
2935	continue;
2936	SDValue SubReg = CurDAG->getTargetExtractSubreg(SRIdx: SubRegs[ResIdx], DL: Loc,
2937	VT: MVT::i32, Operand: ResultPair);
2938	ReplaceUses(F: SDValue (N, ResIdx), T: SubReg);
2939	}
2940
2941	CurDAG->RemoveDeadNode(N);
2942	}
2943
2944	void ARMDAGToDAGISel::SelectVLDDup(SDNode N, bool* IsIntrinsic,
2945	bool isUpdating, unsigned NumVecs,
2946	const uint16_t *DOpcodes,
2947	const uint16_t *QOpcodes0,
2948	const uint16_t *QOpcodes1) {
2949	assert(Subtarget->hasNEON());
2950	assert(NumVecs >= `1` && NumVecs <= `4` && "VLDDup NumVecs out-of-range");
2951	SDLoc dl(N);
2952
2953	SDValue MemAddr, Align;
2954	unsigned AddrOpIdx = IsIntrinsic ? `2` : `1`;
2955	if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2956	return;
2957
2958	SDValue Chain = N->getOperand(Num: `0`);
2959	EVT VT = N->getValueType(ResNo: `0`);
2960	bool is64BitVector = VT.is64BitVector();
2961
2962	unsigned Alignment = `0`;
2963	if (NumVecs != `3`) {
2964	Alignment = Align ->getAsZExtVal();
2965	unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / `8`;
2966	if (Alignment > NumBytes)
2967	Alignment = NumBytes;
2968	if (Alignment < `8` && Alignment < NumBytes)
2969	Alignment = `0`;
2970	// Alignment must be a power of two; make sure of that.
2971	Alignment = (Alignment & -Alignment);
2972	if (Alignment == `1`)
2973	Alignment = `0`;
2974	}
2975	Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
2976
2977	unsigned OpcodeIndex;
2978	switch (VT.getSimpleVT().SimpleTy) {
2979	default: llvm_unreachable("unhandled vld-dup type");
2980	case MVT::v8i8:
2981	case MVT::v16i8: OpcodeIndex = `0`; break;
2982	case MVT::v4i16:
2983	case MVT::v8i16:
2984	case MVT::v4f16:
2985	case MVT::v8f16:
2986	case MVT::v4bf16:
2987	case MVT::v8bf16:
2988	OpcodeIndex = `1`; break;
2989	case MVT::v2f32:
2990	case MVT::v2i32:
2991	case MVT::v4f32:
2992	case MVT::v4i32: OpcodeIndex = `2`; break;
2993	case MVT::v1f64:
2994	case MVT::v1i64: OpcodeIndex = `3`; break;
2995	}
2996
2997	unsigned ResTyElts = (NumVecs == `3`) ? `4` : NumVecs;
2998	if (!is64BitVector)
2999	ResTyElts *= `2`;
3000	EVT ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
3001
3002	std::vector<EVT> ResTys;
3003	ResTys.push_back(x: ResTy);
3004	if (isUpdating)
3005	ResTys.push_back(x: MVT::i32);
3006	ResTys.push_back(x: MVT::Other);
3007
3008	SDValue Pred = getAL(CurDAG, dl);
3009	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3010
3011	SmallVector<SDValue, `6`> Ops;
3012	Ops.push_back(Elt: MemAddr);
3013	Ops.push_back(Elt: Align);
3014	unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3015	: (NumVecs == `1`) ? QOpcodes0[OpcodeIndex]
3016	: QOpcodes1[OpcodeIndex];
3017	if (isUpdating) {
3018	SDValue Inc = N->getOperand(Num: `2`);
3019	bool IsImmUpdate =
3020	isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
3021	if (IsImmUpdate) {
3022	if (!isVLDfixed(Opc))
3023	Ops.push_back(Elt: Reg0);
3024	} else {
3025	if (isVLDfixed(Opc))
3026	Opc = getVLDSTRegisterUpdateOpcode(Opc);
3027	Ops.push_back(Elt: Inc);
3028	}
3029	}
3030	if (is64BitVector \|\| NumVecs == `1`) {
3031	// Double registers and VLD1 quad registers are directly supported.
3032	} else {
3033	SDValue ImplDef = SDValue (
3034	CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), `0`);
3035	const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3036	SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl, VT1: ResTy,
3037	VT2: MVT::Other, Ops: OpsA);
3038	Ops.push_back(Elt: SDValue (VLdA, `0`));
3039	Chain = SDValue (VLdA, `1`);
3040	}
3041
3042	Ops.push_back(Elt: Pred);
3043	Ops.push_back(Elt: Reg0);
3044	Ops.push_back(Elt: Chain);
3045
3046	SDNode *VLdDup = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
3047
3048	// Transfer memoperands.
3049	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
3050	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdDup), NewMemRefs: {MemOp});
3051
3052	// Extract the subregisters.
3053	if (NumVecs == `1`) {
3054	ReplaceUses(F: SDValue (N, `0`), T: SDValue (VLdDup, `0`));
3055	} else {
3056	SDValue SuperReg = SDValue (VLdDup, `0`);
3057	static_assert(ARM::dsub_7 == ARM::dsub_0 + `7`, "Unexpected subreg numbering");
3058	unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3059	for (unsigned Vec = `0`; Vec != NumVecs; ++Vec) {
3060	ReplaceUses(F: SDValue (N, Vec),
3061	T: CurDAG->getTargetExtractSubreg(SRIdx: SubIdx+Vec, DL: dl, VT, Operand: SuperReg));
3062	}
3063	}
3064	ReplaceUses(F: SDValue (N, NumVecs), T: SDValue (VLdDup, `1`));
3065	if (isUpdating)
3066	ReplaceUses(F: SDValue (N, NumVecs + `1`), T: SDValue (VLdDup, `2`));
3067	CurDAG->RemoveDeadNode(N);
3068	}
3069
3070	bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3071	if (!Subtarget->hasMVEIntegerOps())
3072	return false;
3073
3074	SDLoc dl(N);
3075
3076	// We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3077	// extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3078	// inserts of the correct type:
3079	SDValue Ins1 = SDValue (N, `0`);
3080	SDValue Ins2 = N->getOperand(Num: `0`);
3081	EVT VT = Ins1.getValueType();
3082	if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT \|\| !Ins2.hasOneUse() \|\|
3083	!isa<ConstantSDNode>(Val: Ins1.getOperand(i: `2`)) \|\|
3084	!isa<ConstantSDNode>(Val: Ins2.getOperand(i: `2`)) \|\|
3085	(VT != MVT::v8f16 && VT != MVT::v8i16) \|\| (Ins2.getValueType() != VT))
3086	return false;
3087
3088	unsigned Lane1 = Ins1.getConstantOperandVal(i: `2`);
3089	unsigned Lane2 = Ins2.getConstantOperandVal(i: `2`);
3090	if (Lane2 % `2` != `0` \|\| Lane1 != Lane2 + `1`)
3091	return false;
3092
3093	// If the inserted values will be able to use T/B already, leave it to the
3094	// existing tablegen patterns. For example VCVTT/VCVTB.
3095	SDValue Val1 = Ins1.getOperand(i: `1`);
3096	SDValue Val2 = Ins2.getOperand(i: `1`);
3097	if (Val1.getOpcode() == ISD::FP_ROUND \|\| Val2.getOpcode() == ISD::FP_ROUND)
3098	return false;
3099
3100	// Check if the inserted values are both extracts.
3101	if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT \|\|
3102	Val1.getOpcode() == ARMISD::VGETLANEu) &&
3103	(Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT \|\|
3104	Val2.getOpcode() == ARMISD::VGETLANEu) &&
3105	isa<ConstantSDNode>(Val: Val1.getOperand(i: `1`)) &&
3106	isa<ConstantSDNode>(Val: Val2.getOperand(i: `1`)) &&
3107	(Val1.getOperand(i: `0`).getValueType() == MVT::v8f16 \|\|
3108	Val1.getOperand(i: `0`).getValueType() == MVT::v8i16) &&
3109	(Val2.getOperand(i: `0`).getValueType() == MVT::v8f16 \|\|
3110	Val2.getOperand(i: `0`).getValueType() == MVT::v8i16)) {
3111	unsigned ExtractLane1 = Val1.getConstantOperandVal(i: `1`);
3112	unsigned ExtractLane2 = Val2.getConstantOperandVal(i: `1`);
3113
3114	// If the two extracted lanes are from the same place and adjacent, this
3115	// simplifies into a f32 lane move.
3116	if (Val1.getOperand(i: `0`) == Val2.getOperand(i: `0`) && ExtractLane2 % `2` == `0` &&
3117	ExtractLane1 == ExtractLane2 + `1`) {
3118	SDValue NewExt = CurDAG->getTargetExtractSubreg(
3119	SRIdx: ARM::ssub_0 + ExtractLane2 / `2`, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: `0`));
3120	SDValue NewIns = CurDAG->getTargetInsertSubreg(
3121	SRIdx: ARM::ssub_0 + Lane2 / `2`, DL: dl, VT, Operand: Ins2.getOperand(i: `0`),
3122	Subreg: NewExt);
3123	ReplaceUses(F: Ins1, T: NewIns);
3124	return true;
3125	}
3126
3127	// Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3128	// extracting odd lanes.
3129	if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3130	SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3131	SRIdx: ARM::ssub_0 + ExtractLane1 / `2`, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: `0`));
3132	SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3133	SRIdx: ARM::ssub_0 + ExtractLane2 / `2`, DL: dl, VT: MVT::f32, Operand: Val2.getOperand(i: `0`));
3134	if (ExtractLane1 % `2` != `0`)
3135	Inp1 = SDValue (CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp1), `0`);
3136	if (ExtractLane2 % `2` != `0`)
3137	Inp2 = SDValue (CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp2), `0`);
3138	SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Inp2, Op2: Inp1);
3139	SDValue NewIns =
3140	CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / `2`, DL: dl, VT: MVT::v4f32,
3141	Operand: Ins2.getOperand(i: `0`), Subreg: SDValue (VINS, `0`));
3142	ReplaceUses(F: Ins1, T: NewIns);
3143	return true;
3144	}
3145	}
3146
3147	// The inserted values are not extracted - if they are f16 then insert them
3148	// directly using a VINS.
3149	if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3150	SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Val2, Op2: Val1);
3151	SDValue NewIns =
3152	CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / `2`, DL: dl, VT: MVT::v4f32,
3153	Operand: Ins2.getOperand(i: `0`), Subreg: SDValue (VINS, `0`));
3154	ReplaceUses(F: Ins1, T: NewIns);
3155	return true;
3156	}
3157
3158	return false;
3159	}
3160
3161	bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3162	SDNode *FMul,
3163	bool IsUnsigned,
3164	bool FixedToFloat) {
3165	auto Type = N->getValueType(ResNo: `0`);
3166	unsigned ScalarBits = Type.getScalarSizeInBits();
3167	if (ScalarBits > `32`)
3168	return false;
3169
3170	SDNodeFlags FMulFlags = FMul->getFlags();
3171	// The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3172	// allowed in 16 bit unsigned floats
3173	if (ScalarBits == `16` && !FMulFlags.hasNoInfs() && IsUnsigned)
3174	return false;
3175
3176	SDValue ImmNode = FMul->getOperand(Num: `1`);
3177	SDValue VecVal = FMul->getOperand(Num: `0`);
3178	if (VecVal ->getOpcode() == ISD::UINT_TO_FP \|\|
3179	VecVal ->getOpcode() == ISD::SINT_TO_FP)
3180	VecVal = VecVal ->getOperand(Num: `0`);
3181
3182	if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3183	return false;
3184
3185	if (ImmNode.getOpcode() == ISD::BITCAST) {
3186	if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3187	return false;
3188	ImmNode = ImmNode.getOperand(i: `0`);
3189	}
3190
3191	if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3192	return false;
3193
3194	APFloat ImmAPF(`0.0f`);
3195	switch (ImmNode.getOpcode()) {
3196	case ARMISD::VMOVIMM:
3197	case ARMISD::VDUP: {
3198	if (!isa<ConstantSDNode>(Val: ImmNode.getOperand(i: `0`)))
3199	return false;
3200	unsigned Imm = ImmNode.getConstantOperandVal(i: `0`);
3201	if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3202	Imm = ARM_AM::decodeVMOVModImm(ModImm: Imm, EltBits&: ScalarBits);
3203	ImmAPF =
3204	APFloat (ScalarBits == `32` ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3205	APInt (ScalarBits, Imm));
3206	break;
3207	}
3208	case ARMISD::VMOVFPIMM: {
3209	ImmAPF = APFloat (ARM_AM::getFPImmFloat(Imm: ImmNode.getConstantOperandVal(i: `0`)));
3210	break;
3211	}
3212	default:
3213	return false;
3214	}
3215
3216	// Where n is the number of fractional bits, multiplying by 2^n will convert
3217	// from float to fixed and multiplying by 2^-n will convert from fixed to
3218	// float. Taking log2 of the factor (after taking the inverse in the case of
3219	// float to fixed) will give n.
3220	APFloat ToConvert = ImmAPF;
3221	if (FixedToFloat) {
3222	if (!ImmAPF.getExactInverse(inv: &ToConvert))
3223	return false;
3224	}
3225	APSInt Converted(`64`, false);
3226	bool IsExact;
3227	ToConvert.convertToInteger(Result&: Converted, RM: llvm::RoundingMode::NearestTiesToEven,
3228	IsExact: &IsExact);
3229	if (!IsExact \|\| !Converted.isPowerOf2())
3230	return false;
3231
3232	unsigned FracBits = Converted.logBase2();
3233	if (FracBits > ScalarBits)
3234	return false;
3235
3236	SmallVector<SDValue, `3`> Ops{
3237	VecVal, CurDAG->getConstant(Val: FracBits, DL: SDLoc (N), VT: MVT::i32)};
3238	AddEmptyMVEPredicateToOps(Ops, Loc: SDLoc (N), InactiveTy: Type);
3239
3240	unsigned int Opcode;
3241	switch (ScalarBits) {
3242	case `16`:
3243	if (FixedToFloat)
3244	Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3245	else
3246	Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3247	break;
3248	case `32`:
3249	if (FixedToFloat)
3250	Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3251	else
3252	Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3253	break;
3254	default:
3255	llvm_unreachable("unexpected number of scalar bits");
3256	break;
3257	}
3258
3259	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: SDLoc (N), VT: Type, Ops));
3260	return true;
3261	}
3262
3263	bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3264	// Transform a floating-point to fixed-point conversion to a VCVT
3265	if (!Subtarget->hasMVEFloatOps())
3266	return false;
3267	EVT Type = N->getValueType(ResNo: `0`);
3268	if (!Type.isVector())
3269	return false;
3270	unsigned int ScalarBits = Type.getScalarSizeInBits();
3271
3272	bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT \|\|
3273	N->getOpcode() == ISD::FP_TO_UINT_SAT;
3274	SDNode *Node = N->getOperand(Num: `0`).getNode();
3275
3276	// floating-point to fixed-point with one fractional bit gets turned into an
3277	// FP_TO_[U\|S]INT(FADD (x, x)) rather than an FP_TO_[U\|S]INT(FMUL (x, y))
3278	if (Node->getOpcode() == ISD::FADD) {
3279	if (Node->getOperand(Num: `0`) != Node->getOperand(Num: `1`))
3280	return false;
3281	SDNodeFlags Flags = Node->getFlags();
3282	// The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3283	// allowed in 16 bit unsigned floats
3284	if (ScalarBits == `16` && !Flags.hasNoInfs() && IsUnsigned)
3285	return false;
3286
3287	unsigned Opcode;
3288	switch (ScalarBits) {
3289	case `16`:
3290	Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3291	break;
3292	case `32`:
3293	Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3294	break;
3295	}
3296	SmallVector<SDValue, `3`> Ops{Node->getOperand(Num: `0`),
3297	CurDAG->getConstant(Val: `1`, DL: dl, VT: MVT::i32)};
3298	AddEmptyMVEPredicateToOps(Ops, Loc: dl, InactiveTy: Type);
3299
3300	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl, VT: Type, Ops));
3301	return true;
3302	}
3303
3304	if (Node->getOpcode() != ISD::FMUL)
3305	return false;
3306
3307	return transformFixedFloatingPointConversion(N, FMul: Node, IsUnsigned, FixedToFloat: false);
3308	}
3309
3310	bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3311	// Transform a fixed-point to floating-point conversion to a VCVT
3312	if (!Subtarget->hasMVEFloatOps())
3313	return false;
3314	auto Type = N->getValueType(ResNo: `0`);
3315	if (!Type.isVector())
3316	return false;
3317
3318	auto LHS = N->getOperand(Num: `0`);
3319	if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3320	return false;
3321
3322	return transformFixedFloatingPointConversion(
3323	N, FMul: N, IsUnsigned: LHS.getOpcode() == ISD::UINT_TO_FP, FixedToFloat: true);
3324	}
3325
3326	bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode N, bool* isSigned) {
3327	if (!Subtarget->hasV6T2Ops())
3328	return false;
3329
3330	unsigned Opc = isSigned
3331	? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3332	: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3333	SDLoc dl(N);
3334
3335	// For unsigned extracts, check for a shift right and mask
3336	unsigned And_imm = `0`;
3337	if (N->getOpcode() == ISD::AND) {
3338	if (isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: And_imm)) {
3339
3340	// The immediate is a mask of the low bits iff imm & (imm+1) == 0
3341	if (And_imm & (And_imm + `1`))
3342	return false;
3343
3344	unsigned Srl_imm = `0`;
3345	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SRL,
3346	Imm&: Srl_imm)) {
3347	assert(Srl_imm > `0` && Srl_imm < `32` && "bad amount in shift node!");
3348
3349	// Mask off the unnecessary bits of the AND immediate; normally
3350	// DAGCombine will do this, but that might not happen if
3351	// targetShrinkDemandedConstant chooses a different immediate.
3352	And_imm &= -`1U` >> Srl_imm;
3353
3354	// Note: The width operand is encoded as width-1.
3355	unsigned Width = llvm::countr_one(Value: And_imm) - `1`;
3356	unsigned LSB = Srl_imm;
3357
3358	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3359
3360	if ((LSB + Width + `1`) == N->getValueType(ResNo: `0`).getSizeInBits()) {
3361	// It's cheaper to use a right shift to extract the top bits.
3362	if (Subtarget->isThumb()) {
3363	Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3364	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3365	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3366	getAL(CurDAG, dl), Reg0, Reg0 };
3367	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3368	return true;
3369	}
3370
3371	// ARM models shift instructions as MOVsi with shifter operand.
3372	ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: ISD::SRL);
3373	SDValue ShOpc =
3374	CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: LSB), DL: dl,
3375	VT: MVT::i32);
3376	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`), ShOpc,
3377	getAL(CurDAG, dl), Reg0, Reg0 };
3378	CurDAG->SelectNodeTo(N, MachineOpc: ARM::MOVsi, VT: MVT::i32, Ops);
3379	return true;
3380	}
3381
3382	assert(LSB + Width + `1` <= `32` && "Shouldn't create an invalid ubfx");
3383	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3384	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3385	CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3386	getAL(CurDAG, dl), Reg0 };
3387	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3388	return true;
3389	}
3390	}
3391	return false;
3392	}
3393
3394	// Otherwise, we're looking for a shift of a shift
3395	unsigned Shl_imm = `0`;
3396	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SHL, Imm&: Shl_imm)) {
3397	assert(Shl_imm > `0` && Shl_imm < `32` && "bad amount in shift node!");
3398	unsigned Srl_imm = `0`;
3399	if (isInt32Immediate(N: N->getOperand(Num: `1`), Imm&: Srl_imm)) {
3400	assert(Srl_imm > `0` && Srl_imm < `32` && "bad amount in shift node!");
3401	// Note: The width operand is encoded as width-1.
3402	unsigned Width = `32` - Srl_imm - `1`;
3403	int LSB = Srl_imm - Shl_imm;
3404	if (LSB < `0`)
3405	return false;
3406	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3407	assert(LSB + Width + `1` <= `32` && "Shouldn't create an invalid ubfx");
3408	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3409	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3410	CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3411	getAL(CurDAG, dl), Reg0 };
3412	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3413	return true;
3414	}
3415	}
3416
3417	// Or we are looking for a shift of an and, with a mask operand
3418	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::AND, Imm&: And_imm) &&
3419	isShiftedMask_32(Value: And_imm)) {
3420	unsigned Srl_imm = `0`;
3421	unsigned LSB = llvm::countr_zero(Val: And_imm);
3422	// Shift must be the same as the ands lsb
3423	if (isInt32Immediate(N: N->getOperand(Num: `1`), Imm&: Srl_imm) && Srl_imm == LSB) {
3424	assert(Srl_imm > `0` && Srl_imm < `32` && "bad amount in shift node!");
3425	unsigned MSB = llvm::Log2_32(Value: And_imm);
3426	// Note: The width operand is encoded as width-1.
3427	unsigned Width = MSB - LSB;
3428	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3429	assert(Srl_imm + Width + `1` <= `32` && "Shouldn't create an invalid ubfx");
3430	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3431	CurDAG->getTargetConstant(Val: Srl_imm, DL: dl, VT: MVT::i32),
3432	CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3433	getAL(CurDAG, dl), Reg0 };
3434	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3435	return true;
3436	}
3437	}
3438
3439	if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3440	unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: `1`))->getVT().getSizeInBits();
3441	unsigned LSB = `0`;
3442	if (!isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SRL, Imm&: LSB) &&
3443	!isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::SRA, Imm&: LSB))
3444	return false;
3445
3446	if (LSB + Width > `32`)
3447	return false;
3448
3449	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3450	assert(LSB + Width <= `32` && "Shouldn't create an invalid ubfx");
3451	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
3452	CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3453	CurDAG->getTargetConstant(Val: Width - `1`, DL: dl, VT: MVT::i32),
3454	getAL(CurDAG, dl), Reg0 };
3455	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3456	return true;
3457	}
3458
3459	return false;
3460	}
3461
3462	/// Target-specific DAG combining for ISD::SUB.
3463	/// Target-independent combining lowers SELECT_CC nodes of the form
3464	/// select_cc setg[ge] X, 0, X, -X
3465	/// select_cc setgt X, -1, X, -X
3466	/// select_cc setl[te] X, 0, -X, X
3467	/// select_cc setlt X, 1, -X, X
3468	/// which represent Integer ABS into:
3469	/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3470	/// ARM instruction selection detects the latter and matches it to
3471	/// ARM::ABS or ARM::t2ABS machine node.
3472	bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3473	SDValue SUBSrc0 = N->getOperand(Num: `0`);
3474	SDValue SUBSrc1 = N->getOperand(Num: `1`);
3475	EVT VT = N->getValueType(ResNo: `0`);
3476
3477	if (Subtarget->isThumb1Only())
3478	return false;
3479
3480	if (SUBSrc0.getOpcode() != ISD::XOR \|\| SUBSrc1.getOpcode() != ISD::SRA)
3481	return false;
3482
3483	SDValue XORSrc0 = SUBSrc0.getOperand(i: `0`);
3484	SDValue XORSrc1 = SUBSrc0.getOperand(i: `1`);
3485	SDValue SRASrc0 = SUBSrc1.getOperand(i: `0`);
3486	SDValue SRASrc1 = SUBSrc1.getOperand(i: `1`);
3487	ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(Val&: SRASrc1);
3488	EVT XType = SRASrc0.getValueType();
3489	unsigned Size = XType.getSizeInBits() - `1`;
3490
3491	if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3492	SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3493	unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3494	CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VT, Op1: XORSrc0);
3495	return true;
3496	}
3497
3498	return false;
3499	}
3500
3501	/// We've got special pseudo-instructions for these
3502	void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3503	unsigned Opcode;
3504	EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3505	if (MemTy == MVT::i8)
3506	Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3507	else if (MemTy == MVT::i16)
3508	Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3509	else if (MemTy == MVT::i32)
3510	Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3511	else
3512	llvm_unreachable("Unknown AtomicCmpSwap type");
3513
3514	SDValue Ops[] = {N->getOperand(Num: `1`), N->getOperand(Num: `2`), N->getOperand(Num: `3`),
3515	N->getOperand(Num: `0`)};
3516	SDNode *CmpSwap = CurDAG->getMachineNode(
3517	Opcode, dl: SDLoc (N),
3518	VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other), Ops);
3519
3520	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3521	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3522
3523	ReplaceUses(F: SDValue (N, `0`), T: SDValue (CmpSwap, `0`));
3524	ReplaceUses(F: SDValue (N, `1`), T: SDValue (CmpSwap, `2`));
3525	CurDAG->RemoveDeadNode(N);
3526	}
3527
3528	static std::optional<std::pair<unsigned, unsigned>>
3529	getContiguousRangeOfSetBits(const APInt &A) {
3530	unsigned FirstOne = A.getBitWidth() - A.countl_zero() - `1`;
3531	unsigned LastOne = A.countr_zero();
3532	if (A.popcount() != (FirstOne - LastOne + `1`))
3533	return std::nullopt;
3534	return std::make_pair(x&: FirstOne, y&: LastOne);
3535	}
3536
3537	void ARMDAGToDAGISel::SelectCMPZ(SDNode N, bool* &SwitchEQNEToPLMI) {
3538	assert(N->getOpcode() == ARMISD::CMPZ);
3539	SwitchEQNEToPLMI = false;
3540
3541	if (!Subtarget->isThumb())
3542	// FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3543	// LSR don't exist as standalone instructions - they need the barrel shifter.
3544	return;
3545
3546	// select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3547	SDValue And = N->getOperand(Num: `0`);
3548	if (!And ->hasOneUse())
3549	return;
3550
3551	SDValue Zero = N->getOperand(Num: `1`);
3552	if (!isNullConstant(V: Zero) \|\| And ->getOpcode() != ISD::AND)
3553	return;
3554	SDValue X = And.getOperand(i: `0`);
3555	auto C = dyn_cast<ConstantSDNode>(Val: And.getOperand(i: `1`));
3556
3557	if (!C)
3558	return;
3559	auto Range = getContiguousRangeOfSetBits(A: C->getAPIntValue());
3560	if (!Range)
3561	return;
3562
3563	// There are several ways to lower this:
3564	SDNode *NewN;
3565	SDLoc dl(N);
3566
3567	auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3568	if (Subtarget->isThumb2()) {
3569	Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3570	SDValue Ops[] = { Src, CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3571	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3572	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3573	return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3574	} else {
3575	SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Src,
3576	CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3577	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3578	return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3579	}
3580	};
3581
3582	if (Range ->second == `0`) {
3583	// 1. Mask includes the LSB -> Simply shift the top N bits off
3584	NewN = EmitShift (ARM::tLSLri, X, `31` - Range ->first);
3585	ReplaceNode(F: And.getNode(), T: NewN);
3586	} else if (Range ->first == `31`) {
3587	// 2. Mask includes the MSB -> Simply shift the bottom N bits off
3588	NewN = EmitShift (ARM::tLSRri, X, Range ->second);
3589	ReplaceNode(F: And.getNode(), T: NewN);
3590	} else if (Range ->first == Range ->second) {
3591	// 3. Only one bit is set. We can shift this into the sign bit and use a
3592	// PL/MI comparison. This is not safe if CMPZ has multiple uses because
3593	// only one of them (the one currently being selected) will be switched
3594	// to use the new condition code.
3595	if (!N->hasOneUse())
3596	return;
3597	NewN = EmitShift (ARM::tLSLri, X, `31` - Range ->first);
3598	ReplaceNode(F: And.getNode(), T: NewN);
3599
3600	SwitchEQNEToPLMI = true;
3601	} else if (!Subtarget->hasV6T2Ops()) {
3602	// 4. Do a double shift to clear bottom and top bits, but only in
3603	// thumb-1 mode as in thumb-2 we can use UBFX.
3604	NewN = EmitShift (ARM::tLSLri, X, `31` - Range ->first);
3605	NewN = EmitShift (ARM::tLSRri, SDValue (NewN, `0`),
3606	Range ->second + (`31` - Range ->first));
3607	ReplaceNode(F: And.getNode(), T: NewN);
3608	}
3609	}
3610
3611	static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[`3`],
3612	unsigned Opc128[`3`]) {
3613	assert((VT.is64BitVector() \|\| VT.is128BitVector()) &&
3614	"Unexpected vector shuffle length");
3615	switch (VT.getScalarSizeInBits()) {
3616	default:
3617	llvm_unreachable("Unexpected vector shuffle element size");
3618	case `8`:
3619	return VT.is64BitVector() ? Opc64[`0`] : Opc128[`0`];
3620	case `16`:
3621	return VT.is64BitVector() ? Opc64[`1`] : Opc128[`1`];
3622	case `32`:
3623	return VT.is64BitVector() ? Opc64[`2`] : Opc128[`2`];
3624	}
3625	}
3626
3627	void ARMDAGToDAGISel::Select(SDNode *N) {
3628	SDLoc dl(N);
3629
3630	if (N->isMachineOpcode()) {
3631	N->setNodeId(-`1`);
3632	return; // Already selected.
3633	}
3634
3635	switch (N->getOpcode()) {
3636	default: break;
3637	case ISD::STORE: {
3638	// For Thumb1, match an sp-relative store in C++. This is a little
3639	// unfortunate, but I don't think I can make the chain check work
3640	// otherwise. (The chain of the store has to be the same as the chain
3641	// of the CopyFromReg, or else we can't replace the CopyFromReg with
3642	// a direct reference to "SP".)
3643	//
3644	// This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3645	// a different addressing mode from other four-byte stores.
3646	//
3647	// This pattern usually comes up with call arguments.
3648	StoreSDNode *ST = cast<StoreSDNode>(Val: N);
3649	SDValue Ptr = ST->getBasePtr();
3650	if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3651	int RHSC = `0`;
3652	if (Ptr.getOpcode() == ISD::ADD &&
3653	isScaledConstantInRange(Node: Ptr.getOperand(i: `1`), /Scale=/`4`, RangeMin: `0`, RangeMax: `256`, ScaledConstant&: RHSC))
3654	Ptr = Ptr.getOperand(i: `0`);
3655
3656	if (Ptr.getOpcode() == ISD::CopyFromReg &&
3657	cast<RegisterSDNode>(Val: Ptr.getOperand(i: `1`))->getReg() == ARM::SP &&
3658	Ptr.getOperand(i: `0`) == ST->getChain()) {
3659	SDValue Ops[] = {ST->getValue(),
3660	CurDAG->getRegister(Reg: ARM::SP, VT: MVT::i32),
3661	CurDAG->getTargetConstant(Val: RHSC, DL: dl, VT: MVT::i32),
3662	getAL(CurDAG, dl),
3663	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3664	ST->getChain()};
3665	MachineSDNode *ResNode =
3666	CurDAG->getMachineNode(Opcode: ARM::tSTRspi, dl, VT: MVT::Other, Ops);
3667	MachineMemOperand *MemOp = ST->getMemOperand();
3668	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3669	ReplaceNode(F: N, T: ResNode);
3670	return;
3671	}
3672	}
3673	break;
3674	}
3675	case ISD::WRITE_REGISTER:
3676	if (tryWriteRegister(N))
3677	return;
3678	break;
3679	case ISD::READ_REGISTER:
3680	if (tryReadRegister(N))
3681	return;
3682	break;
3683	case ISD::INLINEASM:
3684	case ISD::INLINEASM_BR:
3685	if (tryInlineAsm(N))
3686	return;
3687	break;
3688	case ISD::SUB:
3689	// Select special operations if SUB node forms integer ABS pattern
3690	if (tryABSOp(N))
3691	return;
3692	// Other cases are autogenerated.
3693	break;
3694	case ISD::Constant: {
3695	unsigned Val = N->getAsZExtVal();
3696	// If we can't materialize the constant we need to use a literal pool
3697	if (ConstantMaterializationCost(Val, Subtarget) > `2` &&
3698	!Subtarget->genExecuteOnly()) {
3699	SDValue CPIdx = CurDAG->getTargetConstantPool(
3700	C: ConstantInt::get(Ty: Type::getInt32Ty(C&: *CurDAG->getContext()), V: Val),
3701	VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3702
3703	SDNode *ResNode;
3704	if (Subtarget->isThumb()) {
3705	SDValue Ops[] = {
3706	CPIdx,
3707	getAL(CurDAG, dl),
3708	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3709	CurDAG->getEntryNode()
3710	};
3711	ResNode = CurDAG->getMachineNode(Opcode: ARM::tLDRpci, dl, VT1: MVT::i32, VT2: MVT::Other,
3712	Ops);
3713	} else {
3714	SDValue Ops[] = {
3715	CPIdx,
3716	CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32),
3717	getAL(CurDAG, dl),
3718	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3719	CurDAG->getEntryNode()
3720	};
3721	ResNode = CurDAG->getMachineNode(Opcode: ARM::LDRcp, dl, VT1: MVT::i32, VT2: MVT::Other,
3722	Ops);
3723	}
3724	// Annotate the Node with memory operand information so that MachineInstr
3725	// queries work properly. This e.g. gives the register allocation the
3726	// required information for rematerialization.
3727	MachineFunction& MF = CurDAG->getMachineFunction();
3728	MachineMemOperand *MemOp =
3729	MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3730	F: MachineMemOperand::MOLoad, Size: `4`, BaseAlignment: Align (`4`));
3731
3732	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3733
3734	ReplaceNode(F: N, T: ResNode);
3735	return;
3736	}
3737
3738	// Other cases are autogenerated.
3739	break;
3740	}
3741	case ISD::FrameIndex: {
3742	// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3743	int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
3744	SDValue TFI = CurDAG->getTargetFrameIndex(
3745	FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3746	if (Subtarget->isThumb1Only()) {
3747	// Set the alignment of the frame object to 4, to avoid having to generate
3748	// more than one ADD
3749	MachineFrameInfo &MFI = MF->getFrameInfo();
3750	if (MFI.getObjectAlign(ObjectIdx: FI) < Align (`4`))
3751	MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align (`4`));
3752	CurDAG->SelectNodeTo(N, MachineOpc: ARM::tADDframe, VT: MVT::i32, Op1: TFI,
3753	Op2: CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32));
3754	return;
3755	} else {
3756	unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3757	ARM::t2ADDri : ARM::ADDri);
3758	SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: `0`, DL: dl, VT: MVT::i32),
3759	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3760	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3761	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3762	return;
3763	}
3764	}
3765	case ISD::INSERT_VECTOR_ELT: {
3766	if (tryInsertVectorElt(N))
3767	return;
3768	break;
3769	}
3770	case ISD::SRL:
3771	if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3772	return;
3773	break;
3774	case ISD::SIGN_EXTEND_INREG:
3775	case ISD::SRA:
3776	if (tryV6T2BitfieldExtractOp(N, isSigned: true))
3777	return;
3778	break;
3779	case ISD::FP_TO_UINT:
3780	case ISD::FP_TO_SINT:
3781	case ISD::FP_TO_UINT_SAT:
3782	case ISD::FP_TO_SINT_SAT:
3783	if (tryFP_TO_INT(N, dl))
3784	return;
3785	break;
3786	case ISD::FMUL:
3787	if (tryFMULFixed(N, dl))
3788	return;
3789	break;
3790	case ISD::MUL:
3791	if (Subtarget->isThumb1Only())
3792	break;
3793	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`))) {
3794	unsigned RHSV = C->getZExtValue();
3795	if (!RHSV) break;
3796	if (isPowerOf2_32(Value: RHSV-`1`)) { // 2^n+1?
3797	unsigned ShImm = Log2_32(Value: RHSV-`1`);
3798	if (ShImm >= `32`)
3799	break;
3800	SDValue V = N->getOperand(Num: `0`);
3801	ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3802	SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3803	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3804	if (Subtarget->isThumb()) {
3805	SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3806	CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2ADDrs, VT: MVT::i32, Ops);
3807	return;
3808	} else {
3809	SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3810	Reg0 };
3811	CurDAG->SelectNodeTo(N, MachineOpc: ARM::ADDrsi, VT: MVT::i32, Ops);
3812	return;
3813	}
3814	}
3815	if (isPowerOf2_32(Value: RHSV+`1`)) { // 2^n-1?
3816	unsigned ShImm = Log2_32(Value: RHSV+`1`);
3817	if (ShImm >= `32`)
3818	break;
3819	SDValue V = N->getOperand(Num: `0`);
3820	ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3821	SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3822	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
3823	if (Subtarget->isThumb()) {
3824	SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3825	CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2RSBrs, VT: MVT::i32, Ops);
3826	return;
3827	} else {
3828	SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3829	Reg0 };
3830	CurDAG->SelectNodeTo(N, MachineOpc: ARM::RSBrsi, VT: MVT::i32, Ops);
3831	return;
3832	}
3833	}
3834	}
3835	break;
3836	case ISD::AND: {
3837	// Check for unsigned bitfield extract
3838	if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3839	return;
3840
3841	// If an immediate is used in an AND node, it is possible that the immediate
3842	// can be more optimally materialized when negated. If this is the case we
3843	// can negate the immediate and use a BIC instead.
3844	auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
3845	if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3846	uint32_t Imm = (uint32_t) N1C->getZExtValue();
3847
3848	// In Thumb2 mode, an AND can take a 12-bit immediate. If this
3849	// immediate can be negated and fit in the immediate operand of
3850	// a t2BIC, don't do any manual transform here as this can be
3851	// handled by the generic ISel machinery.
3852	bool PreferImmediateEncoding =
3853	Subtarget->hasThumb2() && (is_t2_so_imm(Imm) \|\| is_t2_so_imm_not(Imm));
3854	if (!PreferImmediateEncoding &&
3855	ConstantMaterializationCost(Val: Imm, Subtarget) >
3856	ConstantMaterializationCost(Val: ~Imm, Subtarget)) {
3857	// The current immediate costs more to materialize than a negated
3858	// immediate, so negate the immediate and use a BIC.
3859	SDValue NewImm = CurDAG->getConstant(Val: ~Imm, DL: dl, VT: MVT::i32);
3860	// If the new constant didn't exist before, reposition it in the topological
3861	// ordering so it is just before N. Otherwise, don't touch its location.
3862	if (NewImm ->getNodeId() == -`1`)
3863	CurDAG->RepositionNode(Position: N->getIterator(), N: NewImm.getNode());
3864
3865	if (!Subtarget->hasThumb2()) {
3866	SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32),
3867	N->getOperand(Num: `0`), NewImm, getAL(CurDAG, dl),
3868	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3869	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::tBIC, dl, VT: MVT::i32, Ops));
3870	return;
3871	} else {
3872	SDValue Ops[] = {N->getOperand(Num: `0`), NewImm, getAL(CurDAG, dl),
3873	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3874	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3875	ReplaceNode(F: N,
3876	T: CurDAG->getMachineNode(Opcode: ARM::t2BICrr, dl, VT: MVT::i32, Ops));
3877	return;
3878	}
3879	}
3880	}
3881
3882	// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3883	// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3884	// are entirely contributed by c2 and lower 16-bits are entirely contributed
3885	// by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3886	// Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
3887	EVT VT = N->getValueType(ResNo: `0`);
3888	if (VT != MVT::i32)
3889	break;
3890	unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3891	? ARM::t2MOVTi16
3892	: (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : `0`);
3893	if (!Opc)
3894	break;
3895	SDValue N0 = N->getOperand(Num: `0`), N1 = N->getOperand(Num: `1`);
3896	N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3897	if (!N1C)
3898	break;
3899	if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3900	SDValue N2 = N0.getOperand(i: `1`);
3901	ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2);
3902	if (!N2C)
3903	break;
3904	unsigned N1CVal = N1C->getZExtValue();
3905	unsigned N2CVal = N2C->getZExtValue();
3906	if ((N1CVal & `0xffff0000U`) == (N2CVal & `0xffff0000U`) &&
3907	(N1CVal & `0xffffU`) == `0xffffU` &&
3908	(N2CVal & `0xffffU`) == `0x0U`) {
3909	SDValue Imm16 = CurDAG->getTargetConstant(Val: (N2CVal & `0xFFFF0000U`) >> `16`,
3910	DL: dl, VT: MVT::i32);
3911	SDValue Ops[] = { N0.getOperand(i: `0`), Imm16,
3912	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3913	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
3914	return;
3915	}
3916	}
3917
3918	break;
3919	}
3920	case ARMISD::UMAAL: {
3921	unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3922	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`),
3923	N->getOperand(Num: `2`), N->getOperand(Num: `3`),
3924	getAL(CurDAG, dl),
3925	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3926	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3927	return;
3928	}
3929	case ARMISD::UMLAL:{
3930	if (Subtarget->isThumb()) {
3931	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3932	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3933	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3934	ReplaceNode(
3935	F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2UMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3936	return;
3937	}else{
3938	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3939	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3940	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3941	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3942	ReplaceNode(F: N, T: CurDAG->getMachineNode(
3943	Opcode: Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3944	VT1: MVT::i32, VT2: MVT::i32, Ops));
3945	return;
3946	}
3947	}
3948	case ARMISD::SMLAL:{
3949	if (Subtarget->isThumb()) {
3950	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3951	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3952	CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
3953	ReplaceNode(
3954	F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2SMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3955	return;
3956	}else{
3957	SDValue Ops[] = { N->getOperand(Num: `0`), N->getOperand(Num: `1`), N->getOperand(Num: `2`),
3958	N->getOperand(Num: `3`), getAL(CurDAG, dl),
3959	CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
3960	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3961	ReplaceNode(F: N, T: CurDAG->getMachineNode(
3962	Opcode: Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3963	VT1: MVT::i32, VT2: MVT::i32, Ops));
3964	return;
3965	}
3966	}
3967	case ARMISD::SUBE: {
3968	if (!Subtarget->hasV6Ops() \|\| !Subtarget->hasDSP())
3969	break;
3970	// Look for a pattern to match SMMLS
3971	// (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3972	if (N->getOperand(Num: `1`).getOpcode() != ISD::SMUL_LOHI \|\|
3973	N->getOperand(Num: `2`).getOpcode() != ARMISD::SUBC \|\|
3974	!SDValue (N, `1`).use_empty())
3975	break;
3976
3977	if (Subtarget->isThumb())
3978	assert(Subtarget->hasThumb2() &&
3979	"This pattern should not be generated for Thumb");
3980
3981	SDValue SmulLoHi = N->getOperand(Num: `1`);
3982	SDValue Subc = N->getOperand(Num: `2`);
3983	SDValue Zero = Subc.getOperand(i: `0`);
3984
3985	if (!isNullConstant(V: Zero) \|\| Subc.getOperand(i: `1`) != SmulLoHi.getValue(R: `0`) \|\|
3986	N->getOperand(Num: `1`) != SmulLoHi.getValue(R: `1`) \|\|
3987	N->getOperand(Num: `2`) != Subc.getValue(R: `1`))
3988	break;
3989
3990	unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3991	SDValue Ops[] = { SmulLoHi.getOperand(i: `0`), SmulLoHi.getOperand(i: `1`),
3992	N->getOperand(Num: `0`), getAL(CurDAG, dl),
3993	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
3994	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops));
3995	return;
3996	}
3997	case ISD::LOAD: {
3998	if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999	return;
4000	if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4001	if (tryT2IndexedLoad(N))
4002	return;
4003	} else if (Subtarget->isThumb()) {
4004	if (tryT1IndexedLoad(N))
4005	return;
4006	} else if (tryARMIndexedLoad(N))
4007	return;
4008	// Other cases are autogenerated.
4009	break;
4010	}
4011	case ISD::MLOAD:
4012	if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4013	return;
4014	// Other cases are autogenerated.
4015	break;
4016	case ARMISD::WLSSETUP: {
4017	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::t2WhileLoopSetup, dl, VT: MVT::i32,
4018	Op1: N->getOperand(Num: `0`));
4019	ReplaceUses(F: N, T: New);
4020	CurDAG->RemoveDeadNode(N);
4021	return;
4022	}
4023	case ARMISD::WLS: {
4024	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::t2WhileLoopStart, dl, VT: MVT::Other,
4025	Op1: N->getOperand(Num: `1`), Op2: N->getOperand(Num: `2`),
4026	Op3: N->getOperand(Num: `0`));
4027	ReplaceUses(F: N, T: New);
4028	CurDAG->RemoveDeadNode(N);
4029	return;
4030	}
4031	case ARMISD::LE: {
4032	SDValue Ops[] = { N->getOperand(Num: `1`),
4033	N->getOperand(Num: `2`),
4034	N->getOperand(Num: `0`) };
4035	unsigned Opc = ARM::t2LoopEnd;
4036	SDNode *New = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops);
4037	ReplaceUses(F: N, T: New);
4038	CurDAG->RemoveDeadNode(N);
4039	return;
4040	}
4041	case ARMISD::LDRD: {
4042	if (Subtarget->isThumb2())
4043	break; // TableGen handles isel in this case.
4044	SDValue Base, RegOffset, ImmOffset;
4045	const SDValue &Chain = N->getOperand(Num: `0`);
4046	const SDValue &Addr = N->getOperand(Num: `1`);
4047	SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4048	if (RegOffset != CurDAG->getRegister(Reg: `0`, VT: MVT::i32)) {
4049	// The register-offset variant of LDRD mandates that the register
4050	// allocated to RegOffset is not reused in any of the remaining operands.
4051	// This restriction is currently not enforced. Therefore emitting this
4052	// variant is explicitly avoided.
4053	Base = Addr;
4054	RegOffset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4055	}
4056	SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4057	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::LOADDUAL, dl,
4058	ResultTys: {MVT::Untyped, MVT::Other}, Ops);
4059	SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
4060	Operand: SDValue (New, `0`));
4061	SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
4062	Operand: SDValue (New, `0`));
4063	transferMemOperands(N, Result: New);
4064	ReplaceUses(F: SDValue (N, `0`), T: Lo);
4065	ReplaceUses(F: SDValue (N, `1`), T: Hi);
4066	ReplaceUses(F: SDValue (N, `2`), T: SDValue (New, `1`));
4067	CurDAG->RemoveDeadNode(N);
4068	return;
4069	}
4070	case ARMISD::STRD: {
4071	if (Subtarget->isThumb2())
4072	break; // TableGen handles isel in this case.
4073	SDValue Base, RegOffset, ImmOffset;
4074	const SDValue &Chain = N->getOperand(Num: `0`);
4075	const SDValue &Addr = N->getOperand(Num: `3`);
4076	SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4077	if (RegOffset != CurDAG->getRegister(Reg: `0`, VT: MVT::i32)) {
4078	// The register-offset variant of STRD mandates that the register
4079	// allocated to RegOffset is not reused in any of the remaining operands.
4080	// This restriction is currently not enforced. Therefore emitting this
4081	// variant is explicitly avoided.
4082	Base = Addr;
4083	RegOffset = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4084	}
4085	SDNode *RegPair =
4086	createGPRPairNode(VT: MVT::Untyped, V0: N->getOperand(Num: `1`), V1: N->getOperand(Num: `2`));
4087	SDValue Ops[] = {SDValue (RegPair, `0`), Base, RegOffset, ImmOffset, Chain};
4088	SDNode *New = CurDAG->getMachineNode(Opcode: ARM::STOREDUAL, dl, VT: MVT::Other, Ops);
4089	transferMemOperands(N, Result: New);
4090	ReplaceUses(F: SDValue (N, `0`), T: SDValue (New, `0`));
4091	CurDAG->RemoveDeadNode(N);
4092	return;
4093	}
4094	case ARMISD::LOOP_DEC: {
4095	SDValue Ops[] = { N->getOperand(Num: `1`),
4096	N->getOperand(Num: `2`),
4097	N->getOperand(Num: `0`) };
4098	SDNode *Dec =
4099	CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4100	VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other), Ops);
4101	ReplaceUses(F: N, T: Dec);
4102	CurDAG->RemoveDeadNode(N);
4103	return;
4104	}
4105	case ARMISD::BRCOND: {
4106	// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4107	// Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4108	// Pattern complexity = 6 cost = 1 size = 0
4109
4110	// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111	// Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4112	// Pattern complexity = 6 cost = 1 size = 0
4113
4114	// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115	// Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4116	// Pattern complexity = 6 cost = 1 size = 0
4117
4118	unsigned Opc = Subtarget->isThumb() ?
4119	((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4120	SDValue Chain = N->getOperand(Num: `0`);
4121	SDValue N1 = N->getOperand(Num: `1`);
4122	SDValue N2 = N->getOperand(Num: `2`);
4123	SDValue Flags = N->getOperand(Num: `3`);
4124	assert(N1.getOpcode() == ISD::BasicBlock);
4125	assert(N2.getOpcode() == ISD::Constant);
4126
4127	unsigned CC = (unsigned)N2 ->getAsZExtVal();
4128
4129	if (Flags.getOpcode() == ARMISD::CMPZ) {
4130	if (Flags.getOperand(i: `0`).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4131	SDValue Int = Flags.getOperand(i: `0`);
4132	uint64_t ID = Int ->getConstantOperandVal(Num: `1`);
4133
4134	// Handle low-overhead loops.
4135	if (ID == Intrinsic::loop_decrement_reg) {
4136	SDValue Elements = Int.getOperand(i: `2`);
4137	SDValue Size = CurDAG->getTargetConstant(Val: Int.getConstantOperandVal(i: `3`),
4138	DL: dl, VT: MVT::i32);
4139
4140	SDValue Args[] = { Elements, Size, Int.getOperand(i: `0`) };
4141	SDNode *LoopDec =
4142	CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4143	VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other),
4144	Ops: Args);
4145	ReplaceUses(F: Int.getNode(), T: LoopDec);
4146
4147	SDValue EndArgs[] = { SDValue (LoopDec, `0`), N1, Chain };
4148	SDNode *LoopEnd =
4149	CurDAG->getMachineNode(Opcode: ARM::t2LoopEnd, dl, VT: MVT::Other, Ops: EndArgs);
4150
4151	ReplaceUses(F: N, T: LoopEnd);
4152	CurDAG->RemoveDeadNode(N);
4153	CurDAG->RemoveDeadNode(N: Flags.getNode());
4154	CurDAG->RemoveDeadNode(N: Int.getNode());
4155	return;
4156	}
4157	}
4158
4159	bool SwitchEQNEToPLMI;
4160	SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4161	Flags = N->getOperand(Num: `3`);
4162
4163	if (SwitchEQNEToPLMI) {
4164	switch ((ARMCC::CondCodes)CC) {
4165	default: llvm_unreachable("CMPZ must be either NE or EQ!");
4166	case ARMCC::NE:
4167	CC = (unsigned)ARMCC::MI;
4168	break;
4169	case ARMCC::EQ:
4170	CC = (unsigned)ARMCC::PL;
4171	break;
4172	}
4173	}
4174	}
4175
4176	SDValue Tmp2 = CurDAG->getTargetConstant(Val: CC, DL: dl, VT: MVT::i32);
4177	Chain = CurDAG->getCopyToReg(Chain, dl, Reg: ARM::CPSR, N: Flags, Glue: SDValue ());
4178	SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Chain,
4179	Chain.getValue(R: `1`)};
4180	CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::Other, Ops);
4181	return;
4182	}
4183
4184	case ARMISD::CMPZ: {
4185	// select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4186	// This allows us to avoid materializing the expensive negative constant.
4187	// The CMPZ #0 is useless and will be peepholed away but we need to keep
4188	// it for its flags output.
4189	SDValue X = N->getOperand(Num: `0`);
4190	auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`).getNode());
4191	if (C && C->getSExtValue() < `0` && Subtarget->isThumb()) {
4192	int64_t Addend = -C->getSExtValue();
4193
4194	SDNode Add = nullptr*;
4195	// ADDS can be better than CMN if the immediate fits in a
4196	// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4197	// Outside that range we can just use a CMN which is 32-bit but has a
4198	// 12-bit immediate range.
4199	if (Addend < `1`<<`8`) {
4200	if (Subtarget->isThumb2()) {
4201	SDValue Ops[] = { X, CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4202	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
4203	CurDAG->getRegister(Reg: `0`, VT: MVT::i32) };
4204	Add = CurDAG->getMachineNode(Opcode: ARM::t2ADDri, dl, VT: MVT::i32, Ops);
4205	} else {
4206	unsigned Opc = (Addend < `1`<<`3`) ? ARM::tADDi3 : ARM::tADDi8;
4207	SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), X,
4208	CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4209	getAL(CurDAG, dl), CurDAG->getRegister(Reg: `0`, VT: MVT::i32)};
4210	Add = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
4211	}
4212	}
4213	if (Add) {
4214	SDValue Ops2[] = {SDValue (Add, `0`), CurDAG->getConstant(Val: `0`, DL: dl, VT: MVT::i32)};
4215	CurDAG->MorphNodeTo(N, Opc: ARMISD::CMPZ, VTs: N->getVTList(), Ops: Ops2);
4216	}
4217	}
4218	// Other cases are autogenerated.
4219	break;
4220	}
4221
4222	case ARMISD::CMOV: {
4223	SDValue Flags = N->getOperand(Num: `3`);
4224
4225	if (Flags.getOpcode() == ARMISD::CMPZ) {
4226	bool SwitchEQNEToPLMI;
4227	SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4228
4229	if (SwitchEQNEToPLMI) {
4230	SDValue ARMcc = N->getOperand(Num: `2`);
4231	ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc ->getAsZExtVal();
4232
4233	switch (CC) {
4234	default: llvm_unreachable("CMPZ must be either NE or EQ!");
4235	case ARMCC::NE:
4236	CC = ARMCC::MI;
4237	break;
4238	case ARMCC::EQ:
4239	CC = ARMCC::PL;
4240	break;
4241	}
4242	SDValue NewARMcc = CurDAG->getConstant(Val: (unsigned)CC, DL: dl, VT: MVT::i32);
4243	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), NewARMcc,
4244	N->getOperand(Num: `3`)};
4245	CurDAG->MorphNodeTo(N, Opc: ARMISD::CMOV, VTs: N->getVTList(), Ops);
4246	}
4247	}
4248	// Other cases are autogenerated.
4249	break;
4250	}
4251	case ARMISD::VZIP: {
4252	EVT VT = N->getValueType(ResNo: `0`);
4253	// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4254	unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4255	unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4256	unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4257	SDValue Pred = getAL(CurDAG, dl);
4258	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4259	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), Pred, PredReg};
4260	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4261	return;
4262	}
4263	case ARMISD::VUZP: {
4264	EVT VT = N->getValueType(ResNo: `0`);
4265	// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4266	unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4267	unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4268	unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4269	SDValue Pred = getAL(CurDAG, dl);
4270	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4271	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), Pred, PredReg};
4272	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4273	return;
4274	}
4275	case ARMISD::VTRN: {
4276	EVT VT = N->getValueType(ResNo: `0`);
4277	unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4278	unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4279	unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4280	SDValue Pred = getAL(CurDAG, dl);
4281	SDValue PredReg = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
4282	SDValue Ops[] = {N->getOperand(Num: `0`), N->getOperand(Num: `1`), Pred, PredReg};
4283	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4284	return;
4285	}
4286	case ARMISD::BUILD_VECTOR: {
4287	EVT VecVT = N->getValueType(ResNo: `0`);
4288	EVT EltVT = VecVT.getVectorElementType();
4289	unsigned NumElts = VecVT.getVectorNumElements();
4290	if (EltVT == MVT::f64) {
4291	assert(NumElts == `2` && "unexpected type for BUILD_VECTOR");
4292	ReplaceNode(
4293	F: N, T: createDRegPairNode(VT: VecVT, V0: N->getOperand(Num: `0`), V1: N->getOperand(Num: `1`)));
4294	return;
4295	}
4296	assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4297	if (NumElts == `2`) {
4298	ReplaceNode(
4299	F: N, T: createSRegPairNode(VT: VecVT, V0: N->getOperand(Num: `0`), V1: N->getOperand(Num: `1`)));
4300	return;
4301	}
4302	assert(NumElts == `4` && "unexpected type for BUILD_VECTOR");
4303	ReplaceNode(F: N,
4304	T: createQuadSRegsNode(VT: VecVT, V0: N->getOperand(Num: `0`), V1: N->getOperand(Num: `1`),
4305	V2: N->getOperand(Num: `2`), V3: N->getOperand(Num: `3`)));
4306	return;
4307	}
4308
4309	case ARMISD::VLD1DUP: {
4310	static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4311	ARM::VLD1DUPd32 };
4312	static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4313	ARM::VLD1DUPq32 };
4314	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes);
4315	return;
4316	}
4317
4318	case ARMISD::VLD2DUP: {
4319	static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4320	ARM::VLD2DUPd32 };
4321	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `2`, DOpcodes: Opcodes);
4322	return;
4323	}
4324
4325	case ARMISD::VLD3DUP: {
4326	static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4327	ARM::VLD3DUPd16Pseudo,
4328	ARM::VLD3DUPd32Pseudo };
4329	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `3`, DOpcodes: Opcodes);
4330	return;
4331	}
4332
4333	case ARMISD::VLD4DUP: {
4334	static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4335	ARM::VLD4DUPd16Pseudo,
4336	ARM::VLD4DUPd32Pseudo };
4337	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: false, NumVecs: `4`, DOpcodes: Opcodes);
4338	return;
4339	}
4340
4341	case ARMISD::VLD1DUP_UPD: {
4342	static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4343	ARM::VLD1DUPd16wb_fixed,
4344	ARM::VLD1DUPd32wb_fixed };
4345	static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4346	ARM::VLD1DUPq16wb_fixed,
4347	ARM::VLD1DUPq32wb_fixed };
4348	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes);
4349	return;
4350	}
4351
4352	case ARMISD::VLD2DUP_UPD: {
4353	static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4354	ARM::VLD2DUPd16wb_fixed,
4355	ARM::VLD2DUPd32wb_fixed,
4356	ARM::VLD1q64wb_fixed };
4357	static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4358	ARM::VLD2DUPq16EvenPseudo,
4359	ARM::VLD2DUPq32EvenPseudo };
4360	static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4361	ARM::VLD2DUPq16OddPseudoWB_fixed,
4362	ARM::VLD2DUPq32OddPseudoWB_fixed };
4363	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0, QOpcodes1);
4364	return;
4365	}
4366
4367	case ARMISD::VLD3DUP_UPD: {
4368	static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4369	ARM::VLD3DUPd16Pseudo_UPD,
4370	ARM::VLD3DUPd32Pseudo_UPD,
4371	ARM::VLD1d64TPseudoWB_fixed };
4372	static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4373	ARM::VLD3DUPq16EvenPseudo,
4374	ARM::VLD3DUPq32EvenPseudo };
4375	static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4376	ARM::VLD3DUPq16OddPseudo_UPD,
4377	ARM::VLD3DUPq32OddPseudo_UPD };
4378	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4379	return;
4380	}
4381
4382	case ARMISD::VLD4DUP_UPD: {
4383	static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4384	ARM::VLD4DUPd16Pseudo_UPD,
4385	ARM::VLD4DUPd32Pseudo_UPD,
4386	ARM::VLD1d64QPseudoWB_fixed };
4387	static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4388	ARM::VLD4DUPq16EvenPseudo,
4389	ARM::VLD4DUPq32EvenPseudo };
4390	static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4391	ARM::VLD4DUPq16OddPseudo_UPD,
4392	ARM::VLD4DUPq32OddPseudo_UPD };
4393	SelectVLDDup(N, / IsIntrinsic= / false, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4394	return;
4395	}
4396
4397	case ARMISD::VLD1_UPD: {
4398	static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4399	ARM::VLD1d16wb_fixed,
4400	ARM::VLD1d32wb_fixed,
4401	ARM::VLD1d64wb_fixed };
4402	static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4403	ARM::VLD1q16wb_fixed,
4404	ARM::VLD1q32wb_fixed,
4405	ARM::VLD1q64wb_fixed };
4406	SelectVLD(N, isUpdating: true, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4407	return;
4408	}
4409
4410	case ARMISD::VLD2_UPD: {
4411	if (Subtarget->hasNEON()) {
4412	static const uint16_t DOpcodes[] = {
4413	ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4414	ARM::VLD1q64wb_fixed};
4415	static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4416	ARM::VLD2q16PseudoWB_fixed,
4417	ARM::VLD2q32PseudoWB_fixed};
4418	SelectVLD(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4419	} else {
4420	static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4421	ARM::MVE_VLD21_8_wb};
4422	static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4423	ARM::MVE_VLD21_16_wb};
4424	static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4425	ARM::MVE_VLD21_32_wb};
4426	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4427	SelectMVE_VLD(N, NumVecs: `2`, Opcodes, HasWriteback: true);
4428	}
4429	return;
4430	}
4431
4432	case ARMISD::VLD3_UPD: {
4433	static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4434	ARM::VLD3d16Pseudo_UPD,
4435	ARM::VLD3d32Pseudo_UPD,
4436	ARM::VLD1d64TPseudoWB_fixed};
4437	static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4438	ARM::VLD3q16Pseudo_UPD,
4439	ARM::VLD3q32Pseudo_UPD };
4440	static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4441	ARM::VLD3q16oddPseudo_UPD,
4442	ARM::VLD3q32oddPseudo_UPD };
4443	SelectVLD(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4444	return;
4445	}
4446
4447	case ARMISD::VLD4_UPD: {
4448	if (Subtarget->hasNEON()) {
4449	static const uint16_t DOpcodes[] = {
4450	ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4451	ARM::VLD1d64QPseudoWB_fixed};
4452	static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4453	ARM::VLD4q16Pseudo_UPD,
4454	ARM::VLD4q32Pseudo_UPD};
4455	static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4456	ARM::VLD4q16oddPseudo_UPD,
4457	ARM::VLD4q32oddPseudo_UPD};
4458	SelectVLD(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4459	} else {
4460	static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4461	ARM::MVE_VLD42_8,
4462	ARM::MVE_VLD43_8_wb};
4463	static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4464	ARM::MVE_VLD42_16,
4465	ARM::MVE_VLD43_16_wb};
4466	static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4467	ARM::MVE_VLD42_32,
4468	ARM::MVE_VLD43_32_wb};
4469	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4470	SelectMVE_VLD(N, NumVecs: `4`, Opcodes, HasWriteback: true);
4471	}
4472	return;
4473	}
4474
4475	case ARMISD::VLD1x2_UPD: {
4476	if (Subtarget->hasNEON()) {
4477	static const uint16_t DOpcodes[] = {
4478	ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4479	ARM::VLD1q64wb_fixed};
4480	static const uint16_t QOpcodes[] = {
4481	ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4482	ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4483	SelectVLD(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4484	return;
4485	}
4486	break;
4487	}
4488
4489	case ARMISD::VLD1x3_UPD: {
4490	if (Subtarget->hasNEON()) {
4491	static const uint16_t DOpcodes[] = {
4492	ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4493	ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4494	static const uint16_t QOpcodes0[] = {
4495	ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4496	ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4497	static const uint16_t QOpcodes1[] = {
4498	ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4499	ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4500	SelectVLD(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4501	return;
4502	}
4503	break;
4504	}
4505
4506	case ARMISD::VLD1x4_UPD: {
4507	if (Subtarget->hasNEON()) {
4508	static const uint16_t DOpcodes[] = {
4509	ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4510	ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4511	static const uint16_t QOpcodes0[] = {
4512	ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4513	ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4514	static const uint16_t QOpcodes1[] = {
4515	ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4516	ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4517	SelectVLD(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4518	return;
4519	}
4520	break;
4521	}
4522
4523	case ARMISD::VLD2LN_UPD: {
4524	static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4525	ARM::VLD2LNd16Pseudo_UPD,
4526	ARM::VLD2LNd32Pseudo_UPD };
4527	static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4528	ARM::VLD2LNq32Pseudo_UPD };
4529	SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes);
4530	return;
4531	}
4532
4533	case ARMISD::VLD3LN_UPD: {
4534	static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4535	ARM::VLD3LNd16Pseudo_UPD,
4536	ARM::VLD3LNd32Pseudo_UPD };
4537	static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4538	ARM::VLD3LNq32Pseudo_UPD };
4539	SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes);
4540	return;
4541	}
4542
4543	case ARMISD::VLD4LN_UPD: {
4544	static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4545	ARM::VLD4LNd16Pseudo_UPD,
4546	ARM::VLD4LNd32Pseudo_UPD };
4547	static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4548	ARM::VLD4LNq32Pseudo_UPD };
4549	SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes);
4550	return;
4551	}
4552
4553	case ARMISD::VST1_UPD: {
4554	static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4555	ARM::VST1d16wb_fixed,
4556	ARM::VST1d32wb_fixed,
4557	ARM::VST1d64wb_fixed };
4558	static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4559	ARM::VST1q16wb_fixed,
4560	ARM::VST1q32wb_fixed,
4561	ARM::VST1q64wb_fixed };
4562	SelectVST(N, isUpdating: true, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4563	return;
4564	}
4565
4566	case ARMISD::VST2_UPD: {
4567	if (Subtarget->hasNEON()) {
4568	static const uint16_t DOpcodes[] = {
4569	ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4570	ARM::VST1q64wb_fixed};
4571	static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4572	ARM::VST2q16PseudoWB_fixed,
4573	ARM::VST2q32PseudoWB_fixed};
4574	SelectVST(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4575	return;
4576	}
4577	break;
4578	}
4579
4580	case ARMISD::VST3_UPD: {
4581	static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4582	ARM::VST3d16Pseudo_UPD,
4583	ARM::VST3d32Pseudo_UPD,
4584	ARM::VST1d64TPseudoWB_fixed};
4585	static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4586	ARM::VST3q16Pseudo_UPD,
4587	ARM::VST3q32Pseudo_UPD };
4588	static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4589	ARM::VST3q16oddPseudo_UPD,
4590	ARM::VST3q32oddPseudo_UPD };
4591	SelectVST(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4592	return;
4593	}
4594
4595	case ARMISD::VST4_UPD: {
4596	if (Subtarget->hasNEON()) {
4597	static const uint16_t DOpcodes[] = {
4598	ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4599	ARM::VST1d64QPseudoWB_fixed};
4600	static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4601	ARM::VST4q16Pseudo_UPD,
4602	ARM::VST4q32Pseudo_UPD};
4603	static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4604	ARM::VST4q16oddPseudo_UPD,
4605	ARM::VST4q32oddPseudo_UPD};
4606	SelectVST(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4607	return;
4608	}
4609	break;
4610	}
4611
4612	case ARMISD::VST1x2_UPD: {
4613	if (Subtarget->hasNEON()) {
4614	static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4615	ARM::VST1q16wb_fixed,
4616	ARM::VST1q32wb_fixed,
4617	ARM::VST1q64wb_fixed};
4618	static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4619	ARM::VST1d16QPseudoWB_fixed,
4620	ARM::VST1d32QPseudoWB_fixed,
4621	ARM::VST1d64QPseudoWB_fixed };
4622	SelectVST(N, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4623	return;
4624	}
4625	break;
4626	}
4627
4628	case ARMISD::VST1x3_UPD: {
4629	if (Subtarget->hasNEON()) {
4630	static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4631	ARM::VST1d16TPseudoWB_fixed,
4632	ARM::VST1d32TPseudoWB_fixed,
4633	ARM::VST1d64TPseudoWB_fixed };
4634	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4635	ARM::VST1q16LowTPseudo_UPD,
4636	ARM::VST1q32LowTPseudo_UPD,
4637	ARM::VST1q64LowTPseudo_UPD };
4638	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4639	ARM::VST1q16HighTPseudo_UPD,
4640	ARM::VST1q32HighTPseudo_UPD,
4641	ARM::VST1q64HighTPseudo_UPD };
4642	SelectVST(N, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4643	return;
4644	}
4645	break;
4646	}
4647
4648	case ARMISD::VST1x4_UPD: {
4649	if (Subtarget->hasNEON()) {
4650	static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4651	ARM::VST1d16QPseudoWB_fixed,
4652	ARM::VST1d32QPseudoWB_fixed,
4653	ARM::VST1d64QPseudoWB_fixed };
4654	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4655	ARM::VST1q16LowQPseudo_UPD,
4656	ARM::VST1q32LowQPseudo_UPD,
4657	ARM::VST1q64LowQPseudo_UPD };
4658	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4659	ARM::VST1q16HighQPseudo_UPD,
4660	ARM::VST1q32HighQPseudo_UPD,
4661	ARM::VST1q64HighQPseudo_UPD };
4662	SelectVST(N, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4663	return;
4664	}
4665	break;
4666	}
4667	case ARMISD::VST2LN_UPD: {
4668	static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4669	ARM::VST2LNd16Pseudo_UPD,
4670	ARM::VST2LNd32Pseudo_UPD };
4671	static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4672	ARM::VST2LNq32Pseudo_UPD };
4673	SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: `2`, DOpcodes, QOpcodes);
4674	return;
4675	}
4676
4677	case ARMISD::VST3LN_UPD: {
4678	static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4679	ARM::VST3LNd16Pseudo_UPD,
4680	ARM::VST3LNd32Pseudo_UPD };
4681	static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4682	ARM::VST3LNq32Pseudo_UPD };
4683	SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: `3`, DOpcodes, QOpcodes);
4684	return;
4685	}
4686
4687	case ARMISD::VST4LN_UPD: {
4688	static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4689	ARM::VST4LNd16Pseudo_UPD,
4690	ARM::VST4LNd32Pseudo_UPD };
4691	static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4692	ARM::VST4LNq32Pseudo_UPD };
4693	SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: `4`, DOpcodes, QOpcodes);
4694	return;
4695	}
4696
4697	case ISD::INTRINSIC_VOID:
4698	case ISD::INTRINSIC_W_CHAIN: {
4699	unsigned IntNo = N->getConstantOperandVal(Num: `1`);
4700	switch (IntNo) {
4701	default:
4702	break;
4703
4704	case Intrinsic::arm_mrrc:
4705	case Intrinsic::arm_mrrc2: {
4706	SDLoc dl(N);
4707	SDValue Chain = N->getOperand(Num: `0`);
4708	unsigned Opc;
4709
4710	if (Subtarget->isThumb())
4711	Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4712	else
4713	Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4714
4715	SmallVector<SDValue, `5`> Ops;
4716	Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: `2`), dl)); / coproc /
4717	Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: `3`), dl)); / opc /
4718	Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: `4`), dl)); / CRm /
4719
4720	// The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of the
4721	// encoded instruction are always '1111'. Assembly language does permit AL to be
4722	// specified as a predicate on mrrc2, but it makes no difference to the encoding.
4723	if (Opc != ARM::MRRC2) {
4724	Ops.push_back(Elt: getAL(CurDAG, dl));
4725	Ops.push_back(Elt: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
4726	}
4727
4728	Ops.push_back(Elt: Chain);
4729
4730	// Writes to two registers.
4731	const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4732
4733	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: RetType, Ops));
4734	return;
4735	}
4736	case Intrinsic::arm_ldaexd:
4737	case Intrinsic::arm_ldrexd: {
4738	SDLoc dl(N);
4739	SDValue Chain = N->getOperand(Num: `0`);
4740	SDValue MemAddr = N->getOperand(Num: `2`);
4741	bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4742
4743	bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4744	unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4745	: (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4746
4747	// arm_ldrexd returns an i64 value in {i32, i32}
4748	std::vector<EVT> ResTys;
4749	if (isThumb) {
4750	ResTys.push_back(x: MVT::i32);
4751	ResTys.push_back(x: MVT::i32);
4752	} else
4753	ResTys.push_back(x: MVT::Untyped);
4754	ResTys.push_back(x: MVT::Other);
4755
4756	// Place arguments in the right order.
4757	SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4758	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), Chain};
4759	SDNode *Ld = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
4760	// Transfer memoperands.
4761	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
4762	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
4763
4764	// Remap uses.
4765	SDValue OutChain = isThumb ? SDValue (Ld, `2`) : SDValue (Ld, `1`);
4766	if (!SDValue (N, `0`).use_empty()) {
4767	SDValue Result;
4768	if (isThumb)
4769	Result = SDValue (Ld, `0`);
4770	else {
4771	SDValue SubRegIdx =
4772	CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
4773	SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
4774	dl, VT: MVT::i32, Op1: SDValue (Ld, `0`), Op2: SubRegIdx);
4775	Result = SDValue (ResNode,`0`);
4776	}
4777	ReplaceUses(F: SDValue (N, `0`), T: Result);
4778	}
4779	if (!SDValue (N, `1`).use_empty()) {
4780	SDValue Result;
4781	if (isThumb)
4782	Result = SDValue (Ld, `1`);
4783	else {
4784	SDValue SubRegIdx =
4785	CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
4786	SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
4787	dl, VT: MVT::i32, Op1: SDValue (Ld, `0`), Op2: SubRegIdx);
4788	Result = SDValue (ResNode,`0`);
4789	}
4790	ReplaceUses(F: SDValue (N, `1`), T: Result);
4791	}
4792	ReplaceUses(F: SDValue (N, `2`), T: OutChain);
4793	CurDAG->RemoveDeadNode(N);
4794	return;
4795	}
4796	case Intrinsic::arm_stlexd:
4797	case Intrinsic::arm_strexd: {
4798	SDLoc dl(N);
4799	SDValue Chain = N->getOperand(Num: `0`);
4800	SDValue Val0 = N->getOperand(Num: `2`);
4801	SDValue Val1 = N->getOperand(Num: `3`);
4802	SDValue MemAddr = N->getOperand(Num: `4`);
4803
4804	// Store-exclusive double returns an i32 value, which is the return status
4805	// of the issued store.
4806	const EVT ResTys[] = {MVT::i32, MVT::Other};
4807
4808	bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4809	// Place arguments in the right order.
4810	SmallVector<SDValue, `7`> Ops;
4811	if (isThumb) {
4812	Ops.push_back(Elt: Val0);
4813	Ops.push_back(Elt: Val1);
4814	} else
4815	// arm_strexd uses GPRPair.
4816	Ops.push_back(Elt: SDValue (createGPRPairNode(VT: MVT::Untyped, V0: Val0, V1: Val1), `0`));
4817	Ops.push_back(Elt: MemAddr);
4818	Ops.push_back(Elt: getAL(CurDAG, dl));
4819	Ops.push_back(Elt: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
4820	Ops.push_back(Elt: Chain);
4821
4822	bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4823	unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4824	: (IsRelease ? ARM::STLEXD : ARM::STREXD);
4825
4826	SDNode *St = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
4827	// Transfer memoperands.
4828	MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
4829	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
4830
4831	ReplaceNode(F: N, T: St);
4832	return;
4833	}
4834
4835	case Intrinsic::arm_neon_vld1: {
4836	static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4837	ARM::VLD1d32, ARM::VLD1d64 };
4838	static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4839	ARM::VLD1q32, ARM::VLD1q64};
4840	SelectVLD(N, isUpdating: false, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4841	return;
4842	}
4843
4844	case Intrinsic::arm_neon_vld1x2: {
4845	static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4846	ARM::VLD1q32, ARM::VLD1q64 };
4847	static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4848	ARM::VLD1d16QPseudo,
4849	ARM::VLD1d32QPseudo,
4850	ARM::VLD1d64QPseudo };
4851	SelectVLD(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4852	return;
4853	}
4854
4855	case Intrinsic::arm_neon_vld1x3: {
4856	static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4857	ARM::VLD1d16TPseudo,
4858	ARM::VLD1d32TPseudo,
4859	ARM::VLD1d64TPseudo };
4860	static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4861	ARM::VLD1q16LowTPseudo_UPD,
4862	ARM::VLD1q32LowTPseudo_UPD,
4863	ARM::VLD1q64LowTPseudo_UPD };
4864	static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4865	ARM::VLD1q16HighTPseudo,
4866	ARM::VLD1q32HighTPseudo,
4867	ARM::VLD1q64HighTPseudo };
4868	SelectVLD(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4869	return;
4870	}
4871
4872	case Intrinsic::arm_neon_vld1x4: {
4873	static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4874	ARM::VLD1d16QPseudo,
4875	ARM::VLD1d32QPseudo,
4876	ARM::VLD1d64QPseudo };
4877	static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4878	ARM::VLD1q16LowQPseudo_UPD,
4879	ARM::VLD1q32LowQPseudo_UPD,
4880	ARM::VLD1q64LowQPseudo_UPD };
4881	static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4882	ARM::VLD1q16HighQPseudo,
4883	ARM::VLD1q32HighQPseudo,
4884	ARM::VLD1q64HighQPseudo };
4885	SelectVLD(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4886	return;
4887	}
4888
4889	case Intrinsic::arm_neon_vld2: {
4890	static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4891	ARM::VLD2d32, ARM::VLD1q64 };
4892	static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4893	ARM::VLD2q32Pseudo };
4894	SelectVLD(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4895	return;
4896	}
4897
4898	case Intrinsic::arm_neon_vld3: {
4899	static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4900	ARM::VLD3d16Pseudo,
4901	ARM::VLD3d32Pseudo,
4902	ARM::VLD1d64TPseudo };
4903	static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4904	ARM::VLD3q16Pseudo_UPD,
4905	ARM::VLD3q32Pseudo_UPD };
4906	static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4907	ARM::VLD3q16oddPseudo,
4908	ARM::VLD3q32oddPseudo };
4909	SelectVLD(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
4910	return;
4911	}
4912
4913	case Intrinsic::arm_neon_vld4: {
4914	static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4915	ARM::VLD4d16Pseudo,
4916	ARM::VLD4d32Pseudo,
4917	ARM::VLD1d64QPseudo };
4918	static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4919	ARM::VLD4q16Pseudo_UPD,
4920	ARM::VLD4q32Pseudo_UPD };
4921	static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4922	ARM::VLD4q16oddPseudo,
4923	ARM::VLD4q32oddPseudo };
4924	SelectVLD(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
4925	return;
4926	}
4927
4928	case Intrinsic::arm_neon_vld2dup: {
4929	static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4930	ARM::VLD2DUPd32, ARM::VLD1q64 };
4931	static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4932	ARM::VLD2DUPq16EvenPseudo,
4933	ARM::VLD2DUPq32EvenPseudo };
4934	static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4935	ARM::VLD2DUPq16OddPseudo,
4936	ARM::VLD2DUPq32OddPseudo };
4937	SelectVLDDup(N, / IsIntrinsic= / true, isUpdating: false, NumVecs: `2`,
4938	DOpcodes, QOpcodes0, QOpcodes1);
4939	return;
4940	}
4941
4942	case Intrinsic::arm_neon_vld3dup: {
4943	static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4944	ARM::VLD3DUPd16Pseudo,
4945	ARM::VLD3DUPd32Pseudo,
4946	ARM::VLD1d64TPseudo };
4947	static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4948	ARM::VLD3DUPq16EvenPseudo,
4949	ARM::VLD3DUPq32EvenPseudo };
4950	static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4951	ARM::VLD3DUPq16OddPseudo,
4952	ARM::VLD3DUPq32OddPseudo };
4953	SelectVLDDup(N, / IsIntrinsic= / true, isUpdating: false, NumVecs: `3`,
4954	DOpcodes, QOpcodes0, QOpcodes1);
4955	return;
4956	}
4957
4958	case Intrinsic::arm_neon_vld4dup: {
4959	static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4960	ARM::VLD4DUPd16Pseudo,
4961	ARM::VLD4DUPd32Pseudo,
4962	ARM::VLD1d64QPseudo };
4963	static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4964	ARM::VLD4DUPq16EvenPseudo,
4965	ARM::VLD4DUPq32EvenPseudo };
4966	static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4967	ARM::VLD4DUPq16OddPseudo,
4968	ARM::VLD4DUPq32OddPseudo };
4969	SelectVLDDup(N, / IsIntrinsic= / true, isUpdating: false, NumVecs: `4`,
4970	DOpcodes, QOpcodes0, QOpcodes1);
4971	return;
4972	}
4973
4974	case Intrinsic::arm_neon_vld2lane: {
4975	static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4976	ARM::VLD2LNd16Pseudo,
4977	ARM::VLD2LNd32Pseudo };
4978	static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4979	ARM::VLD2LNq32Pseudo };
4980	SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes);
4981	return;
4982	}
4983
4984	case Intrinsic::arm_neon_vld3lane: {
4985	static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
4986	ARM::VLD3LNd16Pseudo,
4987	ARM::VLD3LNd32Pseudo };
4988	static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
4989	ARM::VLD3LNq32Pseudo };
4990	SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes);
4991	return;
4992	}
4993
4994	case Intrinsic::arm_neon_vld4lane: {
4995	static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
4996	ARM::VLD4LNd16Pseudo,
4997	ARM::VLD4LNd32Pseudo };
4998	static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
4999	ARM::VLD4LNq32Pseudo };
5000	SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes);
5001	return;
5002	}
5003
5004	case Intrinsic::arm_neon_vst1: {
5005	static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5006	ARM::VST1d32, ARM::VST1d64 };
5007	static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5008	ARM::VST1q32, ARM::VST1q64 };
5009	SelectVST(N, isUpdating: false, NumVecs: `1`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5010	return;
5011	}
5012
5013	case Intrinsic::arm_neon_vst1x2: {
5014	static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5015	ARM::VST1q32, ARM::VST1q64 };
5016	static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5017	ARM::VST1d16QPseudo,
5018	ARM::VST1d32QPseudo,
5019	ARM::VST1d64QPseudo };
5020	SelectVST(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5021	return;
5022	}
5023
5024	case Intrinsic::arm_neon_vst1x3: {
5025	static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5026	ARM::VST1d16TPseudo,
5027	ARM::VST1d32TPseudo,
5028	ARM::VST1d64TPseudo };
5029	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5030	ARM::VST1q16LowTPseudo_UPD,
5031	ARM::VST1q32LowTPseudo_UPD,
5032	ARM::VST1q64LowTPseudo_UPD };
5033	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5034	ARM::VST1q16HighTPseudo,
5035	ARM::VST1q32HighTPseudo,
5036	ARM::VST1q64HighTPseudo };
5037	SelectVST(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
5038	return;
5039	}
5040
5041	case Intrinsic::arm_neon_vst1x4: {
5042	static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5043	ARM::VST1d16QPseudo,
5044	ARM::VST1d32QPseudo,
5045	ARM::VST1d64QPseudo };
5046	static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5047	ARM::VST1q16LowQPseudo_UPD,
5048	ARM::VST1q32LowQPseudo_UPD,
5049	ARM::VST1q64LowQPseudo_UPD };
5050	static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5051	ARM::VST1q16HighQPseudo,
5052	ARM::VST1q32HighQPseudo,
5053	ARM::VST1q64HighQPseudo };
5054	SelectVST(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
5055	return;
5056	}
5057
5058	case Intrinsic::arm_neon_vst2: {
5059	static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5060	ARM::VST2d32, ARM::VST1q64 };
5061	static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5062	ARM::VST2q32Pseudo };
5063	SelectVST(N, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5064	return;
5065	}
5066
5067	case Intrinsic::arm_neon_vst3: {
5068	static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5069	ARM::VST3d16Pseudo,
5070	ARM::VST3d32Pseudo,
5071	ARM::VST1d64TPseudo };
5072	static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5073	ARM::VST3q16Pseudo_UPD,
5074	ARM::VST3q32Pseudo_UPD };
5075	static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5076	ARM::VST3q16oddPseudo,
5077	ARM::VST3q32oddPseudo };
5078	SelectVST(N, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes0, QOpcodes1);
5079	return;
5080	}
5081
5082	case Intrinsic::arm_neon_vst4: {
5083	static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5084	ARM::VST4d16Pseudo,
5085	ARM::VST4d32Pseudo,
5086	ARM::VST1d64QPseudo };
5087	static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5088	ARM::VST4q16Pseudo_UPD,
5089	ARM::VST4q32Pseudo_UPD };
5090	static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5091	ARM::VST4q16oddPseudo,
5092	ARM::VST4q32oddPseudo };
5093	SelectVST(N, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes0, QOpcodes1);
5094	return;
5095	}
5096
5097	case Intrinsic::arm_neon_vst2lane: {
5098	static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5099	ARM::VST2LNd16Pseudo,
5100	ARM::VST2LNd32Pseudo };
5101	static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5102	ARM::VST2LNq32Pseudo };
5103	SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: `2`, DOpcodes, QOpcodes);
5104	return;
5105	}
5106
5107	case Intrinsic::arm_neon_vst3lane: {
5108	static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5109	ARM::VST3LNd16Pseudo,
5110	ARM::VST3LNd32Pseudo };
5111	static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5112	ARM::VST3LNq32Pseudo };
5113	SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: `3`, DOpcodes, QOpcodes);
5114	return;
5115	}
5116
5117	case Intrinsic::arm_neon_vst4lane: {
5118	static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5119	ARM::VST4LNd16Pseudo,
5120	ARM::VST4LNd32Pseudo };
5121	static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5122	ARM::VST4LNq32Pseudo };
5123	SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: `4`, DOpcodes, QOpcodes);
5124	return;
5125	}
5126
5127	case Intrinsic::arm_mve_vldr_gather_base_wb:
5128	case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5129	static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5130	ARM::MVE_VLDRDU64_qi_pre};
5131	SelectMVE_WB(N, Opcodes,
5132	Predicated: IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5133	return;
5134	}
5135
5136	case Intrinsic::arm_mve_vld2q: {
5137	static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5138	static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5139	ARM::MVE_VLD21_16};
5140	static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5141	ARM::MVE_VLD21_32};
5142	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5143	SelectMVE_VLD(N, NumVecs: `2`, Opcodes, HasWriteback: false);
5144	return;
5145	}
5146
5147	case Intrinsic::arm_mve_vld4q: {
5148	static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5149	ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5150	static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5151	ARM::MVE_VLD42_16,
5152	ARM::MVE_VLD43_16};
5153	static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5154	ARM::MVE_VLD42_32,
5155	ARM::MVE_VLD43_32};
5156	static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5157	SelectMVE_VLD(N, NumVecs: `4`, Opcodes, HasWriteback: false);
5158	return;
5159	}
5160	}
5161	break;
5162	}
5163
5164	case ISD::INTRINSIC_WO_CHAIN: {
5165	unsigned IntNo = N->getConstantOperandVal(Num: `0`);
5166	switch (IntNo) {
5167	default:
5168	break;
5169
5170	// Scalar f32 -> bf16
5171	case Intrinsic::arm_neon_vcvtbfp2bf: {
5172	SDLoc dl(N);
5173	const SDValue &Src = N->getOperand(Num: `1`);
5174	llvm::EVT DestTy = N->getValueType(ResNo: `0`);
5175	SDValue Pred = getAL(CurDAG, dl);
5176	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
5177	SDValue Ops[] = { Src, Src, Pred, Reg0 };
5178	CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVTB, VT: DestTy, Ops);
5179	return;
5180	}
5181
5182	// Vector v4f32 -> v4bf16
5183	case Intrinsic::arm_neon_vcvtfp2bf: {
5184	SDLoc dl(N);
5185	const SDValue &Src = N->getOperand(Num: `1`);
5186	SDValue Pred = getAL(CurDAG, dl);
5187	SDValue Reg0 = CurDAG->getRegister(Reg: `0`, VT: MVT::i32);
5188	SDValue Ops[] = { Src, Pred, Reg0 };
5189	CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVT, VT: MVT::v4bf16, Ops);
5190	return;
5191	}
5192
5193	case Intrinsic::arm_mve_urshrl:
5194	SelectMVE_LongShift(N, Opcode: ARM::MVE_URSHRL, Immediate: true, HasSaturationOperand: false);
5195	return;
5196	case Intrinsic::arm_mve_uqshll:
5197	SelectMVE_LongShift(N, Opcode: ARM::MVE_UQSHLL, Immediate: true, HasSaturationOperand: false);
5198	return;
5199	case Intrinsic::arm_mve_srshrl:
5200	SelectMVE_LongShift(N, Opcode: ARM::MVE_SRSHRL, Immediate: true, HasSaturationOperand: false);
5201	return;
5202	case Intrinsic::arm_mve_sqshll:
5203	SelectMVE_LongShift(N, Opcode: ARM::MVE_SQSHLL, Immediate: true, HasSaturationOperand: false);
5204	return;
5205	case Intrinsic::arm_mve_uqrshll:
5206	SelectMVE_LongShift(N, Opcode: ARM::MVE_UQRSHLL, Immediate: false, HasSaturationOperand: true);
5207	return;
5208	case Intrinsic::arm_mve_sqrshrl:
5209	SelectMVE_LongShift(N, Opcode: ARM::MVE_SQRSHRL, Immediate: false, HasSaturationOperand: true);
5210	return;
5211
5212	case Intrinsic::arm_mve_vadc:
5213	case Intrinsic::arm_mve_vadc_predicated:
5214	SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VADC, OpcodeWithNoCarry: ARM::MVE_VADCI, Add: true,
5215	Predicated: IntNo == Intrinsic::arm_mve_vadc_predicated);
5216	return;
5217	case Intrinsic::arm_mve_vsbc:
5218	case Intrinsic::arm_mve_vsbc_predicated:
5219	SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VSBC, OpcodeWithNoCarry: ARM::MVE_VSBCI, Add: false,
5220	Predicated: IntNo == Intrinsic::arm_mve_vsbc_predicated);
5221	return;
5222	case Intrinsic::arm_mve_vshlc:
5223	case Intrinsic::arm_mve_vshlc_predicated:
5224	SelectMVE_VSHLC(N, Predicated: IntNo == Intrinsic::arm_mve_vshlc_predicated);
5225	return;
5226
5227	case Intrinsic::arm_mve_vmlldava:
5228	case Intrinsic::arm_mve_vmlldava_predicated: {
5229	static const uint16_t OpcodesU[] = {
5230	ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5231	ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5232	};
5233	static const uint16_t OpcodesS[] = {
5234	ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5235	ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5236	ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5237	ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5238	ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5239	ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5240	ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5241	ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5242	};
5243	SelectMVE_VMLLDAV(N, Predicated: IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5244	OpcodesS, OpcodesU);
5245	return;
5246	}
5247
5248	case Intrinsic::arm_mve_vrmlldavha:
5249	case Intrinsic::arm_mve_vrmlldavha_predicated: {
5250	static const uint16_t OpcodesU[] = {
5251	ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5252	};
5253	static const uint16_t OpcodesS[] = {
5254	ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5255	ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5256	ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5257	ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5258	};
5259	SelectMVE_VRMLLDAVH(N, Predicated: IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5260	OpcodesS, OpcodesU);
5261	return;
5262	}
5263
5264	case Intrinsic::arm_mve_vidup:
5265	case Intrinsic::arm_mve_vidup_predicated: {
5266	static const uint16_t Opcodes[] = {
5267	ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5268	};
5269	SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5270	Predicated: IntNo == Intrinsic::arm_mve_vidup_predicated);
5271	return;
5272	}
5273
5274	case Intrinsic::arm_mve_vddup:
5275	case Intrinsic::arm_mve_vddup_predicated: {
5276	static const uint16_t Opcodes[] = {
5277	ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5278	};
5279	SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5280	Predicated: IntNo == Intrinsic::arm_mve_vddup_predicated);
5281	return;
5282	}
5283
5284	case Intrinsic::arm_mve_viwdup:
5285	case Intrinsic::arm_mve_viwdup_predicated: {
5286	static const uint16_t Opcodes[] = {
5287	ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5288	};
5289	SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5290	Predicated: IntNo == Intrinsic::arm_mve_viwdup_predicated);
5291	return;
5292	}
5293
5294	case Intrinsic::arm_mve_vdwdup:
5295	case Intrinsic::arm_mve_vdwdup_predicated: {
5296	static const uint16_t Opcodes[] = {
5297	ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5298	};
5299	SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5300	Predicated: IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5301	return;
5302	}
5303
5304	case Intrinsic::arm_cde_cx1d:
5305	case Intrinsic::arm_cde_cx1da:
5306	case Intrinsic::arm_cde_cx2d:
5307	case Intrinsic::arm_cde_cx2da:
5308	case Intrinsic::arm_cde_cx3d:
5309	case Intrinsic::arm_cde_cx3da: {
5310	bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da \|\|
5311	IntNo == Intrinsic::arm_cde_cx2da \|\|
5312	IntNo == Intrinsic::arm_cde_cx3da;
5313	size_t NumExtraOps;
5314	uint16_t Opcode;
5315	switch (IntNo) {
5316	case Intrinsic::arm_cde_cx1d:
5317	case Intrinsic::arm_cde_cx1da:
5318	NumExtraOps = `0`;
5319	Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5320	break;
5321	case Intrinsic::arm_cde_cx2d:
5322	case Intrinsic::arm_cde_cx2da:
5323	NumExtraOps = `1`;
5324	Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5325	break;
5326	case Intrinsic::arm_cde_cx3d:
5327	case Intrinsic::arm_cde_cx3da:
5328	NumExtraOps = `2`;
5329	Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5330	break;
5331	default:
5332	llvm_unreachable("Unexpected opcode");
5333	}
5334	SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5335	return;
5336	}
5337	}
5338	break;
5339	}
5340
5341	case ISD::ATOMIC_CMP_SWAP:
5342	SelectCMP_SWAP(N);
5343	return;
5344	}
5345
5346	SelectCode(N);
5347	}
5348
5349	// Inspect a register string of the form
5350	// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5351	// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5352	// and obtain the integer operands from them, adding these operands to the
5353	// provided vector.
5354	static void getIntOperandsFromRegisterString(StringRef RegString,
5355	SelectionDAG *CurDAG,
5356	const SDLoc &DL,
5357	std::vector<SDValue> &Ops) {
5358	SmallVector<StringRef, `5`> Fields;
5359	RegString.split(A&: Fields, Separator: `':'`);
5360
5361	if (Fields.size() > `1`) {
5362	bool AllIntFields = true;
5363
5364	for (StringRef Field : Fields) {
5365	// Need to trim out leading 'cp' characters and get the integer field.
5366	unsigned IntField;
5367	AllIntFields &= !Field.trim(Chars: "CPcp").getAsInteger(Radix: `10`, Result&: IntField);
5368	Ops.push_back(x: CurDAG->getTargetConstant(Val: IntField, DL, VT: MVT::i32));
5369	}
5370
5371	assert(AllIntFields &&
5372	"Unexpected non-integer value in special register string.");
5373	(void)AllIntFields;
5374	}
5375	}
5376
5377	// Maps a Banked Register string to its mask value. The mask value returned is
5378	// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5379	// mask operand, which expresses which register is to be used, e.g. r8, and in
5380	// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5381	// was invalid.
5382	static inline int getBankedRegisterMask(StringRef RegString) {
5383	auto TheReg = ARMBankedReg::lookupBankedRegByName(Name: RegString.lower());
5384	if (!TheReg)
5385	return -`1`;
5386	return TheReg->Encoding;
5387	}
5388
5389	// The flags here are common to those allowed for apsr in the A class cores and
5390	// those allowed for the special registers in the M class cores. Returns a
5391	// value representing which flags were present, -1 if invalid.
5392	static inline int getMClassFlagsMask(StringRef Flags) {
5393	return StringSwitch<int>(Flags)
5394	.Case(S: "", Value: `0x2`) // no flags means nzcvq for psr registers, and 0x2 is
5395	// correct when flags are not permitted
5396	.Case(S: "g", Value: `0x1`)
5397	.Case(S: "nzcvq", Value: `0x2`)
5398	.Case(S: "nzcvqg", Value: `0x3`)
5399	.Default(Value: -`1`);
5400	}
5401
5402	// Maps MClass special registers string to its value for use in the
5403	// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5404	// Returns -1 to signify that the string was invalid.
5405	static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5406	auto TheReg = ARMSysReg::lookupMClassSysRegByName(Name: Reg);
5407	const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5408	if (!TheReg \|\| !TheReg->hasRequiredFeatures(ActiveFeatures: FeatureBits))
5409	return -`1`;
5410	return (int)(TheReg->Encoding & `0xFFF`); // SYSm value
5411	}
5412
5413	static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5414	// The mask operand contains the special register (R Bit) in bit 4, whether
5415	// the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5416	// bits 3-0 contains the fields to be accessed in the special register, set by
5417	// the flags provided with the register.
5418	int Mask = `0`;
5419	if (Reg == "apsr") {
5420	// The flags permitted for apsr are the same flags that are allowed in
5421	// M class registers. We get the flag value and then shift the flags into
5422	// the correct place to combine with the mask.
5423	Mask = getMClassFlagsMask(Flags);
5424	if (Mask == -`1`)
5425	return -`1`;
5426	return Mask << `2`;
5427	}
5428
5429	if (Reg != "cpsr" && Reg != "spsr") {
5430	return -`1`;
5431	}
5432
5433	// This is the same as if the flags were "fc"
5434	if (Flags.empty() \|\| Flags == "all")
5435	return Mask \| `0x9`;
5436
5437	// Inspect the supplied flags string and set the bits in the mask for
5438	// the relevant and valid flags allowed for cpsr and spsr.
5439	for (char Flag : Flags) {
5440	int FlagVal;
5441	switch (Flag) {
5442	case `'c'`:
5443	FlagVal = `0x1`;
5444	break;
5445	case `'x'`:
5446	FlagVal = `0x2`;
5447	break;
5448	case `'s'`:
5449	FlagVal = `0x4`;
5450	break;
5451	case `'f'`:
5452	FlagVal = `0x8`;
5453	break;
5454	default:
5455	FlagVal = `0`;
5456	}
5457
5458	// This avoids allowing strings where the same flag bit appears twice.
5459	if (!FlagVal \|\| (Mask & FlagVal))
5460	return -`1`;
5461	Mask \|= FlagVal;
5462	}
5463
5464	// If the register is spsr then we need to set the R bit.
5465	if (Reg == "spsr")
5466	Mask \|= `0x10`;
5467
5468	return Mask;
5469	}
5470
5471	// Lower the read_register intrinsic to ARM specific DAG nodes
5472	// using the supplied metadata string to select the instruction node to use
5473	// and the registers/masks to construct as operands for the node.
5474	bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5475	const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: `1`));
5476	const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: `0`));
5477	bool IsThumb2 = Subtarget->isThumb2();
5478	SDLoc DL(N);
5479
5480	std::vector<SDValue> Ops;
5481	getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);
5482
5483	if (!Ops.empty()) {
5484	// If the special register string was constructed of fields (as defined
5485	// in the ACLE) then need to lower to MRC node (32 bit) or
5486	// MRRC node(64 bit), we can make the distinction based on the number of
5487	// operands we have.
5488	unsigned Opcode;
5489	SmallVector<EVT, `3`> ResTypes;
5490	if (Ops.size() == `5`){
5491	Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5492	ResTypes.append(IL: { MVT::i32, MVT::Other });
5493	} else {
5494	assert(Ops.size() == `3` &&
5495	"Invalid number of fields in special register string.");
5496	Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5497	ResTypes.append(IL: { MVT::i32, MVT::i32, MVT::Other });
5498	}
5499
5500	Ops.push_back(x: getAL(CurDAG, dl: DL));
5501	Ops.push_back(x: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
5502	Ops.push_back(x: N->getOperand(Num: `0`));
5503	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: ResTypes, Ops));
5504	return true;
5505	}
5506
5507	std::string SpecialReg = RegString->getString().lower();
5508
5509	int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
5510	if (BankedReg != -`1`) {
5511	Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32),
5512	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5513	N->getOperand(Num: `0`) };
5514	ReplaceNode(
5515	F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5516	dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5517	return true;
5518	}
5519
5520	// The VFP registers are read by creating SelectionDAG nodes with opcodes
5521	// corresponding to the register that is being read from. So we switch on the
5522	// string to find which opcode we need to use.
5523	unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5524	.Case(S: "fpscr", Value: ARM::VMRS)
5525	.Case(S: "fpexc", Value: ARM::VMRS_FPEXC)
5526	.Case(S: "fpsid", Value: ARM::VMRS_FPSID)
5527	.Case(S: "mvfr0", Value: ARM::VMRS_MVFR0)
5528	.Case(S: "mvfr1", Value: ARM::VMRS_MVFR1)
5529	.Case(S: "mvfr2", Value: ARM::VMRS_MVFR2)
5530	.Case(S: "fpinst", Value: ARM::VMRS_FPINST)
5531	.Case(S: "fpinst2", Value: ARM::VMRS_FPINST2)
5532	.Default(Value: `0`);
5533
5534	// If an opcode was found then we can lower the read to a VFP instruction.
5535	if (Opcode) {
5536	if (!Subtarget->hasVFP2Base())
5537	return false;
5538	if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5539	return false;
5540
5541	Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5542	N->getOperand(Num: `0`) };
5543	ReplaceNode(F: N,
5544	T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5545	return true;
5546	}
5547
5548	// If the target is M Class then need to validate that the register string
5549	// is an acceptable value, so check that a mask can be constructed from the
5550	// string.
5551	if (Subtarget->isMClass()) {
5552	int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
5553	if (SYSmValue == -`1`)
5554	return false;
5555
5556	SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
5557	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5558	N->getOperand(Num: `0`) };
5559	ReplaceNode(
5560	F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MRS_M, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5561	return true;
5562	}
5563
5564	// Here we know the target is not M Class so we need to check if it is one
5565	// of the remaining possible values which are apsr, cpsr or spsr.
5566	if (SpecialReg == "apsr" \|\| SpecialReg == "cpsr") {
5567	Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5568	N->getOperand(Num: `0`) };
5569	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5570	dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5571	return true;
5572	}
5573
5574	if (SpecialReg == "spsr") {
5575	Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5576	N->getOperand(Num: `0`) };
5577	ReplaceNode(
5578	F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, dl: DL,
5579	VT1: MVT::i32, VT2: MVT::Other, Ops));
5580	return true;
5581	}
5582
5583	return false;
5584	}
5585
5586	// Lower the write_register intrinsic to ARM specific DAG nodes
5587	// using the supplied metadata string to select the instruction node to use
5588	// and the registers/masks to use in the nodes
5589	bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5590	const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: `1`));
5591	const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: `0`));
5592	bool IsThumb2 = Subtarget->isThumb2();
5593	SDLoc DL(N);
5594
5595	std::vector<SDValue> Ops;
5596	getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);
5597
5598	if (!Ops.empty()) {
5599	// If the special register string was constructed of fields (as defined
5600	// in the ACLE) then need to lower to MCR node (32 bit) or
5601	// MCRR node(64 bit), we can make the distinction based on the number of
5602	// operands we have.
5603	unsigned Opcode;
5604	if (Ops.size() == `5`) {
5605	Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5606	Ops.insert(position: Ops.begin()+`2`, x: N->getOperand(Num: `2`));
5607	} else {
5608	assert(Ops.size() == `3` &&
5609	"Invalid number of fields in special register string.");
5610	Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5611	SDValue WriteValue[] = { N->getOperand(Num: `2`), N->getOperand(Num: `3`) };
5612	Ops.insert(position: Ops.begin()+`2`, first: WriteValue, last: WriteValue+`2`);
5613	}
5614
5615	Ops.push_back(x: getAL(CurDAG, dl: DL));
5616	Ops.push_back(x: CurDAG->getRegister(Reg: `0`, VT: MVT::i32));
5617	Ops.push_back(x: N->getOperand(Num: `0`));
5618
5619	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
5620	return true;
5621	}
5622
5623	std::string SpecialReg = RegString->getString().lower();
5624	int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
5625	if (BankedReg != -`1`) {
5626	Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32), N->getOperand(Num: `2`),
5627	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5628	N->getOperand(Num: `0`) };
5629	ReplaceNode(
5630	F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5631	dl: DL, VT: MVT::Other, Ops));
5632	return true;
5633	}
5634
5635	// The VFP registers are written to by creating SelectionDAG nodes with
5636	// opcodes corresponding to the register that is being written. So we switch
5637	// on the string to find which opcode we need to use.
5638	unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5639	.Case(S: "fpscr", Value: ARM::VMSR)
5640	.Case(S: "fpexc", Value: ARM::VMSR_FPEXC)
5641	.Case(S: "fpsid", Value: ARM::VMSR_FPSID)
5642	.Case(S: "fpinst", Value: ARM::VMSR_FPINST)
5643	.Case(S: "fpinst2", Value: ARM::VMSR_FPINST2)
5644	.Default(Value: `0`);
5645
5646	if (Opcode) {
5647	if (!Subtarget->hasVFP2Base())
5648	return false;
5649	Ops = { N->getOperand(Num: `2`), getAL(CurDAG, dl: DL),
5650	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), N->getOperand(Num: `0`) };
5651	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
5652	return true;
5653	}
5654
5655	std::pair<StringRef, StringRef> Fields;
5656	Fields = StringRef (SpecialReg).rsplit(Separator: `'_'`);
5657	std::string Reg = Fields.first.str();
5658	StringRef Flags = Fields.second;
5659
5660	// If the target was M Class then need to validate the special register value
5661	// and retrieve the mask for use in the instruction node.
5662	if (Subtarget->isMClass()) {
5663	int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
5664	if (SYSmValue == -`1`)
5665	return false;
5666
5667	SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
5668	N->getOperand(Num: `2`), getAL(CurDAG, dl: DL),
5669	CurDAG->getRegister(Reg: `0`, VT: MVT::i32), N->getOperand(Num: `0`) };
5670	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MSR_M, dl: DL, VT: MVT::Other, Ops));
5671	return true;
5672	}
5673
5674	// We then check to see if a valid mask can be constructed for one of the
5675	// register string values permitted for the A and R class cores. These values
5676	// are apsr, spsr and cpsr; these are also valid on older cores.
5677	int Mask = getARClassRegisterMask(Reg, Flags);
5678	if (Mask != -`1`) {
5679	Ops = { CurDAG->getTargetConstant(Val: Mask, DL, VT: MVT::i32), N->getOperand(Num: `2`),
5680	getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: `0`, VT: MVT::i32),
5681	N->getOperand(Num: `0`) };
5682	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5683	dl: DL, VT: MVT::Other, Ops));
5684	return true;
5685	}
5686
5687	return false;
5688	}
5689
5690	bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5691	std::vector<SDValue> AsmNodeOperands;
5692	InlineAsm::Flag Flag;
5693	bool Changed = false;
5694	unsigned NumOps = N->getNumOperands();
5695
5696	// Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5697	// However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5698	// (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5699	// respectively. Since there is no constraint to explicitly specify a
5700	// reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5701	// the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5702	// them into a GPRPair.
5703
5704	SDLoc dl(N);
5705	SDValue Glue = N->getGluedNode() ? N->getOperand(Num: NumOps - `1`) : SDValue ();
5706
5707	SmallVector<bool, `8`> OpChanged;
5708	// Glue node will be appended late.
5709	for(unsigned i = `0`, e = N->getGluedNode() ? NumOps - `1` : NumOps; i < e; ++i) {
5710	SDValue op = N->getOperand(Num: i);
5711	AsmNodeOperands.push_back(x: op);
5712
5713	if (i < InlineAsm::Op_FirstOperand)
5714	continue;
5715
5716	if (const auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: i)))
5717	Flag = InlineAsm::Flag (C->getZExtValue());
5718	else
5719	continue;
5720
5721	// Immediate operands to inline asm in the SelectionDAG are modeled with
5722	// two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5723	// the second is a constant with the value of the immediate. If we get here
5724	// and we have a Kind::Imm, skip the next operand, and continue.
5725	if (Flag.isImmKind()) {
5726	SDValue op = N->getOperand(Num: ++i);
5727	AsmNodeOperands.push_back(x: op);
5728	continue;
5729	}
5730
5731	const unsigned NumRegs = Flag.getNumOperandRegisters();
5732	if (NumRegs)
5733	OpChanged.push_back(Elt: false);
5734
5735	unsigned DefIdx = `0`;
5736	bool IsTiedToChangedOp = false;
5737	// If it's a use that is tied with a previous def, it has no
5738	// reg class constraint.
5739	if (Changed && Flag.isUseOperandTiedToDef(Idx&: DefIdx))
5740	IsTiedToChangedOp = OpChanged [DefIdx];
5741
5742	// Memory operands to inline asm in the SelectionDAG are modeled with two
5743	// operands: a constant of value InlineAsm::Kind::Mem followed by the input
5744	// operand. If we get here and we have a Kind::Mem, skip the next operand
5745	// (so it doesn't get misinterpreted), and continue. We do this here because
5746	// it's important to update the OpChanged array correctly before moving on.
5747	if (Flag.isMemKind()) {
5748	SDValue op = N->getOperand(Num: ++i);
5749	AsmNodeOperands.push_back(x: op);
5750	continue;
5751	}
5752
5753	if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5754	!Flag.isRegDefEarlyClobberKind())
5755	continue;
5756
5757	unsigned RC;
5758	const bool HasRC = Flag.hasRegClassConstraint(RC);
5759	if ((!IsTiedToChangedOp && (!HasRC \|\| RC != ARM::GPRRegClassID))
5760	\|\| NumRegs != `2`)
5761	continue;
5762
5763	assert((i+`2` < NumOps) && "Invalid number of operands in inline asm");
5764	SDValue V0 = N->getOperand(Num: i+`1`);
5765	SDValue V1 = N->getOperand(Num: i+`2`);
5766	Register Reg0 = cast<RegisterSDNode>(Val&: V0)->getReg();
5767	Register Reg1 = cast<RegisterSDNode>(Val&: V1)->getReg();
5768	SDValue PairedReg;
5769	MachineRegisterInfo &MRI = MF->getRegInfo();
5770
5771	if (Flag.isRegDefKind() \|\| Flag.isRegDefEarlyClobberKind()) {
5772	// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5773	// the original GPRs.
5774
5775	Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
5776	PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
5777	SDValue Chain = SDValue (N,`0`);
5778
5779	SDNode *GU = N->getGluedUser();
5780	SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, Reg: GPVR, VT: MVT::Untyped,
5781	Glue: Chain.getValue(R: `1`));
5782
5783	// Extract values from a GPRPair reg and copy to the original GPR reg.
5784	SDValue Sub0 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
5785	Operand: RegCopy);
5786	SDValue Sub1 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
5787	Operand: RegCopy);
5788	SDValue T0 = CurDAG->getCopyToReg(Chain: Sub0, dl, Reg: Reg0, N: Sub0,
5789	Glue: RegCopy.getValue(R: `1`));
5790	SDValue T1 = CurDAG->getCopyToReg(Chain: Sub1, dl, Reg: Reg1, N: Sub1, Glue: T0.getValue(R: `1`));
5791
5792	// Update the original glue user.
5793	std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-`1`);
5794	Ops.push_back(x: T1.getValue(R: `1`));
5795	CurDAG->UpdateNodeOperands(N: GU, Ops);
5796	} else {
5797	// For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5798	// GPRPair and then pass the GPRPair to the inline asm.
5799	SDValue Chain = AsmNodeOperands [InlineAsm::Op_InputChain];
5800
5801	// As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5802	SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg0, VT: MVT::i32,
5803	Glue: Chain.getValue(R: `1`));
5804	SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg1, VT: MVT::i32,
5805	Glue: T0.getValue(R: `1`));
5806	SDValue Pair = SDValue (createGPRPairNode(VT: MVT::Untyped, V0: T0, V1: T1), `0`);
5807
5808	// Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5809	// i32 VRs of inline asm with it.
5810	Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
5811	PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
5812	Chain = CurDAG->getCopyToReg(Chain: T1, dl, Reg: GPVR, N: Pair, Glue: T1.getValue(R: `1`));
5813
5814	AsmNodeOperands [InlineAsm::Op_InputChain] = Chain;
5815	Glue = Chain.getValue(R: `1`);
5816	}
5817
5818	Changed = true;
5819
5820	if(PairedReg.getNode()) {
5821	OpChanged [OpChanged.size() -`1` ] = true;
5822	Flag = InlineAsm::Flag (Flag.getKind(), `1` / RegNum/);
5823	if (IsTiedToChangedOp)
5824	Flag.setMatchingOp(DefIdx);
5825	else
5826	Flag.setRegClass(ARM::GPRPairRegClassID);
5827	// Replace the current flag.
5828	AsmNodeOperands [AsmNodeOperands.size() -`1`] = CurDAG->getTargetConstant(
5829	Val: Flag, DL: dl, VT: MVT::i32);
5830	// Add the new register node and skip the original two GPRs.
5831	AsmNodeOperands.push_back(x: PairedReg);
5832	// Skip the next two GPRs.
5833	i += `2`;
5834	}
5835	}
5836
5837	if (Glue.getNode())
5838	AsmNodeOperands.push_back(x: Glue);
5839	if (!Changed)
5840	return false;
5841
5842	SDValue New = CurDAG->getNode(Opcode: N->getOpcode(), DL: SDLoc (N),
5843	VTList: CurDAG->getVTList(VT1: MVT::Other, VT2: MVT::Glue), Ops: AsmNodeOperands);
5844	New ->setNodeId(-`1`);
5845	ReplaceNode(F: N, T: New.getNode());
5846	return true;
5847	}
5848
5849	bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5850	const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5851	std::vector<SDValue> &OutOps) {
5852	switch(ConstraintID) {
5853	default:
5854	llvm_unreachable("Unexpected asm memory constraint");
5855	case InlineAsm::ConstraintCode::m:
5856	case InlineAsm::ConstraintCode::o:
5857	case InlineAsm::ConstraintCode::Q:
5858	case InlineAsm::ConstraintCode::Um:
5859	case InlineAsm::ConstraintCode::Un:
5860	case InlineAsm::ConstraintCode::Uq:
5861	case InlineAsm::ConstraintCode::Us:
5862	case InlineAsm::ConstraintCode::Ut:
5863	case InlineAsm::ConstraintCode::Uv:
5864	case InlineAsm::ConstraintCode::Uy:
5865	// Require the address to be in a register. That is safe for all ARM
5866	// variants and it is hard to do anything much smarter without knowing
5867	// how the operand is used.
5868	OutOps.push_back(x: Op);
5869	return false;
5870	}
5871	return true;
5872	}
5873
5874	/// createARMISelDag - This pass converts a legalized DAG into a
5875	/// ARM-specific DAG, ready for instruction scheduling.
5876	///
5877	FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5878	CodeGenOptLevel OptLevel) {
5879	return new ARMDAGToDAGISelLegacy (TM, OptLevel);
5880	}
5881

Browse the source code of llvm_projects/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp