1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
16#include "MCTargetDesc/ARMAddressingModes.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SelectionDAGISel.h"
26#include "llvm/CodeGen/TargetLowering.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/LLVMContext.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Target/TargetOptions.h"
36#include <optional>
37
38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41#define PASS_NAME "ARM Instruction Selection"
42
// Debugging knob: when set, complex-pattern matching of ARM shifter operands
// is disabled entirely, forcing shifts to be selected as separate
// instructions.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(Val: false));
47
48//===--------------------------------------------------------------------===//
49/// ARMDAGToDAGISel - ARM specific code to select ARM machine
50/// instructions for SelectionDAG operations.
51///
namespace {

/// ARMDAGToDAGISel - ARM-specific SelectionDAG instruction selector. The
/// Subtarget pointer is re-bound per function in runOnMachineFunction.
class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(mf&: MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true as some complex patterns, like those that call
  /// canExtractShiftFromMul can modify the DAG inplace.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, CheckProfitability: false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    // Only fold a shift that has a single user.
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  // ARM Addressing Modes:
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Immediate-encoding predicates used by the generated matcher.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Arg: Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Arg: ~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Arg: Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Arg: ~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///         the accumulator and the immediate operand, i.e. 0
  ///         for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Special-register read/write selection.
  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

/// Legacy pass-manager wrapper around ARMDAGToDAGISel.
class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(args&: tm, args&: OptLevel)) {}
};
} // end anonymous namespace
360
// Pass identification token for the legacy pass manager; registered below
// under the DEBUG_TYPE/PASS_NAME strings defined at the top of the file.
char ARMDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
364
365/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
366/// operand. If so Imm will receive the 32-bit value.
367static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
368 if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: 0) == MVT::i32) {
369 Imm = N->getAsZExtVal();
370 return true;
371 }
372 return false;
373}
374
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N: N.getNode(), Imm);
}
380
381// isOpcWithIntImmediate - This method tests to see if the node is a specific
382// opcode and that it has a immediate integer right operand.
383// If so Imm will receive the 32 bit value.
384static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
385 return N->getOpcode() == Opc &&
386 isInt32Immediate(N: N->getOperand(Num: 1).getNode(), Imm);
387}
388
389/// Check whether a particular node is a constant value representable as
390/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
391///
392/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
393static bool isScaledConstantInRange(SDValue Node, int Scale,
394 int RangeMin, int RangeMax,
395 int &ScaledConstant) {
396 assert(Scale > 0 && "Invalid scale!");
397
398 // Check that this is a constant.
399 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Node);
400 if (!C)
401 return false;
402
403 ScaledConstant = (int) C->getZExtValue();
404 if ((ScaledConstant % Scale) != 0)
405 return false;
406
407 ScaledConstant /= Scale;
408 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
409}
410
/// Pre-ISel DAG rewrite: turn (add X, (and (srl Y, c1), mask)) into a form
/// where the and/srl pair can become a bitfield extract (UBFX) and the
/// residual scaling becomes a left shift foldable into the add.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // UBFX is only available from ARMv6T2 on.
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(Range: CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(Num: 0);
    SDValue N1 = N.getOperand(Num: 1);
    unsigned And_imm = 0;
    // Accept the AND on either side of the ADD; canonicalize it into N1.
    if (!isOpcWithIntImmediate(N: N1.getNode(), Opc: ISD::AND, Imm&: And_imm)) {
      if (isOpcWithIntImmediate(N: N0.getNode(), Opc: ISD::AND, Imm&: And_imm))
        std::swap(a&: N0, b&: N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(Val: And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros the mask must be all-ones
    // (x & (x+1) == 0 iff x is of the form 0...01...1).
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(i: 0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(N: Srl.getNode(), Opc: ISD::SRL, Imm&: Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1) ||
          SelectRegShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1, C&: CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(Opcode: ISD::SRL, DL: SDLoc(Srl), VT: MVT::i32,
                          N1: Srl.getOperand(i: 0),
                          N2: CurDAG->getConstant(Val: Srl_imm + TZ, DL: SDLoc(Srl),
                                                VT: MVT::i32));
    N1 = CurDAG->getNode(Opcode: ISD::AND, DL: SDLoc(N1), VT: MVT::i32,
                         N1: Srl,
                         N2: CurDAG->getConstant(Val: And_imm, DL: SDLoc(Srl), VT: MVT::i32));
    N1 = CurDAG->getNode(Opcode: ISD::SHL, DL: SDLoc(N1), VT: MVT::i32,
                         N1, N2: CurDAG->getConstant(Val: TZ, DL: SDLoc(Srl), VT: MVT::i32));
    CurDAG->UpdateNodeOperands(N: &N, Op1: N0, Op2: N1);
  }
}
491
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 we don't try to avoid the hazard.
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // The hazard analysis below only reasons about a single consumer.
  if (!N->hasOneUse())
    return false;

  SDNode *User = *N->user_begin();
  if (User->getOpcode() == ISD::CopyToReg)
    return true;
  if (User->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
    // Stores and GPR transfers don't trigger the RAW hazard.
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
536
537bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
538 ARM_AM::ShiftOpc ShOpcVal,
539 unsigned ShAmt) {
540 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
541 return true;
542 if (Shift.hasOneUse())
543 return true;
544 // R << 2 is free.
545 return ShOpcVal == ARM_AM::lsl &&
546 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
547}
548
/// Check whether the multiply \p N can have a power of two extracted from its
/// constant operand (to be used as a shifter operand) such that the remaining
/// constant is cheaper to materialize. On success, sets \p PowerOfTwo and
/// \p NewMulConst.
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  // (capped at MaxShift; MulConstVal != 0 guarantees termination).
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(Val: NewMulConstVal, DL: SDLoc(N), VT: MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(Val: MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(Val: NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
582
/// Replace all uses of N with M, first repositioning M so it precedes N in
/// the DAG's topological order (ensuring M is visited by the selector where
/// N would have been).
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(Position: N.getNode()->getIterator(), N: M.getNode());
  ReplaceUses(F: N, T: M);
}
587
/// Match N as a register shifted by an immediate (so_reg_imm shifter
/// operand). On success, BaseReg is the register and Opc encodes the shift
/// kind and amount. Note: may mutate the DAG via replaceDAGValue when
/// extracting a shift from a multiply.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, MaxShift: 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      // Rewrite the multiply's constant in place; the Handle keeps N alive
      // across the DAG mutation.
      replaceDAGValue(N: N.getOperand(i: 1), M: NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: PowerOfTwo), DL: Loc, VT: MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(i: 0);
  unsigned ShImmVal = 0;
  // Immediate form requires a constant shift amount.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
626
/// Match N as a register shifted by a register (so_reg_reg shifter operand).
/// On success, BaseReg/ShReg are the value and shift-amount registers and
/// Opc encodes the shift kind (immediate amount is 0 for the register form).
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(i: 0);
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to the immediate form, not this one.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (RHS) return false;

  ShReg = N.getOperand(i: 1);
  if (CheckProfitability && !isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt: ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
653
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(A: N, B: Parent->getOperand(Num: 1));
}
661
662
/// Match N as a base register plus a signed 12-bit immediate offset
/// (addrmode_imm12). Always succeeds: when no offset can be folded the
/// whole expression becomes the base with offset 0.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }

    // Look through ARMISD::Wrapper, except when it wraps a target address
    // that must stay intact for later matching.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(i: 0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    int RHSC = (int)RHS->getSExtValue();
    // SUB of a constant is ADD of its negation.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
713
714
715
/// Match N as a register +/- (possibly shifted) register load/store address
/// (ldst_so_reg). Simple R +/- imm12 forms are deliberately rejected so they
/// match the cheaper LDRi12 pattern instead. May mutate the DAG when
/// extracting a shift from a multiply.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Clear the low bit so the remainder is a power of two; that power
        // becomes the shift amount and the base supplies the "+ X" term.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(Value: RHSC)) {
          unsigned ShAmt = Log2_32(Value: RHSC);
          Base = Offset = N.getOperand(i: 0);
          Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt,
                                                              SO: ARM_AM::lsl),
                                          DL: SDLoc(N), VT: MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/1,
                                RangeMin: -0x1000+1, RangeMax: 0x1000, ScaledConstant&: RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: 1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(i: 0);
  Offset = N.getOperand(i: 1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getOperand(i: 1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Shift: Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(i: 1).getOperand(i: 0);
      else {
        // Not profitable: fall back to using the unshifted RHS directly.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(i: 0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: 0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 0).getOperand(i: 1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(Shift: N.getOperand(i: 0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset.
          Offset = N.getOperand(i: 0).getOperand(i: 0);
          Base = N.getOperand(i: 1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N: Offset, MaxShift: 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(N: Offset.getOperand(i: 1), M: NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
823
/// SelectAddrMode2OffsetReg - Select a register (optionally shifted by a
/// constant) offset operand for a pre/post-indexed addrmode2 load/store.
/// Returns false for plain constants in the 12-bit range so the immediate
/// variants (SelectAddrMode2OffsetImm*) get a chance to match instead.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // The add/sub direction is derived from the memory node's indexed mode.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // A constant that fits in 12 bits is better selected as an immediate
  // offset; refuse it here.
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt))
        Offset = N.getOperand(i: 0);
      else {
        // Not profitable on this subtarget: drop back to an unshifted
        // register offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Pack direction, shift amount and shift kind into the AM2 opcode imm.
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
859
/// SelectAddrMode2OffsetImmPre - Select a pure 12-bit immediate offset for a
/// pre-indexed addrmode2 load/store (LDR_PRE_IMM / LDRB_PRE_IMM). The result
/// is a signed immediate (negated for decrementing modes) and a zero register
/// standing in for the unused register-offset operand.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val)) { // 12 bits.
    // Encode the direction in the immediate's sign rather than an AM2 opcode.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getSignedTargetConstant(Val, DL: SDLoc(Op), VT: MVT::i32);
    return true;
  }

  return false;
}
878
879
/// SelectAddrMode2OffsetImm - Select a 12-bit immediate offset for a
/// post-indexed addrmode2 load/store. Unlike the *ImmPre variant, the
/// direction is packed into an AM2 opcode immediate (via getAM2Opc) instead
/// of the sign of the value.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val)) { // 12 bits.
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: Val,
                                                    SO: ARM_AM::no_shift),
                                    DL: SDLoc(Op), VT: MVT::i32);
    return true;
  }

  return false;
}
899
/// SelectAddrOffsetNone - Trivial addressing mode: the whole operand is the
/// base register with no offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
904
/// SelectAddrMode3 - Match an addrmode3 (halfword/signed-byte/doubleword)
/// operand: Base plus either a register Offset or an 8-bit immediate folded
/// into the AM3 opcode value. Always succeeds with some decomposition.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(i: 0);
    Offset = N.getOperand(i: 1);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::sub, Offset: 0), DL: SDLoc(N),
                                    VT: MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
    // No foldable offset: the whole node is the base (lowering frame
    // indices to target frame indices as needed).
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: 0), DL: SDLoc(N),
                                    VT: MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/1,
                              RangeMin: -256 + 1, RangeMax: 256, ScaledConstant&: RHSC)) { // 8 bits.
    Base = N.getOperand(i: 0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

    // The encoded AM3 offset is unsigned; negative values become sub.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: RHSC), DL: SDLoc(N),
                                    VT: MVT::i32);
    return true;
  }

  // Constant too large to fold: fall back to base + register offset.
  Base = N.getOperand(i: 0);
  Offset = N.getOperand(i: 1);
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: 0), DL: SDLoc(N),
                                  VT: MVT::i32);
  return true;
}
958
/// SelectAddrMode3Offset - Select the offset operand for a pre/post-indexed
/// addrmode3 load/store: an 8-bit immediate if the offset is a small
/// constant, otherwise a register offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 256, ScaledConstant&: Val)) { // 8 bits.
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: Val), DL: SDLoc(Op),
                                    VT: MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: 0), DL: SDLoc(Op),
                                  VT: MVT::i32);
  return true;
}
980
/// IsAddressingMode5 - Shared implementation for addrmode5 (VFP load/store)
/// matching: Base plus a scaled 8-bit immediate packed into the AM5 (or
/// AM5FP16) opcode value. FP16 selects the half-precision variant, which
/// scales the offset by 2 instead of 4. Always succeeds.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper unless the wrapped symbol must be materialised
      // some other way.
      Base = N.getOperand(i: 0);
    }
    Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: 0),
                                       DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale, RangeMin: -255, RangeMax: 256, ScaledConstant&: RHSC)) {
    Base = N.getOperand(i: 0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    }

    // Encoded offset is unsigned; the sign becomes the add/sub bit.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: AddSub, Offset: RHSC),
                                         DL: SDLoc(N), VT: MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: AddSub, Offset: RHSC),
                                         DL: SDLoc(N), VT: MVT::i32);

    return true;
  }

  // Offset not foldable: treat the whole expression as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: ARM_AM::add, Offset: 0),
                                       DL: SDLoc(N), VT: MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: 0),
                                       DL: SDLoc(N), VT: MVT::i32);

  return true;
}
1039
/// SelectAddrMode5 - addrmode5 for full-precision VFP accesses (offset
/// scaled by 4); thin wrapper over IsAddressingMode5.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
1044
/// SelectAddrMode5FP16 - addrmode5 for half-precision VFP accesses (offset
/// scaled by 2); thin wrapper over IsAddressingMode5.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
1049
/// SelectAddrMode6 - Match an addrmode6 (NEON vector load/store) operand:
/// the address itself plus an alignment immediate derived from the memory
/// operand of the parent node. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Val: Parent);

  if (isa<LSBaseSDNode>(Val: MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(Num: MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Val: Alignment, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1078
/// SelectAddrMode6Offset - Select the post-increment operand for an
/// addrmode6 load/store. A constant increment equal to the access size is
/// represented by the zero register (implicit "increment by access size");
/// any other offset is passed through as a register. Only POST_INC matches.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Val: Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(Val&: N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  }
  return true;
}
1092
1093bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1094 SDValue &Offset, SDValue &Label) {
1095 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1096 Offset = N.getOperand(i: 0);
1097 SDValue N1 = N.getOperand(i: 1);
1098 Label = CurDAG->getTargetConstant(Val: N1->getAsZExtVal(), DL: SDLoc(N), VT: MVT::i32);
1099 return true;
1100 }
1101
1102 return false;
1103}
1104
1105
1106//===----------------------------------------------------------------------===//
1107// Thumb Addressing Modes
1108//===----------------------------------------------------------------------===//
1109
1110static bool shouldUseZeroOffsetLdSt(SDValue N) {
1111 // Negative numbers are difficult to materialise in thumb1. If we are
1112 // selecting the add of a negative, instead try to select ri with a zero
1113 // offset, so create the add node directly which will become a sub.
1114 if (N.getOpcode() != ISD::ADD)
1115 return false;
1116
1117 // Look for an imm which is not legal for ld/st, but is legal for sub.
1118 if (auto C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1)))
1119 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1120
1121 return false;
1122}
1123
/// SelectThumbAddrModeRRSext - Match a register + register Thumb address.
/// A lone null constant is accepted as both base and offset; otherwise the
/// node must be an add (or base-with-constant-offset), whose two operands
/// become Base and Offset.
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (!isNullConstant(V: N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(i: 0);
  Offset = N.getOperand(i: 1);
  return true;
}
1138
/// SelectThumbAddrModeRR - Register + register Thumb addressing, except for
/// adds of small negative constants which are better selected as ri with a
/// zero offset (see shouldUseZeroOffsetLdSt).
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
1145
/// SelectThumbAddrModeImm5S - Match a Thumb base + unsigned 5-bit scaled
/// immediate address (imm5 * Scale, Scale in {1,2,4}). Falls back to
/// register-offset selection (returns false) when the constant is out of
/// range or when a plain add is better selected as rr.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Negative-immediate add: use the whole node as base with offset 0 so
    // the address is formed by a sub.
    Base = N;
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(i: 0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale, RangeMin: 0, RangeMax: 32, ScaledConstant&: RHSC)) {
    Base = N.getOperand(i: 0);
    OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1183
/// SelectThumbAddrModeImm5S4 - imm5 scaled by 4 (word accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, Scale: 4, Base, OffImm);
}
1189
/// SelectThumbAddrModeImm5S2 - imm5 scaled by 2 (halfword accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, Scale: 2, Base, OffImm);
}
1195
/// SelectThumbAddrModeImm5S1 - imm5 scaled by 1 (byte accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, Scale: 1, Base, OffImm);
}
1201
/// SelectThumbAddrModeSP - Match an SP-relative Thumb address: a frame index
/// plus an unsigned imm8 scaled by 4. Bumps frame-object alignment to 4 where
/// needed so the word-aligned offset encoding stays valid.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
      MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  if (N.getOperand(i: 0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(ObjectIdx: FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(ObjectIdx: FI) && MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
          MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
        if (MFI.getObjectAlign(ObjectIdx: FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1247
/// SelectTAddrModeImm7 - Thumb base + signed 7-bit immediate scaled by
/// (1 << Shift). Handles both add and sub roots (sub negates the constant).
/// Always succeeds, degrading to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -0x7f, RangeMax: 0x80,
                                ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
                                               VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1269
1270
1271//===----------------------------------------------------------------------===//
1272// Thumb 2 Addressing Modes
1273//===----------------------------------------------------------------------===//
1274
1275
/// SelectT2AddrModeImm12 - Match a Thumb2 base + unsigned 12-bit immediate
/// address (t2LDRi12 and friends). Defers (R - imm8) to t2LDRi8 and
/// constant-pool wrappers to t2LDRpci; otherwise always succeeds, degrading
/// to base-only.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1331
/// SelectT2AddrModeImm8 (scaled template form) - Thumb2 base + signed 8-bit
/// immediate scaled by (1 << Shift). Handles sub roots by negating the
/// constant; always succeeds, degrading to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -255, RangeMax: 256, ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
                                               VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1358
/// SelectT2AddrModeImm8 - Match a Thumb2 base + *negative* 8-bit immediate
/// (t2LDRi8). Positive offsets are left for the imm12 form, so this only
/// succeeds for offsets in [-255, 0).
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }
  }

  return false;
}
1385
/// SelectT2AddrModeImm8Offset - Select an 8-bit immediate offset for a
/// pre/post-indexed Thumb2 load/store; the value is negated for the
/// decrementing addressing modes.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x100, ScaledConstant&: RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32)
      : CurDAG->getSignedTargetConstant(Val: -RHSC, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  return false;
}
1402
/// SelectT2AddrModeImm7 - Thumb2/MVE base + signed 7-bit immediate scaled by
/// (1 << Shift). Like the Thumb1 variant but also lowers frame-index bases.
/// Always succeeds, degrading to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -0x7f, RangeMax: 0x80,
                                ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
                                               VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1430
/// Template entry point used by TableGen patterns; forwards the compile-time
/// Shift to the runtime-parameterised overload below.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1436
/// SelectT2AddrModeImm7Offset - Select a 7-bit scaled immediate offset for a
/// pre/post-indexed (possibly masked) load/store; negated for decrementing
/// modes. Masked MVE loads/stores are handled alongside plain ones.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Val: Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Val: Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Val: Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Val: Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(Node: N, Scale: 1 << Shift, RangeMin: 0, RangeMax: 0x80, ScaledConstant&: RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift),
                                        DL: SDLoc(N), VT: MVT::i32)
      : CurDAG->getSignedTargetConstant(Val: -RHSC * (1 << Shift),
                                        DL: SDLoc(N), VT: MVT::i32);
    return true;
  }
  return false;
}
1471
1472template <int Min, int Max>
1473bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1474 int Val;
1475 if (isScaledConstantInRange(Node: N, Scale: 1, RangeMin: Min, RangeMax: Max, ScaledConstant&: Val)) {
1476 OffImm = CurDAG->getSignedTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
1477 return true;
1478 }
1479 return false;
1480}
1481
/// SelectT2AddrModeSoReg - Match a Thumb2 register + shifted-register
/// address: (R + R) or (R + (R << [1,2,3])). Constant offsets are refused
/// so the imm12/imm8 forms can claim them.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(i: 0);
  OffReg = N.getOperand(i: 1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(a&: Base, b&: OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: OffReg.getOperand(i: 1))) {
      ShAmt = Sh->getZExtValue();
      // Thumb2 only allows shift amounts 1..3 in this form.
      if (ShAmt < 4 && isShifterOpProfitable(Shift: OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(i: 0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N: OffReg, MaxShift: 3, PowerOfTwo, NewMulConst)) {
      // HandleSDNode keeps OffReg valid across the DAG mutation below.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(N: OffReg.getOperand(i: 1), M: NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(Val: ShAmt, DL: SDLoc(N), VT: MVT::i32);

  return true;
}
1541
/// SelectT2AddrModeExclusive - Match the base + imm8*4 address used by
/// ldrex/strex. Always succeeds (the instructions have no alternative
/// addressing form); a non-foldable offset simply leaves the whole node as
/// the base with offset 0. The encoded immediate is the offset divided by 4.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(Op: N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(i: 0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(Val: RHSC/4, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1570
1571//===--------------------------------------------------------------------===//
1572
1573/// getAL - Returns a ARMCC::AL immediate node.
/// getAL - Returns a ARMCC::AL immediate node (the "always" predicate used
/// as the default condition-code operand on predicated instructions).
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant(Val: (uint64_t)ARMCC::AL, DL: dl, VT: MVT::i32);
}
1577
/// Copy the memory operand from the original memory node onto the newly
/// selected machine node so alias/scheduling info survives selection.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
}
1582
/// tryARMIndexedLoad - Select a pre/post-indexed ARM-mode load, picking the
/// opcode from the loaded type, extension kind, and whichever addressing-mode
/// selector matches the offset. Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    // i16 loads use addrmode3 and distinguish sign/zero extension.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Signed byte loads need addrmode3 (LDRSB); unsigned byte loads use
    // addrmode2 like i32.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The *_PRE_IMM forms have no separate register-offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
                       CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
                                           VT3: MVT::Other, Ops);
      transferMemOperands(N, Result: New);
      ReplaceNode(F: N, T: New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
                       CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
                                           VT3: MVT::Other, Ops);
      transferMemOperands(N, Result: New);
      ReplaceNode(F: N, T: New);
      return true;
    }
  }

  return false;
}
1661
/// tryT1IndexedLoad - Select a Thumb1 post-incremented i32 load. Only a
/// plain (non-extending) POST_INC load whose increment equals the access
/// size (4) can be represented, via a single-register LDM pseudo.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(Val: LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, dl: SDLoc(N)),
                   CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(Opcode: ARM::tLDR_postidx, dl: SDLoc(N), VT1: MVT::i32,
                                       VT2: MVT::i32, VT3: MVT::Other, Ops);
  transferMemOperands(N, Result: New);
  ReplaceNode(F: N, T: New);
  return true;
}
1688
/// tryT2IndexedLoad - Select a pre/post-indexed Thumb2 load. All Thumb2
/// indexed loads share the imm8 offset form; the opcode is chosen from the
/// loaded type and sign/zero extension. Returns true and replaces N on
/// success.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(Op: N, N: LD->getOffset(), OffImm&: Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, dl: SDLoc(N)),
                     CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
                                         VT3: MVT::Other, Ops);
    transferMemOperands(N, Result: New);
    ReplaceNode(F: N, T: New);
    return true;
  }

  return false;
}
1739
/// Try to select an MVE pre/post-indexed vector load (plain or masked) into
/// one of the VLDR[BHW] writeback machine opcodes. Returns false if the node
/// is not an indexed vector load, or if no alignment/offset-compatible
/// opcode exists.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // An unmasked load is selected with an all-true (None) predicate.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // A masked load uses its mask operand as a "Then" VPT predicate.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(Val: N);

  // Pick the narrowest VLDR that matches the memory type, alignment and
  // imm7 offset encoding (the Shift argument scales the offset range).
  // Extending loads come first so the extension is folded into the opcode.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Val: Pred, DL: SDLoc(N), VT: MVT::i32),
                   PredReg,
                   CurDAG->getRegister(Reg: 0, VT: MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32,
                                       VT2: N->getValueType(ResNo: 0), VT3: MVT::Other, Ops);
  transferMemOperands(N, Result: New);
  // The machine node produces (writeback, data, chain) whereas the ISD load
  // produces (data, writeback, chain), so the first two results are swapped
  // when rewiring uses.
  ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
  ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
  ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1839
1840/// Form a GPRPair pseudo register from a pair of GPR regs.
1841SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1842 SDLoc dl(V0.getNode());
1843 SDValue RegClass =
1844 CurDAG->getTargetConstant(Val: ARM::GPRPairRegClassID, DL: dl, VT: MVT::i32);
1845 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
1846 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
1847 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1848 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1849}
1850
1851/// Form a D register from a pair of S registers.
1852SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1853 SDLoc dl(V0.getNode());
1854 SDValue RegClass =
1855 CurDAG->getTargetConstant(Val: ARM::DPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1856 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1857 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1858 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1859 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1860}
1861
1862/// Form a quad register from a pair of D registers.
1863SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1864 SDLoc dl(V0.getNode());
1865 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QPRRegClassID, DL: dl,
1866 VT: MVT::i32);
1867 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1868 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1869 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1870 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1871}
1872
1873/// Form 4 consecutive D registers from a pair of Q registers.
1874SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1875 SDLoc dl(V0.getNode());
1876 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1877 VT: MVT::i32);
1878 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1879 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1880 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1881 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1882}
1883
1884/// Form 4 consecutive S registers.
1885SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1886 SDValue V2, SDValue V3) {
1887 SDLoc dl(V0.getNode());
1888 SDValue RegClass =
1889 CurDAG->getTargetConstant(Val: ARM::QPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1890 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1891 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1892 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::ssub_2, DL: dl, VT: MVT::i32);
1893 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::ssub_3, DL: dl, VT: MVT::i32);
1894 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1895 V2, SubReg2, V3, SubReg3 };
1896 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1897}
1898
1899/// Form 4 consecutive D registers.
1900SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1901 SDValue V2, SDValue V3) {
1902 SDLoc dl(V0.getNode());
1903 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1904 VT: MVT::i32);
1905 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1906 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1907 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::dsub_2, DL: dl, VT: MVT::i32);
1908 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::dsub_3, DL: dl, VT: MVT::i32);
1909 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1910 V2, SubReg2, V3, SubReg3 };
1911 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1912}
1913
1914/// Form 4 consecutive Q registers.
1915SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1916 SDValue V2, SDValue V3) {
1917 SDLoc dl(V0.getNode());
1918 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQQQPRRegClassID, DL: dl,
1919 VT: MVT::i32);
1920 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1921 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1922 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::qsub_2, DL: dl, VT: MVT::i32);
1923 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::qsub_3, DL: dl, VT: MVT::i32);
1924 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1925 V2, SubReg2, V3, SubReg3 };
1926 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1927}
1928
1929/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1930/// of a NEON VLD or VST instruction. The supported values depend on the
1931/// number of registers being loaded.
1932SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1933 unsigned NumVecs, bool is64BitVector) {
1934 unsigned NumRegs = NumVecs;
1935 if (!is64BitVector && NumVecs < 3)
1936 NumRegs *= 2;
1937
1938 unsigned Alignment = Align->getAsZExtVal();
1939 if (Alignment >= 32 && NumRegs == 4)
1940 Alignment = 32;
1941 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1942 Alignment = 16;
1943 else if (Alignment >= 8)
1944 Alignment = 8;
1945 else
1946 Alignment = 0;
1947
1948 return CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
1949}
1950
1951static bool isVLDfixed(unsigned Opc)
1952{
1953 switch (Opc) {
1954 default: return false;
1955 case ARM::VLD1d8wb_fixed : return true;
1956 case ARM::VLD1d16wb_fixed : return true;
1957 case ARM::VLD1d64Qwb_fixed : return true;
1958 case ARM::VLD1d32wb_fixed : return true;
1959 case ARM::VLD1d64wb_fixed : return true;
1960 case ARM::VLD1d8TPseudoWB_fixed : return true;
1961 case ARM::VLD1d16TPseudoWB_fixed : return true;
1962 case ARM::VLD1d32TPseudoWB_fixed : return true;
1963 case ARM::VLD1d64TPseudoWB_fixed : return true;
1964 case ARM::VLD1d8QPseudoWB_fixed : return true;
1965 case ARM::VLD1d16QPseudoWB_fixed : return true;
1966 case ARM::VLD1d32QPseudoWB_fixed : return true;
1967 case ARM::VLD1d64QPseudoWB_fixed : return true;
1968 case ARM::VLD1q8wb_fixed : return true;
1969 case ARM::VLD1q16wb_fixed : return true;
1970 case ARM::VLD1q32wb_fixed : return true;
1971 case ARM::VLD1q64wb_fixed : return true;
1972 case ARM::VLD1DUPd8wb_fixed : return true;
1973 case ARM::VLD1DUPd16wb_fixed : return true;
1974 case ARM::VLD1DUPd32wb_fixed : return true;
1975 case ARM::VLD1DUPq8wb_fixed : return true;
1976 case ARM::VLD1DUPq16wb_fixed : return true;
1977 case ARM::VLD1DUPq32wb_fixed : return true;
1978 case ARM::VLD2d8wb_fixed : return true;
1979 case ARM::VLD2d16wb_fixed : return true;
1980 case ARM::VLD2d32wb_fixed : return true;
1981 case ARM::VLD2q8PseudoWB_fixed : return true;
1982 case ARM::VLD2q16PseudoWB_fixed : return true;
1983 case ARM::VLD2q32PseudoWB_fixed : return true;
1984 case ARM::VLD2DUPd8wb_fixed : return true;
1985 case ARM::VLD2DUPd16wb_fixed : return true;
1986 case ARM::VLD2DUPd32wb_fixed : return true;
1987 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1988 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1989 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1990 }
1991}
1992
1993static bool isVSTfixed(unsigned Opc)
1994{
1995 switch (Opc) {
1996 default: return false;
1997 case ARM::VST1d8wb_fixed : return true;
1998 case ARM::VST1d16wb_fixed : return true;
1999 case ARM::VST1d32wb_fixed : return true;
2000 case ARM::VST1d64wb_fixed : return true;
2001 case ARM::VST1q8wb_fixed : return true;
2002 case ARM::VST1q16wb_fixed : return true;
2003 case ARM::VST1q32wb_fixed : return true;
2004 case ARM::VST1q64wb_fixed : return true;
2005 case ARM::VST1d8TPseudoWB_fixed : return true;
2006 case ARM::VST1d16TPseudoWB_fixed : return true;
2007 case ARM::VST1d32TPseudoWB_fixed : return true;
2008 case ARM::VST1d64TPseudoWB_fixed : return true;
2009 case ARM::VST1d8QPseudoWB_fixed : return true;
2010 case ARM::VST1d16QPseudoWB_fixed : return true;
2011 case ARM::VST1d32QPseudoWB_fixed : return true;
2012 case ARM::VST1d64QPseudoWB_fixed : return true;
2013 case ARM::VST2d8wb_fixed : return true;
2014 case ARM::VST2d16wb_fixed : return true;
2015 case ARM::VST2d32wb_fixed : return true;
2016 case ARM::VST2q8PseudoWB_fixed : return true;
2017 case ARM::VST2q16PseudoWB_fixed : return true;
2018 case ARM::VST2q32PseudoWB_fixed : return true;
2019 }
2020}
2021
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The "_fixed" forms implicitly increment the base by the access size; the
// "_register" forms take an explicit increment register operand instead.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 writeback forms (D/Q destinations and 3/4-register pseudos).
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  // VLD1/VLD2 all-lanes duplicating writeback forms.
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  // VST1 writeback forms (D/Q sources and 3/4-register pseudos).
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 writeback forms.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 writeback forms.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP writeback forms.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
2094
2095/// Returns true if the given increment is a Constant known to be equal to the
2096/// access size performed by a NEON load/store. This means the "[rN]!" form can
2097/// be used.
2098static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2099 auto C = dyn_cast<ConstantSDNode>(Val&: Inc);
2100 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2101}
2102
/// Select a NEON vld1/vld2/vld3/vld4 of NumVecs vectors, optionally with
/// address-register writeback. The opcode tables are indexed by element size:
/// DOpcodes holds the double-register forms, QOpcodes0 the quad-register
/// forms (or the even-registers half of a vld3/vld4), and QOpcodes1 the
/// odd-registers half.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(Num: 0);
  EVT VT = N->getValueType(ResNo: 0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
  // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector results come back as one wide i64-element super-register;
  // vld3 pseudos round up to a 4-register super-register.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(x: ResTy);
  if (isUpdating)
    ResTys.push_back(x: MVT::i32); // Updated base address.
  ResTys.push_back(x: MVT::Other); // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(Elt: MemAddr);
    Ops.push_back(Elt: Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Elt: Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Elt: Reg0);
    }
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: Reg0);
    Ops.push_back(Elt: Chain);
    VLd = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
                                          VT1: ResTy, VT2: AddrTy, VT3: MVT::Other, Ops: OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs, using the updated address produced by the first
    // load and passing its result in as the tied source super-register.
    Ops.push_back(Elt: SDValue(VLdA, 1));
    Ops.push_back(Elt: Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Elt: Reg0);
    }
    Ops.push_back(Elt: SDValue(VLdA, 0));
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: Reg0);
    Ops.push_back(Elt: Chain);
    VLd = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLd), NewMemRefs: {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(F: N, T: VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(F: SDValue(N, Vec),
                T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2244
/// Select a NEON vst1/vst2/vst3/vst4 of NumVecs vectors, optionally with
/// address-register writeback. The opcode tables are indexed by element size:
/// DOpcodes holds the double-register forms, QOpcodes0 the quad-register
/// forms (or the even-registers half of a vst3/vst4), and QOpcodes1 the
/// odd-registers half.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();

  SDValue Chain = N->getOperand(Num: 0);
  EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
  // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(x: MVT::i32); // Updated base address.
  ResTys.push_back(x: MVT::Other); // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Num: Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
      SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Num: Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Num: Vec0Idx);
      SDValue Q1 = N->getOperand(Num: Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0: Q0, V1: Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(Elt: MemAddr);
    Ops.push_back(Elt: Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Elt: Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Elt: Reg0);
    }
    Ops.push_back(Elt: SrcReg);
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: Reg0);
    Ops.push_back(Elt: Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VSt), NewMemRefs: {MemOp});

    ReplaceNode(F: N, T: VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
  SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
  SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
  // A vst3 stores only three registers; the fourth slot is left undef.
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Num: Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
                                        VT1: MemAddr.getValueType(),
                                        VT2: MVT::Other, Ops: OpsA);
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStA), NewMemRefs: {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers, using the address produced by the first store.
  Ops.push_back(Elt: SDValue(VStA, 0));
  Ops.push_back(Elt: Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Elt: Reg0);
  }
  Ops.push_back(Elt: RegSeq);
  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);
  SDNode *VStB = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStB), NewMemRefs: {MemOp});
  ReplaceNode(F: N, T: VStB);
}
2398
/// Select a NEON vld[234]-lane or vst[234]-lane operation (load/store of one
/// lane of NumVecs vectors), optionally with post-increment address update.
/// DOpcodes/QOpcodes are opcode tables indexed by element size for the
/// double- and quad-register forms respectively.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();

  SDValue Chain = N->getOperand(Num: 0);
  // The lane number is the constant operand following the NumVecs vector
  // operands.
  unsigned Lane = N->getConstantOperandVal(Num: Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the requested alignment to what the instruction can encode: no
  // larger than the total access size, at least 8 bytes, and a power of two.
  // The 3-vector forms have no alignment encoding, so leave it at 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
  // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: loads produce one wide tuple value (modeled as a vector of
  // i64), then the optional writeback GPR, then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    // A 3-vector group is padded out to a 4-register tuple.
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(x: EVT::getVectorVT(Context&: *CurDAG->getContext(),
                                      VT: MVT::i64, NumElements: ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(x: MVT::i32);
  ResTys.push_back(x: MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Elt: MemAddr);
  Ops.push_back(Elt: Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
    // An increment equal to the access size is encoded implicitly (Reg0);
    // anything else is passed as an explicit register increment.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
    Ops.push_back(Elt: IsImmUpdate ? Reg0 : Inc);
  }

  // Collect the input vectors into a single super-register tuple; the
  // missing fourth register of a 3-vector group is filled with IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
  SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Num: Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(Elt: SuperReg);
  Ops.push_back(Elt: getI32Imm(Imm: Lane, dl));
  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdLn), NewMemRefs: {MemOp});
  // Stores are done: the new node replaces N wholesale.
  if (!IsLoad) {
    ReplaceNode(F: N, T: VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(F: SDValue(N, Vec),
                T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
  // Rewire the chain result and, for updating forms, the writeback result.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2527
2528template <typename SDValueVector>
2529void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2530 SDValue PredicateMask) {
2531 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2532 Ops.push_back(PredicateMask);
2533 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2534}
2535
2536template <typename SDValueVector>
2537void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2538 SDValue PredicateMask,
2539 SDValue Inactive) {
2540 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2541 Ops.push_back(PredicateMask);
2542 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2543 Ops.push_back(Inactive);
2544}
2545
2546template <typename SDValueVector>
2547void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2548 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2549 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2550 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2551}
2552
2553template <typename SDValueVector>
2554void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2555 EVT InactiveTy) {
2556 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2557 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2558 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2559 Ops.push_back(SDValue(
2560 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: InactiveTy), 0));
2561}
2562
/// Select an MVE gather/scatter-style intrinsic that takes a vector of base
/// addresses plus an immediate offset and produces an updated base-address
/// vector alongside its data result. Opcodes[0]/[1] are the 32- and 64-bit
/// element variants; Predicated selects the masked form.
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(ResNo: 1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(Elt: N->getOperand(Num: 2)); // vector of base addresses

  int32_t ImmValue = N->getConstantOperandVal(Num: 3);
  Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(Elt: N->getOperand(Num: 0)); // chain

  // The machine node's result order differs from the intrinsic's: result 0
  // of the new node corresponds to the intrinsic's result 1 and vice versa,
  // hence the crossed ReplaceUses calls below.
  SmallVector<EVT, 8> VTs;
  VTs.push_back(Elt: N->getValueType(ResNo: 1));
  VTs.push_back(Elt: N->getValueType(ResNo: 0));
  VTs.push_back(Elt: N->getValueType(ResNo: 2));

  SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), ResultTys: VTs, Ops);
  ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
  ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
  ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
  transferMemOperands(N, Result: New);
  CurDAG->RemoveDeadNode(N);
}
2604
2605void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2606 bool Immediate,
2607 bool HasSaturationOperand) {
2608 SDLoc Loc(N);
2609 SmallVector<SDValue, 8> Ops;
2610
2611 // Two 32-bit halves of the value to be shifted
2612 Ops.push_back(Elt: N->getOperand(Num: 1));
2613 Ops.push_back(Elt: N->getOperand(Num: 2));
2614
2615 // The shift count
2616 if (Immediate) {
2617 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2618 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2619 } else {
2620 Ops.push_back(Elt: N->getOperand(Num: 3));
2621 }
2622
2623 // The immediate saturation operand, if any
2624 if (HasSaturationOperand) {
2625 int32_t SatOp = N->getConstantOperandVal(Num: 4);
2626 int SatBit = (SatOp == 64 ? 0 : 1);
2627 Ops.push_back(Elt: getI32Imm(Imm: SatBit, dl: Loc));
2628 }
2629
2630 // MVE scalar shifts are IT-predicable, so include the standard
2631 // predicate arguments.
2632 Ops.push_back(Elt: getAL(CurDAG, dl: Loc));
2633 Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2634
2635 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2636}
2637
/// Select an MVE VADC/VSBC (add/subtract with carry across vector lanes).
/// If the incoming carry is a compile-time constant whose carry bit matches
/// what the carry-less ("I") variant implies, the carry operand is dropped
/// and OpcodeWithNoCarry is used instead.
void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  // Predicated intrinsics carry the inactive-lanes value as operand 1.
  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(Elt: N->getOperand(Num: FirstInputOp));
  Ops.push_back(Elt: N->getOperand(Num: FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(Num: FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(Val&: CarryIn);
  // The carry lives in bit 29 of the FPSCR-format word.
  uint32_t CarryMask = 1 << 29;
  // Addition's no-carry form assumes carry clear; subtraction's assumes set.
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(Elt: CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         PredicateMask: N->getOperand(Num: FirstInputOp + 3), // predicate
                         Inactive: N->getOperand(Num: FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));

  CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
}
2671
2672void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2673 SDLoc Loc(N);
2674 SmallVector<SDValue, 8> Ops;
2675
2676 // One vector input, followed by a 32-bit word of bits to shift in
2677 // and then an immediate shift count
2678 Ops.push_back(Elt: N->getOperand(Num: 1));
2679 Ops.push_back(Elt: N->getOperand(Num: 2));
2680 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2681 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2682
2683 if (Predicated)
2684 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2685 else
2686 AddEmptyMVEPredicateToOps(Ops, Loc);
2687
2688 CurDAG->SelectNodeTo(N, MachineOpc: ARM::MVE_VSHLC, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2689}
2690
2691static bool SDValueToConstBool(SDValue SDVal) {
2692 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2693 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(Val&: SDVal);
2694 uint64_t Value = SDValConstant->getZExtValue();
2695 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2696 return Value;
2697}
2698
/// Common selection for the MVE VMLALDAV/VMLSLDAV (and VRMLALDAVH/VRMLSLDAVH)
/// family. OpcodesS/OpcodesU are flat opcode tables laid out in groups of
/// Stride entries (one per element size); within a table, adding Stride
/// selects the accumulating form, 2*Stride the exchanged form, and 4*Stride
/// the subtracting form. TySize indexes the element size within a group.
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  // Operands 1..3 are constant flags selecting the instruction variant.
  bool IsUnsigned = SDValueToConstBool(SDVal: N->getOperand(Num: 1));
  bool IsSub = SDValueToConstBool(SDVal: N->getOperand(Num: 2));
  bool IsExchange = SDValueToConstBool(SDVal: N->getOperand(Num: 3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    return isNullConstant(V: N->getOperand(Num: OpNo));
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  // Walk the opcode table using the layout described in the header comment.
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(Elt: N->getOperand(Num: 4));
    Ops.push_back(Elt: N->getOperand(Num: 5));
  }
  // Push the two vector operands
  Ops.push_back(Elt: N->getOperand(Num: 6));
  Ops.push_back(Elt: N->getOperand(Num: 7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
}
2749
2750void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2751 const uint16_t *OpcodesS,
2752 const uint16_t *OpcodesU) {
2753 EVT VecTy = N->getOperand(Num: 6).getValueType();
2754 size_t SizeIndex;
2755 switch (VecTy.getVectorElementType().getSizeInBits()) {
2756 case 16:
2757 SizeIndex = 0;
2758 break;
2759 case 32:
2760 SizeIndex = 1;
2761 break;
2762 default:
2763 llvm_unreachable("bad vector element size");
2764 }
2765
2766 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 2, TySize: SizeIndex);
2767}
2768
2769void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2770 const uint16_t *OpcodesS,
2771 const uint16_t *OpcodesU) {
2772 assert(
2773 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2774 32 &&
2775 "bad vector element size");
2776 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 1, TySize: 0);
2777}
2778
/// Select an MVE VLD2/VLD4 deinterleaving load. The load is emitted as
/// NumVecs chained MVE_VLDn "stage" instructions that each fill part of one
/// wide tuple register; Opcodes is indexed first by element size (8/16/32)
/// and then by stage. When HasWriteback is set, the final stage also
/// produces the updated pointer.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(ResNo: 0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The stages pass one wide value (modeled as a vector of i64) through.
  EVT DataTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  // Seed the tuple with IMPLICIT_DEF; each stage overwrites its part.
  auto Data = SDValue(
      CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: DataTy), 0);
  SDValue Chain = N->getOperand(Num: 0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(Opcode: OurOpcodes[Stage], dl: Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, Result: LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(Opcode: OurOpcodes[NumVecs - 1], dl: Loc, ResultTys, Ops);
  transferMemOperands(N, Result: LoadInst);

  // Rewire N's results: each vector comes from a Q subregister of the final
  // tuple, followed by the optional writeback pointer and the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(F: SDValue(N, i),
                T: CurDAG->getTargetExtractSubreg(SRIdx: ARM::qsub_0 + i, DL: Loc, VT,
                                                Operand: SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(F: SDValue(N, i++), T: SDValue(LoadInst, 1));
  ReplaceUses(F: SDValue(N, i), T: SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}
2834
/// Select an MVE VIDUP/VDDUP-style increment/decrement-and-duplicate
/// operation. Opcodes is indexed by element size; Wrapping adds the limit
/// operand of the VIWDUP/VDWDUP forms, Predicated adds the mask and an
/// inactive-lanes value. Operands are consumed sequentially via OpIdx:
/// [inactive,] base, [limit,] step, [predicate].
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(ResNo: 0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(Num: OpIdx++);

  Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // limit

  // The step must be a compile-time constant and becomes an immediate.
  SDValue ImmOp = N->getOperand(Num: OpIdx++); // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));

  CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
}
2877
/// Select a dual-register CDE (Custom Datapath Extension) instruction
/// (CX1D/CX2D/CX3D). The intrinsic's two 32-bit results are produced as one
/// untyped register pair which is split back into GPRs afterwards.
/// NumExtraOps is the number of plain register operands between the
/// coprocessor number and the trailing immediate; HasAccum selects the
/// accumulating (and therefore IT-predicable) variant.
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(Num: OpIdx++);
  uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
  Ops.push_back(Elt: getI32Imm(Imm: ImmCoprocVal, dl: Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(Num: OpIdx++);
    SDValue AccHi = N->getOperand(Num: OpIdx++);
    // On big-endian targets the halves live in the opposite pair slots.
    if (IsBigEndian)
      std::swap(a&: AccLo, b&: AccHi);
    Ops.push_back(Elt: SDValue(createGPRPairNode(VT: MVT::Untyped, V0: AccLo, V1: AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(Elt: N->getOperand(Num: OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(Num: OpIdx);
  uint32_t ImmVal = Imm->getAsZExtVal();
  Ops.push_back(Elt: getI32Imm(Imm: ImmVal, dl: Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, dl: Loc);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, dl: Loc, VT: MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(a&: SubRegs[0], b&: SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SRIdx: SubRegs[ResIdx], DL: Loc,
                                                    VT: MVT::i32, Operand: ResultPair);
    ReplaceUses(F: SDValue(N, ResIdx), T: SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}
2940
/// Select a NEON vld[1234]-dup operation (load one element and duplicate it
/// across all lanes of NumVecs vectors), optionally with post-increment
/// update. DOpcodes covers the double-register forms; QOpcodes0/QOpcodes1
/// cover the quad-register single- and multi-vector forms respectively.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(Num: 0);
  EVT VT = N->getValueType(ResNo: 0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to what the instruction can encode (no
  // larger than the access, at least 8 bytes, power of two); the 3-vector
  // forms have no alignment encoding, so it stays 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The result is one wide tuple register (modeled as a vector of i64);
  // a 3-vector group is padded out to a 4-register tuple.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(x: ResTy);
  if (isUpdating)
    ResTys.push_back(x: MVT::i32);
  ResTys.push_back(x: MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(Elt: MemAddr);
  Ops.push_back(Elt: Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(Num: 2);
    // An increment equal to the access size is encoded implicitly (Reg0);
    // otherwise switch to the register-update opcode and pass it explicitly.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Elt: Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Elt: Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    // Multi-vector quad forms are split: the first instruction loads into an
    // undefined tuple, and its result feeds the main instruction below.
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl, VT1: ResTy,
                                          VT2: MVT::Other, Ops: OpsA);
    Ops.push_back(Elt: SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdDup), NewMemRefs: {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(F: SDValue(N, 0), T: SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(F: SDValue(N, Vec),
                  T: CurDAG->getTargetExtractSubreg(SRIdx: SubIdx+Vec, DL: dl, VT, Operand: SuperReg));
    }
  }
  // Rewire the chain and, for updating forms, the writeback result.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
3066
/// Try to combine a pair of adjacent v8i16/v8f16 INSERT_VECTOR_ELTs (N and
/// its vector operand) into a single f32 subregister move, or into a
/// VMOVX/VINS sequence, which is cheaper on MVE than two lane inserts.
/// Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(Num: 0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Val: Ins1.getOperand(i: 2)) ||
      !isa<ConstantSDNode>(Val: Ins2.getOperand(i: 2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The two inserts must target one even/odd lane pair (Lane2 even, Lane1
  // the odd lane directly above it) so they cover one full f32 subregister.
  unsigned Lane1 = Ins1.getConstantOperandVal(i: 2);
  unsigned Lane2 = Ins2.getConstantOperandVal(i: 2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(i: 1);
  SDValue Val2 = Ins2.getOperand(i: 1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val: Val1.getOperand(i: 1)) &&
      isa<ConstantSDNode>(Val: Val2.getOperand(i: 1)) &&
      (Val1.getOperand(i: 0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(i: 0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(i: 0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(i: 0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(i: 1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(i: 1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(i: 0) == Val2.getOperand(i: 0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT, Operand: Ins2.getOperand(i: 0),
          Subreg: NewExt);
      ReplaceUses(F: Ins1, T: NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
    // extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          SRIdx: ARM::ssub_0 + ExtractLane1 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val2.getOperand(i: 0));
      // VMOVX moves the top half of an f32 lane down so it can be VINSed.
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Inp2, Op2: Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
                                        Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
      ReplaceUses(F: Ins1, T: NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Val2, Op2: Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
                                      Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
    ReplaceUses(F: Ins1, T: NewIns);
    return true;
  }

  return false;
}
3157
/// Try to replace N (a float<->fixed conversion expressed as a multiply by a
/// constant power of two combined with an int<->float conversion) with a
/// single MVE fixed-point VCVT. FMul is the multiply whose constant operand
/// encodes the scale; FixedToFloat selects the conversion direction.
/// Returns true and replaces N on success.
bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(ResNo: 0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  // Look through the int->float conversion (if any) to the raw integer input.
  SDValue ImmNode = FMul->getOperand(Num: 1);
  SDValue VecVal = FMul->getOperand(Num: 0);
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(Num: 0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Look through a bitcast of the constant operand, but only if it does not
  // change the element size.
  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(i: 0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Recover the splatted constant multiplier as an APFloat, whichever node
  // form it was materialized in.
  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(Val: ImmNode.getOperand(i: 0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(i: 0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(ModImm: Imm, EltBits&: ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(Imm: ImmNode.getConstantOperandVal(i: 0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(Inv: &ToConvert))
      return false;
  }
  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Result&: Converted, RM: llvm::RoundingMode::NearestTiesToEven,
                             IsExact: &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  // The instruction encoding limits the fractional bit count to the element
  // width.
  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(Val: FracBits, DL: SDLoc(N), VT: MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, Loc: SDLoc(N), InactiveTy: Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT: Type, Ops));
  return true;
}
3259
3260bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3261 // Transform a floating-point to fixed-point conversion to a VCVT
3262 if (!Subtarget->hasMVEFloatOps())
3263 return false;
3264 EVT Type = N->getValueType(ResNo: 0);
3265 if (!Type.isVector())
3266 return false;
3267 unsigned int ScalarBits = Type.getScalarSizeInBits();
3268
3269 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3270 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3271 SDNode *Node = N->getOperand(Num: 0).getNode();
3272
3273 // floating-point to fixed-point with one fractional bit gets turned into an
3274 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3275 if (Node->getOpcode() == ISD::FADD) {
3276 if (Node->getOperand(Num: 0) != Node->getOperand(Num: 1))
3277 return false;
3278 SDNodeFlags Flags = Node->getFlags();
3279 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3280 // allowed in 16 bit unsigned floats
3281 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3282 return false;
3283
3284 unsigned Opcode;
3285 switch (ScalarBits) {
3286 case 16:
3287 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3288 break;
3289 case 32:
3290 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3291 break;
3292 }
3293 SmallVector<SDValue, 3> Ops{Node->getOperand(Num: 0),
3294 CurDAG->getConstant(Val: 1, DL: dl, VT: MVT::i32)};
3295 AddEmptyMVEPredicateToOps(Ops, Loc: dl, InactiveTy: Type);
3296
3297 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl, VT: Type, Ops));
3298 return true;
3299 }
3300
3301 if (Node->getOpcode() != ISD::FMUL)
3302 return false;
3303
3304 return transformFixedFloatingPointConversion(N, FMul: Node, IsUnsigned, FixedToFloat: false);
3305}
3306
3307bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3308 // Transform a fixed-point to floating-point conversion to a VCVT
3309 if (!Subtarget->hasMVEFloatOps())
3310 return false;
3311 auto Type = N->getValueType(ResNo: 0);
3312 if (!Type.isVector())
3313 return false;
3314
3315 auto LHS = N->getOperand(Num: 0);
3316 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3317 return false;
3318
3319 return transformFixedFloatingPointConversion(
3320 N, FMul: N, IsUnsigned: LHS.getOpcode() == ISD::UINT_TO_FP, FixedToFloat: true);
3321}
3322
/// Try to select N as a single bitfield extract (t2SBFX/SBFX when isSigned,
/// t2UBFX/UBFX otherwise), or as a cheaper single shift when the extracted
/// field reaches the top of the word. N is an AND, SRL, SRA or
/// SIGN_EXTEND_INREG node (see the call sites in Select()).
///
/// \returns true iff N was replaced with a machine node; false means fall
/// back to the autogenerated patterns.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  // SBFX/UBFX require ARMv6T2.
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
                                Imm&: Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = llvm::countr_one(Value: And_imm) - 1;
        // The shift amount is the position of the lowest extracted bit.
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(ResNo: 0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                              CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: LSB), DL: dl,
                                      VT: MVT::i32);
          SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, MachineOpc: ARM::MOVsi, VT: MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                          CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                          CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
        return true;
      }
    }
    // AND without a matching SRL underneath: nothing to fold.
    return false;
  }

  // Otherwise, we're looking for a shift of a shift. Here N is the outer
  // right shift and operand 0 is an SHL with an immediate amount.
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      // The SHL/SRL pair isolates a field starting at Srl_imm - Shl_imm; a
      // negative result would mean the field starts below bit 0.
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                        CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                        CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: And_imm) &&
      isShiftedMask_32(Value: And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = llvm::countr_zero(Val: And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = llvm::Log2_32(Value: And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                        CurDAG->getTargetConstant(Val: Srl_imm, DL: dl, VT: MVT::i32),
                        CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
      return true;
    }
  }

  // Finally, a sign-extend-in-reg of a right shift is a signed bitfield
  // extract of `Width` bits starting at the shift amount.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: LSB) &&
        !isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRA, Imm&: LSB))
      return false;

    // The field must fit entirely within the 32-bit register.
    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                      CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                      CurDAG->getTargetConstant(Val: Width - 1, DL: dl, VT: MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
    return true;
  }

  return false;
}
3458
3459/// We've got special pseudo-instructions for these
3460void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3461 unsigned Opcode;
3462 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3463 if (MemTy == MVT::i8)
3464 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3465 else if (MemTy == MVT::i16)
3466 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3467 else if (MemTy == MVT::i32)
3468 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3469 else
3470 llvm_unreachable("Unknown AtomicCmpSwap type");
3471
3472 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
3473 N->getOperand(Num: 0)};
3474 SDNode *CmpSwap = CurDAG->getMachineNode(
3475 Opcode, dl: SDLoc(N),
3476 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other), Ops);
3477
3478 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3479 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3480
3481 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
3482 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
3483 CurDAG->RemoveDeadNode(N);
3484}
3485
3486static std::optional<std::pair<unsigned, unsigned>>
3487getContiguousRangeOfSetBits(const APInt &A) {
3488 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3489 unsigned LastOne = A.countr_zero();
3490 if (A.popcount() != (FirstOne - LastOne + 1))
3491 return std::nullopt;
3492 return std::make_pair(x&: FirstOne, y&: LastOne);
3493}
3494
/// Try to simplify (CMPZ (and X, C), #0) on Thumb targets, where C's set
/// bits are contiguous, by replacing the AND with flag-setting shifts
/// (LSLS/LSRS) so the comparison tests the same bits for free.
///
/// \param SwitchEQNEToPLMI set to true when the single tested bit was moved
///        into the sign bit, so the caller must rewrite EQ/NE into PL/MI.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(Num: 0);
  // Only safe if the CMPZ is the AND's sole user, since the AND itself is
  // replaced below.
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(Num: 1);
  if (!isNullConstant(V: Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(i: 0);
  auto C = dyn_cast<ConstantSDNode>(Val: And.getOperand(i: 1));

  if (!C)
    return;
  // Range is (MSB position, LSB position) of C's contiguous run of set bits.
  auto Range = getContiguousRangeOfSetBits(A: C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Helper: emit an immediate-shift machine node (Thumb-1 flag-setting form
  // or the equivalent Thumb-2 opcode).
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
                        CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
      return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
    } else {
      // Thumb-1: shifts implicitly set CPSR, modelled as an explicit def.
      SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Src,
                       CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
      return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(F: And.getNode(), T: NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(F: And.getNode(), T: NewN);
  } else if (Range->first == Range->second) {
    // 3. Only one bit is set. We can shift this into the sign bit and use a
    // PL/MI comparison. This is not safe if CMPZ has multiple uses because
    // only one of them (the one currently being selected) will be switched
    // to use the new condition code.
    if (!N->hasOneUse())
      return;
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(F: And.getNode(), T: NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    // 4. Do a double shift to clear bottom and top bits, but only in
    // thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(F: And.getNode(), T: NewN);
  }
}
3568
3569static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3570 unsigned Opc128[3]) {
3571 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3572 "Unexpected vector shuffle length");
3573 switch (VT.getScalarSizeInBits()) {
3574 default:
3575 llvm_unreachable("Unexpected vector shuffle element size");
3576 case 8:
3577 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3578 case 16:
3579 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3580 case 32:
3581 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3582 }
3583}
3584
3585void ARMDAGToDAGISel::Select(SDNode *N) {
3586 SDLoc dl(N);
3587
3588 if (N->isMachineOpcode()) {
3589 N->setNodeId(-1);
3590 return; // Already selected.
3591 }
3592
3593 switch (N->getOpcode()) {
3594 default: break;
3595 case ISD::STORE: {
3596 // For Thumb1, match an sp-relative store in C++. This is a little
3597 // unfortunate, but I don't think I can make the chain check work
3598 // otherwise. (The chain of the store has to be the same as the chain
3599 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3600 // a direct reference to "SP".)
3601 //
3602 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3603 // a different addressing mode from other four-byte stores.
3604 //
3605 // This pattern usually comes up with call arguments.
3606 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
3607 SDValue Ptr = ST->getBasePtr();
3608 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3609 int RHSC = 0;
3610 if (Ptr.getOpcode() == ISD::ADD &&
3611 isScaledConstantInRange(Node: Ptr.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC))
3612 Ptr = Ptr.getOperand(i: 0);
3613
3614 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3615 cast<RegisterSDNode>(Val: Ptr.getOperand(i: 1))->getReg() == ARM::SP &&
3616 Ptr.getOperand(i: 0) == ST->getChain()) {
3617 SDValue Ops[] = {ST->getValue(),
3618 CurDAG->getRegister(Reg: ARM::SP, VT: MVT::i32),
3619 CurDAG->getTargetConstant(Val: RHSC, DL: dl, VT: MVT::i32),
3620 getAL(CurDAG, dl),
3621 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3622 ST->getChain()};
3623 MachineSDNode *ResNode =
3624 CurDAG->getMachineNode(Opcode: ARM::tSTRspi, dl, VT: MVT::Other, Ops);
3625 MachineMemOperand *MemOp = ST->getMemOperand();
3626 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3627 ReplaceNode(F: N, T: ResNode);
3628 return;
3629 }
3630 }
3631 break;
3632 }
3633 case ISD::WRITE_REGISTER:
3634 if (tryWriteRegister(N))
3635 return;
3636 break;
3637 case ISD::READ_REGISTER:
3638 if (tryReadRegister(N))
3639 return;
3640 break;
3641 case ISD::INLINEASM:
3642 case ISD::INLINEASM_BR:
3643 if (tryInlineAsm(N))
3644 return;
3645 break;
3646 case ISD::Constant: {
3647 unsigned Val = N->getAsZExtVal();
3648 // If we can't materialize the constant we need to use a literal pool
3649 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3650 !Subtarget->genExecuteOnly()) {
3651 SDValue CPIdx = CurDAG->getTargetConstantPool(
3652 C: ConstantInt::get(Ty: Type::getInt32Ty(C&: *CurDAG->getContext()), V: Val),
3653 VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3654
3655 SDNode *ResNode;
3656 if (Subtarget->isThumb()) {
3657 SDValue Ops[] = {
3658 CPIdx,
3659 getAL(CurDAG, dl),
3660 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3661 CurDAG->getEntryNode()
3662 };
3663 ResNode = CurDAG->getMachineNode(Opcode: ARM::tLDRpci, dl, VT1: MVT::i32, VT2: MVT::Other,
3664 Ops);
3665 } else {
3666 SDValue Ops[] = {
3667 CPIdx,
3668 CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3669 getAL(CurDAG, dl),
3670 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3671 CurDAG->getEntryNode()
3672 };
3673 ResNode = CurDAG->getMachineNode(Opcode: ARM::LDRcp, dl, VT1: MVT::i32, VT2: MVT::Other,
3674 Ops);
3675 }
3676 // Annotate the Node with memory operand information so that MachineInstr
3677 // queries work properly. This e.g. gives the register allocation the
3678 // required information for rematerialization.
3679 MachineFunction& MF = CurDAG->getMachineFunction();
3680 MachineMemOperand *MemOp =
3681 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3682 F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
3683
3684 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3685
3686 ReplaceNode(F: N, T: ResNode);
3687 return;
3688 }
3689
3690 // Other cases are autogenerated.
3691 break;
3692 }
3693 case ISD::FrameIndex: {
3694 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3695 int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
3696 SDValue TFI = CurDAG->getTargetFrameIndex(
3697 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3698 if (Subtarget->isThumb1Only()) {
3699 // Set the alignment of the frame object to 4, to avoid having to generate
3700 // more than one ADD
3701 MachineFrameInfo &MFI = MF->getFrameInfo();
3702 if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
3703 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
3704 CurDAG->SelectNodeTo(N, MachineOpc: ARM::tADDframe, VT: MVT::i32, Op1: TFI,
3705 Op2: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
3706 return;
3707 } else {
3708 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3709 ARM::t2ADDri : ARM::ADDri);
3710 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3711 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3712 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3713 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3714 return;
3715 }
3716 }
3717 case ISD::INSERT_VECTOR_ELT: {
3718 if (tryInsertVectorElt(N))
3719 return;
3720 break;
3721 }
3722 case ISD::SRL:
3723 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3724 return;
3725 break;
3726 case ISD::SIGN_EXTEND_INREG:
3727 case ISD::SRA:
3728 if (tryV6T2BitfieldExtractOp(N, isSigned: true))
3729 return;
3730 break;
3731 case ISD::FP_TO_UINT:
3732 case ISD::FP_TO_SINT:
3733 case ISD::FP_TO_UINT_SAT:
3734 case ISD::FP_TO_SINT_SAT:
3735 if (tryFP_TO_INT(N, dl))
3736 return;
3737 break;
3738 case ISD::FMUL:
3739 if (tryFMULFixed(N, dl))
3740 return;
3741 break;
3742 case ISD::MUL:
3743 if (Subtarget->isThumb1Only())
3744 break;
3745 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) {
3746 unsigned RHSV = C->getZExtValue();
3747 if (!RHSV) break;
3748 if (isPowerOf2_32(Value: RHSV-1)) { // 2^n+1?
3749 unsigned ShImm = Log2_32(Value: RHSV-1);
3750 if (ShImm >= 32)
3751 break;
3752 SDValue V = N->getOperand(Num: 0);
3753 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3754 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3755 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3756 if (Subtarget->isThumb()) {
3757 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3758 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2ADDrs, VT: MVT::i32, Ops);
3759 return;
3760 } else {
3761 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3762 Reg0 };
3763 CurDAG->SelectNodeTo(N, MachineOpc: ARM::ADDrsi, VT: MVT::i32, Ops);
3764 return;
3765 }
3766 }
3767 if (isPowerOf2_32(Value: RHSV+1)) { // 2^n-1?
3768 unsigned ShImm = Log2_32(Value: RHSV+1);
3769 if (ShImm >= 32)
3770 break;
3771 SDValue V = N->getOperand(Num: 0);
3772 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3773 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3774 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3775 if (Subtarget->isThumb()) {
3776 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3777 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2RSBrs, VT: MVT::i32, Ops);
3778 return;
3779 } else {
3780 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3781 Reg0 };
3782 CurDAG->SelectNodeTo(N, MachineOpc: ARM::RSBrsi, VT: MVT::i32, Ops);
3783 return;
3784 }
3785 }
3786 }
3787 break;
3788 case ISD::AND: {
3789 // Check for unsigned bitfield extract
3790 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3791 return;
3792
3793 // If an immediate is used in an AND node, it is possible that the immediate
3794 // can be more optimally materialized when negated. If this is the case we
3795 // can negate the immediate and use a BIC instead.
3796 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
3797 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3798 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3799
3800 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3801 // immediate can be negated and fit in the immediate operand of
3802 // a t2BIC, don't do any manual transform here as this can be
3803 // handled by the generic ISel machinery.
3804 bool PreferImmediateEncoding =
3805 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3806 if (!PreferImmediateEncoding &&
3807 ConstantMaterializationCost(Val: Imm, Subtarget) >
3808 ConstantMaterializationCost(Val: ~Imm, Subtarget)) {
3809 // The current immediate costs more to materialize than a negated
3810 // immediate, so negate the immediate and use a BIC.
3811 SDValue NewImm = CurDAG->getConstant(Val: ~Imm, DL: dl, VT: MVT::i32);
3812 // If the new constant didn't exist before, reposition it in the topological
3813 // ordering so it is just before N. Otherwise, don't touch its location.
3814 if (NewImm->getNodeId() == -1)
3815 CurDAG->RepositionNode(Position: N->getIterator(), N: NewImm.getNode());
3816
3817 if (!Subtarget->hasThumb2()) {
3818 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32),
3819 N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3820 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3821 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::tBIC, dl, VT: MVT::i32, Ops));
3822 return;
3823 } else {
3824 SDValue Ops[] = {N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3825 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3826 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3827 ReplaceNode(F: N,
3828 T: CurDAG->getMachineNode(Opcode: ARM::t2BICrr, dl, VT: MVT::i32, Ops));
3829 return;
3830 }
3831 }
3832 }
3833
3834 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3835 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3836 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3837 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3838 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3839 EVT VT = N->getValueType(ResNo: 0);
3840 if (VT != MVT::i32)
3841 break;
3842 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3843 ? ARM::t2MOVTi16
3844 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3845 if (!Opc)
3846 break;
3847 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
3848 N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3849 if (!N1C)
3850 break;
3851 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3852 SDValue N2 = N0.getOperand(i: 1);
3853 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2);
3854 if (!N2C)
3855 break;
3856 unsigned N1CVal = N1C->getZExtValue();
3857 unsigned N2CVal = N2C->getZExtValue();
3858 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3859 (N1CVal & 0xffffU) == 0xffffU &&
3860 (N2CVal & 0xffffU) == 0x0U) {
3861 SDValue Imm16 = CurDAG->getTargetConstant(Val: (N2CVal & 0xFFFF0000U) >> 16,
3862 DL: dl, VT: MVT::i32);
3863 SDValue Ops[] = { N0.getOperand(i: 0), Imm16,
3864 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3865 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
3866 return;
3867 }
3868 }
3869
3870 break;
3871 }
3872 case ARMISD::UMAAL: {
3873 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3874 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1),
3875 N->getOperand(Num: 2), N->getOperand(Num: 3),
3876 getAL(CurDAG, dl),
3877 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3878 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3879 return;
3880 }
3881 case ARMISD::UMLAL:{
3882 if (Subtarget->isThumb()) {
3883 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3884 N->getOperand(Num: 3), getAL(CurDAG, dl),
3885 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3886 ReplaceNode(
3887 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2UMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3888 return;
3889 }else{
3890 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3891 N->getOperand(Num: 3), getAL(CurDAG, dl),
3892 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3893 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3894 ReplaceNode(F: N, T: CurDAG->getMachineNode(
3895 Opcode: Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3896 VT1: MVT::i32, VT2: MVT::i32, Ops));
3897 return;
3898 }
3899 }
3900 case ARMISD::SMLAL:{
3901 if (Subtarget->isThumb()) {
3902 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3903 N->getOperand(Num: 3), getAL(CurDAG, dl),
3904 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3905 ReplaceNode(
3906 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2SMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3907 return;
3908 }else{
3909 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3910 N->getOperand(Num: 3), getAL(CurDAG, dl),
3911 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3912 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3913 ReplaceNode(F: N, T: CurDAG->getMachineNode(
3914 Opcode: Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3915 VT1: MVT::i32, VT2: MVT::i32, Ops));
3916 return;
3917 }
3918 }
3919 case ARMISD::SUBE: {
3920 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3921 break;
3922 // Look for a pattern to match SMMLS
3923 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3924 if (N->getOperand(Num: 1).getOpcode() != ISD::SMUL_LOHI ||
3925 N->getOperand(Num: 2).getOpcode() != ARMISD::SUBC ||
3926 !SDValue(N, 1).use_empty())
3927 break;
3928
3929 if (Subtarget->isThumb())
3930 assert(Subtarget->hasThumb2() &&
3931 "This pattern should not be generated for Thumb");
3932
3933 SDValue SmulLoHi = N->getOperand(Num: 1);
3934 SDValue Subc = N->getOperand(Num: 2);
3935 SDValue Zero = Subc.getOperand(i: 0);
3936
3937 if (!isNullConstant(V: Zero) || Subc.getOperand(i: 1) != SmulLoHi.getValue(R: 0) ||
3938 N->getOperand(Num: 1) != SmulLoHi.getValue(R: 1) ||
3939 N->getOperand(Num: 2) != Subc.getValue(R: 1))
3940 break;
3941
3942 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3943 SDValue Ops[] = { SmulLoHi.getOperand(i: 0), SmulLoHi.getOperand(i: 1),
3944 N->getOperand(Num: 0), getAL(CurDAG, dl),
3945 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3946 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops));
3947 return;
3948 }
3949 case ISD::LOAD: {
3950 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3951 return;
3952 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3953 if (tryT2IndexedLoad(N))
3954 return;
3955 } else if (Subtarget->isThumb()) {
3956 if (tryT1IndexedLoad(N))
3957 return;
3958 } else if (tryARMIndexedLoad(N))
3959 return;
3960 // Other cases are autogenerated.
3961 break;
3962 }
3963 case ISD::MLOAD:
3964 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3965 return;
3966 // Other cases are autogenerated.
3967 break;
3968 case ARMISD::LDRD: {
3969 if (Subtarget->isThumb2())
3970 break; // TableGen handles isel in this case.
3971 SDValue Base, RegOffset, ImmOffset;
3972 const SDValue &Chain = N->getOperand(Num: 0);
3973 const SDValue &Addr = N->getOperand(Num: 1);
3974 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
3975 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
3976 // The register-offset variant of LDRD mandates that the register
3977 // allocated to RegOffset is not reused in any of the remaining operands.
3978 // This restriction is currently not enforced. Therefore emitting this
3979 // variant is explicitly avoided.
3980 Base = Addr;
3981 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3982 }
3983 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
3984 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::LOADDUAL, dl,
3985 ResultTys: {MVT::Untyped, MVT::Other}, Ops);
3986 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
3987 Operand: SDValue(New, 0));
3988 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
3989 Operand: SDValue(New, 0));
3990 transferMemOperands(N, Result: New);
3991 ReplaceUses(F: SDValue(N, 0), T: Lo);
3992 ReplaceUses(F: SDValue(N, 1), T: Hi);
3993 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 1));
3994 CurDAG->RemoveDeadNode(N);
3995 return;
3996 }
3997 case ARMISD::STRD: {
3998 if (Subtarget->isThumb2())
3999 break; // TableGen handles isel in this case.
4000 SDValue Base, RegOffset, ImmOffset;
4001 const SDValue &Chain = N->getOperand(Num: 0);
4002 const SDValue &Addr = N->getOperand(Num: 3);
4003 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4004 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
4005 // The register-offset variant of STRD mandates that the register
4006 // allocated to RegOffset is not reused in any of the remaining operands.
4007 // This restriction is currently not enforced. Therefore emitting this
4008 // variant is explicitly avoided.
4009 Base = Addr;
4010 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4011 }
4012 SDNode *RegPair =
4013 createGPRPairNode(VT: MVT::Untyped, V0: N->getOperand(Num: 1), V1: N->getOperand(Num: 2));
4014 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4015 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::STOREDUAL, dl, VT: MVT::Other, Ops);
4016 transferMemOperands(N, Result: New);
4017 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 0));
4018 CurDAG->RemoveDeadNode(N);
4019 return;
4020 }
4021 case ARMISD::BRCOND: {
4022 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4023 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4024 // Pattern complexity = 6 cost = 1 size = 0
4025
4026 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4027 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4028 // Pattern complexity = 6 cost = 1 size = 0
4029
4030 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4031 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4032 // Pattern complexity = 6 cost = 1 size = 0
4033
4034 unsigned Opc = Subtarget->isThumb() ?
4035 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4036 SDValue Chain = N->getOperand(Num: 0);
4037 SDValue N1 = N->getOperand(Num: 1);
4038 SDValue N2 = N->getOperand(Num: 2);
4039 SDValue Flags = N->getOperand(Num: 3);
4040 assert(N1.getOpcode() == ISD::BasicBlock);
4041 assert(N2.getOpcode() == ISD::Constant);
4042
4043 unsigned CC = (unsigned)N2->getAsZExtVal();
4044
4045 if (Flags.getOpcode() == ARMISD::CMPZ) {
4046 if (Flags.getOperand(i: 0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4047 SDValue Int = Flags.getOperand(i: 0);
4048 uint64_t ID = Int->getConstantOperandVal(Num: 1);
4049
4050 // Handle low-overhead loops.
4051 if (ID == Intrinsic::loop_decrement_reg) {
4052 SDValue Elements = Int.getOperand(i: 2);
4053 SDValue Size = CurDAG->getTargetConstant(Val: Int.getConstantOperandVal(i: 3),
4054 DL: dl, VT: MVT::i32);
4055
4056 SDValue Args[] = { Elements, Size, Int.getOperand(i: 0) };
4057 SDNode *LoopDec =
4058 CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4059 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other),
4060 Ops: Args);
4061 ReplaceUses(F: Int.getNode(), T: LoopDec);
4062
4063 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4064 SDNode *LoopEnd =
4065 CurDAG->getMachineNode(Opcode: ARM::t2LoopEnd, dl, VT: MVT::Other, Ops: EndArgs);
4066
4067 ReplaceUses(F: N, T: LoopEnd);
4068 CurDAG->RemoveDeadNode(N);
4069 CurDAG->RemoveDeadNode(N: Flags.getNode());
4070 CurDAG->RemoveDeadNode(N: Int.getNode());
4071 return;
4072 }
4073 }
4074
4075 bool SwitchEQNEToPLMI;
4076 SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4077 Flags = N->getOperand(Num: 3);
4078
4079 if (SwitchEQNEToPLMI) {
4080 switch ((ARMCC::CondCodes)CC) {
4081 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4082 case ARMCC::NE:
4083 CC = (unsigned)ARMCC::MI;
4084 break;
4085 case ARMCC::EQ:
4086 CC = (unsigned)ARMCC::PL;
4087 break;
4088 }
4089 }
4090 }
4091
4092 SDValue Tmp2 = CurDAG->getTargetConstant(Val: CC, DL: dl, VT: MVT::i32);
4093 Chain = CurDAG->getCopyToReg(Chain, dl, Reg: ARM::CPSR, N: Flags, Glue: SDValue());
4094 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Chain,
4095 Chain.getValue(R: 1)};
4096 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::Other, Ops);
4097 return;
4098 }
4099
  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    // This allows us to avoid materializing the expensive negative constant.
    // The CMPZ #0 is useless and will be peepholed away but we need to keep
    // it for its flags output.
    SDValue X = N->getOperand(Num: 0);
    auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          // Thumb2: one t2ADDri covers the whole [0,256) range. Operands are
          // {src, imm, pred, pred-reg, cc_out}.
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
                            CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
          Add = CurDAG->getMachineNode(Opcode: ARM::t2ADDri, dl, VT: MVT::i32, Ops);
        } else {
          // Thumb1: choose the 3-bit or 8-bit immediate ADDS form; CPSR is
          // passed explicitly as the flags-def operand.
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), X,
                           CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
          Add = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
        }
      }
      if (Add) {
        // Rewrite N in place as CMPZ (ADDS X, #C), #0 so all existing users
        // of the flags result stay attached.
        SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(Val: 0, DL: dl, VT: MVT::i32)};
        CurDAG->MorphNodeTo(N, Opc: ARMISD::CMPZ, VTs: N->getVTList(), Ops: Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
4137
  case ARMISD::CMOV: {
    SDValue Flags = N->getOperand(Num: 3);

    if (Flags.getOpcode() == ARMISD::CMPZ) {
      // Let SelectCMPZ try to rewrite the flag-producing CMPZ (e.g. into a
      // sign-bit test). If it did, an EQ/NE condition must be remapped to
      // PL/MI to match the new flags semantics.
      bool SwitchEQNEToPLMI;
      SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(Num: 2);
        ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant(Val: (unsigned)CC, DL: dl, VT: MVT::i32);
        // Re-read operand 3: SelectCMPZ may have replaced the flags node.
        SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), NewARMcc,
                         N->getOperand(Num: 3)};
        CurDAG->MorphNodeTo(N, Opc: ARMISD::CMOV, VTs: N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    EVT VT = N->getValueType(ResNo: 0);
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    // NEON shuffle instructions are predicable: append AL + no pred-register.
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
    // The instruction produces both halves of the interleave (two results of
    // the same vector type).
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    EVT VT = N->getValueType(ResNo: 0);
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    EVT VT = N->getValueType(ResNo: 0);
    unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    // Build a vector by inserting the scalar operands into the appropriate
    // subregisters of a register-pair/quad (REG_SEQUENCE-style helpers).
    EVT VecVT = N->getValueType(ResNo: 0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      // v2f64: two D registers forming a Q register.
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          F: N, T: createDRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      // v2f32: two S registers forming a D register.
      ReplaceNode(
          F: N, T: createSRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
      return;
    }
    // v4f32: four S registers forming a Q register.
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(F: N,
                T: createQuadSRegsNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1),
                                     V2: N->getOperand(Num: 2), V3: N->getOperand(Num: 3)));
    return;
  }
4224
  // Non-updating load-and-duplicate nodes. Opcode tables are indexed by
  // element size (8/16/32); SelectVLDDup picks the entry from the value type.
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 2, DOpcodes: Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 3, DOpcodes: Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 4, DOpcodes: Opcodes);
    return;
  }
4256
  // Post-indexed (writeback) variants of the load-and-duplicate nodes.
  // Q-register forms for 2/3/4 vectors are split into even/odd pseudo pairs.
  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    // Fourth D entry (VLD1q64wb_fixed) handles the 64-bit element case,
    // where "duplicate" degenerates to a plain 2-register load.
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }
4312
  // Post-indexed interleaved loads. VLD2/VLD4 dispatch to NEON opcodes when
  // available, otherwise to the MVE VLD2n/VLD4n staged-load sequences.
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
    } else {
      // MVE: VLD2 is performed as two stages (VLD20 then VLD21); the final
      // stage carries the writeback.
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, NumVecs: 2, Opcodes, HasWriteback: true);
    }
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      // MVE: VLD4 is four stages (VLD40..VLD43); the last stage writes back.
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, NumVecs: 4, Opcodes, HasWriteback: true);
    }
    return;
  }
4390
  // Post-indexed contiguous multi-register VLD1 nodes. NEON only; on other
  // subtargets we break so the autogenerated matcher handles (or rejects) it.
  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }
    break;
  }

  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
4438
  // Post-indexed single-lane interleaved loads. Q tables only have 16/32-bit
  // entries: 8-bit elements never need the Q form.
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes);
    return;
  }
4468
  // Post-indexed interleaved stores, mirroring the VLDn_UPD cases above.
  // VST2/VST4 require NEON; otherwise fall through to the default matcher.
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
4527
  // Post-indexed contiguous multi-register VST1 nodes. NEON only; break to
  // the autogenerated matcher otherwise.
  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  // Post-indexed single-lane interleaved stores (IsLoad = false); Q tables
  // again cover only 16/32-bit elements.
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes);
    return;
  }
4612
4613 case ISD::INTRINSIC_VOID:
4614 case ISD::INTRINSIC_W_CHAIN: {
4615 unsigned IntNo = N->getConstantOperandVal(Num: 1);
4616 switch (IntNo) {
4617 default:
4618 break;
4619
    // Coprocessor move: read two core registers from a coprocessor.
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(Num: 0);
      unsigned Opc;

      // Pick the Thumb2 or ARM encoding of MRRC/MRRC2.
      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 2), dl)); /* coproc */
      Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 3), dl)); /* opc */
      Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 4), dl)); /* CRm */

      // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
      // instruction will always be '1111' but it is possible in assembly language to specify
      // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(Elt: getAL(CurDAG, dl));
        Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
      }

      Ops.push_back(Elt: Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: RetType, Ops));
      return;
    }
    // 64-bit exclusive loads (plain and acquire variants).
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(Num: 0);
      SDValue MemAddr = N->getOperand(Num: 2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns a i64 value in {i32, i32}
      // Thumb produces two separate i32 results; ARM mode uses a GPRPair
      // (Untyped) that is split with EXTRACT_SUBREG below.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(x: MVT::i32);
        ResTys.push_back(x: MVT::i32);
      } else
        ResTys.push_back(x: MVT::Untyped);
      ResTys.push_back(x: MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
      CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        // Low word: direct result in Thumb, gsub_0 of the pair in ARM mode.
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
              dl, VT: MVT::i32, Op1: SDValue(Ld, 0), Op2: SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(F: SDValue(N, 0), T: Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        // High word: direct result in Thumb, gsub_1 of the pair in ARM mode.
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
              dl, VT: MVT::i32, Op1: SDValue(Ld, 0), Op2: SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(F: SDValue(N, 1), T: Result);
      }
      ReplaceUses(F: SDValue(N, 2), T: OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    // 64-bit exclusive stores (plain and release variants). Returns the i32
    // success/failure status of the store.
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(Num: 0);
      SDValue Val0 = N->getOperand(Num: 2);
      SDValue Val1 = N->getOperand(Num: 3);
      SDValue MemAddr = N->getOperand(Num: 4);

      // Store exclusive double return a i32 value which is the return status
      // of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      // Thumb2 takes the two words as separate operands; ARM mode needs them
      // packed into a GPRPair register.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Elt: Val0);
        Ops.push_back(Elt: Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(Elt: SDValue(createGPRPairNode(VT: MVT::Untyped, V0: Val0, V1: Val1), 0));
      Ops.push_back(Elt: MemAddr);
      Ops.push_back(Elt: getAL(CurDAG, dl));
      Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
      Ops.push_back(Elt: Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
      CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

      ReplaceNode(F: N, T: St);
      return;
    }
4750
    // Non-updating NEON contiguous loads (vld1 and its x2/x3/x4 multi-register
    // variants). Tables are indexed by element size (8/16/32/64).
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      // Q forms are split into a low half (with address update) and high half.
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
4804
    // Non-updating NEON interleaved loads (vld2/vld3/vld4).
    case Intrinsic::arm_neon_vld2: {
      // 64-bit elements use VLD1q64: no interleaving is needed at that width.
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
4843
    // Non-updating NEON load-and-duplicate intrinsics; Q forms use even/odd
    // pseudo pairs.
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
4889
    // Non-updating single-lane interleaved loads; Q tables cover 16/32-bit
    // elements only.
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes);
      return;
    }
4919
4920 case Intrinsic::arm_neon_vst1: {
4921 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
4922 ARM::VST1d32, ARM::VST1d64 };
4923 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4924 ARM::VST1q32, ARM::VST1q64 };
4925 SelectVST(N, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4926 return;
4927 }
4928
4929 case Intrinsic::arm_neon_vst1x2: {
4930 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4931 ARM::VST1q32, ARM::VST1q64 };
4932 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
4933 ARM::VST1d16QPseudo,
4934 ARM::VST1d32QPseudo,
4935 ARM::VST1d64QPseudo };
4936 SelectVST(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4937 return;
4938 }
4939
4940 case Intrinsic::arm_neon_vst1x3: {
4941 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
4942 ARM::VST1d16TPseudo,
4943 ARM::VST1d32TPseudo,
4944 ARM::VST1d64TPseudo };
4945 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4946 ARM::VST1q16LowTPseudo_UPD,
4947 ARM::VST1q32LowTPseudo_UPD,
4948 ARM::VST1q64LowTPseudo_UPD };
4949 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
4950 ARM::VST1q16HighTPseudo,
4951 ARM::VST1q32HighTPseudo,
4952 ARM::VST1q64HighTPseudo };
4953 SelectVST(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4954 return;
4955 }
4956
4957 case Intrinsic::arm_neon_vst1x4: {
4958 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
4959 ARM::VST1d16QPseudo,
4960 ARM::VST1d32QPseudo,
4961 ARM::VST1d64QPseudo };
4962 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4963 ARM::VST1q16LowQPseudo_UPD,
4964 ARM::VST1q32LowQPseudo_UPD,
4965 ARM::VST1q64LowQPseudo_UPD };
4966 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
4967 ARM::VST1q16HighQPseudo,
4968 ARM::VST1q32HighQPseudo,
4969 ARM::VST1q64HighQPseudo };
4970 SelectVST(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4971 return;
4972 }
4973
4974 case Intrinsic::arm_neon_vst2: {
4975 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
4976 ARM::VST2d32, ARM::VST1q64 };
4977 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
4978 ARM::VST2q32Pseudo };
4979 SelectVST(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4980 return;
4981 }
4982
4983 case Intrinsic::arm_neon_vst3: {
4984 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
4985 ARM::VST3d16Pseudo,
4986 ARM::VST3d32Pseudo,
4987 ARM::VST1d64TPseudo };
4988 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4989 ARM::VST3q16Pseudo_UPD,
4990 ARM::VST3q32Pseudo_UPD };
4991 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
4992 ARM::VST3q16oddPseudo,
4993 ARM::VST3q32oddPseudo };
4994 SelectVST(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4995 return;
4996 }
4997
4998 case Intrinsic::arm_neon_vst4: {
4999 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5000 ARM::VST4d16Pseudo,
5001 ARM::VST4d32Pseudo,
5002 ARM::VST1d64QPseudo };
5003 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5004 ARM::VST4q16Pseudo_UPD,
5005 ARM::VST4q32Pseudo_UPD };
5006 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5007 ARM::VST4q16oddPseudo,
5008 ARM::VST4q32oddPseudo };
5009 SelectVST(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
5010 return;
5011 }
5012
5013 case Intrinsic::arm_neon_vst2lane: {
5014 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5015 ARM::VST2LNd16Pseudo,
5016 ARM::VST2LNd32Pseudo };
5017 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5018 ARM::VST2LNq32Pseudo };
5019 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes);
5020 return;
5021 }
5022
5023 case Intrinsic::arm_neon_vst3lane: {
5024 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5025 ARM::VST3LNd16Pseudo,
5026 ARM::VST3LNd32Pseudo };
5027 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5028 ARM::VST3LNq32Pseudo };
5029 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes);
5030 return;
5031 }
5032
5033 case Intrinsic::arm_neon_vst4lane: {
5034 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5035 ARM::VST4LNd16Pseudo,
5036 ARM::VST4LNd32Pseudo };
5037 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5038 ARM::VST4LNq32Pseudo };
5039 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes);
5040 return;
5041 }
5042
5043 case Intrinsic::arm_mve_vldr_gather_base_wb:
5044 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5045 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5046 ARM::MVE_VLDRDU64_qi_pre};
5047 SelectMVE_WB(N, Opcodes,
5048 Predicated: IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5049 return;
5050 }
5051
5052 case Intrinsic::arm_mve_vld2q: {
5053 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5054 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5055 ARM::MVE_VLD21_16};
5056 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5057 ARM::MVE_VLD21_32};
5058 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5059 SelectMVE_VLD(N, NumVecs: 2, Opcodes, HasWriteback: false);
5060 return;
5061 }
5062
5063 case Intrinsic::arm_mve_vld4q: {
5064 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5065 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5066 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5067 ARM::MVE_VLD42_16,
5068 ARM::MVE_VLD43_16};
5069 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5070 ARM::MVE_VLD42_32,
5071 ARM::MVE_VLD43_32};
5072 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5073 SelectMVE_VLD(N, NumVecs: 4, Opcodes, HasWriteback: false);
5074 return;
5075 }
5076 }
5077 break;
5078 }
5079
5080 case ISD::INTRINSIC_WO_CHAIN: {
5081 unsigned IntNo = N->getConstantOperandVal(Num: 0);
5082 switch (IntNo) {
5083 default:
5084 break;
5085
5086 // Scalar f32 -> bf16
5087 case Intrinsic::arm_neon_vcvtbfp2bf: {
5088 SDLoc dl(N);
5089 const SDValue &Src = N->getOperand(Num: 1);
5090 llvm::EVT DestTy = N->getValueType(ResNo: 0);
5091 SDValue Pred = getAL(CurDAG, dl);
5092 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
5093 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5094 CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVTB, VT: DestTy, Ops);
5095 return;
5096 }
5097
5098 // Vector v4f32 -> v4bf16
5099 case Intrinsic::arm_neon_vcvtfp2bf: {
5100 SDLoc dl(N);
5101 const SDValue &Src = N->getOperand(Num: 1);
5102 SDValue Pred = getAL(CurDAG, dl);
5103 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
5104 SDValue Ops[] = { Src, Pred, Reg0 };
5105 CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVT, VT: MVT::v4bf16, Ops);
5106 return;
5107 }
5108
5109 case Intrinsic::arm_mve_urshrl:
5110 SelectMVE_LongShift(N, Opcode: ARM::MVE_URSHRL, Immediate: true, HasSaturationOperand: false);
5111 return;
5112 case Intrinsic::arm_mve_uqshll:
5113 SelectMVE_LongShift(N, Opcode: ARM::MVE_UQSHLL, Immediate: true, HasSaturationOperand: false);
5114 return;
5115 case Intrinsic::arm_mve_srshrl:
5116 SelectMVE_LongShift(N, Opcode: ARM::MVE_SRSHRL, Immediate: true, HasSaturationOperand: false);
5117 return;
5118 case Intrinsic::arm_mve_sqshll:
5119 SelectMVE_LongShift(N, Opcode: ARM::MVE_SQSHLL, Immediate: true, HasSaturationOperand: false);
5120 return;
5121 case Intrinsic::arm_mve_uqrshll:
5122 SelectMVE_LongShift(N, Opcode: ARM::MVE_UQRSHLL, Immediate: false, HasSaturationOperand: true);
5123 return;
5124 case Intrinsic::arm_mve_sqrshrl:
5125 SelectMVE_LongShift(N, Opcode: ARM::MVE_SQRSHRL, Immediate: false, HasSaturationOperand: true);
5126 return;
5127
5128 case Intrinsic::arm_mve_vadc:
5129 case Intrinsic::arm_mve_vadc_predicated:
5130 SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VADC, OpcodeWithNoCarry: ARM::MVE_VADCI, Add: true,
5131 Predicated: IntNo == Intrinsic::arm_mve_vadc_predicated);
5132 return;
5133 case Intrinsic::arm_mve_vsbc:
5134 case Intrinsic::arm_mve_vsbc_predicated:
5135 SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VSBC, OpcodeWithNoCarry: ARM::MVE_VSBCI, Add: false,
5136 Predicated: IntNo == Intrinsic::arm_mve_vsbc_predicated);
5137 return;
5138 case Intrinsic::arm_mve_vshlc:
5139 case Intrinsic::arm_mve_vshlc_predicated:
5140 SelectMVE_VSHLC(N, Predicated: IntNo == Intrinsic::arm_mve_vshlc_predicated);
5141 return;
5142
5143 case Intrinsic::arm_mve_vmlldava:
5144 case Intrinsic::arm_mve_vmlldava_predicated: {
5145 static const uint16_t OpcodesU[] = {
5146 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5147 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5148 };
5149 static const uint16_t OpcodesS[] = {
5150 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5151 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5152 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5153 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5154 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5155 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5156 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5157 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5158 };
5159 SelectMVE_VMLLDAV(N, Predicated: IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5160 OpcodesS, OpcodesU);
5161 return;
5162 }
5163
5164 case Intrinsic::arm_mve_vrmlldavha:
5165 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5166 static const uint16_t OpcodesU[] = {
5167 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5168 };
5169 static const uint16_t OpcodesS[] = {
5170 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5171 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5172 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5173 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5174 };
5175 SelectMVE_VRMLLDAVH(N, Predicated: IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5176 OpcodesS, OpcodesU);
5177 return;
5178 }
5179
5180 case Intrinsic::arm_mve_vidup:
5181 case Intrinsic::arm_mve_vidup_predicated: {
5182 static const uint16_t Opcodes[] = {
5183 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5184 };
5185 SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5186 Predicated: IntNo == Intrinsic::arm_mve_vidup_predicated);
5187 return;
5188 }
5189
5190 case Intrinsic::arm_mve_vddup:
5191 case Intrinsic::arm_mve_vddup_predicated: {
5192 static const uint16_t Opcodes[] = {
5193 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5194 };
5195 SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5196 Predicated: IntNo == Intrinsic::arm_mve_vddup_predicated);
5197 return;
5198 }
5199
5200 case Intrinsic::arm_mve_viwdup:
5201 case Intrinsic::arm_mve_viwdup_predicated: {
5202 static const uint16_t Opcodes[] = {
5203 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5204 };
5205 SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5206 Predicated: IntNo == Intrinsic::arm_mve_viwdup_predicated);
5207 return;
5208 }
5209
5210 case Intrinsic::arm_mve_vdwdup:
5211 case Intrinsic::arm_mve_vdwdup_predicated: {
5212 static const uint16_t Opcodes[] = {
5213 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5214 };
5215 SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5216 Predicated: IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5217 return;
5218 }
5219
5220 case Intrinsic::arm_cde_cx1d:
5221 case Intrinsic::arm_cde_cx1da:
5222 case Intrinsic::arm_cde_cx2d:
5223 case Intrinsic::arm_cde_cx2da:
5224 case Intrinsic::arm_cde_cx3d:
5225 case Intrinsic::arm_cde_cx3da: {
5226 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5227 IntNo == Intrinsic::arm_cde_cx2da ||
5228 IntNo == Intrinsic::arm_cde_cx3da;
5229 size_t NumExtraOps;
5230 uint16_t Opcode;
5231 switch (IntNo) {
5232 case Intrinsic::arm_cde_cx1d:
5233 case Intrinsic::arm_cde_cx1da:
5234 NumExtraOps = 0;
5235 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5236 break;
5237 case Intrinsic::arm_cde_cx2d:
5238 case Intrinsic::arm_cde_cx2da:
5239 NumExtraOps = 1;
5240 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5241 break;
5242 case Intrinsic::arm_cde_cx3d:
5243 case Intrinsic::arm_cde_cx3da:
5244 NumExtraOps = 2;
5245 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5246 break;
5247 default:
5248 llvm_unreachable("Unexpected opcode");
5249 }
5250 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5251 return;
5252 }
5253 }
5254 break;
5255 }
5256
5257 case ISD::ATOMIC_CMP_SWAP:
5258 SelectCMP_SWAP(N);
5259 return;
5260 }
5261
5262 SelectCode(N);
5263}
5264
5265// Inspect a register string of the form
5266// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5267// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5268// and obtain the integer operands from them, adding these operands to the
5269// provided vector.
5270static void getIntOperandsFromRegisterString(StringRef RegString,
5271 SelectionDAG *CurDAG,
5272 const SDLoc &DL,
5273 std::vector<SDValue> &Ops) {
5274 SmallVector<StringRef, 5> Fields;
5275 RegString.split(A&: Fields, Separator: ':');
5276
5277 if (Fields.size() > 1) {
5278 bool AllIntFields = true;
5279
5280 for (StringRef Field : Fields) {
5281 // Need to trim out leading 'cp' characters and get the integer field.
5282 unsigned IntField;
5283 AllIntFields &= !Field.trim(Chars: "CPcp").getAsInteger(Radix: 10, Result&: IntField);
5284 Ops.push_back(x: CurDAG->getTargetConstant(Val: IntField, DL, VT: MVT::i32));
5285 }
5286
5287 assert(AllIntFields &&
5288 "Unexpected non-integer value in special register string.");
5289 (void)AllIntFields;
5290 }
5291}
5292
5293// Maps a Banked Register string to its mask value. The mask value returned is
5294// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5295// mask operand, which expresses which register is to be used, e.g. r8, and in
5296// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5297// was invalid.
5298static inline int getBankedRegisterMask(StringRef RegString) {
5299 auto TheReg = ARMBankedReg::lookupBankedRegByName(Name: RegString.lower());
5300 if (!TheReg)
5301 return -1;
5302 return TheReg->Encoding;
5303}
5304
5305// The flags here are common to those allowed for apsr in the A class cores and
5306// those allowed for the special registers in the M class cores. Returns a
5307// value representing which flags were present, -1 if invalid.
5308static inline int getMClassFlagsMask(StringRef Flags) {
5309 return StringSwitch<int>(Flags)
5310 .Case(S: "", Value: 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5311 // correct when flags are not permitted
5312 .Case(S: "g", Value: 0x1)
5313 .Case(S: "nzcvq", Value: 0x2)
5314 .Case(S: "nzcvqg", Value: 0x3)
5315 .Default(Value: -1);
5316}
5317
5318// Maps MClass special registers string to its value for use in the
5319// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5320// Returns -1 to signify that the string was invalid.
5321static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5322 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Name: Reg);
5323 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5324 if (!TheReg || !TheReg->hasRequiredFeatures(ActiveFeatures: FeatureBits))
5325 return -1;
5326 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5327}
5328
5329static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5330 // The mask operand contains the special register (R Bit) in bit 4, whether
5331 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5332 // bits 3-0 contains the fields to be accessed in the special register, set by
5333 // the flags provided with the register.
5334 int Mask = 0;
5335 if (Reg == "apsr") {
5336 // The flags permitted for apsr are the same flags that are allowed in
5337 // M class registers. We get the flag value and then shift the flags into
5338 // the correct place to combine with the mask.
5339 Mask = getMClassFlagsMask(Flags);
5340 if (Mask == -1)
5341 return -1;
5342 return Mask << 2;
5343 }
5344
5345 if (Reg != "cpsr" && Reg != "spsr") {
5346 return -1;
5347 }
5348
5349 // This is the same as if the flags were "fc"
5350 if (Flags.empty() || Flags == "all")
5351 return Mask | 0x9;
5352
5353 // Inspect the supplied flags string and set the bits in the mask for
5354 // the relevant and valid flags allowed for cpsr and spsr.
5355 for (char Flag : Flags) {
5356 int FlagVal;
5357 switch (Flag) {
5358 case 'c':
5359 FlagVal = 0x1;
5360 break;
5361 case 'x':
5362 FlagVal = 0x2;
5363 break;
5364 case 's':
5365 FlagVal = 0x4;
5366 break;
5367 case 'f':
5368 FlagVal = 0x8;
5369 break;
5370 default:
5371 FlagVal = 0;
5372 }
5373
5374 // This avoids allowing strings where the same flag bit appears twice.
5375 if (!FlagVal || (Mask & FlagVal))
5376 return -1;
5377 Mask |= FlagVal;
5378 }
5379
5380 // If the register is spsr then we need to set the R bit.
5381 if (Reg == "spsr")
5382 Mask |= 0x10;
5383
5384 return Mask;
5385}
5386
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns false (leaving N intact) if the register string is not something
// this target can read.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register name string.
  const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
  const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      // Five fields (cp:opc1:CRn:CRm:opc2) describe a 32-bit MRC read.
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append(IL: { MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      // Three fields (cp:opc1:CRm) describe a 64-bit MRRC read.
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append(IL: { MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the standard predicate operands (AL condition, no CC register)
    // and the incoming chain.
    Ops.push_back(x: getAL(CurDAG, dl: DL));
    Ops.push_back(x: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
    Ops.push_back(x: N->getOperand(Num: 0));
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: ResTypes, Ops));
    return true;
  }

  // Otherwise the string names a register directly; normalise to lower case
  // for the comparisons below.
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
  if (BankedReg != -1) {
    // Banked registers are read with MRSbanked, taking the encoded
    // register/mode mask as an immediate operand.
    Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32),
            getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case(S: "fpscr", Value: ARM::VMRS)
                        .Case(S: "fpexc", Value: ARM::VMRS_FPEXC)
                        .Case(S: "fpsid", Value: ARM::VMRS_FPSID)
                        .Case(S: "mvfr0", Value: ARM::VMRS_MVFR0)
                        .Case(S: "mvfr1", Value: ARM::VMRS_MVFR1)
                        .Case(S: "mvfr2", Value: ARM::VMRS_MVFR2)
                        .Case(S: "fpinst", Value: ARM::VMRS_FPINST)
                        .Case(S: "fpinst2", Value: ARM::VMRS_FPINST2)
                        .Default(Value: 0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // All of these registers require at least a VFPv2 base...
    if (!Subtarget->hasVFP2Base())
      return false;
    // ...and mvfr2 additionally requires FP-ARMv8.
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(F: N,
                T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
                      getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
                      N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MRS_M, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, dl: DL,
                                  VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  return false;
}
5501
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Returns false (leaving N intact) if the register string is not something
// this target can write.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register name string;
  // operand 2 (and 3, for 64-bit writes) carries the value(s) to write.
  const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
  const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      // Five fields (cp:opc1:CRn:CRm:opc2): 32-bit MCR write; the value to
      // write is inserted after the coprocessor and opc1 fields.
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(position: Ops.begin()+2, x: N->getOperand(Num: 2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      // Three fields (cp:opc1:CRm): 64-bit MCRR write; both halves of the
      // value are inserted after the coprocessor and opc1 fields.
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(Num: 2), N->getOperand(Num: 3) };
      Ops.insert(position: Ops.begin()+2, first: WriteValue, last: WriteValue+2);
    }

    // Append the standard predicate operands (AL condition, no CC register)
    // and the incoming chain.
    Ops.push_back(x: getAL(CurDAG, dl: DL));
    Ops.push_back(x: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
    Ops.push_back(x: N->getOperand(Num: 0));

    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // Otherwise the string names a register directly; normalise to lower case
  // for the comparisons below.
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
  if (BankedReg != -1) {
    // Banked registers are written with MSRbanked, taking the encoded
    // register/mode mask as an immediate operand.
    Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32), N->getOperand(Num: 2),
            getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case(S: "fpscr", Value: ARM::VMSR)
                        .Case(S: "fpexc", Value: ARM::VMSR_FPEXC)
                        .Case(S: "fpsid", Value: ARM::VMSR_FPSID)
                        .Case(S: "fpinst", Value: ARM::VMSR_FPINST)
                        .Case(S: "fpinst2", Value: ARM::VMSR_FPINST2)
                        .Default(Value: 0);

  if (Opcode) {
    // All of the VFP system registers require at least a VFPv2 base.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(Num: 2), getAL(CurDAG, dl: DL),
            CurDAG->getRegister(Reg: 0, VT: MVT::i32), N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // Split a name such as "cpsr_fc" into the register part and its flags
  // suffix, for the A/R-class mask construction below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit(Separator: '_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
                      N->getOperand(Num: 2), getAL(CurDAG, dl: DL),
                      CurDAG->getRegister(Reg: 0, VT: MVT::i32), N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MSR_M, dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Val: Mask, DL, VT: MVT::i32), N->getOperand(Num: 2),
            getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  return false;
}
5605
// Rewrite the operand list of an inline-asm node so that each 64-bit "%r"
// value, modeled as two consecutive i32 GPR operands, becomes a single
// GPRPair virtual register (see the comment below for why). Returns false
// if nothing needed rewriting.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(Num: NumOps - 1) : SDValue();

  // Records, for each register operand group, whether it was rewritten to a
  // GPRPair; a tied use must mirror the rewrite applied to its def.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(Num: i);
    AsmNodeOperands.push_back(x: op);

    // The leading fixed operands (chain, asm string, ...) pass through.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // A constant at this position is an operand-group flag word; anything
    // else is copied through unchanged.
    if (const auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(Num: ++i);
      AsmNodeOperands.push_back(x: op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(Elt: false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(Idx&: DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(Num: ++i);
      AsmNodeOperands.push_back(x: op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    // Only groups of exactly two GPRs are rewritten; a tied-to-changed
    // operand carries no register class of its own.
    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(Num: i+1);
    SDValue V1 = N->getOperand(Num: i+2);
    Register Reg0 = cast<RegisterSDNode>(Val&: V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(Val&: V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, Reg: GPVR, VT: MVT::Untyped,
                                               Glue: Chain.getValue(R: 1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
                                                    Operand: RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
                                                    Operand: RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Chain: Sub0, dl, Reg: Reg0, N: Sub0,
                                        Glue: RegCopy.getValue(R: 1));
      SDValue T1 = CurDAG->getCopyToReg(Chain: Sub1, dl, Reg: Reg1, N: Sub1, Glue: T0.getValue(R: 1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(x: T1.getValue(R: 1));
      CurDAG->UpdateNodeOperands(N: GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg0, VT: MVT::i32,
                                          Glue: Chain.getValue(R: 1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg1, VT: MVT::i32,
                                          Glue: T0.getValue(R: 1));
      SDValue Pair = SDValue(createGPRPairNode(VT: MVT::Untyped, V0: T0, V1: T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
      Chain = CurDAG->getCopyToReg(Chain: T1, dl, Reg: GPVR, N: Pair, Glue: T1.getValue(R: 1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(R: 1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Rewrite the flag word: one register (the GPRPair) instead of two.
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Val: Flag, DL: dl, VT: MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(x: PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Re-attach the glue operand (if any) and, when something changed, rebuild
  // the inline-asm node with the rewritten operand list.
  if (Glue.getNode())
    AsmNodeOperands.push_back(x: Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(Opcode: N->getOpcode(), DL: SDLoc(N),
      VTList: CurDAG->getVTList(VT1: MVT::Other, VT2: MVT::Glue), Ops: AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(F: N, T: New.getNode());
  return true;
}
5764
5765bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5766 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5767 std::vector<SDValue> &OutOps) {
5768 switch(ConstraintID) {
5769 default:
5770 llvm_unreachable("Unexpected asm memory constraint");
5771 case InlineAsm::ConstraintCode::m:
5772 case InlineAsm::ConstraintCode::o:
5773 case InlineAsm::ConstraintCode::Q:
5774 case InlineAsm::ConstraintCode::Um:
5775 case InlineAsm::ConstraintCode::Un:
5776 case InlineAsm::ConstraintCode::Uq:
5777 case InlineAsm::ConstraintCode::Us:
5778 case InlineAsm::ConstraintCode::Ut:
5779 case InlineAsm::ConstraintCode::Uv:
5780 case InlineAsm::ConstraintCode::Uy:
5781 // Require the address to be in a register. That is safe for all ARM
5782 // variants and it is hard to do anything much smarter without knowing
5783 // how the operand is used.
5784 OutOps.push_back(x: Op);
5785 return false;
5786 }
5787 return true;
5788}
5789
5790/// createARMISelDag - This pass converts a legalized DAG into a
5791/// ARM-specific DAG, ready for instruction scheduling.
5792///
5793FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5794 CodeGenOptLevel OptLevel) {
5795 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5796}
5797