//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;
  /// Return true because some complex patterns, like those that call
  /// canExtractShiftFromMul, can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
};
}

char ARMDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = N->getAsZExtVal();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
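// For example: with Scale == 4 and range [0, 32), a constant node holding 40
// matches (40 % 4 == 0 and 40 / 4 == 10 is in range, so ScaledConstant
// becomes 10), while 42 fails the divisibility check and 132 fails the range
// check (132 / 4 == 33 >= 32).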

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is a constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).
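    //
    // Concretely, with c1 = 14 and c2 = 1020 (0b1111111100, tz = 2),
    // (add X1, (and (srl X2, 14), 1020)) becomes
    // (add X1, (shl (and (srl X2, 16), 255), 2)): the and+srl pair then
    // selects as ubfx #16, #8 and the shl folds into the add as an lsl #2
    // shifter operand, matching the Swift example below.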

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. on
      // Swift, a left shift of 1 or 2 is free but other shift amounts are not.
      // e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *User = *N->user_begin();
  if (User->getOpcode() == ISD::CopyToReg)
    return true;
  if (User->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // This adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
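// In effect (an illustrative sketch): for (mul X, C) where C == C' << K and
// C' is cheaper to materialize than C, this reports PowerOfTwo = K and
// NewMulConst = C', letting the caller select (mul X, C') with an lsl #K
// folded into a shifter operand instead of materializing the full constant.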

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
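// For example: in (or (shl X, 4), 3) the two operands have no set bits in
// common, so the or computes the same value as (add (shl X, 4), 3) and can
// be selected as an add with a folded shifter operand.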


bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
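// For example: (add R, 100) yields Base = R, OffImm = 100, and (sub R, 100)
// yields OffImm = -100, both encodable as ldr/str [R, #+/-imm12]; an offset
// of 0x1000 or more falls through to the base-only case and the add is
// selected separately.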



bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant; if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}


bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
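// In byte terms: the imm8 here is scaled by 4 (or by 2 for the FP16
// variants), so the foldable offsets are multiples of 4 in [-1020, 1020]
// (respectively multiples of 2 in [-510, 510]); anything else keeps the
// base-only form with a zero offset.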

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}


//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
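// For example (illustrative): for a load from (add R, -1), Thumb1 has no
// negative-offset load encoding, but the add itself is a single subs; so the
// [reg, #imm] selectors back off, the add is selected normally, and the load
// uses the result with a zero offset.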

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    if (!isNullConstant(N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
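// For example (illustrative): an SP-relative load selected here encodes its
// offset as imm8 words, i.e. byte offsets that are multiples of 4 in
// [0, 1020]; the alignment fixups above keep frame objects compatible with
// that encoding.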

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}


//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//


bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32)
                 : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32)
                 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}
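// For example (illustrative): these imm7 forms back the MVE pre/post-indexed
// loads and stores (including the masked ISD::MLOAD/ISD::MSTORE cases above),
// where the 7-bit immediate is scaled by the access size: byte offsets up to
// 127 for Shift == 0, 254 for Shift == 1 and 508 for Shift == 2, negated when
// the addressing mode subtracts.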
1474
1475template <int Min, int Max>
1476bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1477 int Val;
1478 if (isScaledConstantInRange(Node: N, Scale: 1, RangeMin: Min, RangeMax: Max, ScaledConstant&: Val)) {
1479 OffImm = CurDAG->getSignedTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
1480 return true;
1481 }
1482 return false;
1483}
1484
1485bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1486 SDValue &Base,
1487 SDValue &OffReg, SDValue &ShImm) {
1488 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1489 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N))
1490 return false;
1491
1492 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1493 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1494 int RHSC = (int)RHS->getZExtValue();
1495 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1496 return false;
1497 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1498 return false;
1499 }
1500
1501 // Look for (R + R) or (R + (R << [1,2,3])).
1502 unsigned ShAmt = 0;
1503 Base = N.getOperand(i: 0);
1504 OffReg = N.getOperand(i: 1);
1505
1506 // Swap if it is ((R << c) + R).
1507 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: OffReg.getOpcode());
1508 if (ShOpcVal != ARM_AM::lsl) {
1509 ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: Base.getOpcode());
1510 if (ShOpcVal == ARM_AM::lsl)
1511 std::swap(a&: Base, b&: OffReg);
1512 }
1513
1514 if (ShOpcVal == ARM_AM::lsl) {
1515    // Check whether the RHS of the shift is a constant; if not, we can't
1516    // fold it.
1517 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: OffReg.getOperand(i: 1))) {
1518 ShAmt = Sh->getZExtValue();
1519 if (ShAmt < 4 && isShifterOpProfitable(Shift: OffReg, ShOpcVal, ShAmt))
1520 OffReg = OffReg.getOperand(i: 0);
1521 else {
1522 ShAmt = 0;
1523 }
1524 }
1525 }
1526
1527 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1528  // and use it in a shifted operand, do so.
1529 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1530 unsigned PowerOfTwo = 0;
1531 SDValue NewMulConst;
1532 if (canExtractShiftFromMul(N: OffReg, MaxShift: 3, PowerOfTwo, NewMulConst)) {
1533 HandleSDNode Handle(OffReg);
1534 replaceDAGValue(N: OffReg.getOperand(i: 1), M: NewMulConst);
1535 OffReg = Handle.getValue();
1536 ShAmt = PowerOfTwo;
1537 }
1538 }
1539
1540 ShImm = CurDAG->getTargetConstant(Val: ShAmt, DL: SDLoc(N), VT: MVT::i32);
1541
1542 return true;
1543}
1544
1545bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1546 SDValue &OffImm) {
1547 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1548 // instructions.
1549 Base = N;
1550 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1551
1552 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(Op: N))
1553 return true;
1554
1555 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1556 if (!RHS)
1557 return true;
1558
1559 uint32_t RHSC = (int)RHS->getZExtValue();
1560 if (RHSC > 1020 || RHSC % 4 != 0)
1561 return true;
1562
1563 Base = N.getOperand(i: 0);
1564 if (Base.getOpcode() == ISD::FrameIndex) {
1565 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1566 Base = CurDAG->getTargetFrameIndex(
1567 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1568 }
1569
1570 OffImm = CurDAG->getTargetConstant(Val: RHSC/4, DL: SDLoc(N), VT: MVT::i32);
1571 return true;
1572}
1573
1574//===--------------------------------------------------------------------===//
1575
1576/// getAL - Returns an ARMCC::AL immediate node.
1577static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1578 return CurDAG->getTargetConstant(Val: (uint64_t)ARMCC::AL, DL: dl, VT: MVT::i32);
1579}
1580
1581void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1582 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1583 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
1584}
1585
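// Try to select an ARM-mode pre/post-indexed load. Pre-indexed forms update
// the base before the access ("ldr r0, [r1, #4]!"), post-indexed forms update
// it afterwards ("ldr r0, [r1], #4"); either way the updated base is returned
// as an extra result of the machine node.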
1586bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1587 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1588 ISD::MemIndexedMode AM = LD->getAddressingMode();
1589 if (AM == ISD::UNINDEXED)
1590 return false;
1591
1592 EVT LoadedVT = LD->getMemoryVT();
1593 SDValue Offset, AMOpc;
1594 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1595 unsigned Opcode = 0;
1596 bool Match = false;
1597 if (LoadedVT == MVT::i32 && isPre &&
1598 SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1599 Opcode = ARM::LDR_PRE_IMM;
1600 Match = true;
1601 } else if (LoadedVT == MVT::i32 && !isPre &&
1602 SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1603 Opcode = ARM::LDR_POST_IMM;
1604 Match = true;
1605 } else if (LoadedVT == MVT::i32 &&
1606 SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1607 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1608 Match = true;
1609
1610 } else if (LoadedVT == MVT::i16 &&
1611 SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1612 Match = true;
1613 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1614 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1615 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1616 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1617 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1618 if (SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1619 Match = true;
1620 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1621 }
1622 } else {
1623 if (isPre &&
1624 SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1625 Match = true;
1626 Opcode = ARM::LDRB_PRE_IMM;
1627 } else if (!isPre &&
1628 SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1629 Match = true;
1630 Opcode = ARM::LDRB_POST_IMM;
1631 } else if (SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1632 Match = true;
1633 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1634 }
1635 }
1636 }
1637
1638 if (Match) {
1639 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1640 SDValue Chain = LD->getChain();
1641 SDValue Base = LD->getBasePtr();
1642      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
1643 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1644 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
1645 VT3: MVT::Other, Ops);
1646 transferMemOperands(N, Result: New);
1647 ReplaceNode(F: N, T: New);
1648 return true;
1649 } else {
1650 SDValue Chain = LD->getChain();
1651 SDValue Base = LD->getBasePtr();
1652      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
1653 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1654 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
1655 VT3: MVT::Other, Ops);
1656 transferMemOperands(N, Result: New);
1657 ReplaceNode(F: N, T: New);
1658 return true;
1659 }
1660 }
1661
1662 return false;
1663}
1664
1665bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1666 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1667 EVT LoadedVT = LD->getMemoryVT();
1668 ISD::MemIndexedMode AM = LD->getAddressingMode();
1669 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1670 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1671 return false;
1672
1673 auto *COffs = dyn_cast<ConstantSDNode>(Val: LD->getOffset());
1674 if (!COffs || COffs->getZExtValue() != 4)
1675 return false;
1676
1677 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1678  // However, the encoding of LDM is not how the rest of ISel expects a
1679  // post-inc load to look, so we use a pseudo here and switch it to a
1680  // tLDMIA_UPD after ISel.
1681 SDValue Chain = LD->getChain();
1682 SDValue Base = LD->getBasePtr();
1683  SDValue Ops[] = { Base, getAL(CurDAG, dl: SDLoc(N)),
1684 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1685 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::tLDR_postidx, dl: SDLoc(N), VT1: MVT::i32,
1686 VT2: MVT::i32, VT3: MVT::Other, Ops);
1687 transferMemOperands(N, Result: New);
1688 ReplaceNode(F: N, T: New);
1689 return true;
1690}
1691
1692bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1693 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1694 ISD::MemIndexedMode AM = LD->getAddressingMode();
1695 if (AM == ISD::UNINDEXED)
1696 return false;
1697
1698 EVT LoadedVT = LD->getMemoryVT();
1699 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1700 SDValue Offset;
1701 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1702 unsigned Opcode = 0;
1703 bool Match = false;
1704 if (SelectT2AddrModeImm8Offset(Op: N, N: LD->getOffset(), OffImm&: Offset)) {
1705 switch (LoadedVT.getSimpleVT().SimpleTy) {
1706 case MVT::i32:
1707 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1708 break;
1709 case MVT::i16:
1710 if (isSExtLd)
1711 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1712 else
1713 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1714 break;
1715 case MVT::i8:
1716 case MVT::i1:
1717 if (isSExtLd)
1718 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1719 else
1720 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1721 break;
1722 default:
1723 return false;
1724 }
1725 Match = true;
1726 }
1727
1728 if (Match) {
1729 SDValue Chain = LD->getChain();
1730 SDValue Base = LD->getBasePtr();
1731    SDValue Ops[] = { Base, Offset, getAL(CurDAG, dl: SDLoc(N)),
1732 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1733 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
1734 VT3: MVT::Other, Ops);
1735 transferMemOperands(N, Result: New);
1736 ReplaceNode(F: N, T: New);
1737 return true;
1738 }
1739
1740 return false;
1741}
1742
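// Try to select an MVE pre/post-indexed vector load, covering both ordinary
// and masked (VPT-predicated) loads. The offset must fit the scaled imm7
// form; e.g. (illustrative) a post-incrementing v4i32 load with a +16 offset
// can become MVE_VLDRWU32_post.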
1743bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1744 EVT LoadedVT;
1745 unsigned Opcode = 0;
1746 bool isSExtLd, isPre;
1747 Align Alignment;
1748 ARMVCC::VPTCodes Pred;
1749 SDValue PredReg;
1750 SDValue Chain, Base, Offset;
1751
1752 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
1753 ISD::MemIndexedMode AM = LD->getAddressingMode();
1754 if (AM == ISD::UNINDEXED)
1755 return false;
1756 LoadedVT = LD->getMemoryVT();
1757 if (!LoadedVT.isVector())
1758 return false;
1759
1760 Chain = LD->getChain();
1761 Base = LD->getBasePtr();
1762 Offset = LD->getOffset();
1763 Alignment = LD->getAlign();
1764 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1765 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1766 Pred = ARMVCC::None;
1767 PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
1768 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
1769 ISD::MemIndexedMode AM = LD->getAddressingMode();
1770 if (AM == ISD::UNINDEXED)
1771 return false;
1772 LoadedVT = LD->getMemoryVT();
1773 if (!LoadedVT.isVector())
1774 return false;
1775
1776 Chain = LD->getChain();
1777 Base = LD->getBasePtr();
1778 Offset = LD->getOffset();
1779 Alignment = LD->getAlign();
1780 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1781 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1782 Pred = ARMVCC::Then;
1783 PredReg = LD->getMask();
1784 } else
1785 llvm_unreachable("Expected a Load or a Masked Load!");
1786
1787 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1788 // as opposed to a vldrw.32). This can allow extra addressing modes or
1789 // alignments for what is otherwise an equivalent instruction.
1790 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(Val: N);
1791
1792 SDValue NewOffset;
1793 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1794 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1)) {
1795 if (isSExtLd)
1796 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1797 else
1798 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1799 } else if (LoadedVT == MVT::v8i8 &&
1800 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
1801 if (isSExtLd)
1802 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1803 else
1804 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1805 } else if (LoadedVT == MVT::v4i8 &&
1806 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
1807 if (isSExtLd)
1808 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1809 else
1810 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1811 } else if (Alignment >= Align(4) &&
1812 (CanChangeType || LoadedVT == MVT::v4i32 ||
1813 LoadedVT == MVT::v4f32) &&
1814 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 2))
1815 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1816 else if (Alignment >= Align(2) &&
1817 (CanChangeType || LoadedVT == MVT::v8i16 ||
1818 LoadedVT == MVT::v8f16) &&
1819 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1))
1820 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1821 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1822 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0))
1823 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1824 else
1825 return false;
1826
1827 SDValue Ops[] = {Base,
1828 NewOffset,
1829 CurDAG->getTargetConstant(Val: Pred, DL: SDLoc(N), VT: MVT::i32),
1830 PredReg,
1831 CurDAG->getRegister(Reg: 0, VT: MVT::i32), // tp_reg
1832 Chain};
1833 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32,
1834 VT2: N->getValueType(ResNo: 0), VT3: MVT::Other, Ops);
1835 transferMemOperands(N, Result: New);
1836 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
1837 ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
1838 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
1839 CurDAG->RemoveDeadNode(N);
1840 return true;
1841}
1842
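// The create*Node helpers below build REG_SEQUENCE pseudos that glue several
// registers into one super-register. The operand layout is
//   (REG_SEQUENCE RegClassID, V0, subreg0, V1, subreg1, ...)
// so, as a sketch, pairing two GPRs into a GPRPair uses the gsub_0 and gsub_1
// subregister indices.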
1843/// Form a GPRPair pseudo register from a pair of GPR regs.
1844SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1845 SDLoc dl(V0.getNode());
1846 SDValue RegClass =
1847 CurDAG->getTargetConstant(Val: ARM::GPRPairRegClassID, DL: dl, VT: MVT::i32);
1848 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
1849 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
1850 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1851 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1852}
1853
1854/// Form a D register from a pair of S registers.
1855SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1856 SDLoc dl(V0.getNode());
1857 SDValue RegClass =
1858 CurDAG->getTargetConstant(Val: ARM::DPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1859 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1860 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1861 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1862 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1863}
1864
1865/// Form a quad register from a pair of D registers.
1866SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1867 SDLoc dl(V0.getNode());
1868 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QPRRegClassID, DL: dl,
1869 VT: MVT::i32);
1870 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1871 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1872 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1873 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1874}
1875
1876/// Form 4 consecutive D registers from a pair of Q registers.
1877SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1878 SDLoc dl(V0.getNode());
1879 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1880 VT: MVT::i32);
1881 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1882 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1883 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1884 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1885}
1886
1887/// Form 4 consecutive S registers.
1888SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1889 SDValue V2, SDValue V3) {
1890 SDLoc dl(V0.getNode());
1891 SDValue RegClass =
1892 CurDAG->getTargetConstant(Val: ARM::QPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1893 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1894 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1895 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::ssub_2, DL: dl, VT: MVT::i32);
1896 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::ssub_3, DL: dl, VT: MVT::i32);
1897 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1898 V2, SubReg2, V3, SubReg3 };
1899 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1900}
1901
1902/// Form 4 consecutive D registers.
1903SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1904 SDValue V2, SDValue V3) {
1905 SDLoc dl(V0.getNode());
1906 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1907 VT: MVT::i32);
1908 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1909 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1910 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::dsub_2, DL: dl, VT: MVT::i32);
1911 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::dsub_3, DL: dl, VT: MVT::i32);
1912 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1913 V2, SubReg2, V3, SubReg3 };
1914 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1915}
1916
1917/// Form 4 consecutive Q registers.
1918SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1919 SDValue V2, SDValue V3) {
1920 SDLoc dl(V0.getNode());
1921 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQQQPRRegClassID, DL: dl,
1922 VT: MVT::i32);
1923 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1924 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1925 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::qsub_2, DL: dl, VT: MVT::i32);
1926 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::qsub_3, DL: dl, VT: MVT::i32);
1927 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1928 V2, SubReg2, V3, SubReg3 };
1929 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1930}
1931
1932/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1933/// of a NEON VLD or VST instruction. The supported values depend on the
1934/// number of registers being loaded.
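/// For example (illustrative), a VLD1 of one Q register counts as two D
/// registers, so an alignment of 16 or more bytes is encoded as 16 and
/// anything below 8 bytes is encoded as 0 (no alignment guarantee).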
1935SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1936 unsigned NumVecs, bool is64BitVector) {
1937 unsigned NumRegs = NumVecs;
1938 if (!is64BitVector && NumVecs < 3)
1939 NumRegs *= 2;
1940
1941 unsigned Alignment = Align->getAsZExtVal();
1942 if (Alignment >= 32 && NumRegs == 4)
1943 Alignment = 32;
1944 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1945 Alignment = 16;
1946 else if (Alignment >= 8)
1947 Alignment = 8;
1948 else
1949 Alignment = 0;
1950
1951 return CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
1952}
1953
1954static bool isVLDfixed(unsigned Opc) {
1956 switch (Opc) {
1957 default: return false;
1958 case ARM::VLD1d8wb_fixed : return true;
1959 case ARM::VLD1d16wb_fixed : return true;
1960 case ARM::VLD1d64Qwb_fixed : return true;
1961 case ARM::VLD1d32wb_fixed : return true;
1962 case ARM::VLD1d64wb_fixed : return true;
1963 case ARM::VLD1d8TPseudoWB_fixed : return true;
1964 case ARM::VLD1d16TPseudoWB_fixed : return true;
1965 case ARM::VLD1d32TPseudoWB_fixed : return true;
1966 case ARM::VLD1d64TPseudoWB_fixed : return true;
1967 case ARM::VLD1d8QPseudoWB_fixed : return true;
1968 case ARM::VLD1d16QPseudoWB_fixed : return true;
1969 case ARM::VLD1d32QPseudoWB_fixed : return true;
1970 case ARM::VLD1d64QPseudoWB_fixed : return true;
1971 case ARM::VLD1q8wb_fixed : return true;
1972 case ARM::VLD1q16wb_fixed : return true;
1973 case ARM::VLD1q32wb_fixed : return true;
1974 case ARM::VLD1q64wb_fixed : return true;
1975 case ARM::VLD1DUPd8wb_fixed : return true;
1976 case ARM::VLD1DUPd16wb_fixed : return true;
1977 case ARM::VLD1DUPd32wb_fixed : return true;
1978 case ARM::VLD1DUPq8wb_fixed : return true;
1979 case ARM::VLD1DUPq16wb_fixed : return true;
1980 case ARM::VLD1DUPq32wb_fixed : return true;
1981 case ARM::VLD2d8wb_fixed : return true;
1982 case ARM::VLD2d16wb_fixed : return true;
1983 case ARM::VLD2d32wb_fixed : return true;
1984 case ARM::VLD2q8PseudoWB_fixed : return true;
1985 case ARM::VLD2q16PseudoWB_fixed : return true;
1986 case ARM::VLD2q32PseudoWB_fixed : return true;
1987 case ARM::VLD2DUPd8wb_fixed : return true;
1988 case ARM::VLD2DUPd16wb_fixed : return true;
1989 case ARM::VLD2DUPd32wb_fixed : return true;
1990 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1991 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1992 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1993 }
1994}
1995
1996static bool isVSTfixed(unsigned Opc) {
1998 switch (Opc) {
1999 default: return false;
2000 case ARM::VST1d8wb_fixed : return true;
2001 case ARM::VST1d16wb_fixed : return true;
2002 case ARM::VST1d32wb_fixed : return true;
2003 case ARM::VST1d64wb_fixed : return true;
2004 case ARM::VST1q8wb_fixed : return true;
2005 case ARM::VST1q16wb_fixed : return true;
2006 case ARM::VST1q32wb_fixed : return true;
2007 case ARM::VST1q64wb_fixed : return true;
2008 case ARM::VST1d8TPseudoWB_fixed : return true;
2009 case ARM::VST1d16TPseudoWB_fixed : return true;
2010 case ARM::VST1d32TPseudoWB_fixed : return true;
2011 case ARM::VST1d64TPseudoWB_fixed : return true;
2012 case ARM::VST1d8QPseudoWB_fixed : return true;
2013 case ARM::VST1d16QPseudoWB_fixed : return true;
2014 case ARM::VST1d32QPseudoWB_fixed : return true;
2015 case ARM::VST1d64QPseudoWB_fixed : return true;
2016 case ARM::VST2d8wb_fixed : return true;
2017 case ARM::VST2d16wb_fixed : return true;
2018 case ARM::VST2d32wb_fixed : return true;
2019 case ARM::VST2q8PseudoWB_fixed : return true;
2020 case ARM::VST2q16PseudoWB_fixed : return true;
2021 case ARM::VST2q32PseudoWB_fixed : return true;
2022 }
2023}
2024
2025// Get the register stride update opcode of a VLD/VST instruction that
2026// is otherwise equivalent to the given fixed stride updating instruction.
2027static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2028 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2029 && "Incorrect fixed stride updating instruction.");
2030 switch (Opc) {
2031 default: break;
2032 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2033 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2034 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2035 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2036 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2037 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2038 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2039 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2040 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2041 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2042 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2043 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2044 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2045 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2046 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2047 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2048 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2049 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2050 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2051 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2052 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2053 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2054 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2055 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2056 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2057 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2058 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2059
2060 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2061 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2062 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2063 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2064 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2065 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2066 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2067 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2068 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2069 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2070 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2071 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2072 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2073 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2074 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2075 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2076
2077 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2078 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2079 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2080 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2081 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2082 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2083
2084 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2085 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2086 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2087 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2088 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2089 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2090
2091 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2092 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2093 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2094 }
2095 return Opc; // If not one we handle, return it unchanged.
2096}
2097
2098/// Returns true if the given increment is a Constant known to be equal to the
2099/// access size performed by a NEON load/store. This means the "[rN]!" form can
2100/// be used.
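/// For example (illustrative), a vld2.32 of two 64-bit D registers accesses
/// 16 bytes, so an increment constant of 16 permits "vld2.32 {d0, d1}, [r0]!".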
2101static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2102 auto C = dyn_cast<ConstantSDNode>(Val&: Inc);
2103 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2104}
2105
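// Select a NEON VLD1-VLD4. The opcode tables are indexed by element size
// (8/16/32/64 map to indices 0-3); DOpcodes covers double-register forms and
// QOpcodes0 the directly supported quad forms, while quad VLD3/VLD4 are split
// into an even-register load (QOpcodes0) chained to an odd-register load
// (QOpcodes1).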
2106void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2107 const uint16_t *DOpcodes,
2108 const uint16_t *QOpcodes0,
2109 const uint16_t *QOpcodes1) {
2110 assert(Subtarget->hasNEON());
2111 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2112 SDLoc dl(N);
2113
2114 SDValue MemAddr, Align;
2115 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2116 // nodes are not intrinsics.
2117 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2118 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2119 return;
2120
2121 SDValue Chain = N->getOperand(Num: 0);
2122 EVT VT = N->getValueType(ResNo: 0);
2123 bool is64BitVector = VT.is64BitVector();
2124 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2125
2126 unsigned OpcodeIndex;
2127 switch (VT.getSimpleVT().SimpleTy) {
2128 default: llvm_unreachable("unhandled vld type");
2129 // Double-register operations:
2130 case MVT::v8i8: OpcodeIndex = 0; break;
2131 case MVT::v4f16:
2132 case MVT::v4bf16:
2133 case MVT::v4i16: OpcodeIndex = 1; break;
2134 case MVT::v2f32:
2135 case MVT::v2i32: OpcodeIndex = 2; break;
2136 case MVT::v1i64: OpcodeIndex = 3; break;
2137 // Quad-register operations:
2138 case MVT::v16i8: OpcodeIndex = 0; break;
2139 case MVT::v8f16:
2140 case MVT::v8bf16:
2141 case MVT::v8i16: OpcodeIndex = 1; break;
2142 case MVT::v4f32:
2143 case MVT::v4i32: OpcodeIndex = 2; break;
2144 case MVT::v2f64:
2145 case MVT::v2i64: OpcodeIndex = 3; break;
2146 }
2147
2148 EVT ResTy;
2149 if (NumVecs == 1)
2150 ResTy = VT;
2151 else {
2152 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2153 if (!is64BitVector)
2154 ResTyElts *= 2;
2155 ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
2156 }
2157 std::vector<EVT> ResTys;
2158 ResTys.push_back(x: ResTy);
2159 if (isUpdating)
2160 ResTys.push_back(x: MVT::i32);
2161 ResTys.push_back(x: MVT::Other);
2162
2163 SDValue Pred = getAL(CurDAG, dl);
2164 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
2165 SDNode *VLd;
2166 SmallVector<SDValue, 7> Ops;
2167
2168 // Double registers and VLD1/VLD2 quad registers are directly supported.
2169 if (is64BitVector || NumVecs <= 2) {
2170 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2171 QOpcodes0[OpcodeIndex]);
2172 Ops.push_back(Elt: MemAddr);
2173 Ops.push_back(Elt: Align);
2174 if (isUpdating) {
2175 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2176 bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2177 if (!IsImmUpdate) {
2178 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2179 // check for the opcode rather than the number of vector elements.
2180 if (isVLDfixed(Opc))
2181 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2182 Ops.push_back(Elt: Inc);
2183        // A fixed-increment VLD1/VLD2 does not need Reg0, so only include
2184        // it in the operands when the opcode is not a fixed-increment form.
2185 } else if (!isVLDfixed(Opc))
2186 Ops.push_back(Elt: Reg0);
2187 }
2188 Ops.push_back(Elt: Pred);
2189 Ops.push_back(Elt: Reg0);
2190 Ops.push_back(Elt: Chain);
2191 VLd = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2192
2193 } else {
2194 // Otherwise, quad registers are loaded with two separate instructions,
2195 // where one loads the even registers and the other loads the odd registers.
2196 EVT AddrTy = MemAddr.getValueType();
2197
2198 // Load the even subregs. This is always an updating load, so that it
2199 // provides the address to the second load for the odd subregs.
2200 SDValue ImplDef =
2201 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
2202 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2203 SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2204 VT1: ResTy, VT2: AddrTy, VT3: MVT::Other, Ops: OpsA);
2205 Chain = SDValue(VLdA, 2);
2206
2207 // Load the odd subregs.
2208 Ops.push_back(Elt: SDValue(VLdA, 1));
2209 Ops.push_back(Elt: Align);
2210 if (isUpdating) {
2211 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2212 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2213 "only constant post-increment update allowed for VLD3/4");
2214 (void)Inc;
2215 Ops.push_back(Elt: Reg0);
2216 }
2217 Ops.push_back(Elt: SDValue(VLdA, 0));
2218 Ops.push_back(Elt: Pred);
2219 Ops.push_back(Elt: Reg0);
2220 Ops.push_back(Elt: Chain);
2221 VLd = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys, Ops);
2222 }
2223
2224 // Transfer memoperands.
2225 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2226 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLd), NewMemRefs: {MemOp});
2227
2228 if (NumVecs == 1) {
2229 ReplaceNode(F: N, T: VLd);
2230 return;
2231 }
2232
2233 // Extract out the subregisters.
2234 SDValue SuperReg = SDValue(VLd, 0);
2235 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2236 ARM::qsub_3 == ARM::qsub_0 + 3,
2237 "Unexpected subreg numbering");
2238 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2239 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2240 ReplaceUses(F: SDValue(N, Vec),
2241 T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2242 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLd, 1));
2243 if (isUpdating)
2244 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLd, 2));
2245 CurDAG->RemoveDeadNode(N);
2246}
2247
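// Select a NEON VST1-VST4, mirroring SelectVLD: the source vectors are first
// glued into a super-register with REG_SEQUENCE, and quad VST3/VST4 are split
// into two chained stores covering the even and the odd D registers.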
2248void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2249 const uint16_t *DOpcodes,
2250 const uint16_t *QOpcodes0,
2251 const uint16_t *QOpcodes1) {
2252 assert(Subtarget->hasNEON());
2253 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2254 SDLoc dl(N);
2255
2256 SDValue MemAddr, Align;
2257 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2258 // nodes are not intrinsics.
2259 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2260 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2261 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2262 return;
2263
2264 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2265
2266 SDValue Chain = N->getOperand(Num: 0);
2267 EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2268 bool is64BitVector = VT.is64BitVector();
2269 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2270
2271 unsigned OpcodeIndex;
2272 switch (VT.getSimpleVT().SimpleTy) {
2273 default: llvm_unreachable("unhandled vst type");
2274 // Double-register operations:
2275 case MVT::v8i8: OpcodeIndex = 0; break;
2276 case MVT::v4f16:
2277 case MVT::v4bf16:
2278 case MVT::v4i16: OpcodeIndex = 1; break;
2279 case MVT::v2f32:
2280 case MVT::v2i32: OpcodeIndex = 2; break;
2281 case MVT::v1i64: OpcodeIndex = 3; break;
2282 // Quad-register operations:
2283 case MVT::v16i8: OpcodeIndex = 0; break;
2284 case MVT::v8f16:
2285 case MVT::v8bf16:
2286 case MVT::v8i16: OpcodeIndex = 1; break;
2287 case MVT::v4f32:
2288 case MVT::v4i32: OpcodeIndex = 2; break;
2289 case MVT::v2f64:
2290 case MVT::v2i64: OpcodeIndex = 3; break;
2291 }
2292
2293 std::vector<EVT> ResTys;
2294 if (isUpdating)
2295 ResTys.push_back(x: MVT::i32);
2296 ResTys.push_back(x: MVT::Other);
2297
2298 SDValue Pred = getAL(CurDAG, dl);
2299 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
2300 SmallVector<SDValue, 7> Ops;
2301
2302 // Double registers and VST1/VST2 quad registers are directly supported.
2303 if (is64BitVector || NumVecs <= 2) {
2304 SDValue SrcReg;
2305 if (NumVecs == 1) {
2306 SrcReg = N->getOperand(Num: Vec0Idx);
2307 } else if (is64BitVector) {
2308 // Form a REG_SEQUENCE to force register allocation.
2309 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2310 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2311 if (NumVecs == 2)
2312 SrcReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
2313 else {
2314 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2315 // If it's a vst3, form a quad D-register and leave the last part as
2316 // an undef.
2317 SDValue V3 = (NumVecs == 3)
2318 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2319 : N->getOperand(Num: Vec0Idx + 3);
2320 SrcReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
2321 }
2322 } else {
2323 // Form a QQ register.
2324 SDValue Q0 = N->getOperand(Num: Vec0Idx);
2325 SDValue Q1 = N->getOperand(Num: Vec0Idx + 1);
2326 SrcReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0: Q0, V1: Q1), 0);
2327 }
2328
2329 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2330 QOpcodes0[OpcodeIndex]);
2331 Ops.push_back(Elt: MemAddr);
2332 Ops.push_back(Elt: Align);
2333 if (isUpdating) {
2334 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2335 bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2336 if (!IsImmUpdate) {
2337 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2338 // check for the opcode rather than the number of vector elements.
2339 if (isVSTfixed(Opc))
2340 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2341 Ops.push_back(Elt: Inc);
2342 }
2343      // A fixed-increment VST1/VST2 does not need Reg0, so only include it
2344      // in the operands when the opcode is not a fixed-increment form.
2345 else if (!isVSTfixed(Opc))
2346 Ops.push_back(Elt: Reg0);
2347 }
2348 Ops.push_back(Elt: SrcReg);
2349 Ops.push_back(Elt: Pred);
2350 Ops.push_back(Elt: Reg0);
2351 Ops.push_back(Elt: Chain);
2352 SDNode *VSt = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2353
2354 // Transfer memoperands.
2355 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VSt), NewMemRefs: {MemOp});
2356
2357 ReplaceNode(F: N, T: VSt);
2358 return;
2359 }
2360
2361 // Otherwise, quad registers are stored with two separate instructions,
2362 // where one stores the even registers and the other stores the odd registers.
2363
2364 // Form the QQQQ REG_SEQUENCE.
2365 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2366 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2367 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2368 SDValue V3 = (NumVecs == 3)
2369 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2370 : N->getOperand(Num: Vec0Idx + 3);
2371 SDValue RegSeq = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);
2372
2373 // Store the even D registers. This is always an updating store, so that it
2374 // provides the address to the second store for the odd subregs.
2375 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2376 SDNode *VStA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2377 VT1: MemAddr.getValueType(),
2378 VT2: MVT::Other, Ops: OpsA);
2379 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStA), NewMemRefs: {MemOp});
2380 Chain = SDValue(VStA, 1);
2381
2382 // Store the odd D registers.
2383 Ops.push_back(Elt: SDValue(VStA, 0));
2384 Ops.push_back(Elt: Align);
2385 if (isUpdating) {
2386 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2387 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2388 "only constant post-increment update allowed for VST3/4");
2389 (void)Inc;
2390 Ops.push_back(Elt: Reg0);
2391 }
2392 Ops.push_back(Elt: RegSeq);
2393 Ops.push_back(Elt: Pred);
2394 Ops.push_back(Elt: Reg0);
2395 Ops.push_back(Elt: Chain);
2396 SDNode *VStB = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys,
2397 Ops);
2398 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStB), NewMemRefs: {MemOp});
2399 ReplaceNode(F: N, T: VStB);
2400}
2401
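// Select a NEON lane load/store (VLD2LN-VLD4LN / VST2LN-VST4LN). The vectors
// are glued into a super-register, the lane number is passed as an immediate,
// and for loads the per-vector results are re-extracted as subregisters.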
2402void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2403 unsigned NumVecs,
2404 const uint16_t *DOpcodes,
2405 const uint16_t *QOpcodes) {
2406 assert(Subtarget->hasNEON());
2407  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2408 SDLoc dl(N);
2409
2410 SDValue MemAddr, Align;
2411 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2412 // nodes are not intrinsics.
2413 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2414 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2415 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2416 return;
2417
2418 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2419
2420 SDValue Chain = N->getOperand(Num: 0);
2421 unsigned Lane = N->getConstantOperandVal(Num: Vec0Idx + NumVecs);
2422 EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2423 bool is64BitVector = VT.is64BitVector();
2424
2425 unsigned Alignment = 0;
2426 if (NumVecs != 3) {
2427 Alignment = Align->getAsZExtVal();
2428 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2429 if (Alignment > NumBytes)
2430 Alignment = NumBytes;
2431 if (Alignment < 8 && Alignment < NumBytes)
2432 Alignment = 0;
2433 // Alignment must be a power of two; make sure of that.
2434 Alignment = (Alignment & -Alignment);
2435 if (Alignment == 1)
2436 Alignment = 0;
2437 }
2438 Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
2439
2440 unsigned OpcodeIndex;
2441 switch (VT.getSimpleVT().SimpleTy) {
2442 default: llvm_unreachable("unhandled vld/vst lane type");
2443 // Double-register operations:
2444 case MVT::v8i8: OpcodeIndex = 0; break;
2445 case MVT::v4f16:
2446 case MVT::v4bf16:
2447 case MVT::v4i16: OpcodeIndex = 1; break;
2448 case MVT::v2f32:
2449 case MVT::v2i32: OpcodeIndex = 2; break;
2450 // Quad-register operations:
2451 case MVT::v8f16:
2452 case MVT::v8bf16:
2453 case MVT::v8i16: OpcodeIndex = 0; break;
2454 case MVT::v4f32:
2455 case MVT::v4i32: OpcodeIndex = 1; break;
2456 }
2457
2458 std::vector<EVT> ResTys;
2459 if (IsLoad) {
2460 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2461 if (!is64BitVector)
2462 ResTyElts *= 2;
2463 ResTys.push_back(x: EVT::getVectorVT(Context&: *CurDAG->getContext(),
2464 VT: MVT::i64, NumElements: ResTyElts));
2465 }
2466 if (isUpdating)
2467 ResTys.push_back(x: MVT::i32);
2468 ResTys.push_back(x: MVT::Other);
2469
2470 SDValue Pred = getAL(CurDAG, dl);
2471 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
2472
2473 SmallVector<SDValue, 8> Ops;
2474 Ops.push_back(Elt: MemAddr);
2475 Ops.push_back(Elt: Align);
2476 if (isUpdating) {
2477 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2478 bool IsImmUpdate =
2479 isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
2480 Ops.push_back(Elt: IsImmUpdate ? Reg0 : Inc);
2481 }
2482
2483 SDValue SuperReg;
2484 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2485 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2486 if (NumVecs == 2) {
2487 if (is64BitVector)
2488 SuperReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
2489 else
2490 SuperReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0, V1), 0);
2491 } else {
2492 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2493 SDValue V3 = (NumVecs == 3)
2494 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2495 : N->getOperand(Num: Vec0Idx + 3);
2496 if (is64BitVector)
2497 SuperReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
2498 else
2499 SuperReg = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);
2500 }
2501 Ops.push_back(Elt: SuperReg);
2502 Ops.push_back(Elt: getI32Imm(Imm: Lane, dl));
2503 Ops.push_back(Elt: Pred);
2504 Ops.push_back(Elt: Reg0);
2505 Ops.push_back(Elt: Chain);
2506
2507 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2508 QOpcodes[OpcodeIndex]);
2509 SDNode *VLdLn = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2510 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdLn), NewMemRefs: {MemOp});
2511 if (!IsLoad) {
2512 ReplaceNode(F: N, T: VLdLn);
2513 return;
2514 }
2515
2516 // Extract the subregisters.
2517 SuperReg = SDValue(VLdLn, 0);
2518 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2519 ARM::qsub_3 == ARM::qsub_0 + 3,
2520 "Unexpected subreg numbering");
2521 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2522 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2523 ReplaceUses(F: SDValue(N, Vec),
2524 T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2525 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdLn, 1));
2526 if (isUpdating)
2527 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdLn, 2));
2528 CurDAG->RemoveDeadNode(N);
2529}
2530
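// MVE predicated instructions carry a trailing (VPT code, predicate mask,
// tp_reg) operand triple. As a sketch, a "then"-predicated operation receives
// ARMVCC::Then plus its mask register, while an unpredicated one receives
// ARMVCC::None and a zero register in both register slots.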
2531template <typename SDValueVector>
2532void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2533 SDValue PredicateMask) {
2534 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2535 Ops.push_back(PredicateMask);
2536 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2537}
2538
2539template <typename SDValueVector>
2540void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2541 SDValue PredicateMask,
2542 SDValue Inactive) {
2543 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2544 Ops.push_back(PredicateMask);
2545 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2546 Ops.push_back(Inactive);
2547}
2548
2549template <typename SDValueVector>
2550void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2551 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2552 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2553 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2554}
2555
2556template <typename SDValueVector>
2557void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2558 EVT InactiveTy) {
2559 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2560 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2561 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2562 Ops.push_back(SDValue(
2563 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: InactiveTy), 0));
2564}
2565
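// Select an MVE instruction that takes a vector of base addresses plus an
// immediate offset and writes the updated bases back as an extra result
// (e.g., presumably, the gather-base/scatter-base writeback intrinsics); the
// opcode is picked by the 32- or 64-bit element size.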
2566void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2567 bool Predicated) {
2568 SDLoc Loc(N);
2569 SmallVector<SDValue, 8> Ops;
2570
2571 uint16_t Opcode;
2572 switch (N->getValueType(ResNo: 1).getVectorElementType().getSizeInBits()) {
2573 case 32:
2574 Opcode = Opcodes[0];
2575 break;
2576 case 64:
2577 Opcode = Opcodes[1];
2578 break;
2579 default:
2580 llvm_unreachable("bad vector element size in SelectMVE_WB");
2581 }
2582
2583 Ops.push_back(Elt: N->getOperand(Num: 2)); // vector of base addresses
2584
2585 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2586 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate offset
2587
2588 if (Predicated)
2589 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2590 else
2591 AddEmptyMVEPredicateToOps(Ops, Loc);
2592
2593 Ops.push_back(Elt: N->getOperand(Num: 0)); // chain
2594
2595 SmallVector<EVT, 8> VTs;
2596 VTs.push_back(Elt: N->getValueType(ResNo: 1));
2597 VTs.push_back(Elt: N->getValueType(ResNo: 0));
2598 VTs.push_back(Elt: N->getValueType(ResNo: 2));
2599
2600 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), ResultTys: VTs, Ops);
2601 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
2602 ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
2603 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
2604 transferMemOperands(N, Result: New);
2605 CurDAG->RemoveDeadNode(N);
2606}
2607
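// Select an MVE scalar long shift (e.g. ASRL/LSLL), which shifts a 64-bit
// value held in two GPRs. The shift count is either an immediate or a
// register, and saturating variants encode their saturation point as a
// single bit (0 for 64, 1 otherwise).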
2608void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2609 bool Immediate,
2610 bool HasSaturationOperand) {
2611 SDLoc Loc(N);
2612 SmallVector<SDValue, 8> Ops;
2613
2614 // Two 32-bit halves of the value to be shifted
2615 Ops.push_back(Elt: N->getOperand(Num: 1));
2616 Ops.push_back(Elt: N->getOperand(Num: 2));
2617
2618 // The shift count
2619 if (Immediate) {
2620 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2621 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2622 } else {
2623 Ops.push_back(Elt: N->getOperand(Num: 3));
2624 }
2625
2626 // The immediate saturation operand, if any
2627 if (HasSaturationOperand) {
2628 int32_t SatOp = N->getConstantOperandVal(Num: 4);
2629 int SatBit = (SatOp == 64 ? 0 : 1);
2630 Ops.push_back(Elt: getI32Imm(Imm: SatBit, dl: Loc));
2631 }
2632
2633 // MVE scalar shifts are IT-predicable, so include the standard
2634 // predicate arguments.
2635 Ops.push_back(Elt: getAL(CurDAG, dl: Loc));
2636 Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2637
2638 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2639}
2640
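// Select MVE VADC/VSBC. The incoming carry is bit 29 of an FPSCR-style word;
// when it is a compile-time constant carrying the value that the no-carry
// form assumes (clear for an add, set for a subtract), the carry operand is
// dropped and OpcodeWithNoCarry is selected instead.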
2641void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2642 uint16_t OpcodeWithNoCarry,
2643 bool Add, bool Predicated) {
2644 SDLoc Loc(N);
2645 SmallVector<SDValue, 8> Ops;
2646 uint16_t Opcode;
2647
2648 unsigned FirstInputOp = Predicated ? 2 : 1;
2649
2650 // Two input vectors and the input carry flag
2651 Ops.push_back(Elt: N->getOperand(Num: FirstInputOp));
2652 Ops.push_back(Elt: N->getOperand(Num: FirstInputOp + 1));
2653 SDValue CarryIn = N->getOperand(Num: FirstInputOp + 2);
2654 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(Val&: CarryIn);
2655 uint32_t CarryMask = 1 << 29;
2656 uint32_t CarryExpected = Add ? 0 : CarryMask;
2657 if (CarryInConstant &&
2658 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2659 Opcode = OpcodeWithNoCarry;
2660 } else {
2661 Ops.push_back(Elt: CarryIn);
2662 Opcode = OpcodeWithCarry;
2663 }
2664
2665 if (Predicated)
2666 AddMVEPredicateToOps(Ops, Loc,
2667 PredicateMask: N->getOperand(Num: FirstInputOp + 3), // predicate
2668 Inactive: N->getOperand(Num: FirstInputOp - 1)); // inactive
2669 else
2670 AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));
2671
2672 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2673}
2674
2675void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2676 SDLoc Loc(N);
2677 SmallVector<SDValue, 8> Ops;
2678
2679 // One vector input, followed by a 32-bit word of bits to shift in
2680 // and then an immediate shift count
2681 Ops.push_back(Elt: N->getOperand(Num: 1));
2682 Ops.push_back(Elt: N->getOperand(Num: 2));
2683 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2684 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2685
2686 if (Predicated)
2687 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2688 else
2689 AddEmptyMVEPredicateToOps(Ops, Loc);
2690
2691 CurDAG->SelectNodeTo(N, MachineOpc: ARM::MVE_VSHLC, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2692}
2693
2694static bool SDValueToConstBool(SDValue SDVal) {
2695 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2696 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(Val&: SDVal);
2697 uint64_t Value = SDValConstant->getZExtValue();
2698 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2699 return Value;
2700}
2701
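// Shared selection for the VMLALDAV/VMLSLDAV/VRMLALDAVH families. The opcode
// tables are laid out so the variant index is purely additive; as a worked
// example (illustrative), a signed, subtracting, exchanging, accumulating
// variant selects OpcodesS[4 * Stride + 2 * Stride + Stride + TySize].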
2702void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2703 const uint16_t *OpcodesS,
2704 const uint16_t *OpcodesU,
2705 size_t Stride, size_t TySize) {
2706 assert(TySize < Stride && "Invalid TySize");
2707 bool IsUnsigned = SDValueToConstBool(SDVal: N->getOperand(Num: 1));
2708 bool IsSub = SDValueToConstBool(SDVal: N->getOperand(Num: 2));
2709 bool IsExchange = SDValueToConstBool(SDVal: N->getOperand(Num: 3));
2710 if (IsUnsigned) {
2711 assert(!IsSub &&
2712 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2713 assert(!IsExchange &&
2714 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2715 }
2716
2717 auto OpIsZero = [N](size_t OpNo) {
2718 return isNullConstant(V: N->getOperand(Num: OpNo));
2719 };
2720
2721  // If the input accumulator value is not zero, select an instruction with
2722  // an accumulator; otherwise select an instruction without an accumulator.
2723 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2724
2725 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2726 if (IsSub)
2727 Opcodes += 4 * Stride;
2728 if (IsExchange)
2729 Opcodes += 2 * Stride;
2730 if (IsAccum)
2731 Opcodes += Stride;
2732 uint16_t Opcode = Opcodes[TySize];
2733
2734 SDLoc Loc(N);
2735 SmallVector<SDValue, 8> Ops;
2736 // Push the accumulator operands, if they are used
2737 if (IsAccum) {
2738 Ops.push_back(Elt: N->getOperand(Num: 4));
2739 Ops.push_back(Elt: N->getOperand(Num: 5));
2740 }
2741 // Push the two vector operands
2742 Ops.push_back(Elt: N->getOperand(Num: 6));
2743 Ops.push_back(Elt: N->getOperand(Num: 7));
2744
2745 if (Predicated)
2746 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 8));
2747 else
2748 AddEmptyMVEPredicateToOps(Ops, Loc);
2749
2750 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2751}
2752
2753void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2754 const uint16_t *OpcodesS,
2755 const uint16_t *OpcodesU) {
2756 EVT VecTy = N->getOperand(Num: 6).getValueType();
2757 size_t SizeIndex;
2758 switch (VecTy.getVectorElementType().getSizeInBits()) {
2759 case 16:
2760 SizeIndex = 0;
2761 break;
2762 case 32:
2763 SizeIndex = 1;
2764 break;
2765 default:
2766 llvm_unreachable("bad vector element size");
2767 }
2768
2769 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 2, TySize: SizeIndex);
2770}
2771
2772void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2773 const uint16_t *OpcodesS,
2774 const uint16_t *OpcodesU) {
2775 assert(
2776 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2777 32 &&
2778 "bad vector element size");
2779 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 1, TySize: 0);
2780}
2781
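// Select an MVE VLD2/VLD4. These expand to NumVecs chained stage instructions
// that each fill part of a wide tuple register; e.g. (illustrative) a vld4
// becomes four stages threaded through Data and Chain, and the results are
// extracted as qsub_0..qsub_3 of the final stage's output.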
2782void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2783 const uint16_t *const *Opcodes,
2784 bool HasWriteback) {
2785 EVT VT = N->getValueType(ResNo: 0);
2786 SDLoc Loc(N);
2787
2788 const uint16_t *OurOpcodes;
2789 switch (VT.getVectorElementType().getSizeInBits()) {
2790 case 8:
2791 OurOpcodes = Opcodes[0];
2792 break;
2793 case 16:
2794 OurOpcodes = Opcodes[1];
2795 break;
2796 case 32:
2797 OurOpcodes = Opcodes[2];
2798 break;
2799 default:
2800 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2801 }
2802
2803 EVT DataTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: NumVecs * 2);
2804 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2805 unsigned PtrOperand = HasWriteback ? 1 : 2;
2806
2807 auto Data = SDValue(
2808 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: DataTy), 0);
2809 SDValue Chain = N->getOperand(Num: 0);
2810  // Add an MVE_VLDn instruction for each Vec, except the last.
2811 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2812 SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2813 auto LoadInst =
2814 CurDAG->getMachineNode(Opcode: OurOpcodes[Stage], dl: Loc, ResultTys, Ops);
2815 Data = SDValue(LoadInst, 0);
2816 Chain = SDValue(LoadInst, 1);
2817 transferMemOperands(N, Result: LoadInst);
2818 }
2819 // The last may need a writeback on it
2820 if (HasWriteback)
2821 ResultTys = {DataTy, MVT::i32, MVT::Other};
2822 SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2823 auto LoadInst =
2824 CurDAG->getMachineNode(Opcode: OurOpcodes[NumVecs - 1], dl: Loc, ResultTys, Ops);
2825 transferMemOperands(N, Result: LoadInst);
2826
2827 unsigned i;
2828 for (i = 0; i < NumVecs; i++)
2829 ReplaceUses(F: SDValue(N, i),
2830 T: CurDAG->getTargetExtractSubreg(SRIdx: ARM::qsub_0 + i, DL: Loc, VT,
2831 Operand: SDValue(LoadInst, 0)));
2832 if (HasWriteback)
2833 ReplaceUses(F: SDValue(N, i++), T: SDValue(LoadInst, 1));
2834 ReplaceUses(F: SDValue(N, i), T: SDValue(LoadInst, HasWriteback ? 2 : 1));
2835 CurDAG->RemoveDeadNode(N);
2836}
2837
2838void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2839 bool Wrapping, bool Predicated) {
2840 EVT VT = N->getValueType(ResNo: 0);
2841 SDLoc Loc(N);
2842
2843 uint16_t Opcode;
2844 switch (VT.getScalarSizeInBits()) {
2845 case 8:
2846 Opcode = Opcodes[0];
2847 break;
2848 case 16:
2849 Opcode = Opcodes[1];
2850 break;
2851 case 32:
2852 Opcode = Opcodes[2];
2853 break;
2854 default:
2855 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2856 }
2857
2858 SmallVector<SDValue, 8> Ops;
2859 unsigned OpIdx = 1;
2860
2861 SDValue Inactive;
2862 if (Predicated)
2863 Inactive = N->getOperand(Num: OpIdx++);
2864
2865 Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // base
2866 if (Wrapping)
2867 Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // limit
2868
2869 SDValue ImmOp = N->getOperand(Num: OpIdx++); // step
2870 int ImmValue = ImmOp->getAsZExtVal();
2871 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc));
2872
2873 if (Predicated)
2874 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: OpIdx), Inactive);
2875 else
2876 AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));
2877
2878 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2879}
2880
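// Select a dual-register CDE instruction. The two 32-bit halves of the
// accumulator are packed into a GPRPair, the instruction defines an untyped
// register pair, and gsub_0/gsub_1 are extracted afterwards to replace the
// intrinsic's two scalar results (the halves are swapped on big-endian).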
2881void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2882 size_t NumExtraOps, bool HasAccum) {
2883 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2884 SDLoc Loc(N);
2885 SmallVector<SDValue, 8> Ops;
2886
2887 unsigned OpIdx = 1;
2888
2889 // Convert and append the immediate operand designating the coprocessor.
2890  SDValue ImmCoproc = N->getOperand(Num: OpIdx++);
2891  uint32_t ImmCoprocVal = ImmCoproc->getAsZExtVal();
2892 Ops.push_back(Elt: getI32Imm(Imm: ImmCoprocVal, dl: Loc));
2893
2894 // For accumulating variants copy the low and high order parts of the
2895 // accumulator into a register pair and add it to the operand vector.
2896 if (HasAccum) {
2897 SDValue AccLo = N->getOperand(Num: OpIdx++);
2898 SDValue AccHi = N->getOperand(Num: OpIdx++);
2899 if (IsBigEndian)
2900 std::swap(a&: AccLo, b&: AccHi);
2901 Ops.push_back(Elt: SDValue(createGPRPairNode(VT: MVT::Untyped, V0: AccLo, V1: AccHi), 0));
2902 }
2903
2904 // Copy extra operands as-is.
2905 for (size_t I = 0; I < NumExtraOps; I++)
2906 Ops.push_back(Elt: N->getOperand(Num: OpIdx++));
2907
2908 // Convert and append the immediate operand
2909 SDValue Imm = N->getOperand(Num: OpIdx);
2910 uint32_t ImmVal = Imm->getAsZExtVal();
2911 Ops.push_back(Elt: getI32Imm(Imm: ImmVal, dl: Loc));
2912
  // Accumulating variants are IT-predicable; add the predicate operands.
2914 if (HasAccum) {
2915 SDValue Pred = getAL(CurDAG, dl: Loc);
2916 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
2917 Ops.push_back(Elt: Pred);
2918 Ops.push_back(Elt: PredReg);
2919 }
2920
  // Create the CDE instruction.
2922 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, dl: Loc, VT: MVT::Untyped, Ops);
2923 SDValue ResultPair = SDValue(InstrNode, 0);
2924
2925 // The original intrinsic had two outputs, and the output of the dual-register
2926 // CDE instruction is a register pair. We need to extract the two subregisters
2927 // and replace all uses of the original outputs with the extracted
2928 // subregisters.
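  // E.g. on a little-endian target, result 0 comes from gsub_0 (the low
  // register of the pair) and result 1 from gsub_1; on big-endian targets
  // the mapping is swapped.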
2929 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2930 if (IsBigEndian)
2931 std::swap(a&: SubRegs[0], b&: SubRegs[1]);
2932
2933 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2934 if (SDValue(N, ResIdx).use_empty())
2935 continue;
2936 SDValue SubReg = CurDAG->getTargetExtractSubreg(SRIdx: SubRegs[ResIdx], DL: Loc,
2937 VT: MVT::i32, Operand: ResultPair);
2938 ReplaceUses(F: SDValue(N, ResIdx), T: SubReg);
2939 }
2940
2941 CurDAG->RemoveDeadNode(N);
2942}
2943
2944void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2945 bool isUpdating, unsigned NumVecs,
2946 const uint16_t *DOpcodes,
2947 const uint16_t *QOpcodes0,
2948 const uint16_t *QOpcodes1) {
2949 assert(Subtarget->hasNEON());
2950 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2951 SDLoc dl(N);
2952
2953 SDValue MemAddr, Align;
2954 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2955 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2956 return;
2957
2958 SDValue Chain = N->getOperand(Num: 0);
2959 EVT VT = N->getValueType(ResNo: 0);
2960 bool is64BitVector = VT.is64BitVector();
2961
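  // Clamp the alignment operand to something the instruction can encode: no
  // larger than the number of bytes loaded, a power of two, and dropped to 0
  // when it is below 8 bytes and does not cover the whole access. E.g. a
  // 16-byte-aligned vld2 of 32-bit elements into d-registers (NumBytes = 8)
  // is emitted with alignment 8. VLD3 has no alignment operand, so
  // NumVecs == 3 keeps alignment 0.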
2962 unsigned Alignment = 0;
2963 if (NumVecs != 3) {
2964 Alignment = Align->getAsZExtVal();
2965 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2966 if (Alignment > NumBytes)
2967 Alignment = NumBytes;
2968 if (Alignment < 8 && Alignment < NumBytes)
2969 Alignment = 0;
2970 // Alignment must be a power of two; make sure of that.
2971 Alignment = (Alignment & -Alignment);
2972 if (Alignment == 1)
2973 Alignment = 0;
2974 }
2975 Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
2976
2977 unsigned OpcodeIndex;
2978 switch (VT.getSimpleVT().SimpleTy) {
2979 default: llvm_unreachable("unhandled vld-dup type");
2980 case MVT::v8i8:
2981 case MVT::v16i8: OpcodeIndex = 0; break;
2982 case MVT::v4i16:
2983 case MVT::v8i16:
2984 case MVT::v4f16:
2985 case MVT::v8f16:
2986 case MVT::v4bf16:
2987 case MVT::v8bf16:
2988 OpcodeIndex = 1; break;
2989 case MVT::v2f32:
2990 case MVT::v2i32:
2991 case MVT::v4f32:
2992 case MVT::v4i32: OpcodeIndex = 2; break;
2993 case MVT::v1f64:
2994 case MVT::v1i64: OpcodeIndex = 3; break;
2995 }
2996
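  // Model the result as a single wide vector of i64 elements: VLD3DUP is
  // treated as occupying four d-registers (ResTyElts == 4), and q-register
  // forms use twice as many.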
2997 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2998 if (!is64BitVector)
2999 ResTyElts *= 2;
3000 EVT ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
3001
3002 std::vector<EVT> ResTys;
3003 ResTys.push_back(x: ResTy);
3004 if (isUpdating)
3005 ResTys.push_back(x: MVT::i32);
3006 ResTys.push_back(x: MVT::Other);
3007
3008 SDValue Pred = getAL(CurDAG, dl);
3009 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3010
3011 SmallVector<SDValue, 6> Ops;
3012 Ops.push_back(Elt: MemAddr);
3013 Ops.push_back(Elt: Align);
3014 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]
3015 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3016 : QOpcodes1[OpcodeIndex];
3017 if (isUpdating) {
3018 SDValue Inc = N->getOperand(Num: 2);
3019 bool IsImmUpdate =
3020 isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
3021 if (IsImmUpdate) {
3022 if (!isVLDfixed(Opc))
3023 Ops.push_back(Elt: Reg0);
3024 } else {
3025 if (isVLDfixed(Opc))
3026 Opc = getVLDSTRegisterUpdateOpcode(Opc);
3027 Ops.push_back(Elt: Inc);
3028 }
3029 }
3030 if (is64BitVector || NumVecs == 1) {
3031 // Double registers and VLD1 quad registers are directly supported.
3032 } else {
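    // A quad-register VLDnDUP (n > 1) is built in two halves: first load the
    // even d-registers with the QOpcodes0 pseudo, then feed its result into
    // the odd-half pseudo already chosen as Opc.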
3033 SDValue ImplDef = SDValue(
3034 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
3035 const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3036 SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl, VT1: ResTy,
3037 VT2: MVT::Other, Ops: OpsA);
3038 Ops.push_back(Elt: SDValue(VLdA, 0));
3039 Chain = SDValue(VLdA, 1);
3040 }
3041
3042 Ops.push_back(Elt: Pred);
3043 Ops.push_back(Elt: Reg0);
3044 Ops.push_back(Elt: Chain);
3045
3046 SDNode *VLdDup = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
3047
3048 // Transfer memoperands.
3049 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
3050 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdDup), NewMemRefs: {MemOp});
3051
3052 // Extract the subregisters.
3053 if (NumVecs == 1) {
3054 ReplaceUses(F: SDValue(N, 0), T: SDValue(VLdDup, 0));
3055 } else {
3056 SDValue SuperReg = SDValue(VLdDup, 0);
3057 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3058 unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3059 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3060 ReplaceUses(F: SDValue(N, Vec),
3061 T: CurDAG->getTargetExtractSubreg(SRIdx: SubIdx+Vec, DL: dl, VT, Operand: SuperReg));
3062 }
3063 }
3064 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdDup, 1));
3065 if (isUpdating)
3066 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdDup, 2));
3067 CurDAG->RemoveDeadNode(N);
3068}
3069
3070bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3071 if (!Subtarget->hasMVEIntegerOps())
3072 return false;
3073
3074 SDLoc dl(N);
3075
  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower inserts
  // and extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
3079 SDValue Ins1 = SDValue(N, 0);
3080 SDValue Ins2 = N->getOperand(Num: 0);
3081 EVT VT = Ins1.getValueType();
3082 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3083 !isa<ConstantSDNode>(Val: Ins1.getOperand(i: 2)) ||
3084 !isa<ConstantSDNode>(Val: Ins2.getOperand(i: 2)) ||
3085 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3086 return false;
3087
3088 unsigned Lane1 = Ins1.getConstantOperandVal(i: 2);
3089 unsigned Lane2 = Ins2.getConstantOperandVal(i: 2);
3090 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3091 return false;
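  // At this point Ins1 and Ins2 write an even/odd pair of f16 lanes, i.e.
  // exactly the f32 lane Lane2 / 2 (e.g. lanes 4 and 5 together form ssub_2).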
3092
3093 // If the inserted values will be able to use T/B already, leave it to the
3094 // existing tablegen patterns. For example VCVTT/VCVTB.
3095 SDValue Val1 = Ins1.getOperand(i: 1);
3096 SDValue Val2 = Ins2.getOperand(i: 1);
3097 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3098 return false;
3099
3100 // Check if the inserted values are both extracts.
3101 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3102 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3103 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3104 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3105 isa<ConstantSDNode>(Val: Val1.getOperand(i: 1)) &&
3106 isa<ConstantSDNode>(Val: Val2.getOperand(i: 1)) &&
3107 (Val1.getOperand(i: 0).getValueType() == MVT::v8f16 ||
3108 Val1.getOperand(i: 0).getValueType() == MVT::v8i16) &&
3109 (Val2.getOperand(i: 0).getValueType() == MVT::v8f16 ||
3110 Val2.getOperand(i: 0).getValueType() == MVT::v8i16)) {
3111 unsigned ExtractLane1 = Val1.getConstantOperandVal(i: 1);
3112 unsigned ExtractLane2 = Val2.getConstantOperandVal(i: 1);
3113
    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into an f32 lane move.
3116 if (Val1.getOperand(i: 0) == Val2.getOperand(i: 0) && ExtractLane2 % 2 == 0 &&
3117 ExtractLane1 == ExtractLane2 + 1) {
3118 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3119 SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
3120 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3121 SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT, Operand: Ins2.getOperand(i: 0),
3122 Subreg: NewExt);
3123 ReplaceUses(F: Ins1, T: NewIns);
3124 return true;
3125 }
3126
    // Otherwise this is a v8i16 pattern of an extract and an insert, with an
    // optional VMOVX for extracting odd lanes.
3129 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3130 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3131 SRIdx: ARM::ssub_0 + ExtractLane1 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
3132 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3133 SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val2.getOperand(i: 0));
3134 if (ExtractLane1 % 2 != 0)
3135 Inp1 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp1), 0);
3136 if (ExtractLane2 % 2 != 0)
3137 Inp2 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp2), 0);
3138 SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Inp2, Op2: Inp1);
3139 SDValue NewIns =
3140 CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
3141 Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
3142 ReplaceUses(F: Ins1, T: NewIns);
3143 return true;
3144 }
3145 }
3146
  // The inserted values are not extracts; if they are f16, insert them
  // directly using a VINS.
3149 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3150 SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Val2, Op2: Val1);
3151 SDValue NewIns =
3152 CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
3153 Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
3154 ReplaceUses(F: Ins1, T: NewIns);
3155 return true;
3156 }
3157
3158 return false;
3159}
3160
3161bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3162 SDNode *FMul,
3163 bool IsUnsigned,
3164 bool FixedToFloat) {
3165 auto Type = N->getValueType(ResNo: 0);
3166 unsigned ScalarBits = Type.getScalarSizeInBits();
3167 if (ScalarBits > 32)
3168 return false;
3169
3170 SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16-bit unsigned floats.
3173 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3174 return false;
3175
3176 SDValue ImmNode = FMul->getOperand(Num: 1);
3177 SDValue VecVal = FMul->getOperand(Num: 0);
3178 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3179 VecVal->getOpcode() == ISD::SINT_TO_FP)
3180 VecVal = VecVal->getOperand(Num: 0);
3181
3182 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3183 return false;
3184
3185 if (ImmNode.getOpcode() == ISD::BITCAST) {
3186 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3187 return false;
3188 ImmNode = ImmNode.getOperand(i: 0);
3189 }
3190
3191 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3192 return false;
3193
3194 APFloat ImmAPF(0.0f);
3195 switch (ImmNode.getOpcode()) {
3196 case ARMISD::VMOVIMM:
3197 case ARMISD::VDUP: {
3198 if (!isa<ConstantSDNode>(Val: ImmNode.getOperand(i: 0)))
3199 return false;
3200 unsigned Imm = ImmNode.getConstantOperandVal(i: 0);
3201 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3202 Imm = ARM_AM::decodeVMOVModImm(ModImm: Imm, EltBits&: ScalarBits);
3203 ImmAPF =
3204 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3205 APInt(ScalarBits, Imm));
3206 break;
3207 }
3208 case ARMISD::VMOVFPIMM: {
3209 ImmAPF = APFloat(ARM_AM::getFPImmFloat(Imm: ImmNode.getConstantOperandVal(i: 0)));
3210 break;
3211 }
3212 default:
3213 return false;
3214 }
3215
3216 // Where n is the number of fractional bits, multiplying by 2^n will convert
3217 // from float to fixed and multiplying by 2^-n will convert from fixed to
3218 // float. Taking log2 of the factor (after taking the inverse in the case of
3219 // float to fixed) will give n.
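  // E.g. an fmul by 8.0f = 2^3 converts float to fixed with FracBits = 3,
  // and an fmul by 0.125f = 2^-3 converts fixed to float; its exact inverse
  // (8.0f) likewise gives FracBits = 3.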
3220 APFloat ToConvert = ImmAPF;
3221 if (FixedToFloat) {
3222 if (!ImmAPF.getExactInverse(inv: &ToConvert))
3223 return false;
3224 }
3225 APSInt Converted(64, false);
3226 bool IsExact;
3227 ToConvert.convertToInteger(Result&: Converted, RM: llvm::RoundingMode::NearestTiesToEven,
3228 IsExact: &IsExact);
3229 if (!IsExact || !Converted.isPowerOf2())
3230 return false;
3231
3232 unsigned FracBits = Converted.logBase2();
3233 if (FracBits > ScalarBits)
3234 return false;
3235
3236 SmallVector<SDValue, 3> Ops{
3237 VecVal, CurDAG->getConstant(Val: FracBits, DL: SDLoc(N), VT: MVT::i32)};
3238 AddEmptyMVEPredicateToOps(Ops, Loc: SDLoc(N), InactiveTy: Type);
3239
3240 unsigned int Opcode;
3241 switch (ScalarBits) {
3242 case 16:
3243 if (FixedToFloat)
3244 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3245 else
3246 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3247 break;
3248 case 32:
3249 if (FixedToFloat)
3250 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3251 else
3252 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3253 break;
3254 default:
3255 llvm_unreachable("unexpected number of scalar bits");
3256 break;
3257 }
3258
3259 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT: Type, Ops));
3260 return true;
3261}
3262
3263bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3264 // Transform a floating-point to fixed-point conversion to a VCVT
3265 if (!Subtarget->hasMVEFloatOps())
3266 return false;
3267 EVT Type = N->getValueType(ResNo: 0);
3268 if (!Type.isVector())
3269 return false;
3270 unsigned int ScalarBits = Type.getScalarSizeInBits();
3271
3272 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3273 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3274 SDNode *Node = N->getOperand(Num: 0).getNode();
3275
  // A floating-point to fixed-point conversion with one fractional bit gets
  // turned into an FP_TO_[U|S]INT(FADD (x, x)) rather than an
  // FP_TO_[U|S]INT(FMUL (x, y)).
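  // E.g. fptoui (fmul x, 2.0) reaches here as fptoui (fadd x, x) and, for
  // v4f32, is selected as MVE_VCVTu32f32_fix with a fractional-bits
  // immediate of 1.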
3278 if (Node->getOpcode() == ISD::FADD) {
3279 if (Node->getOperand(Num: 0) != Node->getOperand(Num: 1))
3280 return false;
3281 SDNodeFlags Flags = Node->getFlags();
    // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
    // allowed in 16-bit unsigned floats.
3284 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3285 return false;
3286
    // Guard against unexpected element sizes so Opcode is always initialized.
    unsigned Opcode;
    switch (ScalarBits) {
    case 16:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
      break;
    case 32:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
      break;
    default:
      llvm_unreachable("unexpected number of scalar bits");
    }
3296 SmallVector<SDValue, 3> Ops{Node->getOperand(Num: 0),
3297 CurDAG->getConstant(Val: 1, DL: dl, VT: MVT::i32)};
3298 AddEmptyMVEPredicateToOps(Ops, Loc: dl, InactiveTy: Type);
3299
3300 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl, VT: Type, Ops));
3301 return true;
3302 }
3303
3304 if (Node->getOpcode() != ISD::FMUL)
3305 return false;
3306
3307 return transformFixedFloatingPointConversion(N, FMul: Node, IsUnsigned, FixedToFloat: false);
3308}
3309
3310bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3311 // Transform a fixed-point to floating-point conversion to a VCVT
3312 if (!Subtarget->hasMVEFloatOps())
3313 return false;
3314 auto Type = N->getValueType(ResNo: 0);
3315 if (!Type.isVector())
3316 return false;
3317
3318 auto LHS = N->getOperand(Num: 0);
3319 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3320 return false;
3321
3322 return transformFixedFloatingPointConversion(
3323 N, FMul: N, IsUnsigned: LHS.getOpcode() == ISD::UINT_TO_FP, FixedToFloat: true);
3324}
3325
3326bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3327 if (!Subtarget->hasV6T2Ops())
3328 return false;
3329
3330 unsigned Opc = isSigned
3331 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3332 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3333 SDLoc dl(N);
3334
3335 // For unsigned extracts, check for a shift right and mask
3336 unsigned And_imm = 0;
3337 if (N->getOpcode() == ISD::AND) {
3338 if (isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: And_imm)) {
3339
3340 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
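      // (e.g. 0x00ff & 0x0100 == 0, but 0x00f0 & 0x00f1 != 0).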
3341 if (And_imm & (And_imm + 1))
3342 return false;
3343
3344 unsigned Srl_imm = 0;
3345 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
3346 Imm&: Srl_imm)) {
3347 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3348
3349 // Mask off the unnecessary bits of the AND immediate; normally
3350 // DAGCombine will do this, but that might not happen if
3351 // targetShrinkDemandedConstant chooses a different immediate.
3352 And_imm &= -1U >> Srl_imm;
3353
3354 // Note: The width operand is encoded as width-1.
3355 unsigned Width = llvm::countr_one(Value: And_imm) - 1;
3356 unsigned LSB = Srl_imm;
3357
3358 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3359
3360 if ((LSB + Width + 1) == N->getValueType(ResNo: 0).getSizeInBits()) {
3361 // It's cheaper to use a right shift to extract the top bits.
3362 if (Subtarget->isThumb()) {
3363 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3364 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3365 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3366 getAL(CurDAG, dl), Reg0, Reg0 };
3367 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3368 return true;
3369 }
3370
3371 // ARM models shift instructions as MOVsi with shifter operand.
3372 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: ISD::SRL);
3373 SDValue ShOpc =
3374 CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: LSB), DL: dl,
3375 VT: MVT::i32);
3376 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0), ShOpc,
3377 getAL(CurDAG, dl), Reg0, Reg0 };
3378 CurDAG->SelectNodeTo(N, MachineOpc: ARM::MOVsi, VT: MVT::i32, Ops);
3379 return true;
3380 }
3381
3382 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3383 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3384 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3385 CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3386 getAL(CurDAG, dl), Reg0 };
3387 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3388 return true;
3389 }
3390 }
3391 return false;
3392 }
3393
3394 // Otherwise, we're looking for a shift of a shift
3395 unsigned Shl_imm = 0;
3396 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: Shl_imm)) {
3397 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3398 unsigned Srl_imm = 0;
3399 if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm)) {
3400 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3401 // Note: The width operand is encoded as width-1.
3402 unsigned Width = 32 - Srl_imm - 1;
3403 int LSB = Srl_imm - Shl_imm;
3404 if (LSB < 0)
3405 return false;
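      // E.g. (srl (shl x, 8), 12) becomes UBFX(x, 4, 20): LSB = 12 - 8 = 4
      // and the extracted width is 32 - 12 = 20, encoded as Width = 19.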
3406 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3407 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3408 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3409 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3410 CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3411 getAL(CurDAG, dl), Reg0 };
3412 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3413 return true;
3414 }
3415 }
3416
  // Or we are looking for a shift of an AND with a mask operand.
3418 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: And_imm) &&
3419 isShiftedMask_32(Value: And_imm)) {
3420 unsigned Srl_imm = 0;
3421 unsigned LSB = llvm::countr_zero(Val: And_imm);
    // The shift amount must be the same as the AND's LSB.
3423 if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm) && Srl_imm == LSB) {
3424 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3425 unsigned MSB = llvm::Log2_32(Value: And_imm);
3426 // Note: The width operand is encoded as width-1.
3427 unsigned Width = MSB - LSB;
3428 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3429 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3430 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3431 CurDAG->getTargetConstant(Val: Srl_imm, DL: dl, VT: MVT::i32),
3432 CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3433 getAL(CurDAG, dl), Reg0 };
3434 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3435 return true;
3436 }
3437 }
3438
3439 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3440 unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
3441 unsigned LSB = 0;
3442 if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: LSB) &&
3443 !isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRA, Imm&: LSB))
3444 return false;
3445
3446 if (LSB + Width > 32)
3447 return false;
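    // E.g. (sext_inreg (srl x, 3), i8) becomes SBFX(x, 3, 8), with the width
    // encoded as 7.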
3448
3449 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3450 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3451 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3452 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3453 CurDAG->getTargetConstant(Val: Width - 1, DL: dl, VT: MVT::i32),
3454 getAL(CurDAG, dl), Reg0 };
3455 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3456 return true;
3457 }
3458
3459 return false;
3460}
3461
3462/// Target-specific DAG combining for ISD::SUB.
3463/// Target-independent combining lowers SELECT_CC nodes of the form
3464/// select_cc setg[ge] X, 0, X, -X
3465/// select_cc setgt X, -1, X, -X
3466/// select_cc setl[te] X, 0, -X, X
3467/// select_cc setlt X, 1, -X, X
/// (which represent integer ABS) into:
3469/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to an
/// ARM::ABS or ARM::t2ABS machine node.
3472bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3473 SDValue SUBSrc0 = N->getOperand(Num: 0);
3474 SDValue SUBSrc1 = N->getOperand(Num: 1);
3475 EVT VT = N->getValueType(ResNo: 0);
3476
3477 if (Subtarget->isThumb1Only())
3478 return false;
3479
3480 if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3481 return false;
3482
3483 SDValue XORSrc0 = SUBSrc0.getOperand(i: 0);
3484 SDValue XORSrc1 = SUBSrc0.getOperand(i: 1);
3485 SDValue SRASrc0 = SUBSrc1.getOperand(i: 0);
3486 SDValue SRASrc1 = SUBSrc1.getOperand(i: 1);
3487 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(Val&: SRASrc1);
3488 EVT XType = SRASrc0.getValueType();
3489 unsigned Size = XType.getSizeInBits() - 1;
3490
3491 if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3492 SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3493 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3494 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VT, Op1: XORSrc0);
3495 return true;
3496 }
3497
3498 return false;
3499}
3500
/// We've got special pseudo-instructions for these compare-and-swap nodes.
3502void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3503 unsigned Opcode;
3504 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3505 if (MemTy == MVT::i8)
3506 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3507 else if (MemTy == MVT::i16)
3508 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3509 else if (MemTy == MVT::i32)
3510 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3511 else
3512 llvm_unreachable("Unknown AtomicCmpSwap type");
3513
3514 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
3515 N->getOperand(Num: 0)};
3516 SDNode *CmpSwap = CurDAG->getMachineNode(
3517 Opcode, dl: SDLoc(N),
3518 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other), Ops);
3519
3520 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3521 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3522
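  // The pseudo produces (loaded value, status temp, chain). The status temp
  // is only used internally when the pseudo is expanded, so map the original
  // node's two results to outputs 0 and 2.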
3523 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
3524 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
3525 CurDAG->RemoveDeadNode(N);
3526}
3527
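// Return the (MSB, LSB) positions of the single contiguous run of set bits
// in A, or std::nullopt if the set bits are not contiguous. E.g. for
// A = 0x0FF0 this returns (11, 4); for A = 0x0F0F it returns std::nullopt.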
3528static std::optional<std::pair<unsigned, unsigned>>
3529getContiguousRangeOfSetBits(const APInt &A) {
3530 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3531 unsigned LastOne = A.countr_zero();
3532 if (A.popcount() != (FirstOne - LastOne + 1))
3533 return std::nullopt;
3534 return std::make_pair(x&: FirstOne, y&: LastOne);
3535}
3536
3537void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3538 assert(N->getOpcode() == ARMISD::CMPZ);
3539 SwitchEQNEToPLMI = false;
3540
3541 if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode; LSL
    // and LSR don't exist there as standalone instructions and need the
    // barrel shifter.
3544 return;
3545
3546 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
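  // E.g. with C = 0x0FF0 (a contiguous run of set bits 11..4), the test
  // (X & 0x0FF0) == 0 can be computed as (LSRS (LSLS X, 20), 24) == 0: the
  // left shift by 31 - 11 = 20 drops the bits above the mask and the right
  // shift by 4 + 20 = 24 drops the bits below it.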
3547 SDValue And = N->getOperand(Num: 0);
3548 if (!And->hasOneUse())
3549 return;
3550
3551 SDValue Zero = N->getOperand(Num: 1);
3552 if (!isNullConstant(V: Zero) || And->getOpcode() != ISD::AND)
3553 return;
3554 SDValue X = And.getOperand(i: 0);
3555 auto C = dyn_cast<ConstantSDNode>(Val: And.getOperand(i: 1));
3556
3557 if (!C)
3558 return;
3559 auto Range = getContiguousRangeOfSetBits(A: C->getAPIntValue());
3560 if (!Range)
3561 return;
3562
3563 // There are several ways to lower this:
3564 SDNode *NewN;
3565 SDLoc dl(N);
3566
3567 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3568 if (Subtarget->isThumb2()) {
3569 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3570 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3571 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3572 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3573 return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3574 } else {
3575 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Src,
3576 CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3577 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3578 return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3579 }
3580 };
3581
3582 if (Range->second == 0) {
3583 // 1. Mask includes the LSB -> Simply shift the top N bits off
3584 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3585 ReplaceNode(F: And.getNode(), T: NewN);
3586 } else if (Range->first == 31) {
3587 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3588 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3589 ReplaceNode(F: And.getNode(), T: NewN);
3590 } else if (Range->first == Range->second) {
3591 // 3. Only one bit is set. We can shift this into the sign bit and use a
3592 // PL/MI comparison. This is not safe if CMPZ has multiple uses because
3593 // only one of them (the one currently being selected) will be switched
3594 // to use the new condition code.
3595 if (!N->hasOneUse())
3596 return;
3597 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3598 ReplaceNode(F: And.getNode(), T: NewN);
3599
3600 SwitchEQNEToPLMI = true;
3601 } else if (!Subtarget->hasV6T2Ops()) {
3602 // 4. Do a double shift to clear bottom and top bits, but only in
3603 // thumb-1 mode as in thumb-2 we can use UBFX.
3604 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3605 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3606 Range->second + (31 - Range->first));
3607 ReplaceNode(F: And.getNode(), T: NewN);
3608 }
3609}
3610
3611static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3612 unsigned Opc128[3]) {
3613 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3614 "Unexpected vector shuffle length");
3615 switch (VT.getScalarSizeInBits()) {
3616 default:
3617 llvm_unreachable("Unexpected vector shuffle element size");
3618 case 8:
3619 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3620 case 16:
3621 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3622 case 32:
3623 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3624 }
3625}
3626
3627void ARMDAGToDAGISel::Select(SDNode *N) {
3628 SDLoc dl(N);
3629
3630 if (N->isMachineOpcode()) {
3631 N->setNodeId(-1);
3632 return; // Already selected.
3633 }
3634
3635 switch (N->getOpcode()) {
3636 default: break;
3637 case ISD::STORE: {
3638 // For Thumb1, match an sp-relative store in C++. This is a little
3639 // unfortunate, but I don't think I can make the chain check work
3640 // otherwise. (The chain of the store has to be the same as the chain
3641 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3642 // a direct reference to "SP".)
3643 //
3644 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3645 // a different addressing mode from other four-byte stores.
3646 //
3647 // This pattern usually comes up with call arguments.
3648 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
3649 SDValue Ptr = ST->getBasePtr();
3650 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3651 int RHSC = 0;
3652 if (Ptr.getOpcode() == ISD::ADD &&
3653 isScaledConstantInRange(Node: Ptr.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC))
3654 Ptr = Ptr.getOperand(i: 0);
3655
3656 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3657 cast<RegisterSDNode>(Val: Ptr.getOperand(i: 1))->getReg() == ARM::SP &&
3658 Ptr.getOperand(i: 0) == ST->getChain()) {
3659 SDValue Ops[] = {ST->getValue(),
3660 CurDAG->getRegister(Reg: ARM::SP, VT: MVT::i32),
3661 CurDAG->getTargetConstant(Val: RHSC, DL: dl, VT: MVT::i32),
3662 getAL(CurDAG, dl),
3663 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3664 ST->getChain()};
3665 MachineSDNode *ResNode =
3666 CurDAG->getMachineNode(Opcode: ARM::tSTRspi, dl, VT: MVT::Other, Ops);
3667 MachineMemOperand *MemOp = ST->getMemOperand();
3668 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3669 ReplaceNode(F: N, T: ResNode);
3670 return;
3671 }
3672 }
3673 break;
3674 }
3675 case ISD::WRITE_REGISTER:
3676 if (tryWriteRegister(N))
3677 return;
3678 break;
3679 case ISD::READ_REGISTER:
3680 if (tryReadRegister(N))
3681 return;
3682 break;
3683 case ISD::INLINEASM:
3684 case ISD::INLINEASM_BR:
3685 if (tryInlineAsm(N))
3686 return;
3687 break;
3688 case ISD::SUB:
3689 // Select special operations if SUB node forms integer ABS pattern
3690 if (tryABSOp(N))
3691 return;
3692 // Other cases are autogenerated.
3693 break;
3694 case ISD::Constant: {
3695 unsigned Val = N->getAsZExtVal();
    // If we can't materialize the constant, we need to use a literal pool.
3697 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3698 !Subtarget->genExecuteOnly()) {
3699 SDValue CPIdx = CurDAG->getTargetConstantPool(
3700 C: ConstantInt::get(Ty: Type::getInt32Ty(C&: *CurDAG->getContext()), V: Val),
3701 VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3702
3703 SDNode *ResNode;
3704 if (Subtarget->isThumb()) {
3705 SDValue Ops[] = {
3706 CPIdx,
3707 getAL(CurDAG, dl),
3708 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3709 CurDAG->getEntryNode()
3710 };
3711 ResNode = CurDAG->getMachineNode(Opcode: ARM::tLDRpci, dl, VT1: MVT::i32, VT2: MVT::Other,
3712 Ops);
3713 } else {
3714 SDValue Ops[] = {
3715 CPIdx,
3716 CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3717 getAL(CurDAG, dl),
3718 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3719 CurDAG->getEntryNode()
3720 };
3721 ResNode = CurDAG->getMachineNode(Opcode: ARM::LDRcp, dl, VT1: MVT::i32, VT2: MVT::Other,
3722 Ops);
3723 }
3724 // Annotate the Node with memory operand information so that MachineInstr
3725 // queries work properly. This e.g. gives the register allocation the
3726 // required information for rematerialization.
3727 MachineFunction& MF = CurDAG->getMachineFunction();
3728 MachineMemOperand *MemOp =
3729 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3730 F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
3731
3732 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3733
3734 ReplaceNode(F: N, T: ResNode);
3735 return;
3736 }
3737
3738 // Other cases are autogenerated.
3739 break;
3740 }
3741 case ISD::FrameIndex: {
3742 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3743 int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
3744 SDValue TFI = CurDAG->getTargetFrameIndex(
3745 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3746 if (Subtarget->isThumb1Only()) {
3747 // Set the alignment of the frame object to 4, to avoid having to generate
3748 // more than one ADD
3749 MachineFrameInfo &MFI = MF->getFrameInfo();
3750 if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
3751 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
3752 CurDAG->SelectNodeTo(N, MachineOpc: ARM::tADDframe, VT: MVT::i32, Op1: TFI,
3753 Op2: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
3754 return;
3755 } else {
3756 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3757 ARM::t2ADDri : ARM::ADDri);
3758 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3759 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3760 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3761 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3762 return;
3763 }
3764 }
3765 case ISD::INSERT_VECTOR_ELT: {
3766 if (tryInsertVectorElt(N))
3767 return;
3768 break;
3769 }
3770 case ISD::SRL:
3771 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3772 return;
3773 break;
3774 case ISD::SIGN_EXTEND_INREG:
3775 case ISD::SRA:
3776 if (tryV6T2BitfieldExtractOp(N, isSigned: true))
3777 return;
3778 break;
3779 case ISD::FP_TO_UINT:
3780 case ISD::FP_TO_SINT:
3781 case ISD::FP_TO_UINT_SAT:
3782 case ISD::FP_TO_SINT_SAT:
3783 if (tryFP_TO_INT(N, dl))
3784 return;
3785 break;
3786 case ISD::FMUL:
3787 if (tryFMULFixed(N, dl))
3788 return;
3789 break;
3790 case ISD::MUL:
3791 if (Subtarget->isThumb1Only())
3792 break;
3793 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) {
3794 unsigned RHSV = C->getZExtValue();
3795 if (!RHSV) break;
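      // A multiply by 2^n+1 or 2^n-1 can instead use an ADD or RSB with a
      // shifter operand: e.g. x * 9 = x + (x << 3) and x * 7 = (x << 3) - x.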
3796 if (isPowerOf2_32(Value: RHSV-1)) { // 2^n+1?
3797 unsigned ShImm = Log2_32(Value: RHSV-1);
3798 if (ShImm >= 32)
3799 break;
3800 SDValue V = N->getOperand(Num: 0);
3801 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3802 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3803 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3804 if (Subtarget->isThumb()) {
3805 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3806 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2ADDrs, VT: MVT::i32, Ops);
3807 return;
3808 } else {
3809 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3810 Reg0 };
3811 CurDAG->SelectNodeTo(N, MachineOpc: ARM::ADDrsi, VT: MVT::i32, Ops);
3812 return;
3813 }
3814 }
3815 if (isPowerOf2_32(Value: RHSV+1)) { // 2^n-1?
3816 unsigned ShImm = Log2_32(Value: RHSV+1);
3817 if (ShImm >= 32)
3818 break;
3819 SDValue V = N->getOperand(Num: 0);
3820 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3821 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3822 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3823 if (Subtarget->isThumb()) {
3824 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3825 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2RSBrs, VT: MVT::i32, Ops);
3826 return;
3827 } else {
3828 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3829 Reg0 };
3830 CurDAG->SelectNodeTo(N, MachineOpc: ARM::RSBrsi, VT: MVT::i32, Ops);
3831 return;
3832 }
3833 }
3834 }
3835 break;
3836 case ISD::AND: {
3837 // Check for unsigned bitfield extract
3838 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3839 return;
3840
3841 // If an immediate is used in an AND node, it is possible that the immediate
3842 // can be more optimally materialized when negated. If this is the case we
3843 // can negate the immediate and use a BIC instead.
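    // E.g. on Thumb1 the constant 0xffffff00 is more expensive to
    // materialize than its complement 0x000000ff, so (and x, 0xffffff00) is
    // better selected as a BIC with 0x000000ff.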
3844 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
3845 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3846 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3847
3848 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3849 // immediate can be negated and fit in the immediate operand of
3850 // a t2BIC, don't do any manual transform here as this can be
3851 // handled by the generic ISel machinery.
3852 bool PreferImmediateEncoding =
3853 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3854 if (!PreferImmediateEncoding &&
3855 ConstantMaterializationCost(Val: Imm, Subtarget) >
3856 ConstantMaterializationCost(Val: ~Imm, Subtarget)) {
3857 // The current immediate costs more to materialize than a negated
3858 // immediate, so negate the immediate and use a BIC.
3859 SDValue NewImm = CurDAG->getConstant(Val: ~Imm, DL: dl, VT: MVT::i32);
3860 // If the new constant didn't exist before, reposition it in the topological
3861 // ordering so it is just before N. Otherwise, don't touch its location.
3862 if (NewImm->getNodeId() == -1)
3863 CurDAG->RepositionNode(Position: N->getIterator(), N: NewImm.getNode());
3864
3865 if (!Subtarget->hasThumb2()) {
3866 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32),
3867 N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3868 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3869 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::tBIC, dl, VT: MVT::i32, Ops));
3870 return;
3871 } else {
3872 SDValue Ops[] = {N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3873 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3874 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3875 ReplaceNode(F: N,
3876 T: CurDAG->getMachineNode(Opcode: ARM::t2BICrr, dl, VT: MVT::i32, Ops));
3877 return;
3878 }
3879 }
3880 }
3881
    // (and (or x, c2), c1), where the top 16 bits of c1 and c2 match, the low
    // 16 bits of c1 are 0xffff, and the low 16 bits of c2 are 0. That is, the
    // top 16 bits of the result are entirely contributed by c2 and the low 16
    // bits entirely by x, which equals (or (and x, 0xffff), (and c2, 0xffff0000)).
    // Select it to: movt x, ((c2 & 0xffff0000) >> 16)
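    // E.g. c1 = 0xabcdffff and c2 = 0xabcd0000: (x | 0xabcd0000) & 0xabcdffff
    // keeps the low 16 bits of x and forces the top 16 bits to 0xabcd, which
    // is exactly "movt x, #0xabcd".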
3887 EVT VT = N->getValueType(ResNo: 0);
3888 if (VT != MVT::i32)
3889 break;
3890 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3891 ? ARM::t2MOVTi16
3892 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3893 if (!Opc)
3894 break;
3895 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
3896 N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3897 if (!N1C)
3898 break;
3899 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3900 SDValue N2 = N0.getOperand(i: 1);
3901 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2);
3902 if (!N2C)
3903 break;
3904 unsigned N1CVal = N1C->getZExtValue();
3905 unsigned N2CVal = N2C->getZExtValue();
3906 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3907 (N1CVal & 0xffffU) == 0xffffU &&
3908 (N2CVal & 0xffffU) == 0x0U) {
3909 SDValue Imm16 = CurDAG->getTargetConstant(Val: (N2CVal & 0xFFFF0000U) >> 16,
3910 DL: dl, VT: MVT::i32);
3911 SDValue Ops[] = { N0.getOperand(i: 0), Imm16,
3912 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3913 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
3914 return;
3915 }
3916 }
3917
3918 break;
3919 }
3920 case ARMISD::UMAAL: {
3921 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3922 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1),
3923 N->getOperand(Num: 2), N->getOperand(Num: 3),
3924 getAL(CurDAG, dl),
3925 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3926 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3927 return;
3928 }
3929 case ARMISD::UMLAL:{
3930 if (Subtarget->isThumb()) {
3931 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3932 N->getOperand(Num: 3), getAL(CurDAG, dl),
3933 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3934 ReplaceNode(
3935 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2UMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3936 return;
    } else {
3938 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3939 N->getOperand(Num: 3), getAL(CurDAG, dl),
3940 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3941 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3942 ReplaceNode(F: N, T: CurDAG->getMachineNode(
3943 Opcode: Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3944 VT1: MVT::i32, VT2: MVT::i32, Ops));
3945 return;
3946 }
3947 }
3948 case ARMISD::SMLAL:{
3949 if (Subtarget->isThumb()) {
3950 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3951 N->getOperand(Num: 3), getAL(CurDAG, dl),
3952 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3953 ReplaceNode(
3954 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2SMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3955 return;
    } else {
3957 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3958 N->getOperand(Num: 3), getAL(CurDAG, dl),
3959 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3960 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3961 ReplaceNode(F: N, T: CurDAG->getMachineNode(
3962 Opcode: Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3963 VT1: MVT::i32, VT2: MVT::i32, Ops));
3964 return;
3965 }
3966 }
3967 case ARMISD::SUBE: {
3968 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3969 break;
    // Look for a pattern to match SMMLS:
    //   (sube acc, (smul_lohi a, b).hi, (subc 0, (smul_lohi a, b).lo).carry)
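    // That is the high word of ((acc << 32) - (a * b)), which is exactly
    // what SMMLS computes.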
3972 if (N->getOperand(Num: 1).getOpcode() != ISD::SMUL_LOHI ||
3973 N->getOperand(Num: 2).getOpcode() != ARMISD::SUBC ||
3974 !SDValue(N, 1).use_empty())
3975 break;
3976
3977 if (Subtarget->isThumb())
3978 assert(Subtarget->hasThumb2() &&
3979 "This pattern should not be generated for Thumb");
3980
3981 SDValue SmulLoHi = N->getOperand(Num: 1);
3982 SDValue Subc = N->getOperand(Num: 2);
3983 SDValue Zero = Subc.getOperand(i: 0);
3984
3985 if (!isNullConstant(V: Zero) || Subc.getOperand(i: 1) != SmulLoHi.getValue(R: 0) ||
3986 N->getOperand(Num: 1) != SmulLoHi.getValue(R: 1) ||
3987 N->getOperand(Num: 2) != Subc.getValue(R: 1))
3988 break;
3989
3990 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3991 SDValue Ops[] = { SmulLoHi.getOperand(i: 0), SmulLoHi.getOperand(i: 1),
3992 N->getOperand(Num: 0), getAL(CurDAG, dl),
3993 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3994 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops));
3995 return;
3996 }
3997 case ISD::LOAD: {
3998 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999 return;
4000 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4001 if (tryT2IndexedLoad(N))
4002 return;
4003 } else if (Subtarget->isThumb()) {
4004 if (tryT1IndexedLoad(N))
4005 return;
4006 } else if (tryARMIndexedLoad(N))
4007 return;
4008 // Other cases are autogenerated.
4009 break;
4010 }
4011 case ISD::MLOAD:
4012 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4013 return;
4014 // Other cases are autogenerated.
4015 break;
4016 case ARMISD::WLSSETUP: {
4017 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::t2WhileLoopSetup, dl, VT: MVT::i32,
4018 Op1: N->getOperand(Num: 0));
4019 ReplaceUses(F: N, T: New);
4020 CurDAG->RemoveDeadNode(N);
4021 return;
4022 }
4023 case ARMISD::WLS: {
4024 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::t2WhileLoopStart, dl, VT: MVT::Other,
4025 Op1: N->getOperand(Num: 1), Op2: N->getOperand(Num: 2),
4026 Op3: N->getOperand(Num: 0));
4027 ReplaceUses(F: N, T: New);
4028 CurDAG->RemoveDeadNode(N);
4029 return;
4030 }
4031 case ARMISD::LE: {
4032 SDValue Ops[] = { N->getOperand(Num: 1),
4033 N->getOperand(Num: 2),
4034 N->getOperand(Num: 0) };
4035 unsigned Opc = ARM::t2LoopEnd;
4036 SDNode *New = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops);
4037 ReplaceUses(F: N, T: New);
4038 CurDAG->RemoveDeadNode(N);
4039 return;
4040 }
4041 case ARMISD::LDRD: {
4042 if (Subtarget->isThumb2())
4043 break; // TableGen handles isel in this case.
4044 SDValue Base, RegOffset, ImmOffset;
4045 const SDValue &Chain = N->getOperand(Num: 0);
4046 const SDValue &Addr = N->getOperand(Num: 1);
4047 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4048 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
4049 // The register-offset variant of LDRD mandates that the register
4050 // allocated to RegOffset is not reused in any of the remaining operands.
4051 // This restriction is currently not enforced. Therefore emitting this
4052 // variant is explicitly avoided.
4053 Base = Addr;
4054 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4055 }
4056 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4057 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::LOADDUAL, dl,
4058 ResultTys: {MVT::Untyped, MVT::Other}, Ops);
4059 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
4060 Operand: SDValue(New, 0));
4061 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
4062 Operand: SDValue(New, 0));
4063 transferMemOperands(N, Result: New);
4064 ReplaceUses(F: SDValue(N, 0), T: Lo);
4065 ReplaceUses(F: SDValue(N, 1), T: Hi);
4066 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 1));
4067 CurDAG->RemoveDeadNode(N);
4068 return;
4069 }
4070 case ARMISD::STRD: {
4071 if (Subtarget->isThumb2())
4072 break; // TableGen handles isel in this case.
4073 SDValue Base, RegOffset, ImmOffset;
4074 const SDValue &Chain = N->getOperand(Num: 0);
4075 const SDValue &Addr = N->getOperand(Num: 3);
4076 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4077 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
4078 // The register-offset variant of STRD mandates that the register
4079 // allocated to RegOffset is not reused in any of the remaining operands.
4080 // This restriction is currently not enforced. Therefore emitting this
4081 // variant is explicitly avoided.
4082 Base = Addr;
4083 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4084 }
4085 SDNode *RegPair =
4086 createGPRPairNode(VT: MVT::Untyped, V0: N->getOperand(Num: 1), V1: N->getOperand(Num: 2));
4087 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4088 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::STOREDUAL, dl, VT: MVT::Other, Ops);
4089 transferMemOperands(N, Result: New);
4090 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 0));
4091 CurDAG->RemoveDeadNode(N);
4092 return;
4093 }
4094 case ARMISD::LOOP_DEC: {
4095 SDValue Ops[] = { N->getOperand(Num: 1),
4096 N->getOperand(Num: 2),
4097 N->getOperand(Num: 0) };
4098 SDNode *Dec =
4099 CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4100 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other), Ops);
4101 ReplaceUses(F: N, T: Dec);
4102 CurDAG->RemoveDeadNode(N);
4103 return;
4104 }
4105 case ARMISD::BRCOND: {
4106 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4107 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4108 // Pattern complexity = 6 cost = 1 size = 0
4109
4110 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4112 // Pattern complexity = 6 cost = 1 size = 0
4113
4114 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4116 // Pattern complexity = 6 cost = 1 size = 0
4117
4118 unsigned Opc = Subtarget->isThumb() ?
4119 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4120 SDValue Chain = N->getOperand(Num: 0);
4121 SDValue N1 = N->getOperand(Num: 1);
4122 SDValue N2 = N->getOperand(Num: 2);
4123 SDValue Flags = N->getOperand(Num: 3);
4124 assert(N1.getOpcode() == ISD::BasicBlock);
4125 assert(N2.getOpcode() == ISD::Constant);
4126
4127 unsigned CC = (unsigned)N2->getAsZExtVal();
4128
4129 if (Flags.getOpcode() == ARMISD::CMPZ) {
4130 if (Flags.getOperand(i: 0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4131 SDValue Int = Flags.getOperand(i: 0);
4132 uint64_t ID = Int->getConstantOperandVal(Num: 1);
4133
4134 // Handle low-overhead loops.
4135 if (ID == Intrinsic::loop_decrement_reg) {
4136 SDValue Elements = Int.getOperand(i: 2);
4137 SDValue Size = CurDAG->getTargetConstant(Val: Int.getConstantOperandVal(i: 3),
4138 DL: dl, VT: MVT::i32);
4139
4140 SDValue Args[] = { Elements, Size, Int.getOperand(i: 0) };
4141 SDNode *LoopDec =
4142 CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4143 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other),
4144 Ops: Args);
4145 ReplaceUses(F: Int.getNode(), T: LoopDec);
4146
4147 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4148 SDNode *LoopEnd =
4149 CurDAG->getMachineNode(Opcode: ARM::t2LoopEnd, dl, VT: MVT::Other, Ops: EndArgs);
4150
4151 ReplaceUses(F: N, T: LoopEnd);
4152 CurDAG->RemoveDeadNode(N);
4153 CurDAG->RemoveDeadNode(N: Flags.getNode());
4154 CurDAG->RemoveDeadNode(N: Int.getNode());
4155 return;
4156 }
4157 }
4158
4159 bool SwitchEQNEToPLMI;
4160 SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4161 Flags = N->getOperand(Num: 3);
4162
4163 if (SwitchEQNEToPLMI) {
4164 switch ((ARMCC::CondCodes)CC) {
4165 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4166 case ARMCC::NE:
4167 CC = (unsigned)ARMCC::MI;
4168 break;
4169 case ARMCC::EQ:
4170 CC = (unsigned)ARMCC::PL;
4171 break;
4172 }
4173 }
4174 }
4175
4176 SDValue Tmp2 = CurDAG->getTargetConstant(Val: CC, DL: dl, VT: MVT::i32);
4177 Chain = CurDAG->getCopyToReg(Chain, dl, Reg: ARM::CPSR, N: Flags, Glue: SDValue());
4178 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Chain,
4179 Chain.getValue(R: 1)};
4180 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::Other, Ops);
4181 return;
4182 }
4183
4184 case ARMISD::CMPZ: {
4185 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4186 // This allows us to avoid materializing the expensive negative constant.
4187 // The CMPZ #0 is useless and will be peepholed away but we need to keep
4188 // it for its flags output.
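    // E.g. (CMPZ X, #-42) becomes (CMPZ (ADDS X, #42), #0): both set the same
    // flags, but #42 fits in a 16-bit ADDS immediate whereas #-42 would have
    // to be materialized into a register first.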
4189 SDValue X = N->getOperand(Num: 0);
4190 auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1).getNode());
4191 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4192 int64_t Addend = -C->getSExtValue();
4193
4194 SDNode *Add = nullptr;
4195 // ADDS can be better than CMN if the immediate fits in a
4196 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4197 // Outside that range we can just use a CMN which is 32-bit but has a
4198 // 12-bit immediate range.
4199 if (Addend < 1<<8) {
4200 if (Subtarget->isThumb2()) {
4201 SDValue Ops[] = { X, CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4202 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
4203 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
4204 Add = CurDAG->getMachineNode(Opcode: ARM::t2ADDri, dl, VT: MVT::i32, Ops);
4205 } else {
4206 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4207 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), X,
4208 CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4209 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
4210 Add = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
4211 }
4212 }
4213 if (Add) {
4214 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(Val: 0, DL: dl, VT: MVT::i32)};
4215 CurDAG->MorphNodeTo(N, Opc: ARMISD::CMPZ, VTs: N->getVTList(), Ops: Ops2);
4216 }
4217 }
4218 // Other cases are autogenerated.
4219 break;
4220 }
4221
4222 case ARMISD::CMOV: {
4223 SDValue Flags = N->getOperand(Num: 3);
4224
4225 if (Flags.getOpcode() == ARMISD::CMPZ) {
4226 bool SwitchEQNEToPLMI;
4227 SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4228
4229 if (SwitchEQNEToPLMI) {
4230 SDValue ARMcc = N->getOperand(Num: 2);
4231 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
4232
4233 switch (CC) {
4234 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4235 case ARMCC::NE:
4236 CC = ARMCC::MI;
4237 break;
4238 case ARMCC::EQ:
4239 CC = ARMCC::PL;
4240 break;
4241 }
4242 SDValue NewARMcc = CurDAG->getConstant(Val: (unsigned)CC, DL: dl, VT: MVT::i32);
4243 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), NewARMcc,
4244 N->getOperand(Num: 3)};
4245 CurDAG->MorphNodeTo(N, Opc: ARMISD::CMOV, VTs: N->getVTList(), Ops);
4246 }
4247 }
4248 // Other cases are autogenerated.
4249 break;
4250 }
4251 case ARMISD::VZIP: {
4252 EVT VT = N->getValueType(ResNo: 0);
4253 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4254 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4255 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4256 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4257 SDValue Pred = getAL(CurDAG, dl);
4258 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4259 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4260 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4261 return;
4262 }
4263 case ARMISD::VUZP: {
4264 EVT VT = N->getValueType(ResNo: 0);
4265 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4266 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4267 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4268 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4269 SDValue Pred = getAL(CurDAG, dl);
4270 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4271 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4272 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4273 return;
4274 }
4275 case ARMISD::VTRN: {
4276 EVT VT = N->getValueType(ResNo: 0);
4277 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4278 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4279 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4280 SDValue Pred = getAL(CurDAG, dl);
4281 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4282 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4283 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4284 return;
4285 }
4286 case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }

  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
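    // For 64-bit elements there is no real "dup": a single lane already fills
    // an entire D register, so the table falls back to a plain VLD1 for that
    // case.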
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    } else {
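      // With MVE, the deinterleaving load is split into two instructions
      // (VLD20/VLD21); only the final one performs the pointer writeback.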
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, true);
    }
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
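      // With MVE, the four-way deinterleaving load is split into four
      // instructions (VLD40..VLD43); only the final one performs the pointer
      // writeback.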
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, true);
    }
    return;
  }

  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */

      // The mrrc2 instruction in ARM mode does not allow predicates: the top
      // 4 bits of the encoding are always '1111'. Assembly syntax does accept
      // AL as a predicate on mrrc2, but it makes no difference to the encoded
      // instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
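        // In ARM mode, LDREXD/LDAEXD writes an even/odd GPR pair, modeled as
        // a single Untyped result; the i32 halves are extracted below with
        // the gsub_0/gsub_1 subregister indices.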
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(
              TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, SDValue(Ld, 0),
              SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(
              TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32, SDValue(Ld, 0),
              SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // A store-exclusive double returns an i32 value: the status of the
      // issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32-bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64-bit), extract the integer operands from
// its fields, and append them to the provided vector.
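// For example, "cp15:0:c13:c0:3" produces the operand list {15, 0, 13, 0, 3}.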
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a banked register string to its mask value. The returned mask is for
// use in the MRSbanked / MSRbanked instruction nodes as the banked register
// mask operand, which expresses which register is to be used, e.g. r8, and in
// which mode it is to be used, e.g. usr. Returns -1 to signify that the
// string was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores
// and those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, or -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
          .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                         // correct when flags are not permitted
          .Case("g", 0x1)
          .Case("nzcvq", 0x2)
          .Case("nzcvqg", 0x3)
          .Default(-1);
}

// Maps an M-class special register string to its value for use in the
// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R bit) in bit 4: the R
  // bit is 1 if the register is spsr, or 0 for cpsr/apsr. Bits 3-0 contain
  // the fields to be accessed in the special register, as set by the flags
  // provided with the register.
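  // For example, Reg == "spsr" with Flags == "fc" yields
  // 0x19 = R bit (0x10) | f (0x8) | c (0x1).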
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
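// For example, reading "fpscr" becomes a VMRS node, while reading "apsr" on
// an A/R-class core becomes an MRS node.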
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), we need to lower to an MRC node (32-bit) or an MRRC node
    // (64-bit); the distinction can be made from the number of operands we
    // have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on
  // the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M-class, we need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from
  // the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M-class, so we need to check whether the
  // string is one of the remaining possible values: apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), we need to lower to an MCR node (32-bit) or an MCRR node
    // (64-bit); the distinction can be made from the number of operands we
    // have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target is M-class, we need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by the H, Q, R modifiers, so we
  // still pack them into a GPRPair.
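  //
  // For example (illustrative source, not from this file), ARM-mode code like
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(V) : "r"(A));
  // needs its i64 output in an even/odd register pair, which the rewrite
  // below provides by switching such operands to the GPRPair class.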

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

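    // Only rewrite two-register operands that are either constrained to the
    // plain GPR class or tied to an operand we have already changed.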
    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
  case InlineAsm::ConstraintCode::Um:
  case InlineAsm::ConstraintCode::Un:
  case InlineAsm::ConstraintCode::Uq:
  case InlineAsm::ConstraintCode::Us:
  case InlineAsm::ConstraintCode::Ut:
  case InlineAsm::ConstraintCode::Uv:
  case InlineAsm::ConstraintCode::Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) {
  return new ARMDAGToDAGISelLegacy(TM, OptLevel);
}
