1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
16#include "MCTargetDesc/ARMAddressingModes.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SelectionDAGISel.h"
26#include "llvm/CodeGen/TargetLowering.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/LLVMContext.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Target/TargetOptions.h"
36#include <optional>
37
38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41#define PASS_NAME "ARM Instruction Selection"
42
43static cl::opt<bool>
44DisableShifterOp("disable-shifter-op", cl::Hidden,
45 cl::desc("Disable isel of shifter-op"),
46 cl::init(Val: false));
47
48//===--------------------------------------------------------------------===//
49/// ARMDAGToDAGISel - ARM specific code to select ARM machine
50/// instructions for SelectionDAG operations.
51///
52namespace {
53
54class ARMDAGToDAGISel : public SelectionDAGISel {
55 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
56 /// make the right decision when generating code for different targets.
57 const ARMSubtarget *Subtarget;
58
59public:
60 ARMDAGToDAGISel() = delete;
61
62 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
63 : SelectionDAGISel(tm, OptLevel) {}
64
65 bool runOnMachineFunction(MachineFunction &MF) override {
66 // Reset the subtarget each time through.
67 Subtarget = &MF.getSubtarget<ARMSubtarget>();
68 SelectionDAGISel::runOnMachineFunction(mf&: MF);
69 return true;
70 }
71
72 void PreprocessISelDAG() override;
73
74 /// getI32Imm - Return a target constant of type i32 with the specified
75 /// value.
76 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
77 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
78 }
79
80 void Select(SDNode *N) override;
81
82 /// Return true as some complex patterns, like those that call
83 /// canExtractShiftFromMul can modify the DAG inplace.
84 bool ComplexPatternFuncMutatesDAG() const override { return true; }
85
86 bool hasNoVMLxHazardUse(SDNode *N) const;
87 bool isShifterOpProfitable(const SDValue &Shift,
88 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
89 bool SelectRegShifterOperand(SDValue N, SDValue &A,
90 SDValue &B, SDValue &C,
91 bool CheckProfitability = true);
92 bool SelectImmShifterOperand(SDValue N, SDValue &A,
93 SDValue &B, bool CheckProfitability = true);
94 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
95 SDValue &C) {
96 // Don't apply the profitability check
97 return SelectRegShifterOperand(N, A, B, C, CheckProfitability: false);
98 }
99 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
100 // Don't apply the profitability check
101 return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
102 }
103 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
104 if (!N.hasOneUse())
105 return false;
106 return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
107 }
108
109 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
110
111 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
112 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
113
114 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
115 SDValue &Offset, SDValue &Opc);
116 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
117 SDValue &Offset, SDValue &Opc);
118 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
119 SDValue &Offset, SDValue &Opc);
120 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
121 bool SelectAddrMode3(SDValue N, SDValue &Base,
122 SDValue &Offset, SDValue &Opc);
123 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
124 SDValue &Offset, SDValue &Opc);
125 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
126 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
127 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
128 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
129 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
130
131 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
132
133 // Thumb Addressing Modes:
134 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
135 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
136 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
137 SDValue &OffImm);
138 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
139 SDValue &OffImm);
140 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
141 SDValue &OffImm);
142 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
143 SDValue &OffImm);
144 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
145 template <unsigned Shift>
146 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
147
148 // Thumb 2 Addressing Modes:
149 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
150 template <unsigned Shift>
151 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
152 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
153 SDValue &OffImm);
154 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
155 SDValue &OffImm);
156 template <unsigned Shift>
157 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
158 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
159 unsigned Shift);
160 template <unsigned Shift>
161 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
162 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
163 SDValue &OffReg, SDValue &ShImm);
164 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
165
166 template<int Min, int Max>
167 bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
168
169 inline bool is_so_imm(unsigned Imm) const {
170 return ARM_AM::getSOImmVal(Arg: Imm) != -1;
171 }
172
173 inline bool is_so_imm_not(unsigned Imm) const {
174 return ARM_AM::getSOImmVal(Arg: ~Imm) != -1;
175 }
176
177 inline bool is_t2_so_imm(unsigned Imm) const {
178 return ARM_AM::getT2SOImmVal(Arg: Imm) != -1;
179 }
180
181 inline bool is_t2_so_imm_not(unsigned Imm) const {
182 return ARM_AM::getT2SOImmVal(Arg: ~Imm) != -1;
183 }
184
185 // Include the pieces autogenerated from the target description.
186#include "ARMGenDAGISel.inc"
187
188private:
189 void transferMemOperands(SDNode *Src, SDNode *Dst);
190
191 /// Indexed (pre/post inc/dec) load matching code for ARM.
192 bool tryARMIndexedLoad(SDNode *N);
193 bool tryT1IndexedLoad(SDNode *N);
194 bool tryT2IndexedLoad(SDNode *N);
195 bool tryMVEIndexedLoad(SDNode *N);
196 bool tryFMULFixed(SDNode *N, SDLoc dl);
197 bool tryFP_TO_INT(SDNode *N, SDLoc dl);
198 bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
199 bool IsUnsigned,
200 bool FixedToFloat);
201
202 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
203 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
204 /// loads of D registers and even subregs and odd subregs of Q registers.
205 /// For NumVecs <= 2, QOpcodes1 is not used.
206 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
207 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
208 const uint16_t *QOpcodes1);
209
210 /// SelectVST - Select NEON store intrinsics. NumVecs should
211 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
212 /// stores of D registers and even subregs and odd subregs of Q registers.
213 /// For NumVecs <= 2, QOpcodes1 is not used.
214 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
215 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
216 const uint16_t *QOpcodes1);
217
218 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
219 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
220 /// load/store of D registers and Q registers.
221 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
222 unsigned NumVecs, const uint16_t *DOpcodes,
223 const uint16_t *QOpcodes);
224
225 /// Helper functions for setting up clusters of MVE predication operands.
226 template <typename SDValueVector>
227 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
228 SDValue PredicateMask);
229 template <typename SDValueVector>
230 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
231 SDValue PredicateMask, SDValue Inactive);
232
233 template <typename SDValueVector>
234 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
235 template <typename SDValueVector>
236 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
237
238 /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
239 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
240
241 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
242 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
243 bool HasSaturationOperand);
244
245 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
246 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
247 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
248
249 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
250 /// vector lanes.
251 void SelectMVE_VSHLC(SDNode *N, bool Predicated);
252
253 /// Select long MVE vector reductions with two vector operands
254 /// Stride is the number of vector element widths the instruction can operate
255 /// on:
256 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
257 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
258 /// Stride is used when addressing the OpcodesS array which contains multiple
259 /// opcodes for each element width.
260 /// TySize is the index into the list of element types listed above
261 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
262 const uint16_t *OpcodesS, const uint16_t *OpcodesU,
263 size_t Stride, size_t TySize);
264
265 /// Select a 64-bit MVE vector reduction with two vector operands
266 /// arm_mve_vmlldava_[predicated]
267 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
268 const uint16_t *OpcodesU);
269 /// Select a 72-bit MVE vector rounding reduction with two vector operands
270 /// int_arm_mve_vrmlldavha[_predicated]
271 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
272 const uint16_t *OpcodesU);
273
274 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
275 /// should be 2 or 4. The opcode array specifies the instructions
276 /// used for 8, 16 and 32-bit lane sizes respectively, and each
277 /// pointer points to a set of NumVecs sub-opcodes used for the
278 /// different stages (e.g. VLD20 versus VLD21) of each load family.
279 void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
280 const uint16_t *const *Opcodes, bool HasWriteback);
281
282 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
283 /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
284 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
285 bool Wrapping, bool Predicated);
286
287 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
288 /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
289 /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
290 /// the accumulator and the immediate operand, i.e. 0
291 /// for CX1*, 1 for CX2*, 2 for CX3*
292 /// \arg \c HasAccum whether the instruction has an accumulator operand
293 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
294 bool HasAccum);
295
296 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
297 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
298 /// for loading D registers.
299 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
300 unsigned NumVecs, const uint16_t *DOpcodes,
301 const uint16_t *QOpcodes0 = nullptr,
302 const uint16_t *QOpcodes1 = nullptr);
303
304 /// Try to select SBFX/UBFX instructions for ARM.
305 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
306
307 bool tryInsertVectorElt(SDNode *N);
308
309 bool tryShiftAmountMod(SDNode *N);
310
311 bool tryReadRegister(SDNode *N);
312 bool tryWriteRegister(SDNode *N);
313
314 bool tryInlineAsm(SDNode *N);
315
316 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
317
318 void SelectCMP_SWAP(SDNode *N);
319
320 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
321 /// inline asm expressions.
322 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
323 InlineAsm::ConstraintCode ConstraintID,
324 std::vector<SDValue> &OutOps) override;
325
326 // Form pairs of consecutive R, S, D, or Q registers.
327 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
328 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
329 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
330 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
331
332 // Form sequences of 4 consecutive S, D, or Q registers.
333 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
334 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
335 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
336
337 // Get the alignment operand for a NEON VLD or VST instruction.
338 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
339 bool is64BitVector);
340
341 /// Checks if N is a multiplication by a constant where we can extract out a
342 /// power of two from the constant so that it can be used in a shift, but only
343 /// if it simplifies the materialization of the constant. Returns true if it
344 /// is, and assigns to PowerOfTwo the power of two that should be extracted
345 /// out and to NewMulConst the new constant to be multiplied by.
346 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
347 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
348
349 /// Replace N with M in CurDAG, in a way that also ensures that M gets
350 /// selected when N would have been selected.
351 void replaceDAGValue(const SDValue &N, SDValue M);
352};
353
354class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
355public:
356 static char ID;
357 ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
358 : SelectionDAGISelLegacy(
359 ID, std::make_unique<ARMDAGToDAGISel>(args&: tm, args&: OptLevel)) {}
360};
361}
362
363char ARMDAGToDAGISelLegacy::ID = 0;
364
365INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
366
367/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
368/// operand. If so Imm will receive the 32-bit value.
369static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
370 if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: 0) == MVT::i32) {
371 Imm = N->getAsZExtVal();
372 return true;
373 }
374 return false;
375}
376
377// isInt32Immediate - This method tests to see if a constant operand.
378// If so Imm will receive the 32 bit value.
379static bool isInt32Immediate(SDValue N, unsigned &Imm) {
380 return isInt32Immediate(N: N.getNode(), Imm);
381}
382
383// isOpcWithIntImmediate - This method tests to see if the node is a specific
384// opcode and that it has a immediate integer right operand.
385// If so Imm will receive the 32 bit value.
386static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
387 return N->getOpcode() == Opc &&
388 isInt32Immediate(N: N->getOperand(Num: 1).getNode(), Imm);
389}
390
391/// Check whether a particular node is a constant value representable as
392/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
393///
394/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
395static bool isScaledConstantInRange(SDValue Node, int Scale,
396 int RangeMin, int RangeMax,
397 int &ScaledConstant) {
398 assert(Scale > 0 && "Invalid scale!");
399
400 // Check that this is a constant.
401 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Node);
402 if (!C)
403 return false;
404
405 ScaledConstant = (int) C->getZExtValue();
406 if ((ScaledConstant % Scale) != 0)
407 return false;
408
409 ScaledConstant /= Scale;
410 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
411}
412
413void ARMDAGToDAGISel::PreprocessISelDAG() {
414 if (!Subtarget->hasV6T2Ops())
415 return;
416
417 bool isThumb2 = Subtarget->isThumb();
418 // We use make_early_inc_range to avoid invalidation issues.
419 for (SDNode &N : llvm::make_early_inc_range(Range: CurDAG->allnodes())) {
420 if (N.getOpcode() != ISD::ADD)
421 continue;
422
423 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
424 // leading zeros, followed by consecutive set bits, followed by 1 or 2
425 // trailing zeros, e.g. 1020.
426 // Transform the expression to
427 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
428 // of trailing zeros of c2. The left shift would be folded as an shifter
429 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
430 // node (UBFX).
431
432 SDValue N0 = N.getOperand(Num: 0);
433 SDValue N1 = N.getOperand(Num: 1);
434 unsigned And_imm = 0;
435 if (!isOpcWithIntImmediate(N: N1.getNode(), Opc: ISD::AND, Imm&: And_imm)) {
436 if (isOpcWithIntImmediate(N: N0.getNode(), Opc: ISD::AND, Imm&: And_imm))
437 std::swap(a&: N0, b&: N1);
438 }
439 if (!And_imm)
440 continue;
441
442 // Check if the AND mask is an immediate of the form: 000.....1111111100
443 unsigned TZ = llvm::countr_zero(Val: And_imm);
444 if (TZ != 1 && TZ != 2)
445 // Be conservative here. Shifter operands aren't always free. e.g. On
446 // Swift, left shifter operand of 1 / 2 for free but others are not.
447 // e.g.
448 // ubfx r3, r1, #16, #8
449 // ldr.w r3, [r0, r3, lsl #2]
450 // vs.
451 // mov.w r9, #1020
452 // and.w r2, r9, r1, lsr #14
453 // ldr r2, [r0, r2]
454 continue;
455 And_imm >>= TZ;
456 if (And_imm & (And_imm + 1))
457 continue;
458
459 // Look for (and (srl X, c1), c2).
460 SDValue Srl = N1.getOperand(i: 0);
461 unsigned Srl_imm = 0;
462 if (!isOpcWithIntImmediate(N: Srl.getNode(), Opc: ISD::SRL, Imm&: Srl_imm) ||
463 (Srl_imm <= 2))
464 continue;
465
466 // Make sure first operand is not a shifter operand which would prevent
467 // folding of the left shift.
468 SDValue CPTmp0;
469 SDValue CPTmp1;
470 SDValue CPTmp2;
471 if (isThumb2) {
472 if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1))
473 continue;
474 } else {
475 if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1) ||
476 SelectRegShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1, C&: CPTmp2))
477 continue;
478 }
479
480 // Now make the transformation.
481 Srl = CurDAG->getNode(Opcode: ISD::SRL, DL: SDLoc(Srl), VT: MVT::i32,
482 N1: Srl.getOperand(i: 0),
483 N2: CurDAG->getConstant(Val: Srl_imm + TZ, DL: SDLoc(Srl),
484 VT: MVT::i32));
485 N1 = CurDAG->getNode(Opcode: ISD::AND, DL: SDLoc(N1), VT: MVT::i32,
486 N1: Srl,
487 N2: CurDAG->getConstant(Val: And_imm, DL: SDLoc(Srl), VT: MVT::i32));
488 N1 = CurDAG->getNode(Opcode: ISD::SHL, DL: SDLoc(N1), VT: MVT::i32,
489 N1, N2: CurDAG->getConstant(Val: TZ, DL: SDLoc(Srl), VT: MVT::i32));
490 CurDAG->UpdateNodeOperands(N: &N, Op1: N0, Op2: N1);
491 }
492}
493
494/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
495/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
496/// least on current ARM implementations) which should be avoidded.
497bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
498 if (OptLevel == CodeGenOptLevel::None)
499 return true;
500
501 if (!Subtarget->hasVMLxHazards())
502 return true;
503
504 if (!N->hasOneUse())
505 return false;
506
507 SDNode *User = *N->user_begin();
508 if (User->getOpcode() == ISD::CopyToReg)
509 return true;
510 if (User->isMachineOpcode()) {
511 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
512 CurDAG->getSubtarget().getInstrInfo());
513
514 const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
515 if (MCID.mayStore())
516 return true;
517 unsigned Opcode = MCID.getOpcode();
518 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
519 return true;
520 // vmlx feeding into another vmlx. We actually want to unfold
521 // the use later in the MLxExpansion pass. e.g.
522 // vmla
523 // vmla (stall 8 cycles)
524 //
525 // vmul (5 cycles)
526 // vadd (5 cycles)
527 // vmla
528 // This adds up to about 18 - 19 cycles.
529 //
530 // vmla
531 // vmul (stall 4 cycles)
532 // vadd adds up to about 14 cycles.
533 return TII->isFpMLxInstruction(Opcode);
534 }
535
536 return false;
537}
538
539bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
540 ARM_AM::ShiftOpc ShOpcVal,
541 unsigned ShAmt) {
542 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
543 return true;
544 if (Shift.hasOneUse())
545 return true;
546 // R << 2 is free.
547 return ShOpcVal == ARM_AM::lsl &&
548 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
549}
550
551bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
552 unsigned MaxShift,
553 unsigned &PowerOfTwo,
554 SDValue &NewMulConst) const {
555 assert(N.getOpcode() == ISD::MUL);
556 assert(MaxShift > 0);
557
558 // If the multiply is used in more than one place then changing the constant
559 // will make other uses incorrect, so don't.
560 if (!N.hasOneUse()) return false;
561 // Check if the multiply is by a constant
562 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
563 if (!MulConst) return false;
564 // If the constant is used in more than one place then modifying it will mean
565 // we need to materialize two constants instead of one, which is a bad idea.
566 if (!MulConst->hasOneUse()) return false;
567 unsigned MulConstVal = MulConst->getZExtValue();
568 if (MulConstVal == 0) return false;
569
570 // Find the largest power of 2 that MulConstVal is a multiple of
571 PowerOfTwo = MaxShift;
572 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
573 --PowerOfTwo;
574 if (PowerOfTwo == 0) return false;
575 }
576
577 // Only optimise if the new cost is better
578 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
579 NewMulConst = CurDAG->getConstant(Val: NewMulConstVal, DL: SDLoc(N), VT: MVT::i32);
580 unsigned OldCost = ConstantMaterializationCost(Val: MulConstVal, Subtarget);
581 unsigned NewCost = ConstantMaterializationCost(Val: NewMulConstVal, Subtarget);
582 return NewCost < OldCost;
583}
584
585void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
586 CurDAG->RepositionNode(Position: N.getNode()->getIterator(), N: M.getNode());
587 ReplaceUses(F: N, T: M);
588}
589
590bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
591 SDValue &BaseReg,
592 SDValue &Opc,
593 bool CheckProfitability) {
594 if (DisableShifterOp)
595 return false;
596
597 // If N is a multiply-by-constant and it's profitable to extract a shift and
598 // use it in a shifted operand do so.
599 if (N.getOpcode() == ISD::MUL) {
600 unsigned PowerOfTwo = 0;
601 SDValue NewMulConst;
602 if (canExtractShiftFromMul(N, MaxShift: 31, PowerOfTwo, NewMulConst)) {
603 HandleSDNode Handle(N);
604 SDLoc Loc(N);
605 replaceDAGValue(N: N.getOperand(i: 1), M: NewMulConst);
606 BaseReg = Handle.getValue();
607 Opc = CurDAG->getTargetConstant(
608 Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: PowerOfTwo), DL: Loc, VT: MVT::i32);
609 return true;
610 }
611 }
612
613 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
614
615 // Don't match base register only case. That is matched to a separate
616 // lower complexity pattern with explicit register operand.
617 if (ShOpcVal == ARM_AM::no_shift) return false;
618
619 BaseReg = N.getOperand(i: 0);
620 unsigned ShImmVal = 0;
621 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
622 if (!RHS) return false;
623 ShImmVal = RHS->getZExtValue() & 31;
624 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
625 DL: SDLoc(N), VT: MVT::i32);
626 return true;
627}
628
629bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
630 SDValue &BaseReg,
631 SDValue &ShReg,
632 SDValue &Opc,
633 bool CheckProfitability) {
634 if (DisableShifterOp)
635 return false;
636
637 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
638
639 // Don't match base register only case. That is matched to a separate
640 // lower complexity pattern with explicit register operand.
641 if (ShOpcVal == ARM_AM::no_shift) return false;
642
643 BaseReg = N.getOperand(i: 0);
644 unsigned ShImmVal = 0;
645 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
646 if (RHS) return false;
647
648 ShReg = N.getOperand(i: 1);
649 if (CheckProfitability && !isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt: ShImmVal))
650 return false;
651 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
652 DL: SDLoc(N), VT: MVT::i32);
653 return true;
654}
655
656// Determine whether an ISD::OR's operands are suitable to turn the operation
657// into an addition, which often has more compact encodings.
658bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
659 assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
660 Out = N;
661 return CurDAG->haveNoCommonBitsSet(A: N, B: Parent->getOperand(Num: 1));
662}
663
664
665bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
666 SDValue &Base,
667 SDValue &OffImm) {
668 // Match simple R + imm12 operands.
669
670 // Base only.
671 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
672 !CurDAG->isBaseWithConstantOffset(Op: N)) {
673 if (N.getOpcode() == ISD::FrameIndex) {
674 // Match frame index.
675 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
676 Base = CurDAG->getTargetFrameIndex(
677 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
678 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
679 return true;
680 }
681
682 if (N.getOpcode() == ARMISD::Wrapper &&
683 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
684 N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
685 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
686 Base = N.getOperand(i: 0);
687 } else
688 Base = N;
689 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
690 return true;
691 }
692
693 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
694 int RHSC = (int)RHS->getSExtValue();
695 if (N.getOpcode() == ISD::SUB)
696 RHSC = -RHSC;
697
698 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
699 Base = N.getOperand(i: 0);
700 if (Base.getOpcode() == ISD::FrameIndex) {
701 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
702 Base = CurDAG->getTargetFrameIndex(
703 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
704 }
705 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
706 return true;
707 }
708 }
709
710 // Base only.
711 Base = N;
712 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
713 return true;
714}
715
716
717
718bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
719 SDValue &Opc) {
720 if (N.getOpcode() == ISD::MUL &&
721 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
722 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
723 // X * [3,5,9] -> X + X * [2,4,8] etc.
724 int RHSC = (int)RHS->getZExtValue();
725 if (RHSC & 1) {
726 RHSC = RHSC & ~1;
727 ARM_AM::AddrOpc AddSub = ARM_AM::add;
728 if (RHSC < 0) {
729 AddSub = ARM_AM::sub;
730 RHSC = - RHSC;
731 }
732 if (isPowerOf2_32(Value: RHSC)) {
733 unsigned ShAmt = Log2_32(Value: RHSC);
734 Base = Offset = N.getOperand(i: 0);
735 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt,
736 SO: ARM_AM::lsl),
737 DL: SDLoc(N), VT: MVT::i32);
738 return true;
739 }
740 }
741 }
742 }
743
744 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
745 // ISD::OR that is equivalent to an ISD::ADD.
746 !CurDAG->isBaseWithConstantOffset(Op: N))
747 return false;
748
749 // Leave simple R +/- imm12 operands for LDRi12
750 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
751 int RHSC;
752 if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/1,
753 RangeMin: -0x1000+1, RangeMax: 0x1000, ScaledConstant&: RHSC)) // 12 bits.
754 return false;
755 }
756
757 // Otherwise this is R +/- [possibly shifted] R.
758 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
759 ARM_AM::ShiftOpc ShOpcVal =
760 ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: 1).getOpcode());
761 unsigned ShAmt = 0;
762
763 Base = N.getOperand(i: 0);
764 Offset = N.getOperand(i: 1);
765
766 if (ShOpcVal != ARM_AM::no_shift) {
767 // Check to see if the RHS of the shift is a constant, if not, we can't fold
768 // it.
769 if (ConstantSDNode *Sh =
770 dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getOperand(i: 1))) {
771 ShAmt = Sh->getZExtValue();
772 if (isShifterOpProfitable(Shift: Offset, ShOpcVal, ShAmt))
773 Offset = N.getOperand(i: 1).getOperand(i: 0);
774 else {
775 ShAmt = 0;
776 ShOpcVal = ARM_AM::no_shift;
777 }
778 } else {
779 ShOpcVal = ARM_AM::no_shift;
780 }
781 }
782
783 // Try matching (R shl C) + (R).
784 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
785 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
786 N.getOperand(i: 0).hasOneUse())) {
787 ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: 0).getOpcode());
788 if (ShOpcVal != ARM_AM::no_shift) {
789 // Check to see if the RHS of the shift is a constant, if not, we can't
790 // fold it.
791 if (ConstantSDNode *Sh =
792 dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 0).getOperand(i: 1))) {
793 ShAmt = Sh->getZExtValue();
794 if (isShifterOpProfitable(Shift: N.getOperand(i: 0), ShOpcVal, ShAmt)) {
795 Offset = N.getOperand(i: 0).getOperand(i: 0);
796 Base = N.getOperand(i: 1);
797 } else {
798 ShAmt = 0;
799 ShOpcVal = ARM_AM::no_shift;
800 }
801 } else {
802 ShOpcVal = ARM_AM::no_shift;
803 }
804 }
805 }
806
807 // If Offset is a multiply-by-constant and it's profitable to extract a shift
808 // and use it in a shifted operand do so.
809 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
810 unsigned PowerOfTwo = 0;
811 SDValue NewMulConst;
812 if (canExtractShiftFromMul(N: Offset, MaxShift: 31, PowerOfTwo, NewMulConst)) {
813 HandleSDNode Handle(Offset);
814 replaceDAGValue(N: Offset.getOperand(i: 1), M: NewMulConst);
815 Offset = Handle.getValue();
816 ShAmt = PowerOfTwo;
817 ShOpcVal = ARM_AM::lsl;
818 }
819 }
820
821 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
822 DL: SDLoc(N), VT: MVT::i32);
823 return true;
824}
825
826bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
827 SDValue &Offset, SDValue &Opc) {
828 unsigned Opcode = Op->getOpcode();
829 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
830 ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
831 : cast<StoreSDNode>(Val: Op)->getAddressingMode();
832 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
833 ? ARM_AM::add : ARM_AM::sub;
834 int Val;
835 if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val))
836 return false;
837
838 Offset = N;
839 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
840 unsigned ShAmt = 0;
841 if (ShOpcVal != ARM_AM::no_shift) {
842 // Check to see if the RHS of the shift is a constant, if not, we can't fold
843 // it.
844 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
845 ShAmt = Sh->getZExtValue();
846 if (isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt))
847 Offset = N.getOperand(i: 0);
848 else {
849 ShAmt = 0;
850 ShOpcVal = ARM_AM::no_shift;
851 }
852 } else {
853 ShOpcVal = ARM_AM::no_shift;
854 }
855 }
856
857 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
858 DL: SDLoc(N), VT: MVT::i32);
859 return true;
860}
861
862bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
863 SDValue &Offset, SDValue &Opc) {
864 unsigned Opcode = Op->getOpcode();
865 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
866 ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
867 : cast<StoreSDNode>(Val: Op)->getAddressingMode();
868 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
869 ? ARM_AM::add : ARM_AM::sub;
870 int Val;
871 if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val)) { // 12 bits.
872 if (AddSub == ARM_AM::sub) Val *= -1;
873 Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
874 Opc = CurDAG->getSignedTargetConstant(Val, DL: SDLoc(Op), VT: MVT::i32);
875 return true;
876 }
877
878 return false;
879}
880
881
882bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
883 SDValue &Offset, SDValue &Opc) {
884 unsigned Opcode = Op->getOpcode();
885 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
886 ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
887 : cast<StoreSDNode>(Val: Op)->getAddressingMode();
888 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
889 ? ARM_AM::add : ARM_AM::sub;
890 int Val;
891 if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val)) { // 12 bits.
892 Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
893 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: Val,
894 SO: ARM_AM::no_shift),
895 DL: SDLoc(Op), VT: MVT::i32);
896 return true;
897 }
898
899 return false;
900}
901
902bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
903 Base = N;
904 return true;
905}
906
907bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
908 SDValue &Base, SDValue &Offset,
909 SDValue &Opc) {
910 if (N.getOpcode() == ISD::SUB) {
911 // X - C is canonicalize to X + -C, no need to handle it here.
912 Base = N.getOperand(i: 0);
913 Offset = N.getOperand(i: 1);
914 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::sub, Offset: 0), DL: SDLoc(N),
915 VT: MVT::i32);
916 return true;
917 }
918
919 if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
920 Base = N;
921 if (N.getOpcode() == ISD::FrameIndex) {
922 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
923 Base = CurDAG->getTargetFrameIndex(
924 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
925 }
926 Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
927 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: 0), DL: SDLoc(N),
928 VT: MVT::i32);
929 return true;
930 }
931
932 // If the RHS is +/- imm8, fold into addr mode.
933 int RHSC;
934 if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/1,
935 RangeMin: -256 + 1, RangeMax: 256, ScaledConstant&: RHSC)) { // 8 bits.
936 Base = N.getOperand(i: 0);
937 if (Base.getOpcode() == ISD::FrameIndex) {
938 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
939 Base = CurDAG->getTargetFrameIndex(
940 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
941 }
942 Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
943
944 ARM_AM::AddrOpc AddSub = ARM_AM::add;
945 if (RHSC < 0) {
946 AddSub = ARM_AM::sub;
947 RHSC = -RHSC;
948 }
949 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: RHSC), DL: SDLoc(N),
950 VT: MVT::i32);
951 return true;
952 }
953
954 Base = N.getOperand(i: 0);
955 Offset = N.getOperand(i: 1);
956 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: 0), DL: SDLoc(N),
957 VT: MVT::i32);
958 return true;
959}
960
961bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
962 SDValue &Offset, SDValue &Opc) {
963 unsigned Opcode = Op->getOpcode();
964 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
965 ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
966 : cast<StoreSDNode>(Val: Op)->getAddressingMode();
967 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
968 ? ARM_AM::add : ARM_AM::sub;
969 int Val;
970 if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 256, ScaledConstant&: Val)) { // 12 bits.
971 Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
972 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: Val), DL: SDLoc(Op),
973 VT: MVT::i32);
974 return true;
975 }
976
977 Offset = N;
978 Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: 0), DL: SDLoc(Op),
979 VT: MVT::i32);
980 return true;
981}
982
983bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
984 bool FP16) {
985 if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
986 Base = N;
987 if (N.getOpcode() == ISD::FrameIndex) {
988 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
989 Base = CurDAG->getTargetFrameIndex(
990 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
991 } else if (N.getOpcode() == ARMISD::Wrapper &&
992 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
993 N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
994 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
995 Base = N.getOperand(i: 0);
996 }
997 Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: 0),
998 DL: SDLoc(N), VT: MVT::i32);
999 return true;
1000 }
1001
1002 // If the RHS is +/- imm8, fold into addr mode.
1003 int RHSC;
1004 const int Scale = FP16 ? 2 : 4;
1005
1006 if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale, RangeMin: -255, RangeMax: 256, ScaledConstant&: RHSC)) {
1007 Base = N.getOperand(i: 0);
1008 if (Base.getOpcode() == ISD::FrameIndex) {
1009 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1010 Base = CurDAG->getTargetFrameIndex(
1011 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1012 }
1013
1014 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1015 if (RHSC < 0) {
1016 AddSub = ARM_AM::sub;
1017 RHSC = -RHSC;
1018 }
1019
1020 if (FP16)
1021 Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: AddSub, Offset: RHSC),
1022 DL: SDLoc(N), VT: MVT::i32);
1023 else
1024 Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: AddSub, Offset: RHSC),
1025 DL: SDLoc(N), VT: MVT::i32);
1026
1027 return true;
1028 }
1029
1030 Base = N;
1031
1032 if (FP16)
1033 Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: ARM_AM::add, Offset: 0),
1034 DL: SDLoc(N), VT: MVT::i32);
1035 else
1036 Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: 0),
1037 DL: SDLoc(N), VT: MVT::i32);
1038
1039 return true;
1040}
1041
1042bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1043 SDValue &Base, SDValue &Offset) {
1044 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1045}
1046
1047bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1048 SDValue &Base, SDValue &Offset) {
1049 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1050}
1051
1052bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1053 SDValue &Align) {
1054 Addr = N;
1055
1056 unsigned Alignment = 0;
1057
1058 MemSDNode *MemN = cast<MemSDNode>(Val: Parent);
1059
1060 if (isa<LSBaseSDNode>(Val: MemN) ||
1061 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1062 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1063 MemN->getConstantOperandVal(Num: MemN->getNumOperands() - 1) == 1)) {
1064 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1065 // The maximum alignment is equal to the memory size being referenced.
1066 llvm::Align MMOAlign = MemN->getAlign();
1067 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1068 if (MMOAlign.value() >= MemSize && MemSize > 1)
1069 Alignment = MemSize;
1070 } else {
1071 // All other uses of addrmode6 are for intrinsics. For now just record
1072 // the raw alignment value; it will be refined later based on the legal
1073 // alignment operands for the intrinsic.
1074 Alignment = MemN->getAlign().value();
1075 }
1076
1077 Align = CurDAG->getTargetConstant(Val: Alignment, DL: SDLoc(N), VT: MVT::i32);
1078 return true;
1079}
1080
1081bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1082 SDValue &Offset) {
1083 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Val: Op);
1084 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1085 if (AM != ISD::POST_INC)
1086 return false;
1087 Offset = N;
1088 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(Val&: N)) {
1089 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1090 Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
1091 }
1092 return true;
1093}
1094
1095bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1096 SDValue &Offset, SDValue &Label) {
1097 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1098 Offset = N.getOperand(i: 0);
1099 SDValue N1 = N.getOperand(i: 1);
1100 Label = CurDAG->getTargetConstant(Val: N1->getAsZExtVal(), DL: SDLoc(N), VT: MVT::i32);
1101 return true;
1102 }
1103
1104 return false;
1105}
1106
1107
1108//===----------------------------------------------------------------------===//
1109// Thumb Addressing Modes
1110//===----------------------------------------------------------------------===//
1111
1112static bool shouldUseZeroOffsetLdSt(SDValue N) {
1113 // Negative numbers are difficult to materialise in thumb1. If we are
1114 // selecting the add of a negative, instead try to select ri with a zero
1115 // offset, so create the add node directly which will become a sub.
1116 if (N.getOpcode() != ISD::ADD)
1117 return false;
1118
1119 // Look for an imm which is not legal for ld/st, but is legal for sub.
1120 if (auto C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1)))
1121 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1122
1123 return false;
1124}
1125
1126bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1127 SDValue &Offset) {
1128 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N)) {
1129 if (!isNullConstant(V: N))
1130 return false;
1131
1132 Base = Offset = N;
1133 return true;
1134 }
1135
1136 Base = N.getOperand(i: 0);
1137 Offset = N.getOperand(i: 1);
1138 return true;
1139}
1140
1141bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1142 SDValue &Offset) {
1143 if (shouldUseZeroOffsetLdSt(N))
1144 return false; // Select ri instead
1145 return SelectThumbAddrModeRRSext(N, Base, Offset);
1146}
1147
1148bool
1149ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1150 SDValue &Base, SDValue &OffImm) {
1151 if (shouldUseZeroOffsetLdSt(N)) {
1152 Base = N;
1153 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1154 return true;
1155 }
1156
1157 if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
1158 if (N.getOpcode() == ISD::ADD) {
1159 return false; // We want to select register offset instead
1160 } else if (N.getOpcode() == ARMISD::Wrapper &&
1161 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
1162 N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
1163 N.getOperand(i: 0).getOpcode() != ISD::TargetConstantPool &&
1164 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1165 Base = N.getOperand(i: 0);
1166 } else {
1167 Base = N;
1168 }
1169
1170 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1171 return true;
1172 }
1173
1174 // If the RHS is + imm5 * scale, fold into addr mode.
1175 int RHSC;
1176 if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale, RangeMin: 0, RangeMax: 32, ScaledConstant&: RHSC)) {
1177 Base = N.getOperand(i: 0);
1178 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
1179 return true;
1180 }
1181
1182 // Offset is too large, so use register offset instead.
1183 return false;
1184}
1185
1186bool
1187ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1188 SDValue &OffImm) {
1189 return SelectThumbAddrModeImm5S(N, Scale: 4, Base, OffImm);
1190}
1191
1192bool
1193ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1194 SDValue &OffImm) {
1195 return SelectThumbAddrModeImm5S(N, Scale: 2, Base, OffImm);
1196}
1197
1198bool
1199ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1200 SDValue &OffImm) {
1201 return SelectThumbAddrModeImm5S(N, Scale: 1, Base, OffImm);
1202}
1203
1204bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1205 SDValue &Base, SDValue &OffImm) {
1206 if (N.getOpcode() == ISD::FrameIndex) {
1207 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1208 // Only multiples of 4 are allowed for the offset, so the frame object
1209 // alignment must be at least 4.
1210 MachineFrameInfo &MFI = MF->getFrameInfo();
1211 if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
1212 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
1213 Base = CurDAG->getTargetFrameIndex(
1214 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1215 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1216 return true;
1217 }
1218
1219 if (!CurDAG->isBaseWithConstantOffset(Op: N))
1220 return false;
1221
1222 if (N.getOperand(i: 0).getOpcode() == ISD::FrameIndex) {
1223 // If the RHS is + imm8 * scale, fold into addr mode.
1224 int RHSC;
1225 if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC)) {
1226 Base = N.getOperand(i: 0);
1227 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1228 // Make sure the offset is inside the object, or we might fail to
1229 // allocate an emergency spill slot. (An out-of-range access is UB, but
1230 // it could show up anyway.)
1231 MachineFrameInfo &MFI = MF->getFrameInfo();
1232 if (RHSC * 4 < MFI.getObjectSize(ObjectIdx: FI)) {
1233 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1234 // indexed by the LHS must be 4-byte aligned.
1235 if (!MFI.isFixedObjectIndex(ObjectIdx: FI) && MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
1236 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
1237 if (MFI.getObjectAlign(ObjectIdx: FI) >= Align(4)) {
1238 Base = CurDAG->getTargetFrameIndex(
1239 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1240 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
1241 return true;
1242 }
1243 }
1244 }
1245 }
1246
1247 return false;
1248}
1249
1250template <unsigned Shift>
1251bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1252 SDValue &OffImm) {
1253 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
1254 int RHSC;
1255 if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -0x7f, RangeMax: 0x80,
1256 ScaledConstant&: RHSC)) {
1257 Base = N.getOperand(i: 0);
1258 if (N.getOpcode() == ISD::SUB)
1259 RHSC = -RHSC;
1260 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
1261 VT: MVT::i32);
1262 return true;
1263 }
1264 }
1265
1266 // Base only.
1267 Base = N;
1268 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1269 return true;
1270}
1271
1272
1273//===----------------------------------------------------------------------===//
1274// Thumb 2 Addressing Modes
1275//===----------------------------------------------------------------------===//
1276
1277
1278bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1279 SDValue &Base, SDValue &OffImm) {
1280 // Match simple R + imm12 operands.
1281
1282 // Base only.
1283 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1284 !CurDAG->isBaseWithConstantOffset(Op: N)) {
1285 if (N.getOpcode() == ISD::FrameIndex) {
1286 // Match frame index.
1287 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1288 Base = CurDAG->getTargetFrameIndex(
1289 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1290 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1291 return true;
1292 }
1293
1294 if (N.getOpcode() == ARMISD::Wrapper &&
1295 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
1296 N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
1297 N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1298 Base = N.getOperand(i: 0);
1299 if (Base.getOpcode() == ISD::TargetConstantPool)
1300 return false; // We want to select t2LDRpci instead.
1301 } else
1302 Base = N;
1303 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1304 return true;
1305 }
1306
1307 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1308 if (SelectT2AddrModeImm8(N, Base, OffImm))
1309 // Let t2LDRi8 handle (R - imm8).
1310 return false;
1311
1312 int RHSC = (int)RHS->getZExtValue();
1313 if (N.getOpcode() == ISD::SUB)
1314 RHSC = -RHSC;
1315
1316 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1317 Base = N.getOperand(i: 0);
1318 if (Base.getOpcode() == ISD::FrameIndex) {
1319 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1320 Base = CurDAG->getTargetFrameIndex(
1321 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1322 }
1323 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
1324 return true;
1325 }
1326 }
1327
1328 // Base only.
1329 Base = N;
1330 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1331 return true;
1332}
1333
1334template <unsigned Shift>
1335bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1336 SDValue &OffImm) {
1337 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
1338 int RHSC;
1339 if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -255, RangeMax: 256, ScaledConstant&: RHSC)) {
1340 Base = N.getOperand(i: 0);
1341 if (Base.getOpcode() == ISD::FrameIndex) {
1342 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1343 Base = CurDAG->getTargetFrameIndex(
1344 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1345 }
1346
1347 if (N.getOpcode() == ISD::SUB)
1348 RHSC = -RHSC;
1349 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
1350 VT: MVT::i32);
1351 return true;
1352 }
1353 }
1354
1355 // Base only.
1356 Base = N;
1357 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1358 return true;
1359}
1360
1361bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1362 SDValue &Base, SDValue &OffImm) {
1363 // Match simple R - imm8 operands.
1364 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1365 !CurDAG->isBaseWithConstantOffset(Op: N))
1366 return false;
1367
1368 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1369 int RHSC = (int)RHS->getSExtValue();
1370 if (N.getOpcode() == ISD::SUB)
1371 RHSC = -RHSC;
1372
1373 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1374 Base = N.getOperand(i: 0);
1375 if (Base.getOpcode() == ISD::FrameIndex) {
1376 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1377 Base = CurDAG->getTargetFrameIndex(
1378 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1379 }
1380 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
1381 return true;
1382 }
1383 }
1384
1385 return false;
1386}
1387
1388bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1389 SDValue &OffImm){
1390 unsigned Opcode = Op->getOpcode();
1391 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1392 ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
1393 : cast<StoreSDNode>(Val: Op)->getAddressingMode();
1394 int RHSC;
1395 if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x100, ScaledConstant&: RHSC)) { // 8 bits.
1396 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1397 ? CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32)
1398 : CurDAG->getSignedTargetConstant(Val: -RHSC, DL: SDLoc(N), VT: MVT::i32);
1399 return true;
1400 }
1401
1402 return false;
1403}
1404
1405template <unsigned Shift>
1406bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1407 SDValue &OffImm) {
1408 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
1409 int RHSC;
1410 if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -0x7f, RangeMax: 0x80,
1411 ScaledConstant&: RHSC)) {
1412 Base = N.getOperand(i: 0);
1413 if (Base.getOpcode() == ISD::FrameIndex) {
1414 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1415 Base = CurDAG->getTargetFrameIndex(
1416 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1417 }
1418
1419 if (N.getOpcode() == ISD::SUB)
1420 RHSC = -RHSC;
1421 OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
1422 VT: MVT::i32);
1423 return true;
1424 }
1425 }
1426
1427 // Base only.
1428 Base = N;
1429 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1430 return true;
1431}
1432
1433template <unsigned Shift>
1434bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1435 SDValue &OffImm) {
1436 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1437}
1438
1439bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1440 SDValue &OffImm,
1441 unsigned Shift) {
1442 unsigned Opcode = Op->getOpcode();
1443 ISD::MemIndexedMode AM;
1444 switch (Opcode) {
1445 case ISD::LOAD:
1446 AM = cast<LoadSDNode>(Val: Op)->getAddressingMode();
1447 break;
1448 case ISD::STORE:
1449 AM = cast<StoreSDNode>(Val: Op)->getAddressingMode();
1450 break;
1451 case ISD::MLOAD:
1452 AM = cast<MaskedLoadSDNode>(Val: Op)->getAddressingMode();
1453 break;
1454 case ISD::MSTORE:
1455 AM = cast<MaskedStoreSDNode>(Val: Op)->getAddressingMode();
1456 break;
1457 default:
1458 llvm_unreachable("Unexpected Opcode for Imm7Offset");
1459 }
1460
1461 int RHSC;
1462 // 7 bit constant, shifted by Shift.
1463 if (isScaledConstantInRange(Node: N, Scale: 1 << Shift, RangeMin: 0, RangeMax: 0x80, ScaledConstant&: RHSC)) {
1464 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1465 ? CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift),
1466 DL: SDLoc(N), VT: MVT::i32)
1467 : CurDAG->getSignedTargetConstant(Val: -RHSC * (1 << Shift),
1468 DL: SDLoc(N), VT: MVT::i32);
1469 return true;
1470 }
1471 return false;
1472}
1473
1474template <int Min, int Max>
1475bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1476 int Val;
1477 if (isScaledConstantInRange(Node: N, Scale: 1, RangeMin: Min, RangeMax: Max, ScaledConstant&: Val)) {
1478 OffImm = CurDAG->getSignedTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
1479 return true;
1480 }
1481 return false;
1482}
1483
1484bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1485 SDValue &Base,
1486 SDValue &OffReg, SDValue &ShImm) {
1487 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1488 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N))
1489 return false;
1490
1491 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1492 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1493 int RHSC = (int)RHS->getZExtValue();
1494 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1495 return false;
1496 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1497 return false;
1498 }
1499
1500 // Look for (R + R) or (R + (R << [1,2,3])).
1501 unsigned ShAmt = 0;
1502 Base = N.getOperand(i: 0);
1503 OffReg = N.getOperand(i: 1);
1504
1505 // Swap if it is ((R << c) + R).
1506 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: OffReg.getOpcode());
1507 if (ShOpcVal != ARM_AM::lsl) {
1508 ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: Base.getOpcode());
1509 if (ShOpcVal == ARM_AM::lsl)
1510 std::swap(a&: Base, b&: OffReg);
1511 }
1512
1513 if (ShOpcVal == ARM_AM::lsl) {
1514 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1515 // it.
1516 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: OffReg.getOperand(i: 1))) {
1517 ShAmt = Sh->getZExtValue();
1518 if (ShAmt < 4 && isShifterOpProfitable(Shift: OffReg, ShOpcVal, ShAmt))
1519 OffReg = OffReg.getOperand(i: 0);
1520 else {
1521 ShAmt = 0;
1522 }
1523 }
1524 }
1525
1526 // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1527 // and use it in a shifted operand do so.
1528 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1529 unsigned PowerOfTwo = 0;
1530 SDValue NewMulConst;
1531 if (canExtractShiftFromMul(N: OffReg, MaxShift: 3, PowerOfTwo, NewMulConst)) {
1532 HandleSDNode Handle(OffReg);
1533 replaceDAGValue(N: OffReg.getOperand(i: 1), M: NewMulConst);
1534 OffReg = Handle.getValue();
1535 ShAmt = PowerOfTwo;
1536 }
1537 }
1538
1539 ShImm = CurDAG->getTargetConstant(Val: ShAmt, DL: SDLoc(N), VT: MVT::i32);
1540
1541 return true;
1542}
1543
1544bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1545 SDValue &OffImm) {
1546 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1547 // instructions.
1548 Base = N;
1549 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
1550
1551 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(Op: N))
1552 return true;
1553
1554 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1555 if (!RHS)
1556 return true;
1557
1558 uint32_t RHSC = (int)RHS->getZExtValue();
1559 if (RHSC > 1020 || RHSC % 4 != 0)
1560 return true;
1561
1562 Base = N.getOperand(i: 0);
1563 if (Base.getOpcode() == ISD::FrameIndex) {
1564 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1565 Base = CurDAG->getTargetFrameIndex(
1566 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1567 }
1568
1569 OffImm = CurDAG->getTargetConstant(Val: RHSC/4, DL: SDLoc(N), VT: MVT::i32);
1570 return true;
1571}
1572
1573//===--------------------------------------------------------------------===//
1574
1575/// getAL - Returns a ARMCC::AL immediate node.
1576static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1577 return CurDAG->getTargetConstant(Val: (uint64_t)ARMCC::AL, DL: dl, VT: MVT::i32);
1578}
1579
1580void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1581 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1582 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
1583}
1584
1585bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1586 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1587 ISD::MemIndexedMode AM = LD->getAddressingMode();
1588 if (AM == ISD::UNINDEXED)
1589 return false;
1590
1591 EVT LoadedVT = LD->getMemoryVT();
1592 SDValue Offset, AMOpc;
1593 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1594 unsigned Opcode = 0;
1595 bool Match = false;
1596 if (LoadedVT == MVT::i32 && isPre &&
1597 SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1598 Opcode = ARM::LDR_PRE_IMM;
1599 Match = true;
1600 } else if (LoadedVT == MVT::i32 && !isPre &&
1601 SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1602 Opcode = ARM::LDR_POST_IMM;
1603 Match = true;
1604 } else if (LoadedVT == MVT::i32 &&
1605 SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1606 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1607 Match = true;
1608
1609 } else if (LoadedVT == MVT::i16 &&
1610 SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1611 Match = true;
1612 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1613 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1614 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1615 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1616 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1617 if (SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1618 Match = true;
1619 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1620 }
1621 } else {
1622 if (isPre &&
1623 SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1624 Match = true;
1625 Opcode = ARM::LDRB_PRE_IMM;
1626 } else if (!isPre &&
1627 SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1628 Match = true;
1629 Opcode = ARM::LDRB_POST_IMM;
1630 } else if (SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
1631 Match = true;
1632 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1633 }
1634 }
1635 }
1636
1637 if (Match) {
1638 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1639 SDValue Chain = LD->getChain();
1640 SDValue Base = LD->getBasePtr();
1641 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
1642 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1643 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
1644 VT3: MVT::Other, Ops);
1645 transferMemOperands(N, Result: New);
1646 ReplaceNode(F: N, T: New);
1647 return true;
1648 } else {
1649 SDValue Chain = LD->getChain();
1650 SDValue Base = LD->getBasePtr();
1651 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
1652 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1653 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
1654 VT3: MVT::Other, Ops);
1655 transferMemOperands(N, Result: New);
1656 ReplaceNode(F: N, T: New);
1657 return true;
1658 }
1659 }
1660
1661 return false;
1662}
1663
1664bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1665 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1666 EVT LoadedVT = LD->getMemoryVT();
1667 ISD::MemIndexedMode AM = LD->getAddressingMode();
1668 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1669 LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1670 return false;
1671
1672 auto *COffs = dyn_cast<ConstantSDNode>(Val: LD->getOffset());
1673 if (!COffs || COffs->getZExtValue() != 4)
1674 return false;
1675
1676 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1677 // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1678 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1679 // ISel.
1680 SDValue Chain = LD->getChain();
1681 SDValue Base = LD->getBasePtr();
1682 SDValue Ops[]= { Base, getAL(CurDAG, dl: SDLoc(N)),
1683 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1684 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::tLDR_postidx, dl: SDLoc(N), VT1: MVT::i32,
1685 VT2: MVT::i32, VT3: MVT::Other, Ops);
1686 transferMemOperands(N, Result: New);
1687 ReplaceNode(F: N, T: New);
1688 return true;
1689}
1690
1691bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1692 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1693 ISD::MemIndexedMode AM = LD->getAddressingMode();
1694 if (AM == ISD::UNINDEXED)
1695 return false;
1696
1697 EVT LoadedVT = LD->getMemoryVT();
1698 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1699 SDValue Offset;
1700 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1701 unsigned Opcode = 0;
1702 bool Match = false;
1703 if (SelectT2AddrModeImm8Offset(Op: N, N: LD->getOffset(), OffImm&: Offset)) {
1704 switch (LoadedVT.getSimpleVT().SimpleTy) {
1705 case MVT::i32:
1706 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1707 break;
1708 case MVT::i16:
1709 if (isSExtLd)
1710 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1711 else
1712 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1713 break;
1714 case MVT::i8:
1715 case MVT::i1:
1716 if (isSExtLd)
1717 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1718 else
1719 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1720 break;
1721 default:
1722 return false;
1723 }
1724 Match = true;
1725 }
1726
1727 if (Match) {
1728 SDValue Chain = LD->getChain();
1729 SDValue Base = LD->getBasePtr();
1730 SDValue Ops[]= { Base, Offset, getAL(CurDAG, dl: SDLoc(N)),
1731 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
1732 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
1733 VT3: MVT::Other, Ops);
1734 transferMemOperands(N, Result: New);
1735 ReplaceNode(F: N, T: New);
1736 return true;
1737 }
1738
1739 return false;
1740}
1741
1742bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1743 EVT LoadedVT;
1744 unsigned Opcode = 0;
1745 bool isSExtLd, isPre;
1746 Align Alignment;
1747 ARMVCC::VPTCodes Pred;
1748 SDValue PredReg;
1749 SDValue Chain, Base, Offset;
1750
1751 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
1752 ISD::MemIndexedMode AM = LD->getAddressingMode();
1753 if (AM == ISD::UNINDEXED)
1754 return false;
1755 LoadedVT = LD->getMemoryVT();
1756 if (!LoadedVT.isVector())
1757 return false;
1758
1759 Chain = LD->getChain();
1760 Base = LD->getBasePtr();
1761 Offset = LD->getOffset();
1762 Alignment = LD->getAlign();
1763 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1764 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1765 Pred = ARMVCC::None;
1766 PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
1767 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
1768 ISD::MemIndexedMode AM = LD->getAddressingMode();
1769 if (AM == ISD::UNINDEXED)
1770 return false;
1771 LoadedVT = LD->getMemoryVT();
1772 if (!LoadedVT.isVector())
1773 return false;
1774
1775 Chain = LD->getChain();
1776 Base = LD->getBasePtr();
1777 Offset = LD->getOffset();
1778 Alignment = LD->getAlign();
1779 isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1780 isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1781 Pred = ARMVCC::Then;
1782 PredReg = LD->getMask();
1783 } else
1784 llvm_unreachable("Expected a Load or a Masked Load!");
1785
1786 // We allow LE non-masked loads to change the type (for example use a vldrb.8
1787 // as opposed to a vldrw.32). This can allow extra addressing modes or
1788 // alignments for what is otherwise an equivalent instruction.
1789 bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(Val: N);
1790
1791 SDValue NewOffset;
1792 if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1793 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1)) {
1794 if (isSExtLd)
1795 Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1796 else
1797 Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1798 } else if (LoadedVT == MVT::v8i8 &&
1799 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
1800 if (isSExtLd)
1801 Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1802 else
1803 Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1804 } else if (LoadedVT == MVT::v4i8 &&
1805 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
1806 if (isSExtLd)
1807 Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1808 else
1809 Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1810 } else if (Alignment >= Align(4) &&
1811 (CanChangeType || LoadedVT == MVT::v4i32 ||
1812 LoadedVT == MVT::v4f32) &&
1813 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 2))
1814 Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1815 else if (Alignment >= Align(2) &&
1816 (CanChangeType || LoadedVT == MVT::v8i16 ||
1817 LoadedVT == MVT::v8f16) &&
1818 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1))
1819 Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1820 else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1821 SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0))
1822 Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1823 else
1824 return false;
1825
1826 SDValue Ops[] = {Base,
1827 NewOffset,
1828 CurDAG->getTargetConstant(Val: Pred, DL: SDLoc(N), VT: MVT::i32),
1829 PredReg,
1830 CurDAG->getRegister(Reg: 0, VT: MVT::i32), // tp_reg
1831 Chain};
1832 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32,
1833 VT2: N->getValueType(ResNo: 0), VT3: MVT::Other, Ops);
1834 transferMemOperands(N, Result: New);
1835 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
1836 ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
1837 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
1838 CurDAG->RemoveDeadNode(N);
1839 return true;
1840}
1841
1842/// Form a GPRPair pseudo register from a pair of GPR regs.
1843SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1844 SDLoc dl(V0.getNode());
1845 SDValue RegClass =
1846 CurDAG->getTargetConstant(Val: ARM::GPRPairRegClassID, DL: dl, VT: MVT::i32);
1847 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
1848 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
1849 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1850 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1851}
1852
1853/// Form a D register from a pair of S registers.
1854SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1855 SDLoc dl(V0.getNode());
1856 SDValue RegClass =
1857 CurDAG->getTargetConstant(Val: ARM::DPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1858 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1859 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1860 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1861 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1862}
1863
1864/// Form a quad register from a pair of D registers.
1865SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1866 SDLoc dl(V0.getNode());
1867 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QPRRegClassID, DL: dl,
1868 VT: MVT::i32);
1869 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1870 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1871 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1872 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1873}
1874
1875/// Form 4 consecutive D registers from a pair of Q registers.
1876SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1877 SDLoc dl(V0.getNode());
1878 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1879 VT: MVT::i32);
1880 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1881 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1882 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1883 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1884}
1885
1886/// Form 4 consecutive S registers.
1887SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1888 SDValue V2, SDValue V3) {
1889 SDLoc dl(V0.getNode());
1890 SDValue RegClass =
1891 CurDAG->getTargetConstant(Val: ARM::QPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1892 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1893 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1894 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::ssub_2, DL: dl, VT: MVT::i32);
1895 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::ssub_3, DL: dl, VT: MVT::i32);
1896 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1897 V2, SubReg2, V3, SubReg3 };
1898 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1899}
1900
1901/// Form 4 consecutive D registers.
1902SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1903 SDValue V2, SDValue V3) {
1904 SDLoc dl(V0.getNode());
1905 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1906 VT: MVT::i32);
1907 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1908 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1909 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::dsub_2, DL: dl, VT: MVT::i32);
1910 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::dsub_3, DL: dl, VT: MVT::i32);
1911 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1912 V2, SubReg2, V3, SubReg3 };
1913 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1914}
1915
1916/// Form 4 consecutive Q registers.
1917SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1918 SDValue V2, SDValue V3) {
1919 SDLoc dl(V0.getNode());
1920 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQQQPRRegClassID, DL: dl,
1921 VT: MVT::i32);
1922 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1923 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1924 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::qsub_2, DL: dl, VT: MVT::i32);
1925 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::qsub_3, DL: dl, VT: MVT::i32);
1926 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1927 V2, SubReg2, V3, SubReg3 };
1928 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1929}
1930
1931/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1932/// of a NEON VLD or VST instruction. The supported values depend on the
1933/// number of registers being loaded.
1934SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1935 unsigned NumVecs, bool is64BitVector) {
1936 unsigned NumRegs = NumVecs;
1937 if (!is64BitVector && NumVecs < 3)
1938 NumRegs *= 2;
1939
1940 unsigned Alignment = Align->getAsZExtVal();
1941 if (Alignment >= 32 && NumRegs == 4)
1942 Alignment = 32;
1943 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1944 Alignment = 16;
1945 else if (Alignment >= 8)
1946 Alignment = 8;
1947 else
1948 Alignment = 0;
1949
1950 return CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
1951}
1952
1953static bool isVLDfixed(unsigned Opc)
1954{
1955 switch (Opc) {
1956 default: return false;
1957 case ARM::VLD1d8wb_fixed : return true;
1958 case ARM::VLD1d16wb_fixed : return true;
1959 case ARM::VLD1d64Qwb_fixed : return true;
1960 case ARM::VLD1d32wb_fixed : return true;
1961 case ARM::VLD1d64wb_fixed : return true;
1962 case ARM::VLD1d8TPseudoWB_fixed : return true;
1963 case ARM::VLD1d16TPseudoWB_fixed : return true;
1964 case ARM::VLD1d32TPseudoWB_fixed : return true;
1965 case ARM::VLD1d64TPseudoWB_fixed : return true;
1966 case ARM::VLD1d8QPseudoWB_fixed : return true;
1967 case ARM::VLD1d16QPseudoWB_fixed : return true;
1968 case ARM::VLD1d32QPseudoWB_fixed : return true;
1969 case ARM::VLD1d64QPseudoWB_fixed : return true;
1970 case ARM::VLD1q8wb_fixed : return true;
1971 case ARM::VLD1q16wb_fixed : return true;
1972 case ARM::VLD1q32wb_fixed : return true;
1973 case ARM::VLD1q64wb_fixed : return true;
1974 case ARM::VLD1DUPd8wb_fixed : return true;
1975 case ARM::VLD1DUPd16wb_fixed : return true;
1976 case ARM::VLD1DUPd32wb_fixed : return true;
1977 case ARM::VLD1DUPq8wb_fixed : return true;
1978 case ARM::VLD1DUPq16wb_fixed : return true;
1979 case ARM::VLD1DUPq32wb_fixed : return true;
1980 case ARM::VLD2d8wb_fixed : return true;
1981 case ARM::VLD2d16wb_fixed : return true;
1982 case ARM::VLD2d32wb_fixed : return true;
1983 case ARM::VLD2q8PseudoWB_fixed : return true;
1984 case ARM::VLD2q16PseudoWB_fixed : return true;
1985 case ARM::VLD2q32PseudoWB_fixed : return true;
1986 case ARM::VLD2DUPd8wb_fixed : return true;
1987 case ARM::VLD2DUPd16wb_fixed : return true;
1988 case ARM::VLD2DUPd32wb_fixed : return true;
1989 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1990 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1991 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1992 }
1993}
1994
1995static bool isVSTfixed(unsigned Opc)
1996{
1997 switch (Opc) {
1998 default: return false;
1999 case ARM::VST1d8wb_fixed : return true;
2000 case ARM::VST1d16wb_fixed : return true;
2001 case ARM::VST1d32wb_fixed : return true;
2002 case ARM::VST1d64wb_fixed : return true;
2003 case ARM::VST1q8wb_fixed : return true;
2004 case ARM::VST1q16wb_fixed : return true;
2005 case ARM::VST1q32wb_fixed : return true;
2006 case ARM::VST1q64wb_fixed : return true;
2007 case ARM::VST1d8TPseudoWB_fixed : return true;
2008 case ARM::VST1d16TPseudoWB_fixed : return true;
2009 case ARM::VST1d32TPseudoWB_fixed : return true;
2010 case ARM::VST1d64TPseudoWB_fixed : return true;
2011 case ARM::VST1d8QPseudoWB_fixed : return true;
2012 case ARM::VST1d16QPseudoWB_fixed : return true;
2013 case ARM::VST1d32QPseudoWB_fixed : return true;
2014 case ARM::VST1d64QPseudoWB_fixed : return true;
2015 case ARM::VST2d8wb_fixed : return true;
2016 case ARM::VST2d16wb_fixed : return true;
2017 case ARM::VST2d32wb_fixed : return true;
2018 case ARM::VST2q8PseudoWB_fixed : return true;
2019 case ARM::VST2q16PseudoWB_fixed : return true;
2020 case ARM::VST2q32PseudoWB_fixed : return true;
2021 }
2022}
2023
2024// Get the register stride update opcode of a VLD/VST instruction that
2025// is otherwise equivalent to the given fixed stride updating instruction.
2026static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2027 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2028 && "Incorrect fixed stride updating instruction.");
2029 switch (Opc) {
2030 default: break;
2031 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2032 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2033 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2034 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2035 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2036 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2037 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2038 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2039 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2040 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2041 case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2042 case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2043 case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2044 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2045 case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2046 case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2047 case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2048 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2049 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2050 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2051 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2052 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2053 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2054 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2055 case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2056 case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2057 case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2058
2059 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2060 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2061 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2062 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2063 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2064 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2065 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2066 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2067 case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2068 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2069 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2070 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2071 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2072 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2073 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2074 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2075
2076 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2077 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2078 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2079 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2080 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2081 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2082
2083 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2084 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2085 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2086 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2087 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2088 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2089
2090 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2091 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2092 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2093 }
2094 return Opc; // If not one we handle, return it unchanged.
2095}
2096
2097/// Returns true if the given increment is a Constant known to be equal to the
2098/// access size performed by a NEON load/store. This means the "[rN]!" form can
2099/// be used.
2100static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2101 auto C = dyn_cast<ConstantSDNode>(Val&: Inc);
2102 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2103}
2104
2105void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2106 const uint16_t *DOpcodes,
2107 const uint16_t *QOpcodes0,
2108 const uint16_t *QOpcodes1) {
2109 assert(Subtarget->hasNEON());
2110 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2111 SDLoc dl(N);
2112
2113 SDValue MemAddr, Align;
2114 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2115 // nodes are not intrinsics.
2116 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2117 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2118 return;
2119
2120 SDValue Chain = N->getOperand(Num: 0);
2121 EVT VT = N->getValueType(ResNo: 0);
2122 bool is64BitVector = VT.is64BitVector();
2123 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2124
2125 unsigned OpcodeIndex;
2126 switch (VT.getSimpleVT().SimpleTy) {
2127 default: llvm_unreachable("unhandled vld type");
2128 // Double-register operations:
2129 case MVT::v8i8: OpcodeIndex = 0; break;
2130 case MVT::v4f16:
2131 case MVT::v4bf16:
2132 case MVT::v4i16: OpcodeIndex = 1; break;
2133 case MVT::v2f32:
2134 case MVT::v2i32: OpcodeIndex = 2; break;
2135 case MVT::v1i64: OpcodeIndex = 3; break;
2136 // Quad-register operations:
2137 case MVT::v16i8: OpcodeIndex = 0; break;
2138 case MVT::v8f16:
2139 case MVT::v8bf16:
2140 case MVT::v8i16: OpcodeIndex = 1; break;
2141 case MVT::v4f32:
2142 case MVT::v4i32: OpcodeIndex = 2; break;
2143 case MVT::v2f64:
2144 case MVT::v2i64: OpcodeIndex = 3; break;
2145 }
2146
2147 EVT ResTy;
2148 if (NumVecs == 1)
2149 ResTy = VT;
2150 else {
2151 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2152 if (!is64BitVector)
2153 ResTyElts *= 2;
2154 ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
2155 }
2156 std::vector<EVT> ResTys;
2157 ResTys.push_back(x: ResTy);
2158 if (isUpdating)
2159 ResTys.push_back(x: MVT::i32);
2160 ResTys.push_back(x: MVT::Other);
2161
2162 SDValue Pred = getAL(CurDAG, dl);
2163 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
2164 SDNode *VLd;
2165 SmallVector<SDValue, 7> Ops;
2166
2167 // Double registers and VLD1/VLD2 quad registers are directly supported.
2168 if (is64BitVector || NumVecs <= 2) {
2169 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2170 QOpcodes0[OpcodeIndex]);
2171 Ops.push_back(Elt: MemAddr);
2172 Ops.push_back(Elt: Align);
2173 if (isUpdating) {
2174 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2175 bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2176 if (!IsImmUpdate) {
2177 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2178 // check for the opcode rather than the number of vector elements.
2179 if (isVLDfixed(Opc))
2180 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2181 Ops.push_back(Elt: Inc);
2182 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2183 // the operands if not such an opcode.
2184 } else if (!isVLDfixed(Opc))
2185 Ops.push_back(Elt: Reg0);
2186 }
2187 Ops.push_back(Elt: Pred);
2188 Ops.push_back(Elt: Reg0);
2189 Ops.push_back(Elt: Chain);
2190 VLd = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2191
2192 } else {
2193 // Otherwise, quad registers are loaded with two separate instructions,
2194 // where one loads the even registers and the other loads the odd registers.
2195 EVT AddrTy = MemAddr.getValueType();
2196
2197 // Load the even subregs. This is always an updating load, so that it
2198 // provides the address to the second load for the odd subregs.
2199 SDValue ImplDef =
2200 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
2201 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2202 SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2203 VT1: ResTy, VT2: AddrTy, VT3: MVT::Other, Ops: OpsA);
2204 Chain = SDValue(VLdA, 2);
2205
2206 // Load the odd subregs.
2207 Ops.push_back(Elt: SDValue(VLdA, 1));
2208 Ops.push_back(Elt: Align);
2209 if (isUpdating) {
2210 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2211 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2212 "only constant post-increment update allowed for VLD3/4");
2213 (void)Inc;
2214 Ops.push_back(Elt: Reg0);
2215 }
2216 Ops.push_back(Elt: SDValue(VLdA, 0));
2217 Ops.push_back(Elt: Pred);
2218 Ops.push_back(Elt: Reg0);
2219 Ops.push_back(Elt: Chain);
2220 VLd = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys, Ops);
2221 }
2222
2223 // Transfer memoperands.
2224 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2225 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLd), NewMemRefs: {MemOp});
2226
2227 if (NumVecs == 1) {
2228 ReplaceNode(F: N, T: VLd);
2229 return;
2230 }
2231
2232 // Extract out the subregisters.
2233 SDValue SuperReg = SDValue(VLd, 0);
2234 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2235 ARM::qsub_3 == ARM::qsub_0 + 3,
2236 "Unexpected subreg numbering");
2237 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2238 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2239 ReplaceUses(F: SDValue(N, Vec),
2240 T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
2241 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLd, 1));
2242 if (isUpdating)
2243 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLd, 2));
2244 CurDAG->RemoveDeadNode(N);
2245}
2246
2247void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2248 const uint16_t *DOpcodes,
2249 const uint16_t *QOpcodes0,
2250 const uint16_t *QOpcodes1) {
2251 assert(Subtarget->hasNEON());
2252 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2253 SDLoc dl(N);
2254
2255 SDValue MemAddr, Align;
2256 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
2257 // nodes are not intrinsics.
2258 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2259 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2260 if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
2261 return;
2262
2263 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2264
2265 SDValue Chain = N->getOperand(Num: 0);
2266 EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
2267 bool is64BitVector = VT.is64BitVector();
2268 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2269
2270 unsigned OpcodeIndex;
2271 switch (VT.getSimpleVT().SimpleTy) {
2272 default: llvm_unreachable("unhandled vst type");
2273 // Double-register operations:
2274 case MVT::v8i8: OpcodeIndex = 0; break;
2275 case MVT::v4f16:
2276 case MVT::v4bf16:
2277 case MVT::v4i16: OpcodeIndex = 1; break;
2278 case MVT::v2f32:
2279 case MVT::v2i32: OpcodeIndex = 2; break;
2280 case MVT::v1i64: OpcodeIndex = 3; break;
2281 // Quad-register operations:
2282 case MVT::v16i8: OpcodeIndex = 0; break;
2283 case MVT::v8f16:
2284 case MVT::v8bf16:
2285 case MVT::v8i16: OpcodeIndex = 1; break;
2286 case MVT::v4f32:
2287 case MVT::v4i32: OpcodeIndex = 2; break;
2288 case MVT::v2f64:
2289 case MVT::v2i64: OpcodeIndex = 3; break;
2290 }
2291
2292 std::vector<EVT> ResTys;
2293 if (isUpdating)
2294 ResTys.push_back(x: MVT::i32);
2295 ResTys.push_back(x: MVT::Other);
2296
2297 SDValue Pred = getAL(CurDAG, dl);
2298 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
2299 SmallVector<SDValue, 7> Ops;
2300
2301 // Double registers and VST1/VST2 quad registers are directly supported.
2302 if (is64BitVector || NumVecs <= 2) {
2303 SDValue SrcReg;
2304 if (NumVecs == 1) {
2305 SrcReg = N->getOperand(Num: Vec0Idx);
2306 } else if (is64BitVector) {
2307 // Form a REG_SEQUENCE to force register allocation.
2308 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2309 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2310 if (NumVecs == 2)
2311 SrcReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
2312 else {
2313 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2314 // If it's a vst3, form a quad D-register and leave the last part as
2315 // an undef.
2316 SDValue V3 = (NumVecs == 3)
2317 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2318 : N->getOperand(Num: Vec0Idx + 3);
2319 SrcReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
2320 }
2321 } else {
2322 // Form a QQ register.
2323 SDValue Q0 = N->getOperand(Num: Vec0Idx);
2324 SDValue Q1 = N->getOperand(Num: Vec0Idx + 1);
2325 SrcReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0: Q0, V1: Q1), 0);
2326 }
2327
2328 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2329 QOpcodes0[OpcodeIndex]);
2330 Ops.push_back(Elt: MemAddr);
2331 Ops.push_back(Elt: Align);
2332 if (isUpdating) {
2333 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2334 bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
2335 if (!IsImmUpdate) {
2336 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2337 // check for the opcode rather than the number of vector elements.
2338 if (isVSTfixed(Opc))
2339 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2340 Ops.push_back(Elt: Inc);
2341 }
2342 // VST1/VST2 fixed increment does not need Reg0 so only include it in
2343 // the operands if not such an opcode.
2344 else if (!isVSTfixed(Opc))
2345 Ops.push_back(Elt: Reg0);
2346 }
2347 Ops.push_back(Elt: SrcReg);
2348 Ops.push_back(Elt: Pred);
2349 Ops.push_back(Elt: Reg0);
2350 Ops.push_back(Elt: Chain);
2351 SDNode *VSt = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2352
2353 // Transfer memoperands.
2354 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VSt), NewMemRefs: {MemOp});
2355
2356 ReplaceNode(F: N, T: VSt);
2357 return;
2358 }
2359
2360 // Otherwise, quad registers are stored with two separate instructions,
2361 // where one stores the even registers and the other stores the odd registers.
2362
2363 // Form the QQQQ REG_SEQUENCE.
2364 SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
2365 SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
2366 SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
2367 SDValue V3 = (NumVecs == 3)
2368 ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2369 : N->getOperand(Num: Vec0Idx + 3);
2370 SDValue RegSeq = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);
2371
2372 // Store the even D registers. This is always an updating store, so that it
2373 // provides the address to the second store for the odd subregs.
2374 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2375 SDNode *VStA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
2376 VT1: MemAddr.getValueType(),
2377 VT2: MVT::Other, Ops: OpsA);
2378 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStA), NewMemRefs: {MemOp});
2379 Chain = SDValue(VStA, 1);
2380
2381 // Store the odd D registers.
2382 Ops.push_back(Elt: SDValue(VStA, 0));
2383 Ops.push_back(Elt: Align);
2384 if (isUpdating) {
2385 SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
2386 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2387 "only constant post-increment update allowed for VST3/4");
2388 (void)Inc;
2389 Ops.push_back(Elt: Reg0);
2390 }
2391 Ops.push_back(Elt: RegSeq);
2392 Ops.push_back(Elt: Pred);
2393 Ops.push_back(Elt: Reg0);
2394 Ops.push_back(Elt: Chain);
2395 SDNode *VStB = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys,
2396 Ops);
2397 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStB), NewMemRefs: {MemOp});
2398 ReplaceNode(F: N, T: VStB);
2399}
2400
/// Select a NEON per-lane structured load or store of NumVecs vectors.
///
/// \param N          The node being selected. On success it is replaced by
///                   the new machine node (stores) or has each of its results
///                   rewired and is then removed (loads).
/// \param IsLoad     True to build a load, false to build a store.
/// \param isUpdating True for the address-writeback form: an increment
///                   operand is consumed and an extra i32 result is produced.
/// \param NumVecs    Number of vectors transferred; asserted to be 2, 3 or 4.
/// \param DOpcodes   Opcodes used when the vector type is 64 bits wide,
///                   indexed 0/1/2 for 8/16/32-bit elements.
/// \param QOpcodes   Opcodes used otherwise, indexed 0/1 for 16/32-bit
///                   elements.
///
/// Returns without modifying N when SelectAddrMode6 cannot match the address.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  // Either way the first vector operand ends up at index 3.
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();

  SDValue Chain = N->getOperand(Num: 0);
  // The lane number is the operand that follows the NumVecs vector operands.
  unsigned Lane = N->getConstantOperandVal(Num: Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Work out the alignment operand. It stays 0 for the three-vector form;
  // otherwise the alignment reported by SelectAddrMode6 is clamped to the
  // number of bytes accessed (one element per vector) and zeroed when it is
  // too small to be worth encoding.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    // (x & -x) keeps only the lowest set bit.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);

  // Index into DOpcodes / QOpcodes, chosen by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: for loads, one wide vector of i64 standing for the whole
  // register tuple (three vectors are rounded up to four, and the count is
  // doubled when the vectors are not 64-bit); then the written-back address
  // if updating; then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(x: EVT::getVectorVT(Context&: *CurDAG->getContext(),
                                      VT: MVT::i64, NumElements: ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(x: MVT::i32);
  ResTys.push_back(x: MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Elt: MemAddr);
  Ops.push_back(Elt: Align);
  if (isUpdating) {
    // When isPerfectIncrement accepts the increment, the null register is
    // passed in place of an explicit increment operand.
    SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
    Ops.push_back(Elt: IsImmUpdate ? Reg0 : Inc);
  }

  // Combine the input vectors into a single super-register operand. The
  // three-vector form uses a four-register tuple whose last slot is
  // IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
  SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Num: Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(Elt: SuperReg);
  Ops.push_back(Elt: getI32Imm(Imm: Lane, dl));
  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  // NOTE: the variable is named VLdLn but it also holds the store node when
  // IsLoad is false.
  SDNode *VLdLn = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdLn), NewMemRefs: {MemOp});
  if (!IsLoad) {
    // A store's results map one-to-one onto the new node's results.
    ReplaceNode(F: N, T: VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(F: SDValue(N, Vec),
                T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
  // Results after the vectors are forwarded from result 1 (and, when
  // updating, result 2) of the new node.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2529
2530template <typename SDValueVector>
2531void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2532 SDValue PredicateMask) {
2533 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2534 Ops.push_back(PredicateMask);
2535 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2536}
2537
2538template <typename SDValueVector>
2539void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2540 SDValue PredicateMask,
2541 SDValue Inactive) {
2542 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2543 Ops.push_back(PredicateMask);
2544 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2545 Ops.push_back(Inactive);
2546}
2547
2548template <typename SDValueVector>
2549void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2550 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2551 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2552 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2553}
2554
2555template <typename SDValueVector>
2556void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2557 EVT InactiveTy) {
2558 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2559 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2560 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2561 Ops.push_back(SDValue(
2562 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: InactiveTy), 0));
2563}
2564
2565void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2566 bool Predicated) {
2567 SDLoc Loc(N);
2568 SmallVector<SDValue, 8> Ops;
2569
2570 uint16_t Opcode;
2571 switch (N->getValueType(ResNo: 1).getVectorElementType().getSizeInBits()) {
2572 case 32:
2573 Opcode = Opcodes[0];
2574 break;
2575 case 64:
2576 Opcode = Opcodes[1];
2577 break;
2578 default:
2579 llvm_unreachable("bad vector element size in SelectMVE_WB");
2580 }
2581
2582 Ops.push_back(Elt: N->getOperand(Num: 2)); // vector of base addresses
2583
2584 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2585 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate offset
2586
2587 if (Predicated)
2588 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2589 else
2590 AddEmptyMVEPredicateToOps(Ops, Loc);
2591
2592 Ops.push_back(Elt: N->getOperand(Num: 0)); // chain
2593
2594 SmallVector<EVT, 8> VTs;
2595 VTs.push_back(Elt: N->getValueType(ResNo: 1));
2596 VTs.push_back(Elt: N->getValueType(ResNo: 0));
2597 VTs.push_back(Elt: N->getValueType(ResNo: 2));
2598
2599 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), ResultTys: VTs, Ops);
2600 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
2601 ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
2602 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
2603 transferMemOperands(N, Result: New);
2604 CurDAG->RemoveDeadNode(N);
2605}
2606
2607void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2608 bool Immediate,
2609 bool HasSaturationOperand) {
2610 SDLoc Loc(N);
2611 SmallVector<SDValue, 8> Ops;
2612
2613 // Two 32-bit halves of the value to be shifted
2614 Ops.push_back(Elt: N->getOperand(Num: 1));
2615 Ops.push_back(Elt: N->getOperand(Num: 2));
2616
2617 // The shift count
2618 if (Immediate) {
2619 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2620 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2621 } else {
2622 Ops.push_back(Elt: N->getOperand(Num: 3));
2623 }
2624
2625 // The immediate saturation operand, if any
2626 if (HasSaturationOperand) {
2627 int32_t SatOp = N->getConstantOperandVal(Num: 4);
2628 int SatBit = (SatOp == 64 ? 0 : 1);
2629 Ops.push_back(Elt: getI32Imm(Imm: SatBit, dl: Loc));
2630 }
2631
2632 // MVE scalar shifts are IT-predicable, so include the standard
2633 // predicate arguments.
2634 Ops.push_back(Elt: getAL(CurDAG, dl: Loc));
2635 Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2636
2637 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2638}
2639
2640void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2641 uint16_t OpcodeWithNoCarry,
2642 bool Add, bool Predicated) {
2643 SDLoc Loc(N);
2644 SmallVector<SDValue, 8> Ops;
2645 uint16_t Opcode;
2646
2647 unsigned FirstInputOp = Predicated ? 2 : 1;
2648
2649 // Two input vectors and the input carry flag
2650 Ops.push_back(Elt: N->getOperand(Num: FirstInputOp));
2651 Ops.push_back(Elt: N->getOperand(Num: FirstInputOp + 1));
2652 SDValue CarryIn = N->getOperand(Num: FirstInputOp + 2);
2653 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(Val&: CarryIn);
2654 uint32_t CarryMask = 1 << 29;
2655 uint32_t CarryExpected = Add ? 0 : CarryMask;
2656 if (CarryInConstant &&
2657 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2658 Opcode = OpcodeWithNoCarry;
2659 } else {
2660 Ops.push_back(Elt: CarryIn);
2661 Opcode = OpcodeWithCarry;
2662 }
2663
2664 if (Predicated)
2665 AddMVEPredicateToOps(Ops, Loc,
2666 PredicateMask: N->getOperand(Num: FirstInputOp + 3), // predicate
2667 Inactive: N->getOperand(Num: FirstInputOp - 1)); // inactive
2668 else
2669 AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));
2670
2671 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2672}
2673
2674void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2675 SDLoc Loc(N);
2676 SmallVector<SDValue, 8> Ops;
2677
2678 // One vector input, followed by a 32-bit word of bits to shift in
2679 // and then an immediate shift count
2680 Ops.push_back(Elt: N->getOperand(Num: 1));
2681 Ops.push_back(Elt: N->getOperand(Num: 2));
2682 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2683 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2684
2685 if (Predicated)
2686 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2687 else
2688 AddEmptyMVEPredicateToOps(Ops, Loc);
2689
2690 CurDAG->SelectNodeTo(N, MachineOpc: ARM::MVE_VSHLC, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2691}
2692
2693static bool SDValueToConstBool(SDValue SDVal) {
2694 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2695 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(Val&: SDVal);
2696 uint64_t Value = SDValConstant->getZExtValue();
2697 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2698 return Value;
2699}
2700
2701void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2702 const uint16_t *OpcodesS,
2703 const uint16_t *OpcodesU,
2704 size_t Stride, size_t TySize) {
2705 assert(TySize < Stride && "Invalid TySize");
2706 bool IsUnsigned = SDValueToConstBool(SDVal: N->getOperand(Num: 1));
2707 bool IsSub = SDValueToConstBool(SDVal: N->getOperand(Num: 2));
2708 bool IsExchange = SDValueToConstBool(SDVal: N->getOperand(Num: 3));
2709 if (IsUnsigned) {
2710 assert(!IsSub &&
2711 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2712 assert(!IsExchange &&
2713 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2714 }
2715
2716 auto OpIsZero = [N](size_t OpNo) {
2717 return isNullConstant(V: N->getOperand(Num: OpNo));
2718 };
2719
2720 // If the input accumulator value is not zero, select an instruction with
2721 // accumulator, otherwise select an instruction without accumulator
2722 bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2723
2724 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2725 if (IsSub)
2726 Opcodes += 4 * Stride;
2727 if (IsExchange)
2728 Opcodes += 2 * Stride;
2729 if (IsAccum)
2730 Opcodes += Stride;
2731 uint16_t Opcode = Opcodes[TySize];
2732
2733 SDLoc Loc(N);
2734 SmallVector<SDValue, 8> Ops;
2735 // Push the accumulator operands, if they are used
2736 if (IsAccum) {
2737 Ops.push_back(Elt: N->getOperand(Num: 4));
2738 Ops.push_back(Elt: N->getOperand(Num: 5));
2739 }
2740 // Push the two vector operands
2741 Ops.push_back(Elt: N->getOperand(Num: 6));
2742 Ops.push_back(Elt: N->getOperand(Num: 7));
2743
2744 if (Predicated)
2745 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 8));
2746 else
2747 AddEmptyMVEPredicateToOps(Ops, Loc);
2748
2749 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2750}
2751
2752void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2753 const uint16_t *OpcodesS,
2754 const uint16_t *OpcodesU) {
2755 EVT VecTy = N->getOperand(Num: 6).getValueType();
2756 size_t SizeIndex;
2757 switch (VecTy.getVectorElementType().getSizeInBits()) {
2758 case 16:
2759 SizeIndex = 0;
2760 break;
2761 case 32:
2762 SizeIndex = 1;
2763 break;
2764 default:
2765 llvm_unreachable("bad vector element size");
2766 }
2767
2768 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 2, TySize: SizeIndex);
2769}
2770
2771void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2772 const uint16_t *OpcodesS,
2773 const uint16_t *OpcodesU) {
2774 assert(
2775 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2776 32 &&
2777 "bad vector element size");
2778 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 1, TySize: 0);
2779}
2780
2781void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2782 const uint16_t *const *Opcodes,
2783 bool HasWriteback) {
2784 EVT VT = N->getValueType(ResNo: 0);
2785 SDLoc Loc(N);
2786
2787 const uint16_t *OurOpcodes;
2788 switch (VT.getVectorElementType().getSizeInBits()) {
2789 case 8:
2790 OurOpcodes = Opcodes[0];
2791 break;
2792 case 16:
2793 OurOpcodes = Opcodes[1];
2794 break;
2795 case 32:
2796 OurOpcodes = Opcodes[2];
2797 break;
2798 default:
2799 llvm_unreachable("bad vector element size in SelectMVE_VLD");
2800 }
2801
2802 EVT DataTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: NumVecs * 2);
2803 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2804 unsigned PtrOperand = HasWriteback ? 1 : 2;
2805
2806 auto Data = SDValue(
2807 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: DataTy), 0);
2808 SDValue Chain = N->getOperand(Num: 0);
2809 // Add a MVE_VLDn instruction for each Vec, except the last
2810 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2811 SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2812 auto LoadInst =
2813 CurDAG->getMachineNode(Opcode: OurOpcodes[Stage], dl: Loc, ResultTys, Ops);
2814 Data = SDValue(LoadInst, 0);
2815 Chain = SDValue(LoadInst, 1);
2816 transferMemOperands(N, Result: LoadInst);
2817 }
2818 // The last may need a writeback on it
2819 if (HasWriteback)
2820 ResultTys = {DataTy, MVT::i32, MVT::Other};
2821 SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
2822 auto LoadInst =
2823 CurDAG->getMachineNode(Opcode: OurOpcodes[NumVecs - 1], dl: Loc, ResultTys, Ops);
2824 transferMemOperands(N, Result: LoadInst);
2825
2826 unsigned i;
2827 for (i = 0; i < NumVecs; i++)
2828 ReplaceUses(F: SDValue(N, i),
2829 T: CurDAG->getTargetExtractSubreg(SRIdx: ARM::qsub_0 + i, DL: Loc, VT,
2830 Operand: SDValue(LoadInst, 0)));
2831 if (HasWriteback)
2832 ReplaceUses(F: SDValue(N, i++), T: SDValue(LoadInst, 1));
2833 ReplaceUses(F: SDValue(N, i), T: SDValue(LoadInst, HasWriteback ? 2 : 1));
2834 CurDAG->RemoveDeadNode(N);
2835}
2836
2837void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2838 bool Wrapping, bool Predicated) {
2839 EVT VT = N->getValueType(ResNo: 0);
2840 SDLoc Loc(N);
2841
2842 uint16_t Opcode;
2843 switch (VT.getScalarSizeInBits()) {
2844 case 8:
2845 Opcode = Opcodes[0];
2846 break;
2847 case 16:
2848 Opcode = Opcodes[1];
2849 break;
2850 case 32:
2851 Opcode = Opcodes[2];
2852 break;
2853 default:
2854 llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2855 }
2856
2857 SmallVector<SDValue, 8> Ops;
2858 unsigned OpIdx = 1;
2859
2860 SDValue Inactive;
2861 if (Predicated)
2862 Inactive = N->getOperand(Num: OpIdx++);
2863
2864 Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // base
2865 if (Wrapping)
2866 Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // limit
2867
2868 SDValue ImmOp = N->getOperand(Num: OpIdx++); // step
2869 int ImmValue = ImmOp->getAsZExtVal();
2870 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc));
2871
2872 if (Predicated)
2873 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: OpIdx), Inactive);
2874 else
2875 AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));
2876
2877 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2878}
2879
2880void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2881 size_t NumExtraOps, bool HasAccum) {
2882 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2883 SDLoc Loc(N);
2884 SmallVector<SDValue, 8> Ops;
2885
2886 unsigned OpIdx = 1;
2887
2888 // Convert and append the immediate operand designating the coprocessor.
2889 SDValue ImmCorpoc = N->getOperand(Num: OpIdx++);
2890 uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2891 Ops.push_back(Elt: getI32Imm(Imm: ImmCoprocVal, dl: Loc));
2892
2893 // For accumulating variants copy the low and high order parts of the
2894 // accumulator into a register pair and add it to the operand vector.
2895 if (HasAccum) {
2896 SDValue AccLo = N->getOperand(Num: OpIdx++);
2897 SDValue AccHi = N->getOperand(Num: OpIdx++);
2898 if (IsBigEndian)
2899 std::swap(a&: AccLo, b&: AccHi);
2900 Ops.push_back(Elt: SDValue(createGPRPairNode(VT: MVT::Untyped, V0: AccLo, V1: AccHi), 0));
2901 }
2902
2903 // Copy extra operands as-is.
2904 for (size_t I = 0; I < NumExtraOps; I++)
2905 Ops.push_back(Elt: N->getOperand(Num: OpIdx++));
2906
2907 // Convert and append the immediate operand
2908 SDValue Imm = N->getOperand(Num: OpIdx);
2909 uint32_t ImmVal = Imm->getAsZExtVal();
2910 Ops.push_back(Elt: getI32Imm(Imm: ImmVal, dl: Loc));
2911
2912 // Accumulating variants are IT-predicable, add predicate operands.
2913 if (HasAccum) {
2914 SDValue Pred = getAL(CurDAG, dl: Loc);
2915 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
2916 Ops.push_back(Elt: Pred);
2917 Ops.push_back(Elt: PredReg);
2918 }
2919
2920 // Create the CDE instruction
2921 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, dl: Loc, VT: MVT::Untyped, Ops);
2922 SDValue ResultPair = SDValue(InstrNode, 0);
2923
2924 // The original intrinsic had two outputs, and the output of the dual-register
2925 // CDE instruction is a register pair. We need to extract the two subregisters
2926 // and replace all uses of the original outputs with the extracted
2927 // subregisters.
2928 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2929 if (IsBigEndian)
2930 std::swap(a&: SubRegs[0], b&: SubRegs[1]);
2931
2932 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2933 if (SDValue(N, ResIdx).use_empty())
2934 continue;
2935 SDValue SubReg = CurDAG->getTargetExtractSubreg(SRIdx: SubRegs[ResIdx], DL: Loc,
2936 VT: MVT::i32, Operand: ResultPair);
2937 ReplaceUses(F: SDValue(N, ResIdx), T: SubReg);
2938 }
2939
2940 CurDAG->RemoveDeadNode(N);
2941}
2942
/// Select a NEON load-and-duplicate of NumVecs vectors.
///
/// \param N           The node being selected; its results are rewired to the
///                    new machine node and it is then removed.
/// \param IsIntrinsic Selects where the address operand lives (operand 2 if
///                    true, operand 1 otherwise).
/// \param isUpdating  True for the address-writeback form: operand 2 is read
///                    as the increment and an extra i32 result is produced.
/// \param NumVecs     Number of vectors loaded; asserted to be 1..4.
/// \param DOpcodes    Opcodes used when the result vector type is 64 bits
///                    wide.
/// \param QOpcodes0   Opcodes used for wider vectors: the only instruction
///                    when NumVecs == 1, otherwise the first of two.
/// \param QOpcodes1   Opcodes for the second instruction when the vectors are
///                    wider than 64 bits and NumVecs > 1.
///
/// All three tables are indexed 0/1/2/3 for 8/16/32/64-bit elements.
/// Returns without modifying N when SelectAddrMode6 cannot match the address.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(Num: 0);
  EVT VT = N->getValueType(ResNo: 0);
  bool is64BitVector = VT.is64BitVector();

  // Work out the alignment operand. It stays 0 for the three-vector form;
  // otherwise the alignment reported by SelectAddrMode6 is clamped to the
  // number of bytes accessed (one element per vector) and zeroed when it is
  // too small to be worth encoding.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    // (x & -x) keeps only the lowest set bit.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);

  // Index into the opcode tables, chosen by element size only.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The load result is one wide vector of i64 standing for the whole register
  // tuple: three vectors are rounded up to four, and the count is doubled
  // when the vectors are not 64-bit.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);

  // Result types: loaded data, written-back address if updating, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(x: ResTy);
  if (isUpdating)
    ResTys.push_back(x: MVT::i32);
  ResTys.push_back(x: MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(Elt: MemAddr);
  Ops.push_back(Elt: Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(Num: 2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      // Opcodes for which isVLDfixed is true take no increment operand at
      // all; the others get the null register.
      if (!isVLDfixed(Opc))
        Ops.push_back(Elt: Reg0);
    } else {
      // An explicit increment is needed: switch a "fixed" opcode to its
      // register-update counterpart and pass the increment.
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Elt: Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    // Otherwise emit a first instruction (QOpcodes0) starting from an
    // undefined wide value, and feed its data result and chain into the
    // second instruction built below.
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl, VT1: ResTy,
                                          VT2: MVT::Other, Ops: OpsA);
    Ops.push_back(Elt: SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Transfer memoperands.
  // NOTE: only VLdDup receives the memory operand; the first node of the
  // two-instruction form (VLdA) is left without one.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdDup), NewMemRefs: {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    // A single vector is used directly, with no subregister extract.
    ReplaceUses(F: SDValue(N, 0), T: SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(F: SDValue(N, Vec),
                  T: CurDAG->getTargetExtractSubreg(SRIdx: SubIdx+Vec, DL: dl, VT, Operand: SuperReg));
    }
  }
  // Results after the vectors are forwarded from result 1 (and, when
  // updating, result 2) of the new node.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
3068
3069bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3070 if (!Subtarget->hasMVEIntegerOps())
3071 return false;
3072
3073 SDLoc dl(N);
3074
3075 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3076 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3077 // inserts of the correct type:
3078 SDValue Ins1 = SDValue(N, 0);
3079 SDValue Ins2 = N->getOperand(Num: 0);
3080 EVT VT = Ins1.getValueType();
3081 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3082 !isa<ConstantSDNode>(Val: Ins1.getOperand(i: 2)) ||
3083 !isa<ConstantSDNode>(Val: Ins2.getOperand(i: 2)) ||
3084 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3085 return false;
3086
3087 unsigned Lane1 = Ins1.getConstantOperandVal(i: 2);
3088 unsigned Lane2 = Ins2.getConstantOperandVal(i: 2);
3089 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3090 return false;
3091
3092 // If the inserted values will be able to use T/B already, leave it to the
3093 // existing tablegen patterns. For example VCVTT/VCVTB.
3094 SDValue Val1 = Ins1.getOperand(i: 1);
3095 SDValue Val2 = Ins2.getOperand(i: 1);
3096 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3097 return false;
3098
3099 // Check if the inserted values are both extracts.
3100 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3101 Val1.getOpcode() == ARMISD::VGETLANEu) &&
3102 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3103 Val2.getOpcode() == ARMISD::VGETLANEu) &&
3104 isa<ConstantSDNode>(Val: Val1.getOperand(i: 1)) &&
3105 isa<ConstantSDNode>(Val: Val2.getOperand(i: 1)) &&
3106 (Val1.getOperand(i: 0).getValueType() == MVT::v8f16 ||
3107 Val1.getOperand(i: 0).getValueType() == MVT::v8i16) &&
3108 (Val2.getOperand(i: 0).getValueType() == MVT::v8f16 ||
3109 Val2.getOperand(i: 0).getValueType() == MVT::v8i16)) {
3110 unsigned ExtractLane1 = Val1.getConstantOperandVal(i: 1);
3111 unsigned ExtractLane2 = Val2.getConstantOperandVal(i: 1);
3112
3113 // If the two extracted lanes are from the same place and adjacent, this
3114 // simplifies into a f32 lane move.
3115 if (Val1.getOperand(i: 0) == Val2.getOperand(i: 0) && ExtractLane2 % 2 == 0 &&
3116 ExtractLane1 == ExtractLane2 + 1) {
3117 SDValue NewExt = CurDAG->getTargetExtractSubreg(
3118 SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
3119 SDValue NewIns = CurDAG->getTargetInsertSubreg(
3120 SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT, Operand: Ins2.getOperand(i: 0),
3121 Subreg: NewExt);
3122 ReplaceUses(F: Ins1, T: NewIns);
3123 return true;
3124 }
3125
3126 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3127 // extracting odd lanes.
3128 if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
3129 SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3130 SRIdx: ARM::ssub_0 + ExtractLane1 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
3131 SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3132 SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val2.getOperand(i: 0));
3133 if (ExtractLane1 % 2 != 0)
3134 Inp1 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp1), 0);
3135 if (ExtractLane2 % 2 != 0)
3136 Inp2 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp2), 0);
3137 SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Inp2, Op2: Inp1);
3138 SDValue NewIns =
3139 CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
3140 Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
3141 ReplaceUses(F: Ins1, T: NewIns);
3142 return true;
3143 }
3144 }
3145
3146 // The inserted values are not extracted - if they are f16 then insert them
3147 // directly using a VINS.
3148 if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
3149 SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Val2, Op2: Val1);
3150 SDValue NewIns =
3151 CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
3152 Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
3153 ReplaceUses(F: Ins1, T: NewIns);
3154 return true;
3155 }
3156
3157 return false;
3158}
3159
3160/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3161/// variable shift/rotate instructions.
3162bool ARMDAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3163 EVT VT = N->getValueType(ResNo: 0);
3164 if (VT != MVT::i32)
3165 return false;
3166 // On ARM we intentionally do this only for ROTR. Unlike AArch64, variable
3167 // SHL/SRL/SRA do not all have the same modulo-shift semantics we can exploit.
3168 // Select ROR by register; in ARM state this is modeled as MOVsr with a ROR
3169 // shifter operand, while in Thumb we use tROR/t2RORrr directly.
3170
3171 SDValue ShiftAmt = N->getOperand(Num: 1);
3172 SDLoc DL(N);
3173 SDValue NewShiftAmt;
3174 auto emitUnary = [&](unsigned Opc, SDValue Src, bool IsRSB) {
3175 if (Subtarget->isThumb2() || !Subtarget->isThumb()) {
3176 SDValue Ops[] = {Src};
3177 if (IsRSB) {
3178 SDValue ZeroImm = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
3179 SDValue FullOps[] = {Src, ZeroImm, getAL(CurDAG, dl: DL),
3180 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3181 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3182 MachineSDNode *Unary =
3183 CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::i32, Ops: FullOps);
3184 return SDValue(Unary, 0);
3185 }
3186 SDValue FullOps[] = {Ops[0], getAL(CurDAG, dl: DL),
3187 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3188 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3189 MachineSDNode *Unary = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::i32, Ops: FullOps);
3190 return SDValue(Unary, 0);
3191 }
3192 SDValue Thumb1Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Src,
3193 getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3194 MachineSDNode *Unary = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::i32, Ops: Thumb1Ops);
3195 return SDValue(Unary, 0);
3196 };
3197
3198 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3199 SDValue Add0 = ShiftAmt->getOperand(Num: 0);
3200 SDValue Add1 = ShiftAmt->getOperand(Num: 1);
3201 unsigned Add0Imm;
3202 unsigned Add1Imm;
3203 if (isInt32Immediate(N: Add1, Imm&: Add1Imm) && ((Add1Imm & 31) == 0)) {
3204 NewShiftAmt = Add0;
3205 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3206 isInt32Immediate(N: Add0, Imm&: Add0Imm) && Add0Imm != 0 &&
3207 ((Add0Imm & 31) == 0)) {
3208 unsigned NegOpc =
3209 Subtarget->isThumb()
3210 ? (Subtarget->hasThumb2() ? ARM::t2RSBri : ARM::tRSB)
3211 : ARM::RSBri;
3212 NewShiftAmt = emitUnary(NegOpc, Add1, /*IsRSB=*/true);
3213 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3214 isInt32Immediate(N: Add0, Imm&: Add0Imm) && ((Add0Imm & 31) == 31)) {
3215 unsigned NotOpc = Subtarget->isThumb()
3216 ? (Subtarget->isThumb2() ? ARM::t2MVNr : ARM::tMVN)
3217 : ARM::MVNr;
3218 NewShiftAmt = emitUnary(NotOpc, Add1, /*IsRSB=*/false);
3219 } else {
3220 return false;
3221 }
3222 } else {
3223 return false;
3224 }
3225
3226 if (Subtarget->isThumb()) {
3227 if (Subtarget->isThumb1Only()) {
3228 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32),
3229 N->getOperand(Num: 0), NewShiftAmt, getAL(CurDAG, dl: DL),
3230 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3231 CurDAG->SelectNodeTo(N, MachineOpc: ARM::tROR, VT, Ops);
3232 } else {
3233 SDValue Ops[] = {N->getOperand(Num: 0), NewShiftAmt, getAL(CurDAG, dl: DL),
3234 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3235 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3236 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2RORrr, VT, Ops);
3237 }
3238 } else {
3239 SDValue BaseReg = N->getOperand(Num: 0);
3240 SDValue ShReg = NewShiftAmt;
3241 SDValue OpcEnc = CurDAG->getTargetConstant(
3242 Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::ror, Imm: 0), DL, VT: MVT::i32);
3243 SDValue Ops[] = {BaseReg,
3244 ShReg,
3245 OpcEnc,
3246 getAL(CurDAG, dl: DL),
3247 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3248 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3249 CurDAG->SelectNodeTo(N, MachineOpc: ARM::MOVsr, VT, Ops);
3250 }
3251 return true;
3252}
3253
3254bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3255 SDNode *FMul,
3256 bool IsUnsigned,
3257 bool FixedToFloat) {
3258 auto Type = N->getValueType(ResNo: 0);
3259 unsigned ScalarBits = Type.getScalarSizeInBits();
3260 if (ScalarBits > 32)
3261 return false;
3262
3263 SDNodeFlags FMulFlags = FMul->getFlags();
3264 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3265 // allowed in 16 bit unsigned floats
3266 if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3267 return false;
3268
3269 SDValue ImmNode = FMul->getOperand(Num: 1);
3270 SDValue VecVal = FMul->getOperand(Num: 0);
3271 if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3272 VecVal->getOpcode() == ISD::SINT_TO_FP)
3273 VecVal = VecVal->getOperand(Num: 0);
3274
3275 if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3276 return false;
3277
3278 if (ImmNode.getOpcode() == ISD::BITCAST) {
3279 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3280 return false;
3281 ImmNode = ImmNode.getOperand(i: 0);
3282 }
3283
3284 if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3285 return false;
3286
3287 APFloat ImmAPF(0.0f);
3288 switch (ImmNode.getOpcode()) {
3289 case ARMISD::VMOVIMM:
3290 case ARMISD::VDUP: {
3291 if (!isa<ConstantSDNode>(Val: ImmNode.getOperand(i: 0)))
3292 return false;
3293 unsigned Imm = ImmNode.getConstantOperandVal(i: 0);
3294 if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3295 Imm = ARM_AM::decodeVMOVModImm(ModImm: Imm, EltBits&: ScalarBits);
3296 ImmAPF =
3297 APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3298 APInt(ScalarBits, Imm));
3299 break;
3300 }
3301 case ARMISD::VMOVFPIMM: {
3302 ImmAPF = APFloat(ARM_AM::getFPImmFloat(Imm: ImmNode.getConstantOperandVal(i: 0)));
3303 break;
3304 }
3305 default:
3306 return false;
3307 }
3308
3309 // Where n is the number of fractional bits, multiplying by 2^n will convert
3310 // from float to fixed and multiplying by 2^-n will convert from fixed to
3311 // float. Taking log2 of the factor (after taking the inverse in the case of
3312 // float to fixed) will give n.
3313 APFloat ToConvert = ImmAPF;
3314 if (FixedToFloat) {
3315 if (!ImmAPF.getExactInverse(Inv: &ToConvert))
3316 return false;
3317 }
3318 APSInt Converted(64, false);
3319 bool IsExact;
3320 ToConvert.convertToInteger(Result&: Converted, RM: llvm::RoundingMode::NearestTiesToEven,
3321 IsExact: &IsExact);
3322 if (!IsExact || !Converted.isPowerOf2())
3323 return false;
3324
3325 unsigned FracBits = Converted.logBase2();
3326 if (FracBits > ScalarBits)
3327 return false;
3328
3329 SmallVector<SDValue, 3> Ops{
3330 VecVal, CurDAG->getConstant(Val: FracBits, DL: SDLoc(N), VT: MVT::i32)};
3331 AddEmptyMVEPredicateToOps(Ops, Loc: SDLoc(N), InactiveTy: Type);
3332
3333 unsigned int Opcode;
3334 switch (ScalarBits) {
3335 case 16:
3336 if (FixedToFloat)
3337 Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3338 else
3339 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3340 break;
3341 case 32:
3342 if (FixedToFloat)
3343 Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3344 else
3345 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3346 break;
3347 default:
3348 llvm_unreachable("unexpected number of scalar bits");
3349 break;
3350 }
3351
3352 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT: Type, Ops));
3353 return true;
3354}
3355
3356bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3357 // Transform a floating-point to fixed-point conversion to a VCVT
3358 if (!Subtarget->hasMVEFloatOps())
3359 return false;
3360 EVT Type = N->getValueType(ResNo: 0);
3361 if (!Type.isVector())
3362 return false;
3363 unsigned int ScalarBits = Type.getScalarSizeInBits();
3364
3365 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3366 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3367 SDNode *Node = N->getOperand(Num: 0).getNode();
3368
3369 // floating-point to fixed-point with one fractional bit gets turned into an
3370 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3371 if (Node->getOpcode() == ISD::FADD) {
3372 if (Node->getOperand(Num: 0) != Node->getOperand(Num: 1))
3373 return false;
3374 SDNodeFlags Flags = Node->getFlags();
3375 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3376 // allowed in 16 bit unsigned floats
3377 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3378 return false;
3379
3380 unsigned Opcode;
3381 switch (ScalarBits) {
3382 case 16:
3383 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3384 break;
3385 case 32:
3386 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3387 break;
3388 }
3389 SmallVector<SDValue, 3> Ops{Node->getOperand(Num: 0),
3390 CurDAG->getConstant(Val: 1, DL: dl, VT: MVT::i32)};
3391 AddEmptyMVEPredicateToOps(Ops, Loc: dl, InactiveTy: Type);
3392
3393 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl, VT: Type, Ops));
3394 return true;
3395 }
3396
3397 if (Node->getOpcode() != ISD::FMUL)
3398 return false;
3399
3400 return transformFixedFloatingPointConversion(N, FMul: Node, IsUnsigned, FixedToFloat: false);
3401}
3402
3403bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3404 // Transform a fixed-point to floating-point conversion to a VCVT
3405 if (!Subtarget->hasMVEFloatOps())
3406 return false;
3407 auto Type = N->getValueType(ResNo: 0);
3408 if (!Type.isVector())
3409 return false;
3410
3411 auto LHS = N->getOperand(Num: 0);
3412 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3413 return false;
3414
3415 return transformFixedFloatingPointConversion(
3416 N, FMul: N, IsUnsigned: LHS.getOpcode() == ISD::UINT_TO_FP, FixedToFloat: true);
3417}
3418
3419bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3420 if (!Subtarget->hasV6T2Ops())
3421 return false;
3422
3423 unsigned Opc = isSigned
3424 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3425 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3426 SDLoc dl(N);
3427
3428 // For unsigned extracts, check for a shift right and mask
3429 unsigned And_imm = 0;
3430 if (N->getOpcode() == ISD::AND) {
3431 if (isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: And_imm)) {
3432
3433 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3434 if (And_imm & (And_imm + 1))
3435 return false;
3436
3437 unsigned Srl_imm = 0;
3438 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
3439 Imm&: Srl_imm)) {
3440 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3441
3442 // Mask off the unnecessary bits of the AND immediate; normally
3443 // DAGCombine will do this, but that might not happen if
3444 // targetShrinkDemandedConstant chooses a different immediate.
3445 And_imm &= -1U >> Srl_imm;
3446
3447 // Note: The width operand is encoded as width-1.
3448 unsigned Width = llvm::countr_one(Value: And_imm) - 1;
3449 unsigned LSB = Srl_imm;
3450
3451 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3452
3453 if ((LSB + Width + 1) == N->getValueType(ResNo: 0).getSizeInBits()) {
3454 // It's cheaper to use a right shift to extract the top bits.
3455 if (Subtarget->isThumb()) {
3456 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3457 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3458 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3459 getAL(CurDAG, dl), Reg0, Reg0 };
3460 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3461 return true;
3462 }
3463
3464 // ARM models shift instructions as MOVsi with shifter operand.
3465 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: ISD::SRL);
3466 SDValue ShOpc =
3467 CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: LSB), DL: dl,
3468 VT: MVT::i32);
3469 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0), ShOpc,
3470 getAL(CurDAG, dl), Reg0, Reg0 };
3471 CurDAG->SelectNodeTo(N, MachineOpc: ARM::MOVsi, VT: MVT::i32, Ops);
3472 return true;
3473 }
3474
3475 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3476 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3477 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3478 CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3479 getAL(CurDAG, dl), Reg0 };
3480 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3481 return true;
3482 }
3483 }
3484 return false;
3485 }
3486
3487 // Otherwise, we're looking for a shift of a shift
3488 unsigned Shl_imm = 0;
3489 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: Shl_imm)) {
3490 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3491 unsigned Srl_imm = 0;
3492 if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm)) {
3493 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3494 // Note: The width operand is encoded as width-1.
3495 unsigned Width = 32 - Srl_imm - 1;
3496 int LSB = Srl_imm - Shl_imm;
3497 if (LSB < 0)
3498 return false;
3499 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3500 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3501 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3502 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3503 CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3504 getAL(CurDAG, dl), Reg0 };
3505 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3506 return true;
3507 }
3508 }
3509
3510 // Or we are looking for a shift of an and, with a mask operand
3511 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: And_imm) &&
3512 isShiftedMask_32(Value: And_imm)) {
3513 unsigned Srl_imm = 0;
3514 unsigned LSB = llvm::countr_zero(Val: And_imm);
3515 // Shift must be the same as the ands lsb
3516 if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm) && Srl_imm == LSB) {
3517 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3518 unsigned MSB = llvm::Log2_32(Value: And_imm);
3519 // Note: The width operand is encoded as width-1.
3520 unsigned Width = MSB - LSB;
3521 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3522 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3523 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3524 CurDAG->getTargetConstant(Val: Srl_imm, DL: dl, VT: MVT::i32),
3525 CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
3526 getAL(CurDAG, dl), Reg0 };
3527 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3528 return true;
3529 }
3530 }
3531
3532 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3533 unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
3534 unsigned LSB = 0;
3535 if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: LSB) &&
3536 !isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRA, Imm&: LSB))
3537 return false;
3538
3539 if (LSB + Width > 32)
3540 return false;
3541
3542 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3543 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3544 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
3545 CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
3546 CurDAG->getTargetConstant(Val: Width - 1, DL: dl, VT: MVT::i32),
3547 getAL(CurDAG, dl), Reg0 };
3548 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3549 return true;
3550 }
3551
3552 return false;
3553}
3554
3555/// We've got special pseudo-instructions for these
3556void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3557 unsigned Opcode;
3558 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3559 if (MemTy == MVT::i8)
3560 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3561 else if (MemTy == MVT::i16)
3562 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3563 else if (MemTy == MVT::i32)
3564 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3565 else
3566 llvm_unreachable("Unknown AtomicCmpSwap type");
3567
3568 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
3569 N->getOperand(Num: 0)};
3570 SDNode *CmpSwap = CurDAG->getMachineNode(
3571 Opcode, dl: SDLoc(N),
3572 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other), Ops);
3573
3574 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3575 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3576
3577 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
3578 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
3579 CurDAG->RemoveDeadNode(N);
3580}
3581
3582static std::optional<std::pair<unsigned, unsigned>>
3583getContiguousRangeOfSetBits(const APInt &A) {
3584 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3585 unsigned LastOne = A.countr_zero();
3586 if (A.popcount() != (FirstOne - LastOne + 1))
3587 return std::nullopt;
3588 return std::make_pair(x&: FirstOne, y&: LastOne);
3589}
3590
3591void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3592 assert(N->getOpcode() == ARMISD::CMPZ);
3593 SwitchEQNEToPLMI = false;
3594
3595 if (!Subtarget->isThumb())
3596 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3597 // LSR don't exist as standalone instructions - they need the barrel shifter.
3598 return;
3599
3600 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3601 SDValue And = N->getOperand(Num: 0);
3602 if (!And->hasOneUse())
3603 return;
3604
3605 SDValue Zero = N->getOperand(Num: 1);
3606 if (!isNullConstant(V: Zero) || And->getOpcode() != ISD::AND)
3607 return;
3608 SDValue X = And.getOperand(i: 0);
3609 auto C = dyn_cast<ConstantSDNode>(Val: And.getOperand(i: 1));
3610
3611 if (!C)
3612 return;
3613 auto Range = getContiguousRangeOfSetBits(A: C->getAPIntValue());
3614 if (!Range)
3615 return;
3616
3617 // There are several ways to lower this:
3618 SDNode *NewN;
3619 SDLoc dl(N);
3620
3621 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3622 if (Subtarget->isThumb2()) {
3623 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3624 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3625 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3626 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3627 return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3628 } else {
3629 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Src,
3630 CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
3631 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3632 return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
3633 }
3634 };
3635
3636 if (Range->second == 0) {
3637 // 1. Mask includes the LSB -> Simply shift the top N bits off
3638 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3639 ReplaceNode(F: And.getNode(), T: NewN);
3640 } else if (Range->first == 31) {
3641 // 2. Mask includes the MSB -> Simply shift the bottom N bits off
3642 NewN = EmitShift(ARM::tLSRri, X, Range->second);
3643 ReplaceNode(F: And.getNode(), T: NewN);
3644 } else if (Range->first == Range->second) {
3645 // 3. Only one bit is set. We can shift this into the sign bit and use a
3646 // PL/MI comparison. This is not safe if CMPZ has multiple uses because
3647 // only one of them (the one currently being selected) will be switched
3648 // to use the new condition code.
3649 if (!N->hasOneUse())
3650 return;
3651 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3652 ReplaceNode(F: And.getNode(), T: NewN);
3653
3654 SwitchEQNEToPLMI = true;
3655 } else if (!Subtarget->hasV6T2Ops()) {
3656 // 4. Do a double shift to clear bottom and top bits, but only in
3657 // thumb-1 mode as in thumb-2 we can use UBFX.
3658 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3659 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3660 Range->second + (31 - Range->first));
3661 ReplaceNode(F: And.getNode(), T: NewN);
3662 }
3663}
3664
3665static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3666 unsigned Opc128[3]) {
3667 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3668 "Unexpected vector shuffle length");
3669 switch (VT.getScalarSizeInBits()) {
3670 default:
3671 llvm_unreachable("Unexpected vector shuffle element size");
3672 case 8:
3673 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3674 case 16:
3675 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3676 case 32:
3677 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3678 }
3679}
3680
3681void ARMDAGToDAGISel::Select(SDNode *N) {
3682 SDLoc dl(N);
3683
3684 if (N->isMachineOpcode()) {
3685 N->setNodeId(-1);
3686 return; // Already selected.
3687 }
3688
3689 switch (N->getOpcode()) {
3690 default: break;
3691 case ISD::STORE: {
3692 // For Thumb1, match an sp-relative store in C++. This is a little
3693 // unfortunate, but I don't think I can make the chain check work
3694 // otherwise. (The chain of the store has to be the same as the chain
3695 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3696 // a direct reference to "SP".)
3697 //
3698 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3699 // a different addressing mode from other four-byte stores.
3700 //
3701 // This pattern usually comes up with call arguments.
3702 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
3703 SDValue Ptr = ST->getBasePtr();
3704 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3705 int RHSC = 0;
3706 if (Ptr.getOpcode() == ISD::ADD &&
3707 isScaledConstantInRange(Node: Ptr.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC))
3708 Ptr = Ptr.getOperand(i: 0);
3709
3710 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3711 cast<RegisterSDNode>(Val: Ptr.getOperand(i: 1))->getReg() == ARM::SP &&
3712 Ptr.getOperand(i: 0) == ST->getChain()) {
3713 SDValue Ops[] = {ST->getValue(),
3714 CurDAG->getRegister(Reg: ARM::SP, VT: MVT::i32),
3715 CurDAG->getTargetConstant(Val: RHSC, DL: dl, VT: MVT::i32),
3716 getAL(CurDAG, dl),
3717 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3718 ST->getChain()};
3719 MachineSDNode *ResNode =
3720 CurDAG->getMachineNode(Opcode: ARM::tSTRspi, dl, VT: MVT::Other, Ops);
3721 MachineMemOperand *MemOp = ST->getMemOperand();
3722 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3723 ReplaceNode(F: N, T: ResNode);
3724 return;
3725 }
3726 }
3727 break;
3728 }
3729 case ISD::WRITE_REGISTER:
3730 if (tryWriteRegister(N))
3731 return;
3732 break;
3733 case ISD::READ_REGISTER:
3734 if (tryReadRegister(N))
3735 return;
3736 break;
3737 case ISD::INLINEASM:
3738 case ISD::INLINEASM_BR:
3739 if (tryInlineAsm(N))
3740 return;
3741 break;
3742 case ISD::Constant: {
3743 unsigned Val = N->getAsZExtVal();
3744 // If we can't materialize the constant we need to use a literal pool
3745 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3746 !Subtarget->genExecuteOnly()) {
3747 SDValue CPIdx = CurDAG->getTargetConstantPool(
3748 C: ConstantInt::get(Ty: Type::getInt32Ty(C&: *CurDAG->getContext()), V: Val),
3749 VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3750
3751 SDNode *ResNode;
3752 if (Subtarget->isThumb()) {
3753 SDValue Ops[] = {
3754 CPIdx,
3755 getAL(CurDAG, dl),
3756 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3757 CurDAG->getEntryNode()
3758 };
3759 ResNode = CurDAG->getMachineNode(Opcode: ARM::tLDRpci, dl, VT1: MVT::i32, VT2: MVT::Other,
3760 Ops);
3761 } else {
3762 SDValue Ops[] = {
3763 CPIdx,
3764 CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3765 getAL(CurDAG, dl),
3766 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3767 CurDAG->getEntryNode()
3768 };
3769 ResNode = CurDAG->getMachineNode(Opcode: ARM::LDRcp, dl, VT1: MVT::i32, VT2: MVT::Other,
3770 Ops);
3771 }
3772 // Annotate the Node with memory operand information so that MachineInstr
3773 // queries work properly. This e.g. gives the register allocation the
3774 // required information for rematerialization.
3775 MachineFunction& MF = CurDAG->getMachineFunction();
3776 MachineMemOperand *MemOp =
3777 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3778 F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
3779
3780 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3781
3782 ReplaceNode(F: N, T: ResNode);
3783 return;
3784 }
3785
3786 // Other cases are autogenerated.
3787 break;
3788 }
3789 case ISD::FrameIndex: {
3790 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3791 int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
3792 SDValue TFI = CurDAG->getTargetFrameIndex(
3793 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3794 if (Subtarget->isThumb1Only()) {
3795 // Set the alignment of the frame object to 4, to avoid having to generate
3796 // more than one ADD
3797 MachineFrameInfo &MFI = MF->getFrameInfo();
3798 if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
3799 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
3800 CurDAG->SelectNodeTo(N, MachineOpc: ARM::tADDframe, VT: MVT::i32, Op1: TFI,
3801 Op2: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
3802 return;
3803 } else {
3804 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3805 ARM::t2ADDri : ARM::ADDri);
3806 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3807 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3808 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3809 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3810 return;
3811 }
3812 }
3813 case ISD::INSERT_VECTOR_ELT: {
3814 if (tryInsertVectorElt(N))
3815 return;
3816 break;
3817 }
3818 case ISD::SRL:
3819 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3820 return;
3821 break;
3822 case ISD::SIGN_EXTEND_INREG:
3823 case ISD::SRA:
3824 if (tryV6T2BitfieldExtractOp(N, isSigned: true))
3825 return;
3826 break;
3827 case ISD::ROTR:
3828 if (tryShiftAmountMod(N))
3829 return;
3830 break;
3831 case ISD::FP_TO_UINT:
3832 case ISD::FP_TO_SINT:
3833 case ISD::FP_TO_UINT_SAT:
3834 case ISD::FP_TO_SINT_SAT:
3835 if (tryFP_TO_INT(N, dl))
3836 return;
3837 break;
3838 case ISD::FMUL:
3839 if (tryFMULFixed(N, dl))
3840 return;
3841 break;
3842 case ISD::MUL:
3843 if (Subtarget->isThumb1Only())
3844 break;
3845 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) {
3846 unsigned RHSV = C->getZExtValue();
3847 if (!RHSV) break;
3848 if (isPowerOf2_32(Value: RHSV-1)) { // 2^n+1?
3849 unsigned ShImm = Log2_32(Value: RHSV-1);
3850 if (ShImm >= 32)
3851 break;
3852 SDValue V = N->getOperand(Num: 0);
3853 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3854 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3855 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3856 if (Subtarget->isThumb()) {
3857 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3858 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2ADDrs, VT: MVT::i32, Ops);
3859 return;
3860 } else {
3861 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3862 Reg0 };
3863 CurDAG->SelectNodeTo(N, MachineOpc: ARM::ADDrsi, VT: MVT::i32, Ops);
3864 return;
3865 }
3866 }
3867 if (isPowerOf2_32(Value: RHSV+1)) { // 2^n-1?
3868 unsigned ShImm = Log2_32(Value: RHSV+1);
3869 if (ShImm >= 32)
3870 break;
3871 SDValue V = N->getOperand(Num: 0);
3872 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3873 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3874 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3875 if (Subtarget->isThumb()) {
3876 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3877 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2RSBrs, VT: MVT::i32, Ops);
3878 return;
3879 } else {
3880 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3881 Reg0 };
3882 CurDAG->SelectNodeTo(N, MachineOpc: ARM::RSBrsi, VT: MVT::i32, Ops);
3883 return;
3884 }
3885 }
3886 }
3887 break;
3888 case ISD::AND: {
3889 // Check for unsigned bitfield extract
3890 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3891 return;
3892
3893 // If an immediate is used in an AND node, it is possible that the immediate
3894 // can be more optimally materialized when negated. If this is the case we
3895 // can negate the immediate and use a BIC instead.
3896 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
3897 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3898 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3899
3900 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3901 // immediate can be negated and fit in the immediate operand of
3902 // a t2BIC, don't do any manual transform here as this can be
3903 // handled by the generic ISel machinery.
3904 bool PreferImmediateEncoding =
3905 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3906 if (!PreferImmediateEncoding &&
3907 ConstantMaterializationCost(Val: Imm, Subtarget) >
3908 ConstantMaterializationCost(Val: ~Imm, Subtarget)) {
3909 // The current immediate costs more to materialize than a negated
3910 // immediate, so negate the immediate and use a BIC.
3911 SDValue NewImm = CurDAG->getConstant(Val: ~Imm, DL: dl, VT: MVT::i32);
3912 // If the new constant didn't exist before, reposition it in the topological
3913 // ordering so it is just before N. Otherwise, don't touch its location.
3914 if (NewImm->getNodeId() == -1)
3915 CurDAG->RepositionNode(Position: N->getIterator(), N: NewImm.getNode());
3916
3917 if (!Subtarget->hasThumb2()) {
3918 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32),
3919 N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3920 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3921 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::tBIC, dl, VT: MVT::i32, Ops));
3922 return;
3923 } else {
3924 SDValue Ops[] = {N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3925 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3926 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3927 ReplaceNode(F: N,
3928 T: CurDAG->getMachineNode(Opcode: ARM::t2BICrr, dl, VT: MVT::i32, Ops));
3929 return;
3930 }
3931 }
3932 }
3933
3934 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3935 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3936 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3937 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3938 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3939 EVT VT = N->getValueType(ResNo: 0);
3940 if (VT != MVT::i32)
3941 break;
3942 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3943 ? ARM::t2MOVTi16
3944 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3945 if (!Opc)
3946 break;
3947 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
3948 N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3949 if (!N1C)
3950 break;
3951 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3952 SDValue N2 = N0.getOperand(i: 1);
3953 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2);
3954 if (!N2C)
3955 break;
3956 unsigned N1CVal = N1C->getZExtValue();
3957 unsigned N2CVal = N2C->getZExtValue();
3958 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3959 (N1CVal & 0xffffU) == 0xffffU &&
3960 (N2CVal & 0xffffU) == 0x0U) {
3961 SDValue Imm16 = CurDAG->getTargetConstant(Val: (N2CVal & 0xFFFF0000U) >> 16,
3962 DL: dl, VT: MVT::i32);
3963 SDValue Ops[] = { N0.getOperand(i: 0), Imm16,
3964 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3965 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
3966 return;
3967 }
3968 }
3969
3970 break;
3971 }
3972 case ARMISD::UMAAL: {
3973 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3974 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1),
3975 N->getOperand(Num: 2), N->getOperand(Num: 3),
3976 getAL(CurDAG, dl),
3977 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3978 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3979 return;
3980 }
3981 case ARMISD::UMLAL:{
3982 if (Subtarget->isThumb()) {
3983 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3984 N->getOperand(Num: 3), getAL(CurDAG, dl),
3985 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3986 ReplaceNode(
3987 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2UMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3988 return;
3989 }else{
3990 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3991 N->getOperand(Num: 3), getAL(CurDAG, dl),
3992 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3993 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3994 ReplaceNode(F: N, T: CurDAG->getMachineNode(
3995 Opcode: Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3996 VT1: MVT::i32, VT2: MVT::i32, Ops));
3997 return;
3998 }
3999 }
4000 case ARMISD::SMLAL:{
4001 if (Subtarget->isThumb()) {
4002 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
4003 N->getOperand(Num: 3), getAL(CurDAG, dl),
4004 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
4005 ReplaceNode(
4006 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2SMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
4007 return;
4008 }else{
4009 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
4010 N->getOperand(Num: 3), getAL(CurDAG, dl),
4011 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
4012 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
4013 ReplaceNode(F: N, T: CurDAG->getMachineNode(
4014 Opcode: Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
4015 VT1: MVT::i32, VT2: MVT::i32, Ops));
4016 return;
4017 }
4018 }
4019 case ARMISD::SUBE: {
4020 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
4021 break;
4022 // Look for a pattern to match SMMLS
4023 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
4024 if (N->getOperand(Num: 1).getOpcode() != ISD::SMUL_LOHI ||
4025 N->getOperand(Num: 2).getOpcode() != ARMISD::SUBC ||
4026 !SDValue(N, 1).use_empty())
4027 break;
4028
4029 if (Subtarget->isThumb())
4030 assert(Subtarget->hasThumb2() &&
4031 "This pattern should not be generated for Thumb");
4032
4033 SDValue SmulLoHi = N->getOperand(Num: 1);
4034 SDValue Subc = N->getOperand(Num: 2);
4035 SDValue Zero = Subc.getOperand(i: 0);
4036
4037 if (!isNullConstant(V: Zero) || Subc.getOperand(i: 1) != SmulLoHi.getValue(R: 0) ||
4038 N->getOperand(Num: 1) != SmulLoHi.getValue(R: 1) ||
4039 N->getOperand(Num: 2) != Subc.getValue(R: 1))
4040 break;
4041
4042 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
4043 SDValue Ops[] = { SmulLoHi.getOperand(i: 0), SmulLoHi.getOperand(i: 1),
4044 N->getOperand(Num: 0), getAL(CurDAG, dl),
4045 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
4046 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops));
4047 return;
4048 }
4049 case ISD::LOAD: {
4050 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4051 return;
4052 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4053 if (tryT2IndexedLoad(N))
4054 return;
4055 } else if (Subtarget->isThumb()) {
4056 if (tryT1IndexedLoad(N))
4057 return;
4058 } else if (tryARMIndexedLoad(N))
4059 return;
4060 // Other cases are autogenerated.
4061 break;
4062 }
4063 case ISD::MLOAD:
4064 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4065 return;
4066 // Other cases are autogenerated.
4067 break;
4068 case ARMISD::LDRD: {
4069 if (Subtarget->isThumb2())
4070 break; // TableGen handles isel in this case.
4071 SDValue Base, RegOffset, ImmOffset;
4072 const SDValue &Chain = N->getOperand(Num: 0);
4073 const SDValue &Addr = N->getOperand(Num: 1);
4074 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4075 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
4076 // The register-offset variant of LDRD mandates that the register
4077 // allocated to RegOffset is not reused in any of the remaining operands.
4078 // This restriction is currently not enforced. Therefore emitting this
4079 // variant is explicitly avoided.
4080 Base = Addr;
4081 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4082 }
4083 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4084 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::LOADDUAL, dl,
4085 ResultTys: {MVT::Untyped, MVT::Other}, Ops);
4086 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
4087 Operand: SDValue(New, 0));
4088 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
4089 Operand: SDValue(New, 0));
4090 transferMemOperands(N, Result: New);
4091 ReplaceUses(F: SDValue(N, 0), T: Lo);
4092 ReplaceUses(F: SDValue(N, 1), T: Hi);
4093 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 1));
4094 CurDAG->RemoveDeadNode(N);
4095 return;
4096 }
4097 case ARMISD::STRD: {
4098 if (Subtarget->isThumb2())
4099 break; // TableGen handles isel in this case.
4100 SDValue Base, RegOffset, ImmOffset;
4101 const SDValue &Chain = N->getOperand(Num: 0);
4102 const SDValue &Addr = N->getOperand(Num: 3);
4103 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4104 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
4105 // The register-offset variant of STRD mandates that the register
4106 // allocated to RegOffset is not reused in any of the remaining operands.
4107 // This restriction is currently not enforced. Therefore emitting this
4108 // variant is explicitly avoided.
4109 Base = Addr;
4110 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4111 }
4112 SDNode *RegPair =
4113 createGPRPairNode(VT: MVT::Untyped, V0: N->getOperand(Num: 1), V1: N->getOperand(Num: 2));
4114 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4115 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::STOREDUAL, dl, VT: MVT::Other, Ops);
4116 transferMemOperands(N, Result: New);
4117 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 0));
4118 CurDAG->RemoveDeadNode(N);
4119 return;
4120 }
4121 case ARMISD::BRCOND: {
4122 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4123 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4124 // Pattern complexity = 6 cost = 1 size = 0
4125
4126 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4127 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4128 // Pattern complexity = 6 cost = 1 size = 0
4129
4130 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4131 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4132 // Pattern complexity = 6 cost = 1 size = 0
4133
4134 unsigned Opc = Subtarget->isThumb() ?
4135 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4136 SDValue Chain = N->getOperand(Num: 0);
4137 SDValue N1 = N->getOperand(Num: 1);
4138 SDValue N2 = N->getOperand(Num: 2);
4139 SDValue Flags = N->getOperand(Num: 3);
4140 assert(N1.getOpcode() == ISD::BasicBlock);
4141 assert(N2.getOpcode() == ISD::Constant);
4142
4143 unsigned CC = (unsigned)N2->getAsZExtVal();
4144
4145 if (Flags.getOpcode() == ARMISD::CMPZ) {
4146 if (Flags.getOperand(i: 0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4147 SDValue Int = Flags.getOperand(i: 0);
4148 uint64_t ID = Int->getConstantOperandVal(Num: 1);
4149
4150 // Handle low-overhead loops.
4151 if (ID == Intrinsic::loop_decrement_reg) {
4152 SDValue Elements = Int.getOperand(i: 2);
4153 SDValue Size = CurDAG->getTargetConstant(Val: Int.getConstantOperandVal(i: 3),
4154 DL: dl, VT: MVT::i32);
4155
4156 SDValue Args[] = { Elements, Size, Int.getOperand(i: 0) };
4157 SDNode *LoopDec =
4158 CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4159 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other),
4160 Ops: Args);
4161 ReplaceUses(F: Int.getNode(), T: LoopDec);
4162
4163 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4164 SDNode *LoopEnd =
4165 CurDAG->getMachineNode(Opcode: ARM::t2LoopEnd, dl, VT: MVT::Other, Ops: EndArgs);
4166
4167 ReplaceUses(F: N, T: LoopEnd);
4168 CurDAG->RemoveDeadNode(N);
4169 CurDAG->RemoveDeadNode(N: Flags.getNode());
4170 CurDAG->RemoveDeadNode(N: Int.getNode());
4171 return;
4172 }
4173 }
4174
4175 bool SwitchEQNEToPLMI;
4176 SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4177 Flags = N->getOperand(Num: 3);
4178
4179 if (SwitchEQNEToPLMI) {
4180 switch ((ARMCC::CondCodes)CC) {
4181 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4182 case ARMCC::NE:
4183 CC = (unsigned)ARMCC::MI;
4184 break;
4185 case ARMCC::EQ:
4186 CC = (unsigned)ARMCC::PL;
4187 break;
4188 }
4189 }
4190 }
4191
4192 SDValue Tmp2 = CurDAG->getTargetConstant(Val: CC, DL: dl, VT: MVT::i32);
4193 Chain = CurDAG->getCopyToReg(Chain, dl, Reg: ARM::CPSR, N: Flags, Glue: SDValue());
4194 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Chain,
4195 Chain.getValue(R: 1)};
4196 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::Other, Ops);
4197 return;
4198 }
4199
4200 case ARMISD::CMPZ: {
4201 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4202 // This allows us to avoid materializing the expensive negative constant.
4203 // The CMPZ #0 is useless and will be peepholed away but we need to keep
4204 // it for its flags output.
4205 SDValue X = N->getOperand(Num: 0);
4206 auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1).getNode());
4207 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4208 int64_t Addend = -C->getSExtValue();
4209
4210 SDNode *Add = nullptr;
4211 // ADDS can be better than CMN if the immediate fits in a
4212 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4213 // Outside that range we can just use a CMN which is 32-bit but has a
4214 // 12-bit immediate range.
4215 if (Addend < 1<<8) {
4216 if (Subtarget->isThumb2()) {
4217 SDValue Ops[] = { X, CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4218 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
4219 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
4220 Add = CurDAG->getMachineNode(Opcode: ARM::t2ADDri, dl, VT: MVT::i32, Ops);
4221 } else {
4222 unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4223 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), X,
4224 CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
4225 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
4226 Add = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
4227 }
4228 }
4229 if (Add) {
4230 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(Val: 0, DL: dl, VT: MVT::i32)};
4231 CurDAG->MorphNodeTo(N, Opc: ARMISD::CMPZ, VTs: N->getVTList(), Ops: Ops2);
4232 }
4233 }
4234 // Other cases are autogenerated.
4235 break;
4236 }
4237
4238 case ARMISD::CMOV: {
4239 SDValue Flags = N->getOperand(Num: 3);
4240
4241 if (Flags.getOpcode() == ARMISD::CMPZ) {
4242 bool SwitchEQNEToPLMI;
4243 SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4244
4245 if (SwitchEQNEToPLMI) {
4246 SDValue ARMcc = N->getOperand(Num: 2);
4247 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
4248
4249 switch (CC) {
4250 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4251 case ARMCC::NE:
4252 CC = ARMCC::MI;
4253 break;
4254 case ARMCC::EQ:
4255 CC = ARMCC::PL;
4256 break;
4257 }
4258 SDValue NewARMcc = CurDAG->getConstant(Val: (unsigned)CC, DL: dl, VT: MVT::i32);
4259 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), NewARMcc,
4260 N->getOperand(Num: 3)};
4261 CurDAG->MorphNodeTo(N, Opc: ARMISD::CMOV, VTs: N->getVTList(), Ops);
4262 }
4263 }
4264 // Other cases are autogenerated.
4265 break;
4266 }
4267 case ARMISD::VZIP: {
4268 EVT VT = N->getValueType(ResNo: 0);
4269 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4270 unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4271 unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4272 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4273 SDValue Pred = getAL(CurDAG, dl);
4274 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4275 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4276 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4277 return;
4278 }
4279 case ARMISD::VUZP: {
4280 EVT VT = N->getValueType(ResNo: 0);
4281 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4282 unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4283 unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4284 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4285 SDValue Pred = getAL(CurDAG, dl);
4286 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4287 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4288 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4289 return;
4290 }
4291 case ARMISD::VTRN: {
4292 EVT VT = N->getValueType(ResNo: 0);
4293 unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4294 unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4295 unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4296 SDValue Pred = getAL(CurDAG, dl);
4297 SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4298 SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
4299 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
4300 return;
4301 }
4302 case ARMISD::BUILD_VECTOR: {
4303 EVT VecVT = N->getValueType(ResNo: 0);
4304 EVT EltVT = VecVT.getVectorElementType();
4305 unsigned NumElts = VecVT.getVectorNumElements();
4306 if (EltVT == MVT::f64) {
4307 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4308 ReplaceNode(
4309 F: N, T: createDRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
4310 return;
4311 }
4312 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4313 if (NumElts == 2) {
4314 ReplaceNode(
4315 F: N, T: createSRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
4316 return;
4317 }
4318 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4319 ReplaceNode(F: N,
4320 T: createQuadSRegsNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1),
4321 V2: N->getOperand(Num: 2), V3: N->getOperand(Num: 3)));
4322 return;
4323 }
4324
4325 case ARMISD::VLD1DUP: {
4326 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4327 ARM::VLD1DUPd32 };
4328 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4329 ARM::VLD1DUPq32 };
4330 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
4331 return;
4332 }
4333
4334 case ARMISD::VLD2DUP: {
4335 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4336 ARM::VLD2DUPd32 };
4337 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 2, DOpcodes: Opcodes);
4338 return;
4339 }
4340
4341 case ARMISD::VLD3DUP: {
4342 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4343 ARM::VLD3DUPd16Pseudo,
4344 ARM::VLD3DUPd32Pseudo };
4345 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 3, DOpcodes: Opcodes);
4346 return;
4347 }
4348
4349 case ARMISD::VLD4DUP: {
4350 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4351 ARM::VLD4DUPd16Pseudo,
4352 ARM::VLD4DUPd32Pseudo };
4353 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 4, DOpcodes: Opcodes);
4354 return;
4355 }
4356
4357 case ARMISD::VLD1DUP_UPD: {
4358 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4359 ARM::VLD1DUPd16wb_fixed,
4360 ARM::VLD1DUPd32wb_fixed };
4361 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4362 ARM::VLD1DUPq16wb_fixed,
4363 ARM::VLD1DUPq32wb_fixed };
4364 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
4365 return;
4366 }
4367
4368 case ARMISD::VLD2DUP_UPD: {
4369 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4370 ARM::VLD2DUPd16wb_fixed,
4371 ARM::VLD2DUPd32wb_fixed,
4372 ARM::VLD1q64wb_fixed };
4373 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4374 ARM::VLD2DUPq16EvenPseudo,
4375 ARM::VLD2DUPq32EvenPseudo };
4376 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4377 ARM::VLD2DUPq16OddPseudoWB_fixed,
4378 ARM::VLD2DUPq32OddPseudoWB_fixed };
4379 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0, QOpcodes1);
4380 return;
4381 }
4382
4383 case ARMISD::VLD3DUP_UPD: {
4384 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4385 ARM::VLD3DUPd16Pseudo_UPD,
4386 ARM::VLD3DUPd32Pseudo_UPD,
4387 ARM::VLD1d64TPseudoWB_fixed };
4388 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4389 ARM::VLD3DUPq16EvenPseudo,
4390 ARM::VLD3DUPq32EvenPseudo };
4391 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4392 ARM::VLD3DUPq16OddPseudo_UPD,
4393 ARM::VLD3DUPq32OddPseudo_UPD };
4394 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4395 return;
4396 }
4397
4398 case ARMISD::VLD4DUP_UPD: {
4399 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4400 ARM::VLD4DUPd16Pseudo_UPD,
4401 ARM::VLD4DUPd32Pseudo_UPD,
4402 ARM::VLD1d64QPseudoWB_fixed };
4403 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4404 ARM::VLD4DUPq16EvenPseudo,
4405 ARM::VLD4DUPq32EvenPseudo };
4406 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4407 ARM::VLD4DUPq16OddPseudo_UPD,
4408 ARM::VLD4DUPq32OddPseudo_UPD };
4409 SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4410 return;
4411 }
4412
4413 case ARMISD::VLD1_UPD: {
4414 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4415 ARM::VLD1d16wb_fixed,
4416 ARM::VLD1d32wb_fixed,
4417 ARM::VLD1d64wb_fixed };
4418 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4419 ARM::VLD1q16wb_fixed,
4420 ARM::VLD1q32wb_fixed,
4421 ARM::VLD1q64wb_fixed };
4422 SelectVLD(N, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4423 return;
4424 }
4425
4426 case ARMISD::VLD2_UPD: {
4427 if (Subtarget->hasNEON()) {
4428 static const uint16_t DOpcodes[] = {
4429 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4430 ARM::VLD1q64wb_fixed};
4431 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4432 ARM::VLD2q16PseudoWB_fixed,
4433 ARM::VLD2q32PseudoWB_fixed};
4434 SelectVLD(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4435 } else {
4436 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4437 ARM::MVE_VLD21_8_wb};
4438 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4439 ARM::MVE_VLD21_16_wb};
4440 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4441 ARM::MVE_VLD21_32_wb};
4442 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4443 SelectMVE_VLD(N, NumVecs: 2, Opcodes, HasWriteback: true);
4444 }
4445 return;
4446 }
4447
4448 case ARMISD::VLD3_UPD: {
4449 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4450 ARM::VLD3d16Pseudo_UPD,
4451 ARM::VLD3d32Pseudo_UPD,
4452 ARM::VLD1d64TPseudoWB_fixed};
4453 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4454 ARM::VLD3q16Pseudo_UPD,
4455 ARM::VLD3q32Pseudo_UPD };
4456 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4457 ARM::VLD3q16oddPseudo_UPD,
4458 ARM::VLD3q32oddPseudo_UPD };
4459 SelectVLD(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4460 return;
4461 }
4462
4463 case ARMISD::VLD4_UPD: {
4464 if (Subtarget->hasNEON()) {
4465 static const uint16_t DOpcodes[] = {
4466 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4467 ARM::VLD1d64QPseudoWB_fixed};
4468 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4469 ARM::VLD4q16Pseudo_UPD,
4470 ARM::VLD4q32Pseudo_UPD};
4471 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4472 ARM::VLD4q16oddPseudo_UPD,
4473 ARM::VLD4q32oddPseudo_UPD};
4474 SelectVLD(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4475 } else {
4476 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4477 ARM::MVE_VLD42_8,
4478 ARM::MVE_VLD43_8_wb};
4479 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4480 ARM::MVE_VLD42_16,
4481 ARM::MVE_VLD43_16_wb};
4482 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4483 ARM::MVE_VLD42_32,
4484 ARM::MVE_VLD43_32_wb};
4485 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4486 SelectMVE_VLD(N, NumVecs: 4, Opcodes, HasWriteback: true);
4487 }
4488 return;
4489 }
4490
4491 case ARMISD::VLD1x2_UPD: {
4492 if (Subtarget->hasNEON()) {
4493 static const uint16_t DOpcodes[] = {
4494 ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4495 ARM::VLD1q64wb_fixed};
4496 static const uint16_t QOpcodes[] = {
4497 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4498 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4499 SelectVLD(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4500 return;
4501 }
4502 break;
4503 }
4504
4505 case ARMISD::VLD1x3_UPD: {
4506 if (Subtarget->hasNEON()) {
4507 static const uint16_t DOpcodes[] = {
4508 ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4509 ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4510 static const uint16_t QOpcodes0[] = {
4511 ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4512 ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4513 static const uint16_t QOpcodes1[] = {
4514 ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4515 ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4516 SelectVLD(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4517 return;
4518 }
4519 break;
4520 }
4521
4522 case ARMISD::VLD1x4_UPD: {
4523 if (Subtarget->hasNEON()) {
4524 static const uint16_t DOpcodes[] = {
4525 ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4526 ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4527 static const uint16_t QOpcodes0[] = {
4528 ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4529 ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4530 static const uint16_t QOpcodes1[] = {
4531 ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4532 ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4533 SelectVLD(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4534 return;
4535 }
4536 break;
4537 }
4538
4539 case ARMISD::VLD2LN_UPD: {
4540 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4541 ARM::VLD2LNd16Pseudo_UPD,
4542 ARM::VLD2LNd32Pseudo_UPD };
4543 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4544 ARM::VLD2LNq32Pseudo_UPD };
4545 SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes);
4546 return;
4547 }
4548
4549 case ARMISD::VLD3LN_UPD: {
4550 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4551 ARM::VLD3LNd16Pseudo_UPD,
4552 ARM::VLD3LNd32Pseudo_UPD };
4553 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4554 ARM::VLD3LNq32Pseudo_UPD };
4555 SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes);
4556 return;
4557 }
4558
4559 case ARMISD::VLD4LN_UPD: {
4560 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4561 ARM::VLD4LNd16Pseudo_UPD,
4562 ARM::VLD4LNd32Pseudo_UPD };
4563 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4564 ARM::VLD4LNq32Pseudo_UPD };
4565 SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes);
4566 return;
4567 }
4568
4569 case ARMISD::VST1_UPD: {
4570 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4571 ARM::VST1d16wb_fixed,
4572 ARM::VST1d32wb_fixed,
4573 ARM::VST1d64wb_fixed };
4574 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4575 ARM::VST1q16wb_fixed,
4576 ARM::VST1q32wb_fixed,
4577 ARM::VST1q64wb_fixed };
4578 SelectVST(N, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4579 return;
4580 }
4581
4582 case ARMISD::VST2_UPD: {
4583 if (Subtarget->hasNEON()) {
4584 static const uint16_t DOpcodes[] = {
4585 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4586 ARM::VST1q64wb_fixed};
4587 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4588 ARM::VST2q16PseudoWB_fixed,
4589 ARM::VST2q32PseudoWB_fixed};
4590 SelectVST(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4591 return;
4592 }
4593 break;
4594 }
4595
4596 case ARMISD::VST3_UPD: {
4597 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4598 ARM::VST3d16Pseudo_UPD,
4599 ARM::VST3d32Pseudo_UPD,
4600 ARM::VST1d64TPseudoWB_fixed};
4601 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4602 ARM::VST3q16Pseudo_UPD,
4603 ARM::VST3q32Pseudo_UPD };
4604 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4605 ARM::VST3q16oddPseudo_UPD,
4606 ARM::VST3q32oddPseudo_UPD };
4607 SelectVST(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4608 return;
4609 }
4610
4611 case ARMISD::VST4_UPD: {
4612 if (Subtarget->hasNEON()) {
4613 static const uint16_t DOpcodes[] = {
4614 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4615 ARM::VST1d64QPseudoWB_fixed};
4616 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4617 ARM::VST4q16Pseudo_UPD,
4618 ARM::VST4q32Pseudo_UPD};
4619 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4620 ARM::VST4q16oddPseudo_UPD,
4621 ARM::VST4q32oddPseudo_UPD};
4622 SelectVST(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4623 return;
4624 }
4625 break;
4626 }
4627
4628 case ARMISD::VST1x2_UPD: {
4629 if (Subtarget->hasNEON()) {
4630 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4631 ARM::VST1q16wb_fixed,
4632 ARM::VST1q32wb_fixed,
4633 ARM::VST1q64wb_fixed};
4634 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4635 ARM::VST1d16QPseudoWB_fixed,
4636 ARM::VST1d32QPseudoWB_fixed,
4637 ARM::VST1d64QPseudoWB_fixed };
4638 SelectVST(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4639 return;
4640 }
4641 break;
4642 }
4643
4644 case ARMISD::VST1x3_UPD: {
4645 if (Subtarget->hasNEON()) {
4646 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4647 ARM::VST1d16TPseudoWB_fixed,
4648 ARM::VST1d32TPseudoWB_fixed,
4649 ARM::VST1d64TPseudoWB_fixed };
4650 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4651 ARM::VST1q16LowTPseudo_UPD,
4652 ARM::VST1q32LowTPseudo_UPD,
4653 ARM::VST1q64LowTPseudo_UPD };
4654 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4655 ARM::VST1q16HighTPseudo_UPD,
4656 ARM::VST1q32HighTPseudo_UPD,
4657 ARM::VST1q64HighTPseudo_UPD };
4658 SelectVST(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4659 return;
4660 }
4661 break;
4662 }
4663
4664 case ARMISD::VST1x4_UPD: {
4665 if (Subtarget->hasNEON()) {
4666 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4667 ARM::VST1d16QPseudoWB_fixed,
4668 ARM::VST1d32QPseudoWB_fixed,
4669 ARM::VST1d64QPseudoWB_fixed };
4670 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4671 ARM::VST1q16LowQPseudo_UPD,
4672 ARM::VST1q32LowQPseudo_UPD,
4673 ARM::VST1q64LowQPseudo_UPD };
4674 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4675 ARM::VST1q16HighQPseudo_UPD,
4676 ARM::VST1q32HighQPseudo_UPD,
4677 ARM::VST1q64HighQPseudo_UPD };
4678 SelectVST(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4679 return;
4680 }
4681 break;
4682 }
4683 case ARMISD::VST2LN_UPD: {
4684 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4685 ARM::VST2LNd16Pseudo_UPD,
4686 ARM::VST2LNd32Pseudo_UPD };
4687 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4688 ARM::VST2LNq32Pseudo_UPD };
4689 SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes);
4690 return;
4691 }
4692
4693 case ARMISD::VST3LN_UPD: {
4694 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4695 ARM::VST3LNd16Pseudo_UPD,
4696 ARM::VST3LNd32Pseudo_UPD };
4697 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4698 ARM::VST3LNq32Pseudo_UPD };
4699 SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes);
4700 return;
4701 }
4702
4703 case ARMISD::VST4LN_UPD: {
4704 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4705 ARM::VST4LNd16Pseudo_UPD,
4706 ARM::VST4LNd32Pseudo_UPD };
4707 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4708 ARM::VST4LNq32Pseudo_UPD };
4709 SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes);
4710 return;
4711 }
4712
4713 case ISD::INTRINSIC_VOID:
4714 case ISD::INTRINSIC_W_CHAIN: {
4715 unsigned IntNo = N->getConstantOperandVal(Num: 1);
4716 switch (IntNo) {
4717 default:
4718 break;
4719
4720 case Intrinsic::arm_mrrc:
4721 case Intrinsic::arm_mrrc2: {
4722 SDLoc dl(N);
4723 SDValue Chain = N->getOperand(Num: 0);
4724 unsigned Opc;
4725
4726 if (Subtarget->isThumb())
4727 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4728 else
4729 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4730
4731 SmallVector<SDValue, 5> Ops;
4732 Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 2), dl)); /* coproc */
4733 Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 3), dl)); /* opc */
4734 Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 4), dl)); /* CRm */
4735
4736 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
4737 // instruction will always be '1111' but it is possible in assembly language to specify
4738 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
4739 if (Opc != ARM::MRRC2) {
4740 Ops.push_back(Elt: getAL(CurDAG, dl));
4741 Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
4742 }
4743
4744 Ops.push_back(Elt: Chain);
4745
4746 // Writes to two registers.
4747 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4748
4749 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: RetType, Ops));
4750 return;
4751 }
4752 case Intrinsic::arm_ldaexd:
4753 case Intrinsic::arm_ldrexd: {
4754 SDLoc dl(N);
4755 SDValue Chain = N->getOperand(Num: 0);
4756 SDValue MemAddr = N->getOperand(Num: 2);
4757 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4758
4759 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4760 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4761 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4762
4763 // arm_ldrexd returns a i64 value in {i32, i32}
4764 std::vector<EVT> ResTys;
4765 if (isThumb) {
4766 ResTys.push_back(x: MVT::i32);
4767 ResTys.push_back(x: MVT::i32);
4768 } else
4769 ResTys.push_back(x: MVT::Untyped);
4770 ResTys.push_back(x: MVT::Other);
4771
4772 // Place arguments in the right order.
4773 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4774 CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain};
4775 SDNode *Ld = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
4776 // Transfer memoperands.
4777 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
4778 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
4779
4780 // Remap uses.
4781 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4782 if (!SDValue(N, 0).use_empty()) {
4783 SDValue Result;
4784 if (isThumb)
4785 Result = SDValue(Ld, 0);
4786 else {
4787 SDValue SubRegIdx =
4788 CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
4789 SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
4790 dl, VT: MVT::i32, Op1: SDValue(Ld, 0), Op2: SubRegIdx);
4791 Result = SDValue(ResNode,0);
4792 }
4793 ReplaceUses(F: SDValue(N, 0), T: Result);
4794 }
4795 if (!SDValue(N, 1).use_empty()) {
4796 SDValue Result;
4797 if (isThumb)
4798 Result = SDValue(Ld, 1);
4799 else {
4800 SDValue SubRegIdx =
4801 CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
4802 SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
4803 dl, VT: MVT::i32, Op1: SDValue(Ld, 0), Op2: SubRegIdx);
4804 Result = SDValue(ResNode,0);
4805 }
4806 ReplaceUses(F: SDValue(N, 1), T: Result);
4807 }
4808 ReplaceUses(F: SDValue(N, 2), T: OutChain);
4809 CurDAG->RemoveDeadNode(N);
4810 return;
4811 }
4812 case Intrinsic::arm_stlexd:
4813 case Intrinsic::arm_strexd: {
4814 SDLoc dl(N);
4815 SDValue Chain = N->getOperand(Num: 0);
4816 SDValue Val0 = N->getOperand(Num: 2);
4817 SDValue Val1 = N->getOperand(Num: 3);
4818 SDValue MemAddr = N->getOperand(Num: 4);
4819
4820 // Store exclusive double return a i32 value which is the return status
4821 // of the issued store.
4822 const EVT ResTys[] = {MVT::i32, MVT::Other};
4823
4824 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4825 // Place arguments in the right order.
4826 SmallVector<SDValue, 7> Ops;
4827 if (isThumb) {
4828 Ops.push_back(Elt: Val0);
4829 Ops.push_back(Elt: Val1);
4830 } else
4831 // arm_strexd uses GPRPair.
4832 Ops.push_back(Elt: SDValue(createGPRPairNode(VT: MVT::Untyped, V0: Val0, V1: Val1), 0));
4833 Ops.push_back(Elt: MemAddr);
4834 Ops.push_back(Elt: getAL(CurDAG, dl));
4835 Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
4836 Ops.push_back(Elt: Chain);
4837
4838 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4839 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4840 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4841
4842 SDNode *St = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
4843 // Transfer memoperands.
4844 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
4845 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
4846
4847 ReplaceNode(F: N, T: St);
4848 return;
4849 }
4850
4851 case Intrinsic::arm_neon_vld1: {
4852 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4853 ARM::VLD1d32, ARM::VLD1d64 };
4854 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4855 ARM::VLD1q32, ARM::VLD1q64};
4856 SelectVLD(N, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4857 return;
4858 }
4859
4860 case Intrinsic::arm_neon_vld1x2: {
4861 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4862 ARM::VLD1q32, ARM::VLD1q64 };
4863 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4864 ARM::VLD1d16QPseudo,
4865 ARM::VLD1d32QPseudo,
4866 ARM::VLD1d64QPseudo };
4867 SelectVLD(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4868 return;
4869 }
4870
4871 case Intrinsic::arm_neon_vld1x3: {
4872 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4873 ARM::VLD1d16TPseudo,
4874 ARM::VLD1d32TPseudo,
4875 ARM::VLD1d64TPseudo };
4876 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4877 ARM::VLD1q16LowTPseudo_UPD,
4878 ARM::VLD1q32LowTPseudo_UPD,
4879 ARM::VLD1q64LowTPseudo_UPD };
4880 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4881 ARM::VLD1q16HighTPseudo,
4882 ARM::VLD1q32HighTPseudo,
4883 ARM::VLD1q64HighTPseudo };
4884 SelectVLD(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4885 return;
4886 }
4887
4888 case Intrinsic::arm_neon_vld1x4: {
4889 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4890 ARM::VLD1d16QPseudo,
4891 ARM::VLD1d32QPseudo,
4892 ARM::VLD1d64QPseudo };
4893 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4894 ARM::VLD1q16LowQPseudo_UPD,
4895 ARM::VLD1q32LowQPseudo_UPD,
4896 ARM::VLD1q64LowQPseudo_UPD };
4897 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4898 ARM::VLD1q16HighQPseudo,
4899 ARM::VLD1q32HighQPseudo,
4900 ARM::VLD1q64HighQPseudo };
4901 SelectVLD(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4902 return;
4903 }
4904
4905 case Intrinsic::arm_neon_vld2: {
4906 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4907 ARM::VLD2d32, ARM::VLD1q64 };
4908 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4909 ARM::VLD2q32Pseudo };
4910 SelectVLD(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4911 return;
4912 }
4913
4914 case Intrinsic::arm_neon_vld3: {
4915 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4916 ARM::VLD3d16Pseudo,
4917 ARM::VLD3d32Pseudo,
4918 ARM::VLD1d64TPseudo };
4919 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4920 ARM::VLD3q16Pseudo_UPD,
4921 ARM::VLD3q32Pseudo_UPD };
4922 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4923 ARM::VLD3q16oddPseudo,
4924 ARM::VLD3q32oddPseudo };
4925 SelectVLD(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4926 return;
4927 }
4928
4929 case Intrinsic::arm_neon_vld4: {
4930 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4931 ARM::VLD4d16Pseudo,
4932 ARM::VLD4d32Pseudo,
4933 ARM::VLD1d64QPseudo };
4934 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4935 ARM::VLD4q16Pseudo_UPD,
4936 ARM::VLD4q32Pseudo_UPD };
4937 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4938 ARM::VLD4q16oddPseudo,
4939 ARM::VLD4q32oddPseudo };
4940 SelectVLD(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4941 return;
4942 }
4943
4944 case Intrinsic::arm_neon_vld2dup: {
4945 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4946 ARM::VLD2DUPd32, ARM::VLD1q64 };
4947 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4948 ARM::VLD2DUPq16EvenPseudo,
4949 ARM::VLD2DUPq32EvenPseudo };
4950 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4951 ARM::VLD2DUPq16OddPseudo,
4952 ARM::VLD2DUPq32OddPseudo };
4953 SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 2,
4954 DOpcodes, QOpcodes0, QOpcodes1);
4955 return;
4956 }
4957
4958 case Intrinsic::arm_neon_vld3dup: {
4959 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4960 ARM::VLD3DUPd16Pseudo,
4961 ARM::VLD3DUPd32Pseudo,
4962 ARM::VLD1d64TPseudo };
4963 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4964 ARM::VLD3DUPq16EvenPseudo,
4965 ARM::VLD3DUPq32EvenPseudo };
4966 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4967 ARM::VLD3DUPq16OddPseudo,
4968 ARM::VLD3DUPq32OddPseudo };
4969 SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 3,
4970 DOpcodes, QOpcodes0, QOpcodes1);
4971 return;
4972 }
4973
4974 case Intrinsic::arm_neon_vld4dup: {
4975 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4976 ARM::VLD4DUPd16Pseudo,
4977 ARM::VLD4DUPd32Pseudo,
4978 ARM::VLD1d64QPseudo };
4979 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4980 ARM::VLD4DUPq16EvenPseudo,
4981 ARM::VLD4DUPq32EvenPseudo };
4982 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4983 ARM::VLD4DUPq16OddPseudo,
4984 ARM::VLD4DUPq32OddPseudo };
4985 SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 4,
4986 DOpcodes, QOpcodes0, QOpcodes1);
4987 return;
4988 }
4989
4990 case Intrinsic::arm_neon_vld2lane: {
4991 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
4992 ARM::VLD2LNd16Pseudo,
4993 ARM::VLD2LNd32Pseudo };
4994 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
4995 ARM::VLD2LNq32Pseudo };
4996 SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes);
4997 return;
4998 }
4999
5000 case Intrinsic::arm_neon_vld3lane: {
5001 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
5002 ARM::VLD3LNd16Pseudo,
5003 ARM::VLD3LNd32Pseudo };
5004 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
5005 ARM::VLD3LNq32Pseudo };
5006 SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes);
5007 return;
5008 }
5009
5010 case Intrinsic::arm_neon_vld4lane: {
5011 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5012 ARM::VLD4LNd16Pseudo,
5013 ARM::VLD4LNd32Pseudo };
5014 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5015 ARM::VLD4LNq32Pseudo };
5016 SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes);
5017 return;
5018 }
5019
5020 case Intrinsic::arm_neon_vst1: {
5021 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5022 ARM::VST1d32, ARM::VST1d64 };
5023 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5024 ARM::VST1q32, ARM::VST1q64 };
5025 SelectVST(N, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5026 return;
5027 }
5028
5029 case Intrinsic::arm_neon_vst1x2: {
5030 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5031 ARM::VST1q32, ARM::VST1q64 };
5032 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5033 ARM::VST1d16QPseudo,
5034 ARM::VST1d32QPseudo,
5035 ARM::VST1d64QPseudo };
5036 SelectVST(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5037 return;
5038 }
5039
5040 case Intrinsic::arm_neon_vst1x3: {
5041 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5042 ARM::VST1d16TPseudo,
5043 ARM::VST1d32TPseudo,
5044 ARM::VST1d64TPseudo };
5045 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5046 ARM::VST1q16LowTPseudo_UPD,
5047 ARM::VST1q32LowTPseudo_UPD,
5048 ARM::VST1q64LowTPseudo_UPD };
5049 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5050 ARM::VST1q16HighTPseudo,
5051 ARM::VST1q32HighTPseudo,
5052 ARM::VST1q64HighTPseudo };
5053 SelectVST(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
5054 return;
5055 }
5056
5057 case Intrinsic::arm_neon_vst1x4: {
5058 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5059 ARM::VST1d16QPseudo,
5060 ARM::VST1d32QPseudo,
5061 ARM::VST1d64QPseudo };
5062 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5063 ARM::VST1q16LowQPseudo_UPD,
5064 ARM::VST1q32LowQPseudo_UPD,
5065 ARM::VST1q64LowQPseudo_UPD };
5066 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5067 ARM::VST1q16HighQPseudo,
5068 ARM::VST1q32HighQPseudo,
5069 ARM::VST1q64HighQPseudo };
5070 SelectVST(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
5071 return;
5072 }
5073
5074 case Intrinsic::arm_neon_vst2: {
5075 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5076 ARM::VST2d32, ARM::VST1q64 };
5077 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5078 ARM::VST2q32Pseudo };
5079 SelectVST(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
5080 return;
5081 }
5082
5083 case Intrinsic::arm_neon_vst3: {
5084 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5085 ARM::VST3d16Pseudo,
5086 ARM::VST3d32Pseudo,
5087 ARM::VST1d64TPseudo };
5088 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5089 ARM::VST3q16Pseudo_UPD,
5090 ARM::VST3q32Pseudo_UPD };
5091 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5092 ARM::VST3q16oddPseudo,
5093 ARM::VST3q32oddPseudo };
5094 SelectVST(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
5095 return;
5096 }
5097
5098 case Intrinsic::arm_neon_vst4: {
5099 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5100 ARM::VST4d16Pseudo,
5101 ARM::VST4d32Pseudo,
5102 ARM::VST1d64QPseudo };
5103 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5104 ARM::VST4q16Pseudo_UPD,
5105 ARM::VST4q32Pseudo_UPD };
5106 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5107 ARM::VST4q16oddPseudo,
5108 ARM::VST4q32oddPseudo };
5109 SelectVST(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
5110 return;
5111 }
5112
5113 case Intrinsic::arm_neon_vst2lane: {
5114 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5115 ARM::VST2LNd16Pseudo,
5116 ARM::VST2LNd32Pseudo };
5117 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5118 ARM::VST2LNq32Pseudo };
5119 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes);
5120 return;
5121 }
5122
5123 case Intrinsic::arm_neon_vst3lane: {
5124 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5125 ARM::VST3LNd16Pseudo,
5126 ARM::VST3LNd32Pseudo };
5127 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5128 ARM::VST3LNq32Pseudo };
5129 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes);
5130 return;
5131 }
5132
5133 case Intrinsic::arm_neon_vst4lane: {
5134 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5135 ARM::VST4LNd16Pseudo,
5136 ARM::VST4LNd32Pseudo };
5137 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5138 ARM::VST4LNq32Pseudo };
5139 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes);
5140 return;
5141 }
5142
5143 case Intrinsic::arm_mve_vldr_gather_base_wb:
5144 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5145 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5146 ARM::MVE_VLDRDU64_qi_pre};
5147 SelectMVE_WB(N, Opcodes,
5148 Predicated: IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5149 return;
5150 }
5151
5152 case Intrinsic::arm_mve_vld2q: {
5153 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5154 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5155 ARM::MVE_VLD21_16};
5156 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5157 ARM::MVE_VLD21_32};
5158 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5159 SelectMVE_VLD(N, NumVecs: 2, Opcodes, HasWriteback: false);
5160 return;
5161 }
5162
5163 case Intrinsic::arm_mve_vld4q: {
5164 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5165 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5166 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5167 ARM::MVE_VLD42_16,
5168 ARM::MVE_VLD43_16};
5169 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5170 ARM::MVE_VLD42_32,
5171 ARM::MVE_VLD43_32};
5172 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5173 SelectMVE_VLD(N, NumVecs: 4, Opcodes, HasWriteback: false);
5174 return;
5175 }
5176 }
5177 break;
5178 }
5179
5180 case ISD::INTRINSIC_WO_CHAIN: {
5181 unsigned IntNo = N->getConstantOperandVal(Num: 0);
5182 switch (IntNo) {
5183 default:
5184 break;
5185
5186 // Scalar f32 -> bf16
5187 case Intrinsic::arm_neon_vcvtbfp2bf: {
5188 SDLoc dl(N);
5189 const SDValue &Src = N->getOperand(Num: 1);
5190 llvm::EVT DestTy = N->getValueType(ResNo: 0);
5191 SDValue Pred = getAL(CurDAG, dl);
5192 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
5193 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5194 CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVTB, VT: DestTy, Ops);
5195 return;
5196 }
5197
5198 // Vector v4f32 -> v4bf16
5199 case Intrinsic::arm_neon_vcvtfp2bf: {
5200 SDLoc dl(N);
5201 const SDValue &Src = N->getOperand(Num: 1);
5202 SDValue Pred = getAL(CurDAG, dl);
5203 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
5204 SDValue Ops[] = { Src, Pred, Reg0 };
5205 CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVT, VT: MVT::v4bf16, Ops);
5206 return;
5207 }
5208
5209 case Intrinsic::arm_mve_urshrl:
5210 SelectMVE_LongShift(N, Opcode: ARM::MVE_URSHRL, Immediate: true, HasSaturationOperand: false);
5211 return;
5212 case Intrinsic::arm_mve_uqshll:
5213 SelectMVE_LongShift(N, Opcode: ARM::MVE_UQSHLL, Immediate: true, HasSaturationOperand: false);
5214 return;
5215 case Intrinsic::arm_mve_srshrl:
5216 SelectMVE_LongShift(N, Opcode: ARM::MVE_SRSHRL, Immediate: true, HasSaturationOperand: false);
5217 return;
5218 case Intrinsic::arm_mve_sqshll:
5219 SelectMVE_LongShift(N, Opcode: ARM::MVE_SQSHLL, Immediate: true, HasSaturationOperand: false);
5220 return;
5221 case Intrinsic::arm_mve_uqrshll:
5222 SelectMVE_LongShift(N, Opcode: ARM::MVE_UQRSHLL, Immediate: false, HasSaturationOperand: true);
5223 return;
5224 case Intrinsic::arm_mve_sqrshrl:
5225 SelectMVE_LongShift(N, Opcode: ARM::MVE_SQRSHRL, Immediate: false, HasSaturationOperand: true);
5226 return;
5227
5228 case Intrinsic::arm_mve_vadc:
5229 case Intrinsic::arm_mve_vadc_predicated:
5230 SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VADC, OpcodeWithNoCarry: ARM::MVE_VADCI, Add: true,
5231 Predicated: IntNo == Intrinsic::arm_mve_vadc_predicated);
5232 return;
5233 case Intrinsic::arm_mve_vsbc:
5234 case Intrinsic::arm_mve_vsbc_predicated:
5235 SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VSBC, OpcodeWithNoCarry: ARM::MVE_VSBCI, Add: false,
5236 Predicated: IntNo == Intrinsic::arm_mve_vsbc_predicated);
5237 return;
5238 case Intrinsic::arm_mve_vshlc:
5239 case Intrinsic::arm_mve_vshlc_predicated:
5240 SelectMVE_VSHLC(N, Predicated: IntNo == Intrinsic::arm_mve_vshlc_predicated);
5241 return;
5242
5243 case Intrinsic::arm_mve_vmlldava:
5244 case Intrinsic::arm_mve_vmlldava_predicated: {
5245 static const uint16_t OpcodesU[] = {
5246 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5247 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5248 };
5249 static const uint16_t OpcodesS[] = {
5250 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5251 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5252 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5253 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5254 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5255 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5256 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5257 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5258 };
5259 SelectMVE_VMLLDAV(N, Predicated: IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5260 OpcodesS, OpcodesU);
5261 return;
5262 }
5263
5264 case Intrinsic::arm_mve_vrmlldavha:
5265 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5266 static const uint16_t OpcodesU[] = {
5267 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5268 };
5269 static const uint16_t OpcodesS[] = {
5270 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5271 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5272 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5273 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5274 };
5275 SelectMVE_VRMLLDAVH(N, Predicated: IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5276 OpcodesS, OpcodesU);
5277 return;
5278 }
5279
5280 case Intrinsic::arm_mve_vidup:
5281 case Intrinsic::arm_mve_vidup_predicated: {
5282 static const uint16_t Opcodes[] = {
5283 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5284 };
5285 SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5286 Predicated: IntNo == Intrinsic::arm_mve_vidup_predicated);
5287 return;
5288 }
5289
5290 case Intrinsic::arm_mve_vddup:
5291 case Intrinsic::arm_mve_vddup_predicated: {
5292 static const uint16_t Opcodes[] = {
5293 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5294 };
5295 SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5296 Predicated: IntNo == Intrinsic::arm_mve_vddup_predicated);
5297 return;
5298 }
5299
5300 case Intrinsic::arm_mve_viwdup:
5301 case Intrinsic::arm_mve_viwdup_predicated: {
5302 static const uint16_t Opcodes[] = {
5303 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5304 };
5305 SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5306 Predicated: IntNo == Intrinsic::arm_mve_viwdup_predicated);
5307 return;
5308 }
5309
5310 case Intrinsic::arm_mve_vdwdup:
5311 case Intrinsic::arm_mve_vdwdup_predicated: {
5312 static const uint16_t Opcodes[] = {
5313 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5314 };
5315 SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5316 Predicated: IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5317 return;
5318 }
5319
5320 case Intrinsic::arm_cde_cx1d:
5321 case Intrinsic::arm_cde_cx1da:
5322 case Intrinsic::arm_cde_cx2d:
5323 case Intrinsic::arm_cde_cx2da:
5324 case Intrinsic::arm_cde_cx3d:
5325 case Intrinsic::arm_cde_cx3da: {
5326 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5327 IntNo == Intrinsic::arm_cde_cx2da ||
5328 IntNo == Intrinsic::arm_cde_cx3da;
5329 size_t NumExtraOps;
5330 uint16_t Opcode;
5331 switch (IntNo) {
5332 case Intrinsic::arm_cde_cx1d:
5333 case Intrinsic::arm_cde_cx1da:
5334 NumExtraOps = 0;
5335 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5336 break;
5337 case Intrinsic::arm_cde_cx2d:
5338 case Intrinsic::arm_cde_cx2da:
5339 NumExtraOps = 1;
5340 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5341 break;
5342 case Intrinsic::arm_cde_cx3d:
5343 case Intrinsic::arm_cde_cx3da:
5344 NumExtraOps = 2;
5345 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5346 break;
5347 default:
5348 llvm_unreachable("Unexpected opcode");
5349 }
5350 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5351 return;
5352 }
5353 }
5354 break;
5355 }
5356
5357 case ISD::ATOMIC_CMP_SWAP:
5358 SelectCMP_SWAP(N);
5359 return;
5360 }
5361
5362 SelectCode(N);
5363}
5364
5365// Inspect a register string of the form
5366// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5367// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5368// and obtain the integer operands from them, adding these operands to the
5369// provided vector.
5370static void getIntOperandsFromRegisterString(StringRef RegString,
5371 SelectionDAG *CurDAG,
5372 const SDLoc &DL,
5373 std::vector<SDValue> &Ops) {
5374 SmallVector<StringRef, 5> Fields;
5375 RegString.split(A&: Fields, Separator: ':');
5376
5377 if (Fields.size() > 1) {
5378 bool AllIntFields = true;
5379
5380 for (StringRef Field : Fields) {
5381 // Need to trim out leading 'cp' characters and get the integer field.
5382 unsigned IntField;
5383 AllIntFields &= !Field.trim(Chars: "CPcp").getAsInteger(Radix: 10, Result&: IntField);
5384 Ops.push_back(x: CurDAG->getTargetConstant(Val: IntField, DL, VT: MVT::i32));
5385 }
5386
5387 assert(AllIntFields &&
5388 "Unexpected non-integer value in special register string.");
5389 (void)AllIntFields;
5390 }
5391}
5392
5393// Maps a Banked Register string to its mask value. The mask value returned is
5394// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5395// mask operand, which expresses which register is to be used, e.g. r8, and in
5396// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5397// was invalid.
5398static inline int getBankedRegisterMask(StringRef RegString) {
5399 auto TheReg = ARMBankedReg::lookupBankedRegByName(Name: RegString.lower());
5400 if (!TheReg)
5401 return -1;
5402 return TheReg->Encoding;
5403}
5404
5405// The flags here are common to those allowed for apsr in the A class cores and
5406// those allowed for the special registers in the M class cores. Returns a
5407// value representing which flags were present, -1 if invalid.
5408static inline int getMClassFlagsMask(StringRef Flags) {
5409 return StringSwitch<int>(Flags)
5410 .Case(S: "", Value: 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5411 // correct when flags are not permitted
5412 .Case(S: "g", Value: 0x1)
5413 .Case(S: "nzcvq", Value: 0x2)
5414 .Case(S: "nzcvqg", Value: 0x3)
5415 .Default(Value: -1);
5416}
5417
5418// Maps MClass special registers string to its value for use in the
5419// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5420// Returns -1 to signify that the string was invalid.
5421static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5422 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Name: Reg);
5423 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5424 if (!TheReg || !TheReg->hasRequiredFeatures(ActiveFeatures: FeatureBits))
5425 return -1;
5426 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5427}
5428
5429static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5430 // The mask operand contains the special register (R Bit) in bit 4, whether
5431 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5432 // bits 3-0 contains the fields to be accessed in the special register, set by
5433 // the flags provided with the register.
5434 int Mask = 0;
5435 if (Reg == "apsr") {
5436 // The flags permitted for apsr are the same flags that are allowed in
5437 // M class registers. We get the flag value and then shift the flags into
5438 // the correct place to combine with the mask.
5439 Mask = getMClassFlagsMask(Flags);
5440 if (Mask == -1)
5441 return -1;
5442 return Mask << 2;
5443 }
5444
5445 if (Reg != "cpsr" && Reg != "spsr") {
5446 return -1;
5447 }
5448
5449 // This is the same as if the flags were "fc"
5450 if (Flags.empty() || Flags == "all")
5451 return Mask | 0x9;
5452
5453 // Inspect the supplied flags string and set the bits in the mask for
5454 // the relevant and valid flags allowed for cpsr and spsr.
5455 for (char Flag : Flags) {
5456 int FlagVal;
5457 switch (Flag) {
5458 case 'c':
5459 FlagVal = 0x1;
5460 break;
5461 case 'x':
5462 FlagVal = 0x2;
5463 break;
5464 case 's':
5465 FlagVal = 0x4;
5466 break;
5467 case 'f':
5468 FlagVal = 0x8;
5469 break;
5470 default:
5471 FlagVal = 0;
5472 }
5473
5474 // This avoids allowing strings where the same flag bit appears twice.
5475 if (!FlagVal || (Mask & FlagVal))
5476 return -1;
5477 Mask |= FlagVal;
5478 }
5479
5480 // If the register is spsr then we need to set the R bit.
5481 if (Reg == "spsr")
5482 Mask |= 0x10;
5483
5484 return Mask;
5485}
5486
5487// Lower the read_register intrinsic to ARM specific DAG nodes
5488// using the supplied metadata string to select the instruction node to use
5489// and the registers/masks to construct as operands for the node.
5490bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5491 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
5492 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
5493 bool IsThumb2 = Subtarget->isThumb2();
5494 SDLoc DL(N);
5495
5496 std::vector<SDValue> Ops;
5497 getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);
5498
5499 if (!Ops.empty()) {
5500 // If the special register string was constructed of fields (as defined
5501 // in the ACLE) then need to lower to MRC node (32 bit) or
5502 // MRRC node(64 bit), we can make the distinction based on the number of
5503 // operands we have.
5504 unsigned Opcode;
5505 SmallVector<EVT, 3> ResTypes;
5506 if (Ops.size() == 5){
5507 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5508 ResTypes.append(IL: { MVT::i32, MVT::Other });
5509 } else {
5510 assert(Ops.size() == 3 &&
5511 "Invalid number of fields in special register string.");
5512 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5513 ResTypes.append(IL: { MVT::i32, MVT::i32, MVT::Other });
5514 }
5515
5516 Ops.push_back(x: getAL(CurDAG, dl: DL));
5517 Ops.push_back(x: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
5518 Ops.push_back(x: N->getOperand(Num: 0));
5519 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: ResTypes, Ops));
5520 return true;
5521 }
5522
5523 std::string SpecialReg = RegString->getString().lower();
5524
5525 int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
5526 if (BankedReg != -1) {
5527 Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32),
5528 getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
5529 N->getOperand(Num: 0) };
5530 ReplaceNode(
5531 F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5532 dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5533 return true;
5534 }
5535
5536 // The VFP registers are read by creating SelectionDAG nodes with opcodes
5537 // corresponding to the register that is being read from. So we switch on the
5538 // string to find which opcode we need to use.
5539 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5540 .Case(S: "fpscr", Value: ARM::VMRS)
5541 .Case(S: "fpexc", Value: ARM::VMRS_FPEXC)
5542 .Case(S: "fpsid", Value: ARM::VMRS_FPSID)
5543 .Case(S: "mvfr0", Value: ARM::VMRS_MVFR0)
5544 .Case(S: "mvfr1", Value: ARM::VMRS_MVFR1)
5545 .Case(S: "mvfr2", Value: ARM::VMRS_MVFR2)
5546 .Case(S: "fpinst", Value: ARM::VMRS_FPINST)
5547 .Case(S: "fpinst2", Value: ARM::VMRS_FPINST2)
5548 .Default(Value: 0);
5549
5550 // If an opcode was found then we can lower the read to a VFP instruction.
5551 if (Opcode) {
5552 if (!Subtarget->hasVFP2Base())
5553 return false;
5554 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5555 return false;
5556
5557 Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
5558 N->getOperand(Num: 0) };
5559 ReplaceNode(F: N,
5560 T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5561 return true;
5562 }
5563
5564 // If the target is M Class then need to validate that the register string
5565 // is an acceptable value, so check that a mask can be constructed from the
5566 // string.
5567 if (Subtarget->isMClass()) {
5568 int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
5569 if (SYSmValue == -1)
5570 return false;
5571
5572 SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
5573 getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
5574 N->getOperand(Num: 0) };
5575 ReplaceNode(
5576 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MRS_M, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5577 return true;
5578 }
5579
5580 // Here we know the target is not M Class so we need to check if it is one
5581 // of the remaining possible values which are apsr, cpsr or spsr.
5582 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5583 Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
5584 N->getOperand(Num: 0) };
5585 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5586 dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
5587 return true;
5588 }
5589
5590 if (SpecialReg == "spsr") {
5591 Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
5592 N->getOperand(Num: 0) };
5593 ReplaceNode(
5594 F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, dl: DL,
5595 VT1: MVT::i32, VT2: MVT::Other, Ops));
5596 return true;
5597 }
5598
5599 return false;
5600}
5601
5602// Lower the write_register intrinsic to ARM specific DAG nodes
5603// using the supplied metadata string to select the instruction node to use
5604// and the registers/masks to use in the nodes
5605bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5606 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
5607 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
5608 bool IsThumb2 = Subtarget->isThumb2();
5609 SDLoc DL(N);
5610
5611 std::vector<SDValue> Ops;
5612 getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);
5613
5614 if (!Ops.empty()) {
5615 // If the special register string was constructed of fields (as defined
5616 // in the ACLE) then need to lower to MCR node (32 bit) or
5617 // MCRR node(64 bit), we can make the distinction based on the number of
5618 // operands we have.
5619 unsigned Opcode;
5620 if (Ops.size() == 5) {
5621 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5622 Ops.insert(position: Ops.begin()+2, x: N->getOperand(Num: 2));
5623 } else {
5624 assert(Ops.size() == 3 &&
5625 "Invalid number of fields in special register string.");
5626 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5627 SDValue WriteValue[] = { N->getOperand(Num: 2), N->getOperand(Num: 3) };
5628 Ops.insert(position: Ops.begin()+2, first: WriteValue, last: WriteValue+2);
5629 }
5630
5631 Ops.push_back(x: getAL(CurDAG, dl: DL));
5632 Ops.push_back(x: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
5633 Ops.push_back(x: N->getOperand(Num: 0));
5634
5635 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
5636 return true;
5637 }
5638
5639 std::string SpecialReg = RegString->getString().lower();
5640 int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
5641 if (BankedReg != -1) {
5642 Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32), N->getOperand(Num: 2),
5643 getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
5644 N->getOperand(Num: 0) };
5645 ReplaceNode(
5646 F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5647 dl: DL, VT: MVT::Other, Ops));
5648 return true;
5649 }
5650
5651 // The VFP registers are written to by creating SelectionDAG nodes with
5652 // opcodes corresponding to the register that is being written. So we switch
5653 // on the string to find which opcode we need to use.
5654 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5655 .Case(S: "fpscr", Value: ARM::VMSR)
5656 .Case(S: "fpexc", Value: ARM::VMSR_FPEXC)
5657 .Case(S: "fpsid", Value: ARM::VMSR_FPSID)
5658 .Case(S: "fpinst", Value: ARM::VMSR_FPINST)
5659 .Case(S: "fpinst2", Value: ARM::VMSR_FPINST2)
5660 .Default(Value: 0);
5661
5662 if (Opcode) {
5663 if (!Subtarget->hasVFP2Base())
5664 return false;
5665 Ops = { N->getOperand(Num: 2), getAL(CurDAG, dl: DL),
5666 CurDAG->getRegister(Reg: 0, VT: MVT::i32), N->getOperand(Num: 0) };
5667 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
5668 return true;
5669 }
5670
5671 std::pair<StringRef, StringRef> Fields;
5672 Fields = StringRef(SpecialReg).rsplit(Separator: '_');
5673 std::string Reg = Fields.first.str();
5674 StringRef Flags = Fields.second;
5675
5676 // If the target was M Class then need to validate the special register value
5677 // and retrieve the mask for use in the instruction node.
5678 if (Subtarget->isMClass()) {
5679 int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
5680 if (SYSmValue == -1)
5681 return false;
5682
5683 SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
5684 N->getOperand(Num: 2), getAL(CurDAG, dl: DL),
5685 CurDAG->getRegister(Reg: 0, VT: MVT::i32), N->getOperand(Num: 0) };
5686 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MSR_M, dl: DL, VT: MVT::Other, Ops));
5687 return true;
5688 }
5689
5690 // We then check to see if a valid mask can be constructed for one of the
5691 // register string values permitted for the A and R class cores. These values
5692 // are apsr, spsr and cpsr; these are also valid on older cores.
5693 int Mask = getARClassRegisterMask(Reg, Flags);
5694 if (Mask != -1) {
5695 Ops = { CurDAG->getTargetConstant(Val: Mask, DL, VT: MVT::i32), N->getOperand(Num: 2),
5696 getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
5697 N->getOperand(Num: 0) };
5698 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5699 dl: DL, VT: MVT::Other, Ops));
5700 return true;
5701 }
5702
5703 return false;
5704}
5705
5706bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5707 std::vector<SDValue> AsmNodeOperands;
5708 InlineAsm::Flag Flag;
5709 bool Changed = false;
5710 unsigned NumOps = N->getNumOperands();
5711
5712 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5713 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5714 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5715 // respectively. Since there is no constraint to explicitly specify a
5716 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5717 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5718 // them into a GPRPair.
5719
5720 SDLoc dl(N);
5721 SDValue Glue = N->getGluedNode() ? N->getOperand(Num: NumOps - 1) : SDValue();
5722
5723 SmallVector<bool, 8> OpChanged;
5724 // Glue node will be appended late.
5725 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5726 SDValue op = N->getOperand(Num: i);
5727 AsmNodeOperands.push_back(x: op);
5728
5729 if (i < InlineAsm::Op_FirstOperand)
5730 continue;
5731
5732 if (const auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: i)))
5733 Flag = InlineAsm::Flag(C->getZExtValue());
5734 else
5735 continue;
5736
5737 // Immediate operands to inline asm in the SelectionDAG are modeled with
5738 // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
5739 // the second is a constant with the value of the immediate. If we get here
5740 // and we have a Kind::Imm, skip the next operand, and continue.
5741 if (Flag.isImmKind()) {
5742 SDValue op = N->getOperand(Num: ++i);
5743 AsmNodeOperands.push_back(x: op);
5744 continue;
5745 }
5746
5747 const unsigned NumRegs = Flag.getNumOperandRegisters();
5748 if (NumRegs)
5749 OpChanged.push_back(Elt: false);
5750
5751 unsigned DefIdx = 0;
5752 bool IsTiedToChangedOp = false;
5753 // If it's a use that is tied with a previous def, it has no
5754 // reg class constraint.
5755 if (Changed && Flag.isUseOperandTiedToDef(Idx&: DefIdx))
5756 IsTiedToChangedOp = OpChanged[DefIdx];
5757
5758 // Memory operands to inline asm in the SelectionDAG are modeled with two
5759 // operands: a constant of value InlineAsm::Kind::Mem followed by the input
5760 // operand. If we get here and we have a Kind::Mem, skip the next operand
5761 // (so it doesn't get misinterpreted), and continue. We do this here because
5762 // it's important to update the OpChanged array correctly before moving on.
5763 if (Flag.isMemKind()) {
5764 SDValue op = N->getOperand(Num: ++i);
5765 AsmNodeOperands.push_back(x: op);
5766 continue;
5767 }
5768
5769 if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
5770 !Flag.isRegDefEarlyClobberKind())
5771 continue;
5772
5773 unsigned RC;
5774 const bool HasRC = Flag.hasRegClassConstraint(RC);
5775 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5776 || NumRegs != 2)
5777 continue;
5778
5779 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5780 SDValue V0 = N->getOperand(Num: i+1);
5781 SDValue V1 = N->getOperand(Num: i+2);
5782 Register Reg0 = cast<RegisterSDNode>(Val&: V0)->getReg();
5783 Register Reg1 = cast<RegisterSDNode>(Val&: V1)->getReg();
5784 SDValue PairedReg;
5785 MachineRegisterInfo &MRI = MF->getRegInfo();
5786
5787 if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
5788 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5789 // the original GPRs.
5790
5791 Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
5792 PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
5793 SDValue Chain = SDValue(N,0);
5794
5795 SDNode *GU = N->getGluedUser();
5796 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, Reg: GPVR, VT: MVT::Untyped,
5797 Glue: Chain.getValue(R: 1));
5798
5799 // Extract values from a GPRPair reg and copy to the original GPR reg.
5800 SDValue Sub0 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
5801 Operand: RegCopy);
5802 SDValue Sub1 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
5803 Operand: RegCopy);
5804 SDValue T0 = CurDAG->getCopyToReg(Chain: Sub0, dl, Reg: Reg0, N: Sub0,
5805 Glue: RegCopy.getValue(R: 1));
5806 SDValue T1 = CurDAG->getCopyToReg(Chain: Sub1, dl, Reg: Reg1, N: Sub1, Glue: T0.getValue(R: 1));
5807
5808 // Update the original glue user.
5809 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5810 Ops.push_back(x: T1.getValue(R: 1));
5811 CurDAG->UpdateNodeOperands(N: GU, Ops);
5812 } else {
5813 // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
5814 // GPRPair and then pass the GPRPair to the inline asm.
5815 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5816
5817 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5818 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg0, VT: MVT::i32,
5819 Glue: Chain.getValue(R: 1));
5820 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg1, VT: MVT::i32,
5821 Glue: T0.getValue(R: 1));
5822 SDValue Pair = SDValue(createGPRPairNode(VT: MVT::Untyped, V0: T0, V1: T1), 0);
5823
5824 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5825 // i32 VRs of inline asm with it.
5826 Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
5827 PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
5828 Chain = CurDAG->getCopyToReg(Chain: T1, dl, Reg: GPVR, N: Pair, Glue: T1.getValue(R: 1));
5829
5830 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5831 Glue = Chain.getValue(R: 1);
5832 }
5833
5834 Changed = true;
5835
5836 if(PairedReg.getNode()) {
5837 OpChanged[OpChanged.size() -1 ] = true;
5838 Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
5839 if (IsTiedToChangedOp)
5840 Flag.setMatchingOp(DefIdx);
5841 else
5842 Flag.setRegClass(ARM::GPRPairRegClassID);
5843 // Replace the current flag.
5844 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5845 Val: Flag, DL: dl, VT: MVT::i32);
5846 // Add the new register node and skip the original two GPRs.
5847 AsmNodeOperands.push_back(x: PairedReg);
5848 // Skip the next two GPRs.
5849 i += 2;
5850 }
5851 }
5852
5853 if (Glue.getNode())
5854 AsmNodeOperands.push_back(x: Glue);
5855 if (!Changed)
5856 return false;
5857
5858 SDValue New = CurDAG->getNode(Opcode: N->getOpcode(), DL: SDLoc(N),
5859 VTList: CurDAG->getVTList(VT1: MVT::Other, VT2: MVT::Glue), Ops: AsmNodeOperands);
5860 New->setNodeId(-1);
5861 ReplaceNode(F: N, T: New.getNode());
5862 return true;
5863}
5864
5865bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5866 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5867 std::vector<SDValue> &OutOps) {
5868 switch(ConstraintID) {
5869 default:
5870 llvm_unreachable("Unexpected asm memory constraint");
5871 case InlineAsm::ConstraintCode::m:
5872 case InlineAsm::ConstraintCode::o:
5873 case InlineAsm::ConstraintCode::Q:
5874 case InlineAsm::ConstraintCode::Um:
5875 case InlineAsm::ConstraintCode::Un:
5876 case InlineAsm::ConstraintCode::Uq:
5877 case InlineAsm::ConstraintCode::Us:
5878 case InlineAsm::ConstraintCode::Ut:
5879 case InlineAsm::ConstraintCode::Uv:
5880 case InlineAsm::ConstraintCode::Uy:
5881 // Require the address to be in a register. That is safe for all ARM
5882 // variants and it is hard to do anything much smarter without knowing
5883 // how the operand is used.
5884 OutOps.push_back(x: Op);
5885 return false;
5886 }
5887 return true;
5888}
5889
5890/// createARMISelDag - This pass converts a legalized DAG into a
5891/// ARM-specific DAG, ready for instruction scheduling.
5892///
5893FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5894 CodeGenOptLevel OptLevel) {
5895 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5896}
5897