1//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the ARM target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14#include "ARMBaseInstrInfo.h"
15#include "ARMTargetMachine.h"
16#include "MCTargetDesc/ARMAddressingModes.h"
17#include "Utils/ARMBaseInfo.h"
18#include "llvm/ADT/APSInt.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SelectionDAGISel.h"
26#include "llvm/CodeGen/TargetLowering.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/LLVMContext.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Target/TargetOptions.h"
36#include <optional>
37
38using namespace llvm;
39
40#define DEBUG_TYPE "arm-isel"
41#define PASS_NAME "ARM Instruction Selection"
42
// Debugging knob: when set, complex-pattern matching of ARM shifter operands
// is disabled entirely, forcing shifts to be selected as separate
// instructions.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(Val: false));
47
48//===--------------------------------------------------------------------===//
49/// ARMDAGToDAGISel - ARM specific code to select ARM machine
50/// instructions for SelectionDAG operations.
51///
namespace {

/// ARMDAGToDAGISel - ARM-specific SelectionDAG instruction selector. The
/// Subtarget pointer is re-bound per function in runOnMachineFunction.
class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(mf&: MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true as some complex patterns, like those that call
  /// canExtractShiftFromMul can modify the DAG inplace.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, CheckProfitability: false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    // Only fold a shift that has a single user.
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, CheckProfitability: false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  // ARM Addressing Modes:
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Immediate-encoding predicates used by the generated matcher.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Arg: Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Arg: ~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Arg: Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Arg: ~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///         the accumulator and the immediate operand, i.e. 0
  ///         for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Special-register read/write selection.
  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

/// Legacy pass-manager wrapper around ARMDAGToDAGISel.
class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(args&: tm, args&: OptLevel)) {}
};
} // end anonymous namespace
360
// Pass identification token for the legacy pass manager; registered below
// under the DEBUG_TYPE/PASS_NAME strings defined at the top of the file.
char ARMDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
364
365/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
366/// operand. If so Imm will receive the 32-bit value.
367static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
368 if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: 0) == MVT::i32) {
369 Imm = N->getAsZExtVal();
370 return true;
371 }
372 return false;
373}
374
// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N: N.getNode(), Imm);
}
380
381// isOpcWithIntImmediate - This method tests to see if the node is a specific
382// opcode and that it has a immediate integer right operand.
383// If so Imm will receive the 32 bit value.
384static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
385 return N->getOpcode() == Opc &&
386 isInt32Immediate(N: N->getOperand(Num: 1).getNode(), Imm);
387}
388
389/// Check whether a particular node is a constant value representable as
390/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
391///
392/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
393static bool isScaledConstantInRange(SDValue Node, int Scale,
394 int RangeMin, int RangeMax,
395 int &ScaledConstant) {
396 assert(Scale > 0 && "Invalid scale!");
397
398 // Check that this is a constant.
399 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Node);
400 if (!C)
401 return false;
402
403 ScaledConstant = (int) C->getZExtValue();
404 if ((ScaledConstant % Scale) != 0)
405 return false;
406
407 ScaledConstant /= Scale;
408 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
409}
410
/// Pre-ISel DAG rewrite: turn (add X, (and (srl Y, c1), mask)) into a form
/// where the and/srl pair can become a bitfield extract (UBFX) and the
/// residual scaling becomes a left shift foldable into the add.
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // UBFX is only available from ARMv6T2 on.
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(Range: CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(Num: 0);
    SDValue N1 = N.getOperand(Num: 1);
    unsigned And_imm = 0;
    // Accept the AND on either side of the ADD; canonicalize it into N1.
    if (!isOpcWithIntImmediate(N: N1.getNode(), Opc: ISD::AND, Imm&: And_imm)) {
      if (isOpcWithIntImmediate(N: N0.getNode(), Opc: ISD::AND, Imm&: And_imm))
        std::swap(a&: N0, b&: N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(Val: And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros the mask must be all-ones
    // (x & (x+1) == 0 iff x is of the form 0...01...1).
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(i: 0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(N: Srl.getNode(), Opc: ISD::SRL, Imm&: Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1) ||
          SelectRegShifterOperand(N: N0, A&: CPTmp0, B&: CPTmp1, C&: CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(Opcode: ISD::SRL, DL: SDLoc(Srl), VT: MVT::i32,
                          N1: Srl.getOperand(i: 0),
                          N2: CurDAG->getConstant(Val: Srl_imm + TZ, DL: SDLoc(Srl),
                                                VT: MVT::i32));
    N1 = CurDAG->getNode(Opcode: ISD::AND, DL: SDLoc(N1), VT: MVT::i32,
                         N1: Srl,
                         N2: CurDAG->getConstant(Val: And_imm, DL: SDLoc(Srl), VT: MVT::i32));
    N1 = CurDAG->getNode(Opcode: ISD::SHL, DL: SDLoc(N1), VT: MVT::i32,
                         N1, N2: CurDAG->getConstant(Val: TZ, DL: SDLoc(Srl), VT: MVT::i32));
    CurDAG->UpdateNodeOperands(N: &N, Op1: N0, Op2: N1);
  }
}
491
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 we don't try to avoid the hazard.
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // The hazard analysis below only reasons about a single consumer.
  if (!N->hasOneUse())
    return false;

  SDNode *User = *N->user_begin();
  if (User->getOpcode() == ISD::CopyToReg)
    return true;
  if (User->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
    // Stores and GPR transfers don't trigger the RAW hazard.
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
536
537bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
538 ARM_AM::ShiftOpc ShOpcVal,
539 unsigned ShAmt) {
540 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
541 return true;
542 if (Shift.hasOneUse())
543 return true;
544 // R << 2 is free.
545 return ShOpcVal == ARM_AM::lsl &&
546 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
547}
548
/// Check whether the multiply \p N can have a power of two extracted from its
/// constant operand (to be used as a shifter operand) such that the remaining
/// constant is cheaper to materialize. On success, sets \p PowerOfTwo and
/// \p NewMulConst.
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  // (capped at MaxShift; MulConstVal != 0 guarantees termination).
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(Val: NewMulConstVal, DL: SDLoc(N), VT: MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(Val: MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(Val: NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
582
/// Replace all uses of N with M, first repositioning M so it precedes N in
/// the DAG's topological order (ensuring M is visited by the selector where
/// N would have been).
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(Position: N.getNode()->getIterator(), N: M.getNode());
  ReplaceUses(F: N, T: M);
}
587
/// Match N as a register shifted by an immediate (so_reg_imm shifter
/// operand). On success, BaseReg is the register and Opc encodes the shift
/// kind and amount. Note: may mutate the DAG via replaceDAGValue when
/// extracting a shift from a multiply.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, MaxShift: 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      // Rewrite the multiply's constant in place; the Handle keeps N alive
      // across the DAG mutation.
      replaceDAGValue(N: N.getOperand(i: 1), M: NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: PowerOfTwo), DL: Loc, VT: MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(i: 0);
  unsigned ShImmVal = 0;
  // Immediate form requires a constant shift amount.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
626
/// Match N as a register shifted by a register (so_reg_reg shifter operand).
/// On success, BaseReg/ShReg are the value and shift-amount registers and
/// Opc encodes the shift kind (immediate amount is 0 for the register form).
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(i: 0);
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to the immediate form, not this one.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (RHS) return false;

  ShReg = N.getOperand(i: 1);
  if (CheckProfitability && !isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt: ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: ShImmVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
653
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD when the operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(A: N, B: Parent->getOperand(Num: 1));
}
661
662
/// Match N as a base register plus a signed 12-bit immediate offset
/// (addrmode_imm12). Always succeeds: when no offset can be folded the
/// whole expression becomes the base with offset 0.
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }

    // Look through ARMISD::Wrapper, except when it wraps a target address
    // that must stay intact for later matching.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(i: 0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    int RHSC = (int)RHS->getSExtValue();
    // SUB of a constant is ADD of its negation.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
713
714
715
/// Match N as a register +/- (possibly shifted) register load/store address
/// (ldst_so_reg). Simple R +/- imm12 forms are deliberately rejected so they
/// match the cheaper LDRi12 pattern instead. May mutate the DAG when
/// extracting a shift from a multiply.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        // Clear the low bit so the remainder is a power of two; that power
        // becomes the shift amount and the base supplies the "+ X" term.
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(Value: RHSC)) {
          unsigned ShAmt = Log2_32(Value: RHSC);
          Base = Offset = N.getOperand(i: 0);
          Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt,
                                                              SO: ARM_AM::lsl),
                                          DL: SDLoc(N), VT: MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/1,
                                RangeMin: -0x1000+1, RangeMax: 0x1000, ScaledConstant&: RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: 1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(i: 0);
  Offset = N.getOperand(i: 1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getOperand(i: 1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Shift: Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(i: 1).getOperand(i: 0);
      else {
        // Not profitable: fall back to using the unshifted RHS directly.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(i: 0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOperand(i: 0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 0).getOperand(i: 1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(Shift: N.getOperand(i: 0), ShOpcVal, ShAmt)) {
          // Swap roles: the shifted LHS becomes the offset.
          Offset = N.getOperand(i: 0).getOperand(i: 0);
          Base = N.getOperand(i: 1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N: Offset, MaxShift: 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(N: Offset.getOperand(i: 1), M: NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
823
/// SelectAddrMode2OffsetReg - Select a register (optionally shifted by a
/// constant) offset operand for a pre/post-indexed addrmode2 load/store.
/// Returns false for plain constants in the 12-bit range so the immediate
/// variants (SelectAddrMode2OffsetImm*) get a chance to match instead.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  // The add/sub direction is derived from the memory node's indexed mode.
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // A constant that fits in 12 bits is better selected as an immediate
  // offset; refuse it here.
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Shift: N, ShOpcVal, ShAmt))
        Offset = N.getOperand(i: 0);
      else {
        // Not profitable on this subtarget: drop back to an unshifted
        // register offset.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Pack direction, shift amount and shift kind into the AM2 opcode imm.
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: ShAmt, SO: ShOpcVal),
                                  DL: SDLoc(N), VT: MVT::i32);
  return true;
}
859
/// SelectAddrMode2OffsetImmPre - Select a pure 12-bit immediate offset for a
/// pre-indexed addrmode2 load/store (LDR_PRE_IMM / LDRB_PRE_IMM). The result
/// is a signed immediate (negated for decrementing modes) and a zero register
/// standing in for the unused register-offset operand.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val)) { // 12 bits.
    // Encode the direction in the immediate's sign rather than an AM2 opcode.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getSignedTargetConstant(Val, DL: SDLoc(Op), VT: MVT::i32);
    return true;
  }

  return false;
}
878
879
/// SelectAddrMode2OffsetImm - Select a 12-bit immediate offset for a
/// post-indexed addrmode2 load/store. Unlike the *ImmPre variant, the
/// direction is packed into an AM2 opcode immediate (via getAM2Opc) instead
/// of the sign of the value.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x1000, ScaledConstant&: Val)) { // 12 bits.
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM2Opc(Opc: AddSub, Imm12: Val,
                                                    SO: ARM_AM::no_shift),
                                    DL: SDLoc(Op), VT: MVT::i32);
    return true;
  }

  return false;
}
899
/// SelectAddrOffsetNone - Trivial addressing mode: the whole operand is the
/// base register with no offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
904
/// SelectAddrMode3 - Match an addrmode3 (halfword/signed-byte/doubleword)
/// operand: Base plus either a register Offset or an 8-bit immediate folded
/// into the AM3 opcode value. Always succeeds with some decomposition.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(i: 0);
    Offset = N.getOperand(i: 1);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::sub, Offset: 0), DL: SDLoc(N),
                                    VT: MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
    // No foldable offset: the whole node is the base (lowering frame
    // indices to target frame indices as needed).
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: 0), DL: SDLoc(N),
                                    VT: MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/1,
                              RangeMin: -256 + 1, RangeMax: 256, ScaledConstant&: RHSC)) { // 8 bits.
    Base = N.getOperand(i: 0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

    // The encoded AM3 offset is unsigned; negative values become sub.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: RHSC), DL: SDLoc(N),
                                    VT: MVT::i32);
    return true;
  }

  // Constant too large to fold: fall back to base + register offset.
  Base = N.getOperand(i: 0);
  Offset = N.getOperand(i: 1);
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: ARM_AM::add, Offset: 0), DL: SDLoc(N),
                                  VT: MVT::i32);
  return true;
}
958
/// SelectAddrMode3Offset - Select the offset operand for a pre/post-indexed
/// addrmode3 load/store: an 8-bit immediate if the offset is a small
/// constant, otherwise a register offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 256, ScaledConstant&: Val)) { // 8 bits.
    Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: Val), DL: SDLoc(Op),
                                    VT: MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(Val: ARM_AM::getAM3Opc(Opc: AddSub, Offset: 0), DL: SDLoc(Op),
                                  VT: MVT::i32);
  return true;
}
980
/// IsAddressingMode5 - Shared implementation for addrmode5 (VFP load/store)
/// matching: Base plus a scaled 8-bit immediate packed into the AM5 (or
/// AM5FP16) opcode value. FP16 selects the half-precision variant, which
/// scales the offset by 2 instead of 4. Always succeeds.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper unless the wrapped symbol must be materialised
      // some other way.
      Base = N.getOperand(i: 0);
    }
    Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: 0),
                                       DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale, RangeMin: -255, RangeMax: 256, ScaledConstant&: RHSC)) {
    Base = N.getOperand(i: 0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    }

    // Encoded offset is unsigned; the sign becomes the add/sub bit.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: AddSub, Offset: RHSC),
                                         DL: SDLoc(N), VT: MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: AddSub, Offset: RHSC),
                                         DL: SDLoc(N), VT: MVT::i32);

    return true;
  }

  // Offset not foldable: treat the whole expression as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5FP16Opc(Opc: ARM_AM::add, Offset: 0),
                                       DL: SDLoc(N), VT: MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(Val: ARM_AM::getAM5Opc(Opc: ARM_AM::add, Offset: 0),
                                       DL: SDLoc(N), VT: MVT::i32);

  return true;
}
1039
/// SelectAddrMode5 - addrmode5 for full-precision VFP accesses (offset
/// scaled by 4); thin wrapper over IsAddressingMode5.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
1044
/// SelectAddrMode5FP16 - addrmode5 for half-precision VFP accesses (offset
/// scaled by 2); thin wrapper over IsAddressingMode5.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
1049
/// SelectAddrMode6 - Match an addrmode6 (NEON vector load/store) operand:
/// the address itself plus an alignment immediate derived from the memory
/// operand of the parent node. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Val: Parent);

  if (isa<LSBaseSDNode>(Val: MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(Num: MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Val: Alignment, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1078
/// SelectAddrMode6Offset - Select the post-increment operand for an
/// addrmode6 load/store. A constant increment equal to the access size is
/// represented by the zero register (implicit "increment by access size");
/// any other offset is passed through as a register. Only POST_INC matches.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Val: Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(Val&: N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  }
  return true;
}
1092
1093bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1094 SDValue &Offset, SDValue &Label) {
1095 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1096 Offset = N.getOperand(i: 0);
1097 SDValue N1 = N.getOperand(i: 1);
1098 Label = CurDAG->getTargetConstant(Val: N1->getAsZExtVal(), DL: SDLoc(N), VT: MVT::i32);
1099 return true;
1100 }
1101
1102 return false;
1103}
1104
1105
1106//===----------------------------------------------------------------------===//
1107// Thumb Addressing Modes
1108//===----------------------------------------------------------------------===//
1109
1110static bool shouldUseZeroOffsetLdSt(SDValue N) {
1111 // Negative numbers are difficult to materialise in thumb1. If we are
1112 // selecting the add of a negative, instead try to select ri with a zero
1113 // offset, so create the add node directly which will become a sub.
1114 if (N.getOpcode() != ISD::ADD)
1115 return false;
1116
1117 // Look for an imm which is not legal for ld/st, but is legal for sub.
1118 if (auto C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1)))
1119 return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1120
1121 return false;
1122}
1123
/// SelectThumbAddrModeRRSext - Match a register + register Thumb address.
/// A lone null constant is accepted as both base and offset; otherwise the
/// node must be an add (or base-with-constant-offset), whose two operands
/// become Base and Offset.
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (!isNullConstant(V: N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(i: 0);
  Offset = N.getOperand(i: 1);
  return true;
}
1138
/// SelectThumbAddrModeRR - Register + register Thumb addressing, except for
/// adds of small negative constants which are better selected as ri with a
/// zero offset (see shouldUseZeroOffsetLdSt).
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
1145
/// SelectThumbAddrModeImm5S - Match a Thumb base + unsigned 5-bit scaled
/// immediate address (imm5 * Scale, Scale in {1,2,4}). Falls back to
/// register-offset selection (returns false) when the constant is out of
/// range or when a plain add is better selected as rr.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Negative-immediate add: use the whole node as base with offset 0 so
    // the address is formed by a sub.
    Base = N;
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(i: 0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale, RangeMin: 0, RangeMax: 32, ScaledConstant&: RHSC)) {
    Base = N.getOperand(i: 0);
    OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1183
/// SelectThumbAddrModeImm5S4 - imm5 scaled by 4 (word accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, Scale: 4, Base, OffImm);
}
1189
/// SelectThumbAddrModeImm5S2 - imm5 scaled by 2 (halfword accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, Scale: 2, Base, OffImm);
}
1195
/// SelectThumbAddrModeImm5S1 - imm5 scaled by 1 (byte accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, Scale: 1, Base, OffImm);
}
1201
/// SelectThumbAddrModeSP - Match an SP-relative Thumb address: a frame index
/// plus an unsigned imm8 scaled by 4. Bumps frame-object alignment to 4 where
/// needed so the word-aligned offset encoding stays valid.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
      MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  if (N.getOperand(i: 0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(ObjectIdx: FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(ObjectIdx: FI) && MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
          MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
        if (MFI.getObjectAlign(ObjectIdx: FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1247
/// SelectTAddrModeImm7 - Thumb base + signed 7-bit immediate scaled by
/// (1 << Shift). Handles both add and sub roots (sub negates the constant).
/// Always succeeds, degrading to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -0x7f, RangeMax: 0x80,
                                ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
                                               VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1269
1270
1271//===----------------------------------------------------------------------===//
1272// Thumb 2 Addressing Modes
1273//===----------------------------------------------------------------------===//
1274
1275
/// SelectT2AddrModeImm12 - Match a Thumb2 base + unsigned 12-bit immediate
/// address (t2LDRi12 and friends). Defers (R - imm8) to t2LDRi8 and
/// constant-pool wrappers to t2LDRpci; otherwise always succeeds, degrading
/// to base-only.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(i: 0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1331
/// SelectT2AddrModeImm8 (scaled template form) - Thumb2 base + signed 8-bit
/// immediate scaled by (1 << Shift). Handles sub roots by negating the
/// constant; always succeeds, degrading to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -255, RangeMax: 256, ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
                                               VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1358
/// SelectT2AddrModeImm8 - Match a Thumb2 base + *negative* 8-bit immediate
/// (t2LDRi8). Positive offsets are left for the imm12 form, so this only
/// succeeds for offsets in [-255, 0).
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }
  }

  return false;
}
1385
/// SelectT2AddrModeImm8Offset - Select an 8-bit immediate offset for a
/// pre/post-indexed Thumb2 load/store; the value is negated for the
/// decrementing addressing modes.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm){
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Val: Op)->getAddressingMode()
    : cast<StoreSDNode>(Val: Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(Node: N, /*Scale=*/1, RangeMin: 0, RangeMax: 0x100, ScaledConstant&: RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getSignedTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i32)
      : CurDAG->getSignedTargetConstant(Val: -RHSC, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  return false;
}
1402
/// SelectT2AddrModeImm7 - Thumb2/MVE base + signed 7-bit immediate scaled by
/// (1 << Shift). Like the Thumb1 variant but also lowers frame-index bases.
/// Always succeeds, degrading to base-only with a zero offset.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(Op: N)) {
    int RHSC;
    if (isScaledConstantInRange(Node: N.getOperand(i: 1), Scale: 1 << Shift, RangeMin: -0x7f, RangeMax: 0x80,
                                ScaledConstant&: RHSC)) {
      Base = N.getOperand(i: 0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift), DL: SDLoc(N),
                                               VT: MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1430
/// Template entry point used by TableGen patterns; forwards the compile-time
/// Shift to the runtime-parameterised overload below.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1436
/// SelectT2AddrModeImm7Offset - Select a 7-bit scaled immediate offset for a
/// pre/post-indexed (possibly masked) load/store; negated for decrementing
/// modes. Masked MVE loads/stores are handled alongside plain ones.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Val: Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Val: Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Val: Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Val: Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(Node: N, Scale: 1 << Shift, RangeMin: 0, RangeMax: 0x80, ScaledConstant&: RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
      ? CurDAG->getSignedTargetConstant(Val: RHSC * (1 << Shift),
                                        DL: SDLoc(N), VT: MVT::i32)
      : CurDAG->getSignedTargetConstant(Val: -RHSC * (1 << Shift),
                                        DL: SDLoc(N), VT: MVT::i32);
    return true;
  }
  return false;
}
1471
1472template <int Min, int Max>
1473bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1474 int Val;
1475 if (isScaledConstantInRange(Node: N, Scale: 1, RangeMin: Min, RangeMax: Max, ScaledConstant&: Val)) {
1476 OffImm = CurDAG->getSignedTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
1477 return true;
1478 }
1479 return false;
1480}
1481
/// SelectT2AddrModeSoReg - Match a Thumb2 register + shifted-register
/// address: (R + R) or (R + (R << [1,2,3])). Constant offsets are refused
/// so the imm12/imm8 forms can claim them.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(Op: N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(i: 0);
  OffReg = N.getOperand(i: 1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(a&: Base, b&: OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(Val: OffReg.getOperand(i: 1))) {
      ShAmt = Sh->getZExtValue();
      // Thumb2 only allows shift amounts 1..3 in this form.
      if (ShAmt < 4 && isShifterOpProfitable(Shift: OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(i: 0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N: OffReg, MaxShift: 3, PowerOfTwo, NewMulConst)) {
      // HandleSDNode keeps OffReg valid across the DAG mutation below.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(N: OffReg.getOperand(i: 1), M: NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(Val: ShAmt, DL: SDLoc(N), VT: MVT::i32);

  return true;
}
1541
/// SelectT2AddrModeExclusive - Match the base + imm8*4 address used by
/// ldrex/strex. Always succeeds (the instructions have no alternative
/// addressing form); a non-foldable offset simply leaves the whole node as
/// the base with offset 0. The encoded immediate is the offset divided by 4.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(Op: N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(i: 0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(Val: RHSC/4, DL: SDLoc(N), VT: MVT::i32);
  return true;
}
1570
1571//===--------------------------------------------------------------------===//
1572
1573/// getAL - Returns a ARMCC::AL immediate node.
/// getAL - Returns a ARMCC::AL immediate node (the "always" predicate used
/// as the default condition-code operand on predicated instructions).
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant(Val: (uint64_t)ARMCC::AL, DL: dl, VT: MVT::i32);
}
1577
/// Copy the memory operand from the original memory node onto the newly
/// selected machine node so alias/scheduling info survives selection.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
}
1582
/// tryARMIndexedLoad - Select a pre/post-indexed ARM-mode load, picking the
/// opcode from the loaded type, extension kind, and whichever addressing-mode
/// selector matches the offset. Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
    // i16 loads use addrmode3 and distinguish sign/zero extension.
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    // Signed byte loads need addrmode3 (LDRSB); unsigned byte loads use
    // addrmode2 like i32.
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(Op: N, N: LD->getOffset(), Offset, Opc&: AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The *_PRE_IMM forms have no separate register-offset operand.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
                       CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
                                           VT3: MVT::Other, Ops);
      transferMemOperands(N, Result: New);
      ReplaceNode(F: N, T: New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, dl: SDLoc(N)),
                       CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
                                           VT3: MVT::Other, Ops);
      transferMemOperands(N, Result: New);
      ReplaceNode(F: N, T: New);
      return true;
    }
  }

  return false;
}
1661
/// tryT1IndexedLoad - Select a Thumb1 post-incremented i32 load. Only a
/// plain (non-extending) POST_INC load whose increment equals the access
/// size (4) can be represented, via a single-register LDM pseudo.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(Val: LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, dl: SDLoc(N)),
                   CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(Opcode: ARM::tLDR_postidx, dl: SDLoc(N), VT1: MVT::i32,
                                       VT2: MVT::i32, VT3: MVT::Other, Ops);
  transferMemOperands(N, Result: New);
  ReplaceNode(F: N, T: New);
  return true;
}
1688
/// tryT2IndexedLoad - Select a pre/post-indexed Thumb2 load. All Thumb2
/// indexed loads share the imm8 offset form; the opcode is chosen from the
/// loaded type and sign/zero extension. Returns true and replaces N on
/// success.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(Op: N, N: LD->getOffset(), OffImm&: Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, dl: SDLoc(N)),
                     CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32, VT2: MVT::i32,
                                         VT3: MVT::Other, Ops);
    transferMemOperands(N, Result: New);
    ReplaceNode(F: N, T: New);
    return true;
  }

  return false;
}
1739
/// Try to select an MVE pre/post-indexed vector load (plain or masked) into
/// one of the VLDR[BHW] writeback machine opcodes. Returns false if the node
/// is not an indexed vector load, or if no alignment/offset-compatible
/// opcode exists.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // An unmasked load is selected with an all-true (None) predicate.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Val: N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    // A masked load uses its mask operand as a "Then" VPT predicate.
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(Val: N);

  // Pick the narrowest VLDR that matches the memory type, alignment and
  // imm7 offset encoding (the Shift argument scales the offset range).
  // Extending loads come first so the extension is folded into the opcode.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(Op: N, N: Offset, OffImm&: NewOffset, Shift: 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Val: Pred, DL: SDLoc(N), VT: MVT::i32),
                   PredReg,
                   CurDAG->getRegister(Reg: 0, VT: MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT1: MVT::i32,
                                       VT2: N->getValueType(ResNo: 0), VT3: MVT::Other, Ops);
  transferMemOperands(N, Result: New);
  // The machine node produces (writeback, data, chain) whereas the ISD load
  // produces (data, writeback, chain), so the first two results are swapped
  // when rewiring uses.
  ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
  ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
  ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1839
1840/// Form a GPRPair pseudo register from a pair of GPR regs.
1841SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1842 SDLoc dl(V0.getNode());
1843 SDValue RegClass =
1844 CurDAG->getTargetConstant(Val: ARM::GPRPairRegClassID, DL: dl, VT: MVT::i32);
1845 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
1846 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
1847 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1848 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1849}
1850
1851/// Form a D register from a pair of S registers.
1852SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1853 SDLoc dl(V0.getNode());
1854 SDValue RegClass =
1855 CurDAG->getTargetConstant(Val: ARM::DPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1856 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1857 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1858 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1859 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1860}
1861
1862/// Form a quad register from a pair of D registers.
1863SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1864 SDLoc dl(V0.getNode());
1865 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QPRRegClassID, DL: dl,
1866 VT: MVT::i32);
1867 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1868 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1869 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1870 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1871}
1872
1873/// Form 4 consecutive D registers from a pair of Q registers.
1874SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1875 SDLoc dl(V0.getNode());
1876 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1877 VT: MVT::i32);
1878 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1879 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1880 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1881 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1882}
1883
1884/// Form 4 consecutive S registers.
1885SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1886 SDValue V2, SDValue V3) {
1887 SDLoc dl(V0.getNode());
1888 SDValue RegClass =
1889 CurDAG->getTargetConstant(Val: ARM::QPR_VFP2RegClassID, DL: dl, VT: MVT::i32);
1890 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::ssub_0, DL: dl, VT: MVT::i32);
1891 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::ssub_1, DL: dl, VT: MVT::i32);
1892 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::ssub_2, DL: dl, VT: MVT::i32);
1893 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::ssub_3, DL: dl, VT: MVT::i32);
1894 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1895 V2, SubReg2, V3, SubReg3 };
1896 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1897}
1898
1899/// Form 4 consecutive D registers.
1900SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1901 SDValue V2, SDValue V3) {
1902 SDLoc dl(V0.getNode());
1903 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQPRRegClassID, DL: dl,
1904 VT: MVT::i32);
1905 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::dsub_0, DL: dl, VT: MVT::i32);
1906 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::dsub_1, DL: dl, VT: MVT::i32);
1907 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::dsub_2, DL: dl, VT: MVT::i32);
1908 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::dsub_3, DL: dl, VT: MVT::i32);
1909 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1910 V2, SubReg2, V3, SubReg3 };
1911 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1912}
1913
1914/// Form 4 consecutive Q registers.
1915SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1916 SDValue V2, SDValue V3) {
1917 SDLoc dl(V0.getNode());
1918 SDValue RegClass = CurDAG->getTargetConstant(Val: ARM::QQQQPRRegClassID, DL: dl,
1919 VT: MVT::i32);
1920 SDValue SubReg0 = CurDAG->getTargetConstant(Val: ARM::qsub_0, DL: dl, VT: MVT::i32);
1921 SDValue SubReg1 = CurDAG->getTargetConstant(Val: ARM::qsub_1, DL: dl, VT: MVT::i32);
1922 SDValue SubReg2 = CurDAG->getTargetConstant(Val: ARM::qsub_2, DL: dl, VT: MVT::i32);
1923 SDValue SubReg3 = CurDAG->getTargetConstant(Val: ARM::qsub_3, DL: dl, VT: MVT::i32);
1924 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1925 V2, SubReg2, V3, SubReg3 };
1926 return CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1927}
1928
1929/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1930/// of a NEON VLD or VST instruction. The supported values depend on the
1931/// number of registers being loaded.
1932SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1933 unsigned NumVecs, bool is64BitVector) {
1934 unsigned NumRegs = NumVecs;
1935 if (!is64BitVector && NumVecs < 3)
1936 NumRegs *= 2;
1937
1938 unsigned Alignment = Align->getAsZExtVal();
1939 if (Alignment >= 32 && NumRegs == 4)
1940 Alignment = 32;
1941 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1942 Alignment = 16;
1943 else if (Alignment >= 8)
1944 Alignment = 8;
1945 else
1946 Alignment = 0;
1947
1948 return CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);
1949}
1950
1951static bool isVLDfixed(unsigned Opc)
1952{
1953 switch (Opc) {
1954 default: return false;
1955 case ARM::VLD1d8wb_fixed : return true;
1956 case ARM::VLD1d16wb_fixed : return true;
1957 case ARM::VLD1d64Qwb_fixed : return true;
1958 case ARM::VLD1d32wb_fixed : return true;
1959 case ARM::VLD1d64wb_fixed : return true;
1960 case ARM::VLD1d8TPseudoWB_fixed : return true;
1961 case ARM::VLD1d16TPseudoWB_fixed : return true;
1962 case ARM::VLD1d32TPseudoWB_fixed : return true;
1963 case ARM::VLD1d64TPseudoWB_fixed : return true;
1964 case ARM::VLD1d8QPseudoWB_fixed : return true;
1965 case ARM::VLD1d16QPseudoWB_fixed : return true;
1966 case ARM::VLD1d32QPseudoWB_fixed : return true;
1967 case ARM::VLD1d64QPseudoWB_fixed : return true;
1968 case ARM::VLD1q8wb_fixed : return true;
1969 case ARM::VLD1q16wb_fixed : return true;
1970 case ARM::VLD1q32wb_fixed : return true;
1971 case ARM::VLD1q64wb_fixed : return true;
1972 case ARM::VLD1DUPd8wb_fixed : return true;
1973 case ARM::VLD1DUPd16wb_fixed : return true;
1974 case ARM::VLD1DUPd32wb_fixed : return true;
1975 case ARM::VLD1DUPq8wb_fixed : return true;
1976 case ARM::VLD1DUPq16wb_fixed : return true;
1977 case ARM::VLD1DUPq32wb_fixed : return true;
1978 case ARM::VLD2d8wb_fixed : return true;
1979 case ARM::VLD2d16wb_fixed : return true;
1980 case ARM::VLD2d32wb_fixed : return true;
1981 case ARM::VLD2q8PseudoWB_fixed : return true;
1982 case ARM::VLD2q16PseudoWB_fixed : return true;
1983 case ARM::VLD2q32PseudoWB_fixed : return true;
1984 case ARM::VLD2DUPd8wb_fixed : return true;
1985 case ARM::VLD2DUPd16wb_fixed : return true;
1986 case ARM::VLD2DUPd32wb_fixed : return true;
1987 case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1988 case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1989 case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1990 }
1991}
1992
1993static bool isVSTfixed(unsigned Opc)
1994{
1995 switch (Opc) {
1996 default: return false;
1997 case ARM::VST1d8wb_fixed : return true;
1998 case ARM::VST1d16wb_fixed : return true;
1999 case ARM::VST1d32wb_fixed : return true;
2000 case ARM::VST1d64wb_fixed : return true;
2001 case ARM::VST1q8wb_fixed : return true;
2002 case ARM::VST1q16wb_fixed : return true;
2003 case ARM::VST1q32wb_fixed : return true;
2004 case ARM::VST1q64wb_fixed : return true;
2005 case ARM::VST1d8TPseudoWB_fixed : return true;
2006 case ARM::VST1d16TPseudoWB_fixed : return true;
2007 case ARM::VST1d32TPseudoWB_fixed : return true;
2008 case ARM::VST1d64TPseudoWB_fixed : return true;
2009 case ARM::VST1d8QPseudoWB_fixed : return true;
2010 case ARM::VST1d16QPseudoWB_fixed : return true;
2011 case ARM::VST1d32QPseudoWB_fixed : return true;
2012 case ARM::VST1d64QPseudoWB_fixed : return true;
2013 case ARM::VST2d8wb_fixed : return true;
2014 case ARM::VST2d16wb_fixed : return true;
2015 case ARM::VST2d32wb_fixed : return true;
2016 case ARM::VST2q8PseudoWB_fixed : return true;
2017 case ARM::VST2q16PseudoWB_fixed : return true;
2018 case ARM::VST2q32PseudoWB_fixed : return true;
2019 }
2020}
2021
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The "_fixed" forms implicitly increment the base by the access size; the
// "_register" forms take an explicit increment register operand instead.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 writeback forms (D/Q destinations and 3/4-register pseudos).
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  // VLD1/VLD2 all-lanes duplicating writeback forms.
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  // VST1 writeback forms (D/Q sources and 3/4-register pseudos).
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 writeback forms.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 writeback forms.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP writeback forms.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
2094
2095/// Returns true if the given increment is a Constant known to be equal to the
2096/// access size performed by a NEON load/store. This means the "[rN]!" form can
2097/// be used.
2098static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2099 auto C = dyn_cast<ConstantSDNode>(Val&: Inc);
2100 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2101}
2102
/// Select a NEON vld1/vld2/vld3/vld4 of NumVecs vectors, optionally with
/// address-register writeback. The opcode tables are indexed by element size:
/// DOpcodes holds the double-register forms, QOpcodes0 the quad-register
/// forms (or the even-registers half of a vld3/vld4), and QOpcodes1 the
/// odd-registers half.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(Num: 0);
  EVT VT = N->getValueType(ResNo: 0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
  // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Multi-vector results come back as one wide i64-element super-register;
  // vld3 pseudos round up to a 4-register super-register.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(x: ResTy);
  if (isUpdating)
    ResTys.push_back(x: MVT::i32); // Updated base address.
  ResTys.push_back(x: MVT::Other); // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(Elt: MemAddr);
    Ops.push_back(Elt: Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Elt: Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Elt: Reg0);
    }
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: Reg0);
    Ops.push_back(Elt: Chain);
    VLd = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
                                          VT1: ResTy, VT2: AddrTy, VT3: MVT::Other, Ops: OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs, using the updated address produced by the first
    // load and passing its result in as the tied source super-register.
    Ops.push_back(Elt: SDValue(VLdA, 1));
    Ops.push_back(Elt: Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Elt: Reg0);
    }
    Ops.push_back(Elt: SDValue(VLdA, 0));
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: Reg0);
    Ops.push_back(Elt: Chain);
    VLd = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLd), NewMemRefs: {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(F: N, T: VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(F: SDValue(N, Vec),
                T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2244
/// Select a NEON vst1/vst2/vst3/vst4 of NumVecs vectors, optionally with
/// address-register writeback. The opcode tables are indexed by element size:
/// DOpcodes holds the double-register forms, QOpcodes0 the quad-register
/// forms (or the even-registers half of a vst3/vst4), and QOpcodes1 the
/// odd-registers half.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();

  SDValue Chain = N->getOperand(Num: 0);
  EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size to an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
  // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(x: MVT::i32); // Updated base address.
  ResTys.push_back(x: MVT::Other); // Chain.

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Num: Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
      SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Num: Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Num: Vec0Idx);
      SDValue Q1 = N->getOperand(Num: Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0: Q0, V1: Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(Elt: MemAddr);
    Ops.push_back(Elt: Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VecTy: VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Elt: Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Elt: Reg0);
    }
    Ops.push_back(Elt: SrcReg);
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: Reg0);
    Ops.push_back(Elt: Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VSt), NewMemRefs: {MemOp});

    ReplaceNode(F: N, T: VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
  SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
  SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
  // A vst3 stores only three registers; the fourth slot is left undef.
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Num: Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl,
                                        VT1: MemAddr.getValueType(),
                                        VT2: MVT::Other, Ops: OpsA);
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStA), NewMemRefs: {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers, using the address produced by the first store.
  Ops.push_back(Elt: SDValue(VStA, 0));
  Ops.push_back(Elt: Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Elt: Reg0);
  }
  Ops.push_back(Elt: RegSeq);
  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);
  SDNode *VStB = CurDAG->getMachineNode(Opcode: QOpcodes1[OpcodeIndex], dl, ResultTys: ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VStB), NewMemRefs: {MemOp});
  ReplaceNode(F: N, T: VStB);
}
2398
/// Select a NEON vld[234]-lane or vst[234]-lane operation (load/store of one
/// lane of NumVecs vectors), optionally with post-increment address update.
/// DOpcodes/QOpcodes are opcode tables indexed by element size for the
/// double- and quad-register forms respectively.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();

  SDValue Chain = N->getOperand(Num: 0);
  // The lane number is the constant operand following the NumVecs vector
  // operands.
  unsigned Lane = N->getConstantOperandVal(Num: Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Num: Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Clamp the requested alignment to what the instruction can encode: no
  // larger than the total access size, at least 8 bytes, and a power of two.
  // The 3-vector forms have no alignment encoding, so leave it at 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
  // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: loads produce one wide tuple value (modeled as a vector of
  // i64), then the optional writeback GPR, then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    // A 3-vector group is padded out to a 4-register tuple.
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(x: EVT::getVectorVT(Context&: *CurDAG->getContext(),
                                      VT: MVT::i64, NumElements: ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(x: MVT::i32);
  ResTys.push_back(x: MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Elt: MemAddr);
  Ops.push_back(Elt: Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(Num: AddrOpIdx + 1);
    // An increment equal to the access size is encoded implicitly (Reg0);
    // anything else is passed as an explicit register increment.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
    Ops.push_back(Elt: IsImmUpdate ? Reg0 : Inc);
  }

  // Collect the input vectors into a single super-register tuple; the
  // missing fourth register of a 3-vector group is filled with IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Num: Vec0Idx + 0);
  SDValue V1 = N->getOperand(Num: Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(VT: MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(VT: MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Num: Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Num: Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(VT: MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(VT: MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(Elt: SuperReg);
  Ops.push_back(Elt: getI32Imm(Imm: Lane, dl));
  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdLn), NewMemRefs: {MemOp});
  // Stores are done: the new node replaces N wholesale.
  if (!IsLoad) {
    ReplaceNode(F: N, T: VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(F: SDValue(N, Vec),
                T: CurDAG->getTargetExtractSubreg(SRIdx: Sub0 + Vec, DL: dl, VT, Operand: SuperReg));
  // Rewire the chain result and, for updating forms, the writeback result.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2527
2528template <typename SDValueVector>
2529void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2530 SDValue PredicateMask) {
2531 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2532 Ops.push_back(PredicateMask);
2533 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2534}
2535
2536template <typename SDValueVector>
2537void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2538 SDValue PredicateMask,
2539 SDValue Inactive) {
2540 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::Then, DL: Loc, VT: MVT::i32));
2541 Ops.push_back(PredicateMask);
2542 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2543 Ops.push_back(Inactive);
2544}
2545
2546template <typename SDValueVector>
2547void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2548 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2549 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2550 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2551}
2552
2553template <typename SDValueVector>
2554void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2555 EVT InactiveTy) {
2556 Ops.push_back(CurDAG->getTargetConstant(Val: ARMVCC::None, DL: Loc, VT: MVT::i32));
2557 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2558 Ops.push_back(CurDAG->getRegister(Reg: 0, VT: MVT::i32)); // tp_reg
2559 Ops.push_back(SDValue(
2560 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: InactiveTy), 0));
2561}
2562
/// Select an MVE gather/scatter-style intrinsic that takes a vector of base
/// addresses plus an immediate offset and produces an updated base-address
/// vector alongside its data result. Opcodes[0]/[1] are the 32- and 64-bit
/// element variants; Predicated selects the masked form.
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(ResNo: 1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(Elt: N->getOperand(Num: 2)); // vector of base addresses

  int32_t ImmValue = N->getConstantOperandVal(Num: 3);
  Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(Elt: N->getOperand(Num: 0)); // chain

  // The machine node's result order differs from the intrinsic's: result 0
  // of the new node corresponds to the intrinsic's result 1 and vice versa,
  // hence the crossed ReplaceUses calls below.
  SmallVector<EVT, 8> VTs;
  VTs.push_back(Elt: N->getValueType(ResNo: 1));
  VTs.push_back(Elt: N->getValueType(ResNo: 0));
  VTs.push_back(Elt: N->getValueType(ResNo: 2));

  SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(N), ResultTys: VTs, Ops);
  ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 1));
  ReplaceUses(F: SDValue(N, 1), T: SDValue(New, 0));
  ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 2));
  transferMemOperands(N, Result: New);
  CurDAG->RemoveDeadNode(N);
}
2604
2605void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2606 bool Immediate,
2607 bool HasSaturationOperand) {
2608 SDLoc Loc(N);
2609 SmallVector<SDValue, 8> Ops;
2610
2611 // Two 32-bit halves of the value to be shifted
2612 Ops.push_back(Elt: N->getOperand(Num: 1));
2613 Ops.push_back(Elt: N->getOperand(Num: 2));
2614
2615 // The shift count
2616 if (Immediate) {
2617 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2618 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2619 } else {
2620 Ops.push_back(Elt: N->getOperand(Num: 3));
2621 }
2622
2623 // The immediate saturation operand, if any
2624 if (HasSaturationOperand) {
2625 int32_t SatOp = N->getConstantOperandVal(Num: 4);
2626 int SatBit = (SatOp == 64 ? 0 : 1);
2627 Ops.push_back(Elt: getI32Imm(Imm: SatBit, dl: Loc));
2628 }
2629
2630 // MVE scalar shifts are IT-predicable, so include the standard
2631 // predicate arguments.
2632 Ops.push_back(Elt: getAL(CurDAG, dl: Loc));
2633 Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
2634
2635 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2636}
2637
/// Select an MVE VADC/VSBC (add/subtract with carry across vector lanes).
/// If the incoming carry is a compile-time constant whose carry bit matches
/// what the carry-less ("I") variant implies, the carry operand is dropped
/// and OpcodeWithNoCarry is used instead.
void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  // Predicated intrinsics carry the inactive-lanes value as operand 1.
  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(Elt: N->getOperand(Num: FirstInputOp));
  Ops.push_back(Elt: N->getOperand(Num: FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(Num: FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(Val&: CarryIn);
  // The carry lives in bit 29 of the FPSCR-format word.
  uint32_t CarryMask = 1 << 29;
  // Addition's no-carry form assumes carry clear; subtraction's assumes set.
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(Elt: CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         PredicateMask: N->getOperand(Num: FirstInputOp + 3), // predicate
                         Inactive: N->getOperand(Num: FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));

  CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
}
2671
2672void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2673 SDLoc Loc(N);
2674 SmallVector<SDValue, 8> Ops;
2675
2676 // One vector input, followed by a 32-bit word of bits to shift in
2677 // and then an immediate shift count
2678 Ops.push_back(Elt: N->getOperand(Num: 1));
2679 Ops.push_back(Elt: N->getOperand(Num: 2));
2680 int32_t ImmValue = N->getConstantOperandVal(Num: 3);
2681 Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc)); // immediate shift count
2682
2683 if (Predicated)
2684 AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 4));
2685 else
2686 AddEmptyMVEPredicateToOps(Ops, Loc);
2687
2688 CurDAG->SelectNodeTo(N, MachineOpc: ARM::MVE_VSHLC, VTs: N->getVTList(), Ops: ArrayRef(Ops));
2689}
2690
2691static bool SDValueToConstBool(SDValue SDVal) {
2692 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2693 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(Val&: SDVal);
2694 uint64_t Value = SDValConstant->getZExtValue();
2695 assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2696 return Value;
2697}
2698
/// Common selection for the MVE VMLALDAV/VMLSLDAV (and VRMLALDAVH/VRMLSLDAVH)
/// family. OpcodesS/OpcodesU are flat opcode tables laid out in groups of
/// Stride entries (one per element size); within a table, adding Stride
/// selects the accumulating form, 2*Stride the exchanged form, and 4*Stride
/// the subtracting form. TySize indexes the element size within a group.
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  // Operands 1..3 are constant flags selecting the instruction variant.
  bool IsUnsigned = SDValueToConstBool(SDVal: N->getOperand(Num: 1));
  bool IsSub = SDValueToConstBool(SDVal: N->getOperand(Num: 2));
  bool IsExchange = SDValueToConstBool(SDVal: N->getOperand(Num: 3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    return isNullConstant(V: N->getOperand(Num: OpNo));
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  // Walk the opcode table using the layout described in the header comment.
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(Elt: N->getOperand(Num: 4));
    Ops.push_back(Elt: N->getOperand(Num: 5));
  }
  // Push the two vector operands
  Ops.push_back(Elt: N->getOperand(Num: 6));
  Ops.push_back(Elt: N->getOperand(Num: 7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: 8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
}
2749
2750void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2751 const uint16_t *OpcodesS,
2752 const uint16_t *OpcodesU) {
2753 EVT VecTy = N->getOperand(Num: 6).getValueType();
2754 size_t SizeIndex;
2755 switch (VecTy.getVectorElementType().getSizeInBits()) {
2756 case 16:
2757 SizeIndex = 0;
2758 break;
2759 case 32:
2760 SizeIndex = 1;
2761 break;
2762 default:
2763 llvm_unreachable("bad vector element size");
2764 }
2765
2766 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 2, TySize: SizeIndex);
2767}
2768
2769void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2770 const uint16_t *OpcodesS,
2771 const uint16_t *OpcodesU) {
2772 assert(
2773 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2774 32 &&
2775 "bad vector element size");
2776 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, Stride: 1, TySize: 0);
2777}
2778
/// Select an MVE VLD2/VLD4 deinterleaving load. The load is emitted as
/// NumVecs chained MVE_VLDn "stage" instructions that each fill part of one
/// wide tuple register; Opcodes is indexed first by element size (8/16/32)
/// and then by stage. When HasWriteback is set, the final stage also
/// produces the updated pointer.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(ResNo: 0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The stages pass one wide value (modeled as a vector of i64) through.
  EVT DataTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  // Seed the tuple with IMPLICIT_DEF; each stage overwrites its part.
  auto Data = SDValue(
      CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: Loc, VT: DataTy), 0);
  SDValue Chain = N->getOperand(Num: 0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(Opcode: OurOpcodes[Stage], dl: Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, Result: LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(Num: PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(Opcode: OurOpcodes[NumVecs - 1], dl: Loc, ResultTys, Ops);
  transferMemOperands(N, Result: LoadInst);

  // Rewire N's results: each vector comes from a Q subregister of the final
  // tuple, followed by the optional writeback pointer and the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(F: SDValue(N, i),
                T: CurDAG->getTargetExtractSubreg(SRIdx: ARM::qsub_0 + i, DL: Loc, VT,
                                                Operand: SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(F: SDValue(N, i++), T: SDValue(LoadInst, 1));
  ReplaceUses(F: SDValue(N, i), T: SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}
2834
/// Select an MVE VIDUP/VDDUP-style increment/decrement-and-duplicate
/// operation. Opcodes is indexed by element size; Wrapping adds the limit
/// operand of the VIWDUP/VDWDUP forms, Predicated adds the mask and an
/// inactive-lanes value. Operands are consumed sequentially via OpIdx:
/// [inactive,] base, [limit,] step, [predicate].
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(ResNo: 0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(Num: OpIdx++);

  Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(Elt: N->getOperand(Num: OpIdx++)); // limit

  // The step must be a compile-time constant and becomes an immediate.
  SDValue ImmOp = N->getOperand(Num: OpIdx++); // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(Elt: getI32Imm(Imm: ImmValue, dl: Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, PredicateMask: N->getOperand(Num: OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, InactiveTy: N->getValueType(ResNo: 0));

  CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VTs: N->getVTList(), Ops: ArrayRef(Ops));
}
2877
/// Select a dual-register CDE (Custom Datapath Extension) instruction
/// (CX1D/CX2D/CX3D). The intrinsic's two 32-bit results are produced as one
/// untyped register pair which is split back into GPRs afterwards.
/// NumExtraOps is the number of plain register operands between the
/// coprocessor number and the trailing immediate; HasAccum selects the
/// accumulating (and therefore IT-predicable) variant.
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(Num: OpIdx++);
  uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
  Ops.push_back(Elt: getI32Imm(Imm: ImmCoprocVal, dl: Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(Num: OpIdx++);
    SDValue AccHi = N->getOperand(Num: OpIdx++);
    // On big-endian targets the halves live in the opposite pair slots.
    if (IsBigEndian)
      std::swap(a&: AccLo, b&: AccHi);
    Ops.push_back(Elt: SDValue(createGPRPairNode(VT: MVT::Untyped, V0: AccLo, V1: AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(Elt: N->getOperand(Num: OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(Num: OpIdx);
  uint32_t ImmVal = Imm->getAsZExtVal();
  Ops.push_back(Elt: getI32Imm(Imm: ImmVal, dl: Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, dl: Loc);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    Ops.push_back(Elt: Pred);
    Ops.push_back(Elt: PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, dl: Loc, VT: MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(a&: SubRegs[0], b&: SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SRIdx: SubRegs[ResIdx], DL: Loc,
                                                    VT: MVT::i32, Operand: ResultPair);
    ReplaceUses(F: SDValue(N, ResIdx), T: SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}
2940
/// Select a NEON vld[1234]-dup operation (load one element and duplicate it
/// across all lanes of NumVecs vectors), optionally with post-increment
/// update. DOpcodes covers the double-register forms; QOpcodes0/QOpcodes1
/// cover the quad-register single- and multi-vector forms respectively.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(Parent: N, N: N->getOperand(Num: AddrOpIdx), Addr&: MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(Num: 0);
  EVT VT = N->getValueType(ResNo: 0);
  bool is64BitVector = VT.is64BitVector();

  // Clamp the alignment operand to what the instruction can encode (no
  // larger than the access, at least 8 bytes, power of two); the 3-vector
  // forms have no alignment encoding, so it stays 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Val: Alignment, DL: dl, VT: MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The result is one wide tuple register (modeled as a vector of i64);
  // a 3-vector group is padded out to a 4-register tuple.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(Context&: *CurDAG->getContext(), VT: MVT::i64, NumElements: ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(x: ResTy);
  if (isUpdating)
    ResTys.push_back(x: MVT::i32);
  ResTys.push_back(x: MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(Elt: MemAddr);
  Ops.push_back(Elt: Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(Num: 2);
    // An increment equal to the access size is encoded implicitly (Reg0);
    // otherwise switch to the register-update opcode and pass it explicitly.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VecTy: VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Elt: Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Elt: Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    // Multi-vector quad forms are split: the first instruction loads into an
    // undefined tuple, and its result feeds the main instruction below.
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(Opcode: QOpcodes0[OpcodeIndex], dl, VT1: ResTy,
                                          VT2: MVT::Other, Ops: OpsA);
    Ops.push_back(Elt: SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Elt: Pred);
  Ops.push_back(Elt: Reg0);
  Ops.push_back(Elt: Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: VLdDup), NewMemRefs: {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(F: SDValue(N, 0), T: SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(F: SDValue(N, Vec),
                  T: CurDAG->getTargetExtractSubreg(SRIdx: SubIdx+Vec, DL: dl, VT, Operand: SuperReg));
    }
  }
  // Rewire the chain and, for updating forms, the writeback result.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
3066
/// Try to combine a pair of adjacent v8i16/v8f16 INSERT_VECTOR_ELTs (N and
/// its vector operand) into a single f32 subregister move, or into a
/// VMOVX/VINS sequence, which is cheaper on MVE than two lane inserts.
/// Returns true and replaces N on success.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(Num: 0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Val: Ins1.getOperand(i: 2)) ||
      !isa<ConstantSDNode>(Val: Ins2.getOperand(i: 2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The two inserts must target one even/odd lane pair (Lane2 even, Lane1
  // the odd lane directly above it) so they cover one full f32 subregister.
  unsigned Lane1 = Ins1.getConstantOperandVal(i: 2);
  unsigned Lane2 = Ins2.getConstantOperandVal(i: 2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(i: 1);
  SDValue Val2 = Ins2.getOperand(i: 1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val: Val1.getOperand(i: 1)) &&
      isa<ConstantSDNode>(Val: Val2.getOperand(i: 1)) &&
      (Val1.getOperand(i: 0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(i: 0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(i: 0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(i: 0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(i: 1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(i: 1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(i: 0) == Val2.getOperand(i: 0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT, Operand: Ins2.getOperand(i: 0),
          Subreg: NewExt);
      ReplaceUses(F: Ins1, T: NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
    // extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          SRIdx: ARM::ssub_0 + ExtractLane1 / 2, DL: dl, VT: MVT::f32, Operand: Val1.getOperand(i: 0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          SRIdx: ARM::ssub_0 + ExtractLane2 / 2, DL: dl, VT: MVT::f32, Operand: Val2.getOperand(i: 0));
      // VMOVX moves the top half of an f32 lane down so it can be VINSed.
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(Opcode: ARM::VMOVH, dl, VT: MVT::f32, Op1: Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Inp2, Op2: Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
                                        Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
      ReplaceUses(F: Ins1, T: NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS = CurDAG->getMachineNode(Opcode: ARM::VINSH, dl, VT: MVT::f32, Op1: Val2, Op2: Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(SRIdx: ARM::ssub_0 + Lane2 / 2, DL: dl, VT: MVT::v4f32,
                                      Operand: Ins2.getOperand(i: 0), Subreg: SDValue(VINS, 0));
    ReplaceUses(F: Ins1, T: NewIns);
    return true;
  }

  return false;
}
3157
/// Try to replace N (a float<->fixed conversion expressed as a multiply by a
/// constant power of two combined with an int<->float conversion) with a
/// single MVE fixed-point VCVT. FMul is the multiply whose constant operand
/// encodes the scale; FixedToFloat selects the conversion direction.
/// Returns true and replaces N on success.
bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(ResNo: 0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  // Look through the int->float conversion (if any) to the raw integer input.
  SDValue ImmNode = FMul->getOperand(Num: 1);
  SDValue VecVal = FMul->getOperand(Num: 0);
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(Num: 0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Look through a bitcast of the constant operand, but only if it does not
  // change the element size.
  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(i: 0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Recover the splatted constant multiplier as an APFloat, whichever node
  // form it was materialized in.
  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(Val: ImmNode.getOperand(i: 0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(i: 0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(ModImm: Imm, EltBits&: ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(Imm: ImmNode.getConstantOperandVal(i: 0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(Inv: &ToConvert))
      return false;
  }
  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Result&: Converted, RM: llvm::RoundingMode::NearestTiesToEven,
                             IsExact: &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  // The instruction encoding limits the fractional bit count to the element
  // width.
  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(Val: FracBits, DL: SDLoc(N), VT: MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, Loc: SDLoc(N), InactiveTy: Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: SDLoc(N), VT: Type, Ops));
  return true;
}
3259
3260bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3261 // Transform a floating-point to fixed-point conversion to a VCVT
3262 if (!Subtarget->hasMVEFloatOps())
3263 return false;
3264 EVT Type = N->getValueType(ResNo: 0);
3265 if (!Type.isVector())
3266 return false;
3267 unsigned int ScalarBits = Type.getScalarSizeInBits();
3268
3269 bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3270 N->getOpcode() == ISD::FP_TO_UINT_SAT;
3271 SDNode *Node = N->getOperand(Num: 0).getNode();
3272
3273 // floating-point to fixed-point with one fractional bit gets turned into an
3274 // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3275 if (Node->getOpcode() == ISD::FADD) {
3276 if (Node->getOperand(Num: 0) != Node->getOperand(Num: 1))
3277 return false;
3278 SDNodeFlags Flags = Node->getFlags();
3279 // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3280 // allowed in 16 bit unsigned floats
3281 if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3282 return false;
3283
3284 unsigned Opcode;
3285 switch (ScalarBits) {
3286 case 16:
3287 Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3288 break;
3289 case 32:
3290 Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3291 break;
3292 }
3293 SmallVector<SDValue, 3> Ops{Node->getOperand(Num: 0),
3294 CurDAG->getConstant(Val: 1, DL: dl, VT: MVT::i32)};
3295 AddEmptyMVEPredicateToOps(Ops, Loc: dl, InactiveTy: Type);
3296
3297 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl, VT: Type, Ops));
3298 return true;
3299 }
3300
3301 if (Node->getOpcode() != ISD::FMUL)
3302 return false;
3303
3304 return transformFixedFloatingPointConversion(N, FMul: Node, IsUnsigned, FixedToFloat: false);
3305}
3306
3307bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3308 // Transform a fixed-point to floating-point conversion to a VCVT
3309 if (!Subtarget->hasMVEFloatOps())
3310 return false;
3311 auto Type = N->getValueType(ResNo: 0);
3312 if (!Type.isVector())
3313 return false;
3314
3315 auto LHS = N->getOperand(Num: 0);
3316 if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3317 return false;
3318
3319 return transformFixedFloatingPointConversion(
3320 N, FMul: N, IsUnsigned: LHS.getOpcode() == ISD::UINT_TO_FP, FixedToFloat: true);
3321}
3322
/// Try to select N as a single bitfield extract (t2SBFX/SBFX when isSigned,
/// t2UBFX/UBFX otherwise), or as a cheaper single shift when the extracted
/// field reaches the top of the word. N is an AND, SRL, SRA or
/// SIGN_EXTEND_INREG node (see the call sites in Select()).
///
/// \returns true iff N was replaced with a machine node; false means fall
/// back to the autogenerated patterns.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  // SBFX/UBFX require ARMv6T2.
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
                                Imm&: Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = llvm::countr_one(Value: And_imm) - 1;
        // The shift amount is the position of the lowest extracted bit.
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(ResNo: 0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                              CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Opcode: ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(Val: ARM_AM::getSORegOpc(ShOp: ShOpcVal, Imm: LSB), DL: dl,
                                      VT: MVT::i32);
          SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, MachineOpc: ARM::MOVsi, VT: MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                          CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                          CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
        return true;
      }
    }
    // AND without a matching SRL underneath: nothing to fold.
    return false;
  }

  // Otherwise, we're looking for a shift of a shift. Here N is the outer
  // right shift and operand 0 is an SHL with an immediate amount.
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      // The SHL/SRL pair isolates a field starting at Srl_imm - Shl_imm; a
      // negative result would mean the field starts below bit 0.
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                        CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                        CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: And_imm) &&
      isShiftedMask_32(Value: And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = llvm::countr_zero(Val: And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N: N->getOperand(Num: 1), Imm&: Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = llvm::Log2_32(Value: And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                        CurDAG->getTargetConstant(Val: Srl_imm, DL: dl, VT: MVT::i32),
                        CurDAG->getTargetConstant(Val: Width, DL: dl, VT: MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
      return true;
    }
  }

  // Finally, a sign-extend-in-reg of a right shift is a signed bitfield
  // extract of `Width` bits starting at the shift amount.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: LSB) &&
        !isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SRA, Imm&: LSB))
      return false;

    // The field must fit entirely within the 32-bit register.
    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
                      CurDAG->getTargetConstant(Val: LSB, DL: dl, VT: MVT::i32),
                      CurDAG->getTargetConstant(Val: Width - 1, DL: dl, VT: MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
    return true;
  }

  return false;
}
3458
3459/// We've got special pseudo-instructions for these
3460void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3461 unsigned Opcode;
3462 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
3463 if (MemTy == MVT::i8)
3464 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3465 else if (MemTy == MVT::i16)
3466 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3467 else if (MemTy == MVT::i32)
3468 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3469 else
3470 llvm_unreachable("Unknown AtomicCmpSwap type");
3471
3472 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
3473 N->getOperand(Num: 0)};
3474 SDNode *CmpSwap = CurDAG->getMachineNode(
3475 Opcode, dl: SDLoc(N),
3476 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other), Ops);
3477
3478 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3479 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3480
3481 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
3482 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
3483 CurDAG->RemoveDeadNode(N);
3484}
3485
3486static std::optional<std::pair<unsigned, unsigned>>
3487getContiguousRangeOfSetBits(const APInt &A) {
3488 unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3489 unsigned LastOne = A.countr_zero();
3490 if (A.popcount() != (FirstOne - LastOne + 1))
3491 return std::nullopt;
3492 return std::make_pair(x&: FirstOne, y&: LastOne);
3493}
3494
/// Try to simplify (CMPZ (and X, C), #0) on Thumb targets, where C's set
/// bits are contiguous, by replacing the AND with flag-setting shifts
/// (LSLS/LSRS) so the comparison tests the same bits for free.
///
/// \param SwitchEQNEToPLMI set to true when the single tested bit was moved
///        into the sign bit, so the caller must rewrite EQ/NE into PL/MI.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(Num: 0);
  // Only safe if the CMPZ is the AND's sole user, since the AND itself is
  // replaced below.
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(Num: 1);
  if (!isNullConstant(V: Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(i: 0);
  auto C = dyn_cast<ConstantSDNode>(Val: And.getOperand(i: 1));

  if (!C)
    return;
  // Range is (MSB position, LSB position) of C's contiguous run of set bits.
  auto Range = getContiguousRangeOfSetBits(A: C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Helper: emit an immediate-shift machine node (Thumb-1 flag-setting form
  // or the equivalent Thumb-2 opcode).
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
                        CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
      return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
    } else {
      // Thumb-1: shifts implicitly set CPSR, modelled as an explicit def.
      SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Src,
                       CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
      return CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(F: And.getNode(), T: NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(F: And.getNode(), T: NewN);
  } else if (Range->first == Range->second) {
    // 3. Only one bit is set. We can shift this into the sign bit and use a
    // PL/MI comparison. This is not safe if CMPZ has multiple uses because
    // only one of them (the one currently being selected) will be switched
    // to use the new condition code.
    if (!N->hasOneUse())
      return;
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(F: And.getNode(), T: NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    // 4. Do a double shift to clear bottom and top bits, but only in
    // thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(F: And.getNode(), T: NewN);
  }
}
3568
3569static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3570 unsigned Opc128[3]) {
3571 assert((VT.is64BitVector() || VT.is128BitVector()) &&
3572 "Unexpected vector shuffle length");
3573 switch (VT.getScalarSizeInBits()) {
3574 default:
3575 llvm_unreachable("Unexpected vector shuffle element size");
3576 case 8:
3577 return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3578 case 16:
3579 return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3580 case 32:
3581 return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3582 }
3583}
3584
3585void ARMDAGToDAGISel::Select(SDNode *N) {
3586 SDLoc dl(N);
3587
3588 if (N->isMachineOpcode()) {
3589 N->setNodeId(-1);
3590 return; // Already selected.
3591 }
3592
3593 switch (N->getOpcode()) {
3594 default: break;
3595 case ISD::STORE: {
3596 // For Thumb1, match an sp-relative store in C++. This is a little
3597 // unfortunate, but I don't think I can make the chain check work
3598 // otherwise. (The chain of the store has to be the same as the chain
3599 // of the CopyFromReg, or else we can't replace the CopyFromReg with
3600 // a direct reference to "SP".)
3601 //
3602 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3603 // a different addressing mode from other four-byte stores.
3604 //
3605 // This pattern usually comes up with call arguments.
3606 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
3607 SDValue Ptr = ST->getBasePtr();
3608 if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3609 int RHSC = 0;
3610 if (Ptr.getOpcode() == ISD::ADD &&
3611 isScaledConstantInRange(Node: Ptr.getOperand(i: 1), /*Scale=*/4, RangeMin: 0, RangeMax: 256, ScaledConstant&: RHSC))
3612 Ptr = Ptr.getOperand(i: 0);
3613
3614 if (Ptr.getOpcode() == ISD::CopyFromReg &&
3615 cast<RegisterSDNode>(Val: Ptr.getOperand(i: 1))->getReg() == ARM::SP &&
3616 Ptr.getOperand(i: 0) == ST->getChain()) {
3617 SDValue Ops[] = {ST->getValue(),
3618 CurDAG->getRegister(Reg: ARM::SP, VT: MVT::i32),
3619 CurDAG->getTargetConstant(Val: RHSC, DL: dl, VT: MVT::i32),
3620 getAL(CurDAG, dl),
3621 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3622 ST->getChain()};
3623 MachineSDNode *ResNode =
3624 CurDAG->getMachineNode(Opcode: ARM::tSTRspi, dl, VT: MVT::Other, Ops);
3625 MachineMemOperand *MemOp = ST->getMemOperand();
3626 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3627 ReplaceNode(F: N, T: ResNode);
3628 return;
3629 }
3630 }
3631 break;
3632 }
3633 case ISD::WRITE_REGISTER:
3634 if (tryWriteRegister(N))
3635 return;
3636 break;
3637 case ISD::READ_REGISTER:
3638 if (tryReadRegister(N))
3639 return;
3640 break;
3641 case ISD::INLINEASM:
3642 case ISD::INLINEASM_BR:
3643 if (tryInlineAsm(N))
3644 return;
3645 break;
3646 case ISD::Constant: {
3647 unsigned Val = N->getAsZExtVal();
3648 // If we can't materialize the constant we need to use a literal pool
3649 if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3650 !Subtarget->genExecuteOnly()) {
3651 SDValue CPIdx = CurDAG->getTargetConstantPool(
3652 C: ConstantInt::get(Ty: Type::getInt32Ty(C&: *CurDAG->getContext()), V: Val),
3653 VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3654
3655 SDNode *ResNode;
3656 if (Subtarget->isThumb()) {
3657 SDValue Ops[] = {
3658 CPIdx,
3659 getAL(CurDAG, dl),
3660 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3661 CurDAG->getEntryNode()
3662 };
3663 ResNode = CurDAG->getMachineNode(Opcode: ARM::tLDRpci, dl, VT1: MVT::i32, VT2: MVT::Other,
3664 Ops);
3665 } else {
3666 SDValue Ops[] = {
3667 CPIdx,
3668 CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3669 getAL(CurDAG, dl),
3670 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3671 CurDAG->getEntryNode()
3672 };
3673 ResNode = CurDAG->getMachineNode(Opcode: ARM::LDRcp, dl, VT1: MVT::i32, VT2: MVT::Other,
3674 Ops);
3675 }
3676 // Annotate the Node with memory operand information so that MachineInstr
3677 // queries work properly. This e.g. gives the register allocation the
3678 // required information for rematerialization.
3679 MachineFunction& MF = CurDAG->getMachineFunction();
3680 MachineMemOperand *MemOp =
3681 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3682 F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
3683
3684 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
3685
3686 ReplaceNode(F: N, T: ResNode);
3687 return;
3688 }
3689
3690 // Other cases are autogenerated.
3691 break;
3692 }
3693 case ISD::FrameIndex: {
3694 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3695 int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
3696 SDValue TFI = CurDAG->getTargetFrameIndex(
3697 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
3698 if (Subtarget->isThumb1Only()) {
3699 // Set the alignment of the frame object to 4, to avoid having to generate
3700 // more than one ADD
3701 MachineFrameInfo &MFI = MF->getFrameInfo();
3702 if (MFI.getObjectAlign(ObjectIdx: FI) < Align(4))
3703 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: Align(4));
3704 CurDAG->SelectNodeTo(N, MachineOpc: ARM::tADDframe, VT: MVT::i32, Op1: TFI,
3705 Op2: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
3706 return;
3707 } else {
3708 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3709 ARM::t2ADDri : ARM::ADDri);
3710 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
3711 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3712 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3713 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::i32, Ops);
3714 return;
3715 }
3716 }
3717 case ISD::INSERT_VECTOR_ELT: {
3718 if (tryInsertVectorElt(N))
3719 return;
3720 break;
3721 }
3722 case ISD::SRL:
3723 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3724 return;
3725 break;
3726 case ISD::SIGN_EXTEND_INREG:
3727 case ISD::SRA:
3728 if (tryV6T2BitfieldExtractOp(N, isSigned: true))
3729 return;
3730 break;
3731 case ISD::FP_TO_UINT:
3732 case ISD::FP_TO_SINT:
3733 case ISD::FP_TO_UINT_SAT:
3734 case ISD::FP_TO_SINT_SAT:
3735 if (tryFP_TO_INT(N, dl))
3736 return;
3737 break;
3738 case ISD::FMUL:
3739 if (tryFMULFixed(N, dl))
3740 return;
3741 break;
3742 case ISD::MUL:
3743 if (Subtarget->isThumb1Only())
3744 break;
3745 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) {
3746 unsigned RHSV = C->getZExtValue();
3747 if (!RHSV) break;
3748 if (isPowerOf2_32(Value: RHSV-1)) { // 2^n+1?
3749 unsigned ShImm = Log2_32(Value: RHSV-1);
3750 if (ShImm >= 32)
3751 break;
3752 SDValue V = N->getOperand(Num: 0);
3753 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3754 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3755 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3756 if (Subtarget->isThumb()) {
3757 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3758 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2ADDrs, VT: MVT::i32, Ops);
3759 return;
3760 } else {
3761 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3762 Reg0 };
3763 CurDAG->SelectNodeTo(N, MachineOpc: ARM::ADDrsi, VT: MVT::i32, Ops);
3764 return;
3765 }
3766 }
3767 if (isPowerOf2_32(Value: RHSV+1)) { // 2^n-1?
3768 unsigned ShImm = Log2_32(Value: RHSV+1);
3769 if (ShImm >= 32)
3770 break;
3771 SDValue V = N->getOperand(Num: 0);
3772 ShImm = ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: ShImm);
3773 SDValue ShImmOp = CurDAG->getTargetConstant(Val: ShImm, DL: dl, VT: MVT::i32);
3774 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3775 if (Subtarget->isThumb()) {
3776 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3777 CurDAG->SelectNodeTo(N, MachineOpc: ARM::t2RSBrs, VT: MVT::i32, Ops);
3778 return;
3779 } else {
3780 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3781 Reg0 };
3782 CurDAG->SelectNodeTo(N, MachineOpc: ARM::RSBrsi, VT: MVT::i32, Ops);
3783 return;
3784 }
3785 }
3786 }
3787 break;
3788 case ISD::AND: {
3789 // Check for unsigned bitfield extract
3790 if (tryV6T2BitfieldExtractOp(N, isSigned: false))
3791 return;
3792
3793 // If an immediate is used in an AND node, it is possible that the immediate
3794 // can be more optimally materialized when negated. If this is the case we
3795 // can negate the immediate and use a BIC instead.
3796 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
3797 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3798 uint32_t Imm = (uint32_t) N1C->getZExtValue();
3799
3800 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3801 // immediate can be negated and fit in the immediate operand of
3802 // a t2BIC, don't do any manual transform here as this can be
3803 // handled by the generic ISel machinery.
3804 bool PreferImmediateEncoding =
3805 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3806 if (!PreferImmediateEncoding &&
3807 ConstantMaterializationCost(Val: Imm, Subtarget) >
3808 ConstantMaterializationCost(Val: ~Imm, Subtarget)) {
3809 // The current immediate costs more to materialize than a negated
3810 // immediate, so negate the immediate and use a BIC.
3811 SDValue NewImm = CurDAG->getConstant(Val: ~Imm, DL: dl, VT: MVT::i32);
3812 // If the new constant didn't exist before, reposition it in the topological
3813 // ordering so it is just before N. Otherwise, don't touch its location.
3814 if (NewImm->getNodeId() == -1)
3815 CurDAG->RepositionNode(Position: N->getIterator(), N: NewImm.getNode());
3816
3817 if (!Subtarget->hasThumb2()) {
3818 SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32),
3819 N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3820 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3821 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::tBIC, dl, VT: MVT::i32, Ops));
3822 return;
3823 } else {
3824 SDValue Ops[] = {N->getOperand(Num: 0), NewImm, getAL(CurDAG, dl),
3825 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3826 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3827 ReplaceNode(F: N,
3828 T: CurDAG->getMachineNode(Opcode: ARM::t2BICrr, dl, VT: MVT::i32, Ops));
3829 return;
3830 }
3831 }
3832 }
3833
3834 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3835 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3836 // are entirely contributed by c2 and lower 16-bits are entirely contributed
3837 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3838 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
3839 EVT VT = N->getValueType(ResNo: 0);
3840 if (VT != MVT::i32)
3841 break;
3842 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3843 ? ARM::t2MOVTi16
3844 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3845 if (!Opc)
3846 break;
3847 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
3848 N1C = dyn_cast<ConstantSDNode>(Val&: N1);
3849 if (!N1C)
3850 break;
3851 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3852 SDValue N2 = N0.getOperand(i: 1);
3853 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Val&: N2);
3854 if (!N2C)
3855 break;
3856 unsigned N1CVal = N1C->getZExtValue();
3857 unsigned N2CVal = N2C->getZExtValue();
3858 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3859 (N1CVal & 0xffffU) == 0xffffU &&
3860 (N2CVal & 0xffffU) == 0x0U) {
3861 SDValue Imm16 = CurDAG->getTargetConstant(Val: (N2CVal & 0xFFFF0000U) >> 16,
3862 DL: dl, VT: MVT::i32);
3863 SDValue Ops[] = { N0.getOperand(i: 0), Imm16,
3864 getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3865 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
3866 return;
3867 }
3868 }
3869
3870 break;
3871 }
3872 case ARMISD::UMAAL: {
3873 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3874 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1),
3875 N->getOperand(Num: 2), N->getOperand(Num: 3),
3876 getAL(CurDAG, dl),
3877 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3878 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3879 return;
3880 }
3881 case ARMISD::UMLAL:{
3882 if (Subtarget->isThumb()) {
3883 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3884 N->getOperand(Num: 3), getAL(CurDAG, dl),
3885 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3886 ReplaceNode(
3887 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2UMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3888 return;
3889 }else{
3890 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3891 N->getOperand(Num: 3), getAL(CurDAG, dl),
3892 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3893 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3894 ReplaceNode(F: N, T: CurDAG->getMachineNode(
3895 Opcode: Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3896 VT1: MVT::i32, VT2: MVT::i32, Ops));
3897 return;
3898 }
3899 }
3900 case ARMISD::SMLAL:{
3901 if (Subtarget->isThumb()) {
3902 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3903 N->getOperand(Num: 3), getAL(CurDAG, dl),
3904 CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
3905 ReplaceNode(
3906 F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2SMLAL, dl, VT1: MVT::i32, VT2: MVT::i32, Ops));
3907 return;
3908 }else{
3909 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), N->getOperand(Num: 2),
3910 N->getOperand(Num: 3), getAL(CurDAG, dl),
3911 CurDAG->getRegister(Reg: 0, VT: MVT::i32),
3912 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3913 ReplaceNode(F: N, T: CurDAG->getMachineNode(
3914 Opcode: Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3915 VT1: MVT::i32, VT2: MVT::i32, Ops));
3916 return;
3917 }
3918 }
3919 case ARMISD::SUBE: {
3920 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3921 break;
3922 // Look for a pattern to match SMMLS
3923 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3924 if (N->getOperand(Num: 1).getOpcode() != ISD::SMUL_LOHI ||
3925 N->getOperand(Num: 2).getOpcode() != ARMISD::SUBC ||
3926 !SDValue(N, 1).use_empty())
3927 break;
3928
3929 if (Subtarget->isThumb())
3930 assert(Subtarget->hasThumb2() &&
3931 "This pattern should not be generated for Thumb");
3932
3933 SDValue SmulLoHi = N->getOperand(Num: 1);
3934 SDValue Subc = N->getOperand(Num: 2);
3935 SDValue Zero = Subc.getOperand(i: 0);
3936
3937 if (!isNullConstant(V: Zero) || Subc.getOperand(i: 1) != SmulLoHi.getValue(R: 0) ||
3938 N->getOperand(Num: 1) != SmulLoHi.getValue(R: 1) ||
3939 N->getOperand(Num: 2) != Subc.getValue(R: 1))
3940 break;
3941
3942 unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3943 SDValue Ops[] = { SmulLoHi.getOperand(i: 0), SmulLoHi.getOperand(i: 1),
3944 N->getOperand(Num: 0), getAL(CurDAG, dl),
3945 CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
3946 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops));
3947 return;
3948 }
3949 case ISD::LOAD: {
3950 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3951 return;
3952 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3953 if (tryT2IndexedLoad(N))
3954 return;
3955 } else if (Subtarget->isThumb()) {
3956 if (tryT1IndexedLoad(N))
3957 return;
3958 } else if (tryARMIndexedLoad(N))
3959 return;
3960 // Other cases are autogenerated.
3961 break;
3962 }
3963 case ISD::MLOAD:
3964 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3965 return;
3966 // Other cases are autogenerated.
3967 break;
3968 case ARMISD::LDRD: {
3969 if (Subtarget->isThumb2())
3970 break; // TableGen handles isel in this case.
3971 SDValue Base, RegOffset, ImmOffset;
3972 const SDValue &Chain = N->getOperand(Num: 0);
3973 const SDValue &Addr = N->getOperand(Num: 1);
3974 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
3975 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
3976 // The register-offset variant of LDRD mandates that the register
3977 // allocated to RegOffset is not reused in any of the remaining operands.
3978 // This restriction is currently not enforced. Therefore emitting this
3979 // variant is explicitly avoided.
3980 Base = Addr;
3981 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
3982 }
3983 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
3984 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::LOADDUAL, dl,
3985 ResultTys: {MVT::Untyped, MVT::Other}, Ops);
3986 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
3987 Operand: SDValue(New, 0));
3988 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
3989 Operand: SDValue(New, 0));
3990 transferMemOperands(N, Result: New);
3991 ReplaceUses(F: SDValue(N, 0), T: Lo);
3992 ReplaceUses(F: SDValue(N, 1), T: Hi);
3993 ReplaceUses(F: SDValue(N, 2), T: SDValue(New, 1));
3994 CurDAG->RemoveDeadNode(N);
3995 return;
3996 }
3997 case ARMISD::STRD: {
3998 if (Subtarget->isThumb2())
3999 break; // TableGen handles isel in this case.
4000 SDValue Base, RegOffset, ImmOffset;
4001 const SDValue &Chain = N->getOperand(Num: 0);
4002 const SDValue &Addr = N->getOperand(Num: 3);
4003 SelectAddrMode3(N: Addr, Base, Offset&: RegOffset, Opc&: ImmOffset);
4004 if (RegOffset != CurDAG->getRegister(Reg: 0, VT: MVT::i32)) {
4005 // The register-offset variant of STRD mandates that the register
4006 // allocated to RegOffset is not reused in any of the remaining operands.
4007 // This restriction is currently not enforced. Therefore emitting this
4008 // variant is explicitly avoided.
4009 Base = Addr;
4010 RegOffset = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
4011 }
4012 SDNode *RegPair =
4013 createGPRPairNode(VT: MVT::Untyped, V0: N->getOperand(Num: 1), V1: N->getOperand(Num: 2));
4014 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4015 SDNode *New = CurDAG->getMachineNode(Opcode: ARM::STOREDUAL, dl, VT: MVT::Other, Ops);
4016 transferMemOperands(N, Result: New);
4017 ReplaceUses(F: SDValue(N, 0), T: SDValue(New, 0));
4018 CurDAG->RemoveDeadNode(N);
4019 return;
4020 }
4021 case ARMISD::BRCOND: {
4022 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4023 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4024 // Pattern complexity = 6 cost = 1 size = 0
4025
4026 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4027 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4028 // Pattern complexity = 6 cost = 1 size = 0
4029
4030 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4031 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4032 // Pattern complexity = 6 cost = 1 size = 0
4033
4034 unsigned Opc = Subtarget->isThumb() ?
4035 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4036 SDValue Chain = N->getOperand(Num: 0);
4037 SDValue N1 = N->getOperand(Num: 1);
4038 SDValue N2 = N->getOperand(Num: 2);
4039 SDValue Flags = N->getOperand(Num: 3);
4040 assert(N1.getOpcode() == ISD::BasicBlock);
4041 assert(N2.getOpcode() == ISD::Constant);
4042
4043 unsigned CC = (unsigned)N2->getAsZExtVal();
4044
4045 if (Flags.getOpcode() == ARMISD::CMPZ) {
4046 if (Flags.getOperand(i: 0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4047 SDValue Int = Flags.getOperand(i: 0);
4048 uint64_t ID = Int->getConstantOperandVal(Num: 1);
4049
4050 // Handle low-overhead loops.
4051 if (ID == Intrinsic::loop_decrement_reg) {
4052 SDValue Elements = Int.getOperand(i: 2);
4053 SDValue Size = CurDAG->getTargetConstant(Val: Int.getConstantOperandVal(i: 3),
4054 DL: dl, VT: MVT::i32);
4055
4056 SDValue Args[] = { Elements, Size, Int.getOperand(i: 0) };
4057 SDNode *LoopDec =
4058 CurDAG->getMachineNode(Opcode: ARM::t2LoopDec, dl,
4059 VTs: CurDAG->getVTList(VT1: MVT::i32, VT2: MVT::Other),
4060 Ops: Args);
4061 ReplaceUses(F: Int.getNode(), T: LoopDec);
4062
4063 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4064 SDNode *LoopEnd =
4065 CurDAG->getMachineNode(Opcode: ARM::t2LoopEnd, dl, VT: MVT::Other, Ops: EndArgs);
4066
4067 ReplaceUses(F: N, T: LoopEnd);
4068 CurDAG->RemoveDeadNode(N);
4069 CurDAG->RemoveDeadNode(N: Flags.getNode());
4070 CurDAG->RemoveDeadNode(N: Int.getNode());
4071 return;
4072 }
4073 }
4074
4075 bool SwitchEQNEToPLMI;
4076 SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);
4077 Flags = N->getOperand(Num: 3);
4078
4079 if (SwitchEQNEToPLMI) {
4080 switch ((ARMCC::CondCodes)CC) {
4081 default: llvm_unreachable("CMPZ must be either NE or EQ!");
4082 case ARMCC::NE:
4083 CC = (unsigned)ARMCC::MI;
4084 break;
4085 case ARMCC::EQ:
4086 CC = (unsigned)ARMCC::PL;
4087 break;
4088 }
4089 }
4090 }
4091
4092 SDValue Tmp2 = CurDAG->getTargetConstant(Val: CC, DL: dl, VT: MVT::i32);
4093 Chain = CurDAG->getCopyToReg(Chain, dl, Reg: ARM::CPSR, N: Flags, Glue: SDValue());
4094 SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), Chain,
4095 Chain.getValue(R: 1)};
4096 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT: MVT::Other, Ops);
4097 return;
4098 }
4099
  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    // This allows us to avoid materializing the expensive negative constant.
    // The CMPZ #0 is useless and will be peepholed away but we need to keep
    // it for its flags output.
    SDValue X = N->getOperand(Num: 0);
    auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          // Thumb2: one t2ADDri covers the whole [0,256) range. Operands are
          // {src, imm, pred, pred-reg, cc_out}.
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
                            CurDAG->getRegister(Reg: 0, VT: MVT::i32) };
          Add = CurDAG->getMachineNode(Opcode: ARM::t2ADDri, dl, VT: MVT::i32, Ops);
        } else {
          // Thumb1: choose the 3-bit or 8-bit immediate ADDS form; CPSR is
          // passed explicitly as the flags-def operand.
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(Reg: ARM::CPSR, VT: MVT::i32), X,
                           CurDAG->getTargetConstant(Val: Addend, DL: dl, VT: MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(Reg: 0, VT: MVT::i32)};
          Add = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Ops);
        }
      }
      if (Add) {
        // Rewrite N in place as CMPZ (ADDS X, #C), #0 so all existing users
        // of the flags result stay attached.
        SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(Val: 0, DL: dl, VT: MVT::i32)};
        CurDAG->MorphNodeTo(N, Opc: ARMISD::CMPZ, VTs: N->getVTList(), Ops: Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
4137
  case ARMISD::CMOV: {
    SDValue Flags = N->getOperand(Num: 3);

    if (Flags.getOpcode() == ARMISD::CMPZ) {
      // Let SelectCMPZ try to rewrite the flag-producing CMPZ (e.g. into a
      // sign-bit test). If it did, an EQ/NE condition must be remapped to
      // PL/MI to match the new flags semantics.
      bool SwitchEQNEToPLMI;
      SelectCMPZ(N: Flags.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(Num: 2);
        ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant(Val: (unsigned)CC, DL: dl, VT: MVT::i32);
        // Re-read operand 3: SelectCMPZ may have replaced the flags node.
        SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), NewARMcc,
                         N->getOperand(Num: 3)};
        CurDAG->MorphNodeTo(N, Opc: ARMISD::CMOV, VTs: N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    EVT VT = N->getValueType(ResNo: 0);
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    // NEON shuffle instructions are predicable: append AL + no pred-register.
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
    // The instruction produces both halves of the interleave (two results of
    // the same vector type).
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    EVT VT = N->getValueType(ResNo: 0);
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    EVT VT = N->getValueType(ResNo: 0);
    unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
    SDValue Ops[] = {N->getOperand(Num: 0), N->getOperand(Num: 1), Pred, PredReg};
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT1: VT, VT2: VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    // Build a vector by inserting the scalar operands into the appropriate
    // subregisters of a register-pair/quad (REG_SEQUENCE-style helpers).
    EVT VecVT = N->getValueType(ResNo: 0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      // v2f64: two D registers forming a Q register.
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          F: N, T: createDRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      // v2f32: two S registers forming a D register.
      ReplaceNode(
          F: N, T: createSRegPairNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1)));
      return;
    }
    // v4f32: four S registers forming a Q register.
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(F: N,
                T: createQuadSRegsNode(VT: VecVT, V0: N->getOperand(Num: 0), V1: N->getOperand(Num: 1),
                                     V2: N->getOperand(Num: 2), V3: N->getOperand(Num: 3)));
    return;
  }
4224
  // Non-updating load-and-duplicate nodes. Opcode tables are indexed by
  // element size (8/16/32); SelectVLDDup picks the entry from the value type.
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 2, DOpcodes: Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 3, DOpcodes: Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: false, NumVecs: 4, DOpcodes: Opcodes);
    return;
  }
4256
  // Post-indexed (writeback) variants of the load-and-duplicate nodes.
  // Q-register forms for 2/3/4 vectors are split into even/odd pseudo pairs.
  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    // Fourth D entry (VLD1q64wb_fixed) handles the 64-bit element case,
    // where "duplicate" degenerates to a plain 2-register load.
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }
4312
  // Post-indexed interleaved loads. VLD2/VLD4 dispatch to NEON opcodes when
  // available, otherwise to the MVE VLD2n/VLD4n staged-load sequences.
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
    } else {
      // MVE: VLD2 is performed as two stages (VLD20 then VLD21); the final
      // stage carries the writeback.
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, NumVecs: 2, Opcodes, HasWriteback: true);
    }
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      // MVE: VLD4 is four stages (VLD40..VLD43); the last stage writes back.
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, NumVecs: 4, Opcodes, HasWriteback: true);
    }
    return;
  }
4390
  // Post-indexed contiguous multi-register VLD1 nodes. NEON only; on other
  // subtargets we break so the autogenerated matcher handles (or rejects) it.
  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }
    break;
  }

  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
4438
  // Post-indexed single-lane interleaved loads. Q tables only have 16/32-bit
  // entries: 8-bit elements never need the Q form.
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: true, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes);
    return;
  }
4468
  // Post-indexed interleaved stores, mirroring the VLDn_UPD cases above.
  // VST2/VST4 require NEON; otherwise fall through to the default matcher.
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, isUpdating: true, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
4527
  // Post-indexed contiguous multi-register VST1 nodes. NEON only; break to
  // the autogenerated matcher otherwise.
  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  // Post-indexed single-lane interleaved stores (IsLoad = false); Q tables
  // again cover only 16/32-bit elements.
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, IsLoad: false, isUpdating: true, NumVecs: 4, DOpcodes, QOpcodes);
    return;
  }
4612
4613 case ISD::INTRINSIC_VOID:
4614 case ISD::INTRINSIC_W_CHAIN: {
4615 unsigned IntNo = N->getConstantOperandVal(Num: 1);
4616 switch (IntNo) {
4617 default:
4618 break;
4619
    // Coprocessor move: read two core registers from a coprocessor.
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(Num: 0);
      unsigned Opc;

      // Pick the Thumb2 or ARM encoding of MRRC/MRRC2.
      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 2), dl)); /* coproc */
      Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 3), dl)); /* opc */
      Ops.push_back(Elt: getI32Imm(Imm: N->getConstantOperandVal(Num: 4), dl)); /* CRm */

      // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
      // instruction will always be '1111' but it is possible in assembly language to specify
      // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(Elt: getAL(CurDAG, dl));
        Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
      }

      Ops.push_back(Elt: Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: RetType, Ops));
      return;
    }
    // 64-bit exclusive loads (plain and acquire variants).
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(Num: 0);
      SDValue MemAddr = N->getOperand(Num: 2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns a i64 value in {i32, i32}
      // Thumb produces two separate i32 results; ARM mode uses a GPRPair
      // (Untyped) that is split with EXTRACT_SUBREG below.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(x: MVT::i32);
        ResTys.push_back(x: MVT::i32);
      } else
        ResTys.push_back(x: MVT::Untyped);
      ResTys.push_back(x: MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(Reg: 0, VT: MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
      CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        // Low word: direct result in Thumb, gsub_0 of the pair in ARM mode.
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(Val: ARM::gsub_0, DL: dl, VT: MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
              dl, VT: MVT::i32, Op1: SDValue(Ld, 0), Op2: SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(F: SDValue(N, 0), T: Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        // High word: direct result in Thumb, gsub_1 of the pair in ARM mode.
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(Val: ARM::gsub_1, DL: dl, VT: MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG,
              dl, VT: MVT::i32, Op1: SDValue(Ld, 0), Op2: SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(F: SDValue(N, 1), T: Result);
      }
      ReplaceUses(F: SDValue(N, 2), T: OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    // 64-bit exclusive stores (plain and release variants). Returns the i32
    // success/failure status of the store.
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(Num: 0);
      SDValue Val0 = N->getOperand(Num: 2);
      SDValue Val1 = N->getOperand(Num: 3);
      SDValue MemAddr = N->getOperand(Num: 4);

      // Store exclusive double return a i32 value which is the return status
      // of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      // Thumb2 takes the two words as separate operands; ARM mode needs them
      // packed into a GPRPair register.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Elt: Val0);
        Ops.push_back(Elt: Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(Elt: SDValue(createGPRPairNode(VT: MVT::Untyped, V0: Val0, V1: Val1), 0));
      Ops.push_back(Elt: MemAddr);
      Ops.push_back(Elt: getAL(CurDAG, dl));
      Ops.push_back(Elt: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
      Ops.push_back(Elt: Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(Opcode: NewOpc, dl, ResultTys: ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
      CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

      ReplaceNode(F: N, T: St);
      return;
    }
4750
    // Non-updating NEON contiguous loads (vld1 and its x2/x3/x4 multi-register
    // variants). Tables are indexed by element size (8/16/32/64).
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      // Q forms are split into a low half (with address update) and high half.
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
4804
    // Non-updating NEON interleaved loads (vld2/vld3/vld4).
    case Intrinsic::arm_neon_vld2: {
      // 64-bit elements use VLD1q64: no interleaving is needed at that width.
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
4843
    // Non-updating NEON load-and-duplicate intrinsics; Q forms use even/odd
    // pseudo pairs.
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, isUpdating: false, NumVecs: 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
4889
    // Non-updating single-lane interleaved loads; Q tables cover 16/32-bit
    // elements only.
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, IsLoad: true, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes);
      return;
    }
4919
4920 case Intrinsic::arm_neon_vst1: {
4921 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
4922 ARM::VST1d32, ARM::VST1d64 };
4923 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4924 ARM::VST1q32, ARM::VST1q64 };
4925 SelectVST(N, isUpdating: false, NumVecs: 1, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4926 return;
4927 }
4928
4929 case Intrinsic::arm_neon_vst1x2: {
4930 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
4931 ARM::VST1q32, ARM::VST1q64 };
4932 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
4933 ARM::VST1d16QPseudo,
4934 ARM::VST1d32QPseudo,
4935 ARM::VST1d64QPseudo };
4936 SelectVST(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4937 return;
4938 }
4939
4940 case Intrinsic::arm_neon_vst1x3: {
4941 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
4942 ARM::VST1d16TPseudo,
4943 ARM::VST1d32TPseudo,
4944 ARM::VST1d64TPseudo };
4945 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4946 ARM::VST1q16LowTPseudo_UPD,
4947 ARM::VST1q32LowTPseudo_UPD,
4948 ARM::VST1q64LowTPseudo_UPD };
4949 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
4950 ARM::VST1q16HighTPseudo,
4951 ARM::VST1q32HighTPseudo,
4952 ARM::VST1q64HighTPseudo };
4953 SelectVST(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4954 return;
4955 }
4956
4957 case Intrinsic::arm_neon_vst1x4: {
4958 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
4959 ARM::VST1d16QPseudo,
4960 ARM::VST1d32QPseudo,
4961 ARM::VST1d64QPseudo };
4962 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4963 ARM::VST1q16LowQPseudo_UPD,
4964 ARM::VST1q32LowQPseudo_UPD,
4965 ARM::VST1q64LowQPseudo_UPD };
4966 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
4967 ARM::VST1q16HighQPseudo,
4968 ARM::VST1q32HighQPseudo,
4969 ARM::VST1q64HighQPseudo };
4970 SelectVST(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
4971 return;
4972 }
4973
4974 case Intrinsic::arm_neon_vst2: {
4975 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
4976 ARM::VST2d32, ARM::VST1q64 };
4977 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
4978 ARM::VST2q32Pseudo };
4979 SelectVST(N, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes0: QOpcodes, QOpcodes1: nullptr);
4980 return;
4981 }
4982
4983 case Intrinsic::arm_neon_vst3: {
4984 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
4985 ARM::VST3d16Pseudo,
4986 ARM::VST3d32Pseudo,
4987 ARM::VST1d64TPseudo };
4988 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4989 ARM::VST3q16Pseudo_UPD,
4990 ARM::VST3q32Pseudo_UPD };
4991 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
4992 ARM::VST3q16oddPseudo,
4993 ARM::VST3q32oddPseudo };
4994 SelectVST(N, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes0, QOpcodes1);
4995 return;
4996 }
4997
4998 case Intrinsic::arm_neon_vst4: {
4999 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5000 ARM::VST4d16Pseudo,
5001 ARM::VST4d32Pseudo,
5002 ARM::VST1d64QPseudo };
5003 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5004 ARM::VST4q16Pseudo_UPD,
5005 ARM::VST4q32Pseudo_UPD };
5006 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5007 ARM::VST4q16oddPseudo,
5008 ARM::VST4q32oddPseudo };
5009 SelectVST(N, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes0, QOpcodes1);
5010 return;
5011 }
5012
5013 case Intrinsic::arm_neon_vst2lane: {
5014 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5015 ARM::VST2LNd16Pseudo,
5016 ARM::VST2LNd32Pseudo };
5017 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5018 ARM::VST2LNq32Pseudo };
5019 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 2, DOpcodes, QOpcodes);
5020 return;
5021 }
5022
5023 case Intrinsic::arm_neon_vst3lane: {
5024 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5025 ARM::VST3LNd16Pseudo,
5026 ARM::VST3LNd32Pseudo };
5027 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5028 ARM::VST3LNq32Pseudo };
5029 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 3, DOpcodes, QOpcodes);
5030 return;
5031 }
5032
5033 case Intrinsic::arm_neon_vst4lane: {
5034 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5035 ARM::VST4LNd16Pseudo,
5036 ARM::VST4LNd32Pseudo };
5037 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5038 ARM::VST4LNq32Pseudo };
5039 SelectVLDSTLane(N, IsLoad: false, isUpdating: false, NumVecs: 4, DOpcodes, QOpcodes);
5040 return;
5041 }
5042
5043 case Intrinsic::arm_mve_vldr_gather_base_wb:
5044 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5045 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5046 ARM::MVE_VLDRDU64_qi_pre};
5047 SelectMVE_WB(N, Opcodes,
5048 Predicated: IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5049 return;
5050 }
5051
5052 case Intrinsic::arm_mve_vld2q: {
5053 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5054 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5055 ARM::MVE_VLD21_16};
5056 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5057 ARM::MVE_VLD21_32};
5058 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5059 SelectMVE_VLD(N, NumVecs: 2, Opcodes, HasWriteback: false);
5060 return;
5061 }
5062
5063 case Intrinsic::arm_mve_vld4q: {
5064 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5065 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5066 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5067 ARM::MVE_VLD42_16,
5068 ARM::MVE_VLD43_16};
5069 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5070 ARM::MVE_VLD42_32,
5071 ARM::MVE_VLD43_32};
5072 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5073 SelectMVE_VLD(N, NumVecs: 4, Opcodes, HasWriteback: false);
5074 return;
5075 }
5076 }
5077 break;
5078 }
5079
5080 case ISD::INTRINSIC_WO_CHAIN: {
5081 unsigned IntNo = N->getConstantOperandVal(Num: 0);
5082 switch (IntNo) {
5083 default:
5084 break;
5085
5086 // Scalar f32 -> bf16
5087 case Intrinsic::arm_neon_vcvtbfp2bf: {
5088 SDLoc dl(N);
5089 const SDValue &Src = N->getOperand(Num: 1);
5090 llvm::EVT DestTy = N->getValueType(ResNo: 0);
5091 SDValue Pred = getAL(CurDAG, dl);
5092 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
5093 SDValue Ops[] = { Src, Src, Pred, Reg0 };
5094 CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVTB, VT: DestTy, Ops);
5095 return;
5096 }
5097
5098 // Vector v4f32 -> v4bf16
5099 case Intrinsic::arm_neon_vcvtfp2bf: {
5100 SDLoc dl(N);
5101 const SDValue &Src = N->getOperand(Num: 1);
5102 SDValue Pred = getAL(CurDAG, dl);
5103 SDValue Reg0 = CurDAG->getRegister(Reg: 0, VT: MVT::i32);
5104 SDValue Ops[] = { Src, Pred, Reg0 };
5105 CurDAG->SelectNodeTo(N, MachineOpc: ARM::BF16_VCVT, VT: MVT::v4bf16, Ops);
5106 return;
5107 }
5108
5109 case Intrinsic::arm_mve_urshrl:
5110 SelectMVE_LongShift(N, Opcode: ARM::MVE_URSHRL, Immediate: true, HasSaturationOperand: false);
5111 return;
5112 case Intrinsic::arm_mve_uqshll:
5113 SelectMVE_LongShift(N, Opcode: ARM::MVE_UQSHLL, Immediate: true, HasSaturationOperand: false);
5114 return;
5115 case Intrinsic::arm_mve_srshrl:
5116 SelectMVE_LongShift(N, Opcode: ARM::MVE_SRSHRL, Immediate: true, HasSaturationOperand: false);
5117 return;
5118 case Intrinsic::arm_mve_sqshll:
5119 SelectMVE_LongShift(N, Opcode: ARM::MVE_SQSHLL, Immediate: true, HasSaturationOperand: false);
5120 return;
5121 case Intrinsic::arm_mve_uqrshll:
5122 SelectMVE_LongShift(N, Opcode: ARM::MVE_UQRSHLL, Immediate: false, HasSaturationOperand: true);
5123 return;
5124 case Intrinsic::arm_mve_sqrshrl:
5125 SelectMVE_LongShift(N, Opcode: ARM::MVE_SQRSHRL, Immediate: false, HasSaturationOperand: true);
5126 return;
5127
5128 case Intrinsic::arm_mve_vadc:
5129 case Intrinsic::arm_mve_vadc_predicated:
5130 SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VADC, OpcodeWithNoCarry: ARM::MVE_VADCI, Add: true,
5131 Predicated: IntNo == Intrinsic::arm_mve_vadc_predicated);
5132 return;
5133 case Intrinsic::arm_mve_vsbc:
5134 case Intrinsic::arm_mve_vsbc_predicated:
5135 SelectMVE_VADCSBC(N, OpcodeWithCarry: ARM::MVE_VSBC, OpcodeWithNoCarry: ARM::MVE_VSBCI, Add: false,
5136 Predicated: IntNo == Intrinsic::arm_mve_vsbc_predicated);
5137 return;
5138 case Intrinsic::arm_mve_vshlc:
5139 case Intrinsic::arm_mve_vshlc_predicated:
5140 SelectMVE_VSHLC(N, Predicated: IntNo == Intrinsic::arm_mve_vshlc_predicated);
5141 return;
5142
5143 case Intrinsic::arm_mve_vmlldava:
5144 case Intrinsic::arm_mve_vmlldava_predicated: {
5145 static const uint16_t OpcodesU[] = {
5146 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
5147 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
5148 };
5149 static const uint16_t OpcodesS[] = {
5150 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
5151 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
5152 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
5153 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5154 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
5155 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
5156 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
5157 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5158 };
5159 SelectMVE_VMLLDAV(N, Predicated: IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5160 OpcodesS, OpcodesU);
5161 return;
5162 }
5163
5164 case Intrinsic::arm_mve_vrmlldavha:
5165 case Intrinsic::arm_mve_vrmlldavha_predicated: {
5166 static const uint16_t OpcodesU[] = {
5167 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
5168 };
5169 static const uint16_t OpcodesS[] = {
5170 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
5171 ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5172 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
5173 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5174 };
5175 SelectMVE_VRMLLDAVH(N, Predicated: IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5176 OpcodesS, OpcodesU);
5177 return;
5178 }
5179
5180 case Intrinsic::arm_mve_vidup:
5181 case Intrinsic::arm_mve_vidup_predicated: {
5182 static const uint16_t Opcodes[] = {
5183 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5184 };
5185 SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5186 Predicated: IntNo == Intrinsic::arm_mve_vidup_predicated);
5187 return;
5188 }
5189
5190 case Intrinsic::arm_mve_vddup:
5191 case Intrinsic::arm_mve_vddup_predicated: {
5192 static const uint16_t Opcodes[] = {
5193 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5194 };
5195 SelectMVE_VxDUP(N, Opcodes, Wrapping: false,
5196 Predicated: IntNo == Intrinsic::arm_mve_vddup_predicated);
5197 return;
5198 }
5199
5200 case Intrinsic::arm_mve_viwdup:
5201 case Intrinsic::arm_mve_viwdup_predicated: {
5202 static const uint16_t Opcodes[] = {
5203 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5204 };
5205 SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5206 Predicated: IntNo == Intrinsic::arm_mve_viwdup_predicated);
5207 return;
5208 }
5209
5210 case Intrinsic::arm_mve_vdwdup:
5211 case Intrinsic::arm_mve_vdwdup_predicated: {
5212 static const uint16_t Opcodes[] = {
5213 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5214 };
5215 SelectMVE_VxDUP(N, Opcodes, Wrapping: true,
5216 Predicated: IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5217 return;
5218 }
5219
5220 case Intrinsic::arm_cde_cx1d:
5221 case Intrinsic::arm_cde_cx1da:
5222 case Intrinsic::arm_cde_cx2d:
5223 case Intrinsic::arm_cde_cx2da:
5224 case Intrinsic::arm_cde_cx3d:
5225 case Intrinsic::arm_cde_cx3da: {
5226 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5227 IntNo == Intrinsic::arm_cde_cx2da ||
5228 IntNo == Intrinsic::arm_cde_cx3da;
5229 size_t NumExtraOps;
5230 uint16_t Opcode;
5231 switch (IntNo) {
5232 case Intrinsic::arm_cde_cx1d:
5233 case Intrinsic::arm_cde_cx1da:
5234 NumExtraOps = 0;
5235 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5236 break;
5237 case Intrinsic::arm_cde_cx2d:
5238 case Intrinsic::arm_cde_cx2da:
5239 NumExtraOps = 1;
5240 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5241 break;
5242 case Intrinsic::arm_cde_cx3d:
5243 case Intrinsic::arm_cde_cx3da:
5244 NumExtraOps = 2;
5245 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5246 break;
5247 default:
5248 llvm_unreachable("Unexpected opcode");
5249 }
5250 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5251 return;
5252 }
5253 }
5254 break;
5255 }
5256
5257 case ISD::ATOMIC_CMP_SWAP:
5258 SelectCMP_SWAP(N);
5259 return;
5260 }
5261
5262 SelectCode(N);
5263}
5264
5265// Inspect a register string of the form
5266// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5267// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5268// and obtain the integer operands from them, adding these operands to the
5269// provided vector.
5270static void getIntOperandsFromRegisterString(StringRef RegString,
5271 SelectionDAG *CurDAG,
5272 const SDLoc &DL,
5273 std::vector<SDValue> &Ops) {
5274 SmallVector<StringRef, 5> Fields;
5275 RegString.split(A&: Fields, Separator: ':');
5276
5277 if (Fields.size() > 1) {
5278 bool AllIntFields = true;
5279
5280 for (StringRef Field : Fields) {
5281 // Need to trim out leading 'cp' characters and get the integer field.
5282 unsigned IntField;
5283 AllIntFields &= !Field.trim(Chars: "CPcp").getAsInteger(Radix: 10, Result&: IntField);
5284 Ops.push_back(x: CurDAG->getTargetConstant(Val: IntField, DL, VT: MVT::i32));
5285 }
5286
5287 assert(AllIntFields &&
5288 "Unexpected non-integer value in special register string.");
5289 (void)AllIntFields;
5290 }
5291}
5292
5293// Maps a Banked Register string to its mask value. The mask value returned is
5294// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5295// mask operand, which expresses which register is to be used, e.g. r8, and in
5296// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5297// was invalid.
5298static inline int getBankedRegisterMask(StringRef RegString) {
5299 auto TheReg = ARMBankedReg::lookupBankedRegByName(Name: RegString.lower());
5300 if (!TheReg)
5301 return -1;
5302 return TheReg->Encoding;
5303}
5304
5305// The flags here are common to those allowed for apsr in the A class cores and
5306// those allowed for the special registers in the M class cores. Returns a
5307// value representing which flags were present, -1 if invalid.
5308static inline int getMClassFlagsMask(StringRef Flags) {
5309 return StringSwitch<int>(Flags)
5310 .Case(S: "", Value: 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5311 // correct when flags are not permitted
5312 .Case(S: "g", Value: 0x1)
5313 .Case(S: "nzcvq", Value: 0x2)
5314 .Case(S: "nzcvqg", Value: 0x3)
5315 .Default(Value: -1);
5316}
5317
5318// Maps MClass special registers string to its value for use in the
5319// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5320// Returns -1 to signify that the string was invalid.
5321static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5322 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Name: Reg);
5323 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5324 if (!TheReg || !TheReg->hasRequiredFeatures(ActiveFeatures: FeatureBits))
5325 return -1;
5326 return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5327}
5328
5329static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5330 // The mask operand contains the special register (R Bit) in bit 4, whether
5331 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5332 // bits 3-0 contains the fields to be accessed in the special register, set by
5333 // the flags provided with the register.
5334 int Mask = 0;
5335 if (Reg == "apsr") {
5336 // The flags permitted for apsr are the same flags that are allowed in
5337 // M class registers. We get the flag value and then shift the flags into
5338 // the correct place to combine with the mask.
5339 Mask = getMClassFlagsMask(Flags);
5340 if (Mask == -1)
5341 return -1;
5342 return Mask << 2;
5343 }
5344
5345 if (Reg != "cpsr" && Reg != "spsr") {
5346 return -1;
5347 }
5348
5349 // This is the same as if the flags were "fc"
5350 if (Flags.empty() || Flags == "all")
5351 return Mask | 0x9;
5352
5353 // Inspect the supplied flags string and set the bits in the mask for
5354 // the relevant and valid flags allowed for cpsr and spsr.
5355 for (char Flag : Flags) {
5356 int FlagVal;
5357 switch (Flag) {
5358 case 'c':
5359 FlagVal = 0x1;
5360 break;
5361 case 'x':
5362 FlagVal = 0x2;
5363 break;
5364 case 's':
5365 FlagVal = 0x4;
5366 break;
5367 case 'f':
5368 FlagVal = 0x8;
5369 break;
5370 default:
5371 FlagVal = 0;
5372 }
5373
5374 // This avoids allowing strings where the same flag bit appears twice.
5375 if (!FlagVal || (Mask & FlagVal))
5376 return -1;
5377 Mask |= FlagVal;
5378 }
5379
5380 // If the register is spsr then we need to set the R bit.
5381 if (Reg == "spsr")
5382 Mask |= 0x10;
5383
5384 return Mask;
5385}
5386
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns false (leaving N intact) if the register string is not something
// this target can read.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register name string.
  const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
  const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      // Five fields (cp:opc1:CRn:CRm:opc2) describe a 32-bit MRC read.
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append(IL: { MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      // Three fields (cp:opc1:CRm) describe a 64-bit MRRC read.
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append(IL: { MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the standard predicate operands (AL condition, no CC register)
    // and the incoming chain.
    Ops.push_back(x: getAL(CurDAG, dl: DL));
    Ops.push_back(x: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
    Ops.push_back(x: N->getOperand(Num: 0));
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: ResTypes, Ops));
    return true;
  }

  // Otherwise the string names a register directly; normalise to lower case
  // for the comparisons below.
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
  if (BankedReg != -1) {
    // Banked registers are read with MRSbanked, taking the encoded
    // register/mode mask as an immediate operand.
    Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32),
            getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case(S: "fpscr", Value: ARM::VMRS)
                        .Case(S: "fpexc", Value: ARM::VMRS_FPEXC)
                        .Case(S: "fpsid", Value: ARM::VMRS_FPSID)
                        .Case(S: "mvfr0", Value: ARM::VMRS_MVFR0)
                        .Case(S: "mvfr1", Value: ARM::VMRS_MVFR1)
                        .Case(S: "mvfr2", Value: ARM::VMRS_MVFR2)
                        .Case(S: "fpinst", Value: ARM::VMRS_FPINST)
                        .Case(S: "fpinst2", Value: ARM::VMRS_FPINST2)
                        .Default(Value: 0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // All of these registers require at least a VFPv2 base...
    if (!Subtarget->hasVFP2Base())
      return false;
    // ...and mvfr2 additionally requires FP-ARMv8.
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(F: N,
                T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
                      getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
                      N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MRS_M, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, dl: DL,
                                  VT1: MVT::i32, VT2: MVT::Other, Ops));
    return true;
  }

  return false;
}
5501
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Returns false (leaving N intact) if the register string is not something
// this target can write.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register name string;
  // operand 2 (and 3, for 64-bit writes) carries the value(s) to write.
  const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
  const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString: RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      // Five fields (cp:opc1:CRn:CRm:opc2): 32-bit MCR write; the value to
      // write is inserted after the coprocessor and opc1 fields.
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(position: Ops.begin()+2, x: N->getOperand(Num: 2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      // Three fields (cp:opc1:CRm): 64-bit MCRR write; both halves of the
      // value are inserted after the coprocessor and opc1 fields.
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(Num: 2), N->getOperand(Num: 3) };
      Ops.insert(position: Ops.begin()+2, first: WriteValue, last: WriteValue+2);
    }

    // Append the standard predicate operands (AL condition, no CC register)
    // and the incoming chain.
    Ops.push_back(x: getAL(CurDAG, dl: DL));
    Ops.push_back(x: CurDAG->getRegister(Reg: 0, VT: MVT::i32));
    Ops.push_back(x: N->getOperand(Num: 0));

    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // Otherwise the string names a register directly; normalise to lower case
  // for the comparisons below.
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(RegString: SpecialReg);
  if (BankedReg != -1) {
    // Banked registers are written with MSRbanked, taking the encoded
    // register/mode mask as an immediate operand.
    Ops = { CurDAG->getTargetConstant(Val: BankedReg, DL, VT: MVT::i32), N->getOperand(Num: 2),
            getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(
        F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case(S: "fpscr", Value: ARM::VMSR)
                        .Case(S: "fpexc", Value: ARM::VMSR_FPEXC)
                        .Case(S: "fpsid", Value: ARM::VMSR_FPSID)
                        .Case(S: "fpinst", Value: ARM::VMSR_FPINST)
                        .Case(S: "fpinst2", Value: ARM::VMSR_FPINST2)
                        .Default(Value: 0);

  if (Opcode) {
    // All of the VFP system registers require at least a VFPv2 base.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(Num: 2), getAL(CurDAG, dl: DL),
            CurDAG->getRegister(Reg: 0, VT: MVT::i32), N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // Split a name such as "cpsr_fc" into the register part and its flags
  // suffix, for the A/R-class mask construction below.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit(Separator: '_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(Reg: SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(Val: SYSmValue, DL, VT: MVT::i32),
                      N->getOperand(Num: 2), getAL(CurDAG, dl: DL),
                      CurDAG->getRegister(Reg: 0, VT: MVT::i32), N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: ARM::t2MSR_M, dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Val: Mask, DL, VT: MVT::i32), N->getOperand(Num: 2),
            getAL(CurDAG, dl: DL), CurDAG->getRegister(Reg: 0, VT: MVT::i32),
            N->getOperand(Num: 0) };
    ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          dl: DL, VT: MVT::Other, Ops));
    return true;
  }

  return false;
}
5605
// Rewrite the operand list of an inline-asm node so that each 64-bit "%r"
// value, modeled as two consecutive i32 GPR operands, becomes a single
// GPRPair virtual register (see the comment below for why). Returns false
// if nothing needed rewriting.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(Num: NumOps - 1) : SDValue();

  // Records, for each register operand group, whether it was rewritten to a
  // GPRPair; a tied use must mirror the rewrite applied to its def.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(Num: i);
    AsmNodeOperands.push_back(x: op);

    // The leading fixed operands (chain, asm string, ...) pass through.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // A constant at this position is an operand-group flag word; anything
    // else is copied through unchanged.
    if (const auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(Num: ++i);
      AsmNodeOperands.push_back(x: op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(Elt: false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(Idx&: DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(Num: ++i);
      AsmNodeOperands.push_back(x: op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    // Only groups of exactly two GPRs are rewritten; a tied-to-changed
    // operand carries no register class of its own.
    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(Num: i+1);
    SDValue V1 = N->getOperand(Num: i+2);
    Register Reg0 = cast<RegisterSDNode>(Val&: V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(Val&: V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, Reg: GPVR, VT: MVT::Untyped,
                                               Glue: Chain.getValue(R: 1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_0, DL: dl, VT: MVT::i32,
                                                    Operand: RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(SRIdx: ARM::gsub_1, DL: dl, VT: MVT::i32,
                                                    Operand: RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Chain: Sub0, dl, Reg: Reg0, N: Sub0,
                                        Glue: RegCopy.getValue(R: 1));
      SDValue T1 = CurDAG->getCopyToReg(Chain: Sub1, dl, Reg: Reg1, N: Sub1, Glue: T0.getValue(R: 1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(x: T1.getValue(R: 1));
      CurDAG->UpdateNodeOperands(N: GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg0, VT: MVT::i32,
                                          Glue: Chain.getValue(R: 1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg: Reg1, VT: MVT::i32,
                                          Glue: T0.getValue(R: 1));
      SDValue Pair = SDValue(createGPRPairNode(VT: MVT::Untyped, V0: T0, V1: T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(RegClass: &ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(Reg: GPVR, VT: MVT::Untyped);
      Chain = CurDAG->getCopyToReg(Chain: T1, dl, Reg: GPVR, N: Pair, Glue: T1.getValue(R: 1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(R: 1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      // Rewrite the flag word: one register (the GPRPair) instead of two.
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Val: Flag, DL: dl, VT: MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(x: PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Re-attach the glue operand (if any) and, when something changed, rebuild
  // the inline-asm node with the rewritten operand list.
  if (Glue.getNode())
    AsmNodeOperands.push_back(x: Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(Opcode: N->getOpcode(), DL: SDLoc(N),
      VTList: CurDAG->getVTList(VT1: MVT::Other, VT2: MVT::Glue), Ops: AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(F: N, T: New.getNode());
  return true;
}
5764
5765bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5766 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5767 std::vector<SDValue> &OutOps) {
5768 switch(ConstraintID) {
5769 default:
5770 llvm_unreachable("Unexpected asm memory constraint");
5771 case InlineAsm::ConstraintCode::m:
5772 case InlineAsm::ConstraintCode::o:
5773 case InlineAsm::ConstraintCode::Q:
5774 case InlineAsm::ConstraintCode::Um:
5775 case InlineAsm::ConstraintCode::Un:
5776 case InlineAsm::ConstraintCode::Uq:
5777 case InlineAsm::ConstraintCode::Us:
5778 case InlineAsm::ConstraintCode::Ut:
5779 case InlineAsm::ConstraintCode::Uv:
5780 case InlineAsm::ConstraintCode::Uy:
5781 // Require the address to be in a register. That is safe for all ARM
5782 // variants and it is hard to do anything much smarter without knowing
5783 // how the operand is used.
5784 OutOps.push_back(x: Op);
5785 return false;
5786 }
5787 return true;
5788}
5789
5790/// createARMISelDag - This pass converts a legalized DAG into a
5791/// ARM-specific DAG, ready for instruction scheduling.
5792///
5793FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5794 CodeGenOptLevel OptLevel) {
5795 return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5796}
5797