1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Cache the subtarget for this function before delegating to the common
    // SelectionDAG selection driver.
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(mf&: MF);
  }

  void Select(SDNode *Node) override;
  void PreprocessISelDAG() override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  template <signed Low, signed High>
  bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  // Shifted-register operands: ROR is only permitted for the logical
  // instruction forms, not the arithmetic ones.
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, AllowROR: false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, AllowROR: true, Reg, Shift);
  }
  // Scaled 7-bit signed-immediate addressing, one wrapper per access size in
  // bytes (used for the load/store-pair style addressing modes).
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, Size: 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, Size: 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, Size: 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, Size: 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, Size: 16, Base, OffImm);
  }
  // 9-bit signed / 6-bit unsigned scaled immediates for 16-byte accesses.
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, IsSignedImm: true, BW: 9, Size: 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, IsSignedImm: false, BW: 6, Size: 16, Base, OffImm);
  }
  // Scaled unsigned 12-bit immediate addressing, per access size in bytes.
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, Size: 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, Size: 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, Size: 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, Size: 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, Size: 16, Base, OffImm);
  }
  // Unscaled 9-bit signed immediate addressing (LDUR/STUR style), per size.
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, Size: 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, Size: 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, Size: 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, Size: 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, Size: 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(Val&: OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i64);
    return true;
  }

  // Register-offset addressing with a W (32-bit, extended) index register;
  // Width is the access width in bits.
  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Size: Width / 8, Base, Offset, SignExtend, DoShift);
  }

  // Register-offset addressing with an X (64-bit) index register.
  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Size: Width / 8, Base, Offset, SignExtend, DoShift);
  }

  // Match an EXTRACT_SUBVECTOR that takes the upper 64-bit half of a 128-bit
  // vector (looking through a bitcast on little-endian targets); on success
  // Res is the full 128-bit source vector.
  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(Num: 0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
      return false;
    EVT VT = N->getValueType(ResNo: 0);
    EVT LVT = N->getOperand(Num: 0).getValueType();
    unsigned Index = N->getConstantOperandVal(Num: 1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(Num: 0);
    return true;
  }

  // Match a rounding logical shift right: VLSHR(ADD(X, 1 << (ShtAmt - 1)),
  // ShtAmt). Res1 receives X and Res2 the shift amount constant.
  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(Num: 0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(Num: 1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    // The addend must be a splat constant (MOVIshift or constant DUP) equal
    // to half the rounding increment, i.e. 1 << (ShtAmt - 1).
    APInt Imm;
    if (Op.getOperand(i: 1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(i: 1).getConstantOperandVal(i: 0)
                      << Op.getOperand(i: 1).getConstantOperandVal(i: 1));
    else if (Op.getOperand(i: 1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Val: Op.getOperand(i: 1).getOperand(i: 0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(i: 1).getConstantOperandVal(i: 0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(i: 0);
    Res2 = CurDAG->getTargetConstant(Val: ShtAmt, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  // Match undef, or a splat/dup of zero (integer or floating point).
  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(Num: 0);
      if (isNullConstant(V: Opnd0))
        return true;
      if (isNullFPConstant(V: Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  // Trivially accepts any operand.
  bool SelectAny(SDValue) { return true; }

  // Match a splat/dup of zero (integer or floating point); undef is rejected.
  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(Num: 0);
      if (isNullConstant(V: Opnd0))
        return true;
      if (isNullFPConstant(V: Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  // Thin template wrappers that forward their compile-time parameters to the
  // private worker implementations declared below.
  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  // Match a splat of a shift-right amount in [1, element-bits], saturating
  // out-of-range values.
  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(ResNo: 0).getVectorElementType();
    return SelectSVEShiftImm(N: N->getOperand(Num: 0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template<signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(Val: N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(x: Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(Val: MulImm, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }

    return false;
  }

  // Match a non-negative constant <= Max and return it multiplied by Scale.
  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(Val: N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(Val: MulImm, DL: SDLoc(N), VT: MVT::i32);
      return true;
    }

    return false;
  }

  // Map a constant C in [0, Max] onto the register numbered BaseReg + C.
  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(Val&: N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(Reg: BaseReg + C, VT: MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  // Similar to above, except the register must start at a multiple of the
  // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
  SDValue createZMulTuple(ArrayRef<SDValue> Regs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  void SelectPtrauthAuth(SDNode *N);
  void SelectPtrauthResign(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  // Selection helpers for the various structured load forms.
  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
                                       unsigned Opc_rr);
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
                             unsigned Op);
  void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                              unsigned Op, unsigned MaxIdx, unsigned Scale,
                              unsigned BaseReg = 0);
  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
                                 unsigned Opc, uint32_t MaxImm);

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxSize: MaxIdx, Vector, Offset, Scale);
  }

  // Selection helpers for the various structured store forms.
  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);

  bool trySelectXAR(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, IsSignedImm: true, BW: 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  // Fixed-point conversion operand matchers, parameterized by register width.
  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, Width: RegWidth);
  }
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPointVec(N, FixedPos, Width: RegWidth);
  }
  bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, Width: RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                          bool Negate);
  bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
                          SDValue &Shift, bool Negate);
  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                              bool Negate);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
  bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
  bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
  bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);

  bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);

  bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);

  template <bool MatchCBB>
  bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
};
534
/// Legacy pass-manager wrapper that owns an AArch64DAGToDAGISel instance and
/// exposes it through the SelectionDAGISelLegacy interface.
class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID; // Pass identification.
  explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<AArch64DAGToDAGISel>(args&: tm, args&: OptLevel)) {}
};
543} // end anonymous namespace
544
// Pass identification: the address of ID uniquely identifies the pass.
char AArch64DAGToDAGISelLegacy::ID = 0;

// Register the pass with the legacy pass registry.
INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
548
549/// addBitcastHints - This method adds bitcast hints to the operands of a node
550/// to help instruction selector determine which operands are in Neon registers.
551static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N) {
552 SDLoc DL(&N);
553 auto getFloatVT = [&](EVT VT) {
554 EVT ScalarVT = VT.getScalarType();
555 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
556 return VT.changeElementType(Context&: *(DAG.getContext()),
557 EltVT: ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
558 };
559 SmallVector<SDValue, 2> NewOps;
560 NewOps.reserve(N: N.getNumOperands());
561
562 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
563 auto bitcasted = DAG.getBitcast(VT: getFloatVT(N.getOperand(Num: I).getValueType()),
564 V: N.getOperand(Num: I));
565 NewOps.push_back(Elt: bitcasted);
566 }
567 EVT OrigVT = N.getValueType(ResNo: 0);
568 SDValue OpNode = DAG.getNode(Opcode: N.getOpcode(), DL, VT: getFloatVT(OrigVT), Ops: NewOps);
569 return DAG.getBitcast(VT: OrigVT, V: OpNode);
570}
571
572/// isIntImmediate - This method tests to see if the node is a constant
573/// operand. If so Imm will receive the 32-bit value.
574static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
575 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(Val: N)) {
576 Imm = C->getZExtValue();
577 return true;
578 }
579 return false;
580}
581
582// isIntImmediate - This method tests to see if a constant operand.
583// If so Imm will receive the value.
584static bool isIntImmediate(SDValue N, uint64_t &Imm) {
585 return isIntImmediate(N: N.getNode(), Imm);
586}
587
588// isOpcWithIntImmediate - This method tests to see if the node is a specific
589// opcode and that it has a immediate integer right operand.
590// If so Imm will receive the 32 bit value.
591static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
592 uint64_t &Imm) {
593 return N->getOpcode() == Opc &&
594 isIntImmediate(N: N->getOperand(Num: 1).getNode(), Imm);
595}
596
597// isIntImmediateEq - This method tests to see if N is a constant operand that
598// is equivalent to 'ImmExpected'.
599#ifndef NDEBUG
600static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
601 uint64_t Imm;
602 if (!isIntImmediate(N.getNode(), Imm))
603 return false;
604 return Imm == ImmExpected;
605}
606#endif
607
608static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
609 assert(RegWidth == 32 || RegWidth == 64);
610 if (RegWidth == 32)
611 return APInt(RegWidth,
612 uint32_t(AArch64_AM::decodeAdvSIMDModImmType11(Imm)));
613 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
614}
615
616// Decodes the raw integer splat value from a NEON splat operation.
617static std::optional<APInt> DecodeNEONSplat(SDValue N) {
618 assert(N.getValueType().isInteger() && "Only integers are supported");
619 if (N->getOpcode() == AArch64ISD::NVCAST)
620 N = N->getOperand(Num: 0);
621 unsigned SplatWidth = N.getScalarValueSizeInBits();
622 if (N.getOpcode() == AArch64ISD::FMOV)
623 return DecodeFMOVImm(Imm: N.getConstantOperandVal(i: 0), RegWidth: SplatWidth);
624 if (N->getOpcode() == AArch64ISD::MOVI)
625 return APInt(SplatWidth, N.getConstantOperandVal(i: 0));
626 if (N->getOpcode() == AArch64ISD::MOVIshift)
627 return APInt(SplatWidth, N.getConstantOperandVal(i: 0)
628 << N.getConstantOperandVal(i: 1));
629 if (N->getOpcode() == AArch64ISD::MVNIshift)
630 return ~APInt(SplatWidth, N.getConstantOperandVal(i: 0)
631 << N.getConstantOperandVal(i: 1));
632 if (N->getOpcode() == AArch64ISD::MOVIedit)
633 return APInt(SplatWidth, AArch64_AM::decodeAdvSIMDModImmType10(
634 Imm: N.getConstantOperandVal(i: 0)));
635 if (N->getOpcode() == AArch64ISD::DUP)
636 if (auto *Const = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0)))
637 return Const->getAPIntValue().trunc(width: SplatWidth);
638 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
639 // in AArch64ISelLowering.
640 return std::nullopt;
641}
642
643// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
644// matching the element size of N.
645static std::optional<APInt> GetNEONSplatValue(SDValue N) {
646 unsigned SplatWidth = N.getScalarValueSizeInBits();
647 if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
648 if (SplatVal->getBitWidth() <= SplatWidth)
649 return APInt::getSplat(NewLen: SplatWidth, V: *SplatVal);
650 if (SplatVal->isSplat(SplatSizeInBits: SplatWidth))
651 return SplatVal->trunc(width: SplatWidth);
652 }
653 return std::nullopt;
654}
655
656bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
657 SDValue &Imm) {
658 std::optional<APInt> ImmVal = GetNEONSplatValue(N);
659 if (!ImmVal)
660 return false;
661 uint64_t Encoding;
662 if (!AArch64_AM::isSVELogicalImm(SizeInBits: N.getScalarValueSizeInBits(),
663 ImmVal: ImmVal->getZExtValue(), Encoding))
664 return false;
665
666 Imm = CurDAG->getTargetConstant(Val: Encoding, DL: SDLoc(N), VT: MVT::i64);
667 return true;
668}
669
670bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
671 SDValue &Shift) {
672 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
673 return SelectSVEAddSubImm(DL: SDLoc(N), Value: *ImmVal,
674 VT: N.getValueType().getScalarType().getSimpleVT(),
675 Imm, Shift,
676 /*Negate=*/false);
677 return false;
678}
679
680bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
681 SDValue &Imm) {
682 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
683 return SelectSVESignedArithImm(DL: SDLoc(N), Value: *ImmVal, Imm);
684 return false;
685}
686
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. Returns false when the operand was successfully
/// handled (the SelectionDAGISel convention for this hook).
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass();
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(Val: TRC->getID(), DL: dl, VT: MVT::i64);
    // Wrap the operand in a COPY_TO_REGCLASS to pin its register class.
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
                                       dl, VT: Op.getValueType(),
                                       Op1: Op, Op2: RC), 0);
    OutOps.push_back(x: NewOp);
    return false;
  }
  return true;
}
711
712/// SelectArithImmed - Select an immediate value that can be represented as
713/// a 12-bit value shifted left by either 0 or 12. If so, return true with
714/// Val set to the 12-bit value and Shift set to the shifter operand.
715bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
716 SDValue &Shift) {
717 // This function is called from the addsub_shifted_imm ComplexPattern,
718 // which lists [imm] as the list of opcode it's interested in, however
719 // we still need to check whether the operand is actually an immediate
720 // here because the ComplexPattern opcode list is only used in
721 // root-level opcode matching.
722 if (!isa<ConstantSDNode>(Val: N.getNode()))
723 return false;
724
725 uint64_t Immed = N.getNode()->getAsZExtVal();
726 unsigned ShiftAmt;
727
728 if (Immed >> 12 == 0) {
729 ShiftAmt = 0;
730 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
731 ShiftAmt = 12;
732 Immed = Immed >> 12;
733 } else
734 return false;
735
736 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
737 SDLoc dl(N);
738 Val = CurDAG->getTargetConstant(Val: Immed, DL: dl, VT: MVT::i32);
739 Shift = CurDAG->getTargetConstant(Val: ShVal, DL: dl, VT: MVT::i32);
740 return true;
741}
742
743/// SelectNegArithImmed - As above, but negates the value before trying to
744/// select it.
745bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
746 SDValue &Shift) {
747 // This function is called from the addsub_shifted_imm ComplexPattern,
748 // which lists [imm] as the list of opcode it's interested in, however
749 // we still need to check whether the operand is actually an immediate
750 // here because the ComplexPattern opcode list is only used in
751 // root-level opcode matching.
752 if (!isa<ConstantSDNode>(Val: N.getNode()))
753 return false;
754
755 // The immediate operand must be a 24-bit zero-extended immediate.
756 uint64_t Immed = N.getNode()->getAsZExtVal();
757
758 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
759 // have the opposite effect on the C flag, so this pattern mustn't match under
760 // those circumstances.
761 if (Immed == 0)
762 return false;
763
764 if (N.getValueType() == MVT::i32)
765 Immed = ~((uint32_t)Immed) + 1;
766 else
767 Immed = ~Immed + 1ULL;
768 if (Immed & 0xFFFFFFFFFF000000ULL)
769 return false;
770
771 Immed &= 0xFFFFFFULL;
772 return SelectArithImmed(N: CurDAG->getConstant(Val: Immed, DL: SDLoc(N), VT: MVT::i32), Val,
773 Shift);
774}
775
776/// getShiftTypeForNode - Translate a shift node to the corresponding
777/// ShiftType value.
778static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
779 switch (N.getOpcode()) {
780 default:
781 return AArch64_AM::InvalidShiftExtend;
782 case ISD::SHL:
783 return AArch64_AM::LSL;
784 case ISD::SRL:
785 return AArch64_AM::LSR;
786 case ISD::SRA:
787 return AArch64_AM::ASR;
788 case ISD::ROTR:
789 return AArch64_AM::ROR;
790 }
791}
792
793static bool isMemOpOrPrefetch(SDNode *N) {
794 return isa<MemSDNode>(Val: *N) || N->getOpcode() == AArch64ISD::PREFETCH;
795}
796
797/// Determine whether it is worth it to fold SHL into the addressing
798/// mode.
799static bool isWorthFoldingSHL(SDValue V) {
800 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
801 // It is worth folding logical shift of up to three places.
802 auto *CSD = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
803 if (!CSD)
804 return false;
805 unsigned ShiftVal = CSD->getZExtValue();
806 if (ShiftVal > 3)
807 return false;
808
809 // Check if this particular node is reused in any non-memory related
810 // operation. If yes, do not try to fold this node into the address
811 // computation, since the computation will be kept.
812 const SDNode *Node = V.getNode();
813 for (SDNode *UI : Node->users())
814 if (!isMemOpOrPrefetch(N: UI))
815 for (SDNode *UII : UI->users())
816 if (!isMemOpOrPrefetch(N: UII))
817 return false;
818 return true;
819}
820
821/// Determine whether it is worth to fold V into an extended register addressing
822/// mode.
823bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
824 // Trivial if we are optimizing for code size or if there is only
825 // one use of the value.
826 if (CurDAG->shouldOptForSize() || V.hasOneUse())
827 return true;
828
829 // If a subtarget has a slow shift, folding a shift into multiple loads
830 // costs additional micro-ops.
831 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
832 return false;
833
834 // Check whether we're going to emit the address arithmetic anyway because
835 // it's used by a non-address operation.
836 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
837 return true;
838 if (V.getOpcode() == ISD::ADD) {
839 const SDValue LHS = V.getOperand(i: 0);
840 const SDValue RHS = V.getOperand(i: 1);
841 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: LHS))
842 return true;
843 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: RHS))
844 return true;
845 }
846
847 // It hurts otherwise, since the value will be reused.
848 return false;
849}
850
/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted register
///
/// Rewrites an AND of a constant-shifted value into a bitfield move
/// (UBFM/SBFM) whose result is then consumed with an "LSL #LowZBits"
/// shifted-register modifier, making the AND node itself disappear. On
/// success, \p Reg receives the new bitfield-move node and \p Shift the
/// shifter immediate.
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  // Only fold a single-use AND of a single-use shift; otherwise both original
  // nodes would be emitted anyway and the rewrite saves nothing.
  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(i: 0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  // Both the shift amount and the AND mask must be constants.
  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: LHS.getOperand(i: 1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!RHSC)
    return false;

  // The mask must be one contiguous run of MaskLen ones starting LowZBits
  // above bit 0.
  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(MaskIdx&: LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    // (x << ShiftAmtC) & mask becomes (x >> (LowZBits - ShiftAmtC)) in UBFM
    // form, with the residual "LSL #LowZBits" applied by the consumer.
    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA need all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  // Emit the combined right shift as a UBFM/SBFM (immr = NewShiftC,
  // imms = BitWidth - 1, i.e. a plain LSR/ASR) and report the residual left
  // shift as an LSL shifter immediate on the operand.
  SDValue NewShiftAmt = CurDAG->getTargetConstant(Val: NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(Opcode: NewShiftOp, dl: DL, VT, Op1: LHS->getOperand(Num: 0),
                                       Op2: NewShiftAmt, Op3: BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LowZBits);
  Shift = CurDAG->getTargetConstant(Val: ShVal, DL, VT: MVT::i32);
  return true;
}
928
929/// getExtendTypeForNode - Translate an extend node to the corresponding
930/// ExtendType value.
931static AArch64_AM::ShiftExtendType
932getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
933 if (N.getOpcode() == ISD::SIGN_EXTEND ||
934 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
935 EVT SrcVT;
936 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
937 SrcVT = cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT();
938 else
939 SrcVT = N.getOperand(i: 0).getValueType();
940
941 if (!IsLoadStore && SrcVT == MVT::i8)
942 return AArch64_AM::SXTB;
943 else if (!IsLoadStore && SrcVT == MVT::i16)
944 return AArch64_AM::SXTH;
945 else if (SrcVT == MVT::i32)
946 return AArch64_AM::SXTW;
947 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
948
949 return AArch64_AM::InvalidShiftExtend;
950 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
951 N.getOpcode() == ISD::ANY_EXTEND) {
952 EVT SrcVT = N.getOperand(i: 0).getValueType();
953 if (!IsLoadStore && SrcVT == MVT::i8)
954 return AArch64_AM::UXTB;
955 else if (!IsLoadStore && SrcVT == MVT::i16)
956 return AArch64_AM::UXTH;
957 else if (SrcVT == MVT::i32)
958 return AArch64_AM::UXTW;
959 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
960
961 return AArch64_AM::InvalidShiftExtend;
962 } else if (N.getOpcode() == ISD::AND) {
963 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
964 if (!CSD)
965 return AArch64_AM::InvalidShiftExtend;
966 uint64_t AndMask = CSD->getZExtValue();
967
968 switch (AndMask) {
969 default:
970 return AArch64_AM::InvalidShiftExtend;
971 case 0xFF:
972 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
973 case 0xFFFF:
974 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
975 case 0xFFFFFFFF:
976 return AArch64_AM::UXTW;
977 }
978 }
979
980 return AArch64_AM::InvalidShiftExtend;
981}
982
983/// Determine whether it is worth to fold V into an extended register of an
984/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
985/// instruction, and the shift should be treated as worth folding even if has
986/// multiple uses.
987bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
988 // Trivial if we are optimizing for code size or if there is only
989 // one use of the value.
990 if (CurDAG->shouldOptForSize() || V.hasOneUse())
991 return true;
992
993 // If a subtarget has a fastpath LSL we can fold a logical shift into
994 // the add/sub and save a cycle.
995 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
996 V.getConstantOperandVal(i: 1) <= 4 &&
997 getExtendTypeForNode(N: V.getOperand(i: 0)) == AArch64_AM::InvalidShiftExtend)
998 return true;
999
1000 // It hurts otherwise, since the value will be reused.
1001 return false;
1002}
1003
1004/// SelectShiftedRegister - Select a "shifted register" operand. If the value
1005/// is not shifted, set the Shift operand to default of "LSL 0". The logical
1006/// instructions allow the shifted register to be rotated, but the arithmetic
1007/// instructions do not. The AllowROR parameter specifies whether ROR is
1008/// supported.
1009bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
1010 SDValue &Reg, SDValue &Shift) {
1011 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
1012 return true;
1013
1014 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
1015 if (ShType == AArch64_AM::InvalidShiftExtend)
1016 return false;
1017 if (!AllowROR && ShType == AArch64_AM::ROR)
1018 return false;
1019
1020 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1021 unsigned BitSize = N.getValueSizeInBits();
1022 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1023 unsigned ShVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
1024
1025 Reg = N.getOperand(i: 0);
1026 Shift = CurDAG->getTargetConstant(Val: ShVal, DL: SDLoc(N), VT: MVT::i32);
1027 return isWorthFoldingALU(V: N, LSL: true);
1028 }
1029
1030 return false;
1031}
1032
1033/// Instructions that accept extend modifiers like UXTW expect the register
1034/// being extended to be a GPR32, but the incoming DAG might be acting on a
1035/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1036/// this is the case.
1037static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
1038 if (N.getValueType() == MVT::i32)
1039 return N;
1040
1041 SDLoc dl(N);
1042 return CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i32, Operand: N);
1043}
1044
1045// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1046template<signed Low, signed High, signed Scale>
1047bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1048 if (!isa<ConstantSDNode>(Val: N))
1049 return false;
1050
1051 int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
1052 if ((MulImm % std::abs(x: Scale)) == 0) {
1053 int64_t RDVLImm = MulImm / Scale;
1054 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1055 Imm = CurDAG->getSignedTargetConstant(Val: RDVLImm, DL: SDLoc(N), VT: MVT::i32);
1056 return true;
1057 }
1058 }
1059
1060 return false;
1061}
1062
1063// Returns a suitable RDSVL multiplier from a left shift.
1064template <signed Low, signed High>
1065bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1066 if (!isa<ConstantSDNode>(Val: N))
1067 return false;
1068
1069 int64_t MulImm = 1LL << cast<ConstantSDNode>(Val&: N)->getSExtValue();
1070 if (MulImm >= Low && MulImm <= High) {
1071 Imm = CurDAG->getSignedTargetConstant(Val: MulImm, DL: SDLoc(N), VT: MVT::i32);
1072 return true;
1073 }
1074
1075 return false;
1076}
1077
/// SelectArithExtendedRegister - Select a "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
///
/// On success, \p Reg receives the (possibly narrowed) register being
/// extended and \p Shift the combined extend-kind + shift-amount operand.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    // (shl (ext x), imm): the shift amount must be a constant in [0, 4].
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N: N.getOperand(i: 0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    // The register being extended sits underneath the extend node.
    Reg = N.getOperand(i: 0).getOperand(i: 0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    // Don't match sext of vector extracts. These can use SMOV, but if we match
    // this as an extended register, we'll always fold the extend into an ALU op
    // user of the extend (which results in a UMOV).
    if (AArch64_AM::isSignExtendShiftType(Type: Ext)) {
      SDValue Op = N.getOperand(i: 0);
      if (Op->getOpcode() == ISD::ANY_EXTEND)
        Op = Op->getOperand(Num: 0);
      if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          Op.getOperand(i: 0).getValueType().isFixedLengthVector())
        return false;
    }

    Reg = N.getOperand(i: 0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
    // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(ResNo: 0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, N: Reg);
  Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc(N),
                                   VT: MVT::i32);
  return isWorthFoldingALU(V: N);
}
1142
1143/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1144/// operand is referred by the instructions have SP operand
1145bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1146 SDValue &Shift) {
1147 unsigned ShiftVal = 0;
1148 AArch64_AM::ShiftExtendType Ext;
1149
1150 if (N.getOpcode() != ISD::SHL)
1151 return false;
1152
1153 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1154 if (!CSD)
1155 return false;
1156 ShiftVal = CSD->getZExtValue();
1157 if (ShiftVal > 4)
1158 return false;
1159
1160 Ext = AArch64_AM::UXTX;
1161 Reg = N.getOperand(i: 0);
1162 Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc(N),
1163 VT: MVT::i32);
1164 return isWorthFoldingALU(V: N);
1165}
1166
1167/// If there's a use of this ADDlow that's not itself a load/store then we'll
1168/// need to create a real ADD instruction from it anyway and there's no point in
1169/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1170/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1171/// leads to duplicated ADRP instructions.
1172static bool isWorthFoldingADDlow(SDValue N) {
1173 for (auto *User : N->users()) {
1174 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1175 User->getOpcode() != ISD::ATOMIC_LOAD &&
1176 User->getOpcode() != ISD::ATOMIC_STORE)
1177 return false;
1178
1179 // ldar and stlr have much more restrictive addressing modes (just a
1180 // register).
1181 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
1182 return false;
1183 }
1184
1185 return true;
1186}
1187
1188/// Check if the immediate offset is valid as a scaled immediate.
1189static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1190 unsigned Size) {
1191 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1192 Offset < (Range << Log2_32(Value: Size)))
1193 return true;
1194 return false;
1195}
1196
/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
///
/// On success, \p Base is the base register (or target frame index) and
/// \p OffImm the offset already scaled down to units of Size.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  // A bare frame index becomes [FI, #0].
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
  // selected here doesn't support labels/immediates, only base+offset.
  if (CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      if (IsSignedImm) {
        // Signed BW-bit immediate: the offset must be Size-aligned and its
        // scaled value must fit in [-2^(BW-1), 2^(BW-1)).
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Value: Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(i: 0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
          }
          // The instruction encodes the offset pre-scaled, in units of Size.
          OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
          return true;
        }
      } else {
        // unsigned Immediate
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Value: Size);
        // Unsigned BW-bit immediate: scaled value must fit in [0, 2^BW).
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(i: 0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //	add x0, Xbase, #offset
  //	stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
  return true;
}
1259
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  // A bare frame index becomes [FI, #0].
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
    return true;
  }

  // ADRP + ADDlow: fold the low-part relocation into the load/store itself.
  // For a global, the alignment and offset must guarantee that the scaled
  // immediate encoding stays valid.
  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(Val: N.getOperand(i: 1).getNode());
    Base = N.getOperand(i: 0);
    OffImm = N.getOperand(i: 1);
    if (!GAN)
      return true;

    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Value: Size);
      if (isValidAsScaledImmediate(Offset: RHSC, Range: 0x1000, Size)) {
        Base = N.getOperand(i: 0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
        }
        // The instruction encodes the offset in units of Size.
        OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  // NOTE: the "return false" on success is deliberate -- failing here lets
  // the unscaled (LDUR/STUR-style) pattern match this address instead.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //	add x0, Xbase, #offset
  //	ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
  return true;
}
1317
1318/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1319/// immediate" address. This should only match when there is an offset that
1320/// is not valid for a scaled immediate addressing mode. The "Size" argument
1321/// is the size in bytes of the memory reference, which is needed here to know
1322/// what is valid for a scaled immediate.
1323bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1324 SDValue &Base,
1325 SDValue &OffImm) {
1326 if (!CurDAG->isBaseWithConstantOffset(Op: N))
1327 return false;
1328 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1329 int64_t RHSC = RHS->getSExtValue();
1330 if (RHSC >= -256 && RHSC < 256) {
1331 Base = N.getOperand(i: 0);
1332 if (Base.getOpcode() == ISD::FrameIndex) {
1333 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1334 const TargetLowering *TLI = getTargetLowering();
1335 Base = CurDAG->getTargetFrameIndex(
1336 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1337 }
1338 OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i64);
1339 return true;
1340 }
1341 }
1342 return false;
1343}
1344
1345static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1346 SDLoc dl(N);
1347 SDValue ImpDef = SDValue(
1348 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: MVT::i64), 0);
1349 return CurDAG->getTargetInsertSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i64, Operand: ImpDef,
1350 Subreg: N);
1351}
1352
/// Check if the given SHL node (\p N), can be used to form an
/// extended register for an addressing mode.
///
/// On success, \p Offset receives the (possibly narrowed) register to be
/// shifted and \p SignExtend a flag selecting SXTW vs UXTW/LSL in the
/// addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  // The shift amount must be a constant that fits in 3 bits (0..7).
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    // Fold an extend sitting underneath the shift: (shl (ext x), imm).
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N: N.getOperand(i: 0), IsLoadStore: true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N: N.getOperand(i: 0).getOperand(i: 0));
    SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
                                           VT: MVT::i32);
  } else {
    Offset = N.getOperand(i: 0);
    SignExtend = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32);
  }

  // The addressing mode can only shift by 0 or by log2 of the access size.
  unsigned LegalShiftVal = Log2_32(Value: Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFoldingAddr(V: N, Size);
}
1386
// SelectAddrModeWRO - Select a "register plus extended 32-bit register"
// address, i.e. [Xbase, Woffset, (s|u)xtw {#shift}]. Outputs are the Base
// and Offset registers plus SignExtend (SXTW vs UXTW) and DoShift flags.
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(i: 0);
  SDValue RHS = N.getOperand(i: 1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(Val: LHS) || isa<ConstantSDNode>(Val: RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->users()) {
    if (!isMemOpOrPrefetch(N: UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(N: RHS, Size, WantExtend: true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(N: LHS, Size, WantExtend: true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(Val: false, DL: dl, VT: MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(N: LHS, IsLoadStore: true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    // Use the value underneath the extend as the offset register.
    Offset = narrowIfNeeded(CurDAG, N: LHS.getOperand(i: 0));
    SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
                                           VT: MVT::i32);
    if (isWorthFoldingAddr(V: LHS, Size))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(N: RHS, IsLoadStore: true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, N: RHS.getOperand(i: 0));
    SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
                                           VT: MVT::i32);
    if (isWorthFoldingAddr(V: RHS, Size))
      return true;
  }

  // No extend found on either operand: this address cannot use WRO mode.
  return false;
}
1460
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constants in [0x0, 0xfff] fit the plain 12-bit ADD immediate.
  if ((ImmOff & ~0xfffLL) == 0)
    return true;
  // Only "ADD ..., LSL #12" candidates remain: at most bits [12, 23] may be
  // set, everything else must be clear.
  if ((ImmOff & ~0xfff000LL) != 0)
    return false;
  // A single MOVZ is faster than "ADD ..., LSL #12", so only prefer ADD when
  // no single MOVZ (neither #imm16 nor #imm16, LSL #16) could materialize the
  // constant, i.e. both nibble groups of bits [12, 23] are populated.
  return (ImmOff & 0xff0000LL) != 0 && (ImmOff & 0xf000LL) != 0;
}
1475
1476bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1477 SDValue &Base, SDValue &Offset,
1478 SDValue &SignExtend,
1479 SDValue &DoShift) {
1480 if (N.getOpcode() != ISD::ADD)
1481 return false;
1482 SDValue LHS = N.getOperand(i: 0);
1483 SDValue RHS = N.getOperand(i: 1);
1484 SDLoc DL(N);
1485
1486 // Check if this particular node is reused in any non-memory related
1487 // operation. If yes, do not try to fold this node into the address
1488 // computation, since the computation will be kept.
1489 const SDNode *Node = N.getNode();
1490 for (SDNode *UI : Node->users()) {
1491 if (!isMemOpOrPrefetch(N: UI))
1492 return false;
1493 }
1494
1495 // Watch out if RHS is a wide immediate, it can not be selected into
1496 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1497 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1498 // instructions like:
1499 // MOV X0, WideImmediate
1500 // ADD X1, BaseReg, X0
1501 // LDR X2, [X1, 0]
1502 // For such situation, using [BaseReg, XReg] addressing mode can save one
1503 // ADD/SUB:
1504 // MOV X0, WideImmediate
1505 // LDR X2, [BaseReg, X0]
1506 if (isa<ConstantSDNode>(Val: RHS)) {
1507 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1508 // Skip the immediate can be selected by load/store addressing mode.
1509 // Also skip the immediate can be encoded by a single ADD (SUB is also
1510 // checked by using -ImmOff).
1511 if (isValidAsScaledImmediate(Offset: ImmOff, Range: 0x1000, Size) ||
1512 isPreferredADD(ImmOff) || isPreferredADD(ImmOff: -ImmOff))
1513 return false;
1514
1515 SDValue Ops[] = { RHS };
1516 SDNode *MOVI =
1517 CurDAG->getMachineNode(Opcode: AArch64::MOVi64imm, dl: DL, VT: MVT::i64, Ops);
1518 SDValue MOVIV = SDValue(MOVI, 0);
1519 // This ADD of two X register will be selected into [Reg+Reg] mode.
1520 N = CurDAG->getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: LHS, N2: MOVIV);
1521 }
1522
1523 // Remember if it is worth folding N when it produces extended register.
1524 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1525
1526 // Try to match a shifted extend on the RHS.
1527 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1528 SelectExtendedSHL(N: RHS, Size, WantExtend: false, Offset, SignExtend)) {
1529 Base = LHS;
1530 DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1531 return true;
1532 }
1533
1534 // Try to match a shifted extend on the LHS.
1535 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1536 SelectExtendedSHL(N: LHS, Size, WantExtend: false, Offset, SignExtend)) {
1537 Base = RHS;
1538 DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1539 return true;
1540 }
1541
1542 // Match any non-shifted, non-extend, non-immediate add expression.
1543 Base = LHS;
1544 Offset = RHS;
1545 SignExtend = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1546 DoShift = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1547 // Reg1 + Reg2 is free: no check needed.
1548 return true;
1549}
1550
1551SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1552 static const unsigned RegClassIDs[] = {
1553 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1554 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1555 AArch64::dsub2, AArch64::dsub3};
1556
1557 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1558}
1559
1560SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1561 static const unsigned RegClassIDs[] = {
1562 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1563 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1564 AArch64::qsub2, AArch64::qsub3};
1565
1566 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1567}
1568
1569SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1570 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1571 AArch64::ZPR3RegClassID,
1572 AArch64::ZPR4RegClassID};
1573 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1574 AArch64::zsub2, AArch64::zsub3};
1575
1576 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1577}
1578
1579SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1580 assert(Regs.size() == 2 || Regs.size() == 4);
1581
1582 // The createTuple interface requires 3 RegClassIDs for each possible
1583 // tuple type even though we only have them for ZPR2 and ZPR4.
1584 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1585 AArch64::ZPR4Mul4RegClassID};
1586 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1587 AArch64::zsub2, AArch64::zsub3};
1588 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1589}
1590
1591SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1592 const unsigned RegClassIDs[],
1593 const unsigned SubRegs[]) {
1594 // There's no special register-class for a vector-list of 1 element: it's just
1595 // a vector.
1596 if (Regs.size() == 1)
1597 return Regs[0];
1598
1599 assert(Regs.size() >= 2 && Regs.size() <= 4);
1600
1601 SDLoc DL(Regs[0]);
1602
1603 SmallVector<SDValue, 4> Ops;
1604
1605 // First operand of REG_SEQUENCE is the desired RegClass.
1606 Ops.push_back(
1607 Elt: CurDAG->getTargetConstant(Val: RegClassIDs[Regs.size() - 2], DL, VT: MVT::i32));
1608
1609 // Then we get pairs of source & subregister-position for the components.
1610 for (unsigned i = 0; i < Regs.size(); ++i) {
1611 Ops.push_back(Elt: Regs[i]);
1612 Ops.push_back(Elt: CurDAG->getTargetConstant(Val: SubRegs[i], DL, VT: MVT::i32));
1613 }
1614
1615 SDNode *N =
1616 CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped, Ops);
1617 return SDValue(N, 0);
1618}
1619
// Select a table-lookup (TBL/TBX-style) intrinsic: gather the NumVecs table
// vectors into a Q-register tuple and emit the machine node \p Opc. When
// \p isExt is set, one extra operand precedes the table vectors and is passed
// through first.
void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);

  // The extra operand (if any) shifts the position of the table vectors by 1.
  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: Vec0Off, M: NumVecs));
  SDValue RegSeq = createQTuple(Regs);

  // Operand order: [extra operand,] table tuple, index vector.
  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(Elt: N->getOperand(Num: 1));
  Ops.push_back(Elt: RegSeq);
  Ops.push_back(Elt: N->getOperand(Num: NumVecs + ExtOff + 1));
  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
}
1639
// Decompose a pointer-authentication discriminator into its
// (constant, address) components, returned as a
// (constant-discriminator, address-discriminator) tuple. A ptrauth_blend
// intrinsic with a 16-bit constant RHS is split into its two operands; a
// bare 16-bit constant is paired with XZR; anything else is returned whole
// as the address discriminator with a zero constant.
static std::tuple<SDValue, SDValue>
extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
  SDLoc DL(Disc);
  SDValue AddrDisc;
  SDValue ConstDisc;

  // If this is a blend, remember the constant and address discriminators.
  // Otherwise, it's either a constant discriminator, or a non-blended
  // address discriminator.
  if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
      Disc->getConstantOperandVal(Num: 0) == Intrinsic::ptrauth_blend) {
    AddrDisc = Disc->getOperand(Num: 1);
    ConstDisc = Disc->getOperand(Num: 2);
  } else {
    ConstDisc = Disc;
  }

  // If the constant discriminator (either the blend RHS, or the entire
  // discriminator value) isn't a 16-bit constant, bail out, and let the
  // discriminator be computed separately.
  auto *ConstDiscN = dyn_cast<ConstantSDNode>(Val&: ConstDisc);
  if (!ConstDiscN || !isUInt<16>(x: ConstDiscN->getZExtValue()))
    return std::make_tuple(args: DAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), args&: Disc);

  // If there's no address discriminator, use XZR directly.
  if (!AddrDisc)
    AddrDisc = DAG->getRegister(Reg: AArch64::XZR, VT: MVT::i64);

  return std::make_tuple(
      args: DAG->getTargetConstant(Val: ConstDiscN->getZExtValue(), DL, VT: MVT::i64),
      args&: AddrDisc);
}
1672
/// Select a ptrauth authenticate intrinsic. When the subtarget does not
/// consider the X16/X17 sequence safer, emit AUTxMxN on ordinary registers;
/// otherwise pin the value to X16 and emit AUTx16x17.
void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(Num: 1);
  SDValue AUTKey = N->getOperand(Num: 2);
  SDValue AUTDisc = N->getOperand(Num: 3);

  unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
  AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);

  // Split the discriminator into constant and address parts.
  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);

  if (!Subtarget->isX16X17Safer()) {
    std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
    // Copy deactivation symbol if present.
    if (N->getNumOperands() > 4)
      Ops.push_back(x: N->getOperand(Num: 4));

    SDNode *AUT =
        CurDAG->getMachineNode(Opcode: AArch64::AUTxMxN, dl: DL, VT1: MVT::i64, VT2: MVT::i64, Ops);
    ReplaceNode(F: N, T: AUT);
  } else {
    // AUTx16x17 takes its input in X16; the glue result of the copy ties the
    // register write to the pseudo.
    SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
                                           Reg: AArch64::X16, N: Val, Glue: SDValue());
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(R: 1)};

    SDNode *AUT = CurDAG->getMachineNode(Opcode: AArch64::AUTx16x17, dl: DL, VT: MVT::i64, Ops);
    ReplaceNode(F: N, T: AUT);
  }
}
1705
/// Select a ptrauth resign intrinsic (plain or load-relative form):
/// authenticate with the AUT key/discriminator, then re-sign with the PAC
/// key/discriminator. The value is passed to the pseudo in X16 via a glued
/// CopyToReg.
void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0, if W_CHAIN it is #1
  int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
  SDValue Val = N->getOperand(Num: OffsetBase + 1);
  SDValue AUTKey = N->getOperand(Num: OffsetBase + 2);
  SDValue AUTDisc = N->getOperand(Num: OffsetBase + 3);
  SDValue PACKey = N->getOperand(Num: OffsetBase + 4);
  SDValue PACDisc = N->getOperand(Num: OffsetBase + 5);
  uint32_t IntNum = N->getConstantOperandVal(Num: OffsetBase + 0);
  bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;

  unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
  unsigned PACKeyC = cast<ConstantSDNode>(Val&: PACKey)->getZExtValue();

  AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);
  PACKey = CurDAG->getTargetConstant(Val: PACKeyC, DL, VT: MVT::i64);

  // Split each discriminator into constant and address parts.
  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);

  SDValue PACAddrDisc, PACConstDisc;
  std::tie(args&: PACConstDisc, args&: PACAddrDisc) =
      extractPtrauthBlendDiscriminators(Disc: PACDisc, DAG: CurDAG);

  SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
                                         Reg: AArch64::X16, N: Val, Glue: SDValue());

  if (HasLoad) {
    // The load-relative form takes an extra addend operand and threads the
    // incoming chain through the pseudo.
    SDValue Addend = N->getOperand(Num: OffsetBase + 6);
    SDValue IncomingChain = N->getOperand(Num: 0);
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
                     PACKey, PACConstDisc, PACAddrDisc,
                     Addend, IncomingChain, X16Copy.getValue(R: 1)};

    SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(Opcode: AArch64::AUTRELLOADPAC, dl: DL,
                                                   VT1: MVT::i64, VT2: MVT::Other, Ops);
    ReplaceNode(F: N, T: AUTRELLOADPAC);
  } else {
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
                     PACConstDisc, PACAddrDisc, X16Copy.getValue(R: 1)};

    SDNode *AUTPAC = CurDAG->getMachineNode(Opcode: AArch64::AUTPAC, dl: DL, VT: MVT::i64, Ops);
    ReplaceNode(F: N, T: AUTPAC);
  }
}
1753
/// Try to select a pre/post-indexed load into a writeback load instruction.
/// Returns true and replaces \p N on success. The new machine node's results
/// are (writeback address : i64, loaded value, chain) while \p N's are
/// (loaded value, writeback address, chain), hence the renumbering at the end.
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(ResNo: 0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(Val: LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 ||
             (VT.is64BitVector() && Subtarget->isLittleEndian())) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else if (VT.is64BitVector()) {
    // Big-endian 64-bit vectors: only the post-indexed LD1 form with an
    // increment equal to the transfer size (8 bytes) is available.
    if (IsPre || OffsetVal != 8)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev8b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev4h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev2s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev1d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else if (VT.is128BitVector()) {
    // Big-endian 128-bit vectors: post-indexed LD1, increment of 16 bytes.
    if (IsPre || OffsetVal != 16)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev16b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev8h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev4s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev2d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDLoc dl(N);
  // LD1 encodes an immediate offset by using XZR as the offset register.
  SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
                       ? CurDAG->getRegister(Reg: AArch64::XZR, VT: MVT::i64)
                       : CurDAG->getTargetConstant(Val: OffsetVal, DL: dl, VT: MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, VT1: MVT::i64, VT2: DstVT,
                                       VT3: MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Res), NewMemRefs: {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  // N's results: (value, writeback, chain); Res's: (writeback, value, chain).
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL: dl, VT: MVT::i32);
    LoadedVal = SDValue(CurDAG->getMachineNode(Opcode: AArch64::SUBREG_TO_REG, dl,
                                               VT: MVT::i64, Op1: LoadedVal, Op2: SubReg),
                        0);
  }

  ReplaceUses(F: SDValue(N, 0), T: LoadedVal);
  ReplaceUses(F: SDValue(N, 1), T: SDValue(Res, 0));
  ReplaceUses(F: SDValue(N, 2), T: SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1893
1894void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1895 unsigned SubRegIdx) {
1896 SDLoc dl(N);
1897 EVT VT = N->getValueType(ResNo: 0);
1898 SDValue Chain = N->getOperand(Num: 0);
1899
1900 SDValue Ops[] = {N->getOperand(Num: 2), // Mem operand;
1901 Chain};
1902
1903 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1904
1905 SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
1906 SDValue SuperReg = SDValue(Ld, 0);
1907 for (unsigned i = 0; i < NumVecs; ++i)
1908 ReplaceUses(F: SDValue(N, i),
1909 T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));
1910
1911 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));
1912
1913 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1914 // because it's too simple to have needed special treatment during lowering.
1915 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: N)) {
1916 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1917 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
1918 }
1919
1920 CurDAG->RemoveDeadNode(N);
1921}
1922
/// Select a post-incremented structured load. The machine node's results are
/// (updated base : i64, vector data : Untyped, chain); each destination
/// vector is extracted from the untyped super-register.
void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Chain = N->getOperand(Num: 0);

  SDValue Ops[] = {N->getOperand(Num: 1), // Mem operand
                   N->getOperand(Num: 2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    // A single vector is not wrapped in a tuple; use the result directly.
    ReplaceUses(F: SDValue(N, 0), T: SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(F: SDValue(N, i),
                  T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));

  // Update the chain
  ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
1954
1955/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1956/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1957/// new Base and an SDValue representing the new offset.
1958std::tuple<unsigned, SDValue, SDValue>
1959AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1960 unsigned Opc_ri,
1961 const SDValue &OldBase,
1962 const SDValue &OldOffset,
1963 unsigned Scale) {
1964 SDValue NewBase = OldBase;
1965 SDValue NewOffset = OldOffset;
1966 // Detect a possible Reg+Imm addressing mode.
1967 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1968 Root: N, N: OldBase, Base&: NewBase, OffImm&: NewOffset);
1969
1970 // Detect a possible reg+reg addressing mode, but only if we haven't already
1971 // detected a Reg+Imm one.
1972 const bool IsRegReg =
1973 !IsRegImm && SelectSVERegRegAddrMode(N: OldBase, Scale, Base&: NewBase, Offset&: NewOffset);
1974
1975 // Select the instruction.
1976 return std::make_tuple(args&: IsRegReg ? Opc_rr : Opc_ri, args&: NewBase, args&: NewOffset);
1977}
1978
/// Category of element type a scalable-vector VT must have to be accepted
/// by SelectOpcodeFromVT below.
enum class SelectTypeKind {
  Int1 = 0,    // i1 (predicate) elements only.
  Int = 1,     // i8/i16/i32/i64 elements.
  FP = 2,      // f16/bf16/f32/f64 elements.
  AnyType = 3, // No restriction on the element type.
};
1985
1986/// This function selects an opcode from a list of opcodes, which is
1987/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1988/// element types, in this order.
1989template <SelectTypeKind Kind>
1990static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1991 // Only match scalable vector VTs
1992 if (!VT.isScalableVector())
1993 return 0;
1994
1995 EVT EltVT = VT.getVectorElementType();
1996 unsigned Key = VT.getVectorMinNumElements();
1997 switch (Kind) {
1998 case SelectTypeKind::AnyType:
1999 break;
2000 case SelectTypeKind::Int:
2001 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2002 EltVT != MVT::i64)
2003 return 0;
2004 break;
2005 case SelectTypeKind::Int1:
2006 if (EltVT != MVT::i1)
2007 return 0;
2008 break;
2009 case SelectTypeKind::FP:
2010 if (EltVT == MVT::bf16)
2011 Key = 16;
2012 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2013 EltVT != MVT::f64)
2014 return 0;
2015 break;
2016 }
2017
2018 unsigned Offset;
2019 switch (Key) {
2020 case 16: // 8-bit or bf16
2021 Offset = 0;
2022 break;
2023 case 8: // 16-bit
2024 Offset = 1;
2025 break;
2026 case 4: // 32-bit
2027 Offset = 2;
2028 break;
2029 case 2: // 64-bit
2030 Offset = 3;
2031 break;
2032 default:
2033 return 0;
2034 }
2035
2036 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2037}
2038
// This function is almost identical to SelectWhilePair, but has an
// extra check on the range of the immediate operand.
// TODO: Merge these two functions together at some point?
/// Select a predicate-pair extract; the lane immediate (operand 2) must be
/// 0 or 1, otherwise nothing is selected.
void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
  // Immediate can be either 0 or 1.
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)))
    if (Imm->getZExtValue() > 1)
      return;

  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
  // The machine node yields an untyped predicate pair; extract both halves.
  SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(WhilePair, 0);

  for (unsigned I = 0; I < 2; ++I)
    ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));

  CurDAG->RemoveDeadNode(N);
}
2060
2061void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2062 SDLoc DL(N);
2063 EVT VT = N->getValueType(ResNo: 0);
2064
2065 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
2066
2067 SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
2068 SDValue SuperReg = SDValue(WhilePair, 0);
2069
2070 for (unsigned I = 0; I < 2; ++I)
2071 ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
2072 SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));
2073
2074 CurDAG->RemoveDeadNode(N);
2075}
2076
2077void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2078 unsigned Opcode) {
2079 EVT VT = N->getValueType(ResNo: 0);
2080 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
2081 SDValue Ops = createZTuple(Regs);
2082 SDLoc DL(N);
2083 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Op1: Ops);
2084 SDValue SuperReg = SDValue(Intrinsic, 0);
2085 for (unsigned i = 0; i < NumVecs; ++i)
2086 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
2087 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2088
2089 CurDAG->RemoveDeadNode(N);
2090}
2091
/// Select a chained FP8 conversion intrinsic: forwards operands 2.. plus the
/// chain, then unpacks \p NumVecs result vectors from the untyped
/// super-register and rewires the chain.
void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
                                                unsigned Opcode) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  // Skip operand 0 (chain) and operand 1 (intrinsic ID).
  SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
  Ops.push_back(/*Chain*/ Elt: N->getOperand(Num: 0));

  SDNode *Instruction =
      CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(N);
}
2112
/// Select a destructive multi-vector intrinsic. The first \p NumVecs vector
/// operands form the destructive destination/source tuple; the second source
/// is either another \p NumVecs-wide tuple (\p IsZmMulti) or a single vector.
/// An optional leading predicate (\p HasPred) and any trailing operands are
/// forwarded unchanged.
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
                                                          unsigned NumVecs,
                                                          bool IsZmMulti,
                                                          unsigned Opcode,
                                                          bool HasPred) {
  assert(Opcode != 0 && "Unexpected opcode");

  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
  SmallVector<SDValue, 4> Ops;

  // Consume the next NumVecs operands and pack them into one Z-tuple,
  // advancing the iterator past them.
  auto GetMultiVecOperand = [&]() {
    SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
    OpsIter += NumVecs;
    return createZMulTuple(Regs);
  };

  if (HasPred)
    Ops.push_back(Elt: *OpsIter++);

  Ops.push_back(Elt: GetMultiVecOperand());
  if (IsZmMulti)
    Ops.push_back(Elt: GetMultiVecOperand());
  else
    Ops.push_back(Elt: *OpsIter++);

  // Append any remaining operands.
  Ops.append(in_start: OpsIter, in_end: N->op_end());
  SDNode *Intrinsic;
  Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  // Unpack each result vector from the untyped super-register.
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  CurDAG->RemoveDeadNode(N);
}
2151
/// Select an SVE predicated structured load of \p NumVecs vectors, choosing
/// between the reg+imm (\p Opc_ri) and reg+reg (\p Opc_rr) addressing forms.
/// \p IsIntr shifts the operand indices by one for intrinsic nodes that carry
/// the intrinsic ID.
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr, bool IsIntr) {
  assert(Scale < 5 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Chain = N->getOperand(Num: 0);

  // Optimize addressing mode.
  SDValue Base, Offset;
  unsigned Opc;
  std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: IsIntr ? 3 : 2),
      OldOffset: CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(Num: IsIntr ? 2 : 1), // Predicate
                   Base,                          // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
  // Unpack each result vector from the untyped super-register.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}
2184
/// Select a contiguous multi-vector load governed by a predicate-as-counter
/// register, choosing between the reg+imm (\p Opc_ri) and reg+reg
/// (\p Opc_rr) addressing forms.
void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
                                                          unsigned NumVecs,
                                                          unsigned Scale,
                                                          unsigned Opc_ri,
                                                          unsigned Opc_rr) {
  assert(Scale < 4 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Chain = N->getOperand(Num: 0);

  SDValue PNg = N->getOperand(Num: 2);
  SDValue Base = N->getOperand(Num: 3);
  SDValue Offset = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
  unsigned Opc;
  // Optimize the addressing mode (reg+imm vs reg+reg).
  std::tie(args&: Opc, args&: Base, args&: Offset) =
      findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, OldBase: Base, OldOffset: Offset, Scale);

  SDValue Ops[] = {PNg,            // Predicate-as-counter
                   Base,           // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
  // Unpack each result vector from the untyped super-register.
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}
2219
2220void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2221 unsigned Opcode) {
2222 if (N->getValueType(ResNo: 0) != MVT::nxv4f32)
2223 return;
2224 SelectUnaryMultiIntrinsic(N, NumOutVecs: NumVecs, IsTupleInput: true, Opc: Opcode);
2225}
2226
/// Select a ZT0 lookup-table (LUTI) intrinsic with a lane immediate. Nothing
/// is selected when the immediate (operand 4) exceeds \p MaxImm or when the
/// ZT operand cannot be mapped to the ZT0 register.
void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
                                                    unsigned NumOutVecs,
                                                    unsigned Opc,
                                                    uint32_t MaxImm) {
  // Reject out-of-range lane immediates.
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 4)))
    if (Imm->getZExtValue() > MaxImm)
      return;

  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(N: Node->getOperand(Num: 2), Imm&: ZtValue))
    return;

  SDValue Chain = Node->getOperand(Num: 0);
  SDValue Ops[] = {ZtValue, Node->getOperand(Num: 3), Node->getOperand(Num: 4), Chain};
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  // Unpack each result vector from the untyped super-register.
  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(F: SDValue(Node, I), T: CurDAG->getTargetExtractSubreg(
                                       SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(N: Node);
}
2257
/// Select a ZT0 lookup-table (LUTI) intrinsic whose source is a two-vector
/// tuple (operands 3 and 4). Nothing is selected when the ZT operand cannot
/// be mapped to the ZT0 register.
void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                unsigned NumOutVecs,
                                                unsigned Opc) {
  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(N: Node->getOperand(Num: 2), Imm&: ZtValue))
    return;

  SDValue Chain = Node->getOperand(Num: 0);
  SDValue Ops[] = {ZtValue,
                   createZMulTuple(Regs: {Node->getOperand(Num: 3), Node->getOperand(Num: 4)}),
                   Chain};

  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  // Unpack each result vector from the untyped super-register.
  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(F: SDValue(Node, I), T: CurDAG->getTargetExtractSubreg(
                                       SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(N: Node);
}
2286
2287void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2288 unsigned Op) {
2289 SDLoc DL(N);
2290 EVT VT = N->getValueType(ResNo: 0);
2291
2292 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
2293 SDValue Zd = createZMulTuple(Regs);
2294 SDValue Zn = N->getOperand(Num: 1 + NumVecs);
2295 SDValue Zm = N->getOperand(Num: 2 + NumVecs);
2296
2297 SDValue Ops[] = {Zd, Zn, Zm};
2298
2299 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT: MVT::Untyped, Ops);
2300 SDValue SuperReg = SDValue(Intrinsic, 0);
2301 for (unsigned i = 0; i < NumVecs; ++i)
2302 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
2303 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2304
2305 CurDAG->RemoveDeadNode(N);
2306}
2307
2308bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2309 switch (BaseReg) {
2310 default:
2311 return false;
2312 case AArch64::ZA:
2313 case AArch64::ZAB0:
2314 if (TileNum == 0)
2315 break;
2316 return false;
2317 case AArch64::ZAH0:
2318 if (TileNum <= 1)
2319 break;
2320 return false;
2321 case AArch64::ZAS0:
2322 if (TileNum <= 3)
2323 break;
2324 return false;
2325 case AArch64::ZAD0:
2326 if (TileNum <= 7)
2327 break;
2328 return false;
2329 }
2330
2331 BaseReg += TileNum;
2332 return true;
2333}
2334
/// Select an SME tile-to-vector move of \p NumVecs vectors. \p MaxIdx and
/// \p Scale bound the slice-index addressing; \p BaseReg is the base of the
/// ZA tile register group being read (AArch64::ZA for the whole array).
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  // When reading a specific tile (not the whole ZA array), operand 2 is the
  // tile number and the slice base follows it.
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = N->getConstantOperandVal(Num: 2);

  // Resolve BaseReg+TileNum to the concrete tile register; bail out on an
  // out-of-range tile number.
  if (!SelectSMETile(BaseReg, TileNum))
    return;

  SDValue SliceBase, Base, Offset;
  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(Num: 2);
  else
    SliceBase = N->getOperand(Num: 3);

  if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
    return;

  SDLoc DL(N);
  // ZA tile register operand.
  SDValue SubReg = CurDAG->getRegister(Reg: BaseReg, VT: MVT::Other);
  SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(Num: 0)};
  SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(ResNo: 0);
  // Unpack each result vector from the untyped super-register.
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(F: SDValue(N, I),
                T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
                                               Operand: SDValue(Mov, 0)));
  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}
2369
2370void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2371 unsigned Op, unsigned MaxIdx,
2372 unsigned Scale, unsigned BaseReg) {
2373 // Slice can be in different positions
2374 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2375 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2376 SDValue SliceBase = N->getOperand(Num: 2);
2377 if (BaseReg != AArch64::ZA)
2378 SliceBase = N->getOperand(Num: 3);
2379
2380 SDValue Base, Offset;
2381 if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
2382 return;
2383 // The correct Za tile number is computed in Machine Instruction
2384 // See EmitZAInstr
2385 // DAG cannot select Za tile as an output register with ZReg
2386 SDLoc DL(N);
2387 SmallVector<SDValue, 6> Ops;
2388 if (BaseReg != AArch64::ZA )
2389 Ops.push_back(Elt: N->getOperand(Num: 2));
2390 Ops.push_back(Elt: Base);
2391 Ops.push_back(Elt: Offset);
2392 Ops.push_back(Elt: N->getOperand(Num: 0)); //Chain
2393 SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2394
2395 EVT VT = N->getValueType(ResNo: 0);
2396 for (unsigned I = 0; I < NumVecs; ++I)
2397 ReplaceUses(F: SDValue(N, I),
2398 T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
2399 Operand: SDValue(Mov, 0)));
2400
2401 // Copy chain
2402 unsigned ChainIdx = NumVecs;
2403 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1));
2404 CurDAG->RemoveDeadNode(N);
2405}
2406
/// Select a unary multi-vector intrinsic producing \p NumOutVecs results.
/// If \p IsTupleInput, all input vectors are packed into a single Z-tuple
/// operand; otherwise each input vector is passed individually.
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
                                                    unsigned NumOutVecs,
                                                    bool IsTupleInput,
                                                    unsigned Opc) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  // Operand 0 is the intrinsic ID; everything after it is an input vector.
  unsigned NumInVecs = N->getNumOperands() - 1;

  SmallVector<SDValue, 6> Ops;
  if (IsTupleInput) {
    assert((NumInVecs == 2 || NumInVecs == 4) &&
           "Don't know how to handle multi-register input!");
    SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumInVecs));
    Ops.push_back(Elt: createZMulTuple(Regs));
  } else {
    // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
    for (unsigned I = 0; I < NumInVecs; I++)
      Ops.push_back(Elt: N->getOperand(Num: 1 + I));
  }

  SDNode *Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Res, 0);

  // Unpack each result vector from the untyped super-register.
  for (unsigned I = 0; I < NumOutVecs; I++)
    ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));
  CurDAG->RemoveDeadNode(N);
}
2435
2436void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2437 unsigned Opc) {
2438 SDLoc dl(N);
2439 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2440
2441 // Form a REG_SEQUENCE to force register allocation.
2442 bool Is128Bit = VT.getSizeInBits() == 128;
2443 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
2444 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2445
2446 SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), N->getOperand(Num: 0)};
2447 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);
2448
2449 // Transfer memoperands.
2450 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2451 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2452
2453 ReplaceNode(F: N, T: St);
2454}
2455
/// Select an SVE predicated store of \p NumVecs vectors, choosing between the
/// reg+imm (\p Opc_ri) and reg+reg (\p Opc_rr) addressing forms.
void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
  unsigned Opc;
  SDValue Offset, Base;
  std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: NumVecs + 3),
      OldOffset: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64), Scale);

  SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), // predicate
                   Base,                                // address
                   Offset,                              // offset
                   N->getOperand(Num: 0)};                     // chain
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);

  ReplaceNode(F: N, T: St);
}
2480
2481bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2482 SDValue &OffImm) {
2483 SDLoc dl(N);
2484 const DataLayout &DL = CurDAG->getDataLayout();
2485 const TargetLowering *TLI = getTargetLowering();
2486
2487 // Try to match it for the frame address
2488 if (auto FINode = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2489 int FI = FINode->getIndex();
2490 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
2491 OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
2492 return true;
2493 }
2494
2495 return false;
2496}
2497
/// Select a post-incremented structured store of \p NumVecs vectors. The
/// machine node's results are the updated base register (i64) and the chain.
void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(Num: NumVecs + 1), // base register
                   N->getOperand(Num: NumVecs + 2), // Incremental
                   N->getOperand(Num: 0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  ReplaceNode(F: N, T: St);
}
2518
2519namespace {
2520/// WidenVector - Given a value in the V64 register class, produce the
2521/// equivalent value in the V128 register class.
2522class WidenVector {
2523 SelectionDAG &DAG;
2524
2525public:
2526 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2527
2528 SDValue operator()(SDValue V64Reg) {
2529 EVT VT = V64Reg.getValueType();
2530 unsigned NarrowSize = VT.getVectorNumElements();
2531 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2532 MVT WideTy = MVT::getVectorVT(VT: EltTy, NumElements: 2 * NarrowSize);
2533 SDLoc DL(V64Reg);
2534
2535 SDValue Undef =
2536 SDValue(DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: WideTy), 0);
2537 return DAG.getTargetInsertSubreg(SRIdx: AArch64::dsub, DL, VT: WideTy, Operand: Undef, Subreg: V64Reg);
2538 }
2539};
2540} // namespace
2541
2542/// NarrowVector - Given a value in the V128 register class, produce the
2543/// equivalent value in the V64 register class.
2544static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2545 EVT VT = V128Reg.getValueType();
2546 unsigned WideSize = VT.getVectorNumElements();
2547 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2548 MVT NarrowTy = MVT::getVectorVT(VT: EltTy, NumElements: WideSize / 2);
2549
2550 return DAG.getTargetExtractSubreg(SRIdx: AArch64::dsub, DL: SDLoc(V128Reg), VT: NarrowTy,
2551 Operand: V128Reg);
2552}
2553
/// Select a load-lane operation: load one lane into each of \p NumVecs
/// vectors. Operand layout of \p N: chain (0), the NumVecs input vectors
/// starting at operand 2, the lane number (NumVecs+2) and the address
/// (NumVecs+3).
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  // 64-bit vectors are widened to Q registers because the tuple below is a
  // Q-register tuple; they are narrowed back after selection.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));

  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  // The machine node produces the whole register tuple (untyped) plus a
  // chain.
  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);

  // Operands: tuple, lane number, address (NumVecs+3), chain (0).
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
                   N->getOperand(Num: NumVecs + 3), N->getOperand(Num: 0)};
  SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  // Split the tuple result back into the individual vector results.
  EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT, Operand: SuperReg);
    // Undo the widening performed above for 64-bit vectors.
    if (Narrow)
      NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
    ReplaceUses(F: SDValue(N, i), T: NV);
  }

  // Result NumVecs of the original node is its chain.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}
2591
/// Select a post-indexed load-lane operation. Operand layout of \p N:
/// chain (0), the NumVecs input vectors starting at operand 1, the lane
/// number (NumVecs+1), the base register (NumVecs+2) and the increment
/// (NumVecs+3). The machine node additionally produces the written-back
/// base register.
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  // 64-bit vectors are widened to Q registers for the tuple and narrowed
  // back afterwards.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));

  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(ResNo: 0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(Val: LaneNo, DL: dl,
                                             VT: MVT::i64), // Lane Number
                   N->getOperand(Num: NumVecs + 2), // Base register
                   N->getOperand(Num: NumVecs + 3), // Incremental
                   N->getOperand(Num: 0)};
  SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    // Single-vector case: no tuple to unpack, just undo any widening.
    ReplaceUses(F: SDValue(N, 0),
                T: Narrow ? NarrowVector(V128Reg: SuperReg, DAG&: *CurDAG) : SuperReg);
  } else {
    // Split the tuple result back into the individual vector results.
    EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT,
                                                  Operand: SuperReg);
      if (Narrow)
        NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
      ReplaceUses(F: SDValue(N, i), T: NV);
    }
  }

  // Update the Chain
  ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
2645
/// Select a store-lane operation: store one lane from each of \p NumVecs
/// vectors. Operand layout of \p N: chain (0), the NumVecs vectors starting
/// at operand 2, the lane number (NumVecs+2) and the address (NumVecs+3).
void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
  // 64-bit vectors are widened to Q registers for the Q-register tuple.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));

  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);

  // Operands: tuple, lane number, address (NumVecs+3), chain (0).
  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
                   N->getOperand(Num: NumVecs + 3), N->getOperand(Num: 0)};
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

  ReplaceNode(F: N, T: St);
}
2673
/// Select a post-indexed store-lane operation. Operand layout of \p N:
/// chain (0), the NumVecs vectors starting at operand 1, the lane number
/// (NumVecs+1), the base register (NumVecs+2) and the increment
/// (NumVecs+3). The machine node produces the written-back base register
/// and a chain.
void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
  // 64-bit vectors are widened to Q registers for the Q-register tuple.
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));

  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
                   N->getOperand(Num: NumVecs + 2), // Base Register
                   N->getOperand(Num: NumVecs + 3), // Incremental
                   N->getOperand(Num: 0)};
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

  ReplaceNode(F: N, T: St);
}
2706
2707static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2708 unsigned &Opc, SDValue &Opd0,
2709 unsigned &LSB, unsigned &MSB,
2710 unsigned NumberOfIgnoredLowBits,
2711 bool BiggerPattern) {
2712 assert(N->getOpcode() == ISD::AND &&
2713 "N must be a AND operation to call this function");
2714
2715 EVT VT = N->getValueType(ResNo: 0);
2716
2717 // Here we can test the type of VT and return false when the type does not
2718 // match, but since it is done prior to that call in the current context
2719 // we turned that into an assert to avoid redundant code.
2720 assert((VT == MVT::i32 || VT == MVT::i64) &&
2721 "Type checking must have been done before calling this function");
2722
2723 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2724 // changed the AND node to a 32-bit mask operation. We'll have to
2725 // undo that as part of the transform here if we want to catch all
2726 // the opportunities.
2727 // Currently the NumberOfIgnoredLowBits argument helps to recover
2728 // from these situations when matching bigger pattern (bitfield insert).
2729
2730 // For unsigned extracts, check for a shift right and mask
2731 uint64_t AndImm = 0;
2732 if (!isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: AndImm))
2733 return false;
2734
2735 const SDNode *Op0 = N->getOperand(Num: 0).getNode();
2736
2737 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2738 // simplified. Try to undo that
2739 AndImm |= maskTrailingOnes<uint64_t>(N: NumberOfIgnoredLowBits);
2740
2741 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2742 if (AndImm & (AndImm + 1))
2743 return false;
2744
2745 bool ClampMSB = false;
2746 uint64_t SrlImm = 0;
2747 // Handle the SRL + ANY_EXTEND case.
2748 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2749 isOpcWithIntImmediate(N: Op0->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
2750 // Extend the incoming operand of the SRL to 64-bit.
2751 Opd0 = Widen(CurDAG, N: Op0->getOperand(Num: 0).getOperand(i: 0));
2752 // Make sure to clamp the MSB so that we preserve the semantics of the
2753 // original operations.
2754 ClampMSB = true;
2755 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2756 isOpcWithIntImmediate(N: Op0->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
2757 Imm&: SrlImm)) {
2758 // If the shift result was truncated, we can still combine them.
2759 Opd0 = Op0->getOperand(Num: 0).getOperand(i: 0);
2760
2761 // Use the type of SRL node.
2762 VT = Opd0->getValueType(ResNo: 0);
2763 } else if (isOpcWithIntImmediate(N: Op0, Opc: ISD::SRL, Imm&: SrlImm)) {
2764 Opd0 = Op0->getOperand(Num: 0);
2765 ClampMSB = (VT == MVT::i32);
2766 } else if (BiggerPattern) {
2767 // Let's pretend a 0 shift right has been performed.
2768 // The resulting code will be at least as good as the original one
2769 // plus it may expose more opportunities for bitfield insert pattern.
2770 // FIXME: Currently we limit this to the bigger pattern, because
2771 // some optimizations expect AND and not UBFM.
2772 Opd0 = N->getOperand(Num: 0);
2773 } else
2774 return false;
2775
2776 // Bail out on large immediates. This happens when no proper
2777 // combining/constant folding was performed.
2778 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2779 LLVM_DEBUG(
2780 (dbgs() << N
2781 << ": Found large shift immediate, this should not happen\n"));
2782 return false;
2783 }
2784
2785 LSB = SrlImm;
2786 MSB = SrlImm +
2787 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(Value: AndImm)
2788 : llvm::countr_one<uint64_t>(Value: AndImm)) -
2789 1;
2790 if (ClampMSB)
2791 // Since we're moving the extend before the right shift operation, we need
2792 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2793 // the zeros which would get shifted in with the original right shift
2794 // operation.
2795 MSB = MSB > 31 ? 31 : MSB;
2796
2797 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2798 return true;
2799}
2800
/// Try to match (sign_extend_inreg (srl/sra X, imm)) as a signed bitfield
/// extract (SBFM). On success, fills in \p Opc, \p Opd0 and the
/// \p Immr / \p Imms fields of the SBFM and returns true.
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(ResNo: 0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(Num: 0);
  // Look through a truncate and perform the extract in the wider type.
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(Num: 0);
    VT = Op->getValueType(ResNo: 0);
    BitWidth = VT.getSizeInBits();
  }

  // Accept either a logical or an arithmetic right shift by an immediate.
  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRL, Imm&: ShiftImm) &&
      !isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
    return false;

  // Width of the field being sign-extended, taken from the VT operand.
  unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
  // The extracted field must lie entirely within the source value.
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(i: 0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}
2833
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern which basically extracts several
  // continuous bits from the source value and places it from the LSB of the
  // destination value, all other bits of the destination value are set to
  // zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm to search for the bit width.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, Log2_64(MaskImm)
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  // The shifted value must be an AND with a constant mask.
  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: AndMask))
    return false;

  // Extract from the original (pre-AND) value.
  Opd0 = N->getOperand(Num: 0).getOperand(i: 0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
    return false;

  // Check whether we really have several bits extract here.
  // The mask shifted down by the shift amount must be a contiguous run of
  // ones starting at bit 0.
  if (!isMask_64(Value: AndMask >> SrlImm))
    return false;

  Opc = N->getValueType(ResNo: 0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  LSB = SrlImm;
  // The highest set bit of the mask is the MSB of the extracted field.
  MSB = llvm::Log2_64(Value: AndMask);
  return true;
}
2873
/// Try to match an SRL/SRA node (possibly of an SHL or a truncate) as a
/// bitfield extract (UBFM/SBFM). On success, fills in \p Opc, \p Opd0 and
/// the \p Immr / \p Imms fields and returns true.
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(ResNo: 0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB&: Immr, MSB&: Imms))
    return true;

  // We're looking for a shift of a shift.
  uint64_t ShlImm = 0;
  uint64_t TruncBits = 0;
  if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(Num: 0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 could
    // be considered as setting high 32 bits as zero. Our strategy here is to
    // always generate 64bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
    TruncBits = Opd0->getValueType(ResNo: 0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0.getValueType();
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(Num: 0);
  } else
    return false;

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (ShlImm >= VT.getSizeInBits()) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
    return false;

  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  // The rotation is the net right shift; if the left shift is larger, wrap
  // around modulo the register width (UBFM/SBFM immr semantics).
  int immr = SrlImm - ShlImm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  // The top bit of the field, accounting for bits discarded by the left
  // shift and by a truncate.
  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}
2940
/// Try to select (sext (sra X, imm)) from i32 to i64 as a single SBFMXri on
/// the widened operand. Returns true if the node was replaced.
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND);

  // Only handle the i32 -> i64 extension.
  EVT VT = N->getValueType(ResNo: 0);
  EVT NarrowVT = N->getOperand(Num: 0)->getValueType(ResNo: 0);
  if (VT != MVT::i64 || NarrowVT != MVT::i32)
    return false;

  uint64_t ShiftImm;
  SDValue Op = N->getOperand(Num: 0);
  if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
    return false;

  SDLoc dl(N);
  // Extend the incoming operand of the shift to 64-bits.
  SDValue Opd0 = Widen(CurDAG, N: Op.getOperand(i: 0));
  // SBFM from bit ShiftImm up to the sign bit of the narrow value (bit 31).
  unsigned Immr = ShiftImm;
  unsigned Imms = NarrowVT.getSizeInBits() - 1;
  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
                   CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
  CurDAG->SelectNodeTo(N, MachineOpc: AArch64::SBFMXri, VT, Ops);
  return true;
}
2964
/// Dispatch to the appropriate bitfield-extract matcher based on the opcode
/// of \p N (AND, SRL/SRA, SIGN_EXTEND_INREG, or an already-selected
/// SBFM/UBFM machine node). On success fills Opc/Opd0/Immr/Imms and returns
/// true.
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(ResNo: 0) != MVT::i32 && N->getValueType(ResNo: 0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    // Fall through to the machine-opcode handling below for
    // already-selected nodes.
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB&: Immr, MSB&: Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

  case ISD::SIGN_EXTEND_INREG:
    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  }

  // An existing SBFM/UBFM machine node is itself a bitfield extract; read
  // its immediates directly.
  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(Num: 0);
    Immr = N->getConstantOperandVal(Num: 1);
    Imms = N->getConstantOperandVal(Num: 2);
    return true;
  }
  // Unreachable
  return false;
}
3005
/// Try to select \p N as a bitfield extract (SBFM/UBFM). Returns true if
/// the node was replaced.
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    return false;

  EVT VT = N->getValueType(ResNo: 0);
  SDLoc dl(N);

  // If the bit extract operation is 64bit but the original type is 32bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT: MVT::i64),
                       CurDAG->getTargetConstant(Val: Imms, DL: dl, VT: MVT::i64)};

    // Do the extract in 64 bits, then take the low 32 bits (sub_32).
    SDNode *BFM = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i64, Ops: Ops64);
    SDValue Inner = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl,
                                                   VT: MVT::i32, Operand: SDValue(BFM, 0));
    ReplaceNode(F: N, T: Inner.getNode());
    return true;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
                   CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
  CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
  return true;
}
3033
3034/// Does DstMask form a complementary pair with the mask provided by
3035/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3036/// this asks whether DstMask zeroes precisely those bits that will be set by
3037/// the other half.
3038static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3039 unsigned NumberOfIgnoredHighBits, EVT VT) {
3040 assert((VT == MVT::i32 || VT == MVT::i64) &&
3041 "i32 or i64 mask type expected!");
3042 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3043
3044 // Enable implicitTrunc as we're intentionally ignoring high bits.
3045 APInt SignificantDstMask =
3046 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3047 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(width: BitWidth);
3048
3049 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3050 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3051}
3052
3053// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
3056// E.g., looking for useful bit of x
3057// 1. y = x & 0x7
3058// 2. z = y >> 2
3059// After #1, x useful bits are 0x7, then the useful bits of x, live through
3060// y.
3061// After #2, the useful bits of x are 0x4.
3062// However, if x is used on an unpredictable instruction, then all its bits
3063// are useful.
3064// E.g.
3065// 1. y = x & 0x7
3066// 2. z = y >> 2
3067// 3. str x, [@x]
3068static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3069
3070static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
3071 unsigned Depth) {
3072 uint64_t Imm =
3073 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
3074 Imm = AArch64_AM::decodeLogicalImmediate(val: Imm, regSize: UsefulBits.getBitWidth());
3075 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3076 getUsefulBits(Op, UsefulBits, Depth: Depth + 1);
3077}
3078
/// Propagate useful bits through a bitfield-move operand described by the
/// (Imm, MSB) immediate pair (UBFM-style semantics: extract when MSB >= Imm,
/// insert-in-zero when MSB < Imm).
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    // Extract case: the field [Imm, MSB] of the source lands at bit 0 of
    // the result. Build a mask of (MSB - Imm + 1) low ones.
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result
    getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
    // The interesting part was starting at Imm in the argument
    OpUsefulBits <<= Imm;
  } else {
    // Insert case: the low (MSB + 1) bits of the source are rotated up to
    // bit (BitWidth - Imm) of the result.
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
    // The interesting part was at zero in the argument
    OpUsefulBits.lshrInPlace(ShiftAmt: OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}
3105
3106static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3107 unsigned Depth) {
3108 uint64_t Imm =
3109 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
3110 uint64_t MSB =
3111 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
3112
3113 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3114}
3115
/// Propagate useful bits through the shifted-register operand of an
/// ORR(shifted register) machine node. Operand 2 encodes the shift type and
/// amount.
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
  // Start from an all-ones mask of the same width.
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    // Bits useful in the result map to bits ShiftAmt lower in the operand.
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
    Mask <<= ShiftAmt;
  } else
    // Unhandled shift type: leave UsefulBits untouched (conservatively all
    // bits stay useful).
    return;

  UsefulBits &= Mask;
}
3143
/// Propagate useful bits through a BFM machine node for the operand
/// \p Orig. Depending on the (Imm, MSB) immediates the BFM acts as a BFXIL
/// (MSB >= Imm) or a BFI (MSB < Imm); \p Orig may appear as the destination
/// (operand 0), the inserted value (operand 1), or both.
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Val: Op.getOperand(i: 3).getNode())->getZExtValue();

  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  // Useful bits of the BFM's own result, computed from its users.
  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  ResultUsefulBits.flipAllBits();
  APInt Mask(UsefulBits.getBitWidth(), 0);

  getUsefulBits(Op, UsefulBits&: ResultUsefulBits, Depth: Depth + 1);

  if (MSB >= Imm) {
    // The instruction is a BFXIL.
    uint64_t Width = MSB - Imm + 1;
    uint64_t LSB = Imm;

    // Mask of Width low ones.
    OpUsefulBits <<= Width;
    --OpUsefulBits;

    if (Op.getOperand(i: 1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(i: 0) == Orig)
      // Bits starting from LSB in the input contribute to the result.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  } else {
    // The instruction is a BFI.
    uint64_t Width = MSB + 1;
    uint64_t LSB = UsefulBits.getBitWidth() - Imm;

    // Mask of Width ones positioned at LSB in the result.
    OpUsefulBits <<= Width;
    --OpUsefulBits;
    OpUsefulBits <<= LSB;

    if (Op.getOperand(i: 1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(ShiftAmt: LSB);
    }

    if (Op.getOperand(i: 0) == Orig)
      // The destination's bits outside the inserted field survive.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  }

  UsefulBits &= Mask;
}
3198
/// Narrow \p UsefulBits to the bits of \p Orig that this particular user
/// (\p UserNode) can observe. Only a known set of machine opcodes narrows
/// the set; any other user conservatively keeps all bits useful.
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
                                SDValue Orig, unsigned Depth) {

  // Users of this node should have already been instruction selected
  // FIXME: Can we turn that into an assert?
  if (!UserNode->isMachineOpcode())
    return;

  switch (UserNode->getMachineOpcode()) {
  default:
    return;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDWri:
  case AArch64::ANDXri:
    // We increment Depth only when we call the getUsefulBits
    return getUsefulBitsFromAndWithImmediate(Op: SDValue(UserNode, 0), UsefulBits,
                                             Depth);
  case AArch64::UBFMWri:
  case AArch64::UBFMXri:
    return getUsefulBitsFromUBFM(Op: SDValue(UserNode, 0), UsefulBits, Depth);

  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    // Only narrow when Orig is exclusively the shifted (second) operand; if
    // it also appears unshifted, all its bits remain useful.
    if (UserNode->getOperand(Num: 0) != Orig && UserNode->getOperand(Num: 1) == Orig)
      getUsefulBitsFromOrWithShiftedReg(Op: SDValue(UserNode, 0), UsefulBits,
                                        Depth);
    return;
  case AArch64::BFMWri:
  case AArch64::BFMXri:
    return getUsefulBitsFromBFM(Op: SDValue(UserNode, 0), Orig, UsefulBits, Depth);

  case AArch64::STRBBui:
  case AArch64::STURBBi:
    // A byte store only reads the low 8 bits of the stored value.
    if (UserNode->getOperand(Num: 0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
    return;

  case AArch64::STRHHui:
  case AArch64::STURHHi:
    // A halfword store only reads the low 16 bits of the stored value.
    if (UserNode->getOperand(Num: 0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
    return;
  }
}
3246
/// Compute which bits of \p Op are actually observed by its users, walking
/// the use graph up to SelectionDAG::MaxRecursionDepth. The result is
/// accumulated into \p UsefulBits (at Depth 0 it is initialized to
/// all-ones).
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return;
  // Initialize UsefulBits
  if (!Depth) {
    unsigned Bitwidth = Op.getScalarValueSizeInBits();
    // At the beginning, assume every produced bits is useful
    UsefulBits = APInt(Bitwidth, 0);
    UsefulBits.flipAllBits();
  }
  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);

  // Union over all users: a bit is useful if any user observes it.
  for (SDNode *Node : Op.getNode()->users()) {
    // A use cannot produce useful bits
    APInt UsefulBitsForUse = APInt(UsefulBits);
    getUsefulBitsForUse(UserNode: Node, UsefulBits&: UsefulBitsForUse, Orig: Op, Depth);
    UsersUsefulBits |= UsefulBitsForUse;
  }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point
  UsefulBits &= UsersUsefulBits;
}
3270
/// Create a machine node performing a notional SHL of Op by ShlAmount. If
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
/// 0, return Op unchanged.
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  if (ShlAmount == 0)
    return Op;

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  // Both directions are emitted as UBFM with the appropriate immediates.
  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

  SDNode *ShiftNode;
  if (ShlAmount > 0) {
    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
    ShiftNode = CurDAG->getMachineNode(
        Opcode: UBFMOpc, dl, VT, Op1: Op,
        Op2: CurDAG->getTargetConstant(Val: BitWidth - ShlAmount, DL: dl, VT),
        Op3: CurDAG->getTargetConstant(Val: BitWidth - 1 - ShlAmount, DL: dl, VT));
  } else {
    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
    assert(ShlAmount < 0 && "expected right shift");
    int ShrAmount = -ShlAmount;
    ShiftNode = CurDAG->getMachineNode(
        Opcode: UBFMOpc, dl, VT, Op1: Op, Op2: CurDAG->getTargetConstant(Val: ShrAmount, DL: dl, VT),
        Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL: dl, VT));
  }

  return SDValue(ShiftNode, 0);
}
3301
3302// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3303static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3304 bool BiggerPattern,
3305 const uint64_t NonZeroBits,
3306 SDValue &Src, int &DstLSB,
3307 int &Width);
3308
// For bit-field-positioning pattern "(shl VAL, N)".
3310static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3311 bool BiggerPattern,
3312 const uint64_t NonZeroBits,
3313 SDValue &Src, int &DstLSB,
3314 int &Width);
3315
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
/// On success, \p Src is the value being positioned and \p DstLSB / \p Width
/// describe where the field lands in the result.
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
                                    bool BiggerPattern, SDValue &Src,
                                    int &DstLSB, int &Width) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  (void)BitWidth;
  assert(BitWidth == 32 || BitWidth == 64);

  KnownBits Known = CurDAG->computeKnownBits(Op);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value
  const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
  // The possibly-set bits must form one contiguous run for a bitfield move
  // to apply.
  if (!isShiftedMask_64(Value: NonZeroBits))
    return false;

  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::AND:
    return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  case ISD::SHL:
    return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  }

  return false;
}
3347
// Matcher for the "(and (shl VAL, N), ShiftedMask)" form of bit-field
// positioning, optionally with an any_extend between the AND and the SHL.
// See isBitfieldPositioningOp for the meaning of the output parameters.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width) {
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees VT is one of i32 or i64");
  (void)VT;

  uint64_t AndImm;
  if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::AND, Imm&: AndImm))
    return false;

  // If (~AndImm & NonZeroBits) is not zero at POS, we know that
  // 1) (AndImm & (1 << POS) == 0)
  // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
  //
  // 1) and 2) don't agree so something must be wrong (e.g., in
  // 'SelectionDAG::computeKnownBits')
  assert((~AndImm & NonZeroBits) == 0 &&
         "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");

  SDValue AndOp0 = Op.getOperand(i: 0);

  uint64_t ShlImm;
  SDValue ShlOp0;
  if (isOpcWithIntImmediate(N: AndOp0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
    ShlOp0 = AndOp0.getOperand(i: 0);
  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
             isOpcWithIntImmediate(N: AndOp0.getOperand(i: 0).getNode(), Opc: ISD::SHL,
                                   Imm&: ShlImm)) {
    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"

    // ShlVal == shl(val, N), which is a left shift on a smaller type.
    SDValue ShlVal = AndOp0.getOperand(i: 0);

    // Since this is after type legalization and ShlVal is extended to MVT::i64,
    // expect VT to be MVT::i32.
    assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");

    // Widens 'val' to MVT::i64 as the source of bit field positioning.
    ShlOp0 = Widen(CurDAG, N: ShlVal.getOperand(i: 0));
  } else
    return false;

  // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
  // then we'll end up generating AndOp0+UBFIZ instead of just keeping
  // AndOp0+AND.
  if (!BiggerPattern && !AndOp0.hasOneUse())
    return false;

  // Field position and width are read off the known-bits shifted mask.
  DstLSB = llvm::countr_zero(Val: NonZeroBits);
  Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);

  // Bail out on large Width. This happens when no proper combining / constant
  // folding was performed.
  if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
    // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
    // "val".
    // If VT is i32, what Width >= 32 means:
    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //   demands at least 'Width' bits (after dag-combiner). This together with
    //   `any_extend` Op (undefined higher bits) indicates missed combination
    //   when lowering the 'and' IR instruction to a machine IR instruction.
    LLVM_DEBUG(
        dbgs()
        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");
    return false;
  }

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount. BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  // Re-shift the source so its field lines up with DstLSB (no-op when the
  // shift amounts already agree).
  Src = getLeftShift(CurDAG, Op: ShlOp0, ShlAmount: ShlImm - DstLSB);
  return true;
}
3435
// For node (shl (and val, mask), N), returns true if the node is equivalent to
// UBFIZ, i.e. the AND only keeps bits that survive the left shift.
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
                                              SDValue &Src, int &DstLSB,
                                              int &Width) {
  // The caller must have verified that Op is a left shift by the constant
  // amount ShlImm; the asserts below re-check that contract.
  assert(Op.getOpcode() == ISD::SHL &&
         "Op.getNode() should be a SHL node to call this function");
  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
         "Op.getNode() should shift ShlImm to call this function");

  uint64_t AndImm = 0;
  SDValue Op0 = Op.getOperand(i: 0);
  if (!isOpcWithIntImmediate(N: Op0.getNode(), Opc: ISD::AND, Imm&: AndImm))
    return false;

  // Shifting the mask left by ShlImm and back right discards the mask bits
  // that the shift would throw away; only the surviving part matters.
  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
  if (isMask_64(Value: ShiftedAndImm)) {
    // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
    // should end with Mask, and could be prefixed with random bits if those
    // bits are shifted out.
    //
    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
    // the AND result corresponding to those bits are shifted out, so it's fine
    // to not extract them.
    Width = llvm::countr_one(Value: ShiftedAndImm);
    DstLSB = ShlImm;
    Src = Op0.getOperand(i: 0);
    return true;
  }
  return false;
}
3469
// Matcher for the "(shl VAL, N)" form of bit-field positioning. See
// isBitfieldPositioningOp for the meaning of the output parameters.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width) {
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees that type is i32 or i64");
  (void)VT;

  uint64_t ShlImm;
  if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SHL, Imm&: ShlImm))
    return false;

  // For !BiggerPattern (UBFIZ), bail out if the shift has other users; they
  // would keep the SHL alive and the rewrite would just add work.
  if (!BiggerPattern && !Op.hasOneUse())
    return false;

  // Prefer the exact "(shl (and val, mask), N)" == UBFIZ form when it applies.
  if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
    return true;

  // Otherwise read field position and width off the known-bits shifted mask.
  DstLSB = llvm::countr_zero(Val: NonZeroBits);
  Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);

  // An extra compensating shift is only worth emitting when matching the
  // bigger pattern (BFI).
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, Op: Op.getOperand(i: 0), ShlAmount: ShlImm - DstLSB);
  return true;
}
3501
3502static bool isShiftedMask(uint64_t Mask, EVT VT) {
3503 assert(VT == MVT::i32 || VT == MVT::i64);
3504 if (VT == MVT::i32)
3505 return isShiftedMask_32(Value: Mask);
3506 return isShiftedMask_64(Value: Mask);
3507}
3508
// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
// inserted only sets known zero bits. Returns true iff N was replaced.
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(ResNo: 0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  uint64_t OrImm;
  if (!isOpcWithIntImmediate(N, Opc: ISD::OR, Imm&: OrImm))
    return false;

  // Skip this transformation if the ORR immediate can be encoded in the ORR.
  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
  // performance neutral.
  if (AArch64_AM::isLogicalImmediate(imm: OrImm, regSize: BitWidth))
    return false;

  uint64_t MaskImm;
  SDValue And = N->getOperand(Num: 0);
  // Must be a single use AND with an immediate operand.
  if (!And.hasOneUse() ||
      !isOpcWithIntImmediate(N: And.getNode(), Opc: ISD::AND, Imm&: MaskImm))
    return false;

  // Compute the Known Zero for the AND as this allows us to catch more general
  // cases than just looking for AND with imm.
  KnownBits Known = CurDAG->computeKnownBits(Op: And);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value.
  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();

  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
  if (!isShiftedMask(Mask: Known.Zero.getZExtValue(), VT))
    return false;

  // The bits being inserted must only set those bits that are known to be zero.
  if ((OrImm & NotKnownZero) != 0) {
    // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
    // currently handle this case.
    return false;
  }

  // BFI/BFXIL dst, src, #lsb, #width.
  int LSB = llvm::countr_one(Value: NotKnownZero);
  int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();

  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
  unsigned ImmR = (BitWidth - LSB) % BitWidth;
  unsigned ImmS = Width - 1;

  // If we're creating a BFI instruction avoid cases where we need more
  // instructions to materialize the BFI constant as compared to the original
  // ORR. A BFXIL will use the same constant as the original ORR, so the code
  // should be no worse in this case.
  bool IsBFI = LSB != 0;
  uint64_t BFIImm = OrImm >> LSB;
  if (IsBFI && !AArch64_AM::isLogicalImmediate(imm: BFIImm, regSize: BitWidth)) {
    // We have a BFI instruction and we know the constant can't be materialized
    // with a ORR-immediate with the zero register.
    unsigned OrChunks = 0, BFIChunks = 0;
    // Compare materialization cost by counting the non-zero 16-bit chunks of
    // each constant (a proxy for the MOVZ/MOVK instructions needed).
    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
      if (((OrImm >> Shift) & 0xFFFF) != 0)
        ++OrChunks;
      if (((BFIImm >> Shift) & 0xFFFF) != 0)
        ++BFIChunks;
    }
    if (BFIChunks > OrChunks)
      return false;
  }

  // Materialize the constant to be inserted.
  SDLoc DL(N);
  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  SDNode *MOVI = CurDAG->getMachineNode(
      Opcode: MOVIOpc, dl: DL, VT, Op1: CurDAG->getTargetConstant(Val: BFIImm, DL, VT));

  // Create the BFI/BFXIL instruction.
  SDValue Ops[] = {And.getOperand(i: 0), SDValue(MOVI, 0),
                   CurDAG->getTargetConstant(Val: ImmR, DL, VT),
                   CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
  return true;
}
3598
// Decides whether Dst can be profitably folded into an ORR-with-shifted-
// register. On success, ShiftedOperand is the register operand to use and
// EncodedShiftImm the encoded shift kind/amount for the ORR.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
                                           SDValue &ShiftedOperand,
                                           uint64_t &EncodedShiftImm) {
  // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
  if (!Dst.hasOneUse())
    return false;

  EVT VT = Dst.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller should guarantee that VT is one of i32 or i64");
  const unsigned SizeInBits = VT.getSizeInBits();

  SDLoc DL(Dst.getNode());
  uint64_t AndImm, ShlImm;
  // Case 1: Dst = (and (srl X, SrlImm), ShiftedMask) -- rewrite the AND+SRL
  // as a single UBFX and report an LSL for the ORR.
  if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::AND, Imm&: AndImm) &&
      isShiftedMask_64(Value: AndImm)) {
    // Avoid transforming 'DstOp0' if it has other uses than the AND node.
    SDValue DstOp0 = Dst.getOperand(i: 0);
    if (!DstOp0.hasOneUse())
      return false;

    // An example to illustrate the transformation
    // From:
    //   lsr x8, x1, #1
    //   and x8, x8, #0x3f80
    //   bfxil x8, x1, #0, #7
    // To:
    //   and x8, x23, #0x7f
    //   ubfx x9, x23, #8, #7
    //   orr x23, x8, x9, lsl #7
    //
    // The number of instructions remains the same, but ORR is faster than BFXIL
    // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
    // the dependency chain is improved after the transformation.
    uint64_t SrlImm;
    if (isOpcWithIntImmediate(N: DstOp0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
      uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(Val: AndImm);
      // The combined extract must stay inside the register.
      if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
        unsigned MaskWidth =
            llvm::countr_one(Value: AndImm >> NumTrailingZeroInShiftedMask);
        unsigned UBFMOpc =
            (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
        // UBFX the field out of the pre-shift value, then shift it back into
        // place via the ORR's LSL.
        SDNode *UBFMNode = CurDAG->getMachineNode(
            Opcode: UBFMOpc, dl: DL, VT, Op1: DstOp0.getOperand(i: 0),
            Op2: CurDAG->getTargetConstant(Val: SrlImm + NumTrailingZeroInShiftedMask, DL,
                                      VT),
            Op3: CurDAG->getTargetConstant(
                Val: SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
        ShiftedOperand = SDValue(UBFMNode, 0);
        EncodedShiftImm = AArch64_AM::getShifterImm(
            ST: AArch64_AM::LSL, Imm: NumTrailingZeroInShiftedMask);
        return true;
      }
    }
    return false;
  }

  // Case 2: a plain left shift folds directly into the ORR as LSL.
  if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    ShiftedOperand = Dst.getOperand(i: 0);
    EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm);
    return true;
  }

  // Case 3: a plain logical right shift folds directly into the ORR as LSR.
  uint64_t SrlImm;
  if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
    ShiftedOperand = Dst.getOperand(i: 0);
    EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm);
    return true;
  }
  return false;
}
3670
// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
// the operands and select it to AArch64::ORR with shifted registers if
// that's more efficient. Returns true iff selection to AArch64::ORR happens.
//
// Src/Dst are the values the caller's BFM match identified as the bits-to-
// insert source and the preserved destination, respectively.
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
                            SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
                            const bool BiggerPattern) {
  EVT VT = N->getValueType(ResNo: 0);
  assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
  assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
          (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
         "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Expect result type to be i32 or i64 since N is combinable to BFM");
  SDLoc DL(N);

  // Bail out if BFM simplifies away one node in BFM Dst.
  if (OrOpd1 != Dst)
    return false;

  const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
  // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
  // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
  if (BiggerPattern) {
    uint64_t SrcAndImm;
    if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::AND, Imm&: SrcAndImm) &&
        isMask_64(Value: SrcAndImm) && OrOpd0.getOperand(i: 0) == Src) {
      // OrOpd0 = AND Src, #Mask
      // So BFM simplifies away one AND node from Src and doesn't simplify away
      // nodes from Dst. If ORR with left-shifted operand also simplifies away
      // one node (from Rd), ORR is better since it has higher throughput and
      // smaller latency than BFM on many AArch64 processors (and for the rest
      // ORR is at least as good as BFM).
      SDValue ShiftedOperand;
      uint64_t EncodedShiftImm;
      if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
                                         EncodedShiftImm)) {
        SDValue Ops[] = {OrOpd0, ShiftedOperand,
                         CurDAG->getTargetConstant(Val: EncodedShiftImm, DL, VT)};
        CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
        return true;
      }
    }
    return false;
  }

  assert((!BiggerPattern) && "BiggerPattern should be handled above");

  uint64_t ShlImm;
  if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    // OrOpd0 is a single-use (shl Src, #imm): fold the shift straight into
    // the ORR's shifted-register operand.
    if (OrOpd0.getOperand(i: 0) == Src && OrOpd0.hasOneUse()) {
      SDValue Ops[] = {
          Dst, Src,
          CurDAG->getTargetConstant(
              Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
      return true;
    }

    // Select the following pattern to left-shifted operand rather than BFI.
    // %val1 = op ..
    // %val2 = shl %val1, #imm
    // %res = or %val1, %val2
    //
    // If N is selected to be BFI, we know that
    // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
    //    BFI)
    // 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
    if (OrOpd0.getOperand(i: 0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
      return true;
    }
  }

  uint64_t SrlImm;
  if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
    // Select the following pattern to right-shifted operand rather than BFXIL.
    // %val1 = op ..
    // %val2 = lshr %val1, #imm
    // %res = or %val1, %val2
    //
    // If N is selected to be BFXIL, we know that
    // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
    //    BFXIL)
    // 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
    if (OrOpd0.getOperand(i: 0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
      return true;
    }
  }

  return false;
}
3773
// Try to select an ISD::OR node as a BFM-family bitfield insertion (BFI /
// BFXIL), or as an ORR-with-shift when that is cheaper. UsefulBits describes
// which result bits are observed by consumers; bits that are ignored relax
// the matching. Returns true iff N was replaced.
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(ResNo: 0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given a OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  //                       isBitfieldExtractOp)
  // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    // I in {0,1} tries BiggerPattern=false; I in {2,3} retries with true.
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(Num: I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand(Num: (I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, N: OrOpd0, Opc&: BFXOpc, Opd0&: Src, Immr&: ImmR, Imms&: ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion we may
      // want to widen the pattern if we want to grab general bitfield
      // move case
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, Op: OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      // BFI is an alias of BFM; translate the (DstLSB, Width) pair to BFM's
      // rotate/最-significant-bit immediates.
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    // Note: this VT shadows the function-level VT; an OR operand has the same
    // type as the OR's result, so the two agree.
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand.
    // This allows to catch more general case than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(Op: OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one
    APInt BitsToBeInserted =
        APInt::getBitsSet(numBits: Known.getBitWidth(), loBit: DstLSB, hiBit: DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand
    uint64_t Imm;
    if (isOpcWithIntImmediate(N: OrOpd1, Opc: ISD::AND, Imm) &&
        isBitfieldDstMask(DstMask: Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(Num: 0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits
      Dst = OrOpd1Val;

    // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
    // with shifted operand is more efficient.
    if (tryOrrWithShift(N, OrOpd0: OrOpd0Val, OrOpd1: OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
      return true;

    // both parts match
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
                     CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(Num: 0);
  SDValue And1 = N->getOperand(Num: 1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(N: And0.getNode(), Opc: ISD::AND, Imm&: Mask0Imm) &&
      isOpcWithIntImmediate(N: And1.getNode(), Opc: ISD::AND, Imm&: Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask: Mask0Imm, VT) || isShiftedMask(Mask: Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask: Mask0Imm, VT)) {
      std::swap(a&: And0, b&: And1);
      std::swap(a&: Mask0Imm, b&: Mask1Imm);
    }

    SDValue Src = And1->getOperand(Num: 0);
    SDValue Dst = And0->getOperand(Num: 0);
    unsigned LSB = llvm::countr_zero(Val: Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    // If the source is itself a single-use SRL, fold its shift amount into
    // the UBFM immediate as long as the combined shift stays in range.
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(N: Src.getNode(), Opc: ISD::SRL, Imm&: LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(Num: 0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        Opcode: ShiftOpc, dl: DL, VT, Op1: Src, Op2: CurDAG->getTargetConstant(Val: LsrImm, DL, VT),
        Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT));

    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(Val: ImmR, DL, VT),
                     CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
    return true;
  }

  return false;
}
3947
3948bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3949 if (N->getOpcode() != ISD::OR)
3950 return false;
3951
3952 APInt NUsefulBits;
3953 getUsefulBits(Op: SDValue(N, 0), UsefulBits&: NUsefulBits);
3954
3955 // If all bits are not useful, just return UNDEF.
3956 if (!NUsefulBits) {
3957 CurDAG->SelectNodeTo(N, MachineOpc: TargetOpcode::IMPLICIT_DEF, VT: N->getValueType(ResNo: 0));
3958 return true;
3959 }
3960
3961 if (tryBitfieldInsertOpFromOr(N, UsefulBits: NUsefulBits, CurDAG))
3962 return true;
3963
3964 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3965}
3966
3967/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3968/// equivalent of a left shift by a constant amount followed by an and masking
3969/// out a contiguous set of bits.
3970bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3971 if (N->getOpcode() != ISD::AND)
3972 return false;
3973
3974 EVT VT = N->getValueType(ResNo: 0);
3975 if (VT != MVT::i32 && VT != MVT::i64)
3976 return false;
3977
3978 SDValue Op0;
3979 int DstLSB, Width;
3980 if (!isBitfieldPositioningOp(CurDAG, Op: SDValue(N, 0), /*BiggerPattern=*/false,
3981 Src&: Op0, DstLSB, Width))
3982 return false;
3983
3984 // ImmR is the rotate right amount.
3985 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3986 // ImmS is the most significant bit of the source to be moved.
3987 unsigned ImmS = Width - 1;
3988
3989 SDLoc DL(N);
3990 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3991 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3992 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3993 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3994 return true;
3995}
3996
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions: since the hardware only reads the low
/// log2(regsize) bits of the amount, ADD/SUB/AND nodes that only affect the
/// discarded bits can be elided. Returns true iff the node was replaced.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(ResNo: 0);

  // Map the generic shift/rotate to the corresponding variable-shift machine
  // instruction.
  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  // Size is the register width; Bits is the number of shift-amount bits the
  // instruction actually consumes (log2 of Size).
  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(Num: 1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(Num: 0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(Num: 0);
    SDValue Add1 = ShiftAmt->getOperand(Num: 1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(N: Add1, Imm&: Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
      // to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(N: Add0, Imm&: Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(N: Add0, Imm&: Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
      // to generate a NOT instead of a SUB from a constant.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
      MachineSDNode *Not =
          CurDAG->getMachineNode(Opcode: NotOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers the
    // bits that are implicitly ANDed off by the above opcodes and if so, skip
    // the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: ISD::AND, Imm&: MaskImm) &&
        !isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: AArch64ISD::ANDS, Imm&: MaskImm))
      return false;

    if ((unsigned)llvm::countr_one(Value: MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(Num: 0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, N: NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(ResNo: 0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL, VT: MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(Opcode: AArch64::SUBREG_TO_REG, dl: DL, VT,
                                                Op1: NewShiftAmt, Op2: SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(Num: 0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
  return true;
}
4121
// Extracts the FP constant behind N -- either a direct ConstantFP or one
// materialized as a constant-pool load through ADDlow -- and asks
// CheckFixedPointOperandConstant whether it encodes a usable fixed-point
// scale for a RegWidth-bit conversion. On success, FixedPos is the number of
// fractional bits as an i32 target constant.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
                                               SDValue &FixedPos,
                                               unsigned RegWidth,
                                               bool isReciprocal) {
  APFloat FVal(0.0);
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: N))
    FVal = CN->getValueAPF();
  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(Val&: N)) {
    // Some otherwise illegal constants are allowed in this case.
    // Only a load whose address is ADDlow of a constant-pool entry qualifies.
    if (LN->getOperand(Num: 1).getOpcode() != AArch64ISD::ADDlow ||
        !isa<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1)))
      return false;

    ConstantPoolSDNode *CN =
        dyn_cast<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1));
    FVal = cast<ConstantFP>(Val: CN->getConstVal())->getValueAPF();
  } else
    return false;

  // A zero return from CheckFixedPointOperandConstant means "not usable".
  if (unsigned FBits =
          CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
    FixedPos = CurDAG->getTargetConstant(Val: FBits, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  return false;
}
4149
// Non-reciprocal form of the fixed-point operand check; see
// checkCVTFixedPointOperandWithFBits.
bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                                   unsigned RegWidth) {
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            /*isReciprocal*/ false);
}
4155
// Vector variant of the fixed-point operand check: looks through an element-
// size-preserving NVCAST/BITCAST and reconstructs the FP immediate from
// MOVIshift/FMOV/DUP nodes before testing it for a usable fractional-bit
// count. On success, FixedPos is the fbits count as an i32 target constant.
bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
                                                 unsigned RegWidth) {
  // Look through a cast that does not change the scalar element size.
  if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
      N.getValueType().getScalarSizeInBits() ==
          N.getOperand(i: 0).getValueType().getScalarSizeInBits())
    N = N.getOperand(i: 0);

  // Reinterpret a raw bit pattern as an IEEE float of the conversion width.
  auto ImmToFloat = [RegWidth](APInt Imm) {
    switch (RegWidth) {
    case 16:
      return APFloat(APFloat::IEEEhalf(), Imm);
    case 32:
      return APFloat(APFloat::IEEEsingle(), Imm);
    case 64:
      return APFloat(APFloat::IEEEdouble(), Imm);
    default:
      llvm_unreachable("Unexpected RegWidth!");
    };
  };

  APFloat FVal(0.0);
  switch (N->getOpcode()) {
  case AArch64ISD::MOVIshift:
    // The immediate bit pattern is (op0 << op1).
    FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(i: 0)
                                          << N.getConstantOperandVal(i: 1)));
    break;
  case AArch64ISD::FMOV:
    FVal = ImmToFloat(DecodeFMOVImm(Imm: N.getConstantOperandVal(i: 0), RegWidth));
    break;
  case AArch64ISD::DUP:
    // Only a splat of a constant scalar is usable here.
    if (isa<ConstantSDNode>(Val: N.getOperand(i: 0)))
      FVal = ImmToFloat(N.getConstantOperandAPInt(i: 0).trunc(width: RegWidth));
    else
      return false;
    break;
  default:
    return false;
  }

  // A zero return from CheckFixedPointOperandConstant means "not usable".
  if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
                                                      /*isReciprocal*/ false)) {
    FixedPos = CurDAG->getTargetConstant(Val: FBits, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  return false;
}
4203
4204bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4205 SDValue &FixedPos,
4206 unsigned RegWidth) {
4207 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4208 /*isReciprocal*/ true);
4209}
4210
4211// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4212// of the string and obtains the integer values from them and combines these
4213// into a single value to be used in the MRS/MSR instruction.
4214static int getIntOperandFromRegisterString(StringRef RegString) {
4215 SmallVector<StringRef, 5> Fields;
4216 RegString.split(A&: Fields, Separator: ':');
4217
4218 if (Fields.size() == 1)
4219 return -1;
4220
4221 assert(Fields.size() == 5
4222 && "Invalid number of fields in read register string");
4223
4224 SmallVector<int, 5> Ops;
4225 bool AllIntFields = true;
4226
4227 for (StringRef Field : Fields) {
4228 unsigned IntField;
4229 AllIntFields &= !Field.getAsInteger(Radix: 10, Result&: IntField);
4230 Ops.push_back(Elt: IntField);
4231 }
4232
4233 assert(AllIntFields &&
4234 "Unexpected non-integer value in special register string.");
4235 (void)AllIntFields;
4236
4237 // Need to combine the integer fields of the string into a single value
4238 // based on the bit encoding of MRS/MSR instruction.
4239 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4240 (Ops[3] << 3) | (Ops[4]);
4241}
4242
4243// Lower the read_register intrinsic to an MRS instruction node if the special
4244// register string argument is either of the form detailed in the ALCE (the
4245// form described in getIntOperandsFromRegisterString) or is a named register
4246// known by the MRS SysReg mapper.
4247bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4248 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
4249 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
4250 SDLoc DL(N);
4251
4252 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4253
4254 unsigned Opcode64Bit = AArch64::MRS;
4255 int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
4256 if (Imm == -1) {
4257 // No match, Use the sysreg mapper to map the remaining possible strings to
4258 // the value for the register to be used for the instruction operand.
4259 const auto *TheReg =
4260 AArch64SysReg::lookupSysRegByName(Name: RegString->getString());
4261 if (TheReg && TheReg->Readable &&
4262 TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
4263 Imm = TheReg->Encoding;
4264 else
4265 Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
4266
4267 if (Imm == -1) {
4268 // Still no match, see if this is "pc" or give up.
4269 if (!ReadIs128Bit && RegString->getString() == "pc") {
4270 Opcode64Bit = AArch64::ADR;
4271 Imm = 0;
4272 } else {
4273 return false;
4274 }
4275 }
4276 }
4277
4278 SDValue InChain = N->getOperand(Num: 0);
4279 SDValue SysRegImm = CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32);
4280 if (!ReadIs128Bit) {
4281 CurDAG->SelectNodeTo(N, MachineOpc: Opcode64Bit, VT1: MVT::i64, VT2: MVT::Other /* Chain */,
4282 Ops: {SysRegImm, InChain});
4283 } else {
4284 SDNode *MRRS = CurDAG->getMachineNode(
4285 Opcode: AArch64::MRRS, dl: DL,
4286 ResultTys: {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4287 Ops: {SysRegImm, InChain});
4288
4289 // Sysregs are not endian. The even register always contains the low half
4290 // of the register.
4291 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sube64, DL, VT: MVT::i64,
4292 Operand: SDValue(MRRS, 0));
4293 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::subo64, DL, VT: MVT::i64,
4294 Operand: SDValue(MRRS, 0));
4295 SDValue OutChain = SDValue(MRRS, 1);
4296
4297 ReplaceUses(F: SDValue(N, 0), T: Lo);
4298 ReplaceUses(F: SDValue(N, 1), T: Hi);
4299 ReplaceUses(F: SDValue(N, 2), T: OutChain);
4300 };
4301 return true;
4302}
4303
// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandsFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
  const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
  SDLoc DL(N);

  // MSRR writes a 128-bit value from a pair of X registers.
  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument, we know that this is
    // the case as it has been ensured by semantic checking.
    auto trySelectPState = [&](auto PMapper, unsigned State) {
      if (PMapper) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = N->getConstantOperandVal(Num: 2);
        CurDAG->SelectNodeTo(
            N, MachineOpc: State, VT: MVT::Other, Op1: CurDAG->getTargetConstant(Val: Reg, DL, VT: MVT::i32),
            Op2: CurDAG->getTargetConstant(Val: Immed, DL, VT: MVT::i16), Op3: N->getOperand(Num: 0));
        return true;
      }
      return false;
    };

    // Try the 4-bit pstate immediate form first, then the 1-bit form.
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(Name: RegString->getString()),
            AArch64::MSRpstateImm4))
      return true;
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(Name: RegString->getString()),
            AArch64::MSRpstateImm1))
      return true;
  }

  int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
  if (Imm == -1) {
    // Use the sysreg mapper to attempt to map the remaining possible strings
    // to the value for the register to be used for the MSR (register)
    // instruction operand.
    auto TheReg = AArch64SysReg::lookupSysRegByName(Name: RegString->getString());
    if (TheReg && TheReg->Writeable &&
        TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());

    if (Imm == -1)
      return false;
  }

  SDValue InChain = N->getOperand(Num: 0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSR, VT: MVT::Other,
                         Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
                         Op2: N->getOperand(Num: 2), Op3: InChain);
  } else {
    // No endian swap. The lower half always goes into the even subreg, and the
    // higher half always into the odd supreg.
    SDNode *Pair = CurDAG->getMachineNode(
        Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped /* XSeqPair */,
        Ops: {CurDAG->getTargetConstant(Val: AArch64::XSeqPairsClassRegClass.getID(), DL,
                                   VT: MVT::i32),
         N->getOperand(Num: 2),
         CurDAG->getTargetConstant(Val: AArch64::sube64, DL, VT: MVT::i32),
         N->getOperand(Num: 3),
         CurDAG->getTargetConstant(Val: AArch64::subo64, DL, VT: MVT::i32)});

    CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSRR, VT: MVT::Other,
                         Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
                         Op2: SDValue(Pair, 0), Op3: InChain);
  }

  return true;
}
4385
/// We've got special pseudo-instructions for these
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();

  // Leave IR for LSE if subtarget supports it.
  if (Subtarget->hasLSE()) return false;

  // Pick the pseudo matching the memory access width.
  if (MemTy == MVT::i8)
    Opcode = AArch64::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = AArch64::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = AArch64::CMP_SWAP_32;
  else if (MemTy == MVT::i64)
    Opcode = AArch64::CMP_SWAP_64;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  // Sub-64-bit accesses use 32-bit registers.
  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
  // Operands: address, expected, new value, chain.
  SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
                   N->getOperand(Num: 0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, dl: SDLoc(N),
      VTs: CurDAG->getVTList(VT1: RegTy, VT2: MVT::i32, VT3: MVT::Other), Ops);

  // Preserve the memory operand on the pseudo.
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});

  // N's value result maps to the pseudo's result 0; N's chain result maps to
  // the pseudo's result 2. The pseudo's result 1 (status) is unused here.
  ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
  ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);

  return true;
}
4421
4422bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4423 SDValue &Shift, bool Negate) {
4424 if (!isa<ConstantSDNode>(Val: N))
4425 return false;
4426
4427 APInt Val =
4428 cast<ConstantSDNode>(Val&: N)->getAPIntValue().trunc(width: VT.getFixedSizeInBits());
4429
4430 return SelectSVEAddSubImm(DL: SDLoc(N), Value: Val, VT, Imm, Shift, Negate);
4431}
4432
/// Match an immediate for SVE ADD/SUB: either an unsigned 8-bit value, or a
/// 16-bit multiple of 256 encoded as imm8 with LSL #8. \p Negate matches the
/// negated constant so ADD and SUB forms can be interchanged.
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
                                             SDValue &Imm, SDValue &Shift,
                                             bool Negate) {
  if (Negate)
    Val = -Val;

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
    Imm = CurDAG->getTargetConstant(Val: Val.getZExtValue(), DL, VT: MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8bit unsigned immediates.
    if ((Val & ~0xff) == 0) {
      Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
      Imm = CurDAG->getTargetConstant(Val: Val.getZExtValue(), DL, VT: MVT::i32);
      return true;
    }
    // Support 16bit unsigned immediates that are a multiple of 256.
    if ((Val & ~0xff00) == 0) {
      Shift = CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32);
      Imm = CurDAG->getTargetConstant(Val: Val.lshr(shiftAmt: 8).getZExtValue(), DL, VT: MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}
4467
/// Match an immediate for SVE signed-saturating ADD/SUB (SQADD/SQSUB).
/// The encodable forms are an 8-bit value or a 16-bit multiple of 256
/// (imm8 with LSL #8); only non-negative values can be used (see below).
bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
                                                 SDValue &Imm, SDValue &Shift,
                                                 bool Negate) {
  if (!isa<ConstantSDNode>(Val: N))
    return false;

  SDLoc DL(N);
  // Interpret the constant as a signed value of the element width.
  int64_t Val = cast<ConstantSDNode>(Val&: N)
                    ->getAPIntValue()
                    .trunc(width: VT.getFixedSizeInBits())
                    .getSExtValue();

  if (Negate)
    Val = -Val;

  // Signed saturating instructions treat their immediate operand as unsigned,
  // whereas the related intrinsics define their operands to be signed. This
  // means we can only use the immediate form when the operand is non-negative.
  if (Val < 0)
    return false;

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All positive immediates are supported.
    Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
    Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8bit positive immediates.
    if (Val <= 255) {
      Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
      Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
      return true;
    }
    // Support 16bit positive immediates that are a multiple of 256.
    if (Val <= 65280 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32);
      Imm = CurDAG->getTargetConstant(Val: Val >> 8, DL, VT: MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}
4517
4518bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4519 SDValue &Shift) {
4520 if (!isa<ConstantSDNode>(Val: N))
4521 return false;
4522
4523 SDLoc DL(N);
4524 int64_t Val = cast<ConstantSDNode>(Val&: N)
4525 ->getAPIntValue()
4526 .trunc(width: VT.getFixedSizeInBits())
4527 .getSExtValue();
4528 int32_t ImmVal, ShiftVal;
4529 if (!AArch64_AM::isSVECpyDupImm(SizeInBits: VT.getScalarSizeInBits(), Val, Imm&: ImmVal,
4530 Shift&: ShiftVal))
4531 return false;
4532
4533 Shift = CurDAG->getTargetConstant(Val: ShiftVal, DL, VT: MVT::i32);
4534 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL, VT: MVT::i32);
4535 return true;
4536}
4537
4538bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4539 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N))
4540 return SelectSVESignedArithImm(DL: SDLoc(N), Value: CNode->getAPIntValue(), Imm);
4541 return false;
4542}
4543
4544bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4545 SDValue &Imm) {
4546 int64_t ImmVal = Val.getSExtValue();
4547 if (ImmVal >= -128 && ImmVal < 128) {
4548 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL, VT: MVT::i32);
4549 return true;
4550 }
4551 return false;
4552}
4553
4554bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4555 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4556 uint64_t ImmVal = CNode->getZExtValue();
4557
4558 switch (VT.SimpleTy) {
4559 case MVT::i8:
4560 ImmVal &= 0xFF;
4561 break;
4562 case MVT::i16:
4563 ImmVal &= 0xFFFF;
4564 break;
4565 case MVT::i32:
4566 ImmVal &= 0xFFFFFFFF;
4567 break;
4568 case MVT::i64:
4569 break;
4570 default:
4571 llvm_unreachable("Unexpected type");
4572 }
4573
4574 if (ImmVal < 256) {
4575 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: MVT::i32);
4576 return true;
4577 }
4578 }
4579 return false;
4580}
4581
4582bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4583 bool Invert) {
4584 uint64_t ImmVal;
4585 if (auto CI = dyn_cast<ConstantSDNode>(Val&: N))
4586 ImmVal = CI->getZExtValue();
4587 else if (auto CFP = dyn_cast<ConstantFPSDNode>(Val&: N))
4588 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4589 else
4590 return false;
4591
4592 if (Invert)
4593 ImmVal = ~ImmVal;
4594
4595 uint64_t encoding;
4596 if (!AArch64_AM::isSVELogicalImm(SizeInBits: VT.getScalarSizeInBits(), ImmVal, Encoding&: encoding))
4597 return false;
4598
4599 Imm = CurDAG->getTargetConstant(Val: encoding, DL: SDLoc(N), VT: MVT::i64);
4600 return true;
4601}
4602
4603// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4604// Rather than attempt to normalise everything we can sometimes saturate the
4605// shift amount during selection. This function also allows for consistent
4606// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4607// required by the instructions.
4608bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4609 uint64_t High, bool AllowSaturation,
4610 SDValue &Imm) {
4611 if (auto *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
4612 uint64_t ImmVal = CN->getZExtValue();
4613
4614 // Reject shift amounts that are too small.
4615 if (ImmVal < Low)
4616 return false;
4617
4618 // Reject or saturate shift amounts that are too big.
4619 if (ImmVal > High) {
4620 if (!AllowSaturation)
4621 return false;
4622 ImmVal = High;
4623 }
4624
4625 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: MVT::i32);
4626 return true;
4627 }
4628
4629 return false;
4630}
4631
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  // tagp(FrameIndex, IRGstack, tag_offset):
  // since the offset between FrameIndex and IRGstack is a compile-time
  // constant, this can be lowered to a single ADDG instruction.
  if (!(isa<FrameIndexSDNode>(Val: N->getOperand(Num: 1)))) {
    return false;
  }

  // The base pointer must come directly from the llvm.aarch64.irg.sp
  // intrinsic for this shortcut to apply.
  SDValue IRG_SP = N->getOperand(Num: 2);
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      IRG_SP->getConstantOperandVal(Num: 1) != Intrinsic::aarch64_irg_sp) {
    return false;
  }

  const TargetLowering *TLI = getTargetLowering();
  SDLoc DL(N);
  int FI = cast<FrameIndexSDNode>(Val: N->getOperand(Num: 1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
      FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
  int TagOffset = N->getConstantOperandVal(Num: 3);

  // Operands: frame index, address offset (0), the IRG base (operand 2 of
  // the tagp call), and the requested tag offset.
  SDNode *Out = CurDAG->getMachineNode(
      Opcode: AArch64::TAGPstack, dl: DL, VT: MVT::i64,
      Ops: {FiOp, CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), N->getOperand(Num: 2),
       CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
  ReplaceNode(F: N, T: Out);
  return true;
}
4660
// Select llvm.aarch64.tagp. Tries the single-instruction stack-slot form
// first, then falls back to a three-instruction SUBP/ADD/ADDG sequence.
void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: above applies in any case when offset between Op1 and Op2 is a
  // compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = N->getConstantOperandVal(Num: 3);
  // SUBP Op1, Op2 then re-adding Op2 — presumably combining Op1's address
  // with Op2's tag before ADDG applies the tag offset (address offset 0);
  // NOTE(review): exact MTE semantics of SUBP not visible here — confirm
  // against the Arm ARM if modifying this sequence.
  SDNode *N1 = CurDAG->getMachineNode(Opcode: AArch64::SUBP, dl: DL, VT: MVT::i64,
                                      Ops: {N->getOperand(Num: 1), N->getOperand(Num: 2)});
  SDNode *N2 = CurDAG->getMachineNode(Opcode: AArch64::ADDXrr, dl: DL, VT: MVT::i64,
                                      Ops: {SDValue(N1, 0), N->getOperand(Num: 2)});
  SDNode *N3 = CurDAG->getMachineNode(
      Opcode: AArch64::ADDG, dl: DL, VT: MVT::i64,
      Ops: {SDValue(N2, 0), CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64),
       CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
  ReplaceNode(F: N, T: N3);
}
4682
// Select a "cast"-style INSERT_SUBVECTOR (undef base, index 0) that moves a
// fixed-length vector value into a scalable register, as a plain register
// class copy.
bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
  assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like insert_subvector.
  if (N->getConstantOperandVal(Num: 2) != 0)
    return false;
  if (!N->getOperand(Num: 0).isUndef())
    return false;

  // Bail when normal isel should do the job.
  EVT VT = N->getValueType(ResNo: 0);
  EVT InVT = N->getOperand(Num: 1).getValueType();
  if (VT.isFixedLengthVector() || InVT.isScalableVector())
    return false;
  if (InVT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
                                         Op1: N->getOperand(Num: 1), Op2: RC));
  return true;
}
4713
// Select a "cast"-style EXTRACT_SUBVECTOR (index 0) that moves a scalable
// vector value into a fixed-length type, as a plain register class copy.
bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like extract_subvector.
  if (N->getConstantOperandVal(Num: 1) != 0)
    return false;

  // Bail when normal isel can do the job.
  EVT VT = N->getValueType(ResNo: 0);
  EVT InVT = N->getOperand(Num: 0).getValueType();
  if (VT.isScalableVector() || InVT.isFixedLengthVector())
    return false;
  if (VT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
                                         Op1: N->getOperand(Num: 0), Op2: RC));
  return true;
}
4742
/// Try to select an OR that expresses rotr(xor(x, y), imm) — the expansion of
/// a funnel-shift of an XOR — into a single XAR instruction (SVE2 XAR_ZZZI_*
/// or the Neon SHA3 XAR).
bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");

  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  EVT VT = N->getValueType(ResNo: 0);
  SDLoc DL(N);

  // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
  // Rotate by a constant is a funnel shift in IR which is expanded to
  // an OR with shifted operands.
  // We do the following transform:
  // OR N0, N1 -> xar (x, y, imm)
  // Where:
  // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
  // N0 = SHL_PRED true, V, splat(bits-imm)
  // V = (xor x, y)
  if (VT.isScalableVector() &&
      (Subtarget->hasSVE2() ||
       (Subtarget->hasSME() && Subtarget->isStreaming()))) {
    // Canonicalise so N0 is the SHL and N1 the SRL (OR is commutative).
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(a&: N0, b&: N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    // Both shifts must have an all-active governing predicate.
    auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
    if (!TLI->isAllActivePredicate(DAG&: *CurDAG, N: N0.getOperand(i: 0)) ||
        !TLI->isAllActivePredicate(DAG&: *CurDAG, N: N1.getOperand(i: 0)))
      return false;

    // Both shifts must act on the same value V.
    if (N0.getOperand(i: 1) != N1.getOperand(i: 1))
      return false;

    SDValue R1, R2;
    bool IsXOROperand = true;
    if (N0.getOperand(i: 1).getOpcode() != ISD::XOR) {
      IsXOROperand = false;
    } else {
      R1 = N0.getOperand(i: 1).getOperand(i: 0);
      R2 = N1.getOperand(i: 1).getOperand(i: 1);
    }

    APInt ShlAmt, ShrAmt;
    if (!ISD::isConstantSplatVector(N: N0.getOperand(i: 2).getNode(), SplatValue&: ShlAmt) ||
        !ISD::isConstantSplatVector(N: N1.getOperand(i: 2).getNode(), SplatValue&: ShrAmt))
      return false;

    // The two shift amounts must sum to the element width, i.e. describe a
    // rotate.
    if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    if (!IsXOROperand) {
      // No XOR present: emit xar(V, 0, imm) — a plain rotate — by pairing V
      // with a zero vector materialised via MOVI.
      SDValue Zero = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
      SDNode *MOV = CurDAG->getMachineNode(Opcode: AArch64::MOVIv2d_ns, dl: DL, VT, Op1: Zero);
      SDValue MOVIV = SDValue(MOV, 0);

      SDValue ZSub = CurDAG->getTargetConstant(Val: AArch64::zsub, DL, VT: MVT::i32);
      SDNode *SubRegToReg =
          CurDAG->getMachineNode(Opcode: AArch64::SUBREG_TO_REG, dl: DL, VT, Op1: MOVIV, Op2: ZSub);

      R1 = N1->getOperand(Num: 1);
      R2 = SDValue(SubRegToReg, 0);
    }

    SDValue Imm =
        CurDAG->getTargetConstant(Val: ShrAmt.getZExtValue(), DL, VT: MVT::i32);

    SDValue Ops[] = {R1, R2, Imm};
    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
            VT, Opcodes: {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D})) {
      CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
      return true;
    }
    return false;
  }

  // We have Neon SHA3 XAR operation for v2i64 but for types
  // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
  // is available.
  EVT SVT;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::v4i32:
  case MVT::v2i32:
    SVT = MVT::nxv4i32;
    break;
  case MVT::v8i16:
  case MVT::v4i16:
    SVT = MVT::nxv8i16;
    break;
  case MVT::v16i8:
  case MVT::v8i8:
    SVT = MVT::nxv16i8;
    break;
  case MVT::v2i64:
  case MVT::v1i64:
    SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
    break;
  default:
    return false;
  }

  if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
      (SVT.isScalableVector() && !Subtarget->hasSVE2()))
    return false;

  // Same rotr-of-(possibly-XOR) pattern, but with fixed-vector shift nodes.
  if (N0->getOpcode() != AArch64ISD::VSHL ||
      N1->getOpcode() != AArch64ISD::VLSHR)
    return false;

  if (N0->getOperand(Num: 0) != N1->getOperand(Num: 0))
    return false;

  SDValue R1, R2;
  bool IsXOROperand = true;
  if (N1->getOperand(Num: 0)->getOpcode() != ISD::XOR) {
    IsXOROperand = false;
  } else {
    SDValue XOR = N0.getOperand(i: 0);
    R1 = XOR.getOperand(i: 0);
    R2 = XOR.getOperand(i: 1);
  }

  unsigned HsAmt = N0.getConstantOperandVal(i: 1);
  unsigned ShAmt = N1.getConstantOperandVal(i: 1);

  SDValue Imm = CurDAG->getTargetConstant(
      Val: ShAmt, DL, VT: N0.getOperand(i: 1).getValueType(), isOpaque: false);

  // Shift amounts must sum to the element width (i.e. describe a rotate).
  unsigned VTSizeInBits = VT.getScalarSizeInBits();
  if (ShAmt + HsAmt != VTSizeInBits)
    return false;

  if (!IsXOROperand) {
    // No XOR present: pair the rotated value with a zero vector.
    SDValue Zero = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
    SDNode *MOV =
        CurDAG->getMachineNode(Opcode: AArch64::MOVIv2d_ns, dl: DL, VT: MVT::v2i64, Op1: Zero);
    SDValue MOVIV = SDValue(MOV, 0);

    R1 = N1->getOperand(Num: 0);
    R2 = MOVIV;
  }

  // If a wider (or scalable) type was chosen, widen the operands by inserting
  // them into IMPLICIT_DEF registers of the target type.
  if (SVT != VT) {
    SDValue Undef =
        SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: SVT), 0);

    if (SVT.isScalableVector() && VT.is64BitVector()) {
      // 64-bit fixed vectors go via a 128-bit Q register before being placed
      // in the scalable register.
      EVT QVT = VT.getDoubleNumVectorElementsVT(Context&: *CurDAG->getContext());

      SDValue UndefQ = SDValue(
          CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: QVT), 0);
      SDValue DSub = CurDAG->getTargetConstant(Val: AArch64::dsub, DL, VT: MVT::i32);

      R1 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: QVT,
                                          Op1: UndefQ, Op2: R1, Op3: DSub),
                   0);
      // R2 may already be a v2i64 MOVI (synthetic zero); only widen if it is
      // still the original 64-bit type.
      if (R2.getValueType() == VT)
        R2 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: QVT,
                                            Op1: UndefQ, Op2: R2, Op3: DSub),
                     0);
    }

    SDValue SubReg = CurDAG->getTargetConstant(
        Val: (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, VT: MVT::i32);

    R1 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: SVT, Op1: Undef,
                                        Op2: R1, Op3: SubReg),
                 0);

    if (SVT.isScalableVector() || R2.getValueType() != SVT)
      R2 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: SVT,
                                          Op1: Undef, Op2: R2, Op3: SubReg),
                   0);
  }

  SDValue Ops[] = {R1, R2, Imm};
  SDNode *XAR = nullptr;

  if (SVT.isScalableVector()) {
    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
            VT: SVT, Opcodes: {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D}))
      XAR = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: SVT, Ops);
  } else {
    XAR = CurDAG->getMachineNode(Opcode: AArch64::XAR, dl: DL, VT: SVT, Ops);
  }

  assert(XAR && "Unexpected NULL value for XAR instruction in DAG");

  // Extract the result back out of the wider register if we widened above.
  if (SVT != VT) {
    if (VT.is64BitVector() && SVT.isScalableVector()) {
      // Scalable -> Q register -> D register, mirroring the widening above.
      EVT QVT = VT.getDoubleNumVectorElementsVT(Context&: *CurDAG->getContext());

      SDValue ZSub = CurDAG->getTargetConstant(Val: AArch64::zsub, DL, VT: MVT::i32);
      SDNode *Q = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT: QVT,
                                         Op1: SDValue(XAR, 0), Op2: ZSub);

      SDValue DSub = CurDAG->getTargetConstant(Val: AArch64::dsub, DL, VT: MVT::i32);
      XAR = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT,
                                   Op1: SDValue(Q, 0), Op2: DSub);
    } else {
      SDValue SubReg = CurDAG->getTargetConstant(
          Val: (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
          VT: MVT::i32);
      XAR = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT,
                                   Op1: SDValue(XAR, 0), Op2: SubReg);
    }
  }
  ReplaceNode(F: N, T: XAR);
  return true;
}
4957
4958void AArch64DAGToDAGISel::Select(SDNode *Node) {
4959 // If we have a custom node, we already have selected!
4960 if (Node->isMachineOpcode()) {
4961 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4962 Node->setNodeId(-1);
4963 return;
4964 }
4965
4966 // Few custom selection stuff.
4967 EVT VT = Node->getValueType(ResNo: 0);
4968
4969 switch (Node->getOpcode()) {
4970 default:
4971 break;
4972
4973 case ISD::ATOMIC_CMP_SWAP:
4974 if (SelectCMP_SWAP(N: Node))
4975 return;
4976 break;
4977
4978 case ISD::READ_REGISTER:
4979 case AArch64ISD::MRRS:
4980 if (tryReadRegister(N: Node))
4981 return;
4982 break;
4983
4984 case ISD::WRITE_REGISTER:
4985 case AArch64ISD::MSRR:
4986 if (tryWriteRegister(N: Node))
4987 return;
4988 break;
4989
4990 case ISD::LOAD: {
4991 // Try to select as an indexed load. Fall through to normal processing
4992 // if we can't.
4993 if (tryIndexedLoad(N: Node))
4994 return;
4995 break;
4996 }
4997
4998 case ISD::SRL:
4999 case ISD::AND:
5000 case ISD::SRA:
5001 case ISD::SIGN_EXTEND_INREG:
5002 if (tryBitfieldExtractOp(N: Node))
5003 return;
5004 if (tryBitfieldInsertInZeroOp(N: Node))
5005 return;
5006 [[fallthrough]];
5007 case ISD::ROTR:
5008 case ISD::SHL:
5009 if (tryShiftAmountMod(N: Node))
5010 return;
5011 break;
5012
5013 case ISD::SIGN_EXTEND:
5014 if (tryBitfieldExtractOpFromSExt(N: Node))
5015 return;
5016 break;
5017
5018 case ISD::OR:
5019 if (tryBitfieldInsertOp(N: Node))
5020 return;
5021 if (trySelectXAR(N: Node))
5022 return;
5023 break;
5024
5025 case ISD::EXTRACT_SUBVECTOR: {
5026 if (trySelectCastScalableToFixedLengthVector(N: Node))
5027 return;
5028 break;
5029 }
5030
5031 case ISD::INSERT_SUBVECTOR: {
5032 if (trySelectCastFixedLengthToScalableVector(N: Node))
5033 return;
5034 break;
5035 }
5036
5037 case ISD::Constant: {
5038 // Materialize zero constants as copies from WZR/XZR. This allows
5039 // the coalescer to propagate these into other instructions.
5040 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Val: Node);
5041 if (ConstNode->isZero()) {
5042 if (VT == MVT::i32) {
5043 SDValue New = CurDAG->getCopyFromReg(
5044 Chain: CurDAG->getEntryNode(), dl: SDLoc(Node), Reg: AArch64::WZR, VT: MVT::i32);
5045 ReplaceNode(F: Node, T: New.getNode());
5046 return;
5047 } else if (VT == MVT::i64) {
5048 SDValue New = CurDAG->getCopyFromReg(
5049 Chain: CurDAG->getEntryNode(), dl: SDLoc(Node), Reg: AArch64::XZR, VT: MVT::i64);
5050 ReplaceNode(F: Node, T: New.getNode());
5051 return;
5052 }
5053 }
// Non-zero constants (and zeros of other types) fall through to the
// tablegen-generated matcher below.
5054 break;
5055 }
5056
5057 case ISD::FrameIndex: {
5058 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5059 int FI = cast<FrameIndexSDNode>(Val: Node)->getIndex();
// Encode an LSL #0 shifter so the immediate-add form is well-formed.
5060 unsigned Shifter = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0);
5061 const TargetLowering *TLI = getTargetLowering();
5062 SDValue TFI = CurDAG->getTargetFrameIndex(
5063 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()))
5064 SDLoc DL(Node);
// Operands: frame index, immediate 0, shift encoding.
5065 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32),
5066 CurDAG->getTargetConstant(Val: Shifter, DL, VT: MVT::i32) };
5067 CurDAG->SelectNodeTo(N: Node, MachineOpc: AArch64::ADDXri, VT: MVT::i64, Ops);
5068 return;
5069 }
5070 case ISD::INTRINSIC_W_CHAIN: {
// Operand 0 is the chain; operand 1 holds the intrinsic ID.
5071 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
5072 switch (IntNo) {
5073 default:
5074 break;
// Guarded Control Stack "switch stack": lower to the two-instruction
// GCSSS1/GCSSS2 sequence. GCSSS1 consumes Val; GCSSS2 is seeded with XZR and
// its i64 result (plus chain) replaces the intrinsic's results.
5075 case Intrinsic::aarch64_gcsss: {
5076 SDLoc DL(Node);
5077 SDValue Chain = Node->getOperand(Num: 0);
5078 SDValue Val = Node->getOperand(Num: 2);
5079 SDValue Zero = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::XZR, VT: MVT::i64);
5080 SDNode *SS1 =
5081 CurDAG->getMachineNode(Opcode: AArch64::GCSSS1, dl: DL, VT: MVT::Other, Op1: Val, Op2: Chain);
5082 SDNode *SS2 = CurDAG->getMachineNode(Opcode: AArch64::GCSSS2, dl: DL, VT1: MVT::i64,
5083 VT2: MVT::Other, Op1: Zero, Op2: SDValue(SS1, 0));
5084 ReplaceNode(F: Node, T: SS2);
5085 return;
5086 }
// Load-exclusive pair (acquire variant for ldaxp): yields two i64 results
// and a chain.
5087 case Intrinsic::aarch64_ldaxp:
5088 case Intrinsic::aarch64_ldxp: {
5089 unsigned Op =
5090 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5091 SDValue MemAddr = Node->getOperand(Num: 2);
5092 SDLoc DL(Node);
5093 SDValue Chain = Node->getOperand(Num: 0);
5094
5095 SDNode *Ld = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i64, VT2: MVT::i64,
5096 VT3: MVT::Other, Op1: MemAddr, Op2: Chain);
5097
5098 // Transfer memoperands.
5099 MachineMemOperand *MemOp =
5100 cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
5101 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
5102 ReplaceNode(F: Node, T: Ld);
5103 return;
5104 }
// Store-exclusive pair (release variant for stlxp): operands 2/3 are the
// low/high halves, operand 4 the address; produces an i32 success status.
5105 case Intrinsic::aarch64_stlxp:
5106 case Intrinsic::aarch64_stxp: {
5107 unsigned Op =
5108 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5109 SDLoc DL(Node);
5110 SDValue Chain = Node->getOperand(Num: 0);
5111 SDValue ValLo = Node->getOperand(Num: 2);
5112 SDValue ValHi = Node->getOperand(Num: 3);
5113 SDValue MemAddr = Node->getOperand(Num: 4);
5114
5115 // Place arguments in the right order.
5116 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5117
5118 SDNode *St = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops);
5119 // Transfer memoperands.
5120 MachineMemOperand *MemOp =
5121 cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
5122 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
5123
5124 ReplaceNode(F: Node, T: St);
5125 return;
5126 }
5127 case Intrinsic::aarch64_neon_ld1x2:
5128 if (VT == MVT::v8i8) {
5129 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8b, SubRegIdx: AArch64::dsub0);
5130 return;
5131 } else if (VT == MVT::v16i8) {
5132 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov16b, SubRegIdx: AArch64::qsub0);
5133 return;
5134 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5135 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4h, SubRegIdx: AArch64::dsub0);
5136 return;
5137 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5138 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8h, SubRegIdx: AArch64::qsub0);
5139 return;
5140 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5141 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2s, SubRegIdx: AArch64::dsub0);
5142 return;
5143 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5144 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4s, SubRegIdx: AArch64::qsub0);
5145 return;
5146 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5147 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
5148 return;
5149 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5150 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2d, SubRegIdx: AArch64::qsub0);
5151 return;
5152 }
5153 break;
5154 case Intrinsic::aarch64_neon_ld1x3:
5155 if (VT == MVT::v8i8) {
5156 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8b, SubRegIdx: AArch64::dsub0);
5157 return;
5158 } else if (VT == MVT::v16i8) {
5159 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev16b, SubRegIdx: AArch64::qsub0);
5160 return;
5161 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5162 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4h, SubRegIdx: AArch64::dsub0);
5163 return;
5164 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5165 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8h, SubRegIdx: AArch64::qsub0);
5166 return;
5167 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5168 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2s, SubRegIdx: AArch64::dsub0);
5169 return;
5170 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5171 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4s, SubRegIdx: AArch64::qsub0);
5172 return;
5173 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5174 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
5175 return;
5176 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5177 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2d, SubRegIdx: AArch64::qsub0);
5178 return;
5179 }
5180 break;
5181 case Intrinsic::aarch64_neon_ld1x4:
5182 if (VT == MVT::v8i8) {
5183 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8b, SubRegIdx: AArch64::dsub0);
5184 return;
5185 } else if (VT == MVT::v16i8) {
5186 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv16b, SubRegIdx: AArch64::qsub0);
5187 return;
5188 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5189 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4h, SubRegIdx: AArch64::dsub0);
5190 return;
5191 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5192 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8h, SubRegIdx: AArch64::qsub0);
5193 return;
5194 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5195 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2s, SubRegIdx: AArch64::dsub0);
5196 return;
5197 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5198 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4s, SubRegIdx: AArch64::qsub0);
5199 return;
5200 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5201 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
5202 return;
5203 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5204 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2d, SubRegIdx: AArch64::qsub0);
5205 return;
5206 }
5207 break;
// NEON ld2: de-interleaving structure load of two vectors; dispatch on the
// result vector type. dsub0/qsub0 name the first D/Q subregister of the
// register-sequence result.
5208 case Intrinsic::aarch64_neon_ld2:
5209 if (VT == MVT::v8i8) {
5210 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8b, SubRegIdx: AArch64::dsub0);
5211 return;
5212 } else if (VT == MVT::v16i8) {
5213 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov16b, SubRegIdx: AArch64::qsub0);
5214 return;
5215 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5216 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4h, SubRegIdx: AArch64::dsub0);
5217 return;
5218 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5219 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8h, SubRegIdx: AArch64::qsub0);
5220 return;
5221 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5222 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2s, SubRegIdx: AArch64::dsub0);
5223 return;
5224 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5225 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4s, SubRegIdx: AArch64::qsub0);
5226 return;
5227 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Intentionally LD1, not LD2: there is no LD2 with a .1d arrangement, and
// de-interleaving one-element vectors is a no-op, so the two-register
// LD1 .1d form loads the same result.
5228 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
5229 return;
5230 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5231 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2d, SubRegIdx: AArch64::qsub0);
5232 return;
5233 }
5234 break;
// NEON ld3: de-interleaving structure load of three vectors, dispatched on
// the result vector type.
5235 case Intrinsic::aarch64_neon_ld3:
5236 if (VT == MVT::v8i8) {
5237 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8b, SubRegIdx: AArch64::dsub0);
5238 return;
5239 } else if (VT == MVT::v16i8) {
5240 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev16b, SubRegIdx: AArch64::qsub0);
5241 return;
5242 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5243 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4h, SubRegIdx: AArch64::dsub0);
5244 return;
5245 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5246 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8h, SubRegIdx: AArch64::qsub0);
5247 return;
5248 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5249 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2s, SubRegIdx: AArch64::dsub0);
5250 return;
5251 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5252 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4s, SubRegIdx: AArch64::qsub0);
5253 return;
5254 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Intentionally LD1, not LD3: no LD3 .1d arrangement exists; with one
// element per vector the de-interleave is a no-op, so the three-register
// LD1 .1d form is equivalent.
5255 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
5256 return;
5257 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5258 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2d, SubRegIdx: AArch64::qsub0);
5259 return;
5260 }
5261 break;
// NEON ld4: de-interleaving structure load of four vectors, dispatched on
// the result vector type.
5262 case Intrinsic::aarch64_neon_ld4:
5263 if (VT == MVT::v8i8) {
5264 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8b, SubRegIdx: AArch64::dsub0);
5265 return;
5266 } else if (VT == MVT::v16i8) {
5267 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv16b, SubRegIdx: AArch64::qsub0);
5268 return;
5269 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5270 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4h, SubRegIdx: AArch64::dsub0);
5271 return;
5272 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5273 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8h, SubRegIdx: AArch64::qsub0);
5274 return;
5275 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5276 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2s, SubRegIdx: AArch64::dsub0);
5277 return;
5278 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5279 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4s, SubRegIdx: AArch64::qsub0);
5280 return;
5281 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
// Intentionally LD1, not LD4: no LD4 .1d arrangement exists; with one
// element per vector the de-interleave is a no-op, so the four-register
// LD1 .1d form is equivalent.
5282 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
5283 return;
5284 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5285 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2d, SubRegIdx: AArch64::qsub0);
5286 return;
5287 }
5288 break;
5289 case Intrinsic::aarch64_neon_ld2r:
5290 if (VT == MVT::v8i8) {
5291 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8b, SubRegIdx: AArch64::dsub0);
5292 return;
5293 } else if (VT == MVT::v16i8) {
5294 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv16b, SubRegIdx: AArch64::qsub0);
5295 return;
5296 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5297 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4h, SubRegIdx: AArch64::dsub0);
5298 return;
5299 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5300 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8h, SubRegIdx: AArch64::qsub0);
5301 return;
5302 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5303 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2s, SubRegIdx: AArch64::dsub0);
5304 return;
5305 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5306 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4s, SubRegIdx: AArch64::qsub0);
5307 return;
5308 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5309 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv1d, SubRegIdx: AArch64::dsub0);
5310 return;
5311 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5312 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2d, SubRegIdx: AArch64::qsub0);
5313 return;
5314 }
5315 break;
5316 case Intrinsic::aarch64_neon_ld3r:
5317 if (VT == MVT::v8i8) {
5318 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8b, SubRegIdx: AArch64::dsub0);
5319 return;
5320 } else if (VT == MVT::v16i8) {
5321 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv16b, SubRegIdx: AArch64::qsub0);
5322 return;
5323 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5324 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4h, SubRegIdx: AArch64::dsub0);
5325 return;
5326 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5327 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8h, SubRegIdx: AArch64::qsub0);
5328 return;
5329 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5330 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2s, SubRegIdx: AArch64::dsub0);
5331 return;
5332 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5333 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4s, SubRegIdx: AArch64::qsub0);
5334 return;
5335 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5336 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv1d, SubRegIdx: AArch64::dsub0);
5337 return;
5338 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5339 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2d, SubRegIdx: AArch64::qsub0);
5340 return;
5341 }
5342 break;
5343 case Intrinsic::aarch64_neon_ld4r:
5344 if (VT == MVT::v8i8) {
5345 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8b, SubRegIdx: AArch64::dsub0);
5346 return;
5347 } else if (VT == MVT::v16i8) {
5348 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv16b, SubRegIdx: AArch64::qsub0);
5349 return;
5350 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5351 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4h, SubRegIdx: AArch64::dsub0);
5352 return;
5353 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5354 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8h, SubRegIdx: AArch64::qsub0);
5355 return;
5356 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5357 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2s, SubRegIdx: AArch64::dsub0);
5358 return;
5359 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5360 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4s, SubRegIdx: AArch64::qsub0);
5361 return;
5362 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5363 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv1d, SubRegIdx: AArch64::dsub0);
5364 return;
5365 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5366 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2d, SubRegIdx: AArch64::qsub0);
5367 return;
5368 }
5369 break;
5370 case Intrinsic::aarch64_neon_ld2lane:
5371 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5372 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i8);
5373 return;
5374 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5375 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5376 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i16);
5377 return;
5378 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5379 VT == MVT::v2f32) {
5380 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i32);
5381 return;
5382 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5383 VT == MVT::v1f64) {
5384 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i64);
5385 return;
5386 }
5387 break;
5388 case Intrinsic::aarch64_neon_ld3lane:
5389 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5390 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i8);
5391 return;
5392 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5393 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5394 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i16);
5395 return;
5396 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5397 VT == MVT::v2f32) {
5398 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i32);
5399 return;
5400 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5401 VT == MVT::v1f64) {
5402 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i64);
5403 return;
5404 }
5405 break;
5406 case Intrinsic::aarch64_neon_ld4lane:
5407 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5408 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i8);
5409 return;
5410 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5411 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5412 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i16);
5413 return;
5414 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5415 VT == MVT::v2f32) {
5416 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i32);
5417 return;
5418 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5419 VT == MVT::v1f64) {
5420 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i64);
5421 return;
5422 }
5423 break;
// LD64B: 64-byte load into eight consecutive X registers; x8sub_0 names the
// first GPR of the eight-register result tuple.
5424 case Intrinsic::aarch64_ld64b:
5425 SelectLoad(N: Node, NumVecs: 8, Opc: AArch64::LD64B, SubRegIdx: AArch64::x8sub_0);
5426 return;
// SVE structured quadword loads. As with the other SelectPredicatedLoad
// calls below, Scale is log2 of the element size used to scale the
// immediate-offset form (4 => 16-byte quadwords).
5427 case Intrinsic::aarch64_sve_ld2q_sret: {
5428 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 4, Opc_ri: AArch64::LD2Q_IMM, Opc_rr: AArch64::LD2Q, IsIntr: true);
5429 return;
5430 }
5431 case Intrinsic::aarch64_sve_ld3q_sret: {
5432 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 4, Opc_ri: AArch64::LD3Q_IMM, Opc_rr: AArch64::LD3Q, IsIntr: true);
5433 return;
5434 }
5435 case Intrinsic::aarch64_sve_ld4q_sret: {
5436 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 4, Opc_ri: AArch64::LD4Q_IMM, Opc_rr: AArch64::LD4Q, IsIntr: true);
5437 return;
5438 }
5439 case Intrinsic::aarch64_sve_ld2_sret: {
5440 if (VT == MVT::nxv16i8) {
5441 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD2B_IMM, Opc_rr: AArch64::LD2B,
5442 IsIntr: true);
5443 return;
5444 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5445 VT == MVT::nxv8bf16) {
5446 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD2H_IMM, Opc_rr: AArch64::LD2H,
5447 IsIntr: true);
5448 return;
5449 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5450 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD2W_IMM, Opc_rr: AArch64::LD2W,
5451 IsIntr: true);
5452 return;
5453 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5454 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD2D_IMM, Opc_rr: AArch64::LD2D,
5455 IsIntr: true);
5456 return;
5457 }
5458 break;
5459 }
5460 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5461 if (VT == MVT::nxv16i8) {
5462 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5463 SelectContiguousMultiVectorLoad(
5464 N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD1B_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_2Z_PSEUDO);
5465 else if (Subtarget->hasSVE2p1())
5466 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD1B_2Z_IMM,
5467 Opc_rr: AArch64::LD1B_2Z);
5468 else
5469 break;
5470 return;
5471 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5472 VT == MVT::nxv8bf16) {
5473 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5474 SelectContiguousMultiVectorLoad(
5475 N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD1H_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_2Z_PSEUDO);
5476 else if (Subtarget->hasSVE2p1())
5477 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD1H_2Z_IMM,
5478 Opc_rr: AArch64::LD1H_2Z);
5479 else
5480 break;
5481 return;
5482 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5483 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5484 SelectContiguousMultiVectorLoad(
5485 N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD1W_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_2Z_PSEUDO);
5486 else if (Subtarget->hasSVE2p1())
5487 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD1W_2Z_IMM,
5488 Opc_rr: AArch64::LD1W_2Z);
5489 else
5490 break;
5491 return;
5492 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5493 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5494 SelectContiguousMultiVectorLoad(
5495 N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD1D_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_2Z_PSEUDO);
5496 else if (Subtarget->hasSVE2p1())
5497 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD1D_2Z_IMM,
5498 Opc_rr: AArch64::LD1D_2Z);
5499 else
5500 break;
5501 return;
5502 }
5503 break;
5504 }
5505 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5506 if (VT == MVT::nxv16i8) {
5507 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5508 SelectContiguousMultiVectorLoad(
5509 N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD1B_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_4Z_PSEUDO);
5510 else if (Subtarget->hasSVE2p1())
5511 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD1B_4Z_IMM,
5512 Opc_rr: AArch64::LD1B_4Z);
5513 else
5514 break;
5515 return;
5516 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5517 VT == MVT::nxv8bf16) {
5518 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5519 SelectContiguousMultiVectorLoad(
5520 N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD1H_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_4Z_PSEUDO);
5521 else if (Subtarget->hasSVE2p1())
5522 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD1H_4Z_IMM,
5523 Opc_rr: AArch64::LD1H_4Z);
5524 else
5525 break;
5526 return;
5527 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5528 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5529 SelectContiguousMultiVectorLoad(
5530 N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD1W_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_4Z_PSEUDO);
5531 else if (Subtarget->hasSVE2p1())
5532 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD1W_4Z_IMM,
5533 Opc_rr: AArch64::LD1W_4Z);
5534 else
5535 break;
5536 return;
5537 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5538 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5539 SelectContiguousMultiVectorLoad(
5540 N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD1D_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_4Z_PSEUDO);
5541 else if (Subtarget->hasSVE2p1())
5542 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD1D_4Z_IMM,
5543 Opc_rr: AArch64::LD1D_4Z);
5544 else
5545 break;
5546 return;
5547 }
5548 break;
5549 }
5550 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5551 if (VT == MVT::nxv16i8) {
5552 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5553 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0,
5554 Opc_ri: AArch64::LDNT1B_2Z_IMM_PSEUDO,
5555 Opc_rr: AArch64::LDNT1B_2Z_PSEUDO);
5556 else if (Subtarget->hasSVE2p1())
5557 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LDNT1B_2Z_IMM,
5558 Opc_rr: AArch64::LDNT1B_2Z);
5559 else
5560 break;
5561 return;
5562 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5563 VT == MVT::nxv8bf16) {
5564 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5565 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1,
5566 Opc_ri: AArch64::LDNT1H_2Z_IMM_PSEUDO,
5567 Opc_rr: AArch64::LDNT1H_2Z_PSEUDO);
5568 else if (Subtarget->hasSVE2p1())
5569 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LDNT1H_2Z_IMM,
5570 Opc_rr: AArch64::LDNT1H_2Z);
5571 else
5572 break;
5573 return;
5574 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5575 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5576 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2,
5577 Opc_ri: AArch64::LDNT1W_2Z_IMM_PSEUDO,
5578 Opc_rr: AArch64::LDNT1W_2Z_PSEUDO);
5579 else if (Subtarget->hasSVE2p1())
5580 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LDNT1W_2Z_IMM,
5581 Opc_rr: AArch64::LDNT1W_2Z);
5582 else
5583 break;
5584 return;
5585 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5586 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5587 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3,
5588 Opc_ri: AArch64::LDNT1D_2Z_IMM_PSEUDO,
5589 Opc_rr: AArch64::LDNT1D_2Z_PSEUDO);
5590 else if (Subtarget->hasSVE2p1())
5591 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LDNT1D_2Z_IMM,
5592 Opc_rr: AArch64::LDNT1D_2Z);
5593 else
5594 break;
5595 return;
5596 }
5597 break;
5598 }
5599 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5600 if (VT == MVT::nxv16i8) {
5601 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5602 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0,
5603 Opc_ri: AArch64::LDNT1B_4Z_IMM_PSEUDO,
5604 Opc_rr: AArch64::LDNT1B_4Z_PSEUDO);
5605 else if (Subtarget->hasSVE2p1())
5606 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LDNT1B_4Z_IMM,
5607 Opc_rr: AArch64::LDNT1B_4Z);
5608 else
5609 break;
5610 return;
5611 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5612 VT == MVT::nxv8bf16) {
5613 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5614 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1,
5615 Opc_ri: AArch64::LDNT1H_4Z_IMM_PSEUDO,
5616 Opc_rr: AArch64::LDNT1H_4Z_PSEUDO);
5617 else if (Subtarget->hasSVE2p1())
5618 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LDNT1H_4Z_IMM,
5619 Opc_rr: AArch64::LDNT1H_4Z);
5620 else
5621 break;
5622 return;
5623 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5624 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5625 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2,
5626 Opc_ri: AArch64::LDNT1W_4Z_IMM_PSEUDO,
5627 Opc_rr: AArch64::LDNT1W_4Z_PSEUDO);
5628 else if (Subtarget->hasSVE2p1())
5629 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LDNT1W_4Z_IMM,
5630 Opc_rr: AArch64::LDNT1W_4Z);
5631 else
5632 break;
5633 return;
5634 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5635 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5636 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3,
5637 Opc_ri: AArch64::LDNT1D_4Z_IMM_PSEUDO,
5638 Opc_rr: AArch64::LDNT1D_4Z_PSEUDO);
5639 else if (Subtarget->hasSVE2p1())
5640 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LDNT1D_4Z_IMM,
5641 Opc_rr: AArch64::LDNT1D_4Z);
5642 else
5643 break;
5644 return;
5645 }
5646 break;
5647 }
5648 case Intrinsic::aarch64_sve_ld3_sret: {
5649 if (VT == MVT::nxv16i8) {
5650 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 0, Opc_ri: AArch64::LD3B_IMM, Opc_rr: AArch64::LD3B,
5651 IsIntr: true);
5652 return;
5653 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5654 VT == MVT::nxv8bf16) {
5655 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 1, Opc_ri: AArch64::LD3H_IMM, Opc_rr: AArch64::LD3H,
5656 IsIntr: true);
5657 return;
5658 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5659 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 2, Opc_ri: AArch64::LD3W_IMM, Opc_rr: AArch64::LD3W,
5660 IsIntr: true);
5661 return;
5662 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5663 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 3, Opc_ri: AArch64::LD3D_IMM, Opc_rr: AArch64::LD3D,
5664 IsIntr: true);
5665 return;
5666 }
5667 break;
5668 }
5669 case Intrinsic::aarch64_sve_ld4_sret: {
5670 if (VT == MVT::nxv16i8) {
5671 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD4B_IMM, Opc_rr: AArch64::LD4B,
5672 IsIntr: true);
5673 return;
5674 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5675 VT == MVT::nxv8bf16) {
5676 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD4H_IMM, Opc_rr: AArch64::LD4H,
5677 IsIntr: true);
5678 return;
5679 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5680 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD4W_IMM, Opc_rr: AArch64::LD4W,
5681 IsIntr: true);
5682 return;
5683 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5684 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD4D_IMM, Opc_rr: AArch64::LD4D,
5685 IsIntr: true);
5686 return;
5687 }
5688 break;
5689 }
5690 case Intrinsic::aarch64_sme_read_hor_vg2: {
5691 if (VT == MVT::nxv16i8) {
5692 SelectMultiVectorMove<14, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAB0,
5693 Op: AArch64::MOVA_2ZMXI_H_B);
5694 return;
5695 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5696 VT == MVT::nxv8bf16) {
5697 SelectMultiVectorMove<6, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAH0,
5698 Op: AArch64::MOVA_2ZMXI_H_H);
5699 return;
5700 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5701 SelectMultiVectorMove<2, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAS0,
5702 Op: AArch64::MOVA_2ZMXI_H_S);
5703 return;
5704 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5705 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAD0,
5706 Op: AArch64::MOVA_2ZMXI_H_D);
5707 return;
5708 }
5709 break;
5710 }
5711 case Intrinsic::aarch64_sme_read_ver_vg2: {
5712 if (VT == MVT::nxv16i8) {
5713 SelectMultiVectorMove<14, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAB0,
5714 Op: AArch64::MOVA_2ZMXI_V_B);
5715 return;
5716 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5717 VT == MVT::nxv8bf16) {
5718 SelectMultiVectorMove<6, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAH0,
5719 Op: AArch64::MOVA_2ZMXI_V_H);
5720 return;
5721 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5722 SelectMultiVectorMove<2, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAS0,
5723 Op: AArch64::MOVA_2ZMXI_V_S);
5724 return;
5725 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5726 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAD0,
5727 Op: AArch64::MOVA_2ZMXI_V_D);
5728 return;
5729 }
5730 break;
5731 }
5732 case Intrinsic::aarch64_sme_read_hor_vg4: {
5733 if (VT == MVT::nxv16i8) {
5734 SelectMultiVectorMove<12, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAB0,
5735 Op: AArch64::MOVA_4ZMXI_H_B);
5736 return;
5737 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5738 VT == MVT::nxv8bf16) {
5739 SelectMultiVectorMove<4, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAH0,
5740 Op: AArch64::MOVA_4ZMXI_H_H);
5741 return;
5742 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5743 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAS0,
5744 Op: AArch64::MOVA_4ZMXI_H_S);
5745 return;
5746 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5747 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAD0,
5748 Op: AArch64::MOVA_4ZMXI_H_D);
5749 return;
5750 }
5751 break;
5752 }
5753 case Intrinsic::aarch64_sme_read_ver_vg4: {
5754 if (VT == MVT::nxv16i8) {
5755 SelectMultiVectorMove<12, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAB0,
5756 Op: AArch64::MOVA_4ZMXI_V_B);
5757 return;
5758 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5759 VT == MVT::nxv8bf16) {
5760 SelectMultiVectorMove<4, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAH0,
5761 Op: AArch64::MOVA_4ZMXI_V_H);
5762 return;
5763 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5764 SelectMultiVectorMove<0, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAS0,
5765 Op: AArch64::MOVA_4ZMXI_V_S);
5766 return;
5767 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5768 SelectMultiVectorMove<0, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAD0,
5769 Op: AArch64::MOVA_4ZMXI_V_D);
5770 return;
5771 }
5772 break;
5773 }
5774 case Intrinsic::aarch64_sme_read_vg1x2: {
5775 SelectMultiVectorMove<7, 1>(N: Node, NumVecs: 2, BaseReg: AArch64::ZA,
5776 Op: AArch64::MOVA_VG2_2ZMXI);
5777 return;
5778 }
5779 case Intrinsic::aarch64_sme_read_vg1x4: {
5780 SelectMultiVectorMove<7, 1>(N: Node, NumVecs: 4, BaseReg: AArch64::ZA,
5781 Op: AArch64::MOVA_VG4_4ZMXI);
5782 return;
5783 }
5784 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5785 if (VT == MVT::nxv16i8) {
5786 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_B_PSEUDO, MaxIdx: 14, Scale: 2);
5787 return;
5788 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5789 VT == MVT::nxv8bf16) {
5790 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_H_PSEUDO, MaxIdx: 6, Scale: 2);
5791 return;
5792 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5793 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_S_PSEUDO, MaxIdx: 2, Scale: 2);
5794 return;
5795 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5796 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_D_PSEUDO, MaxIdx: 0, Scale: 2);
5797 return;
5798 }
5799 break;
5800 }
5801 case Intrinsic::aarch64_sme_readz_vert_x2: {
5802 if (VT == MVT::nxv16i8) {
5803 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_B_PSEUDO, MaxIdx: 14, Scale: 2);
5804 return;
5805 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5806 VT == MVT::nxv8bf16) {
5807 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_H_PSEUDO, MaxIdx: 6, Scale: 2);
5808 return;
5809 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5810 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_S_PSEUDO, MaxIdx: 2, Scale: 2);
5811 return;
5812 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5813 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_D_PSEUDO, MaxIdx: 0, Scale: 2);
5814 return;
5815 }
5816 break;
5817 }
5818 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5819 if (VT == MVT::nxv16i8) {
5820 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_B_PSEUDO, MaxIdx: 12, Scale: 4);
5821 return;
5822 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5823 VT == MVT::nxv8bf16) {
5824 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_H_PSEUDO, MaxIdx: 4, Scale: 4);
5825 return;
5826 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5827 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_S_PSEUDO, MaxIdx: 0, Scale: 4);
5828 return;
5829 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5830 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_D_PSEUDO, MaxIdx: 0, Scale: 4);
5831 return;
5832 }
5833 break;
5834 }
5835 case Intrinsic::aarch64_sme_readz_vert_x4: {
5836 if (VT == MVT::nxv16i8) {
5837 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_B_PSEUDO, MaxIdx: 12, Scale: 4);
5838 return;
5839 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5840 VT == MVT::nxv8bf16) {
5841 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_H_PSEUDO, MaxIdx: 4, Scale: 4);
5842 return;
5843 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5844 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_S_PSEUDO, MaxIdx: 0, Scale: 4);
5845 return;
5846 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5847 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_D_PSEUDO, MaxIdx: 0, Scale: 4);
5848 return;
5849 }
5850 break;
5851 }
5852  case Intrinsic::aarch64_sme_readz_x2: {
        // MOVAZ pseudo: reads two ZA vector groups (vgx2) into a Z-register
        // pair; per the MOVAZ mnemonic this presumably also zeroes the read
        // ZA slices — confirm against the SME2p1 spec. MaxIdx=7/Scale=1
        // bound the slice-index immediate.
5853    SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, MaxIdx: 7, Scale: 1,
5854                           BaseReg: AArch64::ZA);
5855    return;
5856  }
5857  case Intrinsic::aarch64_sme_readz_x4: {
        // Same as the x2 case above, but four vectors (vgx4).
5858    SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, MaxIdx: 7, Scale: 1,
5859                           BaseReg: AArch64::ZA);
5860    return;
5861  }
5862  case Intrinsic::swift_async_context_addr: {
5863    SDLoc DL(Node);
5864    SDValue Chain = Node->getOperand(Num: 0);
        // Materialize FP and compute FP - 8 (SUBXri imm=8, shift=0): the
        // async context slot sits just below the frame pointer.
        // NOTE(review): the 8-byte offset is assumed to match the Swift
        // extended frame record laid out by AArch64 frame lowering — confirm
        // there if this is touched.
5865    SDValue CopyFP = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::FP, VT: MVT::i64);
5866    SDValue Res = SDValue(
5867        CurDAG->getMachineNode(Opcode: AArch64::SUBXri, dl: DL, VT: MVT::i64, Op1: CopyFP,
5868                               Op2: CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32),
5869                               Op3: CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32)),
5870        0);
        // Rewire the intrinsic's value result to the subtraction and its
        // chain result to the CopyFromReg's chain, then delete the node.
5871    ReplaceUses(F: SDValue(Node, 0), T: Res);
5872    ReplaceUses(F: SDValue(Node, 1), T: CopyFP.getValue(R: 1));
5873    CurDAG->RemoveDeadNode(N: Node);
5874
        // Reading FP means the frame address is taken; also record that this
        // function uses the Swift async context so later passes set it up.
5875    auto &MF = CurDAG->getMachineFunction();
5876    MF.getFrameInfo().setFrameAddressIsTaken(true);
5877    MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5878    return;
5879  }
5880 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5881 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5882 VT: Node->getValueType(ResNo: 0),
5883 Opcodes: {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5884 AArch64::LUTI2_4ZTZI_S}))
5885 // Second Immediate must be <= 3:
5886 SelectMultiVectorLutiLane(Node, NumOutVecs: 4, Opc, MaxImm: 3);
5887 return;
5888 }
5889 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5890 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5891 VT: Node->getValueType(ResNo: 0),
5892 Opcodes: {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5893 // Second Immediate must be <= 1:
5894 SelectMultiVectorLutiLane(Node, NumOutVecs: 4, Opc, MaxImm: 1);
5895 return;
5896 }
5897 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5898 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5899 VT: Node->getValueType(ResNo: 0),
5900 Opcodes: {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5901 AArch64::LUTI2_2ZTZI_S}))
5902 // Second Immediate must be <= 7:
5903 SelectMultiVectorLutiLane(Node, NumOutVecs: 2, Opc, MaxImm: 7);
5904 return;
5905 }
5906 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5907 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5908 VT: Node->getValueType(ResNo: 0),
5909 Opcodes: {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5910 AArch64::LUTI4_2ZTZI_S}))
5911 // Second Immediate must be <= 3:
5912 SelectMultiVectorLutiLane(Node, NumOutVecs: 2, Opc, MaxImm: 3);
5913 return;
5914 }
5915 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5916 SelectMultiVectorLuti(Node, NumOutVecs: 4, Opc: AArch64::LUTI4_4ZZT2Z);
5917 return;
5918 }
5919 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5920 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5921 VT: Node->getValueType(ResNo: 0),
5922 Opcodes: {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5923 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5924 return;
5925 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5926 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5927 VT: Node->getValueType(ResNo: 0),
5928 Opcodes: {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5929 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5930 return;
5931 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5932 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5933 VT: Node->getValueType(ResNo: 0),
5934 Opcodes: {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5935 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5936 return;
5937 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5938 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5939 VT: Node->getValueType(ResNo: 0),
5940 Opcodes: {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5941 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5942 return;
5943 case Intrinsic::ptrauth_resign_load_relative:
5944 SelectPtrauthResign(N: Node);
5945 return;
5946 }
5947 } break;
5948 case ISD::INTRINSIC_WO_CHAIN: {
5949 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
5950 switch (IntNo) {
5951 default:
5952 break;
5953 case Intrinsic::aarch64_tagp:
5954 SelectTagP(N: Node);
5955 return;
5956
5957 case Intrinsic::ptrauth_auth:
5958 SelectPtrauthAuth(N: Node);
5959 return;
5960
5961 case Intrinsic::ptrauth_resign:
5962 SelectPtrauthResign(N: Node);
5963 return;
5964
5965 case Intrinsic::aarch64_neon_tbl2:
5966 SelectTable(N: Node, NumVecs: 2,
5967 Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5968 isExt: false);
5969 return;
5970 case Intrinsic::aarch64_neon_tbl3:
5971 SelectTable(N: Node, NumVecs: 3, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5972 : AArch64::TBLv16i8Three,
5973 isExt: false);
5974 return;
5975 case Intrinsic::aarch64_neon_tbl4:
5976 SelectTable(N: Node, NumVecs: 4, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5977 : AArch64::TBLv16i8Four,
5978 isExt: false);
5979 return;
5980 case Intrinsic::aarch64_neon_tbx2:
5981 SelectTable(N: Node, NumVecs: 2,
5982 Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5983 isExt: true);
5984 return;
5985 case Intrinsic::aarch64_neon_tbx3:
5986 SelectTable(N: Node, NumVecs: 3, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5987 : AArch64::TBXv16i8Three,
5988 isExt: true);
5989 return;
5990 case Intrinsic::aarch64_neon_tbx4:
5991 SelectTable(N: Node, NumVecs: 4, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5992 : AArch64::TBXv16i8Four,
5993 isExt: true);
5994 return;
5995 case Intrinsic::aarch64_sve_srshl_single_x2:
5996 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5997 VT: Node->getValueType(ResNo: 0),
5998 Opcodes: {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5999 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6000 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6001 return;
6002 case Intrinsic::aarch64_sve_srshl_single_x4:
6003 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6004 VT: Node->getValueType(ResNo: 0),
6005 Opcodes: {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6006 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6007 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6008 return;
6009 case Intrinsic::aarch64_sve_urshl_single_x2:
6010 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6011 VT: Node->getValueType(ResNo: 0),
6012 Opcodes: {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6013 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6014 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6015 return;
6016 case Intrinsic::aarch64_sve_urshl_single_x4:
6017 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6018 VT: Node->getValueType(ResNo: 0),
6019 Opcodes: {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6020 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6021 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6022 return;
6023 case Intrinsic::aarch64_sve_srshl_x2:
6024 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6025 VT: Node->getValueType(ResNo: 0),
6026 Opcodes: {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6027 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6028 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6029 return;
6030 case Intrinsic::aarch64_sve_srshl_x4:
6031 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6032 VT: Node->getValueType(ResNo: 0),
6033 Opcodes: {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6034 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6035 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6036 return;
6037 case Intrinsic::aarch64_sve_urshl_x2:
6038 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6039 VT: Node->getValueType(ResNo: 0),
6040 Opcodes: {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6041 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6042 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6043 return;
6044 case Intrinsic::aarch64_sve_urshl_x4:
6045 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6046 VT: Node->getValueType(ResNo: 0),
6047 Opcodes: {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6048 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6049 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6050 return;
6051 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6052 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6053 VT: Node->getValueType(ResNo: 0),
6054 Opcodes: {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6055 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6056 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6057 return;
6058 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6059 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6060 VT: Node->getValueType(ResNo: 0),
6061 Opcodes: {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6062 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6063 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6064 return;
6065 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6066 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6067 VT: Node->getValueType(ResNo: 0),
6068 Opcodes: {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6069 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6070 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6071 return;
6072 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6073 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6074 VT: Node->getValueType(ResNo: 0),
6075 Opcodes: {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6076 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6077 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6078 return;
6079 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6080 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6081 VT: Node->getValueType(ResNo: 0),
6082 Opcodes: {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6083 AArch64::FSCALE_2ZZ_D}))
6084 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6085 return;
6086 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6087 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6088 VT: Node->getValueType(ResNo: 0),
6089 Opcodes: {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6090 AArch64::FSCALE_4ZZ_D}))
6091 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6092 return;
6093 case Intrinsic::aarch64_sme_fp8_scale_x2:
6094 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6095 VT: Node->getValueType(ResNo: 0),
6096 Opcodes: {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6097 AArch64::FSCALE_2Z2Z_D}))
6098 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6099 return;
6100 case Intrinsic::aarch64_sme_fp8_scale_x4:
6101 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6102 VT: Node->getValueType(ResNo: 0),
6103 Opcodes: {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6104 AArch64::FSCALE_4Z4Z_D}))
6105 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6106 return;
6107 case Intrinsic::aarch64_sve_whilege_x2:
6108 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6109 VT: Node->getValueType(ResNo: 0),
6110 Opcodes: {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6111 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6112 SelectWhilePair(N: Node, Opc: Op);
6113 return;
6114 case Intrinsic::aarch64_sve_whilegt_x2:
6115 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6116 VT: Node->getValueType(ResNo: 0),
6117 Opcodes: {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6118 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6119 SelectWhilePair(N: Node, Opc: Op);
6120 return;
6121 case Intrinsic::aarch64_sve_whilehi_x2:
6122 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6123 VT: Node->getValueType(ResNo: 0),
6124 Opcodes: {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6125 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6126 SelectWhilePair(N: Node, Opc: Op);
6127 return;
6128 case Intrinsic::aarch64_sve_whilehs_x2:
6129 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6130 VT: Node->getValueType(ResNo: 0),
6131 Opcodes: {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6132 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6133 SelectWhilePair(N: Node, Opc: Op);
6134 return;
6135 case Intrinsic::aarch64_sve_whilele_x2:
6136 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6137 VT: Node->getValueType(ResNo: 0),
6138 Opcodes: {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6139 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6140 SelectWhilePair(N: Node, Opc: Op);
6141 return;
6142 case Intrinsic::aarch64_sve_whilelo_x2:
6143 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6144 VT: Node->getValueType(ResNo: 0),
6145 Opcodes: {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6146 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6147 SelectWhilePair(N: Node, Opc: Op);
6148 return;
6149 case Intrinsic::aarch64_sve_whilels_x2:
6150 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6151 VT: Node->getValueType(ResNo: 0),
6152 Opcodes: {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6153 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6154 SelectWhilePair(N: Node, Opc: Op);
6155 return;
6156 case Intrinsic::aarch64_sve_whilelt_x2:
6157 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6158 VT: Node->getValueType(ResNo: 0),
6159 Opcodes: {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6160 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6161 SelectWhilePair(N: Node, Opc: Op);
6162 return;
6163 case Intrinsic::aarch64_sve_smax_single_x2:
6164 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6165 VT: Node->getValueType(ResNo: 0),
6166 Opcodes: {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6167 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6168 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6169 return;
6170 case Intrinsic::aarch64_sve_umax_single_x2:
6171 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6172 VT: Node->getValueType(ResNo: 0),
6173 Opcodes: {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6174 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6175 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6176 return;
6177 case Intrinsic::aarch64_sve_fmax_single_x2:
6178 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6179 VT: Node->getValueType(ResNo: 0),
6180 Opcodes: {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6181 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6182 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6183 return;
6184 case Intrinsic::aarch64_sve_smax_single_x4:
6185 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6186 VT: Node->getValueType(ResNo: 0),
6187 Opcodes: {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6188 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6189 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6190 return;
6191 case Intrinsic::aarch64_sve_umax_single_x4:
6192 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6193 VT: Node->getValueType(ResNo: 0),
6194 Opcodes: {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6195 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6196 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6197 return;
6198 case Intrinsic::aarch64_sve_fmax_single_x4:
6199 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6200 VT: Node->getValueType(ResNo: 0),
6201 Opcodes: {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6202 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6203 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6204 return;
6205 case Intrinsic::aarch64_sve_smin_single_x2:
6206 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6207 VT: Node->getValueType(ResNo: 0),
6208 Opcodes: {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6209 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6210 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6211 return;
6212 case Intrinsic::aarch64_sve_umin_single_x2:
6213 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6214 VT: Node->getValueType(ResNo: 0),
6215 Opcodes: {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6216 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6217 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6218 return;
6219 case Intrinsic::aarch64_sve_fmin_single_x2:
6220 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6221 VT: Node->getValueType(ResNo: 0),
6222 Opcodes: {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6223 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6224 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6225 return;
6226 case Intrinsic::aarch64_sve_smin_single_x4:
6227 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6228 VT: Node->getValueType(ResNo: 0),
6229 Opcodes: {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6230 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6231 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6232 return;
6233 case Intrinsic::aarch64_sve_umin_single_x4:
6234 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6235 VT: Node->getValueType(ResNo: 0),
6236 Opcodes: {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6237 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6238 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6239 return;
6240 case Intrinsic::aarch64_sve_fmin_single_x4:
6241 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6242 VT: Node->getValueType(ResNo: 0),
6243 Opcodes: {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6244 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6245 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6246 return;
6247 case Intrinsic::aarch64_sve_smax_x2:
6248 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6249 VT: Node->getValueType(ResNo: 0),
6250 Opcodes: {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6251 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6252 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6253 return;
6254 case Intrinsic::aarch64_sve_umax_x2:
6255 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6256 VT: Node->getValueType(ResNo: 0),
6257 Opcodes: {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6258 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6259 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6260 return;
6261 case Intrinsic::aarch64_sve_fmax_x2:
6262 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6263 VT: Node->getValueType(ResNo: 0),
6264 Opcodes: {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6265 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6266 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6267 return;
6268 case Intrinsic::aarch64_sve_smax_x4:
6269 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6270 VT: Node->getValueType(ResNo: 0),
6271 Opcodes: {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6272 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6273 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6274 return;
6275 case Intrinsic::aarch64_sve_umax_x4:
6276 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6277 VT: Node->getValueType(ResNo: 0),
6278 Opcodes: {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6279 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6280 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6281 return;
6282 case Intrinsic::aarch64_sve_fmax_x4:
6283 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6284 VT: Node->getValueType(ResNo: 0),
6285 Opcodes: {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6286 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6287 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6288 return;
6289 case Intrinsic::aarch64_sme_famax_x2:
6290 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6291 VT: Node->getValueType(ResNo: 0),
6292 Opcodes: {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6293 AArch64::FAMAX_2Z2Z_D}))
6294 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6295 return;
6296 case Intrinsic::aarch64_sme_famax_x4:
6297 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6298 VT: Node->getValueType(ResNo: 0),
6299 Opcodes: {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6300 AArch64::FAMAX_4Z4Z_D}))
6301 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6302 return;
6303 case Intrinsic::aarch64_sme_famin_x2:
6304 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6305 VT: Node->getValueType(ResNo: 0),
6306 Opcodes: {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6307 AArch64::FAMIN_2Z2Z_D}))
6308 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6309 return;
6310 case Intrinsic::aarch64_sme_famin_x4:
6311 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6312 VT: Node->getValueType(ResNo: 0),
6313 Opcodes: {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6314 AArch64::FAMIN_4Z4Z_D}))
6315 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6316 return;
6317 case Intrinsic::aarch64_sve_smin_x2:
6318 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6319 VT: Node->getValueType(ResNo: 0),
6320 Opcodes: {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6321 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6322 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6323 return;
6324 case Intrinsic::aarch64_sve_umin_x2:
6325 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6326 VT: Node->getValueType(ResNo: 0),
6327 Opcodes: {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6328 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6329 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6330 return;
6331 case Intrinsic::aarch64_sve_fmin_x2:
6332 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6333 VT: Node->getValueType(ResNo: 0),
6334 Opcodes: {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6335 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6336 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6337 return;
6338 case Intrinsic::aarch64_sve_smin_x4:
6339 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6340 VT: Node->getValueType(ResNo: 0),
6341 Opcodes: {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6342 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6343 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6344 return;
6345 case Intrinsic::aarch64_sve_umin_x4:
6346 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6347 VT: Node->getValueType(ResNo: 0),
6348 Opcodes: {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6349 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6350 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6351 return;
6352 case Intrinsic::aarch64_sve_fmin_x4:
6353 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6354 VT: Node->getValueType(ResNo: 0),
6355 Opcodes: {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6356 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6357 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6358 return;
6359 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6360 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6361 VT: Node->getValueType(ResNo: 0),
6362 Opcodes: {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6363 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6364 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6365 return;
6366 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6367 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6368 VT: Node->getValueType(ResNo: 0),
6369 Opcodes: {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6370 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6371 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6372 return;
6373 case Intrinsic::aarch64_sve_fminnm_single_x2:
6374 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6375 VT: Node->getValueType(ResNo: 0),
6376 Opcodes: {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6377 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6378 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6379 return;
6380 case Intrinsic::aarch64_sve_fminnm_single_x4:
6381 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6382 VT: Node->getValueType(ResNo: 0),
6383 Opcodes: {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6384 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6385 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6386 return;
6387 case Intrinsic::aarch64_sve_fscale_single_x4:
6388 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::BFSCALE_4ZZ);
6389 return;
6390 case Intrinsic::aarch64_sve_fscale_single_x2:
6391 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::BFSCALE_2ZZ);
6392 return;
6393 case Intrinsic::aarch64_sve_fmul_single_x4:
6394 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6395 VT: Node->getValueType(ResNo: 0),
6396 Opcodes: {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6397 AArch64::FMUL_4ZZ_D}))
6398 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6399 return;
6400 case Intrinsic::aarch64_sve_fmul_single_x2:
6401 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6402 VT: Node->getValueType(ResNo: 0),
6403 Opcodes: {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6404 AArch64::FMUL_2ZZ_D}))
6405 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6406 return;
6407 case Intrinsic::aarch64_sve_fmaxnm_x2:
6408 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6409 VT: Node->getValueType(ResNo: 0),
6410 Opcodes: {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6411 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6412 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6413 return;
6414 case Intrinsic::aarch64_sve_fmaxnm_x4:
6415 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6416 VT: Node->getValueType(ResNo: 0),
6417 Opcodes: {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6418 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6419 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6420 return;
6421 case Intrinsic::aarch64_sve_fminnm_x2:
6422 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6423 VT: Node->getValueType(ResNo: 0),
6424 Opcodes: {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6425 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6426 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6427 return;
6428 case Intrinsic::aarch64_sve_fminnm_x4:
6429 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6430 VT: Node->getValueType(ResNo: 0),
6431 Opcodes: {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6432 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6433 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6434 return;
6435 case Intrinsic::aarch64_sve_aese_lane_x2:
6436 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESE_2ZZI_B);
6437 return;
6438 case Intrinsic::aarch64_sve_aesd_lane_x2:
6439 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESD_2ZZI_B);
6440 return;
6441 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6442 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESEMC_2ZZI_B);
6443 return;
6444 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6445 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESDIMC_2ZZI_B);
6446 return;
6447 case Intrinsic::aarch64_sve_aese_lane_x4:
6448 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESE_4ZZI_B);
6449 return;
6450 case Intrinsic::aarch64_sve_aesd_lane_x4:
6451 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESD_4ZZI_B);
6452 return;
6453 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6454 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESEMC_4ZZI_B);
6455 return;
6456 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6457 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESDIMC_4ZZI_B);
6458 return;
6459 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6460 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::PMLAL_2ZZZ_Q);
6461 return;
6462 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6463 SDLoc DL(Node);
6464 SmallVector<SDValue, 4> Regs(Node->ops().slice(N: 1, M: 2));
6465 SDNode *Res =
6466 CurDAG->getMachineNode(Opcode: AArch64::PMULL_2ZZZ_Q, dl: DL, VT: MVT::Untyped, Ops: Regs);
6467 SDValue SuperReg = SDValue(Res, 0);
6468 for (unsigned I = 0; I < 2; I++)
6469 ReplaceUses(F: SDValue(Node, I),
6470 T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
6471 Operand: SuperReg));
6472 CurDAG->RemoveDeadNode(N: Node);
6473 return;
6474 }
6475 case Intrinsic::aarch64_sve_fscale_x4:
6476 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: AArch64::BFSCALE_4Z4Z);
6477 return;
6478 case Intrinsic::aarch64_sve_fscale_x2:
6479 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: AArch64::BFSCALE_2Z2Z);
6480 return;
6481 case Intrinsic::aarch64_sve_fmul_x4:
6482 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6483 VT: Node->getValueType(ResNo: 0),
6484 Opcodes: {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6485 AArch64::FMUL_4Z4Z_D}))
6486 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6487 return;
6488 case Intrinsic::aarch64_sve_fmul_x2:
6489 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6490 VT: Node->getValueType(ResNo: 0),
6491 Opcodes: {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6492 AArch64::FMUL_2Z2Z_D}))
6493 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6494 return;
6495 case Intrinsic::aarch64_sve_fcvtzs_x2:
6496 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::FCVTZS_2Z2Z_StoS);
6497 return;
6498 case Intrinsic::aarch64_sve_scvtf_x2:
6499 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::SCVTF_2Z2Z_StoS);
6500 return;
6501 case Intrinsic::aarch64_sve_fcvtzu_x2:
6502 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::FCVTZU_2Z2Z_StoS);
6503 return;
6504 case Intrinsic::aarch64_sve_ucvtf_x2:
6505 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::UCVTF_2Z2Z_StoS);
6506 return;
6507 case Intrinsic::aarch64_sve_fcvtzs_x4:
6508 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::FCVTZS_4Z4Z_StoS);
6509 return;
6510 case Intrinsic::aarch64_sve_scvtf_x4:
6511 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::SCVTF_4Z4Z_StoS);
6512 return;
6513 case Intrinsic::aarch64_sve_fcvtzu_x4:
6514 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::FCVTZU_4Z4Z_StoS);
6515 return;
6516 case Intrinsic::aarch64_sve_ucvtf_x4:
6517 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::UCVTF_4Z4Z_StoS);
6518 return;
6519 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6520 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, IsTupleInput: false, Opc: AArch64::FCVT_2ZZ_H_S);
6521 return;
6522 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6523 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, IsTupleInput: false, Opc: AArch64::FCVTL_2ZZ_H_S);
6524 return;
6525 case Intrinsic::aarch64_sve_sclamp_single_x2:
6526 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6527 VT: Node->getValueType(ResNo: 0),
6528 Opcodes: {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6529 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6530 SelectClamp(N: Node, NumVecs: 2, Op);
6531 return;
6532 case Intrinsic::aarch64_sve_uclamp_single_x2:
6533 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6534 VT: Node->getValueType(ResNo: 0),
6535 Opcodes: {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6536 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6537 SelectClamp(N: Node, NumVecs: 2, Op);
6538 return;
6539 case Intrinsic::aarch64_sve_fclamp_single_x2:
6540 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6541 VT: Node->getValueType(ResNo: 0),
6542 Opcodes: {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6543 AArch64::FCLAMP_VG2_2Z2Z_D}))
6544 SelectClamp(N: Node, NumVecs: 2, Op);
6545 return;
6546 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6547 SelectClamp(N: Node, NumVecs: 2, Op: AArch64::BFCLAMP_VG2_2ZZZ_H);
6548 return;
6549 case Intrinsic::aarch64_sve_sclamp_single_x4:
6550 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6551 VT: Node->getValueType(ResNo: 0),
6552 Opcodes: {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6553 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6554 SelectClamp(N: Node, NumVecs: 4, Op);
6555 return;
6556 case Intrinsic::aarch64_sve_uclamp_single_x4:
6557 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6558 VT: Node->getValueType(ResNo: 0),
6559 Opcodes: {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6560 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6561 SelectClamp(N: Node, NumVecs: 4, Op);
6562 return;
6563 case Intrinsic::aarch64_sve_fclamp_single_x4:
6564 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6565 VT: Node->getValueType(ResNo: 0),
6566 Opcodes: {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6567 AArch64::FCLAMP_VG4_4Z4Z_D}))
6568 SelectClamp(N: Node, NumVecs: 4, Op);
6569 return;
6570 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6571 SelectClamp(N: Node, NumVecs: 4, Op: AArch64::BFCLAMP_VG4_4ZZZ_H);
6572 return;
6573 case Intrinsic::aarch64_sve_add_single_x2:
6574 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6575 VT: Node->getValueType(ResNo: 0),
6576 Opcodes: {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6577 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6578 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6579 return;
6580 case Intrinsic::aarch64_sve_add_single_x4:
6581 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6582 VT: Node->getValueType(ResNo: 0),
6583 Opcodes: {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6584 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6585 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6586 return;
6587 case Intrinsic::aarch64_sve_zip_x2:
6588 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6589 VT: Node->getValueType(ResNo: 0),
6590 Opcodes: {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6591 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6592 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6593 return;
6594 case Intrinsic::aarch64_sve_zipq_x2:
6595 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false,
6596 Opc: AArch64::ZIP_VG2_2ZZZ_Q);
6597 return;
6598 case Intrinsic::aarch64_sve_zip_x4:
6599 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6600 VT: Node->getValueType(ResNo: 0),
6601 Opcodes: {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6602 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6603 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6604 return;
6605 case Intrinsic::aarch64_sve_zipq_x4:
6606 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true,
6607 Opc: AArch64::ZIP_VG4_4Z4Z_Q);
6608 return;
6609 case Intrinsic::aarch64_sve_uzp_x2:
6610 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6611 VT: Node->getValueType(ResNo: 0),
6612 Opcodes: {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6613 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6614 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6615 return;
6616 case Intrinsic::aarch64_sve_uzpq_x2:
6617 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false,
6618 Opc: AArch64::UZP_VG2_2ZZZ_Q);
6619 return;
6620 case Intrinsic::aarch64_sve_uzp_x4:
6621 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6622 VT: Node->getValueType(ResNo: 0),
6623 Opcodes: {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6624 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6625 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6626 return;
6627 case Intrinsic::aarch64_sve_uzpq_x4:
6628 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true,
6629 Opc: AArch64::UZP_VG4_4Z4Z_Q);
6630 return;
6631 case Intrinsic::aarch64_sve_sel_x2:
6632 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6633 VT: Node->getValueType(ResNo: 0),
6634 Opcodes: {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6635 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6636 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
6637 return;
6638 case Intrinsic::aarch64_sve_sel_x4:
6639 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6640 VT: Node->getValueType(ResNo: 0),
6641 Opcodes: {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6642 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6643 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
6644 return;
6645 case Intrinsic::aarch64_sve_frinta_x2:
6646 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTA_2Z2Z_S);
6647 return;
6648 case Intrinsic::aarch64_sve_frinta_x4:
6649 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTA_4Z4Z_S);
6650 return;
6651 case Intrinsic::aarch64_sve_frintm_x2:
6652 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTM_2Z2Z_S);
6653 return;
6654 case Intrinsic::aarch64_sve_frintm_x4:
6655 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTM_4Z4Z_S);
6656 return;
6657 case Intrinsic::aarch64_sve_frintn_x2:
6658 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTN_2Z2Z_S);
6659 return;
6660 case Intrinsic::aarch64_sve_frintn_x4:
6661 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTN_4Z4Z_S);
6662 return;
6663 case Intrinsic::aarch64_sve_frintp_x2:
6664 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTP_2Z2Z_S);
6665 return;
6666 case Intrinsic::aarch64_sve_frintp_x4:
6667 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTP_4Z4Z_S);
6668 return;
6669 case Intrinsic::aarch64_sve_sunpk_x2:
6670 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6671 VT: Node->getValueType(ResNo: 0),
6672 Opcodes: {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6673 AArch64::SUNPK_VG2_2ZZ_D}))
6674 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6675 return;
6676 case Intrinsic::aarch64_sve_uunpk_x2:
6677 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6678 VT: Node->getValueType(ResNo: 0),
6679 Opcodes: {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6680 AArch64::UUNPK_VG2_2ZZ_D}))
6681 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6682 return;
6683 case Intrinsic::aarch64_sve_sunpk_x4:
6684 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6685 VT: Node->getValueType(ResNo: 0),
6686 Opcodes: {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6687 AArch64::SUNPK_VG4_4Z2Z_D}))
6688 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6689 return;
6690 case Intrinsic::aarch64_sve_uunpk_x4:
6691 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6692 VT: Node->getValueType(ResNo: 0),
6693 Opcodes: {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6694 AArch64::UUNPK_VG4_4Z2Z_D}))
6695 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6696 return;
6697 case Intrinsic::aarch64_sve_pext_x2: {
6698 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6699 VT: Node->getValueType(ResNo: 0),
6700 Opcodes: {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6701 AArch64::PEXT_2PCI_D}))
6702 SelectPExtPair(N: Node, Opc: Op);
6703 return;
6704 }
6705 }
6706 break;
6707 }
6708 case ISD::INTRINSIC_VOID: {
6709 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
6710 if (Node->getNumOperands() >= 3)
6711 VT = Node->getOperand(Num: 2)->getValueType(ResNo: 0);
6712 switch (IntNo) {
6713 default:
6714 break;
6715 case Intrinsic::aarch64_neon_st1x2: {
6716 if (VT == MVT::v8i8) {
6717 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8b);
6718 return;
6719 } else if (VT == MVT::v16i8) {
6720 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov16b);
6721 return;
6722 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6723 VT == MVT::v4bf16) {
6724 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4h);
6725 return;
6726 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6727 VT == MVT::v8bf16) {
6728 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8h);
6729 return;
6730 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6731 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2s);
6732 return;
6733 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6734 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4s);
6735 return;
6736 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6737 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2d);
6738 return;
6739 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6740 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d);
6741 return;
6742 }
6743 break;
6744 }
6745 case Intrinsic::aarch64_neon_st1x3: {
6746 if (VT == MVT::v8i8) {
6747 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8b);
6748 return;
6749 } else if (VT == MVT::v16i8) {
6750 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev16b);
6751 return;
6752 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6753 VT == MVT::v4bf16) {
6754 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4h);
6755 return;
6756 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6757 VT == MVT::v8bf16) {
6758 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8h);
6759 return;
6760 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6761 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2s);
6762 return;
6763 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6764 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4s);
6765 return;
6766 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6767 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2d);
6768 return;
6769 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6770 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d);
6771 return;
6772 }
6773 break;
6774 }
6775 case Intrinsic::aarch64_neon_st1x4: {
6776 if (VT == MVT::v8i8) {
6777 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8b);
6778 return;
6779 } else if (VT == MVT::v16i8) {
6780 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv16b);
6781 return;
6782 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6783 VT == MVT::v4bf16) {
6784 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4h);
6785 return;
6786 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6787 VT == MVT::v8bf16) {
6788 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8h);
6789 return;
6790 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6791 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2s);
6792 return;
6793 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6794 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4s);
6795 return;
6796 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6797 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2d);
6798 return;
6799 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6800 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d);
6801 return;
6802 }
6803 break;
6804 }
6805 case Intrinsic::aarch64_neon_st2: {
6806 if (VT == MVT::v8i8) {
6807 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8b);
6808 return;
6809 } else if (VT == MVT::v16i8) {
6810 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov16b);
6811 return;
6812 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6813 VT == MVT::v4bf16) {
6814 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4h);
6815 return;
6816 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6817 VT == MVT::v8bf16) {
6818 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8h);
6819 return;
6820 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6821 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2s);
6822 return;
6823 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6824 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4s);
6825 return;
6826 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6827 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2d);
6828 return;
6829 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6830 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d);
6831 return;
6832 }
6833 break;
6834 }
6835 case Intrinsic::aarch64_neon_st3: {
6836 if (VT == MVT::v8i8) {
6837 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8b);
6838 return;
6839 } else if (VT == MVT::v16i8) {
6840 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev16b);
6841 return;
6842 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6843 VT == MVT::v4bf16) {
6844 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4h);
6845 return;
6846 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6847 VT == MVT::v8bf16) {
6848 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8h);
6849 return;
6850 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6851 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2s);
6852 return;
6853 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6854 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4s);
6855 return;
6856 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6857 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2d);
6858 return;
6859 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6860 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d);
6861 return;
6862 }
6863 break;
6864 }
6865 case Intrinsic::aarch64_neon_st4: {
6866 if (VT == MVT::v8i8) {
6867 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8b);
6868 return;
6869 } else if (VT == MVT::v16i8) {
6870 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv16b);
6871 return;
6872 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6873 VT == MVT::v4bf16) {
6874 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4h);
6875 return;
6876 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6877 VT == MVT::v8bf16) {
6878 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8h);
6879 return;
6880 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6881 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2s);
6882 return;
6883 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6884 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4s);
6885 return;
6886 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6887 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2d);
6888 return;
6889 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6890 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d);
6891 return;
6892 }
6893 break;
6894 }
6895 case Intrinsic::aarch64_neon_st2lane: {
6896 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6897 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i8);
6898 return;
6899 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6900 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6901 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i16);
6902 return;
6903 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6904 VT == MVT::v2f32) {
6905 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i32);
6906 return;
6907 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6908 VT == MVT::v1f64) {
6909 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i64);
6910 return;
6911 }
6912 break;
6913 }
6914 case Intrinsic::aarch64_neon_st3lane: {
6915 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6916 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i8);
6917 return;
6918 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6919 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6920 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i16);
6921 return;
6922 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6923 VT == MVT::v2f32) {
6924 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i32);
6925 return;
6926 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6927 VT == MVT::v1f64) {
6928 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i64);
6929 return;
6930 }
6931 break;
6932 }
6933 case Intrinsic::aarch64_neon_st4lane: {
6934 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6935 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i8);
6936 return;
6937 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6938 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6939 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i16);
6940 return;
6941 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6942 VT == MVT::v2f32) {
6943 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i32);
6944 return;
6945 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6946 VT == MVT::v1f64) {
6947 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i64);
6948 return;
6949 }
6950 break;
6951 }
6952 case Intrinsic::aarch64_sve_st2q: {
6953 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 4, Opc_rr: AArch64::ST2Q, Opc_ri: AArch64::ST2Q_IMM);
6954 return;
6955 }
6956 case Intrinsic::aarch64_sve_st3q: {
6957 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 4, Opc_rr: AArch64::ST3Q, Opc_ri: AArch64::ST3Q_IMM);
6958 return;
6959 }
6960 case Intrinsic::aarch64_sve_st4q: {
6961 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 4, Opc_rr: AArch64::ST4Q, Opc_ri: AArch64::ST4Q_IMM);
6962 return;
6963 }
6964 case Intrinsic::aarch64_sve_st2: {
6965 if (VT == MVT::nxv16i8) {
6966 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 0, Opc_rr: AArch64::ST2B, Opc_ri: AArch64::ST2B_IMM);
6967 return;
6968 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6969 VT == MVT::nxv8bf16) {
6970 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 1, Opc_rr: AArch64::ST2H, Opc_ri: AArch64::ST2H_IMM);
6971 return;
6972 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6973 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 2, Opc_rr: AArch64::ST2W, Opc_ri: AArch64::ST2W_IMM);
6974 return;
6975 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6976 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 3, Opc_rr: AArch64::ST2D, Opc_ri: AArch64::ST2D_IMM);
6977 return;
6978 }
6979 break;
6980 }
6981 case Intrinsic::aarch64_sve_st3: {
6982 if (VT == MVT::nxv16i8) {
6983 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 0, Opc_rr: AArch64::ST3B, Opc_ri: AArch64::ST3B_IMM);
6984 return;
6985 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6986 VT == MVT::nxv8bf16) {
6987 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 1, Opc_rr: AArch64::ST3H, Opc_ri: AArch64::ST3H_IMM);
6988 return;
6989 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6990 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 2, Opc_rr: AArch64::ST3W, Opc_ri: AArch64::ST3W_IMM);
6991 return;
6992 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6993 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 3, Opc_rr: AArch64::ST3D, Opc_ri: AArch64::ST3D_IMM);
6994 return;
6995 }
6996 break;
6997 }
6998 case Intrinsic::aarch64_sve_st4: {
6999 if (VT == MVT::nxv16i8) {
7000 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 0, Opc_rr: AArch64::ST4B, Opc_ri: AArch64::ST4B_IMM);
7001 return;
7002 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7003 VT == MVT::nxv8bf16) {
7004 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 1, Opc_rr: AArch64::ST4H, Opc_ri: AArch64::ST4H_IMM);
7005 return;
7006 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7007 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 2, Opc_rr: AArch64::ST4W, Opc_ri: AArch64::ST4W_IMM);
7008 return;
7009 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7010 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 3, Opc_rr: AArch64::ST4D, Opc_ri: AArch64::ST4D_IMM);
7011 return;
7012 }
7013 break;
7014 }
7015 }
7016 break;
7017 }
7018 case AArch64ISD::LD2post: {
7019 if (VT == MVT::v8i8) {
7020 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8b_POST, SubRegIdx: AArch64::dsub0);
7021 return;
7022 } else if (VT == MVT::v16i8) {
7023 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov16b_POST, SubRegIdx: AArch64::qsub0);
7024 return;
7025 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7026 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4h_POST, SubRegIdx: AArch64::dsub0);
7027 return;
7028 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7029 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8h_POST, SubRegIdx: AArch64::qsub0);
7030 return;
7031 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7032 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2s_POST, SubRegIdx: AArch64::dsub0);
7033 return;
7034 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7035 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4s_POST, SubRegIdx: AArch64::qsub0);
7036 return;
7037 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7038 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d_POST, SubRegIdx: AArch64::dsub0);
7039 return;
7040 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7041 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2d_POST, SubRegIdx: AArch64::qsub0);
7042 return;
7043 }
7044 break;
7045 }
7046 case AArch64ISD::LD3post: {
7047 if (VT == MVT::v8i8) {
7048 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8b_POST, SubRegIdx: AArch64::dsub0);
7049 return;
7050 } else if (VT == MVT::v16i8) {
7051 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev16b_POST, SubRegIdx: AArch64::qsub0);
7052 return;
7053 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7054 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4h_POST, SubRegIdx: AArch64::dsub0);
7055 return;
7056 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7057 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8h_POST, SubRegIdx: AArch64::qsub0);
7058 return;
7059 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7060 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2s_POST, SubRegIdx: AArch64::dsub0);
7061 return;
7062 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7063 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4s_POST, SubRegIdx: AArch64::qsub0);
7064 return;
7065 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7066 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d_POST, SubRegIdx: AArch64::dsub0);
7067 return;
7068 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7069 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2d_POST, SubRegIdx: AArch64::qsub0);
7070 return;
7071 }
7072 break;
7073 }
7074 case AArch64ISD::LD4post: {
7075 if (VT == MVT::v8i8) {
7076 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8b_POST, SubRegIdx: AArch64::dsub0);
7077 return;
7078 } else if (VT == MVT::v16i8) {
7079 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv16b_POST, SubRegIdx: AArch64::qsub0);
7080 return;
7081 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7082 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4h_POST, SubRegIdx: AArch64::dsub0);
7083 return;
7084 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7085 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8h_POST, SubRegIdx: AArch64::qsub0);
7086 return;
7087 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7088 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2s_POST, SubRegIdx: AArch64::dsub0);
7089 return;
7090 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7091 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4s_POST, SubRegIdx: AArch64::qsub0);
7092 return;
7093 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7094 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d_POST, SubRegIdx: AArch64::dsub0);
7095 return;
7096 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7097 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2d_POST, SubRegIdx: AArch64::qsub0);
7098 return;
7099 }
7100 break;
7101 }
7102 case AArch64ISD::LD1x2post: {
7103 if (VT == MVT::v8i8) {
7104 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8b_POST, SubRegIdx: AArch64::dsub0);
7105 return;
7106 } else if (VT == MVT::v16i8) {
7107 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov16b_POST, SubRegIdx: AArch64::qsub0);
7108 return;
7109 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7110 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4h_POST, SubRegIdx: AArch64::dsub0);
7111 return;
7112 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7113 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8h_POST, SubRegIdx: AArch64::qsub0);
7114 return;
7115 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7116 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2s_POST, SubRegIdx: AArch64::dsub0);
7117 return;
7118 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7119 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4s_POST, SubRegIdx: AArch64::qsub0);
7120 return;
7121 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7122 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d_POST, SubRegIdx: AArch64::dsub0);
7123 return;
7124 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7125 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2d_POST, SubRegIdx: AArch64::qsub0);
7126 return;
7127 }
7128 break;
7129 }
7130 case AArch64ISD::LD1x3post: {
7131 if (VT == MVT::v8i8) {
7132 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8b_POST, SubRegIdx: AArch64::dsub0);
7133 return;
7134 } else if (VT == MVT::v16i8) {
7135 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev16b_POST, SubRegIdx: AArch64::qsub0);
7136 return;
7137 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7138 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4h_POST, SubRegIdx: AArch64::dsub0);
7139 return;
7140 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7141 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8h_POST, SubRegIdx: AArch64::qsub0);
7142 return;
7143 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7144 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2s_POST, SubRegIdx: AArch64::dsub0);
7145 return;
7146 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7147 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4s_POST, SubRegIdx: AArch64::qsub0);
7148 return;
7149 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7150 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d_POST, SubRegIdx: AArch64::dsub0);
7151 return;
7152 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7153 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2d_POST, SubRegIdx: AArch64::qsub0);
7154 return;
7155 }
7156 break;
7157 }
7158 case AArch64ISD::LD1x4post: {
7159 if (VT == MVT::v8i8) {
7160 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8b_POST, SubRegIdx: AArch64::dsub0);
7161 return;
7162 } else if (VT == MVT::v16i8) {
7163 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv16b_POST, SubRegIdx: AArch64::qsub0);
7164 return;
7165 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7166 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4h_POST, SubRegIdx: AArch64::dsub0);
7167 return;
7168 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7169 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8h_POST, SubRegIdx: AArch64::qsub0);
7170 return;
7171 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7172 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2s_POST, SubRegIdx: AArch64::dsub0);
7173 return;
7174 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7175 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4s_POST, SubRegIdx: AArch64::qsub0);
7176 return;
7177 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7178 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d_POST, SubRegIdx: AArch64::dsub0);
7179 return;
7180 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7181 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2d_POST, SubRegIdx: AArch64::qsub0);
7182 return;
7183 }
7184 break;
7185 }
7186 case AArch64ISD::LD1DUPpost: {
7187 if (VT == MVT::v8i8) {
7188 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv8b_POST, SubRegIdx: AArch64::dsub0);
7189 return;
7190 } else if (VT == MVT::v16i8) {
7191 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv16b_POST, SubRegIdx: AArch64::qsub0);
7192 return;
7193 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7194 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv4h_POST, SubRegIdx: AArch64::dsub0);
7195 return;
7196 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7197 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv8h_POST, SubRegIdx: AArch64::qsub0);
7198 return;
7199 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7200 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv2s_POST, SubRegIdx: AArch64::dsub0);
7201 return;
7202 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7203 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv4s_POST, SubRegIdx: AArch64::qsub0);
7204 return;
7205 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7206 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv1d_POST, SubRegIdx: AArch64::dsub0);
7207 return;
7208 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7209 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv2d_POST, SubRegIdx: AArch64::qsub0);
7210 return;
7211 }
7212 break;
7213 }
7214 case AArch64ISD::LD2DUPpost: {
7215 if (VT == MVT::v8i8) {
7216 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8b_POST, SubRegIdx: AArch64::dsub0);
7217 return;
7218 } else if (VT == MVT::v16i8) {
7219 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv16b_POST, SubRegIdx: AArch64::qsub0);
7220 return;
7221 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7222 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4h_POST, SubRegIdx: AArch64::dsub0);
7223 return;
7224 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7225 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8h_POST, SubRegIdx: AArch64::qsub0);
7226 return;
7227 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7228 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2s_POST, SubRegIdx: AArch64::dsub0);
7229 return;
7230 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7231 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4s_POST, SubRegIdx: AArch64::qsub0);
7232 return;
7233 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7234 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv1d_POST, SubRegIdx: AArch64::dsub0);
7235 return;
7236 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7237 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2d_POST, SubRegIdx: AArch64::qsub0);
7238 return;
7239 }
7240 break;
7241 }
7242 case AArch64ISD::LD3DUPpost: {
7243 if (VT == MVT::v8i8) {
7244 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8b_POST, SubRegIdx: AArch64::dsub0);
7245 return;
7246 } else if (VT == MVT::v16i8) {
7247 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv16b_POST, SubRegIdx: AArch64::qsub0);
7248 return;
7249 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7250 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4h_POST, SubRegIdx: AArch64::dsub0);
7251 return;
7252 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7253 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8h_POST, SubRegIdx: AArch64::qsub0);
7254 return;
7255 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7256 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2s_POST, SubRegIdx: AArch64::dsub0);
7257 return;
7258 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7259 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4s_POST, SubRegIdx: AArch64::qsub0);
7260 return;
7261 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7262 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv1d_POST, SubRegIdx: AArch64::dsub0);
7263 return;
7264 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7265 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2d_POST, SubRegIdx: AArch64::qsub0);
7266 return;
7267 }
7268 break;
7269 }
7270 case AArch64ISD::LD4DUPpost: {
7271 if (VT == MVT::v8i8) {
7272 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8b_POST, SubRegIdx: AArch64::dsub0);
7273 return;
7274 } else if (VT == MVT::v16i8) {
7275 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv16b_POST, SubRegIdx: AArch64::qsub0);
7276 return;
7277 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7278 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4h_POST, SubRegIdx: AArch64::dsub0);
7279 return;
7280 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7281 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8h_POST, SubRegIdx: AArch64::qsub0);
7282 return;
7283 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7284 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2s_POST, SubRegIdx: AArch64::dsub0);
7285 return;
7286 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7287 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4s_POST, SubRegIdx: AArch64::qsub0);
7288 return;
7289 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7290 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv1d_POST, SubRegIdx: AArch64::dsub0);
7291 return;
7292 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7293 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2d_POST, SubRegIdx: AArch64::qsub0);
7294 return;
7295 }
7296 break;
7297 }
7298 case AArch64ISD::LD1LANEpost: {
7299 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7300 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i8_POST);
7301 return;
7302 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7303 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7304 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i16_POST);
7305 return;
7306 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7307 VT == MVT::v2f32) {
7308 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i32_POST);
7309 return;
7310 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7311 VT == MVT::v1f64) {
7312 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i64_POST);
7313 return;
7314 }
7315 break;
7316 }
7317 case AArch64ISD::LD2LANEpost: {
7318 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7319 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i8_POST);
7320 return;
7321 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7322 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7323 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i16_POST);
7324 return;
7325 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7326 VT == MVT::v2f32) {
7327 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i32_POST);
7328 return;
7329 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7330 VT == MVT::v1f64) {
7331 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i64_POST);
7332 return;
7333 }
7334 break;
7335 }
7336 case AArch64ISD::LD3LANEpost: {
7337 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7338 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i8_POST);
7339 return;
7340 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7341 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7342 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i16_POST);
7343 return;
7344 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7345 VT == MVT::v2f32) {
7346 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i32_POST);
7347 return;
7348 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7349 VT == MVT::v1f64) {
7350 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i64_POST);
7351 return;
7352 }
7353 break;
7354 }
7355 case AArch64ISD::LD4LANEpost: {
7356 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7357 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i8_POST);
7358 return;
7359 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7360 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7361 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i16_POST);
7362 return;
7363 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7364 VT == MVT::v2f32) {
7365 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i32_POST);
7366 return;
7367 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7368 VT == MVT::v1f64) {
7369 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i64_POST);
7370 return;
7371 }
7372 break;
7373 }
7374 case AArch64ISD::ST2post: {
7375 VT = Node->getOperand(Num: 1).getValueType();
7376 if (VT == MVT::v8i8) {
7377 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8b_POST);
7378 return;
7379 } else if (VT == MVT::v16i8) {
7380 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov16b_POST);
7381 return;
7382 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7383 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4h_POST);
7384 return;
7385 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7386 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8h_POST);
7387 return;
7388 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7389 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2s_POST);
7390 return;
7391 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7392 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4s_POST);
7393 return;
7394 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7395 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2d_POST);
7396 return;
7397 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7398 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d_POST);
7399 return;
7400 }
7401 break;
7402 }
7403 case AArch64ISD::ST3post: {
7404 VT = Node->getOperand(Num: 1).getValueType();
7405 if (VT == MVT::v8i8) {
7406 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8b_POST);
7407 return;
7408 } else if (VT == MVT::v16i8) {
7409 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev16b_POST);
7410 return;
7411 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7412 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4h_POST);
7413 return;
7414 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7415 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8h_POST);
7416 return;
7417 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7418 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2s_POST);
7419 return;
7420 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7421 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4s_POST);
7422 return;
7423 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7424 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2d_POST);
7425 return;
7426 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7427 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d_POST);
7428 return;
7429 }
7430 break;
7431 }
7432 case AArch64ISD::ST4post: {
7433 VT = Node->getOperand(Num: 1).getValueType();
7434 if (VT == MVT::v8i8) {
7435 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8b_POST);
7436 return;
7437 } else if (VT == MVT::v16i8) {
7438 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv16b_POST);
7439 return;
7440 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7441 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4h_POST);
7442 return;
7443 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7444 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8h_POST);
7445 return;
7446 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7447 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2s_POST);
7448 return;
7449 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7450 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4s_POST);
7451 return;
7452 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7453 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2d_POST);
7454 return;
7455 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7456 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d_POST);
7457 return;
7458 }
7459 break;
7460 }
7461 case AArch64ISD::ST1x2post: {
7462 VT = Node->getOperand(Num: 1).getValueType();
7463 if (VT == MVT::v8i8) {
7464 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8b_POST);
7465 return;
7466 } else if (VT == MVT::v16i8) {
7467 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov16b_POST);
7468 return;
7469 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7470 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4h_POST);
7471 return;
7472 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7473 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8h_POST);
7474 return;
7475 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7476 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2s_POST);
7477 return;
7478 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7479 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4s_POST);
7480 return;
7481 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7482 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d_POST);
7483 return;
7484 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7485 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2d_POST);
7486 return;
7487 }
7488 break;
7489 }
7490 case AArch64ISD::ST1x3post: {
7491 VT = Node->getOperand(Num: 1).getValueType();
7492 if (VT == MVT::v8i8) {
7493 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8b_POST);
7494 return;
7495 } else if (VT == MVT::v16i8) {
7496 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev16b_POST);
7497 return;
7498 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7499 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4h_POST);
7500 return;
7501 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7502 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8h_POST);
7503 return;
7504 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7505 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2s_POST);
7506 return;
7507 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7508 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4s_POST);
7509 return;
7510 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7511 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d_POST);
7512 return;
7513 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7514 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2d_POST);
7515 return;
7516 }
7517 break;
7518 }
7519 case AArch64ISD::ST1x4post: {
7520 VT = Node->getOperand(Num: 1).getValueType();
7521 if (VT == MVT::v8i8) {
7522 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8b_POST);
7523 return;
7524 } else if (VT == MVT::v16i8) {
7525 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv16b_POST);
7526 return;
7527 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7528 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4h_POST);
7529 return;
7530 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7531 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8h_POST);
7532 return;
7533 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7534 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2s_POST);
7535 return;
7536 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7537 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4s_POST);
7538 return;
7539 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7540 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d_POST);
7541 return;
7542 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7543 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2d_POST);
7544 return;
7545 }
7546 break;
7547 }
7548 case AArch64ISD::ST2LANEpost: {
7549 VT = Node->getOperand(Num: 1).getValueType();
7550 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7551 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i8_POST);
7552 return;
7553 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7554 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7555 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i16_POST);
7556 return;
7557 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7558 VT == MVT::v2f32) {
7559 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i32_POST);
7560 return;
7561 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7562 VT == MVT::v1f64) {
7563 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i64_POST);
7564 return;
7565 }
7566 break;
7567 }
7568 case AArch64ISD::ST3LANEpost: {
7569 VT = Node->getOperand(Num: 1).getValueType();
7570 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7571 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i8_POST);
7572 return;
7573 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7574 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7575 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i16_POST);
7576 return;
7577 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7578 VT == MVT::v2f32) {
7579 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i32_POST);
7580 return;
7581 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7582 VT == MVT::v1f64) {
7583 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i64_POST);
7584 return;
7585 }
7586 break;
7587 }
7588 case AArch64ISD::ST4LANEpost: {
7589 VT = Node->getOperand(Num: 1).getValueType();
7590 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7591 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i8_POST);
7592 return;
7593 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7594 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7595 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i16_POST);
7596 return;
7597 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7598 VT == MVT::v2f32) {
7599 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i32_POST);
7600 return;
7601 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7602 VT == MVT::v1f64) {
7603 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i64_POST);
7604 return;
7605 }
7606 break;
7607 }
7608 }
7609
7610 // Select the default instruction
7611 SelectCode(N: Node);
7612}
7613
/// createAArch64ISelDag - This pass converts a legalized DAG into a
/// AArch64-specific DAG, ready for instruction scheduling.
///
/// \param TM the AArch64 target machine code is being selected for.
/// \param OptLevel the codegen optimization level, forwarded to the selector.
/// \return a newly allocated legacy FunctionPass; ownership passes to the
///         caller (normally the legacy pass manager).
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
}
7620
7621/// When \p PredVT is a scalable vector predicate in the form
7622/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7623/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7624/// structured vectors (NumVec >1), the output data type is
7625/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7626/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7627/// EVT.
7628static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7629 unsigned NumVec) {
7630 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7631 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7632 return EVT();
7633
7634 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7635 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7636 return EVT();
7637
7638 ElementCount EC = PredVT.getVectorElementCount();
7639 EVT ScalarVT =
7640 EVT::getIntegerVT(Context&: Ctx, BitWidth: AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7641 EVT MemVT = EVT::getVectorVT(Context&: Ctx, VT: ScalarVT, EC: EC * NumVec);
7642
7643 return MemVT;
7644}
7645
/// Return the EVT of the data associated to a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  // Memory intrinsics carry their memory VT directly. This check must come
  // before the generic MemSDNode one below, since MemIntrinsicSDNode derives
  // from MemSDNode.
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: Root))
    return MemIntr->getMemoryVT();

  if (isa<MemSDNode>(Val: Root)) {
    EVT MemVT = cast<MemSDNode>(Val: Root)->getMemoryVT();

    // DataVT is the in-register type of the loaded/stored value.
    EVT DataVT;
    if (auto *Load = dyn_cast<LoadSDNode>(Val: Root))
      DataVT = Load->getValueType(ResNo: 0);
    else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Val: Root))
      DataVT = Load->getValueType(ResNo: 0);
    else if (auto *Store = dyn_cast<StoreSDNode>(Val: Root))
      DataVT = Store->getValue().getValueType();
    else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Val: Root))
      DataVT = Store->getValue().getValueType();
    else
      llvm_unreachable("Unexpected MemSDNode!");

    // Keep the vector shape of the in-register type, but use the element
    // width actually moved to/from memory (covers extending/truncating
    // accesses).
    return DataVT.changeVectorElementType(Context&: Ctx, EltVT: MemVT.getVectorElementType());
  }

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    // These SVE load nodes carry the memory VT as a VTSDNode operand.
    return cast<VTSDNode>(Val: Root->getOperand(Num: 3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Val: Root->getOperand(Num: 4))->getVT();
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  // For SVE/SME intrinsics the memory type is implied by the intrinsic ID,
  // which sits in operand 1 (operand 0 is the chain).
  switch (Root->getConstantOperandVal(Num: 1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    // For the stores the predicate operand index grows with the number of
    // stored vectors (4/5/6 for st2q/st3q/st4q).
    return getPackedVectorTypeFromPredicateType(
        Ctx, PredVT: Root->getOperand(Num: 4)->getValueType(ResNo: 0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, PredVT: Root->getOperand(Num: 5)->getValueType(ResNo: 0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, PredVT: Root->getOperand(Num: 6)->getValueType(ResNo: 0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1dq:
    // Quadword-container accesses use a fixed memory element type.
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1wq:
    return EVT(MVT::nxv1i32);
  }
}
7728
/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
///
/// On success sets \p Base and \p OffImm (a target constant holding the
/// scaled offset) and returns true.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(Ctx&: *(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  // A bare frame index is matched as base + 0.
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(ObjectIdx: FI)) {
      Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i64);
      return true;
    }

    return false;
  }

  // Without a known memory type there is nothing to scale the offset by.
  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  // The offset operand is either a VSCALE multiple (VL-scaled byte offset)
  // or, when the exact vector length is known, a plain constant that is a
  // whole multiple of the vector length.
  SDValue VScale = N.getOperand(i: 1);
  int64_t MulImm = std::numeric_limits<int64_t>::max();
  if (VScale.getOpcode() == ISD::VSCALE) {
    MulImm = cast<ConstantSDNode>(Val: VScale.getOperand(i: 0))->getSExtValue();
  } else if (auto C = dyn_cast<ConstantSDNode>(Val&: VScale)) {
    int64_t ByteOffset = C->getSExtValue();
    // KnownVScale is 0 when the subtarget's vector length is unknown.
    const auto KnownVScale =
        Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;

    if (!KnownVScale || ByteOffset % KnownVScale != 0)
      return false;

    MulImm = ByteOffset / KnownVScale;
  } else
    return false;

  // Convert the per-VL byte offset into units of whole memory accesses.
  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  // The scaled offset must fit the instruction's encodable range.
  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(i: 0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(ObjectIdx: FI))
      Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Val: Offset, DL: SDLoc(N), VT: MVT::i64);
  return true;
}
7797
7798/// Select register plus register addressing mode for SVE, with scaled
7799/// offset.
7800bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7801 SDValue &Base,
7802 SDValue &Offset) {
7803 if (N.getOpcode() != ISD::ADD)
7804 return false;
7805
7806 // Process an ADD node.
7807 const SDValue LHS = N.getOperand(i: 0);
7808 const SDValue RHS = N.getOperand(i: 1);
7809
7810 // 8 bit data does not come with the SHL node, so it is treated
7811 // separately.
7812 if (Scale == 0) {
7813 Base = LHS;
7814 Offset = RHS;
7815 return true;
7816 }
7817
7818 if (auto C = dyn_cast<ConstantSDNode>(Val: RHS)) {
7819 int64_t ImmOff = C->getSExtValue();
7820 unsigned Size = 1 << Scale;
7821
7822 // To use the reg+reg addressing mode, the immediate must be a multiple of
7823 // the vector element's byte size.
7824 if (ImmOff % Size)
7825 return false;
7826
7827 SDLoc DL(N);
7828 Base = LHS;
7829 Offset = CurDAG->getTargetConstant(Val: ImmOff >> Scale, DL, VT: MVT::i64);
7830 SDValue Ops[] = {Offset};
7831 SDNode *MI = CurDAG->getMachineNode(Opcode: AArch64::MOVi64imm, dl: DL, VT: MVT::i64, Ops);
7832 Offset = SDValue(MI, 0);
7833 return true;
7834 }
7835
7836 // Check if the RHS is a shift node with a constant.
7837 if (RHS.getOpcode() != ISD::SHL)
7838 return false;
7839
7840 const SDValue ShiftRHS = RHS.getOperand(i: 1);
7841 if (auto *C = dyn_cast<ConstantSDNode>(Val: ShiftRHS))
7842 if (C->getZExtValue() == Scale) {
7843 Base = LHS;
7844 Offset = RHS.getOperand(i: 0);
7845 return true;
7846 }
7847
7848 return false;
7849}
7850
7851bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7852 const AArch64TargetLowering *TLI =
7853 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7854
7855 return TLI->isAllActivePredicate(DAG&: *CurDAG, N);
7856}
7857
7858bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7859 EVT VT = N.getValueType();
7860 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7861}
7862
7863bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7864 SDValue &Base, SDValue &Offset,
7865 unsigned Scale) {
7866 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7867 if (auto *C = dyn_cast<ConstantSDNode>(Val&: CN)) {
7868 int64_t ImmOff = C->getSExtValue();
7869 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7870 return CurDAG->getTargetConstant(Val: ImmOff / Scale, DL: SDLoc(N), VT: MVT::i64);
7871 }
7872 return SDValue();
7873 };
7874
7875 if (SDValue C = MatchConstantOffset(N)) {
7876 Base = CurDAG->getConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
7877 Offset = C;
7878 return true;
7879 }
7880
7881 // Try to untangle an ADD node into a 'reg + offset'
7882 if (CurDAG->isBaseWithConstantOffset(Op: N)) {
7883 if (SDValue C = MatchConstantOffset(N.getOperand(i: 1))) {
7884 Base = N.getOperand(i: 0);
7885 Offset = C;
7886 return true;
7887 }
7888 }
7889
7890 // By default, just match reg + 0.
7891 Base = N;
7892 Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i64);
7893 return true;
7894}
7895
7896bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7897 SDValue &Imm) {
7898 AArch64CC::CondCode CC =
7899 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(Num: 1));
7900 if (auto *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
7901 // Check conservatively if the immediate fits the valid range [0, 64).
7902 // Immediate variants for GE and HS definitely need to be decremented
7903 // when lowering the pseudos later, so an immediate of 1 would become 0.
7904 // For the inverse conditions LT and LO we don't know for sure if they
7905 // will need a decrement but should the decision be made to reverse the
7906 // branch condition, we again end up with the need to decrement.
7907 // The same argument holds for LE, LS, GT and HI and possibly
7908 // incremented immediates. This can lead to slightly less optimal
7909 // codegen, e.g. we never codegen the legal case
7910 // cblt w0, #63, A
7911 // because we could end up with the illegal case
7912 // cbge w0, #64, B
7913 // should the decision to reverse the branch direction be made. For the
7914 // lower bound cases this is no problem since we can express comparisons
7915 // against 0 with either tbz/tnbz or using wzr/xzr.
7916 uint64_t LowerBound = 0, UpperBound = 64;
7917 switch (CC) {
7918 case AArch64CC::GE:
7919 case AArch64CC::HS:
7920 case AArch64CC::LT:
7921 case AArch64CC::LO:
7922 LowerBound = 1;
7923 break;
7924 case AArch64CC::LE:
7925 case AArch64CC::LS:
7926 case AArch64CC::GT:
7927 case AArch64CC::HI:
7928 UpperBound = 63;
7929 break;
7930 default:
7931 break;
7932 }
7933
7934 if (CN->getAPIntValue().uge(RHS: LowerBound) &&
7935 CN->getAPIntValue().ult(RHS: UpperBound)) {
7936 SDLoc DL(N);
7937 Imm = CurDAG->getTargetConstant(Val: CN->getZExtValue(), DL, VT: N.getValueType());
7938 return true;
7939 }
7940 }
7941
7942 return false;
7943}
7944
7945template <bool MatchCBB>
7946bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7947 SDValue &ExtType) {
7948
7949 // Use an invalid shift-extend value to indicate we don't need to extend later
7950 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7951 EVT Ty = cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT();
7952 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7953 return false;
7954 Reg = N.getOperand(i: 0);
7955 ExtType = CurDAG->getSignedTargetConstant(Val: AArch64_AM::InvalidShiftExtend,
7956 DL: SDLoc(N), VT: MVT::i32);
7957 return true;
7958 }
7959
7960 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
7961
7962 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7963 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7964 Reg = N.getOperand(i: 0);
7965 ExtType =
7966 CurDAG->getTargetConstant(Val: getExtendEncoding(ET), DL: SDLoc(N), VT: MVT::i32);
7967 return true;
7968 }
7969
7970 return false;
7971}
7972
7973void AArch64DAGToDAGISel::PreprocessISelDAG() {
7974 bool MadeChange = false;
7975 for (SDNode &N : llvm::make_early_inc_range(Range: CurDAG->allnodes())) {
7976 if (N.use_empty())
7977 continue;
7978
7979 SDValue Result;
7980 switch (N.getOpcode()) {
7981 case ISD::SCALAR_TO_VECTOR: {
7982 EVT ScalarTy = N.getValueType(ResNo: 0).getVectorElementType();
7983 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
7984 ScalarTy == N.getOperand(Num: 0).getValueType())
7985 Result = addBitcastHints(DAG&: *CurDAG, N);
7986
7987 break;
7988 }
7989 default:
7990 break;
7991 }
7992
7993 if (Result) {
7994 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
7995 LLVM_DEBUG(N.dump(CurDAG));
7996 LLVM_DEBUG(dbgs() << "\nNew: ");
7997 LLVM_DEBUG(Result.dump(CurDAG));
7998 LLVM_DEBUG(dbgs() << "\n");
7999
8000 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(&N, 0), To: Result);
8001 MadeChange = true;
8002 }
8003 }
8004
8005 if (MadeChange)
8006 CurDAG->RemoveDeadNodes();
8007
8008 SelectionDAGISel::PreprocessISelDAG();
8009}
8010