1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(mf&: MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, AllowROR: false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, AllowROR: true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, Size: 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, Size: 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, Size: 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, Size: 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, Size: 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, IsSignedImm: true, BW: 9, Size: 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, IsSignedImm: false, BW: 6, Size: 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, Size: 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, Size: 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, Size: 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, Size: 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, Size: 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, Size: 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, Size: 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, Size: 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, Size: 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, Size: 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(Val&: OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i64);
154 return true;
155 }
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Size: Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Size: Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(Num: 0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
174 return false;
175 EVT VT = N->getValueType(ResNo: 0);
176 EVT LVT = N->getOperand(Num: 0).getValueType();
177 unsigned Index = N->getConstantOperandVal(Num: 1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(Num: 0);
182 return true;
183 }
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(Num: 0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(Num: 1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(i: 1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(i: 1).getConstantOperandVal(i: 0)
198 << Op.getOperand(i: 1).getConstantOperandVal(i: 1));
199 else if (Op.getOperand(i: 1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Val: Op.getOperand(i: 1).getOperand(i: 0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(i: 1).getConstantOperandVal(i: 0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(i: 0);
210 Res2 = CurDAG->getTargetConstant(Val: ShtAmt, DL: SDLoc(N), VT: MVT::i32);
211 return true;
212 }
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(Num: 0);
221 if (isNullConstant(V: Opnd0))
222 return true;
223 if (isNullFPConstant(V: Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(Num: 0);
241 if (isNullConstant(V: Opnd0))
242 return true;
243 if (isNullFPConstant(V: Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(ResNo: 0).getVectorElementType();
287 return SelectSVEShiftImm(N: N->getOperand(Num: 0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(Val: N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(x: Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(Val: MulImm, DL: SDLoc(N), VT: MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(Val: N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(Val: MulImm, DL: SDLoc(N), VT: MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(Val&: N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(Reg: BaseReg + C, VT: MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element that is returned
347 /// unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxSize: MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, IsSignedImm: true, BW: 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, Width: RegWidth);
481 }
482 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
483
484 template <unsigned RegWidth>
485 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
486 return SelectCVTFixedPointVec(N, FixedPos, Width: RegWidth);
487 }
488 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
489
490 template<unsigned RegWidth>
491 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
492 return SelectCVTFixedPosRecipOperand(N, FixedPos, Width: RegWidth);
493 }
494
495 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
496 unsigned Width);
497
498 bool SelectCMP_SWAP(SDNode *N);
499
500 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
501 bool Negate);
502 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
503 bool Negate);
504 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
505 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
506
507 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
508 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
509 bool AllowSaturation, SDValue &Imm);
510
511 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
512 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
513 SDValue &Offset);
514 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
515 SDValue &Offset, unsigned Scale = 1);
516
517 bool SelectAllActivePredicate(SDValue N);
518 bool SelectAnyPredicate(SDValue N);
519
520 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
521
522 template <bool MatchCBB>
523 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
524};
525
526class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
527public:
528 static char ID;
529 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
530 CodeGenOptLevel OptLevel)
531 : SelectionDAGISelLegacy(
532 ID, std::make_unique<AArch64DAGToDAGISel>(args&: tm, args&: OptLevel)) {}
533};
534} // end anonymous namespace
535
536char AArch64DAGToDAGISelLegacy::ID = 0;
537
538INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
539
540/// isIntImmediate - This method tests to see if the node is a constant
541/// operand. If so Imm will receive the 32-bit value.
542static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
543 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(Val: N)) {
544 Imm = C->getZExtValue();
545 return true;
546 }
547 return false;
548}
549
550// isIntImmediate - This method tests to see if a constant operand.
551// If so Imm will receive the value.
552static bool isIntImmediate(SDValue N, uint64_t &Imm) {
553 return isIntImmediate(N: N.getNode(), Imm);
554}
555
556// isOpcWithIntImmediate - This method tests to see if the node is a specific
557// opcode and that it has a immediate integer right operand.
558// If so Imm will receive the 32 bit value.
559static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
560 uint64_t &Imm) {
561 return N->getOpcode() == Opc &&
562 isIntImmediate(N: N->getOperand(Num: 1).getNode(), Imm);
563}
564
565// isIntImmediateEq - This method tests to see if N is a constant operand that
566// is equivalent to 'ImmExpected'.
567#ifndef NDEBUG
568static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
569 uint64_t Imm;
570 if (!isIntImmediate(N.getNode(), Imm))
571 return false;
572 return Imm == ImmExpected;
573}
574#endif
575
576bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
577 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
578 std::vector<SDValue> &OutOps) {
579 switch(ConstraintID) {
580 default:
581 llvm_unreachable("Unexpected asm memory constraint");
582 case InlineAsm::ConstraintCode::m:
583 case InlineAsm::ConstraintCode::o:
584 case InlineAsm::ConstraintCode::Q:
585 // We need to make sure that this one operand does not end up in XZR, thus
586 // require the address to be in a PointerRegClass register.
587 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
588 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
589 SDLoc dl(Op);
590 SDValue RC = CurDAG->getTargetConstant(Val: TRC->getID(), DL: dl, VT: MVT::i64);
591 SDValue NewOp =
592 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
593 dl, VT: Op.getValueType(),
594 Op1: Op, Op2: RC), 0);
595 OutOps.push_back(x: NewOp);
596 return false;
597 }
598 return true;
599}
600
601/// SelectArithImmed - Select an immediate value that can be represented as
602/// a 12-bit value shifted left by either 0 or 12. If so, return true with
603/// Val set to the 12-bit value and Shift set to the shifter operand.
604bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
605 SDValue &Shift) {
606 // This function is called from the addsub_shifted_imm ComplexPattern,
607 // which lists [imm] as the list of opcode it's interested in, however
608 // we still need to check whether the operand is actually an immediate
609 // here because the ComplexPattern opcode list is only used in
610 // root-level opcode matching.
611 if (!isa<ConstantSDNode>(Val: N.getNode()))
612 return false;
613
614 uint64_t Immed = N.getNode()->getAsZExtVal();
615 unsigned ShiftAmt;
616
617 if (Immed >> 12 == 0) {
618 ShiftAmt = 0;
619 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
620 ShiftAmt = 12;
621 Immed = Immed >> 12;
622 } else
623 return false;
624
625 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
626 SDLoc dl(N);
627 Val = CurDAG->getTargetConstant(Val: Immed, DL: dl, VT: MVT::i32);
628 Shift = CurDAG->getTargetConstant(Val: ShVal, DL: dl, VT: MVT::i32);
629 return true;
630}
631
632/// SelectNegArithImmed - As above, but negates the value before trying to
633/// select it.
634bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
635 SDValue &Shift) {
636 // This function is called from the addsub_shifted_imm ComplexPattern,
637 // which lists [imm] as the list of opcode it's interested in, however
638 // we still need to check whether the operand is actually an immediate
639 // here because the ComplexPattern opcode list is only used in
640 // root-level opcode matching.
641 if (!isa<ConstantSDNode>(Val: N.getNode()))
642 return false;
643
644 // The immediate operand must be a 24-bit zero-extended immediate.
645 uint64_t Immed = N.getNode()->getAsZExtVal();
646
647 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
648 // have the opposite effect on the C flag, so this pattern mustn't match under
649 // those circumstances.
650 if (Immed == 0)
651 return false;
652
653 if (N.getValueType() == MVT::i32)
654 Immed = ~((uint32_t)Immed) + 1;
655 else
656 Immed = ~Immed + 1ULL;
657 if (Immed & 0xFFFFFFFFFF000000ULL)
658 return false;
659
660 Immed &= 0xFFFFFFULL;
661 return SelectArithImmed(N: CurDAG->getConstant(Val: Immed, DL: SDLoc(N), VT: MVT::i32), Val,
662 Shift);
663}
664
665/// getShiftTypeForNode - Translate a shift node to the corresponding
666/// ShiftType value.
667static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
668 switch (N.getOpcode()) {
669 default:
670 return AArch64_AM::InvalidShiftExtend;
671 case ISD::SHL:
672 return AArch64_AM::LSL;
673 case ISD::SRL:
674 return AArch64_AM::LSR;
675 case ISD::SRA:
676 return AArch64_AM::ASR;
677 case ISD::ROTR:
678 return AArch64_AM::ROR;
679 }
680}
681
682static bool isMemOpOrPrefetch(SDNode *N) {
683 return isa<MemSDNode>(Val: *N) || N->getOpcode() == AArch64ISD::PREFETCH;
684}
685
686/// Determine whether it is worth it to fold SHL into the addressing
687/// mode.
688static bool isWorthFoldingSHL(SDValue V) {
689 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
690 // It is worth folding logical shift of up to three places.
691 auto *CSD = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
692 if (!CSD)
693 return false;
694 unsigned ShiftVal = CSD->getZExtValue();
695 if (ShiftVal > 3)
696 return false;
697
698 // Check if this particular node is reused in any non-memory related
699 // operation. If yes, do not try to fold this node into the address
700 // computation, since the computation will be kept.
701 const SDNode *Node = V.getNode();
702 for (SDNode *UI : Node->users())
703 if (!isMemOpOrPrefetch(N: UI))
704 for (SDNode *UII : UI->users())
705 if (!isMemOpOrPrefetch(N: UII))
706 return false;
707 return true;
708}
709
710/// Determine whether it is worth to fold V into an extended register addressing
711/// mode.
712bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
713 // Trivial if we are optimizing for code size or if there is only
714 // one use of the value.
715 if (CurDAG->shouldOptForSize() || V.hasOneUse())
716 return true;
717
718 // If a subtarget has a slow shift, folding a shift into multiple loads
719 // costs additional micro-ops.
720 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
721 return false;
722
723 // Check whether we're going to emit the address arithmetic anyway because
724 // it's used by a non-address operation.
725 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
726 return true;
727 if (V.getOpcode() == ISD::ADD) {
728 const SDValue LHS = V.getOperand(i: 0);
729 const SDValue RHS = V.getOperand(i: 1);
730 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: LHS))
731 return true;
732 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: RHS))
733 return true;
734 }
735
736 // It hurts otherwise, since the value will be reused.
737 return false;
738}
739
740/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
741/// to select more shifted register
742bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
743 SDValue &Shift) {
744 EVT VT = N.getValueType();
745 if (VT != MVT::i32 && VT != MVT::i64)
746 return false;
747
748 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
749 return false;
750 SDValue LHS = N.getOperand(i: 0);
751 if (!LHS->hasOneUse())
752 return false;
753
754 unsigned LHSOpcode = LHS->getOpcode();
755 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
756 return false;
757
758 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: LHS.getOperand(i: 1));
759 if (!ShiftAmtNode)
760 return false;
761
762 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
763 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
764 if (!RHSC)
765 return false;
766
767 APInt AndMask = RHSC->getAPIntValue();
768 unsigned LowZBits, MaskLen;
769 if (!AndMask.isShiftedMask(MaskIdx&: LowZBits, MaskLen))
770 return false;
771
772 unsigned BitWidth = N.getValueSizeInBits();
773 SDLoc DL(LHS);
774 uint64_t NewShiftC;
775 unsigned NewShiftOp;
776 if (LHSOpcode == ISD::SHL) {
777 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
778 // BitWidth != LowZBits + MaskLen doesn't match the pattern
779 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
780 return false;
781
782 NewShiftC = LowZBits - ShiftAmtC;
783 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
784 } else {
785 if (LowZBits == 0)
786 return false;
787
788 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
789 NewShiftC = LowZBits + ShiftAmtC;
790 if (NewShiftC >= BitWidth)
791 return false;
792
793 // SRA need all high bits
794 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
795 return false;
796
797 // SRL high bits can be 0 or 1
798 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
799 return false;
800
801 if (LHSOpcode == ISD::SRL)
802 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
803 else
804 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
805 }
806
807 assert(NewShiftC < BitWidth && "Invalid shift amount");
808 SDValue NewShiftAmt = CurDAG->getTargetConstant(Val: NewShiftC, DL, VT);
809 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT);
810 Reg = SDValue(CurDAG->getMachineNode(Opcode: NewShiftOp, dl: DL, VT, Op1: LHS->getOperand(Num: 0),
811 Op2: NewShiftAmt, Op3: BitWidthMinus1),
812 0);
813 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LowZBits);
814 Shift = CurDAG->getTargetConstant(Val: ShVal, DL, VT: MVT::i32);
815 return true;
816}
817
818/// getExtendTypeForNode - Translate an extend node to the corresponding
819/// ExtendType value.
820static AArch64_AM::ShiftExtendType
821getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
822 if (N.getOpcode() == ISD::SIGN_EXTEND ||
823 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
824 EVT SrcVT;
825 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
826 SrcVT = cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT();
827 else
828 SrcVT = N.getOperand(i: 0).getValueType();
829
830 if (!IsLoadStore && SrcVT == MVT::i8)
831 return AArch64_AM::SXTB;
832 else if (!IsLoadStore && SrcVT == MVT::i16)
833 return AArch64_AM::SXTH;
834 else if (SrcVT == MVT::i32)
835 return AArch64_AM::SXTW;
836 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
837
838 return AArch64_AM::InvalidShiftExtend;
839 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
840 N.getOpcode() == ISD::ANY_EXTEND) {
841 EVT SrcVT = N.getOperand(i: 0).getValueType();
842 if (!IsLoadStore && SrcVT == MVT::i8)
843 return AArch64_AM::UXTB;
844 else if (!IsLoadStore && SrcVT == MVT::i16)
845 return AArch64_AM::UXTH;
846 else if (SrcVT == MVT::i32)
847 return AArch64_AM::UXTW;
848 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
849
850 return AArch64_AM::InvalidShiftExtend;
851 } else if (N.getOpcode() == ISD::AND) {
852 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
853 if (!CSD)
854 return AArch64_AM::InvalidShiftExtend;
855 uint64_t AndMask = CSD->getZExtValue();
856
857 switch (AndMask) {
858 default:
859 return AArch64_AM::InvalidShiftExtend;
860 case 0xFF:
861 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
862 case 0xFFFF:
863 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
864 case 0xFFFFFFFF:
865 return AArch64_AM::UXTW;
866 }
867 }
868
869 return AArch64_AM::InvalidShiftExtend;
870}
871
872/// Determine whether it is worth to fold V into an extended register of an
873/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
874/// instruction, and the shift should be treated as worth folding even if has
875/// multiple uses.
876bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
877 // Trivial if we are optimizing for code size or if there is only
878 // one use of the value.
879 if (CurDAG->shouldOptForSize() || V.hasOneUse())
880 return true;
881
882 // If a subtarget has a fastpath LSL we can fold a logical shift into
883 // the add/sub and save a cycle.
884 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
885 V.getConstantOperandVal(i: 1) <= 4 &&
886 getExtendTypeForNode(N: V.getOperand(i: 0)) == AArch64_AM::InvalidShiftExtend)
887 return true;
888
889 // It hurts otherwise, since the value will be reused.
890 return false;
891}
892
893/// SelectShiftedRegister - Select a "shifted register" operand. If the value
894/// is not shifted, set the Shift operand to default of "LSL 0". The logical
895/// instructions allow the shifted register to be rotated, but the arithmetic
896/// instructions do not. The AllowROR parameter specifies whether ROR is
897/// supported.
898bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
899 SDValue &Reg, SDValue &Shift) {
900 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
901 return true;
902
903 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
904 if (ShType == AArch64_AM::InvalidShiftExtend)
905 return false;
906 if (!AllowROR && ShType == AArch64_AM::ROR)
907 return false;
908
909 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
910 unsigned BitSize = N.getValueSizeInBits();
911 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
912 unsigned ShVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
913
914 Reg = N.getOperand(i: 0);
915 Shift = CurDAG->getTargetConstant(Val: ShVal, DL: SDLoc(N), VT: MVT::i32);
916 return isWorthFoldingALU(V: N, LSL: true);
917 }
918
919 return false;
920}
921
922/// Instructions that accept extend modifiers like UXTW expect the register
923/// being extended to be a GPR32, but the incoming DAG might be acting on a
924/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
925/// this is the case.
926static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
927 if (N.getValueType() == MVT::i32)
928 return N;
929
930 SDLoc dl(N);
931 return CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i32, Operand: N);
932}
933
934// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
935template<signed Low, signed High, signed Scale>
936bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
937 if (!isa<ConstantSDNode>(Val: N))
938 return false;
939
940 int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
941 if ((MulImm % std::abs(x: Scale)) == 0) {
942 int64_t RDVLImm = MulImm / Scale;
943 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
944 Imm = CurDAG->getSignedTargetConstant(Val: RDVLImm, DL: SDLoc(N), VT: MVT::i32);
945 return true;
946 }
947 }
948
949 return false;
950}
951
952// Returns a suitable RDSVL multiplier from a left shift.
953template <signed Low, signed High>
954bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
955 if (!isa<ConstantSDNode>(Val: N))
956 return false;
957
958 int64_t MulImm = 1LL << cast<ConstantSDNode>(Val&: N)->getSExtValue();
959 if (MulImm >= Low && MulImm <= High) {
960 Imm = CurDAG->getSignedTargetConstant(Val: MulImm, DL: SDLoc(N), VT: MVT::i32);
961 return true;
962 }
963
964 return false;
965}
966
967/// SelectArithExtendedRegister - Select a "extended register" operand. This
968/// operand folds in an extend followed by an optional left shift.
969bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
970 SDValue &Shift) {
971 unsigned ShiftVal = 0;
972 AArch64_AM::ShiftExtendType Ext;
973
974 if (N.getOpcode() == ISD::SHL) {
975 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
976 if (!CSD)
977 return false;
978 ShiftVal = CSD->getZExtValue();
979 if (ShiftVal > 4)
980 return false;
981
982 Ext = getExtendTypeForNode(N: N.getOperand(i: 0));
983 if (Ext == AArch64_AM::InvalidShiftExtend)
984 return false;
985
986 Reg = N.getOperand(i: 0).getOperand(i: 0);
987 } else {
988 Ext = getExtendTypeForNode(N);
989 if (Ext == AArch64_AM::InvalidShiftExtend)
990 return false;
991
992 Reg = N.getOperand(i: 0);
993
994 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
995 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
996 auto isDef32 = [](SDValue N) {
997 unsigned Opc = N.getOpcode();
998 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
999 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
1000 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
1001 Opc != ISD::FREEZE;
1002 };
1003 if (Ext == AArch64_AM::UXTW && Reg->getValueType(ResNo: 0).getSizeInBits() == 32 &&
1004 isDef32(Reg))
1005 return false;
1006 }
1007
1008 // AArch64 mandates that the RHS of the operation must use the smallest
1009 // register class that could contain the size being extended from. Thus,
1010 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1011 // there might not be an actual 32-bit value in the program. We can
1012 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
1013 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1014 Reg = narrowIfNeeded(CurDAG, N: Reg);
1015 Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc(N),
1016 VT: MVT::i32);
1017 return isWorthFoldingALU(V: N);
1018}
1019
1020/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1021/// operand is referred by the instructions have SP operand
1022bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1023 SDValue &Shift) {
1024 unsigned ShiftVal = 0;
1025 AArch64_AM::ShiftExtendType Ext;
1026
1027 if (N.getOpcode() != ISD::SHL)
1028 return false;
1029
1030 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1031 if (!CSD)
1032 return false;
1033 ShiftVal = CSD->getZExtValue();
1034 if (ShiftVal > 4)
1035 return false;
1036
1037 Ext = AArch64_AM::UXTX;
1038 Reg = N.getOperand(i: 0);
1039 Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc(N),
1040 VT: MVT::i32);
1041 return isWorthFoldingALU(V: N);
1042}
1043
1044/// If there's a use of this ADDlow that's not itself a load/store then we'll
1045/// need to create a real ADD instruction from it anyway and there's no point in
1046/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1047/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1048/// leads to duplicated ADRP instructions.
1049static bool isWorthFoldingADDlow(SDValue N) {
1050 for (auto *User : N->users()) {
1051 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1052 User->getOpcode() != ISD::ATOMIC_LOAD &&
1053 User->getOpcode() != ISD::ATOMIC_STORE)
1054 return false;
1055
1056 // ldar and stlr have much more restrictive addressing modes (just a
1057 // register).
1058 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
1059 return false;
1060 }
1061
1062 return true;
1063}
1064
1065/// Check if the immediate offset is valid as a scaled immediate.
1066static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1067 unsigned Size) {
1068 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1069 Offset < (Range << Log2_32(Value: Size)))
1070 return true;
1071 return false;
1072}
1073
1074/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1075/// immediate" address. The "Size" argument is the size in bytes of the memory
1076/// reference, which determines the scale.
1077bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1078 unsigned BW, unsigned Size,
1079 SDValue &Base,
1080 SDValue &OffImm) {
1081 SDLoc dl(N);
1082 const DataLayout &DL = CurDAG->getDataLayout();
1083 const TargetLowering *TLI = getTargetLowering();
1084 if (N.getOpcode() == ISD::FrameIndex) {
1085 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1086 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1087 OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
1088 return true;
1089 }
1090
1091 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1092 // selected here doesn't support labels/immediates, only base+offset.
1093 if (CurDAG->isBaseWithConstantOffset(Op: N)) {
1094 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1095 if (IsSignedImm) {
1096 int64_t RHSC = RHS->getSExtValue();
1097 unsigned Scale = Log2_32(Value: Size);
1098 int64_t Range = 0x1LL << (BW - 1);
1099
1100 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1101 RHSC < (Range << Scale)) {
1102 Base = N.getOperand(i: 0);
1103 if (Base.getOpcode() == ISD::FrameIndex) {
1104 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1105 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1106 }
1107 OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
1108 return true;
1109 }
1110 } else {
1111 // unsigned Immediate
1112 uint64_t RHSC = RHS->getZExtValue();
1113 unsigned Scale = Log2_32(Value: Size);
1114 uint64_t Range = 0x1ULL << BW;
1115
1116 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1117 Base = N.getOperand(i: 0);
1118 if (Base.getOpcode() == ISD::FrameIndex) {
1119 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1120 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1121 }
1122 OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
1123 return true;
1124 }
1125 }
1126 }
1127 }
1128 // Base only. The address will be materialized into a register before
1129 // the memory is accessed.
1130 // add x0, Xbase, #offset
1131 // stp x1, x2, [x0]
1132 Base = N;
1133 OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
1134 return true;
1135}
1136
1137/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1138/// immediate" address. The "Size" argument is the size in bytes of the memory
1139/// reference, which determines the scale.
1140bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1141 SDValue &Base, SDValue &OffImm) {
1142 SDLoc dl(N);
1143 const DataLayout &DL = CurDAG->getDataLayout();
1144 const TargetLowering *TLI = getTargetLowering();
1145 if (N.getOpcode() == ISD::FrameIndex) {
1146 int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
1147 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1148 OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
1149 return true;
1150 }
1151
1152 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1153 GlobalAddressSDNode *GAN =
1154 dyn_cast<GlobalAddressSDNode>(Val: N.getOperand(i: 1).getNode());
1155 Base = N.getOperand(i: 0);
1156 OffImm = N.getOperand(i: 1);
1157 if (!GAN)
1158 return true;
1159
1160 if (GAN->getOffset() % Size == 0 &&
1161 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1162 return true;
1163 }
1164
1165 if (CurDAG->isBaseWithConstantOffset(Op: N)) {
1166 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1167 int64_t RHSC = (int64_t)RHS->getZExtValue();
1168 unsigned Scale = Log2_32(Value: Size);
1169 if (isValidAsScaledImmediate(Offset: RHSC, Range: 0x1000, Size)) {
1170 Base = N.getOperand(i: 0);
1171 if (Base.getOpcode() == ISD::FrameIndex) {
1172 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1173 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
1174 }
1175 OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
1176 return true;
1177 }
1178 }
1179 }
1180
1181 // Before falling back to our general case, check if the unscaled
1182 // instructions can handle this. If so, that's preferable.
1183 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1184 return false;
1185
1186 // Base only. The address will be materialized into a register before
1187 // the memory is accessed.
1188 // add x0, Xbase, #offset
1189 // ldr x0, [x0]
1190 Base = N;
1191 OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
1192 return true;
1193}
1194
1195/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1196/// immediate" address. This should only match when there is an offset that
1197/// is not valid for a scaled immediate addressing mode. The "Size" argument
1198/// is the size in bytes of the memory reference, which is needed here to know
1199/// what is valid for a scaled immediate.
1200bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1201 SDValue &Base,
1202 SDValue &OffImm) {
1203 if (!CurDAG->isBaseWithConstantOffset(Op: N))
1204 return false;
1205 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1206 int64_t RHSC = RHS->getSExtValue();
1207 if (RHSC >= -256 && RHSC < 256) {
1208 Base = N.getOperand(i: 0);
1209 if (Base.getOpcode() == ISD::FrameIndex) {
1210 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1211 const TargetLowering *TLI = getTargetLowering();
1212 Base = CurDAG->getTargetFrameIndex(
1213 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1214 }
1215 OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i64);
1216 return true;
1217 }
1218 }
1219 return false;
1220}
1221
1222static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1223 SDLoc dl(N);
1224 SDValue ImpDef = SDValue(
1225 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: MVT::i64), 0);
1226 return CurDAG->getTargetInsertSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i64, Operand: ImpDef,
1227 Subreg: N);
1228}
1229
1230/// Check if the given SHL node (\p N), can be used to form an
1231/// extended register for an addressing mode.
1232bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1233 bool WantExtend, SDValue &Offset,
1234 SDValue &SignExtend) {
1235 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1236 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1237 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1238 return false;
1239
1240 SDLoc dl(N);
1241 if (WantExtend) {
1242 AArch64_AM::ShiftExtendType Ext =
1243 getExtendTypeForNode(N: N.getOperand(i: 0), IsLoadStore: true);
1244 if (Ext == AArch64_AM::InvalidShiftExtend)
1245 return false;
1246
1247 Offset = narrowIfNeeded(CurDAG, N: N.getOperand(i: 0).getOperand(i: 0));
1248 SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
1249 VT: MVT::i32);
1250 } else {
1251 Offset = N.getOperand(i: 0);
1252 SignExtend = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32);
1253 }
1254
1255 unsigned LegalShiftVal = Log2_32(Value: Size);
1256 unsigned ShiftVal = CSD->getZExtValue();
1257
1258 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1259 return false;
1260
1261 return isWorthFoldingAddr(V: N, Size);
1262}
1263
1264bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1265 SDValue &Base, SDValue &Offset,
1266 SDValue &SignExtend,
1267 SDValue &DoShift) {
1268 if (N.getOpcode() != ISD::ADD)
1269 return false;
1270 SDValue LHS = N.getOperand(i: 0);
1271 SDValue RHS = N.getOperand(i: 1);
1272 SDLoc dl(N);
1273
1274 // We don't want to match immediate adds here, because they are better lowered
1275 // to the register-immediate addressing modes.
1276 if (isa<ConstantSDNode>(Val: LHS) || isa<ConstantSDNode>(Val: RHS))
1277 return false;
1278
1279 // Check if this particular node is reused in any non-memory related
1280 // operation. If yes, do not try to fold this node into the address
1281 // computation, since the computation will be kept.
1282 const SDNode *Node = N.getNode();
1283 for (SDNode *UI : Node->users()) {
1284 if (!isMemOpOrPrefetch(N: UI))
1285 return false;
1286 }
1287
1288 // Remember if it is worth folding N when it produces extended register.
1289 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1290
1291 // Try to match a shifted extend on the RHS.
1292 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1293 SelectExtendedSHL(N: RHS, Size, WantExtend: true, Offset, SignExtend)) {
1294 Base = LHS;
1295 DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
1296 return true;
1297 }
1298
1299 // Try to match a shifted extend on the LHS.
1300 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1301 SelectExtendedSHL(N: LHS, Size, WantExtend: true, Offset, SignExtend)) {
1302 Base = RHS;
1303 DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
1304 return true;
1305 }
1306
1307 // There was no shift, whatever else we find.
1308 DoShift = CurDAG->getTargetConstant(Val: false, DL: dl, VT: MVT::i32);
1309
1310 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1311 // Try to match an unshifted extend on the LHS.
1312 if (IsExtendedRegisterWorthFolding &&
1313 (Ext = getExtendTypeForNode(N: LHS, IsLoadStore: true)) !=
1314 AArch64_AM::InvalidShiftExtend) {
1315 Base = RHS;
1316 Offset = narrowIfNeeded(CurDAG, N: LHS.getOperand(i: 0));
1317 SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
1318 VT: MVT::i32);
1319 if (isWorthFoldingAddr(V: LHS, Size))
1320 return true;
1321 }
1322
1323 // Try to match an unshifted extend on the RHS.
1324 if (IsExtendedRegisterWorthFolding &&
1325 (Ext = getExtendTypeForNode(N: RHS, IsLoadStore: true)) !=
1326 AArch64_AM::InvalidShiftExtend) {
1327 Base = LHS;
1328 Offset = narrowIfNeeded(CurDAG, N: RHS.getOperand(i: 0));
1329 SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
1330 VT: MVT::i32);
1331 if (isWorthFoldingAddr(V: RHS, Size))
1332 return true;
1333 }
1334
1335 return false;
1336}
1337
1338// Check if the given immediate is preferred by ADD. If an immediate can be
1339// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1340// encoded by one MOVZ, return true.
1341static bool isPreferredADD(int64_t ImmOff) {
1342 // Constant in [0x0, 0xfff] can be encoded in ADD.
1343 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1344 return true;
1345 // Check if it can be encoded in an "ADD LSL #12".
1346 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1347 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1348 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1349 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1350 return false;
1351}
1352
1353bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1354 SDValue &Base, SDValue &Offset,
1355 SDValue &SignExtend,
1356 SDValue &DoShift) {
1357 if (N.getOpcode() != ISD::ADD)
1358 return false;
1359 SDValue LHS = N.getOperand(i: 0);
1360 SDValue RHS = N.getOperand(i: 1);
1361 SDLoc DL(N);
1362
1363 // Check if this particular node is reused in any non-memory related
1364 // operation. If yes, do not try to fold this node into the address
1365 // computation, since the computation will be kept.
1366 const SDNode *Node = N.getNode();
1367 for (SDNode *UI : Node->users()) {
1368 if (!isMemOpOrPrefetch(N: UI))
1369 return false;
1370 }
1371
1372 // Watch out if RHS is a wide immediate, it can not be selected into
1373 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1374 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1375 // instructions like:
1376 // MOV X0, WideImmediate
1377 // ADD X1, BaseReg, X0
1378 // LDR X2, [X1, 0]
1379 // For such situation, using [BaseReg, XReg] addressing mode can save one
1380 // ADD/SUB:
1381 // MOV X0, WideImmediate
1382 // LDR X2, [BaseReg, X0]
1383 if (isa<ConstantSDNode>(Val: RHS)) {
1384 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1385 // Skip the immediate can be selected by load/store addressing mode.
1386 // Also skip the immediate can be encoded by a single ADD (SUB is also
1387 // checked by using -ImmOff).
1388 if (isValidAsScaledImmediate(Offset: ImmOff, Range: 0x1000, Size) ||
1389 isPreferredADD(ImmOff) || isPreferredADD(ImmOff: -ImmOff))
1390 return false;
1391
1392 SDValue Ops[] = { RHS };
1393 SDNode *MOVI =
1394 CurDAG->getMachineNode(Opcode: AArch64::MOVi64imm, dl: DL, VT: MVT::i64, Ops);
1395 SDValue MOVIV = SDValue(MOVI, 0);
1396 // This ADD of two X register will be selected into [Reg+Reg] mode.
1397 N = CurDAG->getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: LHS, N2: MOVIV);
1398 }
1399
1400 // Remember if it is worth folding N when it produces extended register.
1401 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1402
1403 // Try to match a shifted extend on the RHS.
1404 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1405 SelectExtendedSHL(N: RHS, Size, WantExtend: false, Offset, SignExtend)) {
1406 Base = LHS;
1407 DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1408 return true;
1409 }
1410
1411 // Try to match a shifted extend on the LHS.
1412 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1413 SelectExtendedSHL(N: LHS, Size, WantExtend: false, Offset, SignExtend)) {
1414 Base = RHS;
1415 DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1416 return true;
1417 }
1418
1419 // Match any non-shifted, non-extend, non-immediate add expression.
1420 Base = LHS;
1421 Offset = RHS;
1422 SignExtend = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1423 DoShift = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1424 // Reg1 + Reg2 is free: no check needed.
1425 return true;
1426}
1427
1428SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1429 static const unsigned RegClassIDs[] = {
1430 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1431 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1432 AArch64::dsub2, AArch64::dsub3};
1433
1434 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1435}
1436
1437SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1438 static const unsigned RegClassIDs[] = {
1439 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1440 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1441 AArch64::qsub2, AArch64::qsub3};
1442
1443 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1444}
1445
1446SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1447 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1448 AArch64::ZPR3RegClassID,
1449 AArch64::ZPR4RegClassID};
1450 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1451 AArch64::zsub2, AArch64::zsub3};
1452
1453 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1454}
1455
1456SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1457 assert(Regs.size() == 2 || Regs.size() == 4);
1458
1459 // The createTuple interface requires 3 RegClassIDs for each possible
1460 // tuple type even though we only have them for ZPR2 and ZPR4.
1461 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1462 AArch64::ZPR4Mul4RegClassID};
1463 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1464 AArch64::zsub2, AArch64::zsub3};
1465 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1466}
1467
1468SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1469 const unsigned RegClassIDs[],
1470 const unsigned SubRegs[]) {
1471 // There's no special register-class for a vector-list of 1 element: it's just
1472 // a vector.
1473 if (Regs.size() == 1)
1474 return Regs[0];
1475
1476 assert(Regs.size() >= 2 && Regs.size() <= 4);
1477
1478 SDLoc DL(Regs[0]);
1479
1480 SmallVector<SDValue, 4> Ops;
1481
1482 // First operand of REG_SEQUENCE is the desired RegClass.
1483 Ops.push_back(
1484 Elt: CurDAG->getTargetConstant(Val: RegClassIDs[Regs.size() - 2], DL, VT: MVT::i32));
1485
1486 // Then we get pairs of source & subregister-position for the components.
1487 for (unsigned i = 0; i < Regs.size(); ++i) {
1488 Ops.push_back(Elt: Regs[i]);
1489 Ops.push_back(Elt: CurDAG->getTargetConstant(Val: SubRegs[i], DL, VT: MVT::i32));
1490 }
1491
1492 SDNode *N =
1493 CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped, Ops);
1494 return SDValue(N, 0);
1495}
1496
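// Select a NEON table-lookup intrinsic (TBL/TBX). The NumVecs table vectors
// are packed into a Q-register tuple; for the TBX variants (isExt) the
// original destination vector, which supplies the values kept for
// out-of-range indices, is passed through as the first operand.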
1497void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1498 bool isExt) {
1499 SDLoc dl(N);
1500 EVT VT = N->getValueType(ResNo: 0);
1501
1502 unsigned ExtOff = isExt;
1503
1504 // Form a REG_SEQUENCE to force register allocation.
1505 unsigned Vec0Off = ExtOff + 1;
1506 SmallVector<SDValue, 4> Regs(N->ops().slice(N: Vec0Off, M: NumVecs));
1507 SDValue RegSeq = createQTuple(Regs);
1508
1509 SmallVector<SDValue, 6> Ops;
1510 if (isExt)
1511 Ops.push_back(Elt: N->getOperand(Num: 1));
1512 Ops.push_back(Elt: RegSeq);
1513 Ops.push_back(Elt: N->getOperand(Num: NumVecs + ExtOff + 1));
1514 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
1515}
1516
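// Decompose a ptrauth discriminator into (constant, address) components for
// the AUT/PAC pseudo instructions: a ptrauth.blend(addr, imm16) is split into
// its two halves, a bare 16-bit constant becomes (imm16, XZR), and anything
// else is treated as a pure address discriminator with a zero constant.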
1517static std::tuple<SDValue, SDValue>
1518extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1519 SDLoc DL(Disc);
1520 SDValue AddrDisc;
1521 SDValue ConstDisc;
1522
1523 // If this is a blend, remember the constant and address discriminators.
1524 // Otherwise, it's either a constant discriminator, or a non-blended
1525 // address discriminator.
1526 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1527 Disc->getConstantOperandVal(Num: 0) == Intrinsic::ptrauth_blend) {
1528 AddrDisc = Disc->getOperand(Num: 1);
1529 ConstDisc = Disc->getOperand(Num: 2);
1530 } else {
1531 ConstDisc = Disc;
1532 }
1533
1534 // If the constant discriminator (either the blend RHS, or the entire
1535 // discriminator value) isn't a 16-bit constant, bail out, and let the
1536 // discriminator be computed separately.
1537 auto *ConstDiscN = dyn_cast<ConstantSDNode>(Val&: ConstDisc);
1538 if (!ConstDiscN || !isUInt<16>(x: ConstDiscN->getZExtValue()))
1539 return std::make_tuple(args: DAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), args&: Disc);
1540
1541 // If there's no address discriminator, use XZR directly.
1542 if (!AddrDisc)
1543 AddrDisc = DAG->getRegister(Reg: AArch64::XZR, VT: MVT::i64);
1544
1545 return std::make_tuple(
1546 args: DAG->getTargetConstant(Val: ConstDiscN->getZExtValue(), DL, VT: MVT::i64),
1547 args&: AddrDisc);
1548}
1549
1550void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1551 SDLoc DL(N);
1552 // IntrinsicID is operand #0
1553 SDValue Val = N->getOperand(Num: 1);
1554 SDValue AUTKey = N->getOperand(Num: 2);
1555 SDValue AUTDisc = N->getOperand(Num: 3);
1556
1557 unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
1558 AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);
1559
1560 SDValue AUTAddrDisc, AUTConstDisc;
1561 std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
1562 extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);
1563
1564 if (!Subtarget->isX16X17Safer()) {
1565 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1566 // Copy deactivation symbol if present.
1567 if (N->getNumOperands() > 4)
1568 Ops.push_back(x: N->getOperand(Num: 4));
1569
1570 SDNode *AUT =
1571 CurDAG->getMachineNode(Opcode: AArch64::AUTxMxN, dl: DL, VT1: MVT::i64, VT2: MVT::i64, Ops);
1572 ReplaceNode(F: N, T: AUT);
1573 } else {
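    // The AUTx16x17 pseudo takes the pointer to authenticate in X16 (its
    // expansion is constrained to X16/X17), so pin the value there with a
    // glued CopyToReg.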
1574 SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
1575 Reg: AArch64::X16, N: Val, Glue: SDValue());
1576 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(R: 1)};
1577
1578 SDNode *AUT = CurDAG->getMachineNode(Opcode: AArch64::AUTx16x17, dl: DL, VT: MVT::i64, Ops);
1579 ReplaceNode(F: N, T: AUT);
1580 }
1581}
1582
1583void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1584 SDLoc DL(N);
  // IntrinsicID is operand #0; if this is an INTRINSIC_W_CHAIN node it is #1.
1586 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1587 SDValue Val = N->getOperand(Num: OffsetBase + 1);
1588 SDValue AUTKey = N->getOperand(Num: OffsetBase + 2);
1589 SDValue AUTDisc = N->getOperand(Num: OffsetBase + 3);
1590 SDValue PACKey = N->getOperand(Num: OffsetBase + 4);
1591 SDValue PACDisc = N->getOperand(Num: OffsetBase + 5);
1592 uint32_t IntNum = N->getConstantOperandVal(Num: OffsetBase + 0);
1593 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1594
1595 unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
1596 unsigned PACKeyC = cast<ConstantSDNode>(Val&: PACKey)->getZExtValue();
1597
1598 AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);
1599 PACKey = CurDAG->getTargetConstant(Val: PACKeyC, DL, VT: MVT::i64);
1600
1601 SDValue AUTAddrDisc, AUTConstDisc;
1602 std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
1603 extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);
1604
1605 SDValue PACAddrDisc, PACConstDisc;
1606 std::tie(args&: PACConstDisc, args&: PACAddrDisc) =
1607 extractPtrauthBlendDiscriminators(Disc: PACDisc, DAG: CurDAG);
1608
1609 SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
1610 Reg: AArch64::X16, N: Val, Glue: SDValue());
1611
1612 if (HasLoad) {
1613 SDValue Addend = N->getOperand(Num: OffsetBase + 6);
1614 SDValue IncomingChain = N->getOperand(Num: 0);
1615 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1616 PACKey, PACConstDisc, PACAddrDisc,
1617 Addend, IncomingChain, X16Copy.getValue(R: 1)};
1618
1619 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(Opcode: AArch64::AUTRELLOADPAC, dl: DL,
1620 VT1: MVT::i64, VT2: MVT::Other, Ops);
1621 ReplaceNode(F: N, T: AUTRELLOADPAC);
1622 } else {
1623 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1624 PACConstDisc, PACAddrDisc, X16Copy.getValue(R: 1)};
1625
1626 SDNode *AUTPAC = CurDAG->getMachineNode(Opcode: AArch64::AUTPAC, dl: DL, VT: MVT::i64, Ops);
1627 ReplaceNode(F: N, T: AUTPAC);
1628 }
1629}
1630
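// Try to select a pre/post-indexed load. The addressing mode was already
// validated when the load was marked as indexed; here we only pick the
// opcode, falling back to a post-incrementing LD1 for big-endian vectors.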
1631bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1632 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
1633 if (LD->isUnindexed())
1634 return false;
1635 EVT VT = LD->getMemoryVT();
1636 EVT DstVT = N->getValueType(ResNo: 0);
1637 ISD::MemIndexedMode AM = LD->getAddressingMode();
1638 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1639 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(Val: LD->getOffset());
1640 int OffsetVal = (int)OffsetOp->getZExtValue();
1641
1642 // We're not doing validity checking here. That was done when checking
1643 // if we should mark the load as indexed or not. We're just selecting
1644 // the right instruction.
1645 unsigned Opcode = 0;
1646
1647 ISD::LoadExtType ExtType = LD->getExtensionType();
1648 bool InsertTo64 = false;
1649 if (VT == MVT::i64)
1650 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1651 else if (VT == MVT::i32) {
1652 if (ExtType == ISD::NON_EXTLOAD)
1653 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1654 else if (ExtType == ISD::SEXTLOAD)
1655 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1656 else {
1657 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1658 InsertTo64 = true;
1659 // The result of the load is only i32. It's the subreg_to_reg that makes
1660 // it into an i64.
1661 DstVT = MVT::i32;
1662 }
1663 } else if (VT == MVT::i16) {
1664 if (ExtType == ISD::SEXTLOAD) {
1665 if (DstVT == MVT::i64)
1666 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1667 else
1668 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1669 } else {
1670 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1671 InsertTo64 = DstVT == MVT::i64;
1672 // The result of the load is only i32. It's the subreg_to_reg that makes
1673 // it into an i64.
1674 DstVT = MVT::i32;
1675 }
1676 } else if (VT == MVT::i8) {
1677 if (ExtType == ISD::SEXTLOAD) {
1678 if (DstVT == MVT::i64)
1679 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1680 else
1681 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1682 } else {
1683 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1684 InsertTo64 = DstVT == MVT::i64;
1685 // The result of the load is only i32. It's the subreg_to_reg that makes
1686 // it into an i64.
1687 DstVT = MVT::i32;
1688 }
1689 } else if (VT == MVT::f16) {
1690 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1691 } else if (VT == MVT::bf16) {
1692 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1693 } else if (VT == MVT::f32) {
1694 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1695 } else if (VT == MVT::f64 ||
1696 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1697 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1698 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1699 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1700 } else if (VT.is64BitVector()) {
1701 if (IsPre || OffsetVal != 8)
1702 return false;
1703 switch (VT.getScalarSizeInBits()) {
1704 case 8:
1705 Opcode = AArch64::LD1Onev8b_POST;
1706 break;
1707 case 16:
1708 Opcode = AArch64::LD1Onev4h_POST;
1709 break;
1710 case 32:
1711 Opcode = AArch64::LD1Onev2s_POST;
1712 break;
1713 case 64:
1714 Opcode = AArch64::LD1Onev1d_POST;
1715 break;
1716 default:
1717 llvm_unreachable("Expected vector element to be a power of 2");
1718 }
1719 } else if (VT.is128BitVector()) {
1720 if (IsPre || OffsetVal != 16)
1721 return false;
1722 switch (VT.getScalarSizeInBits()) {
1723 case 8:
1724 Opcode = AArch64::LD1Onev16b_POST;
1725 break;
1726 case 16:
1727 Opcode = AArch64::LD1Onev8h_POST;
1728 break;
1729 case 32:
1730 Opcode = AArch64::LD1Onev4s_POST;
1731 break;
1732 case 64:
1733 Opcode = AArch64::LD1Onev2d_POST;
1734 break;
1735 default:
1736 llvm_unreachable("Expected vector element to be a power of 2");
1737 }
1738 } else
1739 return false;
1740 SDValue Chain = LD->getChain();
1741 SDValue Base = LD->getBasePtr();
1742 SDLoc dl(N);
1743 // LD1 encodes an immediate offset by using XZR as the offset register.
1744 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1745 ? CurDAG->getRegister(Reg: AArch64::XZR, VT: MVT::i64)
1746 : CurDAG->getTargetConstant(Val: OffsetVal, DL: dl, VT: MVT::i64);
1747 SDValue Ops[] = { Base, Offset, Chain };
1748 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, VT1: MVT::i64, VT2: DstVT,
1749 VT3: MVT::Other, Ops);
1750
1751 // Transfer memoperands.
1752 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
1753 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Res), NewMemRefs: {MemOp});
1754
1755 // Either way, we're replacing the node, so tell the caller that.
1756 SDValue LoadedVal = SDValue(Res, 1);
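  // When zero-extending to i64, the load itself only defines a W register;
  // wrap the result in SUBREG_TO_REG to produce the i64, relying on the
  // implicit zeroing of the upper 32 bits when a W register is written.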
1757 if (InsertTo64) {
1758 SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL: dl, VT: MVT::i32);
1759 LoadedVal =
1760 SDValue(CurDAG->getMachineNode(
1761 Opcode: AArch64::SUBREG_TO_REG, dl, VT: MVT::i64,
1762 Op1: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64), Op2: LoadedVal,
1763 Op3: SubReg),
1764 0);
1765 }
1766
1767 ReplaceUses(F: SDValue(N, 0), T: LoadedVal);
1768 ReplaceUses(F: SDValue(N, 1), T: SDValue(Res, 0));
1769 ReplaceUses(F: SDValue(N, 2), T: SDValue(Res, 2));
1770 CurDAG->RemoveDeadNode(N);
1771 return true;
1772}
1773
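// Select a multi-vector structure load: the instruction defines a single
// Untyped super-register, and each of the NumVecs results is extracted from
// it via consecutive sub-register indices starting at SubRegIdx.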
1774void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1775 unsigned SubRegIdx) {
1776 SDLoc dl(N);
1777 EVT VT = N->getValueType(ResNo: 0);
1778 SDValue Chain = N->getOperand(Num: 0);
1779
1780 SDValue Ops[] = {N->getOperand(Num: 2), // Mem operand;
1781 Chain};
1782
1783 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1784
1785 SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
1786 SDValue SuperReg = SDValue(Ld, 0);
1787 for (unsigned i = 0; i < NumVecs; ++i)
1788 ReplaceUses(F: SDValue(N, i),
1789 T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));
1790
1791 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));
1792
1793 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1794 // because it's too simple to have needed special treatment during lowering.
1795 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: N)) {
1796 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1797 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
1798 }
1799
1800 CurDAG->RemoveDeadNode(N);
1801}
1802
1803void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1804 unsigned Opc, unsigned SubRegIdx) {
1805 SDLoc dl(N);
1806 EVT VT = N->getValueType(ResNo: 0);
1807 SDValue Chain = N->getOperand(Num: 0);
1808
1809 SDValue Ops[] = {N->getOperand(Num: 1), // Mem operand
1810 N->getOperand(Num: 2), // Incremental
1811 Chain};
1812
1813 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1814 MVT::Untyped, MVT::Other};
1815
1816 SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
1817
1818 // Update uses of write back register
1819 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));
1820
1821 // Update uses of vector list
1822 SDValue SuperReg = SDValue(Ld, 1);
1823 if (NumVecs == 1)
1824 ReplaceUses(F: SDValue(N, 0), T: SuperReg);
1825 else
1826 for (unsigned i = 0; i < NumVecs; ++i)
1827 ReplaceUses(F: SDValue(N, i),
1828 T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));
1829
1830 // Update the chain
1831 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
1832 CurDAG->RemoveDeadNode(N);
1833}
1834
/// Optimize \p OldBase and \p OldOffset by selecting the best addressing
/// mode. Returns a tuple consisting of an opcode, an SDValue for the new base
/// and an SDValue for the new offset.
1838std::tuple<unsigned, SDValue, SDValue>
1839AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1840 unsigned Opc_ri,
1841 const SDValue &OldBase,
1842 const SDValue &OldOffset,
1843 unsigned Scale) {
1844 SDValue NewBase = OldBase;
1845 SDValue NewOffset = OldOffset;
1846 // Detect a possible Reg+Imm addressing mode.
1847 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1848 Root: N, N: OldBase, Base&: NewBase, OffImm&: NewOffset);
1849
1850 // Detect a possible reg+reg addressing mode, but only if we haven't already
1851 // detected a Reg+Imm one.
1852 const bool IsRegReg =
1853 !IsRegImm && SelectSVERegRegAddrMode(N: OldBase, Scale, Base&: NewBase, Offset&: NewOffset);
1854
1855 // Select the instruction.
1856 return std::make_tuple(args&: IsRegReg ? Opc_rr : Opc_ri, args&: NewBase, args&: NewOffset);
1857}
1858
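// Element-type category an SVE/SME intrinsic operates on; used by
// SelectOpcodeFromVT below to filter the candidate opcodes.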
1859enum class SelectTypeKind {
1860 Int1 = 0,
1861 Int = 1,
1862 FP = 2,
1863 AnyType = 3,
1864};
1865
/// This function selects an opcode from a list of opcodes, which is expected
/// to contain the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit } element
/// types, in that order.
1869template <SelectTypeKind Kind>
1870static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1871 // Only match scalable vector VTs
1872 if (!VT.isScalableVector())
1873 return 0;
1874
1875 EVT EltVT = VT.getVectorElementType();
1876 unsigned Key = VT.getVectorMinNumElements();
1877 switch (Kind) {
1878 case SelectTypeKind::AnyType:
1879 break;
1880 case SelectTypeKind::Int:
1881 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1882 EltVT != MVT::i64)
1883 return 0;
1884 break;
1885 case SelectTypeKind::Int1:
1886 if (EltVT != MVT::i1)
1887 return 0;
1888 break;
1889 case SelectTypeKind::FP:
1890 if (EltVT == MVT::bf16)
1891 Key = 16;
1892 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1893 EltVT != MVT::f64)
1894 return 0;
1895 break;
1896 }
1897
1898 unsigned Offset;
1899 switch (Key) {
1900 case 16: // 8-bit or bf16
1901 Offset = 0;
1902 break;
1903 case 8: // 16-bit
1904 Offset = 1;
1905 break;
1906 case 4: // 32-bit
1907 Offset = 2;
1908 break;
1909 case 2: // 64-bit
1910 Offset = 3;
1911 break;
1912 default:
1913 return 0;
1914 }
1915
1916 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1917}
1918
1919// This function is almost identical to SelectWhilePair, but has an
1920// extra check on the range of the immediate operand.
1921// TODO: Merge these two functions together at some point?
1922void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1923 // Immediate can be either 0 or 1.
1924 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)))
1925 if (Imm->getZExtValue() > 1)
1926 return;
1927
1928 SDLoc DL(N);
1929 EVT VT = N->getValueType(ResNo: 0);
1930 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
1931 SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
1932 SDValue SuperReg = SDValue(WhilePair, 0);
1933
1934 for (unsigned I = 0; I < 2; ++I)
1935 ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
1936 SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));
1937
1938 CurDAG->RemoveDeadNode(N);
1939}
1940
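// Select an SVE2p1/SME2 "while" intrinsic that produces a pair of predicates:
// the machine node defines an Untyped super-register from which the two
// results are extracted via psub0/psub1.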
1941void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1942 SDLoc DL(N);
1943 EVT VT = N->getValueType(ResNo: 0);
1944
1945 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
1946
1947 SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
1948 SDValue SuperReg = SDValue(WhilePair, 0);
1949
1950 for (unsigned I = 0; I < 2; ++I)
1951 ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
1952 SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));
1953
1954 CurDAG->RemoveDeadNode(N);
1955}
1956
1957void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1958 unsigned Opcode) {
1959 EVT VT = N->getValueType(ResNo: 0);
1960 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
1961 SDValue Ops = createZTuple(Regs);
1962 SDLoc DL(N);
1963 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Op1: Ops);
1964 SDValue SuperReg = SDValue(Intrinsic, 0);
1965 for (unsigned i = 0; i < NumVecs; ++i)
1966 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
1967 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
1968
1969 CurDAG->RemoveDeadNode(N);
1970}
1971
1972void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1973 unsigned Opcode) {
1974 SDLoc DL(N);
1975 EVT VT = N->getValueType(ResNo: 0);
1976 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1977 Ops.push_back(/*Chain*/ Elt: N->getOperand(Num: 0));
1978
1979 SDNode *Instruction =
1980 CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
1981 SDValue SuperReg = SDValue(Instruction, 0);
1982
1983 for (unsigned i = 0; i < NumVecs; ++i)
1984 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
1985 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
1986
1987 // Copy chain
1988 unsigned ChainIdx = NumVecs;
1989 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Instruction, 1));
1990 CurDAG->RemoveDeadNode(N);
1991}
1992
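// Select a destructive SME2/SVE2p1 multi-vector intrinsic: the first NumVecs
// vector operands are packed into a multi-vector (Mul) Z-tuple, the second
// source is either another tuple (IsZmMulti) or a single vector, and an
// optional leading predicate is passed through first.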
1993void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1994 unsigned NumVecs,
1995 bool IsZmMulti,
1996 unsigned Opcode,
1997 bool HasPred) {
1998 assert(Opcode != 0 && "Unexpected opcode");
1999
2000 SDLoc DL(N);
2001 EVT VT = N->getValueType(ResNo: 0);
2002 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2003 SmallVector<SDValue, 4> Ops;
2004
2005 auto GetMultiVecOperand = [&]() {
2006 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2007 OpsIter += NumVecs;
2008 return createZMulTuple(Regs);
2009 };
2010
2011 if (HasPred)
2012 Ops.push_back(Elt: *OpsIter++);
2013
2014 Ops.push_back(Elt: GetMultiVecOperand());
2015 if (IsZmMulti)
2016 Ops.push_back(Elt: GetMultiVecOperand());
2017 else
2018 Ops.push_back(Elt: *OpsIter++);
2019
2020 // Append any remaining operands.
2021 Ops.append(in_start: OpsIter, in_end: N->op_end());
2022 SDNode *Intrinsic;
2023 Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Ops);
2024 SDValue SuperReg = SDValue(Intrinsic, 0);
2025 for (unsigned i = 0; i < NumVecs; ++i)
2026 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
2027 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2028
2029 CurDAG->RemoveDeadNode(N);
2030}
2031
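// Select a predicated SVE structure load, choosing between the reg+imm and
// reg+reg forms of the instruction via findAddrModeSVELoadStore.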
2032void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2033 unsigned Scale, unsigned Opc_ri,
2034 unsigned Opc_rr, bool IsIntr) {
2035 assert(Scale < 5 && "Invalid scaling value.");
2036 SDLoc DL(N);
2037 EVT VT = N->getValueType(ResNo: 0);
2038 SDValue Chain = N->getOperand(Num: 0);
2039
2040 // Optimize addressing mode.
2041 SDValue Base, Offset;
2042 unsigned Opc;
2043 std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
2044 N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: IsIntr ? 3 : 2),
2045 OldOffset: CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), Scale);
2046
2047 SDValue Ops[] = {N->getOperand(Num: IsIntr ? 2 : 1), // Predicate
2048 Base, // Memory operand
2049 Offset, Chain};
2050
2051 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2052
2053 SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
2054 SDValue SuperReg = SDValue(Load, 0);
2055 for (unsigned i = 0; i < NumVecs; ++i)
2056 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
2057 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2058
2059 // Copy chain
2060 unsigned ChainIdx = NumVecs;
2061 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
2062 CurDAG->RemoveDeadNode(N);
2063}
2064
2065void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2066 unsigned NumVecs,
2067 unsigned Scale,
2068 unsigned Opc_ri,
2069 unsigned Opc_rr) {
2070 assert(Scale < 4 && "Invalid scaling value.");
2071 SDLoc DL(N);
2072 EVT VT = N->getValueType(ResNo: 0);
2073 SDValue Chain = N->getOperand(Num: 0);
2074
2075 SDValue PNg = N->getOperand(Num: 2);
2076 SDValue Base = N->getOperand(Num: 3);
2077 SDValue Offset = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
2078 unsigned Opc;
2079 std::tie(args&: Opc, args&: Base, args&: Offset) =
2080 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, OldBase: Base, OldOffset: Offset, Scale);
2081
2082 SDValue Ops[] = {PNg, // Predicate-as-counter
2083 Base, // Memory operand
2084 Offset, Chain};
2085
2086 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2087
2088 SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
2089 SDValue SuperReg = SDValue(Load, 0);
2090 for (unsigned i = 0; i < NumVecs; ++i)
2091 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
2092 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2093
2094 // Copy chain
2095 unsigned ChainIdx = NumVecs;
2096 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
2097 CurDAG->RemoveDeadNode(N);
2098}
2099
2100void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2101 unsigned Opcode) {
2102 if (N->getValueType(ResNo: 0) != MVT::nxv4f32)
2103 return;
2104 SelectUnaryMultiIntrinsic(N, NumOutVecs: NumVecs, IsTupleInput: true, Opc: Opcode);
2105}
2106
2107void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2108 unsigned NumOutVecs,
2109 unsigned Opc,
2110 uint32_t MaxImm) {
2111 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 4)))
2112 if (Imm->getZExtValue() > MaxImm)
2113 return;
2114
2115 SDValue ZtValue;
2116 if (!ImmToReg<AArch64::ZT0, 0>(N: Node->getOperand(Num: 2), Imm&: ZtValue))
2117 return;
2118
2119 SDValue Chain = Node->getOperand(Num: 0);
2120 SDValue Ops[] = {ZtValue, Node->getOperand(Num: 3), Node->getOperand(Num: 4), Chain};
2121 SDLoc DL(Node);
2122 EVT VT = Node->getValueType(ResNo: 0);
2123
2124 SDNode *Instruction =
2125 CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2126 SDValue SuperReg = SDValue(Instruction, 0);
2127
2128 for (unsigned I = 0; I < NumOutVecs; ++I)
2129 ReplaceUses(F: SDValue(Node, I), T: CurDAG->getTargetExtractSubreg(
2130 SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));
2131
2132 // Copy chain
2133 unsigned ChainIdx = NumOutVecs;
2134 ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1));
2135 CurDAG->RemoveDeadNode(N: Node);
2136}
2137
2138void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2139 unsigned NumOutVecs,
2140 unsigned Opc) {
2141 SDValue ZtValue;
2142 if (!ImmToReg<AArch64::ZT0, 0>(N: Node->getOperand(Num: 2), Imm&: ZtValue))
2143 return;
2144
2145 SDValue Chain = Node->getOperand(Num: 0);
2146 SDValue Ops[] = {ZtValue,
2147 createZMulTuple(Regs: {Node->getOperand(Num: 3), Node->getOperand(Num: 4)}),
2148 Chain};
2149
2150 SDLoc DL(Node);
2151 EVT VT = Node->getValueType(ResNo: 0);
2152
2153 SDNode *Instruction =
2154 CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2155 SDValue SuperReg = SDValue(Instruction, 0);
2156
2157 for (unsigned I = 0; I < NumOutVecs; ++I)
2158 ReplaceUses(F: SDValue(Node, I), T: CurDAG->getTargetExtractSubreg(
2159 SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));
2160
2161 // Copy chain
2162 unsigned ChainIdx = NumOutVecs;
2163 ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1));
2164 CurDAG->RemoveDeadNode(N: Node);
2165}
2166
2167void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2168 unsigned Op) {
2169 SDLoc DL(N);
2170 EVT VT = N->getValueType(ResNo: 0);
2171
2172 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
2173 SDValue Zd = createZMulTuple(Regs);
2174 SDValue Zn = N->getOperand(Num: 1 + NumVecs);
2175 SDValue Zm = N->getOperand(Num: 2 + NumVecs);
2176
2177 SDValue Ops[] = {Zd, Zn, Zm};
2178
2179 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT: MVT::Untyped, Ops);
2180 SDValue SuperReg = SDValue(Intrinsic, 0);
2181 for (unsigned i = 0; i < NumVecs; ++i)
2182 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
2183 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2184
2185 CurDAG->RemoveDeadNode(N);
2186}
2187
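// Map a ZA tile base register plus tile number onto the concrete tile
// register, rejecting tile numbers that are out of range for the element
// size (there is 1 byte tile, 2 halfword, 4 word and 8 doubleword tiles).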
2188bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2189 switch (BaseReg) {
2190 default:
2191 return false;
2192 case AArch64::ZA:
2193 case AArch64::ZAB0:
2194 if (TileNum == 0)
2195 break;
2196 return false;
2197 case AArch64::ZAH0:
2198 if (TileNum <= 1)
2199 break;
2200 return false;
2201 case AArch64::ZAS0:
2202 if (TileNum <= 3)
2203 break;
2204 return false;
2205 case AArch64::ZAD0:
2206 if (TileNum <= 7)
2207 break;
2208 return false;
2209 }
2210
2211 BaseReg += TileNum;
2212 return true;
2213}
2214
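// Select an SME multi-vector read from ZA: the slice index is decomposed into
// a base register plus an immediate offset no larger than MaxIdx, scaled by
// Scale.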
2215template <unsigned MaxIdx, unsigned Scale>
2216void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2217 unsigned BaseReg, unsigned Op) {
2218 unsigned TileNum = 0;
2219 if (BaseReg != AArch64::ZA)
2220 TileNum = N->getConstantOperandVal(Num: 2);
2221
2222 if (!SelectSMETile(BaseReg, TileNum))
2223 return;
2224
2225 SDValue SliceBase, Base, Offset;
2226 if (BaseReg == AArch64::ZA)
2227 SliceBase = N->getOperand(Num: 2);
2228 else
2229 SliceBase = N->getOperand(Num: 3);
2230
2231 if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
2232 return;
2233
2234 SDLoc DL(N);
2235 SDValue SubReg = CurDAG->getRegister(Reg: BaseReg, VT: MVT::Other);
2236 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(Num: 0)};
2237 SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2238
2239 EVT VT = N->getValueType(ResNo: 0);
2240 for (unsigned I = 0; I < NumVecs; ++I)
2241 ReplaceUses(F: SDValue(N, I),
2242 T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
2243 Operand: SDValue(Mov, 0)));
2244 // Copy chain
2245 unsigned ChainIdx = NumVecs;
2246 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1));
2247 CurDAG->RemoveDeadNode(N);
2248}
2249
2250void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2251 unsigned Op, unsigned MaxIdx,
2252 unsigned Scale, unsigned BaseReg) {
  // The slice operand can be in different positions:
  //   array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
  //   tile to vector:  llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2256 SDValue SliceBase = N->getOperand(Num: 2);
2257 if (BaseReg != AArch64::ZA)
2258 SliceBase = N->getOperand(Num: 3);
2259
2260 SDValue Base, Offset;
2261 if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
2262 return;
  // The correct ZA tile number is computed when the machine instruction is
  // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
  // register alongside a ZReg.
2266 SDLoc DL(N);
2267 SmallVector<SDValue, 6> Ops;
  if (BaseReg != AArch64::ZA)
2269 Ops.push_back(Elt: N->getOperand(Num: 2));
2270 Ops.push_back(Elt: Base);
2271 Ops.push_back(Elt: Offset);
  Ops.push_back(Elt: N->getOperand(Num: 0)); // Chain
2273 SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2274
2275 EVT VT = N->getValueType(ResNo: 0);
2276 for (unsigned I = 0; I < NumVecs; ++I)
2277 ReplaceUses(F: SDValue(N, I),
2278 T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
2279 Operand: SDValue(Mov, 0)));
2280
2281 // Copy chain
2282 unsigned ChainIdx = NumVecs;
2283 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1));
2284 CurDAG->RemoveDeadNode(N);
2285}
2286
2287void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2288 unsigned NumOutVecs,
2289 bool IsTupleInput,
2290 unsigned Opc) {
2291 SDLoc DL(N);
2292 EVT VT = N->getValueType(ResNo: 0);
2293 unsigned NumInVecs = N->getNumOperands() - 1;
2294
2295 SmallVector<SDValue, 6> Ops;
2296 if (IsTupleInput) {
2297 assert((NumInVecs == 2 || NumInVecs == 4) &&
2298 "Don't know how to handle multi-register input!");
2299 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumInVecs));
2300 Ops.push_back(Elt: createZMulTuple(Regs));
2301 } else {
2302 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2303 for (unsigned I = 0; I < NumInVecs; I++)
2304 Ops.push_back(Elt: N->getOperand(Num: 1 + I));
2305 }
2306
2307 SDNode *Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
2308 SDValue SuperReg = SDValue(Res, 0);
2309
2310 for (unsigned I = 0; I < NumOutVecs; I++)
2311 ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
2312 SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));
2313 CurDAG->RemoveDeadNode(N);
2314}
2315
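// Select a NEON structure store (ST1..ST4): the NumVecs source vectors are
// packed into a D- or Q-register tuple depending on their width, and the
// memory operands are transferred to the new node.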
2316void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2317 unsigned Opc) {
2318 SDLoc dl(N);
2319 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2320
2321 // Form a REG_SEQUENCE to force register allocation.
2322 bool Is128Bit = VT.getSizeInBits() == 128;
2323 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
2324 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2325
2326 SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), N->getOperand(Num: 0)};
2327 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);
2328
2329 // Transfer memoperands.
2330 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2331 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2332
2333 ReplaceNode(F: N, T: St);
2334}
2335
2336void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2337 unsigned Scale, unsigned Opc_rr,
2338 unsigned Opc_ri) {
2339 SDLoc dl(N);
2340
2341 // Form a REG_SEQUENCE to force register allocation.
2342 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
2343 SDValue RegSeq = createZTuple(Regs);
2344
2345 // Optimize addressing mode.
2346 unsigned Opc;
2347 SDValue Offset, Base;
2348 std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
2349 N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: NumVecs + 3),
2350 OldOffset: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64), Scale);
2351
2352 SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), // predicate
2353 Base, // address
2354 Offset, // offset
2355 N->getOperand(Num: 0)}; // chain
2356 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);
2357
2358 ReplaceNode(F: N, T: St);
2359}
2360
2361bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2362 SDValue &OffImm) {
2363 SDLoc dl(N);
2364 const DataLayout &DL = CurDAG->getDataLayout();
2365 const TargetLowering *TLI = getTargetLowering();
2366
2367 // Try to match it for the frame address
2368 if (auto FINode = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2369 int FI = FINode->getIndex();
2370 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
2371 OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
2372 return true;
2373 }
2374
2375 return false;
2376}
2377
2378void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2379 unsigned Opc) {
2380 SDLoc dl(N);
2381 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2382 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2383 MVT::Other}; // Type for the Chain
2384
2385 // Form a REG_SEQUENCE to force register allocation.
2386 bool Is128Bit = VT.getSizeInBits() == 128;
2387 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
2388 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2389
2390 SDValue Ops[] = {RegSeq,
2391 N->getOperand(Num: NumVecs + 1), // base register
2392 N->getOperand(Num: NumVecs + 2), // Incremental
2393 N->getOperand(Num: 0)}; // Chain
2394 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2395
2396 ReplaceNode(F: N, T: St);
2397}
2398
2399namespace {
2400/// WidenVector - Given a value in the V64 register class, produce the
2401/// equivalent value in the V128 register class.
2402class WidenVector {
2403 SelectionDAG &DAG;
2404
2405public:
2406 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2407
2408 SDValue operator()(SDValue V64Reg) {
2409 EVT VT = V64Reg.getValueType();
2410 unsigned NarrowSize = VT.getVectorNumElements();
2411 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2412 MVT WideTy = MVT::getVectorVT(VT: EltTy, NumElements: 2 * NarrowSize);
2413 SDLoc DL(V64Reg);
2414
2415 SDValue Undef =
2416 SDValue(DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: WideTy), 0);
2417 return DAG.getTargetInsertSubreg(SRIdx: AArch64::dsub, DL, VT: WideTy, Operand: Undef, Subreg: V64Reg);
2418 }
2419};
2420} // namespace
2421
2422/// NarrowVector - Given a value in the V128 register class, produce the
2423/// equivalent value in the V64 register class.
2424static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2425 EVT VT = V128Reg.getValueType();
2426 unsigned WideSize = VT.getVectorNumElements();
2427 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2428 MVT NarrowTy = MVT::getVectorVT(VT: EltTy, NumElements: WideSize / 2);
2429
2430 return DAG.getTargetExtractSubreg(SRIdx: AArch64::dsub, DL: SDLoc(V128Reg), VT: NarrowTy,
2431 Operand: V128Reg);
2432}
2433
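// Select a lane-indexed structure load (LD1..LD4 lane forms). These
// instructions only operate on Q-register tuples, so 64-bit vectors are
// widened to 128 bits on the way in and narrowed back on the way out.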
2434void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2435 unsigned Opc) {
2436 SDLoc dl(N);
2437 EVT VT = N->getValueType(ResNo: 0);
2438 bool Narrow = VT.getSizeInBits() == 64;
2439
2440 // Form a REG_SEQUENCE to force register allocation.
2441 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
2442
2443 if (Narrow)
2444 transform(Range&: Regs, d_first: Regs.begin(),
2445 F: WidenVector(*CurDAG));
2446
2447 SDValue RegSeq = createQTuple(Regs);
2448
2449 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2450
2451 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);
2452
2453 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
2454 N->getOperand(Num: NumVecs + 3), N->getOperand(Num: 0)};
2455 SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2456 SDValue SuperReg = SDValue(Ld, 0);
2457
2458 EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
2459 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2460 AArch64::qsub2, AArch64::qsub3 };
2461 for (unsigned i = 0; i < NumVecs; ++i) {
2462 SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT, Operand: SuperReg);
2463 if (Narrow)
2464 NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
2465 ReplaceUses(F: SDValue(N, i), T: NV);
2466 }
2467
2468 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));
2469 CurDAG->RemoveDeadNode(N);
2470}
2471
2472void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2473 unsigned Opc) {
2474 SDLoc dl(N);
2475 EVT VT = N->getValueType(ResNo: 0);
2476 bool Narrow = VT.getSizeInBits() == 64;
2477
2478 // Form a REG_SEQUENCE to force register allocation.
2479 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
2480
2481 if (Narrow)
2482 transform(Range&: Regs, d_first: Regs.begin(),
2483 F: WidenVector(*CurDAG));
2484
2485 SDValue RegSeq = createQTuple(Regs);
2486
2487 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2488 RegSeq->getValueType(ResNo: 0), MVT::Other};
2489
2490 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);
2491
2492 SDValue Ops[] = {RegSeq,
2493 CurDAG->getTargetConstant(Val: LaneNo, DL: dl,
2494 VT: MVT::i64), // Lane Number
2495 N->getOperand(Num: NumVecs + 2), // Base register
2496 N->getOperand(Num: NumVecs + 3), // Incremental
2497 N->getOperand(Num: 0)};
2498 SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2499
2500 // Update uses of the write back register
2501 ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));
2502
2503 // Update uses of the vector list
2504 SDValue SuperReg = SDValue(Ld, 1);
2505 if (NumVecs == 1) {
2506 ReplaceUses(F: SDValue(N, 0),
2507 T: Narrow ? NarrowVector(V128Reg: SuperReg, DAG&: *CurDAG) : SuperReg);
2508 } else {
2509 EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
2510 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2511 AArch64::qsub2, AArch64::qsub3 };
2512 for (unsigned i = 0; i < NumVecs; ++i) {
2513 SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT,
2514 Operand: SuperReg);
2515 if (Narrow)
2516 NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
2517 ReplaceUses(F: SDValue(N, i), T: NV);
2518 }
2519 }
2520
2521 // Update the Chain
2522 ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
2523 CurDAG->RemoveDeadNode(N);
2524}
2525
2526void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2527 unsigned Opc) {
2528 SDLoc dl(N);
2529 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2530 bool Narrow = VT.getSizeInBits() == 64;
2531
2532 // Form a REG_SEQUENCE to force register allocation.
2533 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
2534
2535 if (Narrow)
2536 transform(Range&: Regs, d_first: Regs.begin(),
2537 F: WidenVector(*CurDAG));
2538
2539 SDValue RegSeq = createQTuple(Regs);
2540
2541 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);
2542
2543 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
2544 N->getOperand(Num: NumVecs + 3), N->getOperand(Num: 0)};
2545 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops);
2546
2547 // Transfer memoperands.
2548 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2549 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2550
2551 ReplaceNode(F: N, T: St);
2552}
2553
2554void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2555 unsigned Opc) {
2556 SDLoc dl(N);
2557 EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
2558 bool Narrow = VT.getSizeInBits() == 64;
2559
2560 // Form a REG_SEQUENCE to force register allocation.
2561 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
2562
2563 if (Narrow)
2564 transform(Range&: Regs, d_first: Regs.begin(),
2565 F: WidenVector(*CurDAG));
2566
2567 SDValue RegSeq = createQTuple(Regs);
2568
2569 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2570 MVT::Other};
2571
2572 unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);
2573
2574 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
2575 N->getOperand(Num: NumVecs + 2), // Base Register
2576 N->getOperand(Num: NumVecs + 3), // Incremental
2577 N->getOperand(Num: 0)};
2578 SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
2579
2580 // Transfer memoperands.
2581 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
2582 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
2583
2584 ReplaceNode(F: N, T: St);
2585}
2586
2587static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2588 unsigned &Opc, SDValue &Opd0,
2589 unsigned &LSB, unsigned &MSB,
2590 unsigned NumberOfIgnoredLowBits,
2591 bool BiggerPattern) {
2592 assert(N->getOpcode() == ISD::AND &&
2593 "N must be a AND operation to call this function");
2594
2595 EVT VT = N->getValueType(ResNo: 0);
2596
  // We could test the type of VT and return false when it does not match, but
  // since that check is done before this function is called, we turn it into
  // an assert here to avoid redundant code.
2600 assert((VT == MVT::i32 || VT == MVT::i64) &&
2601 "Type checking must have been done before calling this function");
2602
2603 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2604 // changed the AND node to a 32-bit mask operation. We'll have to
2605 // undo that as part of the transform here if we want to catch all
2606 // the opportunities.
2607 // Currently the NumberOfIgnoredLowBits argument helps to recover
2608 // from these situations when matching bigger pattern (bitfield insert).
2609
2610 // For unsigned extracts, check for a shift right and mask
2611 uint64_t AndImm = 0;
2612 if (!isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: AndImm))
2613 return false;
2614
2615 const SDNode *Op0 = N->getOperand(Num: 0).getNode();
2616
2617 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2618 // simplified. Try to undo that
2619 AndImm |= maskTrailingOnes<uint64_t>(N: NumberOfIgnoredLowBits);
2620
2621 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
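  // (e.g. 0x0000ffff qualifies since 0xffff & 0x10000 == 0, whereas
  // 0x00ffff00 does not).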
2622 if (AndImm & (AndImm + 1))
2623 return false;
2624
2625 bool ClampMSB = false;
2626 uint64_t SrlImm = 0;
2627 // Handle the SRL + ANY_EXTEND case.
2628 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2629 isOpcWithIntImmediate(N: Op0->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
2630 // Extend the incoming operand of the SRL to 64-bit.
2631 Opd0 = Widen(CurDAG, N: Op0->getOperand(Num: 0).getOperand(i: 0));
2632 // Make sure to clamp the MSB so that we preserve the semantics of the
2633 // original operations.
2634 ClampMSB = true;
2635 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2636 isOpcWithIntImmediate(N: Op0->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
2637 Imm&: SrlImm)) {
2638 // If the shift result was truncated, we can still combine them.
2639 Opd0 = Op0->getOperand(Num: 0).getOperand(i: 0);
2640
2641 // Use the type of SRL node.
2642 VT = Opd0->getValueType(ResNo: 0);
2643 } else if (isOpcWithIntImmediate(N: Op0, Opc: ISD::SRL, Imm&: SrlImm)) {
2644 Opd0 = Op0->getOperand(Num: 0);
2645 ClampMSB = (VT == MVT::i32);
2646 } else if (BiggerPattern) {
2647 // Let's pretend a 0 shift right has been performed.
2648 // The resulting code will be at least as good as the original one
2649 // plus it may expose more opportunities for bitfield insert pattern.
2650 // FIXME: Currently we limit this to the bigger pattern, because
2651 // some optimizations expect AND and not UBFM.
2652 Opd0 = N->getOperand(Num: 0);
2653 } else
2654 return false;
2655
2656 // Bail out on large immediates. This happens when no proper
2657 // combining/constant folding was performed.
2658 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2659 LLVM_DEBUG(
2660 (dbgs() << N
2661 << ": Found large shift immediate, this should not happen\n"));
2662 return false;
2663 }
2664
2665 LSB = SrlImm;
2666 MSB = SrlImm +
2667 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(Value: AndImm)
2668 : llvm::countr_one<uint64_t>(Value: AndImm)) -
2669 1;
2670 if (ClampMSB)
2671 // Since we're moving the extend before the right shift operation, we need
2672 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2673 // the zeros which would get shifted in with the original right shift
2674 // operation.
2675 MSB = MSB > 31 ? 31 : MSB;
2676
2677 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2678 return true;
2679}
2680
2681static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2682 SDValue &Opd0, unsigned &Immr,
2683 unsigned &Imms) {
2684 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2685
2686 EVT VT = N->getValueType(ResNo: 0);
2687 unsigned BitWidth = VT.getSizeInBits();
2688 assert((VT == MVT::i32 || VT == MVT::i64) &&
2689 "Type checking must have been done before calling this function");
2690
2691 SDValue Op = N->getOperand(Num: 0);
2692 if (Op->getOpcode() == ISD::TRUNCATE) {
2693 Op = Op->getOperand(Num: 0);
2694 VT = Op->getValueType(ResNo: 0);
2695 BitWidth = VT.getSizeInBits();
2696 }
2697
2698 uint64_t ShiftImm;
2699 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRL, Imm&: ShiftImm) &&
2700 !isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
2701 return false;
2702
2703 unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
2704 if (ShiftImm + Width > BitWidth)
2705 return false;
2706
2707 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2708 Opd0 = Op.getOperand(i: 0);
2709 Immr = ShiftImm;
2710 Imms = ShiftImm + Width - 1;
2711 return true;
2712}
2713
2714static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2715 SDValue &Opd0, unsigned &LSB,
2716 unsigned &MSB) {
  // We are looking for the following pattern, which extracts several
  // contiguous bits from the source value and places them starting at the
  // LSB of the destination value; all other bits of the destination value
  // are set to zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // where MaskImm >> ShiftImm determines the bit width.
2725 //
2726 // This gets selected into a single UBFM:
2727 //
2728 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2729 //
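  // For example (hypothetical values), with MaskImm = 0xff0 and ShiftImm = 4,
  // MaskImm >> ShiftImm == 0xff, so bits [11:4] are extracted and the node is
  // selected as UBFM Value, 4, 11.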
2730
2731 if (N->getOpcode() != ISD::SRL)
2732 return false;
2733
2734 uint64_t AndMask = 0;
2735 if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: AndMask))
2736 return false;
2737
2738 Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
2739
2740 uint64_t SrlImm = 0;
2741 if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
2742 return false;
2743
  // Check that we are really extracting several contiguous bits here.
2745 if (!isMask_64(Value: AndMask >> SrlImm))
2746 return false;
2747
2748 Opc = N->getValueType(ResNo: 0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2749 LSB = SrlImm;
2750 MSB = llvm::Log2_64(Value: AndMask);
2751 return true;
2752}
2753
2754static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2755 unsigned &Immr, unsigned &Imms,
2756 bool BiggerPattern) {
2757 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2758 "N must be a SHR/SRA operation to call this function");
2759
2760 EVT VT = N->getValueType(ResNo: 0);
2761
  // We could test the type of VT and return false when it does not match, but
  // since that check is done before this function is called, we turn it into
  // an assert here to avoid redundant code.
2765 assert((VT == MVT::i32 || VT == MVT::i64) &&
2766 "Type checking must have been done before calling this function");
2767
2768 // Check for AND + SRL doing several bits extract.
2769 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB&: Immr, MSB&: Imms))
2770 return true;
2771
2772 // We're looking for a shift of a shift.
2773 uint64_t ShlImm = 0;
2774 uint64_t TruncBits = 0;
2775 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
2776 Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
2777 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2778 N->getOperand(Num: 0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of a truncate. A truncate from i64 to i32
    // can be considered as setting the high 32 bits to zero. Our strategy
    // here is to always generate a 64-bit UBFM; this consistency helps the
    // CSE pass find more redundancy later.
2783 Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
2784 TruncBits = Opd0->getValueType(ResNo: 0).getSizeInBits() - VT.getSizeInBits();
2785 VT = Opd0.getValueType();
2786 assert(VT == MVT::i64 && "the promoted type should be i64");
2787 } else if (BiggerPattern) {
2788 // Let's pretend a 0 shift left has been performed.
2789 // FIXME: Currently we limit this to the bigger pattern case,
2790 // because some optimizations expect AND and not UBFM
2791 Opd0 = N->getOperand(Num: 0);
2792 } else
2793 return false;
2794
2795 // Missing combines/constant folding may have left us with strange
2796 // constants.
2797 if (ShlImm >= VT.getSizeInBits()) {
2798 LLVM_DEBUG(
2799 (dbgs() << N
2800 << ": Found large shift immediate, this should not happen\n"));
2801 return false;
2802 }
2803
2804 uint64_t SrlImm = 0;
2805 if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
2806 return false;
2807
2808 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2809 "bad amount in shift node!");
2810 int immr = SrlImm - ShlImm;
2811 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2812 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
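  // For example (hypothetical values), (x << 8) >> 16 on i64 gives
  // Immr = 16 - 8 = 8 and Imms = 64 - 8 - 1 = 55, i.e. UBFMXri x, 8, 55,
  // which is UBFX x, #8, #48.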
2813 // SRA requires a signed extraction
2814 if (VT == MVT::i32)
2815 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2816 else
2817 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2818 return true;
2819}
2820
2821bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2822 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2823
2824 EVT VT = N->getValueType(ResNo: 0);
2825 EVT NarrowVT = N->getOperand(Num: 0)->getValueType(ResNo: 0);
2826 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2827 return false;
2828
2829 uint64_t ShiftImm;
2830 SDValue Op = N->getOperand(Num: 0);
2831 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
2832 return false;
2833
2834 SDLoc dl(N);
2835 // Extend the incoming operand of the shift to 64-bits.
2836 SDValue Opd0 = Widen(CurDAG, N: Op.getOperand(i: 0));
2837 unsigned Immr = ShiftImm;
2838 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2839 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
2840 CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
2841 CurDAG->SelectNodeTo(N, MachineOpc: AArch64::SBFMXri, VT, Ops);
2842 return true;
2843}
2844
2845static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2846 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2847 unsigned NumberOfIgnoredLowBits = 0,
2848 bool BiggerPattern = false) {
2849 if (N->getValueType(ResNo: 0) != MVT::i32 && N->getValueType(ResNo: 0) != MVT::i64)
2850 return false;
2851
2852 switch (N->getOpcode()) {
2853 default:
2854 if (!N->isMachineOpcode())
2855 return false;
2856 break;
2857 case ISD::AND:
2858 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB&: Immr, MSB&: Imms,
2859 NumberOfIgnoredLowBits, BiggerPattern);
2860 case ISD::SRL:
2861 case ISD::SRA:
2862 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2863
2864 case ISD::SIGN_EXTEND_INREG:
2865 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2866 }
2867
2868 unsigned NOpc = N->getMachineOpcode();
2869 switch (NOpc) {
2870 default:
2871 return false;
2872 case AArch64::SBFMWri:
2873 case AArch64::UBFMWri:
2874 case AArch64::SBFMXri:
2875 case AArch64::UBFMXri:
2876 Opc = NOpc;
2877 Opd0 = N->getOperand(Num: 0);
2878 Immr = N->getConstantOperandVal(Num: 1);
2879 Imms = N->getConstantOperandVal(Num: 2);
2880 return true;
2881 }
2882 // Unreachable
2883 return false;
2884}
2885
2886bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2887 unsigned Opc, Immr, Imms;
2888 SDValue Opd0;
2889 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2890 return false;
2891
2892 EVT VT = N->getValueType(ResNo: 0);
2893 SDLoc dl(N);
2894
2895 // If the bit extract operation is 64bit but the original type is 32bit, we
2896 // need to add one EXTRACT_SUBREG.
2897 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2898 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT: MVT::i64),
2899 CurDAG->getTargetConstant(Val: Imms, DL: dl, VT: MVT::i64)};
2900
2901 SDNode *BFM = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i64, Ops: Ops64);
2902 SDValue Inner = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl,
2903 VT: MVT::i32, Operand: SDValue(BFM, 0));
2904 ReplaceNode(F: N, T: Inner.getNode());
2905 return true;
2906 }
2907
2908 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
2909 CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
2910 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
2911 return true;
2912}
2913
2914/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
2916/// this asks whether DstMask zeroes precisely those bits that will be set by
2917/// the other half.
2918static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2919 unsigned NumberOfIgnoredHighBits, EVT VT) {
2920 assert((VT == MVT::i32 || VT == MVT::i64) &&
2921 "i32 or i64 mask type expected!");
2922 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2923
2924 // Enable implicitTrunc as we're intentionally ignoring high bits.
2925 APInt SignificantDstMask =
2926 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2927 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(width: BitWidth);
2928
2929 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2930 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2931}
2932
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used again
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they live on through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an instruction whose bit usage we cannot analyze,
// then all of its bits are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
2948static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2949
2950static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2951 unsigned Depth) {
2952 uint64_t Imm =
2953 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
2954 Imm = AArch64_AM::decodeLogicalImmediate(val: Imm, regSize: UsefulBits.getBitWidth());
2955 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2956 getUsefulBits(Op, UsefulBits, Depth: Depth + 1);
2957}
2958
2959static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2960 uint64_t Imm, uint64_t MSB,
2961 unsigned Depth) {
2962 // inherit the bitwidth value
2963 APInt OpUsefulBits(UsefulBits);
2964 OpUsefulBits = 1;
2965
2966 if (MSB >= Imm) {
2967 OpUsefulBits <<= MSB - Imm + 1;
2968 --OpUsefulBits;
2969 // The interesting part will be in the lower part of the result
2970 getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
2971 // The interesting part was starting at Imm in the argument
2972 OpUsefulBits <<= Imm;
2973 } else {
2974 OpUsefulBits <<= MSB + 1;
2975 --OpUsefulBits;
2976 // The interesting part will be shifted in the result
2977 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2978 getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
2979 // The interesting part was at zero in the argument
2980 OpUsefulBits.lshrInPlace(ShiftAmt: OpUsefulBits.getBitWidth() - Imm);
2981 }
2982
2983 UsefulBits &= OpUsefulBits;
2984}
2985
2986static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2987 unsigned Depth) {
2988 uint64_t Imm =
2989 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
2990 uint64_t MSB =
2991 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
2992
2993 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2994}
2995
2996static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2997 unsigned Depth) {
2998 uint64_t ShiftTypeAndValue =
2999 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
3000 APInt Mask(UsefulBits);
3001 Mask.clearAllBits();
3002 Mask.flipAllBits();
3003
3004 if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSL) {
3005 // Shift Left
3006 uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
3007 Mask <<= ShiftAmt;
3008 getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
3009 Mask.lshrInPlace(ShiftAmt);
3010 } else if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSR) {
3011 // Shift Right
3012 // We do not handle AArch64_AM::ASR, because the sign will change the
3013 // number of useful bits
3014 uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
3015 Mask.lshrInPlace(ShiftAmt);
3016 getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
3017 Mask <<= ShiftAmt;
3018 } else
3019 return;
3020
3021 UsefulBits &= Mask;
3022}
3023
3024static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3025 unsigned Depth) {
3026 uint64_t Imm =
3027 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
3028 uint64_t MSB =
3029 cast<const ConstantSDNode>(Val: Op.getOperand(i: 3).getNode())->getZExtValue();
3030
3031 APInt OpUsefulBits(UsefulBits);
3032 OpUsefulBits = 1;
3033
3034 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3035 ResultUsefulBits.flipAllBits();
3036 APInt Mask(UsefulBits.getBitWidth(), 0);
3037
3038 getUsefulBits(Op, UsefulBits&: ResultUsefulBits, Depth: Depth + 1);
3039
3040 if (MSB >= Imm) {
3041 // The instruction is a BFXIL.
3042 uint64_t Width = MSB - Imm + 1;
3043 uint64_t LSB = Imm;
3044
3045 OpUsefulBits <<= Width;
3046 --OpUsefulBits;
3047
3048 if (Op.getOperand(i: 1) == Orig) {
3049 // Copy the low bits from the result to bits starting from LSB.
3050 Mask = ResultUsefulBits & OpUsefulBits;
3051 Mask <<= LSB;
3052 }
3053
3054 if (Op.getOperand(i: 0) == Orig)
3055 // Bits starting from LSB in the input contribute to the result.
3056 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3057 } else {
3058 // The instruction is a BFI.
3059 uint64_t Width = MSB + 1;
3060 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3061
3062 OpUsefulBits <<= Width;
3063 --OpUsefulBits;
3064 OpUsefulBits <<= LSB;
3065
3066 if (Op.getOperand(i: 1) == Orig) {
3067 // Copy the bits from the result to the zero bits.
3068 Mask = ResultUsefulBits & OpUsefulBits;
3069 Mask.lshrInPlace(ShiftAmt: LSB);
3070 }
3071
3072 if (Op.getOperand(i: 0) == Orig)
3073 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3074 }
3075
3076 UsefulBits &= Mask;
3077}
3078
3079static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3080 SDValue Orig, unsigned Depth) {
3081
3082 // Users of this node should have already been instruction selected
3083 // FIXME: Can we turn that into an assert?
3084 if (!UserNode->isMachineOpcode())
3085 return;
3086
3087 switch (UserNode->getMachineOpcode()) {
3088 default:
3089 return;
3090 case AArch64::ANDSWri:
3091 case AArch64::ANDSXri:
3092 case AArch64::ANDWri:
3093 case AArch64::ANDXri:
3094    // We increment Depth only when we call getUsefulBits.
3095 return getUsefulBitsFromAndWithImmediate(Op: SDValue(UserNode, 0), UsefulBits,
3096 Depth);
3097 case AArch64::UBFMWri:
3098 case AArch64::UBFMXri:
3099 return getUsefulBitsFromUBFM(Op: SDValue(UserNode, 0), UsefulBits, Depth);
3100
3101 case AArch64::ORRWrs:
3102 case AArch64::ORRXrs:
3103 if (UserNode->getOperand(Num: 0) != Orig && UserNode->getOperand(Num: 1) == Orig)
3104 getUsefulBitsFromOrWithShiftedReg(Op: SDValue(UserNode, 0), UsefulBits,
3105 Depth);
3106 return;
3107 case AArch64::BFMWri:
3108 case AArch64::BFMXri:
3109 return getUsefulBitsFromBFM(Op: SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3110
3111 case AArch64::STRBBui:
3112 case AArch64::STURBBi:
3113 if (UserNode->getOperand(Num: 0) != Orig)
3114 return;
3115 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3116 return;
3117
3118 case AArch64::STRHHui:
3119 case AArch64::STURHHi:
3120 if (UserNode->getOperand(Num: 0) != Orig)
3121 return;
3122 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3123 return;
3124 }
3125}
3126
3127static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3128 if (Depth >= SelectionDAG::MaxRecursionDepth)
3129 return;
3130 // Initialize UsefulBits
3131 if (!Depth) {
3132 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3133    // At the beginning, assume every produced bit is useful.
3134 UsefulBits = APInt(Bitwidth, 0);
3135 UsefulBits.flipAllBits();
3136 }
3137 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3138
3139 for (SDNode *Node : Op.getNode()->users()) {
3140 // A use cannot produce useful bits
3141 APInt UsefulBitsForUse = APInt(UsefulBits);
3142 getUsefulBitsForUse(UserNode: Node, UsefulBits&: UsefulBitsForUse, Orig: Op, Depth);
3143 UsersUsefulBits |= UsefulBitsForUse;
3144 }
3145 // UsefulBits contains the produced bits that are meaningful for the
3146 // current definition, thus a user cannot make a bit meaningful at
3147 // this point
3148 UsefulBits &= UsersUsefulBits;
3149}
3150
3151/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3152/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3153/// 0, return Op unchanged.
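///
/// For example (i32): ShlAmount == 4 produces "UBFMWri Op, #28, #27"
/// (i.e. "lsl w, w, #4"), and ShlAmount == -4 produces "UBFMWri Op, #4, #31"
/// (i.e. "lsr w, w, #4").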
3154static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3155 if (ShlAmount == 0)
3156 return Op;
3157
3158 EVT VT = Op.getValueType();
3159 SDLoc dl(Op);
3160 unsigned BitWidth = VT.getSizeInBits();
3161 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3162
3163 SDNode *ShiftNode;
3164 if (ShlAmount > 0) {
3165 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3166 ShiftNode = CurDAG->getMachineNode(
3167 Opcode: UBFMOpc, dl, VT, Op1: Op,
3168 Op2: CurDAG->getTargetConstant(Val: BitWidth - ShlAmount, DL: dl, VT),
3169 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1 - ShlAmount, DL: dl, VT));
3170 } else {
3171 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3172 assert(ShlAmount < 0 && "expected right shift");
3173 int ShrAmount = -ShlAmount;
3174 ShiftNode = CurDAG->getMachineNode(
3175 Opcode: UBFMOpc, dl, VT, Op1: Op, Op2: CurDAG->getTargetConstant(Val: ShrAmount, DL: dl, VT),
3176 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL: dl, VT));
3177 }
3178
3179 return SDValue(ShiftNode, 0);
3180}
3181
3182// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3183static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3184 bool BiggerPattern,
3185 const uint64_t NonZeroBits,
3186 SDValue &Src, int &DstLSB,
3187 int &Width);
3188
3189// For bit-field-positioning pattern "(shl VAL, N)".
3190static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3191 bool BiggerPattern,
3192 const uint64_t NonZeroBits,
3193 SDValue &Src, int &DstLSB,
3194 int &Width);
3195
3196/// Does this tree qualify as an attempt to move a bitfield into position,
3197/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3198static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3199 bool BiggerPattern, SDValue &Src,
3200 int &DstLSB, int &Width) {
3201 EVT VT = Op.getValueType();
3202 unsigned BitWidth = VT.getSizeInBits();
3203 (void)BitWidth;
3204 assert(BitWidth == 32 || BitWidth == 64);
3205
3206 KnownBits Known = CurDAG->computeKnownBits(Op);
3207
3208 // Non-zero in the sense that they're not provably zero, which is the key
3209 // point if we want to use this value
3210 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3211 if (!isShiftedMask_64(Value: NonZeroBits))
3212 return false;
3213
3214 switch (Op.getOpcode()) {
3215 default:
3216 break;
3217 case ISD::AND:
3218 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3219 NonZeroBits, Src, DstLSB, Width);
3220 case ISD::SHL:
3221 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3222 NonZeroBits, Src, DstLSB, Width);
3223 }
3224
3225 return false;
3226}
3227
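// Rough example for the AND form handled below: for i32,
// "(and (shl x, 3), 0xf8)" has NonZeroBits == 0xf8, so DstLSB == 3 and
// Width == 5; since ShlImm == DstLSB, Src is simply x and the node can be
// selected as a UBFIZ/BFI with lsb 3 and width 5.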
3228static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3229 bool BiggerPattern,
3230 const uint64_t NonZeroBits,
3231 SDValue &Src, int &DstLSB,
3232 int &Width) {
3233 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3234
3235 EVT VT = Op.getValueType();
3236 assert((VT == MVT::i32 || VT == MVT::i64) &&
3237 "Caller guarantees VT is one of i32 or i64");
3238 (void)VT;
3239
3240 uint64_t AndImm;
3241 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::AND, Imm&: AndImm))
3242 return false;
3243
3244 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3245 // 1) (AndImm & (1 << POS) == 0)
3246 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3247 //
3248 // 1) and 2) don't agree so something must be wrong (e.g., in
3249 // 'SelectionDAG::computeKnownBits')
3250 assert((~AndImm & NonZeroBits) == 0 &&
3251 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3252
3253 SDValue AndOp0 = Op.getOperand(i: 0);
3254
3255 uint64_t ShlImm;
3256 SDValue ShlOp0;
3257 if (isOpcWithIntImmediate(N: AndOp0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3258 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3259 ShlOp0 = AndOp0.getOperand(i: 0);
3260 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3261 isOpcWithIntImmediate(N: AndOp0.getOperand(i: 0).getNode(), Opc: ISD::SHL,
3262 Imm&: ShlImm)) {
3263 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3264
3265 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3266 SDValue ShlVal = AndOp0.getOperand(i: 0);
3267
3268 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3269 // expect VT to be MVT::i32.
3270 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3271
3272 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3273 ShlOp0 = Widen(CurDAG, N: ShlVal.getOperand(i: 0));
3274 } else
3275 return false;
3276
3277 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3278 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3279 // AndOp0+AND.
3280 if (!BiggerPattern && !AndOp0.hasOneUse())
3281 return false;
3282
3283 DstLSB = llvm::countr_zero(Val: NonZeroBits);
3284 Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);
3285
3286 // Bail out on large Width. This happens when no proper combining / constant
3287 // folding was performed.
3288 if (Width >= (int)VT.getSizeInBits()) {
3289    // If VT is i64, Width > 64 is impossible since NonZeroBits is uint64_t,
3290    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
3291    // to "val".
3292    // If VT is i32, Width >= 32 means:
3293    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3294    //   demands at least 'Width' bits (after the dag-combiner). Together with
3295    //   the `any_extend` Op (undefined higher bits), this indicates a missed
3296    //   combination when lowering the 'and' IR instruction to a machine one.
3297 LLVM_DEBUG(
3298 dbgs()
3299 << "Found large Width in bit-field-positioning -- this indicates no "
3300 "proper combining / constant folding was performed\n");
3301 return false;
3302 }
3303
3304 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3305 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3306 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3307 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3308 // which case it is not profitable to insert an extra shift.
3309 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3310 return false;
3311
3312 Src = getLeftShift(CurDAG, Op: ShlOp0, ShlAmount: ShlImm - DstLSB);
3313 return true;
3314}
3315
3316// For node (shl (and val, mask), N), returns true if the node is equivalent to
3317// UBFIZ.
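//
// Rough example: "(shl (and x, 0x3f), 2)" gives ShiftedAndImm == 0x3f, which
// is a plain mask, so Width == 6, DstLSB == 2 and Src == x, i.e. the node
// behaves like "ubfiz dst, x, #2, #6".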
3318static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3319 SDValue &Src, int &DstLSB,
3320 int &Width) {
3321 // Caller should have verified that N is a left shift with constant shift
3322 // amount; asserts that.
3323 assert(Op.getOpcode() == ISD::SHL &&
3324 "Op.getNode() should be a SHL node to call this function");
3325 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3326 "Op.getNode() should shift ShlImm to call this function");
3327
3328 uint64_t AndImm = 0;
3329 SDValue Op0 = Op.getOperand(i: 0);
3330 if (!isOpcWithIntImmediate(N: Op0.getNode(), Opc: ISD::AND, Imm&: AndImm))
3331 return false;
3332
3333 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3334 if (isMask_64(Value: ShiftedAndImm)) {
3335 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3336 // should end with Mask, and could be prefixed with random bits if those
3337 // bits are shifted out.
3338 //
3339 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3340 // the AND result corresponding to those bits are shifted out, so it's fine
3341 // to not extract them.
3342 Width = llvm::countr_one(Value: ShiftedAndImm);
3343 DstLSB = ShlImm;
3344 Src = Op0.getOperand(i: 0);
3345 return true;
3346 }
3347 return false;
3348}
3349
3350static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3351 bool BiggerPattern,
3352 const uint64_t NonZeroBits,
3353 SDValue &Src, int &DstLSB,
3354 int &Width) {
3355 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3356
3357 EVT VT = Op.getValueType();
3358 assert((VT == MVT::i32 || VT == MVT::i64) &&
3359 "Caller guarantees that type is i32 or i64");
3360 (void)VT;
3361
3362 uint64_t ShlImm;
3363 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SHL, Imm&: ShlImm))
3364 return false;
3365
3366 if (!BiggerPattern && !Op.hasOneUse())
3367 return false;
3368
3369 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3370 return true;
3371
3372 DstLSB = llvm::countr_zero(Val: NonZeroBits);
3373 Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);
3374
3375 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3376 return false;
3377
3378 Src = getLeftShift(CurDAG, Op: Op.getOperand(i: 0), ShlAmount: ShlImm - DstLSB);
3379 return true;
3380}
3381
3382static bool isShiftedMask(uint64_t Mask, EVT VT) {
3383 assert(VT == MVT::i32 || VT == MVT::i64);
3384 if (VT == MVT::i32)
3385 return isShiftedMask_32(Value: Mask);
3386 return isShiftedMask_64(Value: Mask);
3387}
3388
3389// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3390// inserted only sets known zero bits.
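//
// Rough example (i32): "or (and X, 0xffffff00), 0x5a" cannot use a single
// ORR-immediate (0x5a is not a logical immediate), so it is selected to a
// MOVi32imm of 0x5a followed by "bfxil wX, wTmp, #0, #8", where wTmp is the
// materialized constant.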
3391static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3392  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3393
3394 EVT VT = N->getValueType(ResNo: 0);
3395 if (VT != MVT::i32 && VT != MVT::i64)
3396 return false;
3397
3398 unsigned BitWidth = VT.getSizeInBits();
3399
3400 uint64_t OrImm;
3401 if (!isOpcWithIntImmediate(N, Opc: ISD::OR, Imm&: OrImm))
3402 return false;
3403
3404  // Skip this transformation if the OR immediate can be encoded directly in
3405  // an ORR instruction; in that case we would only trade an AND+ORR for
3406  // ORR+BFI/BFXIL, which is most likely performance neutral.
3407 if (AArch64_AM::isLogicalImmediate(imm: OrImm, regSize: BitWidth))
3408 return false;
3409
3410 uint64_t MaskImm;
3411 SDValue And = N->getOperand(Num: 0);
3412 // Must be a single use AND with an immediate operand.
3413 if (!And.hasOneUse() ||
3414 !isOpcWithIntImmediate(N: And.getNode(), Opc: ISD::AND, Imm&: MaskImm))
3415 return false;
3416
3417 // Compute the Known Zero for the AND as this allows us to catch more general
3418 // cases than just looking for AND with imm.
3419 KnownBits Known = CurDAG->computeKnownBits(Op: And);
3420
3421 // Non-zero in the sense that they're not provably zero, which is the key
3422 // point if we want to use this value.
3423 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3424
3425 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3426 if (!isShiftedMask(Mask: Known.Zero.getZExtValue(), VT))
3427 return false;
3428
3429 // The bits being inserted must only set those bits that are known to be zero.
3430 if ((OrImm & NotKnownZero) != 0) {
3431 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3432 // currently handle this case.
3433 return false;
3434 }
3435
3436 // BFI/BFXIL dst, src, #lsb, #width.
3437 int LSB = llvm::countr_one(Value: NotKnownZero);
3438 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3439
3440 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3441 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3442 unsigned ImmS = Width - 1;
3443
3444  // If we're creating a BFI instruction, avoid cases where we need more
3445 // instructions to materialize the BFI constant as compared to the original
3446 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3447 // should be no worse in this case.
3448 bool IsBFI = LSB != 0;
3449 uint64_t BFIImm = OrImm >> LSB;
3450 if (IsBFI && !AArch64_AM::isLogicalImmediate(imm: BFIImm, regSize: BitWidth)) {
3451 // We have a BFI instruction and we know the constant can't be materialized
3452 // with a ORR-immediate with the zero register.
3453 unsigned OrChunks = 0, BFIChunks = 0;
3454 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3455 if (((OrImm >> Shift) & 0xFFFF) != 0)
3456 ++OrChunks;
3457 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3458 ++BFIChunks;
3459 }
3460 if (BFIChunks > OrChunks)
3461 return false;
3462 }
3463
3464 // Materialize the constant to be inserted.
3465 SDLoc DL(N);
3466 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3467 SDNode *MOVI = CurDAG->getMachineNode(
3468 Opcode: MOVIOpc, dl: DL, VT, Op1: CurDAG->getTargetConstant(Val: BFIImm, DL, VT));
3469
3470 // Create the BFI/BFXIL instruction.
3471 SDValue Ops[] = {And.getOperand(i: 0), SDValue(MOVI, 0),
3472 CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3473 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3474 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3475 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3476 return true;
3477}
3478
3479static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3480 SDValue &ShiftedOperand,
3481 uint64_t &EncodedShiftImm) {
3482 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3483 if (!Dst.hasOneUse())
3484 return false;
3485
3486 EVT VT = Dst.getValueType();
3487 assert((VT == MVT::i32 || VT == MVT::i64) &&
3488 "Caller should guarantee that VT is one of i32 or i64");
3489 const unsigned SizeInBits = VT.getSizeInBits();
3490
3491 SDLoc DL(Dst.getNode());
3492 uint64_t AndImm, ShlImm;
3493 if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::AND, Imm&: AndImm) &&
3494 isShiftedMask_64(Value: AndImm)) {
3495 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3496 SDValue DstOp0 = Dst.getOperand(i: 0);
3497 if (!DstOp0.hasOneUse())
3498 return false;
3499
3500 // An example to illustrate the transformation
3501 // From:
3502 // lsr x8, x1, #1
3503 // and x8, x8, #0x3f80
3504 // bfxil x8, x1, #0, #7
3505 // To:
3506 // and x8, x23, #0x7f
3507 // ubfx x9, x23, #8, #7
3508 // orr x23, x8, x9, lsl #7
3509 //
3510 // The number of instructions remains the same, but ORR is faster than BFXIL
3511 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3512 // the dependency chain is improved after the transformation.
3513 uint64_t SrlImm;
3514 if (isOpcWithIntImmediate(N: DstOp0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3515 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(Val: AndImm);
3516 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3517 unsigned MaskWidth =
3518 llvm::countr_one(Value: AndImm >> NumTrailingZeroInShiftedMask);
3519 unsigned UBFMOpc =
3520 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3521 SDNode *UBFMNode = CurDAG->getMachineNode(
3522 Opcode: UBFMOpc, dl: DL, VT, Op1: DstOp0.getOperand(i: 0),
3523 Op2: CurDAG->getTargetConstant(Val: SrlImm + NumTrailingZeroInShiftedMask, DL,
3524 VT),
3525 Op3: CurDAG->getTargetConstant(
3526 Val: SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3527 ShiftedOperand = SDValue(UBFMNode, 0);
3528 EncodedShiftImm = AArch64_AM::getShifterImm(
3529 ST: AArch64_AM::LSL, Imm: NumTrailingZeroInShiftedMask);
3530 return true;
3531 }
3532 }
3533 return false;
3534 }
3535
3536 if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3537 ShiftedOperand = Dst.getOperand(i: 0);
3538 EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm);
3539 return true;
3540 }
3541
3542 uint64_t SrlImm;
3543 if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3544 ShiftedOperand = Dst.getOperand(i: 0);
3545 EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm);
3546 return true;
3547 }
3548 return false;
3549}
3550
3551// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3552// the operands and select it to AArch64::ORR with shifted registers if
3553// that's more efficient. Returns true iff selection to AArch64::ORR happens.
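//
// Rough example for the non-BiggerPattern path: for
//   %val2 = shl %val1, #3
//   %res  = or %val1, %val2
// the OR is selected to "orr res, val1, val1, lsl #3" instead of a BFI.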
3554static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3555 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3556 const bool BiggerPattern) {
3557 EVT VT = N->getValueType(ResNo: 0);
3558 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3559 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3560 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3561 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3562 assert((VT == MVT::i32 || VT == MVT::i64) &&
3563 "Expect result type to be i32 or i64 since N is combinable to BFM");
3564 SDLoc DL(N);
3565
3566 // Bail out if BFM simplifies away one node in BFM Dst.
3567 if (OrOpd1 != Dst)
3568 return false;
3569
3570 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3571 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3572 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3573 if (BiggerPattern) {
3574 uint64_t SrcAndImm;
3575 if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::AND, Imm&: SrcAndImm) &&
3576 isMask_64(Value: SrcAndImm) && OrOpd0.getOperand(i: 0) == Src) {
3577 // OrOpd0 = AND Src, #Mask
3578 // So BFM simplifies away one AND node from Src and doesn't simplify away
3579 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3580 // one node (from Rd), ORR is better since it has higher throughput and
3581 // smaller latency than BFM on many AArch64 processors (and for the rest
3582 // ORR is at least as good as BFM).
3583 SDValue ShiftedOperand;
3584 uint64_t EncodedShiftImm;
3585 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3586 EncodedShiftImm)) {
3587 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3588 CurDAG->getTargetConstant(Val: EncodedShiftImm, DL, VT)};
3589 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3590 return true;
3591 }
3592 }
3593 return false;
3594 }
3595
3596 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3597
3598 uint64_t ShlImm;
3599 if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
3600 if (OrOpd0.getOperand(i: 0) == Src && OrOpd0.hasOneUse()) {
3601 SDValue Ops[] = {
3602 Dst, Src,
3603 CurDAG->getTargetConstant(
3604 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
3605 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3606 return true;
3607 }
3608
3609 // Select the following pattern to left-shifted operand rather than BFI.
3610 // %val1 = op ..
3611 // %val2 = shl %val1, #imm
3612 // %res = or %val1, %val2
3613 //
3614 // If N is selected to be BFI, we know that
3615 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3616 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3617 //
3618 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3619 if (OrOpd0.getOperand(i: 0) == OrOpd1) {
3620 SDValue Ops[] = {
3621 OrOpd1, OrOpd1,
3622 CurDAG->getTargetConstant(
3623 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
3624 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3625 return true;
3626 }
3627 }
3628
3629 uint64_t SrlImm;
3630 if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
3631 // Select the following pattern to right-shifted operand rather than BFXIL.
3632 // %val1 = op ..
3633 // %val2 = lshr %val1, #imm
3634 // %res = or %val1, %val2
3635 //
3636 // If N is selected to be BFXIL, we know that
3637 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3638 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3639 //
3640 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3641 if (OrOpd0.getOperand(i: 0) == OrOpd1) {
3642 SDValue Ops[] = {
3643 OrOpd1, OrOpd1,
3644 CurDAG->getTargetConstant(
3645 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm), DL, VT)};
3646 CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
3647 return true;
3648 }
3649 }
3650
3651 return false;
3652}
3653
3654static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3655 SelectionDAG *CurDAG) {
3656  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3657
3658 EVT VT = N->getValueType(ResNo: 0);
3659 if (VT != MVT::i32 && VT != MVT::i64)
3660 return false;
3661
3662 unsigned BitWidth = VT.getSizeInBits();
3663
3664 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3665 // have the expected shape. Try to undo that.
3666
3667 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3668 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3669
3670  // Given an OR operation, check if we have the following pattern
3671  // ubfm c, b, imm, imm2 (or something that does the same job, see
3672  // isBitfieldExtractOp)
3673  // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3674 // countTrailingZeros(mask2) == imm2 - imm + 1
3675 // f = d | c
3676 // if yes, replace the OR instruction with:
3677 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3678
3679 // OR is commutative, check all combinations of operand order and values of
3680 // BiggerPattern, i.e.
3681 // Opd0, Opd1, BiggerPattern=false
3682 // Opd1, Opd0, BiggerPattern=false
3683 // Opd0, Opd1, BiggerPattern=true
3684 // Opd1, Opd0, BiggerPattern=true
3685 // Several of these combinations may match, so check with BiggerPattern=false
3686 // first since that will produce better results by matching more instructions
3687 // and/or inserting fewer extra instructions.
3688 for (int I = 0; I < 4; ++I) {
3689
3690 SDValue Dst, Src;
3691 unsigned ImmR, ImmS;
3692 bool BiggerPattern = I / 2;
3693 SDValue OrOpd0Val = N->getOperand(Num: I % 2);
3694 SDNode *OrOpd0 = OrOpd0Val.getNode();
3695 SDValue OrOpd1Val = N->getOperand(Num: (I + 1) % 2);
3696 SDNode *OrOpd1 = OrOpd1Val.getNode();
3697
3698 unsigned BFXOpc;
3699 int DstLSB, Width;
3700 if (isBitfieldExtractOp(CurDAG, N: OrOpd0, Opc&: BFXOpc, Opd0&: Src, Immr&: ImmR, Imms&: ImmS,
3701 NumberOfIgnoredLowBits, BiggerPattern)) {
3702 // Check that the returned opcode is compatible with the pattern,
3703 // i.e., same type and zero extended (U and not S)
3704 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3705 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3706 continue;
3707
3708 // Compute the width of the bitfield insertion
3709 DstLSB = 0;
3710 Width = ImmS - ImmR + 1;
3711      // FIXME: This constraint is to catch bitfield insertion; we may
3712      // want to widen the pattern if we want to grab the general bitfield
3713      // move case.
3714 if (Width <= 0)
3715 continue;
3716
3717 // If the mask on the insertee is correct, we have a BFXIL operation. We
3718 // can share the ImmR and ImmS values from the already-computed UBFM.
3719 } else if (isBitfieldPositioningOp(CurDAG, Op: OrOpd0Val,
3720 BiggerPattern,
3721 Src, DstLSB, Width)) {
3722 ImmR = (BitWidth - DstLSB) % BitWidth;
3723 ImmS = Width - 1;
3724 } else
3725 continue;
3726
3727 // Check the second part of the pattern
3728 EVT VT = OrOpd1Val.getValueType();
3729 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3730
3731 // Compute the Known Zero for the candidate of the first operand.
3732 // This allows to catch more general case than just looking for
3733 // AND with imm. Indeed, simplify-demanded-bits may have removed
3734 // the AND instruction because it proves it was useless.
3735 KnownBits Known = CurDAG->computeKnownBits(Op: OrOpd1Val);
3736
3737 // Check if there is enough room for the second operand to appear
3738 // in the first one
3739 APInt BitsToBeInserted =
3740 APInt::getBitsSet(numBits: Known.getBitWidth(), loBit: DstLSB, hiBit: DstLSB + Width);
3741
3742 if ((BitsToBeInserted & ~Known.Zero) != 0)
3743 continue;
3744
3745 // Set the first operand
3746 uint64_t Imm;
3747 if (isOpcWithIntImmediate(N: OrOpd1, Opc: ISD::AND, Imm) &&
3748 isBitfieldDstMask(DstMask: Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3749 // In that case, we can eliminate the AND
3750 Dst = OrOpd1->getOperand(Num: 0);
3751 else
3752 // Maybe the AND has been removed by simplify-demanded-bits
3753 // or is useful because it discards more bits
3754 Dst = OrOpd1Val;
3755
3756 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3757 // with shifted operand is more efficient.
3758 if (tryOrrWithShift(N, OrOpd0: OrOpd0Val, OrOpd1: OrOpd1Val, Src, Dst, CurDAG,
3759 BiggerPattern))
3760 return true;
3761
3762 // both parts match
3763 SDLoc DL(N);
3764 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3765 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3766 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3767 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3768 return true;
3769 }
3770
3771 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3772 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3773 // mask (e.g., 0x000ffff0).
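  // Rough example (i32): "or (and X, 0xff0000ff), (and Y, 0x00ffff00)" becomes
  // roughly "lsr wTmp, wY, #8" followed by a BFM inserting wTmp[15:0] into
  // bits [23:8] of X.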
3774 uint64_t Mask0Imm, Mask1Imm;
3775 SDValue And0 = N->getOperand(Num: 0);
3776 SDValue And1 = N->getOperand(Num: 1);
3777 if (And0.hasOneUse() && And1.hasOneUse() &&
3778 isOpcWithIntImmediate(N: And0.getNode(), Opc: ISD::AND, Imm&: Mask0Imm) &&
3779 isOpcWithIntImmediate(N: And1.getNode(), Opc: ISD::AND, Imm&: Mask1Imm) &&
3780 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3781 (isShiftedMask(Mask: Mask0Imm, VT) || isShiftedMask(Mask: Mask1Imm, VT))) {
3782
3783 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3784 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3785 // bits to be inserted.
3786 if (isShiftedMask(Mask: Mask0Imm, VT)) {
3787 std::swap(a&: And0, b&: And1);
3788 std::swap(a&: Mask0Imm, b&: Mask1Imm);
3789 }
3790
3791 SDValue Src = And1->getOperand(Num: 0);
3792 SDValue Dst = And0->getOperand(Num: 0);
3793 unsigned LSB = llvm::countr_zero(Val: Mask1Imm);
3794 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3795
3796 // The BFXIL inserts the low-order bits from a source register, so right
3797 // shift the needed bits into place.
3798 SDLoc DL(N);
3799 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3800 uint64_t LsrImm = LSB;
3801 if (Src->hasOneUse() &&
3802 isOpcWithIntImmediate(N: Src.getNode(), Opc: ISD::SRL, Imm&: LsrImm) &&
3803 (LsrImm + LSB) < BitWidth) {
3804 Src = Src->getOperand(Num: 0);
3805 LsrImm += LSB;
3806 }
3807
3808 SDNode *LSR = CurDAG->getMachineNode(
3809 Opcode: ShiftOpc, dl: DL, VT, Op1: Src, Op2: CurDAG->getTargetConstant(Val: LsrImm, DL, VT),
3810 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT));
3811
3812 // BFXIL is an alias of BFM, so translate to BFM operands.
3813 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3814 unsigned ImmS = Width - 1;
3815
3816 // Create the BFXIL instruction.
3817 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3818 CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3819 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3820 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3821 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3822 return true;
3823 }
3824
3825 return false;
3826}
3827
3828bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3829 if (N->getOpcode() != ISD::OR)
3830 return false;
3831
3832 APInt NUsefulBits;
3833 getUsefulBits(Op: SDValue(N, 0), UsefulBits&: NUsefulBits);
3834
3835  // If none of the bits are useful, just return UNDEF.
3836 if (!NUsefulBits) {
3837 CurDAG->SelectNodeTo(N, MachineOpc: TargetOpcode::IMPLICIT_DEF, VT: N->getValueType(ResNo: 0));
3838 return true;
3839 }
3840
3841 if (tryBitfieldInsertOpFromOr(N, UsefulBits: NUsefulBits, CurDAG))
3842 return true;
3843
3844 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3845}
3846
3847/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3848/// equivalent of a left shift by a constant amount followed by an and masking
3849/// out a contiguous set of bits.
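///
/// For example (i32): "(and (shl x, 3), 0x78)" is selected to
/// "UBFMWri x, #29, #3", i.e. "ubfiz w, w, #3, #4".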
3850bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3851 if (N->getOpcode() != ISD::AND)
3852 return false;
3853
3854 EVT VT = N->getValueType(ResNo: 0);
3855 if (VT != MVT::i32 && VT != MVT::i64)
3856 return false;
3857
3858 SDValue Op0;
3859 int DstLSB, Width;
3860 if (!isBitfieldPositioningOp(CurDAG, Op: SDValue(N, 0), /*BiggerPattern=*/false,
3861 Src&: Op0, DstLSB, Width))
3862 return false;
3863
3864 // ImmR is the rotate right amount.
3865 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3866 // ImmS is the most significant bit of the source to be moved.
3867 unsigned ImmS = Width - 1;
3868
3869 SDLoc DL(N);
3870 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3871 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3872 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3873 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3874 return true;
3875}
3876
3877/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3878/// variable shift/rotate instructions.
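///
/// For example (i64): "shl x, (and amt, 63)" drops the AND and selects
/// "lslv x, x, amt", since LSLV only reads the low 6 bits of the shift
/// amount anyway; "shl x, (sub 64, amt)" becomes a NEG followed by LSLV.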
3879bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3880 EVT VT = N->getValueType(ResNo: 0);
3881
3882 unsigned Opc;
3883 switch (N->getOpcode()) {
3884 case ISD::ROTR:
3885 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3886 break;
3887 case ISD::SHL:
3888 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3889 break;
3890 case ISD::SRL:
3891 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3892 break;
3893 case ISD::SRA:
3894 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3895 break;
3896 default:
3897 return false;
3898 }
3899
3900 uint64_t Size;
3901 uint64_t Bits;
3902 if (VT == MVT::i32) {
3903 Bits = 5;
3904 Size = 32;
3905 } else if (VT == MVT::i64) {
3906 Bits = 6;
3907 Size = 64;
3908 } else
3909 return false;
3910
3911 SDValue ShiftAmt = N->getOperand(Num: 1);
3912 SDLoc DL(N);
3913 SDValue NewShiftAmt;
3914
3915 // Skip over an extend of the shift amount.
3916 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3917 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3918 ShiftAmt = ShiftAmt->getOperand(Num: 0);
3919
3920 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3921 SDValue Add0 = ShiftAmt->getOperand(Num: 0);
3922 SDValue Add1 = ShiftAmt->getOperand(Num: 1);
3923 uint64_t Add0Imm;
3924 uint64_t Add1Imm;
3925 if (isIntImmediate(N: Add1, Imm&: Add1Imm) && (Add1Imm % Size == 0)) {
3926 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3927 // to avoid the ADD/SUB.
3928 NewShiftAmt = Add0;
3929 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3930 isIntImmediate(N: Add0, Imm&: Add0Imm) && Add0Imm != 0 &&
3931 (Add0Imm % Size == 0)) {
3932 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3933 // to generate a NEG instead of a SUB from a constant.
3934 unsigned NegOpc;
3935 unsigned ZeroReg;
3936 EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
3937 if (SubVT == MVT::i32) {
3938 NegOpc = AArch64::SUBWrr;
3939 ZeroReg = AArch64::WZR;
3940 } else {
3941 assert(SubVT == MVT::i64);
3942 NegOpc = AArch64::SUBXrr;
3943 ZeroReg = AArch64::XZR;
3944 }
3945 SDValue Zero =
3946 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
3947 MachineSDNode *Neg =
3948 CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
3949 NewShiftAmt = SDValue(Neg, 0);
3950 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3951 isIntImmediate(N: Add0, Imm&: Add0Imm) && (Add0Imm % Size == Size - 1)) {
3952 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3953 // to generate a NOT instead of a SUB from a constant.
3954 unsigned NotOpc;
3955 unsigned ZeroReg;
3956 EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
3957 if (SubVT == MVT::i32) {
3958 NotOpc = AArch64::ORNWrr;
3959 ZeroReg = AArch64::WZR;
3960 } else {
3961 assert(SubVT == MVT::i64);
3962 NotOpc = AArch64::ORNXrr;
3963 ZeroReg = AArch64::XZR;
3964 }
3965 SDValue Zero =
3966 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
3967 MachineSDNode *Not =
3968 CurDAG->getMachineNode(Opcode: NotOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
3969 NewShiftAmt = SDValue(Not, 0);
3970 } else
3971 return false;
3972 } else {
3973 // If the shift amount is masked with an AND, check that the mask covers the
3974 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3975 // the AND.
3976 uint64_t MaskImm;
3977 if (!isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: ISD::AND, Imm&: MaskImm) &&
3978 !isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: AArch64ISD::ANDS, Imm&: MaskImm))
3979 return false;
3980
3981 if ((unsigned)llvm::countr_one(Value: MaskImm) < Bits)
3982 return false;
3983
3984 NewShiftAmt = ShiftAmt->getOperand(Num: 0);
3985 }
3986
3987 // Narrow/widen the shift amount to match the size of the shift operation.
3988 if (VT == MVT::i32)
3989 NewShiftAmt = narrowIfNeeded(CurDAG, N: NewShiftAmt);
3990 else if (VT == MVT::i64 && NewShiftAmt->getValueType(ResNo: 0) == MVT::i32) {
3991 SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL, VT: MVT::i32);
3992 MachineSDNode *Ext = CurDAG->getMachineNode(
3993 Opcode: AArch64::SUBREG_TO_REG, dl: DL, VT,
3994 Op1: CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), Op2: NewShiftAmt, Op3: SubReg);
3995 NewShiftAmt = SDValue(Ext, 0);
3996 }
3997
3998 SDValue Ops[] = {N->getOperand(Num: 0), NewShiftAmt};
3999 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
4000 return true;
4001}
4002
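// Rough examples: FVal == 65536.0 with RegWidth == 32 yields FBits == 16
// (a *2^16 fixed-point conversion); with isReciprocal, FVal == 0.00390625
// (1/256) is inverted to 256.0 first and yields FBits == 8.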
4003static unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth,
4004 bool isReciprocal) {
4005 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
4006 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
4007 // x-register.
4008 //
4009 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4010 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4011 // integers.
4012 bool IsExact;
4013
4014 if (isReciprocal)
4015 if (!FVal.getExactInverse(Inv: &FVal))
4016 return 0;
4017
4018 // fbits is between 1 and 64 in the worst-case, which means the fmul
4019 // could have 2^64 as an actual operand. Need 65 bits of precision.
4020 APSInt IntVal(65, true);
4021 FVal.convertToInteger(Result&: IntVal, RM: APFloat::rmTowardZero, IsExact: &IsExact);
4022
4023 // N.b. isPowerOf2 also checks for > 0.
4024 if (!IsExact || !IntVal.isPowerOf2())
4025 return 0;
4026 unsigned FBits = IntVal.logBase2();
4027
4028 // Checks above should have guaranteed that we haven't lost information in
4029 // finding FBits, but it must still be in range.
4030 if (FBits == 0 || FBits > RegWidth)
4031 return 0;
4032 return FBits;
4033}
4034
4035static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
4036 SDValue &FixedPos,
4037 unsigned RegWidth,
4038 bool isReciprocal) {
4039 APFloat FVal(0.0);
4040 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: N))
4041 FVal = CN->getValueAPF();
4042 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(Val&: N)) {
4043 // Some otherwise illegal constants are allowed in this case.
4044 if (LN->getOperand(Num: 1).getOpcode() != AArch64ISD::ADDlow ||
4045 !isa<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1)))
4046 return false;
4047
4048 ConstantPoolSDNode *CN =
4049 dyn_cast<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1));
4050 FVal = cast<ConstantFP>(Val: CN->getConstVal())->getValueAPF();
4051 } else
4052 return false;
4053
4054 if (unsigned FBits =
4055 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4056 FixedPos = CurDAG->getTargetConstant(Val: FBits, DL: SDLoc(N), VT: MVT::i32);
4057 return true;
4058 }
4059
4060 return false;
4061}
4062
4063bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4064 unsigned RegWidth) {
4065 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4066 /*isReciprocal*/ false);
4067}
4068
4069bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4070 unsigned RegWidth) {
4071 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4072 N.getValueType().getScalarSizeInBits() ==
4073 N.getOperand(i: 0).getValueType().getScalarSizeInBits())
4074 N = N.getOperand(i: 0);
4075
4076 auto ImmToFloat = [RegWidth](APInt Imm) {
4077 switch (RegWidth) {
4078 case 16:
4079 return APFloat(APFloat::IEEEhalf(), Imm);
4080 case 32:
4081 return APFloat(APFloat::IEEEsingle(), Imm);
4082 case 64:
4083 return APFloat(APFloat::IEEEdouble(), Imm);
4084 default:
4085 llvm_unreachable("Unexpected RegWidth!");
4086 };
4087 };
4088
4089 APFloat FVal(0.0);
4090 switch (N->getOpcode()) {
4091 case AArch64ISD::MOVIshift:
4092 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(i: 0)
4093 << N.getConstantOperandVal(i: 1)));
4094 break;
4095 case AArch64ISD::FMOV:
4096 assert(RegWidth == 32 || RegWidth == 64);
4097 if (RegWidth == 32)
4098 FVal = ImmToFloat(
4099 APInt(RegWidth, (uint32_t)AArch64_AM::decodeAdvSIMDModImmType11(
4100 Imm: N.getConstantOperandVal(i: 0))));
4101 else
4102 FVal = ImmToFloat(APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(
4103 Imm: N.getConstantOperandVal(i: 0))));
4104 break;
4105 case AArch64ISD::DUP:
4106 if (isa<ConstantSDNode>(Val: N.getOperand(i: 0)))
4107 FVal = ImmToFloat(N.getConstantOperandAPInt(i: 0).trunc(width: RegWidth));
4108 else
4109 return false;
4110 break;
4111 default:
4112 return false;
4113 }
4114
4115 if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
4116 /*isReciprocal*/ false)) {
4117 FixedPos = CurDAG->getTargetConstant(Val: FBits, DL: SDLoc(N), VT: MVT::i32);
4118 return true;
4119 }
4120
4121 return false;
4122}
4123
4124bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4125 SDValue &FixedPos,
4126 unsigned RegWidth) {
4127 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4128 /*isReciprocal*/ true);
4129}
4130
4131// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4132// fields of the string, obtains the integer values from them, and combines
4133// these into a single value to be used in the MRS/MSR instruction.
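//
// For example, the (hypothetical) string "1:3:13:0:2" is packed as
// (1 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 == 0x5e82.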
4134static int getIntOperandFromRegisterString(StringRef RegString) {
4135 SmallVector<StringRef, 5> Fields;
4136 RegString.split(A&: Fields, Separator: ':');
4137
4138 if (Fields.size() == 1)
4139 return -1;
4140
4141 assert(Fields.size() == 5
4142 && "Invalid number of fields in read register string");
4143
4144 SmallVector<int, 5> Ops;
4145 bool AllIntFields = true;
4146
4147 for (StringRef Field : Fields) {
4148 unsigned IntField;
4149 AllIntFields &= !Field.getAsInteger(Radix: 10, Result&: IntField);
4150 Ops.push_back(Elt: IntField);
4151 }
4152
4153 assert(AllIntFields &&
4154 "Unexpected non-integer value in special register string.");
4155 (void)AllIntFields;
4156
4157 // Need to combine the integer fields of the string into a single value
4158 // based on the bit encoding of MRS/MSR instruction.
4159 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4160 (Ops[3] << 3) | (Ops[4]);
4161}
4162
4163// Lower the read_register intrinsic to an MRS instruction node if the special
4164// register string argument is either of the form detailed in the ACLE (the
4165// form described in getIntOperandFromRegisterString) or is a named register
4166// known by the MRS SysReg mapper.
4167bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4168 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
4169 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
4170 SDLoc DL(N);
4171
4172 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4173
4174 unsigned Opcode64Bit = AArch64::MRS;
4175 int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
4176 if (Imm == -1) {
4177 // No match, Use the sysreg mapper to map the remaining possible strings to
4178 // the value for the register to be used for the instruction operand.
4179 const auto *TheReg =
4180 AArch64SysReg::lookupSysRegByName(Name: RegString->getString());
4181 if (TheReg && TheReg->Readable &&
4182 TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
4183 Imm = TheReg->Encoding;
4184 else
4185 Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
4186
4187 if (Imm == -1) {
4188 // Still no match, see if this is "pc" or give up.
4189 if (!ReadIs128Bit && RegString->getString() == "pc") {
4190 Opcode64Bit = AArch64::ADR;
4191 Imm = 0;
4192 } else {
4193 return false;
4194 }
4195 }
4196 }
4197
4198 SDValue InChain = N->getOperand(Num: 0);
4199 SDValue SysRegImm = CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32);
4200 if (!ReadIs128Bit) {
4201 CurDAG->SelectNodeTo(N, MachineOpc: Opcode64Bit, VT1: MVT::i64, VT2: MVT::Other /* Chain */,
4202 Ops: {SysRegImm, InChain});
4203 } else {
4204 SDNode *MRRS = CurDAG->getMachineNode(
4205 Opcode: AArch64::MRRS, dl: DL,
4206 ResultTys: {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4207 Ops: {SysRegImm, InChain});
4208
4209 // Sysregs are not endian. The even register always contains the low half
4210 // of the register.
4211 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sube64, DL, VT: MVT::i64,
4212 Operand: SDValue(MRRS, 0));
4213 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::subo64, DL, VT: MVT::i64,
4214 Operand: SDValue(MRRS, 0));
4215 SDValue OutChain = SDValue(MRRS, 1);
4216
4217 ReplaceUses(F: SDValue(N, 0), T: Lo);
4218 ReplaceUses(F: SDValue(N, 1), T: Hi);
4219 ReplaceUses(F: SDValue(N, 2), T: OutChain);
4220 };
4221 return true;
4222}
4223
4224// Lower the write_register intrinsic to an MSR instruction node if the special
4225// register string argument is either of the form detailed in the ACLE (the
4226// form described in getIntOperandFromRegisterString) or is a named register
4227// known by the MSR SysReg mapper.
4228bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4229 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
4230 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
4231 SDLoc DL(N);
4232
4233 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4234
4235 if (!WriteIs128Bit) {
4236 // Check if the register was one of those allowed as the pstatefield value
4237 // in the MSR (immediate) instruction. To accept the values allowed in the
4238 // pstatefield for the MSR (immediate) instruction, we also require that an
4239    // immediate value has been provided as an argument; we know that this is
4240    // the case, as it has been ensured by semantic checking.
4241 auto trySelectPState = [&](auto PMapper, unsigned State) {
4242 if (PMapper) {
4243 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4244 "Expected a constant integer expression.");
4245 unsigned Reg = PMapper->Encoding;
4246 uint64_t Immed = N->getConstantOperandVal(Num: 2);
4247 CurDAG->SelectNodeTo(
4248 N, MachineOpc: State, VT: MVT::Other, Op1: CurDAG->getTargetConstant(Val: Reg, DL, VT: MVT::i32),
4249 Op2: CurDAG->getTargetConstant(Val: Immed, DL, VT: MVT::i16), Op3: N->getOperand(Num: 0));
4250 return true;
4251 }
4252 return false;
4253 };
4254
4255 if (trySelectPState(
4256 AArch64PState::lookupPStateImm0_15ByName(Name: RegString->getString()),
4257 AArch64::MSRpstateImm4))
4258 return true;
4259 if (trySelectPState(
4260 AArch64PState::lookupPStateImm0_1ByName(Name: RegString->getString()),
4261 AArch64::MSRpstateImm1))
4262 return true;
4263 }
4264
4265 int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
4266 if (Imm == -1) {
4267 // Use the sysreg mapper to attempt to map the remaining possible strings
4268 // to the value for the register to be used for the MSR (register)
4269 // instruction operand.
4270 auto TheReg = AArch64SysReg::lookupSysRegByName(Name: RegString->getString());
4271 if (TheReg && TheReg->Writeable &&
4272 TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
4273 Imm = TheReg->Encoding;
4274 else
4275 Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
4276
4277 if (Imm == -1)
4278 return false;
4279 }
4280
4281 SDValue InChain = N->getOperand(Num: 0);
4282 if (!WriteIs128Bit) {
4283 CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSR, VT: MVT::Other,
4284 Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
4285 Op2: N->getOperand(Num: 2), Op3: InChain);
4286 } else {
4287 // No endian swap. The lower half always goes into the even subreg, and the
4288    // higher half always into the odd subreg.
4289 SDNode *Pair = CurDAG->getMachineNode(
4290 Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped /* XSeqPair */,
4291 Ops: {CurDAG->getTargetConstant(Val: AArch64::XSeqPairsClassRegClass.getID(), DL,
4292 VT: MVT::i32),
4293 N->getOperand(Num: 2),
4294 CurDAG->getTargetConstant(Val: AArch64::sube64, DL, VT: MVT::i32),
4295 N->getOperand(Num: 3),
4296 CurDAG->getTargetConstant(Val: AArch64::subo64, DL, VT: MVT::i32)});
4297
4298 CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSRR, VT: MVT::Other,
4299 Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
4300 Op2: SDValue(Pair, 0), Op3: InChain);
4301 }
4302
4303 return true;
4304}
4305
4306/// We've got special pseudo-instructions for these
4307bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4308 unsigned Opcode;
4309 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
4310
4311 // Leave IR for LSE if subtarget supports it.
4312 if (Subtarget->hasLSE()) return false;
4313
4314 if (MemTy == MVT::i8)
4315 Opcode = AArch64::CMP_SWAP_8;
4316 else if (MemTy == MVT::i16)
4317 Opcode = AArch64::CMP_SWAP_16;
4318 else if (MemTy == MVT::i32)
4319 Opcode = AArch64::CMP_SWAP_32;
4320 else if (MemTy == MVT::i64)
4321 Opcode = AArch64::CMP_SWAP_64;
4322 else
4323 llvm_unreachable("Unknown AtomicCmpSwap type");
4324
4325 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4326 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
4327 N->getOperand(Num: 0)};
4328 SDNode *CmpSwap = CurDAG->getMachineNode(
4329 Opcode, dl: SDLoc(N),
4330 VTs: CurDAG->getVTList(VT1: RegTy, VT2: MVT::i32, VT3: MVT::Other), Ops);
4331
4332 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
4333 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
4334
4335 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
4336 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
4337 CurDAG->RemoveDeadNode(N);
4338
4339 return true;
4340}
4341
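// Rough examples for the unsigned add/sub immediate form below: with i32
// elements, a constant of 0x7f selects Imm == 0x7f with Shift == 0, while
// 0x1200 selects Imm == 0x12 with Shift == 8; 0x1234 matches neither form
// and is rejected.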
4342bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4343 SDValue &Shift, bool Negate) {
4344 if (!isa<ConstantSDNode>(Val: N))
4345 return false;
4346
4347 SDLoc DL(N);
4348 APInt Val =
4349 cast<ConstantSDNode>(Val&: N)->getAPIntValue().trunc(width: VT.getFixedSizeInBits());
4350
4351 if (Negate)
4352 Val = -Val;
4353
4354 switch (VT.SimpleTy) {
4355 case MVT::i8:
4356 // All immediates are supported.
4357 Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
4358 Imm = CurDAG->getTargetConstant(Val: Val.getZExtValue(), DL, VT: MVT::i32);
4359 return true;
4360 case MVT::i16:
4361 case MVT::i32:
4362 case MVT::i64:
4363 // Support 8bit unsigned immediates.
4364 if ((Val & ~0xff) == 0) {
4365 Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
4366 Imm = CurDAG->getTargetConstant(Val: Val.getZExtValue(), DL, VT: MVT::i32);
4367 return true;
4368 }
4369 // Support 16bit unsigned immediates that are a multiple of 256.
4370 if ((Val & ~0xff00) == 0) {
4371 Shift = CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32);
4372 Imm = CurDAG->getTargetConstant(Val: Val.lshr(shiftAmt: 8).getZExtValue(), DL, VT: MVT::i32);
4373 return true;
4374 }
4375 break;
4376 default:
4377 break;
4378 }
4379
4380 return false;
4381}
4382
4383bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4384 SDValue &Imm, SDValue &Shift,
4385 bool Negate) {
4386 if (!isa<ConstantSDNode>(Val: N))
4387 return false;
4388
4389 SDLoc DL(N);
4390 int64_t Val = cast<ConstantSDNode>(Val&: N)
4391 ->getAPIntValue()
4392 .trunc(width: VT.getFixedSizeInBits())
4393 .getSExtValue();
4394
4395 if (Negate)
4396 Val = -Val;
4397
4398 // Signed saturating instructions treat their immediate operand as unsigned,
4399 // whereas the related intrinsics define their operands to be signed. This
4400 // means we can only use the immediate form when the operand is non-negative.
4401 if (Val < 0)
4402 return false;
4403
4404 switch (VT.SimpleTy) {
4405 case MVT::i8:
4406 // All positive immediates are supported.
4407 Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
4408 Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4409 return true;
4410 case MVT::i16:
4411 case MVT::i32:
4412 case MVT::i64:
4413 // Support 8bit positive immediates.
4414 if (Val <= 255) {
4415 Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
4416 Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4417 return true;
4418 }
4419 // Support 16bit positive immediates that are a multiple of 256.
4420 if (Val <= 65280 && Val % 256 == 0) {
4421 Shift = CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32);
4422 Imm = CurDAG->getTargetConstant(Val: Val >> 8, DL, VT: MVT::i32);
4423 return true;
4424 }
4425 break;
4426 default:
4427 break;
4428 }
4429
4430 return false;
4431}
4432
4433bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4434 SDValue &Shift) {
4435 if (!isa<ConstantSDNode>(Val: N))
4436 return false;
4437
4438 SDLoc DL(N);
4439 int64_t Val = cast<ConstantSDNode>(Val&: N)
4440 ->getAPIntValue()
4441 .trunc(width: VT.getFixedSizeInBits())
4442 .getSExtValue();
4443 int32_t ImmVal, ShiftVal;
4444 if (!AArch64_AM::isSVECpyDupImm(SizeInBits: VT.getScalarSizeInBits(), Val, Imm&: ImmVal,
4445 Shift&: ShiftVal))
4446 return false;
4447
4448 Shift = CurDAG->getTargetConstant(Val: ShiftVal, DL, VT: MVT::i32);
4449 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL, VT: MVT::i32);
4450 return true;
4451}
4452
4453bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4454 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4455 int64_t ImmVal = CNode->getSExtValue();
4456 SDLoc DL(N);
4457 if (ImmVal >= -128 && ImmVal < 128) {
4458 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL, VT: MVT::i32);
4459 return true;
4460 }
4461 }
4462 return false;
4463}
4464
4465bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4466 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4467 uint64_t ImmVal = CNode->getZExtValue();
4468
4469 switch (VT.SimpleTy) {
4470 case MVT::i8:
4471 ImmVal &= 0xFF;
4472 break;
4473 case MVT::i16:
4474 ImmVal &= 0xFFFF;
4475 break;
4476 case MVT::i32:
4477 ImmVal &= 0xFFFFFFFF;
4478 break;
4479 case MVT::i64:
4480 break;
4481 default:
4482 llvm_unreachable("Unexpected type");
4483 }
4484
4485 if (ImmVal < 256) {
4486 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: MVT::i32);
4487 return true;
4488 }
4489 }
4490 return false;
4491}
4492
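// Select an immediate that is encodable as an SVE logical (bitmask) immediate.
// When Invert is set, the bitwise complement of the value is encoded instead.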
4493bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4494 bool Invert) {
4495 uint64_t ImmVal;
4496 if (auto CI = dyn_cast<ConstantSDNode>(Val&: N))
4497 ImmVal = CI->getZExtValue();
4498 else if (auto CFP = dyn_cast<ConstantFPSDNode>(Val&: N))
4499 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4500 else
4501 return false;
4502
4503 if (Invert)
4504 ImmVal = ~ImmVal;
4505
4506 uint64_t encoding;
4507 if (!AArch64_AM::isSVELogicalImm(SizeInBits: VT.getScalarSizeInBits(), ImmVal, Encoding&: encoding))
4508 return false;
4509
4510 Imm = CurDAG->getTargetConstant(Val: encoding, DL: SDLoc(N), VT: MVT::i64);
4511 return true;
4512}
4513
4514// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4515 // Rather than attempt to normalise everything, we can sometimes saturate the
4516// shift amount during selection. This function also allows for consistent
4517// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4518// required by the instructions.
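// For example, with Low = 1, High = 8 (as a right shift on i8 elements would
// use) and AllowSaturation set, a shift amount of 200 is clamped to 8 rather
// than rejected.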
4519bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4520 uint64_t High, bool AllowSaturation,
4521 SDValue &Imm) {
4522 if (auto *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
4523 uint64_t ImmVal = CN->getZExtValue();
4524
4525 // Reject shift amounts that are too small.
4526 if (ImmVal < Low)
4527 return false;
4528
4529 // Reject or saturate shift amounts that are too big.
4530 if (ImmVal > High) {
4531 if (!AllowSaturation)
4532 return false;
4533 ImmVal = High;
4534 }
4535
4536 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: MVT::i32);
4537 return true;
4538 }
4539
4540 return false;
4541}
4542
4543bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4544 // tagp(FrameIndex, IRGstack, tag_offset):
4545 // since the offset between FrameIndex and IRGstack is a compile-time
4546 // constant, this can be lowered to a single ADDG instruction.
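  // For example, tagp(<fi#0>, <irg_sp result>, 2) selects to
  //   TAGPstack <fi#0>, 0, <irg_sp result>, 2
  // which later expands to that single ADDG.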
4547 if (!(isa<FrameIndexSDNode>(Val: N->getOperand(Num: 1)))) {
4548 return false;
4549 }
4550
4551 SDValue IRG_SP = N->getOperand(Num: 2);
4552 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4553 IRG_SP->getConstantOperandVal(Num: 1) != Intrinsic::aarch64_irg_sp) {
4554 return false;
4555 }
4556
4557 const TargetLowering *TLI = getTargetLowering();
4558 SDLoc DL(N);
4559 int FI = cast<FrameIndexSDNode>(Val: N->getOperand(Num: 1))->getIndex();
4560 SDValue FiOp = CurDAG->getTargetFrameIndex(
4561 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
4562 int TagOffset = N->getConstantOperandVal(Num: 3);
4563
4564 SDNode *Out = CurDAG->getMachineNode(
4565 Opcode: AArch64::TAGPstack, dl: DL, VT: MVT::i64,
4566 Ops: {FiOp, CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), N->getOperand(Num: 2),
4567 CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
4568 ReplaceNode(F: N, T: Out);
4569 return true;
4570}
4571
4572void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4573 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4574 "llvm.aarch64.tagp third argument must be an immediate");
4575 if (trySelectStackSlotTagP(N))
4576 return;
4577 // FIXME: the above applies whenever the offset between Op1 and Op2 is a
4578 // compile-time constant, not just for stack allocations.
4579
4580 // General case for unrelated pointers in Op1 and Op2.
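  // The emitted sequence is: SUBP to compute the untagged pointer difference,
  // ADDXrr to add that difference back onto Op2 (so the result carries Op2's
  // tag), then ADDG to adjust the tag by the requested tag offset.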
4581 SDLoc DL(N);
4582 int TagOffset = N->getConstantOperandVal(Num: 3);
4583 SDNode *N1 = CurDAG->getMachineNode(Opcode: AArch64::SUBP, dl: DL, VT: MVT::i64,
4584 Ops: {N->getOperand(Num: 1), N->getOperand(Num: 2)});
4585 SDNode *N2 = CurDAG->getMachineNode(Opcode: AArch64::ADDXrr, dl: DL, VT: MVT::i64,
4586 Ops: {SDValue(N1, 0), N->getOperand(Num: 2)});
4587 SDNode *N3 = CurDAG->getMachineNode(
4588 Opcode: AArch64::ADDG, dl: DL, VT: MVT::i64,
4589 Ops: {SDValue(N2, 0), CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64),
4590 CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
4591 ReplaceNode(F: N, T: N3);
4592}
4593
4594bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4595 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4596
4597 // Bail when not a "cast"-like insert_subvector.
4598 if (N->getConstantOperandVal(Num: 2) != 0)
4599 return false;
4600 if (!N->getOperand(Num: 0).isUndef())
4601 return false;
4602
4603 // Bail when normal isel should do the job.
4604 EVT VT = N->getValueType(ResNo: 0);
4605 EVT InVT = N->getOperand(Num: 1).getValueType();
4606 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4607 return false;
4608 if (InVT.getSizeInBits() <= 128)
4609 return false;
4610
4611 // NOTE: We can only get here when doing fixed length SVE code generation.
4612 // We do manual selection because the types involved are not linked to real
4613 // registers (despite being legal) and must be coerced into SVE registers.
4614
4615 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4616 "Expected to insert into a packed scalable vector!");
4617
4618 SDLoc DL(N);
4619 auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
4620 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
4621 Op1: N->getOperand(Num: 1), Op2: RC));
4622 return true;
4623}
4624
4625bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4626 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4627
4628 // Bail when not a "cast"-like extract_subvector.
4629 if (N->getConstantOperandVal(Num: 1) != 0)
4630 return false;
4631
4632 // Bail when normal isel can do the job.
4633 EVT VT = N->getValueType(ResNo: 0);
4634 EVT InVT = N->getOperand(Num: 0).getValueType();
4635 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4636 return false;
4637 if (VT.getSizeInBits() <= 128)
4638 return false;
4639
4640 // NOTE: We can only get here when doing fixed length SVE code generation.
4641 // We do manual selection because the types involved are not linked to real
4642 // registers (despite being legal) and must be coerced into SVE registers.
4643
4644 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4645 "Expected to extract from a packed scalable vector!");
4646
4647 SDLoc DL(N);
4648 auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
4649 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
4650 Op1: N->getOperand(Num: 0), Op2: RC));
4651 return true;
4652}
4653
4654bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4655 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4656
4657 SDValue N0 = N->getOperand(Num: 0);
4658 SDValue N1 = N->getOperand(Num: 1);
4659
4660 EVT VT = N->getValueType(ResNo: 0);
4661 SDLoc DL(N);
4662
4663 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4664 // Rotate by a constant is a funnel shift in IR, which is expanded to
4665 // an OR with shifted operands.
4666 // We do the following transform:
4667 // OR N0, N1 -> xar (x, y, imm)
4668 // Where:
4669 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4670 // N0 = SHL_PRED true, V, splat(bits-imm)
4671 // V = (xor x, y)
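  // For example, with 32-bit elements and a rotate amount of 8:
  //   or (AArch64ISD::SHL_PRED pg, (xor x, y), splat(24)),
  //      (AArch64ISD::SRL_PRED pg, (xor x, y), splat(8))
  // selects to XAR_ZZZI_S x, y, #8.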
4672 if (VT.isScalableVector() &&
4673 (Subtarget->hasSVE2() ||
4674 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4675 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4676 N1.getOpcode() != AArch64ISD::SRL_PRED)
4677 std::swap(a&: N0, b&: N1);
4678 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4679 N1.getOpcode() != AArch64ISD::SRL_PRED)
4680 return false;
4681
4682 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4683 if (!TLI->isAllActivePredicate(DAG&: *CurDAG, N: N0.getOperand(i: 0)) ||
4684 !TLI->isAllActivePredicate(DAG&: *CurDAG, N: N1.getOperand(i: 0)))
4685 return false;
4686
4687 if (N0.getOperand(i: 1) != N1.getOperand(i: 1))
4688 return false;
4689
4690 SDValue R1, R2;
4691 bool IsXOROperand = true;
4692 if (N0.getOperand(i: 1).getOpcode() != ISD::XOR) {
4693 IsXOROperand = false;
4694 } else {
4695 R1 = N0.getOperand(i: 1).getOperand(i: 0);
4696 R2 = N1.getOperand(i: 1).getOperand(i: 1);
4697 }
4698
4699 APInt ShlAmt, ShrAmt;
4700 if (!ISD::isConstantSplatVector(N: N0.getOperand(i: 2).getNode(), SplatValue&: ShlAmt) ||
4701 !ISD::isConstantSplatVector(N: N1.getOperand(i: 2).getNode(), SplatValue&: ShrAmt))
4702 return false;
4703
4704 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4705 return false;
4706
4707 if (!IsXOROperand) {
4708 SDValue Zero = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
4709 SDNode *MOV = CurDAG->getMachineNode(Opcode: AArch64::MOVIv2d_ns, dl: DL, VT, Op1: Zero);
4710 SDValue MOVIV = SDValue(MOV, 0);
4711
4712 SDValue ZSub = CurDAG->getTargetConstant(Val: AArch64::zsub, DL, VT: MVT::i32);
4713 SDNode *SubRegToReg = CurDAG->getMachineNode(Opcode: AArch64::SUBREG_TO_REG, dl: DL,
4714 VT, Op1: Zero, Op2: MOVIV, Op3: ZSub);
4715
4716 R1 = N1->getOperand(Num: 1);
4717 R2 = SDValue(SubRegToReg, 0);
4718 }
4719
4720 SDValue Imm =
4721 CurDAG->getTargetConstant(Val: ShrAmt.getZExtValue(), DL, VT: MVT::i32);
4722
4723 SDValue Ops[] = {R1, R2, Imm};
4724 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4725 VT, Opcodes: {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4726 AArch64::XAR_ZZZI_D})) {
4727 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
4728 return true;
4729 }
4730 return false;
4731 }
4732
4733 // We have the Neon SHA3 XAR operation for v2i64, but for the types
4734 // v4i32, v8i16 and v16i8 we can use the SVE2 XAR operation when it is
4735 // available.
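  // In that case the 64/128-bit fixed operands are widened into SVE registers
  // with INSERT_SUBREG, the scalable XAR_ZZZI_* instruction is emitted, and
  // the fixed-length result is recovered with EXTRACT_SUBREG.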
4736 EVT SVT;
4737 switch (VT.getSimpleVT().SimpleTy) {
4738 case MVT::v4i32:
4739 case MVT::v2i32:
4740 SVT = MVT::nxv4i32;
4741 break;
4742 case MVT::v8i16:
4743 case MVT::v4i16:
4744 SVT = MVT::nxv8i16;
4745 break;
4746 case MVT::v16i8:
4747 case MVT::v8i8:
4748 SVT = MVT::nxv16i8;
4749 break;
4750 case MVT::v2i64:
4751 case MVT::v1i64:
4752 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4753 break;
4754 default:
4755 return false;
4756 }
4757
4758 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4759 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4760 return false;
4761
4762 if (N0->getOpcode() != AArch64ISD::VSHL ||
4763 N1->getOpcode() != AArch64ISD::VLSHR)
4764 return false;
4765
4766 if (N0->getOperand(Num: 0) != N1->getOperand(Num: 0))
4767 return false;
4768
4769 SDValue R1, R2;
4770 bool IsXOROperand = true;
4771 if (N1->getOperand(Num: 0)->getOpcode() != ISD::XOR) {
4772 IsXOROperand = false;
4773 } else {
4774 SDValue XOR = N0.getOperand(i: 0);
4775 R1 = XOR.getOperand(i: 0);
4776 R2 = XOR.getOperand(i: 1);
4777 }
4778
4779 unsigned HsAmt = N0.getConstantOperandVal(i: 1);
4780 unsigned ShAmt = N1.getConstantOperandVal(i: 1);
4781
4782 SDValue Imm = CurDAG->getTargetConstant(
4783 Val: ShAmt, DL, VT: N0.getOperand(i: 1).getValueType(), isOpaque: false);
4784
4785 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4786 if (ShAmt + HsAmt != VTSizeInBits)
4787 return false;
4788
4789 if (!IsXOROperand) {
4790 SDValue Zero = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
4791 SDNode *MOV =
4792 CurDAG->getMachineNode(Opcode: AArch64::MOVIv2d_ns, dl: DL, VT: MVT::v2i64, Op1: Zero);
4793 SDValue MOVIV = SDValue(MOV, 0);
4794
4795 R1 = N1->getOperand(Num: 0);
4796 R2 = MOVIV;
4797 }
4798
4799 if (SVT != VT) {
4800 SDValue Undef =
4801 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: SVT), 0);
4802
4803 if (SVT.isScalableVector() && VT.is64BitVector()) {
4804 EVT QVT = VT.getDoubleNumVectorElementsVT(Context&: *CurDAG->getContext());
4805
4806 SDValue UndefQ = SDValue(
4807 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: QVT), 0);
4808 SDValue DSub = CurDAG->getTargetConstant(Val: AArch64::dsub, DL, VT: MVT::i32);
4809
4810 R1 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: QVT,
4811 Op1: UndefQ, Op2: R1, Op3: DSub),
4812 0);
4813 if (R2.getValueType() == VT)
4814 R2 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: QVT,
4815 Op1: UndefQ, Op2: R2, Op3: DSub),
4816 0);
4817 }
4818
4819 SDValue SubReg = CurDAG->getTargetConstant(
4820 Val: (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, VT: MVT::i32);
4821
4822 R1 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: SVT, Op1: Undef,
4823 Op2: R1, Op3: SubReg),
4824 0);
4825
4826 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4827 R2 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: SVT,
4828 Op1: Undef, Op2: R2, Op3: SubReg),
4829 0);
4830 }
4831
4832 SDValue Ops[] = {R1, R2, Imm};
4833 SDNode *XAR = nullptr;
4834
4835 if (SVT.isScalableVector()) {
4836 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4837 VT: SVT, Opcodes: {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4838 AArch64::XAR_ZZZI_D}))
4839 XAR = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: SVT, Ops);
4840 } else {
4841 XAR = CurDAG->getMachineNode(Opcode: AArch64::XAR, dl: DL, VT: SVT, Ops);
4842 }
4843
4844 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4845
4846 if (SVT != VT) {
4847 if (VT.is64BitVector() && SVT.isScalableVector()) {
4848 EVT QVT = VT.getDoubleNumVectorElementsVT(Context&: *CurDAG->getContext());
4849
4850 SDValue ZSub = CurDAG->getTargetConstant(Val: AArch64::zsub, DL, VT: MVT::i32);
4851 SDNode *Q = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT: QVT,
4852 Op1: SDValue(XAR, 0), Op2: ZSub);
4853
4854 SDValue DSub = CurDAG->getTargetConstant(Val: AArch64::dsub, DL, VT: MVT::i32);
4855 XAR = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT,
4856 Op1: SDValue(Q, 0), Op2: DSub);
4857 } else {
4858 SDValue SubReg = CurDAG->getTargetConstant(
4859 Val: (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4860 VT: MVT::i32);
4861 XAR = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT,
4862 Op1: SDValue(XAR, 0), Op2: SubReg);
4863 }
4864 }
4865 ReplaceNode(F: N, T: XAR);
4866 return true;
4867}
4868
4869void AArch64DAGToDAGISel::Select(SDNode *Node) {
4870 // If we have a custom node, we already have selected!
4871 if (Node->isMachineOpcode()) {
4872 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4873 Node->setNodeId(-1);
4874 return;
4875 }
4876
4877 // A few cases need custom selection.
4878 EVT VT = Node->getValueType(ResNo: 0);
4879
4880 switch (Node->getOpcode()) {
4881 default:
4882 break;
4883
4884 case ISD::ATOMIC_CMP_SWAP:
4885 if (SelectCMP_SWAP(N: Node))
4886 return;
4887 break;
4888
4889 case ISD::READ_REGISTER:
4890 case AArch64ISD::MRRS:
4891 if (tryReadRegister(N: Node))
4892 return;
4893 break;
4894
4895 case ISD::WRITE_REGISTER:
4896 case AArch64ISD::MSRR:
4897 if (tryWriteRegister(N: Node))
4898 return;
4899 break;
4900
4901 case ISD::LOAD: {
4902 // Try to select as an indexed load. Fall through to normal processing
4903 // if we can't.
4904 if (tryIndexedLoad(N: Node))
4905 return;
4906 break;
4907 }
4908
4909 case ISD::SRL:
4910 case ISD::AND:
4911 case ISD::SRA:
4912 case ISD::SIGN_EXTEND_INREG:
4913 if (tryBitfieldExtractOp(N: Node))
4914 return;
4915 if (tryBitfieldInsertInZeroOp(N: Node))
4916 return;
4917 [[fallthrough]];
4918 case ISD::ROTR:
4919 case ISD::SHL:
4920 if (tryShiftAmountMod(N: Node))
4921 return;
4922 break;
4923
4924 case ISD::SIGN_EXTEND:
4925 if (tryBitfieldExtractOpFromSExt(N: Node))
4926 return;
4927 break;
4928
4929 case ISD::OR:
4930 if (tryBitfieldInsertOp(N: Node))
4931 return;
4932 if (trySelectXAR(N: Node))
4933 return;
4934 break;
4935
4936 case ISD::EXTRACT_SUBVECTOR: {
4937 if (trySelectCastScalableToFixedLengthVector(N: Node))
4938 return;
4939 break;
4940 }
4941
4942 case ISD::INSERT_SUBVECTOR: {
4943 if (trySelectCastFixedLengthToScalableVector(N: Node))
4944 return;
4945 break;
4946 }
4947
4948 case ISD::Constant: {
4949 // Materialize zero constants as copies from WZR/XZR. This allows
4950 // the coalescer to propagate these into other instructions.
4951 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Val: Node);
4952 if (ConstNode->isZero()) {
4953 if (VT == MVT::i32) {
4954 SDValue New = CurDAG->getCopyFromReg(
4955 Chain: CurDAG->getEntryNode(), dl: SDLoc(Node), Reg: AArch64::WZR, VT: MVT::i32);
4956 ReplaceNode(F: Node, T: New.getNode());
4957 return;
4958 } else if (VT == MVT::i64) {
4959 SDValue New = CurDAG->getCopyFromReg(
4960 Chain: CurDAG->getEntryNode(), dl: SDLoc(Node), Reg: AArch64::XZR, VT: MVT::i64);
4961 ReplaceNode(F: Node, T: New.getNode());
4962 return;
4963 }
4964 }
4965 break;
4966 }
4967
4968 case ISD::FrameIndex: {
4969 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4970 int FI = cast<FrameIndexSDNode>(Val: Node)->getIndex();
4971 unsigned Shifter = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0);
4972 const TargetLowering *TLI = getTargetLowering();
4973 SDValue TFI = CurDAG->getTargetFrameIndex(
4974 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
4975 SDLoc DL(Node);
4976 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32),
4977 CurDAG->getTargetConstant(Val: Shifter, DL, VT: MVT::i32) };
4978 CurDAG->SelectNodeTo(N: Node, MachineOpc: AArch64::ADDXri, VT: MVT::i64, Ops);
4979 return;
4980 }
4981 case ISD::INTRINSIC_W_CHAIN: {
4982 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
4983 switch (IntNo) {
4984 default:
4985 break;
4986 case Intrinsic::aarch64_gcsss: {
4987 SDLoc DL(Node);
4988 SDValue Chain = Node->getOperand(Num: 0);
4989 SDValue Val = Node->getOperand(Num: 2);
4990 SDValue Zero = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::XZR, VT: MVT::i64);
4991 SDNode *SS1 =
4992 CurDAG->getMachineNode(Opcode: AArch64::GCSSS1, dl: DL, VT: MVT::Other, Op1: Val, Op2: Chain);
4993 SDNode *SS2 = CurDAG->getMachineNode(Opcode: AArch64::GCSSS2, dl: DL, VT1: MVT::i64,
4994 VT2: MVT::Other, Op1: Zero, Op2: SDValue(SS1, 0));
4995 ReplaceNode(F: Node, T: SS2);
4996 return;
4997 }
4998 case Intrinsic::aarch64_ldaxp:
4999 case Intrinsic::aarch64_ldxp: {
5000 unsigned Op =
5001 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5002 SDValue MemAddr = Node->getOperand(Num: 2);
5003 SDLoc DL(Node);
5004 SDValue Chain = Node->getOperand(Num: 0);
5005
5006 SDNode *Ld = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i64, VT2: MVT::i64,
5007 VT3: MVT::Other, Op1: MemAddr, Op2: Chain);
5008
5009 // Transfer memoperands.
5010 MachineMemOperand *MemOp =
5011 cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
5012 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
5013 ReplaceNode(F: Node, T: Ld);
5014 return;
5015 }
5016 case Intrinsic::aarch64_stlxp:
5017 case Intrinsic::aarch64_stxp: {
5018 unsigned Op =
5019 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5020 SDLoc DL(Node);
5021 SDValue Chain = Node->getOperand(Num: 0);
5022 SDValue ValLo = Node->getOperand(Num: 2);
5023 SDValue ValHi = Node->getOperand(Num: 3);
5024 SDValue MemAddr = Node->getOperand(Num: 4);
5025
5026 // Place arguments in the right order.
5027 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5028
5029 SDNode *St = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops);
5030 // Transfer memoperands.
5031 MachineMemOperand *MemOp =
5032 cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
5033 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});
5034
5035 ReplaceNode(F: Node, T: St);
5036 return;
5037 }
5038 case Intrinsic::aarch64_neon_ld1x2:
5039 if (VT == MVT::v8i8) {
5040 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8b, SubRegIdx: AArch64::dsub0);
5041 return;
5042 } else if (VT == MVT::v16i8) {
5043 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov16b, SubRegIdx: AArch64::qsub0);
5044 return;
5045 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5046 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4h, SubRegIdx: AArch64::dsub0);
5047 return;
5048 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5049 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8h, SubRegIdx: AArch64::qsub0);
5050 return;
5051 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5052 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2s, SubRegIdx: AArch64::dsub0);
5053 return;
5054 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5055 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4s, SubRegIdx: AArch64::qsub0);
5056 return;
5057 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5058 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
5059 return;
5060 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5061 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2d, SubRegIdx: AArch64::qsub0);
5062 return;
5063 }
5064 break;
5065 case Intrinsic::aarch64_neon_ld1x3:
5066 if (VT == MVT::v8i8) {
5067 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8b, SubRegIdx: AArch64::dsub0);
5068 return;
5069 } else if (VT == MVT::v16i8) {
5070 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev16b, SubRegIdx: AArch64::qsub0);
5071 return;
5072 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5073 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4h, SubRegIdx: AArch64::dsub0);
5074 return;
5075 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5076 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8h, SubRegIdx: AArch64::qsub0);
5077 return;
5078 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5079 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2s, SubRegIdx: AArch64::dsub0);
5080 return;
5081 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5082 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4s, SubRegIdx: AArch64::qsub0);
5083 return;
5084 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5085 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
5086 return;
5087 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5088 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2d, SubRegIdx: AArch64::qsub0);
5089 return;
5090 }
5091 break;
5092 case Intrinsic::aarch64_neon_ld1x4:
5093 if (VT == MVT::v8i8) {
5094 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8b, SubRegIdx: AArch64::dsub0);
5095 return;
5096 } else if (VT == MVT::v16i8) {
5097 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv16b, SubRegIdx: AArch64::qsub0);
5098 return;
5099 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5100 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4h, SubRegIdx: AArch64::dsub0);
5101 return;
5102 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5103 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8h, SubRegIdx: AArch64::qsub0);
5104 return;
5105 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5106 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2s, SubRegIdx: AArch64::dsub0);
5107 return;
5108 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5109 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4s, SubRegIdx: AArch64::qsub0);
5110 return;
5111 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5112 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
5113 return;
5114 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5115 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2d, SubRegIdx: AArch64::qsub0);
5116 return;
5117 }
5118 break;
5119 case Intrinsic::aarch64_neon_ld2:
5120 if (VT == MVT::v8i8) {
5121 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8b, SubRegIdx: AArch64::dsub0);
5122 return;
5123 } else if (VT == MVT::v16i8) {
5124 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov16b, SubRegIdx: AArch64::qsub0);
5125 return;
5126 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5127 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4h, SubRegIdx: AArch64::dsub0);
5128 return;
5129 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5130 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8h, SubRegIdx: AArch64::qsub0);
5131 return;
5132 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5133 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2s, SubRegIdx: AArch64::dsub0);
5134 return;
5135 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5136 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4s, SubRegIdx: AArch64::qsub0);
5137 return;
5138 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5139 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
5140 return;
5141 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5142 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2d, SubRegIdx: AArch64::qsub0);
5143 return;
5144 }
5145 break;
5146 case Intrinsic::aarch64_neon_ld3:
5147 if (VT == MVT::v8i8) {
5148 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8b, SubRegIdx: AArch64::dsub0);
5149 return;
5150 } else if (VT == MVT::v16i8) {
5151 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev16b, SubRegIdx: AArch64::qsub0);
5152 return;
5153 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5154 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4h, SubRegIdx: AArch64::dsub0);
5155 return;
5156 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5157 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8h, SubRegIdx: AArch64::qsub0);
5158 return;
5159 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5160 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2s, SubRegIdx: AArch64::dsub0);
5161 return;
5162 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5163 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4s, SubRegIdx: AArch64::qsub0);
5164 return;
5165 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5166 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
5167 return;
5168 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5169 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2d, SubRegIdx: AArch64::qsub0);
5170 return;
5171 }
5172 break;
5173 case Intrinsic::aarch64_neon_ld4:
5174 if (VT == MVT::v8i8) {
5175 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8b, SubRegIdx: AArch64::dsub0);
5176 return;
5177 } else if (VT == MVT::v16i8) {
5178 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv16b, SubRegIdx: AArch64::qsub0);
5179 return;
5180 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5181 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4h, SubRegIdx: AArch64::dsub0);
5182 return;
5183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5184 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8h, SubRegIdx: AArch64::qsub0);
5185 return;
5186 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5187 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2s, SubRegIdx: AArch64::dsub0);
5188 return;
5189 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5190 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4s, SubRegIdx: AArch64::qsub0);
5191 return;
5192 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5193 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
5194 return;
5195 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5196 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2d, SubRegIdx: AArch64::qsub0);
5197 return;
5198 }
5199 break;
5200 case Intrinsic::aarch64_neon_ld2r:
5201 if (VT == MVT::v8i8) {
5202 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8b, SubRegIdx: AArch64::dsub0);
5203 return;
5204 } else if (VT == MVT::v16i8) {
5205 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv16b, SubRegIdx: AArch64::qsub0);
5206 return;
5207 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5208 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4h, SubRegIdx: AArch64::dsub0);
5209 return;
5210 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5211 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8h, SubRegIdx: AArch64::qsub0);
5212 return;
5213 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5214 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2s, SubRegIdx: AArch64::dsub0);
5215 return;
5216 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5217 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4s, SubRegIdx: AArch64::qsub0);
5218 return;
5219 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5220 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv1d, SubRegIdx: AArch64::dsub0);
5221 return;
5222 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5223 SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2d, SubRegIdx: AArch64::qsub0);
5224 return;
5225 }
5226 break;
5227 case Intrinsic::aarch64_neon_ld3r:
5228 if (VT == MVT::v8i8) {
5229 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8b, SubRegIdx: AArch64::dsub0);
5230 return;
5231 } else if (VT == MVT::v16i8) {
5232 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv16b, SubRegIdx: AArch64::qsub0);
5233 return;
5234 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5235 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4h, SubRegIdx: AArch64::dsub0);
5236 return;
5237 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5238 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8h, SubRegIdx: AArch64::qsub0);
5239 return;
5240 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5241 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2s, SubRegIdx: AArch64::dsub0);
5242 return;
5243 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5244 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4s, SubRegIdx: AArch64::qsub0);
5245 return;
5246 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5247 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv1d, SubRegIdx: AArch64::dsub0);
5248 return;
5249 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5250 SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2d, SubRegIdx: AArch64::qsub0);
5251 return;
5252 }
5253 break;
5254 case Intrinsic::aarch64_neon_ld4r:
5255 if (VT == MVT::v8i8) {
5256 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8b, SubRegIdx: AArch64::dsub0);
5257 return;
5258 } else if (VT == MVT::v16i8) {
5259 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv16b, SubRegIdx: AArch64::qsub0);
5260 return;
5261 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5262 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4h, SubRegIdx: AArch64::dsub0);
5263 return;
5264 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5265 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8h, SubRegIdx: AArch64::qsub0);
5266 return;
5267 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5268 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2s, SubRegIdx: AArch64::dsub0);
5269 return;
5270 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5271 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4s, SubRegIdx: AArch64::qsub0);
5272 return;
5273 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5274 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv1d, SubRegIdx: AArch64::dsub0);
5275 return;
5276 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5277 SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2d, SubRegIdx: AArch64::qsub0);
5278 return;
5279 }
5280 break;
5281 case Intrinsic::aarch64_neon_ld2lane:
5282 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5283 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i8);
5284 return;
5285 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5286 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5287 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i16);
5288 return;
5289 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5290 VT == MVT::v2f32) {
5291 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i32);
5292 return;
5293 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5294 VT == MVT::v1f64) {
5295 SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i64);
5296 return;
5297 }
5298 break;
5299 case Intrinsic::aarch64_neon_ld3lane:
5300 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5301 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i8);
5302 return;
5303 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5304 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5305 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i16);
5306 return;
5307 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5308 VT == MVT::v2f32) {
5309 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i32);
5310 return;
5311 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5312 VT == MVT::v1f64) {
5313 SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i64);
5314 return;
5315 }
5316 break;
5317 case Intrinsic::aarch64_neon_ld4lane:
5318 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5319 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i8);
5320 return;
5321 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5322 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5323 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i16);
5324 return;
5325 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5326 VT == MVT::v2f32) {
5327 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i32);
5328 return;
5329 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5330 VT == MVT::v1f64) {
5331 SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i64);
5332 return;
5333 }
5334 break;
5335 case Intrinsic::aarch64_ld64b:
5336 SelectLoad(N: Node, NumVecs: 8, Opc: AArch64::LD64B, SubRegIdx: AArch64::x8sub_0);
5337 return;
5338 case Intrinsic::aarch64_sve_ld2q_sret: {
5339 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 4, Opc_ri: AArch64::LD2Q_IMM, Opc_rr: AArch64::LD2Q, IsIntr: true);
5340 return;
5341 }
5342 case Intrinsic::aarch64_sve_ld3q_sret: {
5343 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 4, Opc_ri: AArch64::LD3Q_IMM, Opc_rr: AArch64::LD3Q, IsIntr: true);
5344 return;
5345 }
5346 case Intrinsic::aarch64_sve_ld4q_sret: {
5347 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 4, Opc_ri: AArch64::LD4Q_IMM, Opc_rr: AArch64::LD4Q, IsIntr: true);
5348 return;
5349 }
5350 case Intrinsic::aarch64_sve_ld2_sret: {
5351 if (VT == MVT::nxv16i8) {
5352 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD2B_IMM, Opc_rr: AArch64::LD2B,
5353 IsIntr: true);
5354 return;
5355 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5356 VT == MVT::nxv8bf16) {
5357 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD2H_IMM, Opc_rr: AArch64::LD2H,
5358 IsIntr: true);
5359 return;
5360 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5361 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD2W_IMM, Opc_rr: AArch64::LD2W,
5362 IsIntr: true);
5363 return;
5364 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5365 SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD2D_IMM, Opc_rr: AArch64::LD2D,
5366 IsIntr: true);
5367 return;
5368 }
5369 break;
5370 }
5371 case Intrinsic::aarch64_sve_ld1_pn_x2: {
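    // In streaming mode with SME2 the pseudo variants are selected; otherwise
    // the SVE2p1 instructions are used directly. Without either feature we
    // simply break and leave the node to the generic selector.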
5372 if (VT == MVT::nxv16i8) {
5373 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5374 SelectContiguousMultiVectorLoad(
5375 N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD1B_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_2Z_PSEUDO);
5376 else if (Subtarget->hasSVE2p1())
5377 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD1B_2Z_IMM,
5378 Opc_rr: AArch64::LD1B_2Z);
5379 else
5380 break;
5381 return;
5382 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5383 VT == MVT::nxv8bf16) {
5384 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5385 SelectContiguousMultiVectorLoad(
5386 N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD1H_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_2Z_PSEUDO);
5387 else if (Subtarget->hasSVE2p1())
5388 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD1H_2Z_IMM,
5389 Opc_rr: AArch64::LD1H_2Z);
5390 else
5391 break;
5392 return;
5393 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5394 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5395 SelectContiguousMultiVectorLoad(
5396 N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD1W_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_2Z_PSEUDO);
5397 else if (Subtarget->hasSVE2p1())
5398 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD1W_2Z_IMM,
5399 Opc_rr: AArch64::LD1W_2Z);
5400 else
5401 break;
5402 return;
5403 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5404 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5405 SelectContiguousMultiVectorLoad(
5406 N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD1D_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_2Z_PSEUDO);
5407 else if (Subtarget->hasSVE2p1())
5408 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD1D_2Z_IMM,
5409 Opc_rr: AArch64::LD1D_2Z);
5410 else
5411 break;
5412 return;
5413 }
5414 break;
5415 }
5416 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5417 if (VT == MVT::nxv16i8) {
5418 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5419 SelectContiguousMultiVectorLoad(
5420 N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD1B_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_4Z_PSEUDO);
5421 else if (Subtarget->hasSVE2p1())
5422 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD1B_4Z_IMM,
5423 Opc_rr: AArch64::LD1B_4Z);
5424 else
5425 break;
5426 return;
5427 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5428 VT == MVT::nxv8bf16) {
5429 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5430 SelectContiguousMultiVectorLoad(
5431 N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD1H_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_4Z_PSEUDO);
5432 else if (Subtarget->hasSVE2p1())
5433 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD1H_4Z_IMM,
5434 Opc_rr: AArch64::LD1H_4Z);
5435 else
5436 break;
5437 return;
5438 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5439 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5440 SelectContiguousMultiVectorLoad(
5441 N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD1W_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_4Z_PSEUDO);
5442 else if (Subtarget->hasSVE2p1())
5443 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD1W_4Z_IMM,
5444 Opc_rr: AArch64::LD1W_4Z);
5445 else
5446 break;
5447 return;
5448 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5449 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5450 SelectContiguousMultiVectorLoad(
5451 N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD1D_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_4Z_PSEUDO);
5452 else if (Subtarget->hasSVE2p1())
5453 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD1D_4Z_IMM,
5454 Opc_rr: AArch64::LD1D_4Z);
5455 else
5456 break;
5457 return;
5458 }
5459 break;
5460 }
5461 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5462 if (VT == MVT::nxv16i8) {
5463 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5464 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0,
5465 Opc_ri: AArch64::LDNT1B_2Z_IMM_PSEUDO,
5466 Opc_rr: AArch64::LDNT1B_2Z_PSEUDO);
5467 else if (Subtarget->hasSVE2p1())
5468 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LDNT1B_2Z_IMM,
5469 Opc_rr: AArch64::LDNT1B_2Z);
5470 else
5471 break;
5472 return;
5473 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5474 VT == MVT::nxv8bf16) {
5475 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5476 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1,
5477 Opc_ri: AArch64::LDNT1H_2Z_IMM_PSEUDO,
5478 Opc_rr: AArch64::LDNT1H_2Z_PSEUDO);
5479 else if (Subtarget->hasSVE2p1())
5480 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LDNT1H_2Z_IMM,
5481 Opc_rr: AArch64::LDNT1H_2Z);
5482 else
5483 break;
5484 return;
5485 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5486 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5487 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2,
5488 Opc_ri: AArch64::LDNT1W_2Z_IMM_PSEUDO,
5489 Opc_rr: AArch64::LDNT1W_2Z_PSEUDO);
5490 else if (Subtarget->hasSVE2p1())
5491 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LDNT1W_2Z_IMM,
5492 Opc_rr: AArch64::LDNT1W_2Z);
5493 else
5494 break;
5495 return;
5496 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5497 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5498 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3,
5499 Opc_ri: AArch64::LDNT1D_2Z_IMM_PSEUDO,
5500 Opc_rr: AArch64::LDNT1D_2Z_PSEUDO);
5501 else if (Subtarget->hasSVE2p1())
5502 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LDNT1D_2Z_IMM,
5503 Opc_rr: AArch64::LDNT1D_2Z);
5504 else
5505 break;
5506 return;
5507 }
5508 break;
5509 }
5510 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5511 if (VT == MVT::nxv16i8) {
5512 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5513 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0,
5514 Opc_ri: AArch64::LDNT1B_4Z_IMM_PSEUDO,
5515 Opc_rr: AArch64::LDNT1B_4Z_PSEUDO);
5516 else if (Subtarget->hasSVE2p1())
5517 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LDNT1B_4Z_IMM,
5518 Opc_rr: AArch64::LDNT1B_4Z);
5519 else
5520 break;
5521 return;
5522 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5523 VT == MVT::nxv8bf16) {
5524 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5525 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1,
5526 Opc_ri: AArch64::LDNT1H_4Z_IMM_PSEUDO,
5527 Opc_rr: AArch64::LDNT1H_4Z_PSEUDO);
5528 else if (Subtarget->hasSVE2p1())
5529 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LDNT1H_4Z_IMM,
5530 Opc_rr: AArch64::LDNT1H_4Z);
5531 else
5532 break;
5533 return;
5534 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5535 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5536 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2,
5537 Opc_ri: AArch64::LDNT1W_4Z_IMM_PSEUDO,
5538 Opc_rr: AArch64::LDNT1W_4Z_PSEUDO);
5539 else if (Subtarget->hasSVE2p1())
5540 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LDNT1W_4Z_IMM,
5541 Opc_rr: AArch64::LDNT1W_4Z);
5542 else
5543 break;
5544 return;
5545 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5546 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5547 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3,
5548 Opc_ri: AArch64::LDNT1D_4Z_IMM_PSEUDO,
5549 Opc_rr: AArch64::LDNT1D_4Z_PSEUDO);
5550 else if (Subtarget->hasSVE2p1())
5551 SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LDNT1D_4Z_IMM,
5552 Opc_rr: AArch64::LDNT1D_4Z);
5553 else
5554 break;
5555 return;
5556 }
5557 break;
5558 }
5559 case Intrinsic::aarch64_sve_ld3_sret: {
5560 if (VT == MVT::nxv16i8) {
5561 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 0, Opc_ri: AArch64::LD3B_IMM, Opc_rr: AArch64::LD3B,
5562 IsIntr: true);
5563 return;
5564 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5565 VT == MVT::nxv8bf16) {
5566 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 1, Opc_ri: AArch64::LD3H_IMM, Opc_rr: AArch64::LD3H,
5567 IsIntr: true);
5568 return;
5569 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5570 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 2, Opc_ri: AArch64::LD3W_IMM, Opc_rr: AArch64::LD3W,
5571 IsIntr: true);
5572 return;
5573 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5574 SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 3, Opc_ri: AArch64::LD3D_IMM, Opc_rr: AArch64::LD3D,
5575 IsIntr: true);
5576 return;
5577 }
5578 break;
5579 }
5580 case Intrinsic::aarch64_sve_ld4_sret: {
5581 if (VT == MVT::nxv16i8) {
5582 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD4B_IMM, Opc_rr: AArch64::LD4B,
5583 IsIntr: true);
5584 return;
5585 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5586 VT == MVT::nxv8bf16) {
5587 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD4H_IMM, Opc_rr: AArch64::LD4H,
5588 IsIntr: true);
5589 return;
5590 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5591 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD4W_IMM, Opc_rr: AArch64::LD4W,
5592 IsIntr: true);
5593 return;
5594 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5595 SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD4D_IMM, Opc_rr: AArch64::LD4D,
5596 IsIntr: true);
5597 return;
5598 }
5599 break;
5600 }
5601 case Intrinsic::aarch64_sme_read_hor_vg2: {
5602 if (VT == MVT::nxv16i8) {
5603 SelectMultiVectorMove<14, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAB0,
5604 Op: AArch64::MOVA_2ZMXI_H_B);
5605 return;
5606 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5607 VT == MVT::nxv8bf16) {
5608 SelectMultiVectorMove<6, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAH0,
5609 Op: AArch64::MOVA_2ZMXI_H_H);
5610 return;
5611 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5612 SelectMultiVectorMove<2, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAS0,
5613 Op: AArch64::MOVA_2ZMXI_H_S);
5614 return;
5615 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5616 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAD0,
5617 Op: AArch64::MOVA_2ZMXI_H_D);
5618 return;
5619 }
5620 break;
5621 }
5622 case Intrinsic::aarch64_sme_read_ver_vg2: {
5623 if (VT == MVT::nxv16i8) {
5624 SelectMultiVectorMove<14, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAB0,
5625 Op: AArch64::MOVA_2ZMXI_V_B);
5626 return;
5627 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5628 VT == MVT::nxv8bf16) {
5629 SelectMultiVectorMove<6, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAH0,
5630 Op: AArch64::MOVA_2ZMXI_V_H);
5631 return;
5632 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5633 SelectMultiVectorMove<2, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAS0,
5634 Op: AArch64::MOVA_2ZMXI_V_S);
5635 return;
5636 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5637 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAD0,
5638 Op: AArch64::MOVA_2ZMXI_V_D);
5639 return;
5640 }
5641 break;
5642 }
5643 case Intrinsic::aarch64_sme_read_hor_vg4: {
5644 if (VT == MVT::nxv16i8) {
5645 SelectMultiVectorMove<12, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAB0,
5646 Op: AArch64::MOVA_4ZMXI_H_B);
5647 return;
5648 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5649 VT == MVT::nxv8bf16) {
5650 SelectMultiVectorMove<4, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAH0,
5651 Op: AArch64::MOVA_4ZMXI_H_H);
5652 return;
5653 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5654 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAS0,
5655 Op: AArch64::MOVA_4ZMXI_H_S);
5656 return;
5657 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5658 SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAD0,
5659 Op: AArch64::MOVA_4ZMXI_H_D);
5660 return;
5661 }
5662 break;
5663 }
5664 case Intrinsic::aarch64_sme_read_ver_vg4: {
5665 if (VT == MVT::nxv16i8) {
5666 SelectMultiVectorMove<12, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAB0,
5667 Op: AArch64::MOVA_4ZMXI_V_B);
5668 return;
5669 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5670 VT == MVT::nxv8bf16) {
5671 SelectMultiVectorMove<4, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAH0,
5672 Op: AArch64::MOVA_4ZMXI_V_H);
5673 return;
5674 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5675 SelectMultiVectorMove<0, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAS0,
5676 Op: AArch64::MOVA_4ZMXI_V_S);
5677 return;
5678 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5679 SelectMultiVectorMove<0, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAD0,
5680 Op: AArch64::MOVA_4ZMXI_V_D);
5681 return;
5682 }
5683 break;
5684 }
5685 case Intrinsic::aarch64_sme_read_vg1x2: {
5686 SelectMultiVectorMove<7, 1>(N: Node, NumVecs: 2, BaseReg: AArch64::ZA,
5687 Op: AArch64::MOVA_VG2_2ZMXI);
5688 return;
5689 }
5690 case Intrinsic::aarch64_sme_read_vg1x4: {
5691 SelectMultiVectorMove<7, 1>(N: Node, NumVecs: 4, BaseReg: AArch64::ZA,
5692 Op: AArch64::MOVA_VG4_4ZMXI);
5693 return;
5694 }
5695 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5696 if (VT == MVT::nxv16i8) {
5697 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_B_PSEUDO, MaxIdx: 14, Scale: 2);
5698 return;
5699 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5700 VT == MVT::nxv8bf16) {
5701 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_H_PSEUDO, MaxIdx: 6, Scale: 2);
5702 return;
5703 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5704 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_S_PSEUDO, MaxIdx: 2, Scale: 2);
5705 return;
5706 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5707 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_D_PSEUDO, MaxIdx: 0, Scale: 2);
5708 return;
5709 }
5710 break;
5711 }
5712 case Intrinsic::aarch64_sme_readz_vert_x2: {
5713 if (VT == MVT::nxv16i8) {
5714 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_B_PSEUDO, MaxIdx: 14, Scale: 2);
5715 return;
5716 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5717 VT == MVT::nxv8bf16) {
5718 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_H_PSEUDO, MaxIdx: 6, Scale: 2);
5719 return;
5720 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5721 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_S_PSEUDO, MaxIdx: 2, Scale: 2);
5722 return;
5723 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5724 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_D_PSEUDO, MaxIdx: 0, Scale: 2);
5725 return;
5726 }
5727 break;
5728 }
5729 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5730 if (VT == MVT::nxv16i8) {
5731 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_B_PSEUDO, MaxIdx: 12, Scale: 4);
5732 return;
5733 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5734 VT == MVT::nxv8bf16) {
5735 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_H_PSEUDO, MaxIdx: 4, Scale: 4);
5736 return;
5737 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5738 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_S_PSEUDO, MaxIdx: 0, Scale: 4);
5739 return;
5740 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5741 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_D_PSEUDO, MaxIdx: 0, Scale: 4);
5742 return;
5743 }
5744 break;
5745 }
5746 case Intrinsic::aarch64_sme_readz_vert_x4: {
5747 if (VT == MVT::nxv16i8) {
5748 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_B_PSEUDO, MaxIdx: 12, Scale: 4);
5749 return;
5750 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5751 VT == MVT::nxv8bf16) {
5752 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_H_PSEUDO, MaxIdx: 4, Scale: 4);
5753 return;
5754 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5755 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_S_PSEUDO, MaxIdx: 0, Scale: 4);
5756 return;
5757 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5758 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_D_PSEUDO, MaxIdx: 0, Scale: 4);
5759 return;
5760 }
5761 break;
5762 }
5763 case Intrinsic::aarch64_sme_readz_x2: {
5764 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, MaxIdx: 7, Scale: 1,
5765 BaseReg: AArch64::ZA);
5766 return;
5767 }
5768 case Intrinsic::aarch64_sme_readz_x4: {
5769 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, MaxIdx: 7, Scale: 1,
5770 BaseReg: AArch64::ZA);
5771 return;
5772 }
5773 case Intrinsic::swift_async_context_addr: {
5774 SDLoc DL(Node);
5775 SDValue Chain = Node->getOperand(Num: 0);
5776 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::FP, VT: MVT::i64);
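    // The Swift async context slot lives immediately below the frame pointer,
    // so this intrinsic simply materialises FP - 8.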
5777 SDValue Res = SDValue(
5778 CurDAG->getMachineNode(Opcode: AArch64::SUBXri, dl: DL, VT: MVT::i64, Op1: CopyFP,
5779 Op2: CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32),
5780 Op3: CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32)),
5781 0);
5782 ReplaceUses(F: SDValue(Node, 0), T: Res);
5783 ReplaceUses(F: SDValue(Node, 1), T: CopyFP.getValue(R: 1));
5784 CurDAG->RemoveDeadNode(N: Node);
5785
5786 auto &MF = CurDAG->getMachineFunction();
5787 MF.getFrameInfo().setFrameAddressIsTaken(true);
5788 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5789 return;
5790 }
5791 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5792 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5793 VT: Node->getValueType(ResNo: 0),
5794 Opcodes: {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5795 AArch64::LUTI2_4ZTZI_S}))
5796 // Second Immediate must be <= 3:
5797 SelectMultiVectorLutiLane(Node, NumOutVecs: 4, Opc, MaxImm: 3);
5798 return;
5799 }
5800 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5801 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5802 VT: Node->getValueType(ResNo: 0),
5803 Opcodes: {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5804 // Second Immediate must be <= 1:
5805 SelectMultiVectorLutiLane(Node, NumOutVecs: 4, Opc, MaxImm: 1);
5806 return;
5807 }
5808 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5809 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5810 VT: Node->getValueType(ResNo: 0),
5811 Opcodes: {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5812 AArch64::LUTI2_2ZTZI_S}))
5813 // Second Immediate must be <= 7:
5814 SelectMultiVectorLutiLane(Node, NumOutVecs: 2, Opc, MaxImm: 7);
5815 return;
5816 }
5817 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5818 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5819 VT: Node->getValueType(ResNo: 0),
5820 Opcodes: {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5821 AArch64::LUTI4_2ZTZI_S}))
5822 // Second Immediate must be <= 3:
5823 SelectMultiVectorLutiLane(Node, NumOutVecs: 2, Opc, MaxImm: 3);
5824 return;
5825 }
5826 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5827 SelectMultiVectorLuti(Node, NumOutVecs: 4, Opc: AArch64::LUTI4_4ZZT2Z);
5828 return;
5829 }
5830 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5831 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5832 VT: Node->getValueType(ResNo: 0),
5833 Opcodes: {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5834 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5835 return;
5836 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5837 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5838 VT: Node->getValueType(ResNo: 0),
5839 Opcodes: {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5840 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5841 return;
5842 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5843 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5844 VT: Node->getValueType(ResNo: 0),
5845 Opcodes: {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5846 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5847 return;
5848 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5849 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5850 VT: Node->getValueType(ResNo: 0),
5851 Opcodes: {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5852 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5853 return;
5854 case Intrinsic::ptrauth_resign_load_relative:
5855 SelectPtrauthResign(N: Node);
5856 return;
5857 }
5858 } break;
5859 case ISD::INTRINSIC_WO_CHAIN: {
5860 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
5861 switch (IntNo) {
5862 default:
5863 break;
5864 case Intrinsic::aarch64_tagp:
5865 SelectTagP(N: Node);
5866 return;
5867
5868 case Intrinsic::ptrauth_auth:
5869 SelectPtrauthAuth(N: Node);
5870 return;
5871
5872 case Intrinsic::ptrauth_resign:
5873 SelectPtrauthResign(N: Node);
5874 return;
5875
5876 case Intrinsic::aarch64_neon_tbl2:
5877 SelectTable(N: Node, NumVecs: 2,
5878 Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5879 isExt: false);
5880 return;
5881 case Intrinsic::aarch64_neon_tbl3:
5882 SelectTable(N: Node, NumVecs: 3, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5883 : AArch64::TBLv16i8Three,
5884 isExt: false);
5885 return;
5886 case Intrinsic::aarch64_neon_tbl4:
5887 SelectTable(N: Node, NumVecs: 4, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5888 : AArch64::TBLv16i8Four,
5889 isExt: false);
5890 return;
5891 case Intrinsic::aarch64_neon_tbx2:
5892 SelectTable(N: Node, NumVecs: 2,
5893 Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5894 isExt: true);
5895 return;
5896 case Intrinsic::aarch64_neon_tbx3:
5897 SelectTable(N: Node, NumVecs: 3, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5898 : AArch64::TBXv16i8Three,
5899 isExt: true);
5900 return;
5901 case Intrinsic::aarch64_neon_tbx4:
5902 SelectTable(N: Node, NumVecs: 4, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5903 : AArch64::TBXv16i8Four,
5904 isExt: true);
5905 return;
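// SME2 multi-vector arithmetic: the "_single" intrinsics (IsZmMulti ==
// false) share one Zm vector across the whole register group, while the
// plain variants (IsZmMulti == true) take a second multi-vector operand.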
5906 case Intrinsic::aarch64_sve_srshl_single_x2:
5907 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5908 VT: Node->getValueType(ResNo: 0),
5909 Opcodes: {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5910 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5911 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5912 return;
5913 case Intrinsic::aarch64_sve_srshl_single_x4:
5914 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5915 VT: Node->getValueType(ResNo: 0),
5916 Opcodes: {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5917 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5918 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5919 return;
5920 case Intrinsic::aarch64_sve_urshl_single_x2:
5921 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5922 VT: Node->getValueType(ResNo: 0),
5923 Opcodes: {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5924 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5925 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5926 return;
5927 case Intrinsic::aarch64_sve_urshl_single_x4:
5928 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5929 VT: Node->getValueType(ResNo: 0),
5930 Opcodes: {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5931 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5932 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5933 return;
5934 case Intrinsic::aarch64_sve_srshl_x2:
5935 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5936 VT: Node->getValueType(ResNo: 0),
5937 Opcodes: {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5938 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5939 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5940 return;
5941 case Intrinsic::aarch64_sve_srshl_x4:
5942 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5943 VT: Node->getValueType(ResNo: 0),
5944 Opcodes: {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5945 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5946 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5947 return;
5948 case Intrinsic::aarch64_sve_urshl_x2:
5949 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5950 VT: Node->getValueType(ResNo: 0),
5951 Opcodes: {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5952 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5953 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5954 return;
5955 case Intrinsic::aarch64_sve_urshl_x4:
5956 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5957 VT: Node->getValueType(ResNo: 0),
5958 Opcodes: {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5959 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5960 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5961 return;
5962 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5963 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5964 VT: Node->getValueType(ResNo: 0),
5965 Opcodes: {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5966 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5967 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5968 return;
5969 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5970 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5971 VT: Node->getValueType(ResNo: 0),
5972 Opcodes: {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5973 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5974 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5975 return;
5976 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5977 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5978 VT: Node->getValueType(ResNo: 0),
5979 Opcodes: {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5980 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5981 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
5982 return;
5983 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5984 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5985 VT: Node->getValueType(ResNo: 0),
5986 Opcodes: {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5987 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5988 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
5989 return;
5990 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5991 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5992 VT: Node->getValueType(ResNo: 0),
5993 Opcodes: {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5994 AArch64::FSCALE_2ZZ_D}))
5995 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5996 return;
5997 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5998 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5999 VT: Node->getValueType(ResNo: 0),
6000 Opcodes: {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6001 AArch64::FSCALE_4ZZ_D}))
6002 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6003 return;
6004 case Intrinsic::aarch64_sme_fp8_scale_x2:
6005 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6006 VT: Node->getValueType(ResNo: 0),
6007 Opcodes: {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6008 AArch64::FSCALE_2Z2Z_D}))
6009 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6010 return;
6011 case Intrinsic::aarch64_sme_fp8_scale_x4:
6012 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6013 VT: Node->getValueType(ResNo: 0),
6014 Opcodes: {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6015 AArch64::FSCALE_4Z4Z_D}))
6016 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6017 return;
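// Predicate-pair WHILE* intrinsics: the opcode is chosen from the
// predicate element type and the two predicate results are unpacked by
// SelectWhilePair.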
6018 case Intrinsic::aarch64_sve_whilege_x2:
6019 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6020 VT: Node->getValueType(ResNo: 0),
6021 Opcodes: {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6022 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6023 SelectWhilePair(N: Node, Opc: Op);
6024 return;
6025 case Intrinsic::aarch64_sve_whilegt_x2:
6026 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6027 VT: Node->getValueType(ResNo: 0),
6028 Opcodes: {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6029 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6030 SelectWhilePair(N: Node, Opc: Op);
6031 return;
6032 case Intrinsic::aarch64_sve_whilehi_x2:
6033 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6034 VT: Node->getValueType(ResNo: 0),
6035 Opcodes: {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6036 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6037 SelectWhilePair(N: Node, Opc: Op);
6038 return;
6039 case Intrinsic::aarch64_sve_whilehs_x2:
6040 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6041 VT: Node->getValueType(ResNo: 0),
6042 Opcodes: {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6043 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6044 SelectWhilePair(N: Node, Opc: Op);
6045 return;
6046 case Intrinsic::aarch64_sve_whilele_x2:
6047 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6048 VT: Node->getValueType(ResNo: 0),
6049 Opcodes: {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6050 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6051 SelectWhilePair(N: Node, Opc: Op);
6052 return;
6053 case Intrinsic::aarch64_sve_whilelo_x2:
6054 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6055 VT: Node->getValueType(ResNo: 0),
6056 Opcodes: {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6057 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6058 SelectWhilePair(N: Node, Opc: Op);
6059 return;
6060 case Intrinsic::aarch64_sve_whilels_x2:
6061 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6062 VT: Node->getValueType(ResNo: 0),
6063 Opcodes: {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6064 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6065 SelectWhilePair(N: Node, Opc: Op);
6066 return;
6067 case Intrinsic::aarch64_sve_whilelt_x2:
6068 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6069 VT: Node->getValueType(ResNo: 0),
6070 Opcodes: {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6071 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6072 SelectWhilePair(N: Node, Opc: Op);
6073 return;
6074 case Intrinsic::aarch64_sve_smax_single_x2:
6075 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6076 VT: Node->getValueType(ResNo: 0),
6077 Opcodes: {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6078 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6079 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6080 return;
6081 case Intrinsic::aarch64_sve_umax_single_x2:
6082 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6083 VT: Node->getValueType(ResNo: 0),
6084 Opcodes: {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6085 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6086 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6087 return;
6088 case Intrinsic::aarch64_sve_fmax_single_x2:
6089 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6090 VT: Node->getValueType(ResNo: 0),
6091 Opcodes: {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6092 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6093 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6094 return;
6095 case Intrinsic::aarch64_sve_smax_single_x4:
6096 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6097 VT: Node->getValueType(ResNo: 0),
6098 Opcodes: {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6099 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6100 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6101 return;
6102 case Intrinsic::aarch64_sve_umax_single_x4:
6103 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6104 VT: Node->getValueType(ResNo: 0),
6105 Opcodes: {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6106 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6107 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6108 return;
6109 case Intrinsic::aarch64_sve_fmax_single_x4:
6110 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6111 VT: Node->getValueType(ResNo: 0),
6112 Opcodes: {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6113 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6114 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6115 return;
6116 case Intrinsic::aarch64_sve_smin_single_x2:
6117 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6118 VT: Node->getValueType(ResNo: 0),
6119 Opcodes: {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6120 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6121 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6122 return;
6123 case Intrinsic::aarch64_sve_umin_single_x2:
6124 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6125 VT: Node->getValueType(ResNo: 0),
6126 Opcodes: {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6127 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6128 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6129 return;
6130 case Intrinsic::aarch64_sve_fmin_single_x2:
6131 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6132 VT: Node->getValueType(ResNo: 0),
6133 Opcodes: {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6134 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6135 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6136 return;
6137 case Intrinsic::aarch64_sve_smin_single_x4:
6138 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6139 VT: Node->getValueType(ResNo: 0),
6140 Opcodes: {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6141 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6142 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6143 return;
6144 case Intrinsic::aarch64_sve_umin_single_x4:
6145 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6146 VT: Node->getValueType(ResNo: 0),
6147 Opcodes: {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6148 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6149 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6150 return;
6151 case Intrinsic::aarch64_sve_fmin_single_x4:
6152 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6153 VT: Node->getValueType(ResNo: 0),
6154 Opcodes: {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6155 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6156 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6157 return;
6158 case Intrinsic::aarch64_sve_smax_x2:
6159 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6160 VT: Node->getValueType(ResNo: 0),
6161 Opcodes: {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6162 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6163 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6164 return;
6165 case Intrinsic::aarch64_sve_umax_x2:
6166 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6167 VT: Node->getValueType(ResNo: 0),
6168 Opcodes: {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6169 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6170 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6171 return;
6172 case Intrinsic::aarch64_sve_fmax_x2:
6173 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6174 VT: Node->getValueType(ResNo: 0),
6175 Opcodes: {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6176 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6177 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6178 return;
6179 case Intrinsic::aarch64_sve_smax_x4:
6180 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6181 VT: Node->getValueType(ResNo: 0),
6182 Opcodes: {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6183 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6184 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6185 return;
6186 case Intrinsic::aarch64_sve_umax_x4:
6187 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6188 VT: Node->getValueType(ResNo: 0),
6189 Opcodes: {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6190 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6191 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6192 return;
6193 case Intrinsic::aarch64_sve_fmax_x4:
6194 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6195 VT: Node->getValueType(ResNo: 0),
6196 Opcodes: {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6197 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6198 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6199 return;
6200 case Intrinsic::aarch64_sme_famax_x2:
6201 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6202 VT: Node->getValueType(ResNo: 0),
6203 Opcodes: {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6204 AArch64::FAMAX_2Z2Z_D}))
6205 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6206 return;
6207 case Intrinsic::aarch64_sme_famax_x4:
6208 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6209 VT: Node->getValueType(ResNo: 0),
6210 Opcodes: {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6211 AArch64::FAMAX_4Z4Z_D}))
6212 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6213 return;
6214 case Intrinsic::aarch64_sme_famin_x2:
6215 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6216 VT: Node->getValueType(ResNo: 0),
6217 Opcodes: {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6218 AArch64::FAMIN_2Z2Z_D}))
6219 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6220 return;
6221 case Intrinsic::aarch64_sme_famin_x4:
6222 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6223 VT: Node->getValueType(ResNo: 0),
6224 Opcodes: {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6225 AArch64::FAMIN_4Z4Z_D}))
6226 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6227 return;
6228 case Intrinsic::aarch64_sve_smin_x2:
6229 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6230 VT: Node->getValueType(ResNo: 0),
6231 Opcodes: {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6232 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6233 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6234 return;
6235 case Intrinsic::aarch64_sve_umin_x2:
6236 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6237 VT: Node->getValueType(ResNo: 0),
6238 Opcodes: {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6239 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6240 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6241 return;
6242 case Intrinsic::aarch64_sve_fmin_x2:
6243 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6244 VT: Node->getValueType(ResNo: 0),
6245 Opcodes: {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6246 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6247 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6248 return;
6249 case Intrinsic::aarch64_sve_smin_x4:
6250 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6251 VT: Node->getValueType(ResNo: 0),
6252 Opcodes: {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6253 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6254 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6255 return;
6256 case Intrinsic::aarch64_sve_umin_x4:
6257 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6258 VT: Node->getValueType(ResNo: 0),
6259 Opcodes: {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6260 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6261 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6262 return;
6263 case Intrinsic::aarch64_sve_fmin_x4:
6264 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6265 VT: Node->getValueType(ResNo: 0),
6266 Opcodes: {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6267 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6268 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6269 return;
6270 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
6271 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6272 VT: Node->getValueType(ResNo: 0),
6273 Opcodes: {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6274 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6275 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6276 return;
6277 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
6278 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6279 VT: Node->getValueType(ResNo: 0),
6280 Opcodes: {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6281 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6282 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6283 return;
6284 case Intrinsic::aarch64_sve_fminnm_single_x2:
6285 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6286 VT: Node->getValueType(ResNo: 0),
6287 Opcodes: {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6288 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6289 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6290 return;
6291 case Intrinsic::aarch64_sve_fminnm_single_x4:
6292 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6293 VT: Node->getValueType(ResNo: 0),
6294 Opcodes: {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6295 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6296 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6297 return;
6298 case Intrinsic::aarch64_sve_fscale_single_x4:
6299 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::BFSCALE_4ZZ);
6300 return;
6301 case Intrinsic::aarch64_sve_fscale_single_x2:
6302 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::BFSCALE_2ZZ);
6303 return;
6304 case Intrinsic::aarch64_sve_fmul_single_x4:
6305 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6306 VT: Node->getValueType(ResNo: 0),
6307 Opcodes: {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6308 AArch64::FMUL_4ZZ_D}))
6309 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6310 return;
6311 case Intrinsic::aarch64_sve_fmul_single_x2:
6312 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6313 VT: Node->getValueType(ResNo: 0),
6314 Opcodes: {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6315 AArch64::FMUL_2ZZ_D}))
6316 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6317 return;
6318 case Intrinsic::aarch64_sve_fmaxnm_x2:
6319 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6320 VT: Node->getValueType(ResNo: 0),
6321 Opcodes: {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6322 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6323 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6324 return;
6325 case Intrinsic::aarch64_sve_fmaxnm_x4:
6326 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6327 VT: Node->getValueType(ResNo: 0),
6328 Opcodes: {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6329 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6330 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6331 return;
6332 case Intrinsic::aarch64_sve_fminnm_x2:
6333 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6334 VT: Node->getValueType(ResNo: 0),
6335 Opcodes: {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6336 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6337 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6338 return;
6339 case Intrinsic::aarch64_sve_fminnm_x4:
6340 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6341 VT: Node->getValueType(ResNo: 0),
6342 Opcodes: {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6343 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6344 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6345 return;
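// Multi-vector AES intrinsics operate on bytes only, so a fixed _B opcode
// is used and no type dispatch is needed.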
6346 case Intrinsic::aarch64_sve_aese_lane_x2:
6347 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESE_2ZZI_B);
6348 return;
6349 case Intrinsic::aarch64_sve_aesd_lane_x2:
6350 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESD_2ZZI_B);
6351 return;
6352 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6353 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESEMC_2ZZI_B);
6354 return;
6355 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6356 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESDIMC_2ZZI_B);
6357 return;
6358 case Intrinsic::aarch64_sve_aese_lane_x4:
6359 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESE_4ZZI_B);
6360 return;
6361 case Intrinsic::aarch64_sve_aesd_lane_x4:
6362 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESD_4ZZI_B);
6363 return;
6364 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6365 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESEMC_4ZZI_B);
6366 return;
6367 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6368 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESDIMC_4ZZI_B);
6369 return;
6370 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6371 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::PMLAL_2ZZZ_Q);
6372 return;
6373 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6374 SDLoc DL(Node);
6375 SmallVector<SDValue, 4> Regs(Node->ops().slice(N: 1, M: 2));
6376 SDNode *Res =
6377 CurDAG->getMachineNode(Opcode: AArch64::PMULL_2ZZZ_Q, dl: DL, VT: MVT::Untyped, Ops: Regs);
6378 SDValue SuperReg = SDValue(Res, 0);
6379 for (unsigned I = 0; I < 2; I++)
6380 ReplaceUses(F: SDValue(Node, I),
6381 T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
6382 Operand: SuperReg));
6383 CurDAG->RemoveDeadNode(N: Node);
6384 return;
6385 }
6386 case Intrinsic::aarch64_sve_fscale_x4:
6387 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: AArch64::BFSCALE_4Z4Z);
6388 return;
6389 case Intrinsic::aarch64_sve_fscale_x2:
6390 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: AArch64::BFSCALE_2Z2Z);
6391 return;
6392 case Intrinsic::aarch64_sve_fmul_x4:
6393 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6394 VT: Node->getValueType(ResNo: 0),
6395 Opcodes: {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6396 AArch64::FMUL_4Z4Z_D}))
6397 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6398 return;
6399 case Intrinsic::aarch64_sve_fmul_x2:
6400 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6401 VT: Node->getValueType(ResNo: 0),
6402 Opcodes: {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6403 AArch64::FMUL_2Z2Z_D}))
6404 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6405 return;
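// Multi-vector int<->float conversions: only the 32-bit (StoS) forms are
// selected here.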
6406 case Intrinsic::aarch64_sve_fcvtzs_x2:
6407 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::FCVTZS_2Z2Z_StoS);
6408 return;
6409 case Intrinsic::aarch64_sve_scvtf_x2:
6410 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::SCVTF_2Z2Z_StoS);
6411 return;
6412 case Intrinsic::aarch64_sve_fcvtzu_x2:
6413 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::FCVTZU_2Z2Z_StoS);
6414 return;
6415 case Intrinsic::aarch64_sve_ucvtf_x2:
6416 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::UCVTF_2Z2Z_StoS);
6417 return;
6418 case Intrinsic::aarch64_sve_fcvtzs_x4:
6419 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::FCVTZS_4Z4Z_StoS);
6420 return;
6421 case Intrinsic::aarch64_sve_scvtf_x4:
6422 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::SCVTF_4Z4Z_StoS);
6423 return;
6424 case Intrinsic::aarch64_sve_fcvtzu_x4:
6425 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::FCVTZU_4Z4Z_StoS);
6426 return;
6427 case Intrinsic::aarch64_sve_ucvtf_x4:
6428 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::UCVTF_4Z4Z_StoS);
6429 return;
6430 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6431 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, IsTupleInput: false, Opc: AArch64::FCVT_2ZZ_H_S);
6432 return;
6433 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6434 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, IsTupleInput: false, Opc: AArch64::FCVTL_2ZZ_H_S);
6435 return;
6436 case Intrinsic::aarch64_sve_sclamp_single_x2:
6437 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6438 VT: Node->getValueType(ResNo: 0),
6439 Opcodes: {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6440 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6441 SelectClamp(N: Node, NumVecs: 2, Op);
6442 return;
6443 case Intrinsic::aarch64_sve_uclamp_single_x2:
6444 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6445 VT: Node->getValueType(ResNo: 0),
6446 Opcodes: {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6447 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6448 SelectClamp(N: Node, NumVecs: 2, Op);
6449 return;
6450 case Intrinsic::aarch64_sve_fclamp_single_x2:
6451 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6452 VT: Node->getValueType(ResNo: 0),
6453 Opcodes: {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6454 AArch64::FCLAMP_VG2_2Z2Z_D}))
6455 SelectClamp(N: Node, NumVecs: 2, Op);
6456 return;
6457 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6458 SelectClamp(N: Node, NumVecs: 2, Op: AArch64::BFCLAMP_VG2_2ZZZ_H);
6459 return;
6460 case Intrinsic::aarch64_sve_sclamp_single_x4:
6461 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6462 VT: Node->getValueType(ResNo: 0),
6463 Opcodes: {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6464 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6465 SelectClamp(N: Node, NumVecs: 4, Op);
6466 return;
6467 case Intrinsic::aarch64_sve_uclamp_single_x4:
6468 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6469 VT: Node->getValueType(ResNo: 0),
6470 Opcodes: {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6471 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6472 SelectClamp(N: Node, NumVecs: 4, Op);
6473 return;
6474 case Intrinsic::aarch64_sve_fclamp_single_x4:
6475 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6476 VT: Node->getValueType(ResNo: 0),
6477 Opcodes: {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6478 AArch64::FCLAMP_VG4_4Z4Z_D}))
6479 SelectClamp(N: Node, NumVecs: 4, Op);
6480 return;
6481 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6482 SelectClamp(N: Node, NumVecs: 4, Op: AArch64::BFCLAMP_VG4_4ZZZ_H);
6483 return;
6484 case Intrinsic::aarch64_sve_add_single_x2:
6485 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6486 VT: Node->getValueType(ResNo: 0),
6487 Opcodes: {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6488 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6489 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6490 return;
6491 case Intrinsic::aarch64_sve_add_single_x4:
6492 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6493 VT: Node->getValueType(ResNo: 0),
6494 Opcodes: {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6495 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6496 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6497 return;
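// Multi-vector ZIP/UZP permutes: element-sized forms dispatch on the
// result type, the "q" variants use the fixed 128-bit-element opcode, and
// the x4 forms consume their sources as a single 4-vector tuple
// (IsTupleInput == true).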
6498 case Intrinsic::aarch64_sve_zip_x2:
6499 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6500 VT: Node->getValueType(ResNo: 0),
6501 Opcodes: {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6502 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6503 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6504 return;
6505 case Intrinsic::aarch64_sve_zipq_x2:
6506 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false,
6507 Opc: AArch64::ZIP_VG2_2ZZZ_Q);
6508 return;
6509 case Intrinsic::aarch64_sve_zip_x4:
6510 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6511 VT: Node->getValueType(ResNo: 0),
6512 Opcodes: {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6513 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6514 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6515 return;
6516 case Intrinsic::aarch64_sve_zipq_x4:
6517 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true,
6518 Opc: AArch64::ZIP_VG4_4Z4Z_Q);
6519 return;
6520 case Intrinsic::aarch64_sve_uzp_x2:
6521 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6522 VT: Node->getValueType(ResNo: 0),
6523 Opcodes: {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6524 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6525 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6526 return;
6527 case Intrinsic::aarch64_sve_uzpq_x2:
6528 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false,
6529 Opc: AArch64::UZP_VG2_2ZZZ_Q);
6530 return;
6531 case Intrinsic::aarch64_sve_uzp_x4:
6532 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6533 VT: Node->getValueType(ResNo: 0),
6534 Opcodes: {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6535 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6536 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6537 return;
6538 case Intrinsic::aarch64_sve_uzpq_x4:
6539 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true,
6540 Opc: AArch64::UZP_VG4_4Z4Z_Q);
6541 return;
6542 case Intrinsic::aarch64_sve_sel_x2:
6543 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6544 VT: Node->getValueType(ResNo: 0),
6545 Opcodes: {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6546 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6547 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
6548 return;
6549 case Intrinsic::aarch64_sve_sel_x4:
6550 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6551 VT: Node->getValueType(ResNo: 0),
6552 Opcodes: {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6553 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6554 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
6555 return;
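// Multi-vector FRINT* rounding intrinsics: only the .S (f32) opcodes are
// selected here.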
6556 case Intrinsic::aarch64_sve_frinta_x2:
6557 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTA_2Z2Z_S);
6558 return;
6559 case Intrinsic::aarch64_sve_frinta_x4:
6560 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTA_4Z4Z_S);
6561 return;
6562 case Intrinsic::aarch64_sve_frintm_x2:
6563 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTM_2Z2Z_S);
6564 return;
6565 case Intrinsic::aarch64_sve_frintm_x4:
6566 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTM_4Z4Z_S);
6567 return;
6568 case Intrinsic::aarch64_sve_frintn_x2:
6569 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTN_2Z2Z_S);
6570 return;
6571 case Intrinsic::aarch64_sve_frintn_x4:
6572 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTN_4Z4Z_S);
6573 return;
6574 case Intrinsic::aarch64_sve_frintp_x2:
6575 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTP_2Z2Z_S);
6576 return;
6577 case Intrinsic::aarch64_sve_frintp_x4:
6578 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTP_4Z4Z_S);
6579 return;
6580 case Intrinsic::aarch64_sve_sunpk_x2:
6581 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6582 VT: Node->getValueType(ResNo: 0),
6583 Opcodes: {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6584 AArch64::SUNPK_VG2_2ZZ_D}))
6585 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6586 return;
6587 case Intrinsic::aarch64_sve_uunpk_x2:
6588 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6589 VT: Node->getValueType(ResNo: 0),
6590 Opcodes: {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6591 AArch64::UUNPK_VG2_2ZZ_D}))
6592 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6593 return;
6594 case Intrinsic::aarch64_sve_sunpk_x4:
6595 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6596 VT: Node->getValueType(ResNo: 0),
6597 Opcodes: {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6598 AArch64::SUNPK_VG4_4Z2Z_D}))
6599 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6600 return;
6601 case Intrinsic::aarch64_sve_uunpk_x4:
6602 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6603 VT: Node->getValueType(ResNo: 0),
6604 Opcodes: {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6605 AArch64::UUNPK_VG4_4Z2Z_D}))
6606 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6607 return;
6608 case Intrinsic::aarch64_sve_pext_x2: {
6609 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6610 VT: Node->getValueType(ResNo: 0),
6611 Opcodes: {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6612 AArch64::PEXT_2PCI_D}))
6613 SelectPExtPair(N: Node, Opc: Op);
6614 return;
6615 }
6616 }
6617 break;
6618 }
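// Void (store-like) intrinsics: operand 0 is the chain, operand 1 is the
// intrinsic ID, and the memory element type is taken from the first data
// operand when present.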
6619 case ISD::INTRINSIC_VOID: {
6620 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
6621 if (Node->getNumOperands() >= 3)
6622 VT = Node->getOperand(Num: 2)->getValueType(ResNo: 0);
6623 switch (IntNo) {
6624 default:
6625 break;
6626 case Intrinsic::aarch64_neon_st1x2: {
6627 if (VT == MVT::v8i8) {
6628 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8b);
6629 return;
6630 } else if (VT == MVT::v16i8) {
6631 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov16b);
6632 return;
6633 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6634 VT == MVT::v4bf16) {
6635 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4h);
6636 return;
6637 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6638 VT == MVT::v8bf16) {
6639 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8h);
6640 return;
6641 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6642 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2s);
6643 return;
6644 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6645 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4s);
6646 return;
6647 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6648 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2d);
6649 return;
6650 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6651 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d);
6652 return;
6653 }
6654 break;
6655 }
6656 case Intrinsic::aarch64_neon_st1x3: {
6657 if (VT == MVT::v8i8) {
6658 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8b);
6659 return;
6660 } else if (VT == MVT::v16i8) {
6661 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev16b);
6662 return;
6663 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6664 VT == MVT::v4bf16) {
6665 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4h);
6666 return;
6667 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6668 VT == MVT::v8bf16) {
6669 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8h);
6670 return;
6671 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6672 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2s);
6673 return;
6674 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6675 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4s);
6676 return;
6677 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6678 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2d);
6679 return;
6680 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6681 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d);
6682 return;
6683 }
6684 break;
6685 }
6686 case Intrinsic::aarch64_neon_st1x4: {
6687 if (VT == MVT::v8i8) {
6688 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8b);
6689 return;
6690 } else if (VT == MVT::v16i8) {
6691 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv16b);
6692 return;
6693 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6694 VT == MVT::v4bf16) {
6695 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4h);
6696 return;
6697 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6698 VT == MVT::v8bf16) {
6699 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8h);
6700 return;
6701 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6702 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2s);
6703 return;
6704 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6705 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4s);
6706 return;
6707 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6708 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2d);
6709 return;
6710 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6711 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d);
6712 return;
6713 }
6714 break;
6715 }
6716 case Intrinsic::aarch64_neon_st2: {
6717 if (VT == MVT::v8i8) {
6718 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8b);
6719 return;
6720 } else if (VT == MVT::v16i8) {
6721 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov16b);
6722 return;
6723 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6724 VT == MVT::v4bf16) {
6725 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4h);
6726 return;
6727 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6728 VT == MVT::v8bf16) {
6729 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8h);
6730 return;
6731 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6732 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2s);
6733 return;
6734 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6735 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4s);
6736 return;
6737 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6738 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2d);
6739 return;
6740 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6741 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d);
6742 return;
6743 }
6744 break;
6745 }
6746 case Intrinsic::aarch64_neon_st3: {
6747 if (VT == MVT::v8i8) {
6748 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8b);
6749 return;
6750 } else if (VT == MVT::v16i8) {
6751 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev16b);
6752 return;
6753 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6754 VT == MVT::v4bf16) {
6755 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4h);
6756 return;
6757 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6758 VT == MVT::v8bf16) {
6759 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8h);
6760 return;
6761 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6762 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2s);
6763 return;
6764 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6765 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4s);
6766 return;
6767 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6768 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2d);
6769 return;
6770 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6771 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d);
6772 return;
6773 }
6774 break;
6775 }
6776 case Intrinsic::aarch64_neon_st4: {
6777 if (VT == MVT::v8i8) {
6778 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8b);
6779 return;
6780 } else if (VT == MVT::v16i8) {
6781 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv16b);
6782 return;
6783 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6784 VT == MVT::v4bf16) {
6785 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4h);
6786 return;
6787 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6788 VT == MVT::v8bf16) {
6789 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8h);
6790 return;
6791 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6792 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2s);
6793 return;
6794 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6795 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4s);
6796 return;
6797 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6798 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2d);
6799 return;
6800 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6801 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d);
6802 return;
6803 }
6804 break;
6805 }
6806 case Intrinsic::aarch64_neon_st2lane: {
6807 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6808 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i8);
6809 return;
6810 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6811 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6812 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i16);
6813 return;
6814 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6815 VT == MVT::v2f32) {
6816 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i32);
6817 return;
6818 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6819 VT == MVT::v1f64) {
6820 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i64);
6821 return;
6822 }
6823 break;
6824 }
6825 case Intrinsic::aarch64_neon_st3lane: {
6826 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6827 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i8);
6828 return;
6829 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6830 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6831 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i16);
6832 return;
6833 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6834 VT == MVT::v2f32) {
6835 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i32);
6836 return;
6837 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6838 VT == MVT::v1f64) {
6839 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i64);
6840 return;
6841 }
6842 break;
6843 }
6844 case Intrinsic::aarch64_neon_st4lane: {
6845 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6846 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i8);
6847 return;
6848 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6849 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6850 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i16);
6851 return;
6852 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6853 VT == MVT::v2f32) {
6854 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i32);
6855 return;
6856 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6857 VT == MVT::v1f64) {
6858 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i64);
6859 return;
6860 }
6861 break;
6862 }
6863 case Intrinsic::aarch64_sve_st2q: {
6864 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 4, Opc_rr: AArch64::ST2Q, Opc_ri: AArch64::ST2Q_IMM);
6865 return;
6866 }
6867 case Intrinsic::aarch64_sve_st3q: {
6868 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 4, Opc_rr: AArch64::ST3Q, Opc_ri: AArch64::ST3Q_IMM);
6869 return;
6870 }
6871 case Intrinsic::aarch64_sve_st4q: {
6872 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 4, Opc_rr: AArch64::ST4Q, Opc_ri: AArch64::ST4Q_IMM);
6873 return;
6874 }
6875 case Intrinsic::aarch64_sve_st2: {
6876 if (VT == MVT::nxv16i8) {
6877 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 0, Opc_rr: AArch64::ST2B, Opc_ri: AArch64::ST2B_IMM);
6878 return;
6879 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6880 VT == MVT::nxv8bf16) {
6881 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 1, Opc_rr: AArch64::ST2H, Opc_ri: AArch64::ST2H_IMM);
6882 return;
6883 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6884 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 2, Opc_rr: AArch64::ST2W, Opc_ri: AArch64::ST2W_IMM);
6885 return;
6886 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6887 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 3, Opc_rr: AArch64::ST2D, Opc_ri: AArch64::ST2D_IMM);
6888 return;
6889 }
6890 break;
6891 }
6892 case Intrinsic::aarch64_sve_st3: {
6893 if (VT == MVT::nxv16i8) {
6894 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 0, Opc_rr: AArch64::ST3B, Opc_ri: AArch64::ST3B_IMM);
6895 return;
6896 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6897 VT == MVT::nxv8bf16) {
6898 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 1, Opc_rr: AArch64::ST3H, Opc_ri: AArch64::ST3H_IMM);
6899 return;
6900 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6901 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 2, Opc_rr: AArch64::ST3W, Opc_ri: AArch64::ST3W_IMM);
6902 return;
6903 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6904 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 3, Opc_rr: AArch64::ST3D, Opc_ri: AArch64::ST3D_IMM);
6905 return;
6906 }
6907 break;
6908 }
6909 case Intrinsic::aarch64_sve_st4: {
6910 if (VT == MVT::nxv16i8) {
6911 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 0, Opc_rr: AArch64::ST4B, Opc_ri: AArch64::ST4B_IMM);
6912 return;
6913 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6914 VT == MVT::nxv8bf16) {
6915 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 1, Opc_rr: AArch64::ST4H, Opc_ri: AArch64::ST4H_IMM);
6916 return;
6917 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6918 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 2, Opc_rr: AArch64::ST4W, Opc_ri: AArch64::ST4W_IMM);
6919 return;
6920 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6921 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 3, Opc_rr: AArch64::ST4D, Opc_ri: AArch64::ST4D_IMM);
6922 return;
6923 }
6924 break;
6925 }
6926 }
6927 break;
6928 }
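// Post-incremented structure loads: each vector type maps to a *_POST
// opcode plus the sub-register index (dsub0 for 64-bit, qsub0 for 128-bit
// results) used to extract the individual vectors.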
6929 case AArch64ISD::LD2post: {
6930 if (VT == MVT::v8i8) {
6931 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8b_POST, SubRegIdx: AArch64::dsub0);
6932 return;
6933 } else if (VT == MVT::v16i8) {
6934 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov16b_POST, SubRegIdx: AArch64::qsub0);
6935 return;
6936 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6937 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4h_POST, SubRegIdx: AArch64::dsub0);
6938 return;
6939 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6940 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8h_POST, SubRegIdx: AArch64::qsub0);
6941 return;
6942 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6943 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2s_POST, SubRegIdx: AArch64::dsub0);
6944 return;
6945 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6946 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4s_POST, SubRegIdx: AArch64::qsub0);
6947 return;
6948 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6949 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d_POST, SubRegIdx: AArch64::dsub0);
6950 return;
6951 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6952 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2d_POST, SubRegIdx: AArch64::qsub0);
6953 return;
6954 }
6955 break;
6956 }
6957 case AArch64ISD::LD3post: {
6958 if (VT == MVT::v8i8) {
6959 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8b_POST, SubRegIdx: AArch64::dsub0);
6960 return;
6961 } else if (VT == MVT::v16i8) {
6962 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev16b_POST, SubRegIdx: AArch64::qsub0);
6963 return;
6964 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6965 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4h_POST, SubRegIdx: AArch64::dsub0);
6966 return;
6967 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6968 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8h_POST, SubRegIdx: AArch64::qsub0);
6969 return;
6970 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6971 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2s_POST, SubRegIdx: AArch64::dsub0);
6972 return;
6973 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6974 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4s_POST, SubRegIdx: AArch64::qsub0);
6975 return;
6976 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6977 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d_POST, SubRegIdx: AArch64::dsub0);
6978 return;
6979 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6980 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2d_POST, SubRegIdx: AArch64::qsub0);
6981 return;
6982 }
6983 break;
6984 }
6985 case AArch64ISD::LD4post: {
6986 if (VT == MVT::v8i8) {
6987 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8b_POST, SubRegIdx: AArch64::dsub0);
6988 return;
6989 } else if (VT == MVT::v16i8) {
6990 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv16b_POST, SubRegIdx: AArch64::qsub0);
6991 return;
6992 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6993 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4h_POST, SubRegIdx: AArch64::dsub0);
6994 return;
6995 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6996 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8h_POST, SubRegIdx: AArch64::qsub0);
6997 return;
6998 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6999 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2s_POST, SubRegIdx: AArch64::dsub0);
7000 return;
7001 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7002 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4s_POST, SubRegIdx: AArch64::qsub0);
7003 return;
7004 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7005 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d_POST, SubRegIdx: AArch64::dsub0);
7006 return;
7007 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7008 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2d_POST, SubRegIdx: AArch64::qsub0);
7009 return;
7010 }
7011 break;
7012 }
7013 case AArch64ISD::LD1x2post: {
7014 if (VT == MVT::v8i8) {
7015 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8b_POST, SubRegIdx: AArch64::dsub0);
7016 return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
}

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
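///
/// For example (illustrative only): MVT::nxv16i1 maps to MVT::nxv16i8 and
/// MVT::nxv4i1 maps to MVT::nxv4i32 (4 x 32 = 128); with NumVec == 2 the
/// latter becomes MVT::nxv8i32.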
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}

/// Return the EVT of the data associated to a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
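///
/// For (masked) loads and stores this is the value type with its element
/// type replaced by the in-memory element type; e.g. an extending masked
/// load with value type nxv4i32 and memory type nxv4i16 yields nxv4i16.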
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
    return MemIntr->getMemoryVT();

  if (isa<MemSDNode>(Root)) {
    EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();

    EVT DataVT;
    if (auto *Load = dyn_cast<LoadSDNode>(Root))
      DataVT = Load->getValueType(0);
    else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
      DataVT = Load->getValueType(0);
    else if (auto *Store = dyn_cast<StoreSDNode>(Root))
      DataVT = Store->getValue().getValueType();
    else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
      DataVT = Store->getValue().getValueType();
    else
      llvm_unreachable("Unexpected MemSDNode!");

    return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
  }

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (Root->getConstantOperandVal(1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1dq:
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1wq:
    return EVT(MVT::nxv1i32);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
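///
/// For example (a sketch of the arithmetic only): with MemVT == nxv4i32
/// (16 bytes per 128-bit granule), an address of the form
/// Base + (vscale * 32) gives OffImm == 2 and is folded iff Min <= 2 <= Max.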
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(FI)) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  int64_t MulImm = std::numeric_limits<int64_t>::max();
  if (VScale.getOpcode() == ISD::VSCALE) {
    MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
  } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
    // A plain constant byte offset can only be folded when the exact SVE
    // vector length is known at compile time; rewrite it as an equivalent
    // vscale multiple.
    int64_t ByteOffset = C->getSExtValue();
    const auto KnownVScale =
        Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;

    if (!KnownVScale || ByteOffset % KnownVScale != 0)
      return false;

    MulImm = ByteOffset / KnownVScale;
  } else
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(FI))
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}

/// Select register plus register addressing mode for SVE, with scaled
/// offset.
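///
/// For example (illustrative): with Scale == 2, (add x0, (shl x1, #2))
/// selects Base = x0 and Offset = x1, while (add x0, #imm) is only matched
/// when imm is a multiple of 4, in which case imm >> 2 is materialised into
/// a register with MOVi64imm and used as the offset.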
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8-bit data does not come with an SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

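/// Select a base and a constant offset (in units of \p Scale) for an SME
/// tile slice. A constant is only folded if it is positive, at most
/// \p MaxSize and a multiple of \p Scale; otherwise the expression is
/// matched as 'reg + 0'.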
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
    if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
      int64_t ImmOff = C->getSExtValue();
      if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
        return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
    }
    return SDValue();
  };

  if (SDValue C = MatchConstantOffset(N)) {
    Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
    Offset = C;
    return true;
  }

  // Try to untangle an ADD node into a 'reg + offset'
  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
      Base = N.getOperand(0);
      Offset = C;
      return true;
    }
  }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}

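/// Match a constant usable as the unsigned 6-bit immediate of a
/// compare-and-branch instruction. The bounds are tightened per condition
/// code so the immediate can still be adjusted if the branch direction is
/// later reversed (see the comment in the body).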
bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
                                                      SDValue &Imm) {
  AArch64CC::CondCode CC =
      static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    // Check conservatively if the immediate fits the valid range [0, 64).
    // Immediate variants for GE and HS definitely need to be decremented
    // when lowering the pseudos later, so an immediate of 1 would become 0.
    // For the inverse conditions LT and LO we don't know for sure if they
    // will need a decrement but should the decision be made to reverse the
    // branch condition, we again end up with the need to decrement.
    // The same argument holds for LE, LS, GT and HI and possibly
    // incremented immediates. This can lead to slightly less optimal
    // codegen, e.g. we never codegen the legal case
    //   cblt w0, #63, A
    // because we could end up with the illegal case
    //   cbge w0, #64, B
    // should the decision to reverse the branch direction be made. For the
    // lower bound cases this is no problem since we can express comparisons
    // against 0 with either tbz/tbnz or using wzr/xzr.
    uint64_t LowerBound = 0, UpperBound = 64;
    switch (CC) {
    case AArch64CC::GE:
    case AArch64CC::HS:
    case AArch64CC::LT:
    case AArch64CC::LO:
      LowerBound = 1;
      break;
    case AArch64CC::LE:
    case AArch64CC::LS:
    case AArch64CC::GT:
    case AArch64CC::HI:
      UpperBound = 63;
      break;
    default:
      break;
    }

    if (CN->getAPIntValue().uge(LowerBound) &&
        CN->getAPIntValue().ult(UpperBound)) {
      SDLoc DL(N);
      Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL,
                                      N.getValueType());
      return true;
    }
  }

  return false;
}

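/// Match an operand for a compare-and-branch on an extended register:
/// either a value already known (via AssertZext/AssertSext) to be i8 when
/// \p MatchCBB is true, or i16 otherwise, in which case no extend is
/// needed, or an explicit UXTB/SXTB (resp. UXTH/SXTH) extend whose encoding
/// is returned in \p ExtType.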
template <bool MatchCBB>
bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
                                                    SDValue &ExtType) {

  // Use an invalid shift-extend value to indicate we don't need to extend
  // later.
  if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
    EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
    if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
      return false;
    Reg = N.getOperand(0);
    ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
                                              SDLoc(N), MVT::i32);
    return true;
  }

  AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);

  if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
      (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
    Reg = N.getOperand(0);
    ExtType =
        CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}