1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
19#include "RISCVSelectionDAGInfo.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/SDPatternMatch.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
23#include "llvm/Support/Alignment.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
33extern cl::opt<uint32_t> PreferredLandingPadLabel;
34
35static cl::opt<bool> UsePseudoMovImm(
36 "riscv-use-rematerializable-movimm", cl::Hidden,
37 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
38 "constant materialization"),
39 cl::init(Val: false));
40
41#define GET_DAGISEL_BODY RISCVDAGToDAGISel
42#include "RISCVGenDAGISel.inc"
43
44void RISCVDAGToDAGISel::PreprocessISelDAG() {
45 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
46
47 bool MadeChange = false;
48 while (Position != CurDAG->allnodes_begin()) {
49 SDNode *N = &*--Position;
50 if (N->use_empty())
51 continue;
52
53 SDValue Result;
54 switch (N->getOpcode()) {
55 case ISD::SPLAT_VECTOR: {
56 if (Subtarget->hasStdExtP())
57 break;
58 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
59 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
60 MVT VT = N->getSimpleValueType(ResNo: 0);
61 unsigned Opc =
62 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
63 SDLoc DL(N);
64 SDValue VL = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
65 SDValue Src = N->getOperand(Num: 0);
66 if (VT.isInteger())
67 Src = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget->getXLenVT(),
68 Operand: N->getOperand(Num: 0));
69 Result = CurDAG->getNode(Opcode: Opc, DL, VT, N1: CurDAG->getUNDEF(VT), N2: Src, N3: VL);
70 break;
71 }
72 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
73 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
74 // load. Done after lowering and combining so that we have a chance to
75 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
76 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
77 MVT VT = N->getSimpleValueType(ResNo: 0);
78 SDValue Passthru = N->getOperand(Num: 0);
79 SDValue Lo = N->getOperand(Num: 1);
80 SDValue Hi = N->getOperand(Num: 2);
81 SDValue VL = N->getOperand(Num: 3);
82 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
83 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
84 "Unexpected VTs!");
85 MachineFunction &MF = CurDAG->getMachineFunction();
86 SDLoc DL(N);
87
88 // Create temporary stack for each expanding node.
89 SDValue StackSlot =
90 CurDAG->CreateStackTemporary(Bytes: TypeSize::getFixed(ExactSize: 8), Alignment: Align(8));
91 int FI = cast<FrameIndexSDNode>(Val: StackSlot.getNode())->getIndex();
92 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
93
94 SDValue Chain = CurDAG->getEntryNode();
95 Lo = CurDAG->getStore(Chain, dl: DL, Val: Lo, Ptr: StackSlot, PtrInfo: MPI, Alignment: Align(8));
96
97 SDValue OffsetSlot =
98 CurDAG->getMemBasePlusOffset(Base: StackSlot, Offset: TypeSize::getFixed(ExactSize: 4), DL);
99 Hi = CurDAG->getStore(Chain, dl: DL, Val: Hi, Ptr: OffsetSlot, PtrInfo: MPI.getWithOffset(O: 4),
100 Alignment: Align(8));
101
102 Chain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo, N2: Hi);
103
104 SDVTList VTs = CurDAG->getVTList(VTs: {VT, MVT::Other});
105 SDValue IntID =
106 CurDAG->getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: MVT::i64);
107 SDValue Ops[] = {Chain,
108 IntID,
109 Passthru,
110 StackSlot,
111 CurDAG->getRegister(Reg: RISCV::X0, VT: MVT::i64),
112 VL};
113
114 Result = CurDAG->getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
115 MemVT: MVT::i64, PtrInfo: MPI, Alignment: Align(8),
116 Flags: MachineMemOperand::MOLoad);
117 break;
118 }
119 case ISD::FP_EXTEND: {
120 // We only have vector patterns for riscv_fpextend_vl in isel.
121 SDLoc DL(N);
122 MVT VT = N->getSimpleValueType(ResNo: 0);
123 if (!VT.isVector())
124 break;
125 SDValue VLMAX = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
126 SDValue TrueMask = CurDAG->getNode(
127 Opcode: RISCVISD::VMSET_VL, DL, VT: VT.changeVectorElementType(EltVT: MVT::i1), Operand: VLMAX);
128 Result = CurDAG->getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT, N1: N->getOperand(Num: 0),
129 N2: TrueMask, N3: VLMAX);
130 break;
131 }
132 }
133
134 if (Result) {
135 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
136 LLVM_DEBUG(N->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\nNew: ");
138 LLVM_DEBUG(Result->dump(CurDAG));
139 LLVM_DEBUG(dbgs() << "\n");
140
141 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result);
142 MadeChange = true;
143 }
144 }
145
146 if (MadeChange)
147 CurDAG->RemoveDeadNodes();
148}
149
150void RISCVDAGToDAGISel::PostprocessISelDAG() {
151 HandleSDNode Dummy(CurDAG->getRoot());
152 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
153
154 bool MadeChange = false;
155 while (Position != CurDAG->allnodes_begin()) {
156 SDNode *N = &*--Position;
157 // Skip dead nodes and any non-machine opcodes.
158 if (N->use_empty() || !N->isMachineOpcode())
159 continue;
160
161 MadeChange |= doPeepholeSExtW(Node: N);
162
163 // FIXME: This is here only because the VMerge transform doesn't
164 // know how to handle masked true inputs. Once that has been moved
165 // to post-ISEL, this can be deleted as well.
166 MadeChange |= doPeepholeMaskedRVV(Node: cast<MachineSDNode>(Val: N));
167 }
168
169 CurDAG->setRoot(Dummy.getValue());
170
171 // After we're done with everything else, convert IMPLICIT_DEF
172 // passthru operands to NoRegister. This is required to workaround
173 // an optimization deficiency in MachineCSE. This really should
174 // be merged back into each of the patterns (i.e. there's no good
175 // reason not to go directly to NoReg), but is being done this way
176 // to allow easy backporting.
177 MadeChange |= doPeepholeNoRegPassThru();
178
179 if (MadeChange)
180 CurDAG->RemoveDeadNodes();
181}
182
183static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
184 RISCVMatInt::InstSeq &Seq) {
185 SDValue SrcReg = CurDAG->getRegister(Reg: RISCV::X0, VT);
186 for (const RISCVMatInt::Inst &Inst : Seq) {
187 SDValue SDImm = CurDAG->getSignedTargetConstant(Val: Inst.getImm(), DL, VT);
188 SDNode *Result = nullptr;
189 switch (Inst.getOpndKind()) {
190 case RISCVMatInt::Imm:
191 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SDImm);
192 break;
193 case RISCVMatInt::RegX0:
194 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg,
195 Op2: CurDAG->getRegister(Reg: RISCV::X0, VT));
196 break;
197 case RISCVMatInt::RegReg:
198 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SrcReg);
199 break;
200 case RISCVMatInt::RegImm:
201 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SDImm);
202 break;
203 }
204
205 // Only the first instruction has X0 as its source.
206 SrcReg = SDValue(Result, 0);
207 }
208
209 return SrcReg;
210}
211
212static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
213 int64_t Imm, const RISCVSubtarget &Subtarget) {
214 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget);
215
216 // Use a rematerializable pseudo instruction for short sequences if enabled.
217 if (Seq.size() == 2 && UsePseudoMovImm)
218 return SDValue(
219 CurDAG->getMachineNode(Opcode: RISCV::PseudoMovImm, dl: DL, VT,
220 Op1: CurDAG->getSignedTargetConstant(Val: Imm, DL, VT)),
221 0);
222
223 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
224 // worst an LUI+ADDIW. This will require an extra register, but avoids a
225 // constant pool.
226 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
227 // low and high 32 bits are the same and bit 31 and 63 are set.
228 if (Seq.size() > 3) {
229 unsigned ShiftAmt, AddOpc;
230 RISCVMatInt::InstSeq SeqLo =
231 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc);
232 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
233 SDValue Lo = selectImmSeq(CurDAG, DL, VT, Seq&: SeqLo);
234
235 SDValue SLLI = SDValue(
236 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: Lo,
237 Op2: CurDAG->getTargetConstant(Val: ShiftAmt, DL, VT)),
238 0);
239 return SDValue(CurDAG->getMachineNode(Opcode: AddOpc, dl: DL, VT, Op1: Lo, Op2: SLLI), 0);
240 }
241 }
242
243 // Otherwise, use the original sequence.
244 return selectImmSeq(CurDAG, DL, VT, Seq);
245}
246
247void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
248 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
249 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
250 bool IsLoad, MVT *IndexVT) {
251 SDValue Chain = Node->getOperand(Num: 0);
252
253 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Base pointer.
254
255 if (IsStridedOrIndexed) {
256 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Index.
257 if (IndexVT)
258 *IndexVT = Operands.back()->getSimpleValueType(ResNo: 0);
259 }
260
261 if (IsMasked) {
262 SDValue Mask = Node->getOperand(Num: CurOp++);
263 Operands.push_back(Elt: Mask);
264 }
265 SDValue VL;
266 selectVLOp(N: Node->getOperand(Num: CurOp++), VL);
267 Operands.push_back(Elt: VL);
268
269 MVT XLenVT = Subtarget->getXLenVT();
270 SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
271 Operands.push_back(Elt: SEWOp);
272
273 // At the IR layer, all the masked load intrinsics have policy operands,
274 // none of the others do. All have passthru operands. For our pseudos,
275 // all loads have policy operands.
276 if (IsLoad) {
277 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
278 if (IsMasked)
279 Policy = Node->getConstantOperandVal(Num: CurOp++);
280 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
281 Operands.push_back(Elt: PolicyOp);
282 }
283
284 Operands.push_back(Elt: Chain); // Chain.
285}
286
287void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
288 bool IsStrided) {
289 SDLoc DL(Node);
290 MVT VT = Node->getSimpleValueType(ResNo: 0);
291 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
292 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
293
294 unsigned CurOp = 2;
295 SmallVector<SDValue, 8> Operands;
296
297 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
298
299 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
300 Operands, /*IsLoad=*/true);
301
302 const RISCV::VLSEGPseudo *P =
303 RISCV::getVLSEGPseudo(NF, Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
304 LMUL: static_cast<unsigned>(LMUL));
305 MachineSDNode *Load =
306 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);
307
308 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
309
310 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
311 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
312 CurDAG->RemoveDeadNode(N: Node);
313}
314
315void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
316 bool IsMasked) {
317 SDLoc DL(Node);
318 MVT VT = Node->getSimpleValueType(ResNo: 0);
319 MVT XLenVT = Subtarget->getXLenVT();
320 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
321 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
322
323 unsigned CurOp = 2;
324 SmallVector<SDValue, 7> Operands;
325
326 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
327
328 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
329 /*IsStridedOrIndexed*/ false, Operands,
330 /*IsLoad=*/true);
331
332 const RISCV::VLSEGPseudo *P =
333 RISCV::getVLSEGPseudo(NF, Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
334 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
335 MachineSDNode *Load = CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped,
336 VT2: XLenVT, VT3: MVT::Other, Ops: Operands);
337
338 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
339
340 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0)); // Result
341 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1)); // VL
342 ReplaceUses(F: SDValue(Node, 2), T: SDValue(Load, 2)); // Chain
343 CurDAG->RemoveDeadNode(N: Node);
344}
345
346void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
347 bool IsOrdered) {
348 SDLoc DL(Node);
349 MVT VT = Node->getSimpleValueType(ResNo: 0);
350 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
351 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
352
353 unsigned CurOp = 2;
354 SmallVector<SDValue, 8> Operands;
355
356 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
357
358 MVT IndexVT;
359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
360 /*IsStridedOrIndexed*/ true, Operands,
361 /*IsLoad=*/true, IndexVT: &IndexVT);
362
363#ifndef NDEBUG
364 // Number of element = RVVBitsPerBlock * LMUL / SEW
365 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
366 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
367 if (DecodedLMUL.second)
368 ContainedTyNumElts /= DecodedLMUL.first;
369 else
370 ContainedTyNumElts *= DecodedLMUL.first;
371 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
372 "Element count mismatch");
373#endif
374
375 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
376 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
377 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
378 reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
379 "values when XLEN=32");
380 }
381 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
382 NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
383 IndexLMUL: static_cast<unsigned>(IndexLMUL));
384 MachineSDNode *Load =
385 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);
386
387 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
388
389 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
390 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
391 CurDAG->RemoveDeadNode(N: Node);
392}
393
394void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
395 bool IsStrided) {
396 SDLoc DL(Node);
397 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
398 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
399 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
400
401 unsigned CurOp = 2;
402 SmallVector<SDValue, 8> Operands;
403
404 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
405
406 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
407 Operands);
408
409 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
410 NF, Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
411 MachineSDNode *Store =
412 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);
413
414 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
415
416 ReplaceNode(F: Node, T: Store);
417}
418
419void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
420 bool IsOrdered) {
421 SDLoc DL(Node);
422 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
423 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
424 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
425
426 unsigned CurOp = 2;
427 SmallVector<SDValue, 8> Operands;
428
429 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
430
431 MVT IndexVT;
432 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
433 /*IsStridedOrIndexed*/ true, Operands,
434 /*IsLoad=*/false, IndexVT: &IndexVT);
435
436#ifndef NDEBUG
437 // Number of element = RVVBitsPerBlock * LMUL / SEW
438 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
439 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
440 if (DecodedLMUL.second)
441 ContainedTyNumElts /= DecodedLMUL.first;
442 else
443 ContainedTyNumElts *= DecodedLMUL.first;
444 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
445 "Element count mismatch");
446#endif
447
448 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
449 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
450 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
451 reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
452 "values when XLEN=32");
453 }
454 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
455 NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
456 IndexLMUL: static_cast<unsigned>(IndexLMUL));
457 MachineSDNode *Store =
458 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);
459
460 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
461
462 ReplaceNode(F: Node, T: Store);
463}
464
465void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
466 if (!Subtarget->hasVInstructions())
467 return;
468
469 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
470
471 SDLoc DL(Node);
472 MVT XLenVT = Subtarget->getXLenVT();
473
474 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
475
476 assert((IntNo == Intrinsic::riscv_vsetvli ||
477 IntNo == Intrinsic::riscv_vsetvlimax) &&
478 "Unexpected vsetvli intrinsic");
479
480 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
481 unsigned Offset = (VLMax ? 1 : 2);
482
483 assert(Node->getNumOperands() == Offset + 2 &&
484 "Unexpected number of operands");
485
486 unsigned SEW =
487 RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: Offset) & 0x7);
488 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
489 Node->getConstantOperandVal(Num: Offset + 1) & 0x7);
490
491 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL: VLMul, SEW, /*TailAgnostic*/ true,
492 /*MaskAgnostic*/ true);
493 SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);
494
495 SDValue VLOperand;
496 unsigned Opcode = RISCV::PseudoVSETVLI;
497 if (auto *C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) {
498 if (auto VLEN = Subtarget->getRealVLen())
499 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
500 VLMax = true;
501 }
502 if (VLMax || isAllOnesConstant(V: Node->getOperand(Num: 1))) {
503 VLOperand = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
504 Opcode = RISCV::PseudoVSETVLIX0;
505 } else {
506 VLOperand = Node->getOperand(Num: 1);
507
508 if (auto *C = dyn_cast<ConstantSDNode>(Val&: VLOperand)) {
509 uint64_t AVL = C->getZExtValue();
510 if (isUInt<5>(x: AVL)) {
511 SDValue VLImm = CurDAG->getTargetConstant(Val: AVL, DL, VT: XLenVT);
512 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: RISCV::PseudoVSETIVLI, dl: DL,
513 VT: XLenVT, Op1: VLImm, Op2: VTypeIOp));
514 return;
515 }
516 }
517 }
518
519 ReplaceNode(F: Node,
520 T: CurDAG->getMachineNode(Opcode, dl: DL, VT: XLenVT, Op1: VLOperand, Op2: VTypeIOp));
521}
522
523void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) {
524 if (!Subtarget->hasVendorXSfmmbase())
525 return;
526
527 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
528
529 SDLoc DL(Node);
530 MVT XLenVT = Subtarget->getXLenVT();
531
532 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
533
534 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
535 IntNo == Intrinsic::riscv_sf_vsettm ||
536 IntNo == Intrinsic::riscv_sf_vsettk) &&
537 "Unexpected XSfmm vset intrinsic");
538
539 unsigned SEW = RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: 2));
540 unsigned Widen = RISCVVType::decodeTWiden(TWiden: Node->getConstantOperandVal(Num: 3));
541 unsigned PseudoOpCode =
542 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
543 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
544 : RISCV::PseudoSF_VSETTK;
545
546 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
547 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, AltFmt: 0);
548 SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);
549
550 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
551 Op1: Node->getOperand(Num: 1), Op2: VTypeIOp));
552 } else {
553 SDValue Log2SEW = CurDAG->getTargetConstant(Val: Log2_32(Value: SEW), DL, VT: XLenVT);
554 SDValue TWiden = CurDAG->getTargetConstant(Val: Widen, DL, VT: XLenVT);
555 ReplaceNode(F: Node,
556 T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
557 Op1: Node->getOperand(Num: 1), Op2: Log2SEW, Op3: TWiden));
558 }
559}
560
561bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
562 MVT VT = Node->getSimpleValueType(ResNo: 0);
563 unsigned Opcode = Node->getOpcode();
564 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
565 "Unexpected opcode");
566 SDLoc DL(Node);
567
568 // For operations of the form (x << C1) op C2, check if we can use
569 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
570 SDValue N0 = Node->getOperand(Num: 0);
571 SDValue N1 = Node->getOperand(Num: 1);
572
573 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: N1);
574 if (!Cst)
575 return false;
576
577 int64_t Val = Cst->getSExtValue();
578
579 // Check if immediate can already use ANDI/ORI/XORI.
580 if (isInt<12>(x: Val))
581 return false;
582
583 SDValue Shift = N0;
584
585 // If Val is simm32 and we have a sext_inreg from i32, then the binop
586 // produces at least 33 sign bits. We can peek through the sext_inreg and use
587 // a SLLIW at the end.
588 bool SignExt = false;
589 if (isInt<32>(x: Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
590 N0.hasOneUse() && cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT() == MVT::i32) {
591 SignExt = true;
592 Shift = N0.getOperand(i: 0);
593 }
594
595 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
596 return false;
597
598 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1));
599 if (!ShlCst)
600 return false;
601
602 uint64_t ShAmt = ShlCst->getZExtValue();
603
604 // Make sure that we don't change the operation by removing bits.
605 // This only matters for OR and XOR, AND is unaffected.
606 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(N: ShAmt);
607 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
608 return false;
609
610 int64_t ShiftedVal = Val >> ShAmt;
611 if (!isInt<12>(x: ShiftedVal))
612 return false;
613
614 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
615 if (SignExt && ShAmt >= 32)
616 return false;
617
618 // Ok, we can reorder to get a smaller immediate.
619 unsigned BinOpc;
620 switch (Opcode) {
621 default: llvm_unreachable("Unexpected opcode");
622 case ISD::AND: BinOpc = RISCV::ANDI; break;
623 case ISD::OR: BinOpc = RISCV::ORI; break;
624 case ISD::XOR: BinOpc = RISCV::XORI; break;
625 }
626
627 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
628
629 SDNode *BinOp = CurDAG->getMachineNode(
630 Opcode: BinOpc, dl: DL, VT, Op1: Shift.getOperand(i: 0),
631 Op2: CurDAG->getSignedTargetConstant(Val: ShiftedVal, DL, VT));
632 SDNode *SLLI =
633 CurDAG->getMachineNode(Opcode: ShOpc, dl: DL, VT, Op1: SDValue(BinOp, 0),
634 Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
635 ReplaceNode(F: Node, T: SLLI);
636 return true;
637}
638
639bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
640 unsigned Opc;
641
642 if (Subtarget->hasVendorXTHeadBb())
643 Opc = RISCV::TH_EXT;
644 else if (Subtarget->hasVendorXAndesPerf())
645 Opc = RISCV::NDS_BFOS;
646 else if (Subtarget->hasVendorXqcibm())
647 Opc = RISCV::QC_EXT;
648 else
649 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
650 return false;
651
652 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
653 if (!N1C)
654 return false;
655
656 SDValue N0 = Node->getOperand(Num: 0);
657 if (!N0.hasOneUse())
658 return false;
659
660 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
661 const SDLoc &DL, MVT VT) {
662 if (Opc == RISCV::QC_EXT) {
663 // QC.EXT X, width, shamt
664 // shamt is the same as Lsb
665 // width is the number of bits to extract from the Lsb
666 Msb = Msb - Lsb + 1;
667 }
668 return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
669 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
670 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
671 };
672
673 SDLoc DL(Node);
674 MVT VT = Node->getSimpleValueType(ResNo: 0);
675 const unsigned RightShAmt = N1C->getZExtValue();
676
677 // Transform (sra (shl X, C1) C2) with C1 < C2
678 // -> (SignedBitfieldExtract X, msb, lsb)
679 if (N0.getOpcode() == ISD::SHL) {
680 auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
681 if (!N01C)
682 return false;
683
684 const unsigned LeftShAmt = N01C->getZExtValue();
685 // Make sure that this is a bitfield extraction (i.e., the shift-right
686 // amount can not be less than the left-shift).
687 if (LeftShAmt > RightShAmt)
688 return false;
689
690 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
691 const unsigned Msb = MsbPlusOne - 1;
692 const unsigned Lsb = RightShAmt - LeftShAmt;
693
694 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
695 ReplaceNode(F: Node, T: Sbe);
696 return true;
697 }
698
699 // Transform (sra (sext_inreg X, _), C) ->
700 // (SignedBitfieldExtract X, msb, lsb)
701 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
702 unsigned ExtSize =
703 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
704
705 // ExtSize of 32 should use sraiw via tablegen pattern.
706 if (ExtSize == 32)
707 return false;
708
709 const unsigned Msb = ExtSize - 1;
710 // If the shift-right amount is greater than Msb, it means that extracts
711 // the X[Msb] bit and sign-extend it.
712 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
713
714 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
715 ReplaceNode(F: Node, T: Sbe);
716 return true;
717 }
718
719 return false;
720}
721
722bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
723 // Only supported with XAndesPerf at the moment.
724 if (!Subtarget->hasVendorXAndesPerf())
725 return false;
726
727 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
728 if (!N1C)
729 return false;
730
731 SDValue N0 = Node->getOperand(Num: 0);
732 if (!N0.hasOneUse())
733 return false;
734
735 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
736 const SDLoc &DL, MVT VT) {
737 unsigned Opc = RISCV::NDS_BFOS;
738 // If the Lsb is equal to the Msb, then the Lsb should be 0.
739 if (Lsb == Msb)
740 Lsb = 0;
741 return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
742 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
743 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
744 };
745
746 SDLoc DL(Node);
747 MVT VT = Node->getSimpleValueType(ResNo: 0);
748 const unsigned RightShAmt = N1C->getZExtValue();
749
750 // Transform (sra (shl X, C1) C2) with C1 > C2
751 // -> (NDS.BFOS X, lsb, msb)
752 if (N0.getOpcode() == ISD::SHL) {
753 auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
754 if (!N01C)
755 return false;
756
757 const unsigned LeftShAmt = N01C->getZExtValue();
758 // Make sure that this is a bitfield insertion (i.e., the shift-right
759 // amount should be less than the left-shift).
760 if (LeftShAmt <= RightShAmt)
761 return false;
762
763 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
764 const unsigned Msb = MsbPlusOne - 1;
765 const unsigned Lsb = LeftShAmt - RightShAmt;
766
767 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
768 ReplaceNode(F: Node, T: Sbi);
769 return true;
770 }
771
772 return false;
773}
774
775bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
776 const SDLoc &DL, MVT VT,
777 SDValue X, unsigned Msb,
778 unsigned Lsb) {
779 unsigned Opc;
780
781 if (Subtarget->hasVendorXTHeadBb()) {
782 Opc = RISCV::TH_EXTU;
783 } else if (Subtarget->hasVendorXAndesPerf()) {
784 Opc = RISCV::NDS_BFOZ;
785 } else if (Subtarget->hasVendorXqcibm()) {
786 Opc = RISCV::QC_EXTU;
787 // QC.EXTU X, width, shamt
788 // shamt is the same as Lsb
789 // width is the number of bits to extract from the Lsb
790 Msb = Msb - Lsb + 1;
791 } else {
792 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
793 return false;
794 }
795
796 SDNode *Ube = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
797 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
798 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
799 ReplaceNode(F: Node, T: Ube);
800 return true;
801}
802
803bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
804 const SDLoc &DL, MVT VT,
805 SDValue X, unsigned Msb,
806 unsigned Lsb) {
807 // Only supported with XAndesPerf at the moment.
808 if (!Subtarget->hasVendorXAndesPerf())
809 return false;
810
811 unsigned Opc = RISCV::NDS_BFOZ;
812
813 // If the Lsb is equal to the Msb, then the Lsb should be 0.
814 if (Lsb == Msb)
815 Lsb = 0;
816 SDNode *Ubi = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
817 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
818 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
819 ReplaceNode(F: Node, T: Ubi);
820 return true;
821}
822
823bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
824 // Target does not support indexed loads.
825 if (!Subtarget->hasVendorXTHeadMemIdx())
826 return false;
827
828 LoadSDNode *Ld = cast<LoadSDNode>(Val: Node);
829 ISD::MemIndexedMode AM = Ld->getAddressingMode();
830 if (AM == ISD::UNINDEXED)
831 return false;
832
833 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Ld->getOffset());
834 if (!C)
835 return false;
836
837 EVT LoadVT = Ld->getMemoryVT();
838 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
839 "Unexpected addressing mode");
840 bool IsPre = AM == ISD::PRE_INC;
841 bool IsPost = AM == ISD::POST_INC;
842 int64_t Offset = C->getSExtValue();
843
844 // The constants that can be encoded in the THeadMemIdx instructions
845 // are of the form (sign_extend(imm5) << imm2).
846 unsigned Shift;
847 for (Shift = 0; Shift < 4; Shift++)
848 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
849 break;
850
851 // Constant cannot be encoded.
852 if (Shift == 4)
853 return false;
854
855 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
856 unsigned Opcode;
857 if (LoadVT == MVT::i8 && IsPre)
858 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
859 else if (LoadVT == MVT::i8 && IsPost)
860 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
861 else if (LoadVT == MVT::i16 && IsPre)
862 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
863 else if (LoadVT == MVT::i16 && IsPost)
864 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
865 else if (LoadVT == MVT::i32 && IsPre)
866 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
867 else if (LoadVT == MVT::i32 && IsPost)
868 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
869 else if (LoadVT == MVT::i64 && IsPre)
870 Opcode = RISCV::TH_LDIB;
871 else if (LoadVT == MVT::i64 && IsPost)
872 Opcode = RISCV::TH_LDIA;
873 else
874 return false;
875
876 EVT Ty = Ld->getOffset().getValueType();
877 SDValue Ops[] = {
878 Ld->getBasePtr(),
879 CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(Node), VT: Ty),
880 CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(Node), VT: Ty), Ld->getChain()};
881 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(Node), VT1: Ld->getValueType(ResNo: 0),
882 VT2: Ld->getValueType(ResNo: 1), VT3: MVT::Other, Ops);
883
884 MachineMemOperand *MemOp = cast<MemSDNode>(Val: Node)->getMemOperand();
885 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: New), NewMemRefs: {MemOp});
886
887 ReplaceNode(F: Node, T: New);
888
889 return true;
890}
891
892static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
893 SDValue Lo, SDValue Hi) {
894 SDValue Ops[] = {
895 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), Lo,
896 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), Hi,
897 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
898
899 return SDValue(
900 CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT, Ops), 0);
901}
902
903// Helper to extract Lo and Hi values from a GPR pair.
904static std::pair<SDValue, SDValue>
905extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair) {
906 SDValue Lo =
907 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL, VT: MVT::i32, Operand: Pair);
908 SDValue Hi =
909 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_odd, DL, VT: MVT::i32, Operand: Pair);
910 return {Lo, Hi};
911}
912
913// Try to match WMACC pattern: ADDD where one operand pair comes from a
914// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
915bool RISCVDAGToDAGISel::tryWideningMulAcc(SDNode *Node, const SDLoc &DL) {
916 assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");
917
918 SDValue Op0Lo = Node->getOperand(Num: 0);
919 SDValue Op0Hi = Node->getOperand(Num: 1);
920 SDValue Op1Lo = Node->getOperand(Num: 2);
921 SDValue Op1Hi = Node->getOperand(Num: 3);
922
923 auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
924 unsigned Opc = Lo.getOpcode();
925 if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
926 Opc != RISCVISD::WMULSU)
927 return false;
928 return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
929 Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
930 };
931
932 SDNode *MulNode = nullptr;
933 SDValue AddLo, AddHi;
934
935 // Check if first operand pair is a supported multiply with single use.
936 if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
937 MulNode = Op0Lo.getNode();
938 AddLo = Op1Lo;
939 AddHi = Op1Hi;
940 }
941 // ADDD is commutative. Check if second operand pair is a supported multiply
942 // with single use.
943 else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
944 MulNode = Op1Lo.getNode();
945 AddLo = Op0Lo;
946 AddHi = Op0Hi;
947 } else {
948 return false;
949 }
950
951 unsigned Opc;
952 switch (MulNode->getOpcode()) {
953 default:
954 llvm_unreachable("Unexpected multiply opcode");
955 case ISD::UMUL_LOHI:
956 Opc = RISCV::WMACCU;
957 break;
958 case ISD::SMUL_LOHI:
959 Opc = RISCV::WMACC;
960 break;
961 case RISCVISD::WMULSU:
962 Opc = RISCV::WMACCSU;
963 break;
964 }
965
966 SDValue Acc = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: AddLo, Hi: AddHi);
967
968 // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
969 SDValue M0 = MulNode->getOperand(Num: 0);
970 SDValue M1 = MulNode->getOperand(Num: 1);
971 MachineSDNode *New =
972 CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Acc, Op2: M0, Op3: M1);
973
974 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
975 ReplaceUses(F: SDValue(Node, 0), T: Lo);
976 ReplaceUses(F: SDValue(Node, 1), T: Hi);
977 CurDAG->RemoveDeadNode(N: Node);
978 return true;
979}
980
981static Register getTileReg(uint64_t TileNum) {
982 assert(TileNum <= 15 && "Invalid tile number");
983 return RISCV::T0 + TileNum;
984}
985
986void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
987 if (!Subtarget->hasVInstructions())
988 return;
989
990 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
991
992 SDLoc DL(Node);
993 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
994
995 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
996 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
997 "Unexpected vsetvli intrinsic");
998
999 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
1000 unsigned Log2SEW = Log2_32(Value: Node->getConstantOperandVal(Num: 6));
1001 SDValue SEWOp =
1002 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: Subtarget->getXLenVT());
1003 SmallVector<SDValue, 8> Operands = {Node->getOperand(Num: 2), Node->getOperand(Num: 3),
1004 Node->getOperand(Num: 4), Node->getOperand(Num: 5),
1005 Node->getOperand(Num: 8), SEWOp,
1006 Node->getOperand(Num: 0)};
1007
1008 unsigned Opcode;
1009 auto *LMulSDNode = cast<ConstantSDNode>(Val: Node->getOperand(Num: 7));
1010 switch (LMulSDNode->getSExtValue()) {
1011 case 5:
1012 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
1013 : RISCV::PseudoSF_VC_I_SE_MF8;
1014 break;
1015 case 6:
1016 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
1017 : RISCV::PseudoSF_VC_I_SE_MF4;
1018 break;
1019 case 7:
1020 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
1021 : RISCV::PseudoSF_VC_I_SE_MF2;
1022 break;
1023 case 0:
1024 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
1025 : RISCV::PseudoSF_VC_I_SE_M1;
1026 break;
1027 case 1:
1028 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
1029 : RISCV::PseudoSF_VC_I_SE_M2;
1030 break;
1031 case 2:
1032 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
1033 : RISCV::PseudoSF_VC_I_SE_M4;
1034 break;
1035 case 3:
1036 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
1037 : RISCV::PseudoSF_VC_I_SE_M8;
1038 break;
1039 }
1040
1041 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
1042 Opcode, dl: DL, VT: Node->getSimpleValueType(ResNo: 0), Ops: Operands));
1043}
1044
1045static unsigned getSegInstNF(unsigned Intrinsic) {
1046#define INST_NF_CASE(NAME, NF) \
1047 case Intrinsic::riscv_##NAME##NF: \
1048 return NF;
1049#define INST_NF_CASE_MASK(NAME, NF) \
1050 case Intrinsic::riscv_##NAME##NF##_mask: \
1051 return NF;
1052#define INST_NF_CASE_FF(NAME, NF) \
1053 case Intrinsic::riscv_##NAME##NF##ff: \
1054 return NF;
1055#define INST_NF_CASE_FF_MASK(NAME, NF) \
1056 case Intrinsic::riscv_##NAME##NF##ff_mask: \
1057 return NF;
1058#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
1059 MACRO_NAME(NAME, 2) \
1060 MACRO_NAME(NAME, 3) \
1061 MACRO_NAME(NAME, 4) \
1062 MACRO_NAME(NAME, 5) \
1063 MACRO_NAME(NAME, 6) \
1064 MACRO_NAME(NAME, 7) \
1065 MACRO_NAME(NAME, 8)
1066#define INST_ALL_NF_CASE(NAME) \
1067 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
1068 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
1069#define INST_ALL_NF_CASE_WITH_FF(NAME) \
1070 INST_ALL_NF_CASE(NAME) \
1071 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
1072 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
1073 switch (Intrinsic) {
1074 default:
1075 llvm_unreachable("Unexpected segment load/store intrinsic");
1076 INST_ALL_NF_CASE_WITH_FF(vlseg)
1077 INST_ALL_NF_CASE(vlsseg)
1078 INST_ALL_NF_CASE(vloxseg)
1079 INST_ALL_NF_CASE(vluxseg)
1080 INST_ALL_NF_CASE(vsseg)
1081 INST_ALL_NF_CASE(vssseg)
1082 INST_ALL_NF_CASE(vsoxseg)
1083 INST_ALL_NF_CASE(vsuxseg)
1084 }
1085}
1086
1087static bool isApplicableToPLIOrPLUI(int Val) {
1088 // Check if the immediate is packed i8 or i10
1089 int16_t Bit31To16 = Val >> 16;
1090 int16_t Bit15To0 = Val;
1091 int8_t Bit15To8 = Bit15To0 >> 8;
1092 int8_t Bit7To0 = Val;
1093 if (Bit31To16 != Bit15To0)
1094 return false;
1095
1096 return isInt<10>(x: Bit15To0) || isShiftedInt<10, 6>(x: Bit15To0) ||
1097 Bit15To8 == Bit7To0;
1098}
1099
1100void RISCVDAGToDAGISel::Select(SDNode *Node) {
1101 // If we have a custom node, we have already selected.
1102 if (Node->isMachineOpcode()) {
1103 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1104 Node->setNodeId(-1);
1105 return;
1106 }
1107
1108 // Instruction Selection not handled by the auto-generated tablegen selection
1109 // should be handled here.
1110 unsigned Opcode = Node->getOpcode();
1111 MVT XLenVT = Subtarget->getXLenVT();
1112 SDLoc DL(Node);
1113 MVT VT = Node->getSimpleValueType(ResNo: 0);
1114
1115 bool HasBitTest = Subtarget->hasBEXTILike();
1116
1117 switch (Opcode) {
1118 case ISD::Constant: {
1119 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1120 auto *ConstNode = cast<ConstantSDNode>(Val: Node);
1121 if (ConstNode->isZero()) {
1122 SDValue New =
1123 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: RISCV::X0, VT);
1124 ReplaceNode(F: Node, T: New.getNode());
1125 return;
1126 }
1127 int64_t Imm = ConstNode->getSExtValue();
1128 // If only the lower 8 bits are used, try to convert this to a simm6 by
1129 // sign-extending bit 7. This is neutral without the C extension, and
1130 // allows C.LI to be used if C is present.
1131 if (!isInt<8>(x: Imm) && isUInt<8>(x: Imm) && isInt<6>(x: SignExtend64<8>(x: Imm)) &&
1132 hasAllBUsers(Node))
1133 Imm = SignExtend64<8>(x: Imm);
1134 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1135 // by sign extending bit 15.
1136 else if (!isInt<16>(x: Imm) && isUInt<16>(x: Imm) &&
1137 isInt<12>(x: SignExtend64<16>(x: Imm)) && hasAllHUsers(Node))
1138 Imm = SignExtend64<16>(x: Imm);
1139
1140 // If the upper XLen-16 bits are not used, the lower 2 bytes are the same,
1141 // and we can't use li, convert to an xlen splat so we can use pli.b.
1142 if (Subtarget->hasStdExtP() && !isInt<12>(x: Imm) &&
1143 (Imm & 0xff) == ((Imm >> 8) & 0xff) && hasAllHUsers(Node)) {
1144 // Splat the lower 16 bits to XLen. Sign extend for RV32.
1145 uint64_t Splat = Imm & 0xffff;
1146 Splat = (Splat << 16) | Splat;
1147 if (VT == MVT::i64)
1148 Imm = Splat << 32 | Splat;
1149 else
1150 Imm = SignExtend64<32>(x: Splat);
1151 } else {
1152 // If the upper 32-bits are not used try to convert this into a simm32 by
1153 // sign extending bit 32.
1154 if (!isInt<32>(x: Imm) && isUInt<32>(x: Imm) && hasAllWUsers(Node))
1155 Imm = SignExtend64<32>(x: Imm);
1156
1157 if (VT == MVT::i64 && !isInt<12>(x: Imm) && !isShiftedInt<20, 12>(x: Imm) &&
1158 Subtarget->hasStdExtP() && isApplicableToPLIOrPLUI(Val: Imm) &&
1159 hasAllWUsers(Node)) {
1160 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers,
1161 // we can simply copy lower 32 bits to higher 32 bits to make it able to
1162 // rematerialize to PLI_B or PLI_H
1163 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1164 }
1165 }
1166
1167 ReplaceNode(F: Node, T: selectImm(CurDAG, DL, VT, Imm, Subtarget: *Subtarget).getNode());
1168 return;
1169 }
1170 case ISD::ConstantFP: {
1171 const APFloat &APF = cast<ConstantFPSDNode>(Val: Node)->getValueAPF();
1172
1173 bool Is64Bit = Subtarget->is64Bit();
1174 bool HasZdinx = Subtarget->hasStdExtZdinx();
1175
1176 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1177 SDValue Imm;
1178 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1179 // create an integer immediate.
1180 if (APF.isPosZero() || NegZeroF64) {
1181 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1182 Imm = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::f64);
1183 else
1184 Imm = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
1185 } else {
1186 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
1187 Subtarget: *Subtarget);
1188 }
1189
1190 unsigned Opc;
1191 switch (VT.SimpleTy) {
1192 default:
1193 llvm_unreachable("Unexpected size");
1194 case MVT::bf16:
1195 assert(Subtarget->hasStdExtZfbfmin());
1196 Opc = RISCV::FMV_H_X;
1197 break;
1198 case MVT::f16:
1199 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1200 break;
1201 case MVT::f32:
1202 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1203 break;
1204 case MVT::f64:
1205 // For RV32, we can't move from a GPR, we need to convert instead. This
1206 // should only happen for +0.0 and -0.0.
1207 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1208 if (HasZdinx)
1209 Opc = RISCV::COPY;
1210 else
1211 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1212 break;
1213 }
1214
1215 SDNode *Res;
1216 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1217 Res =
1218 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_16, DL, VT, Operand: Imm).getNode();
1219 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1220 Res =
1221 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_32, DL, VT, Operand: Imm).getNode();
1222 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1223 Res = CurDAG->getMachineNode(
1224 Opcode: Opc, dl: DL, VT, Op1: Imm,
1225 Op2: CurDAG->getTargetConstant(Val: RISCVFPRndMode::RNE, DL, VT: XLenVT));
1226 else
1227 Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: Imm);
1228
1229 // For f64 -0.0, we need to insert a fneg.d idiom.
1230 if (NegZeroF64) {
1231 Opc = RISCV::FSGNJN_D;
1232 if (HasZdinx)
1233 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1234 Res =
1235 CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: SDValue(Res, 0), Op2: SDValue(Res, 0));
1236 }
1237
1238 ReplaceNode(F: Node, T: Res);
1239 return;
1240 }
1241 case RISCVISD::BuildGPRPair:
1242 case RISCVISD::BuildPairF64:
1243 case RISCVISD::BuildPairGPRVec: {
1244 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1245 break;
1246
1247 assert((!Subtarget->is64Bit() || Opcode != RISCVISD::BuildPairF64) &&
1248 "BuildPairF64 only handled here on rv32i_zdinx");
1249
1250 SDValue N =
1251 buildGPRPair(CurDAG, DL, VT, Lo: Node->getOperand(Num: 0), Hi: Node->getOperand(Num: 1));
1252 ReplaceNode(F: Node, T: N.getNode());
1253 return;
1254 }
1255 case RISCVISD::SplitGPRPair:
1256 case RISCVISD::SplitF64:
1257 case RISCVISD::SplitGPRVec: {
1258 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1259 assert((!Subtarget->is64Bit() || Opcode != RISCVISD::SplitF64) &&
1260 "SplitF64 only handled here on rv32i_zdinx");
1261
1262 if (!SDValue(Node, 0).use_empty()) {
1263 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1264 VT: Node->getValueType(ResNo: 0),
1265 Operand: Node->getOperand(Num: 0));
1266 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1267 }
1268
1269 if (!SDValue(Node, 1).use_empty()) {
1270 SDValue Hi = CurDAG->getTargetExtractSubreg(
1271 SRIdx: RISCV::sub_gpr_odd, DL, VT: Node->getValueType(ResNo: 1), Operand: Node->getOperand(Num: 0));
1272 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1273 }
1274
1275 CurDAG->RemoveDeadNode(N: Node);
1276 return;
1277 }
1278
1279 if (!Subtarget->hasStdExtZfa())
1280 break;
1281 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1282 "Unexpected subtarget");
1283
1284 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1285 if (!SDValue(Node, 0).use_empty()) {
1286 SDNode *Lo = CurDAG->getMachineNode(Opcode: RISCV::FMV_X_W_FPR64, dl: DL, VT,
1287 Op1: Node->getOperand(Num: 0));
1288 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Lo, 0));
1289 }
1290 if (!SDValue(Node, 1).use_empty()) {
1291 SDNode *Hi = CurDAG->getMachineNode(Opcode: RISCV::FMVH_X_D, dl: DL, VT,
1292 Op1: Node->getOperand(Num: 0));
1293 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Hi, 0));
1294 }
1295
1296 CurDAG->RemoveDeadNode(N: Node);
1297 return;
1298 }
1299 case ISD::SHL: {
1300 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1301 if (!N1C)
1302 break;
1303 SDValue N0 = Node->getOperand(Num: 0);
1304 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1305 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1306 break;
1307 unsigned ShAmt = N1C->getZExtValue();
1308 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1309
1310 if (isShiftedMask_64(Value: Mask)) {
1311 unsigned XLen = Subtarget->getXLen();
1312 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1313 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1314 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1315 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1316 // where C2 has 32 leading zeros and C3 trailing zeros.
1317 SDNode *SRLIW = CurDAG->getMachineNode(
1318 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1319 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1320 SDNode *SLLI = CurDAG->getMachineNode(
1321 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1322 Op2: CurDAG->getTargetConstant(Val: TrailingZeros + ShAmt, DL, VT));
1323 ReplaceNode(F: Node, T: SLLI);
1324 return;
1325 }
1326 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1327 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1328 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1329 // where C2 has C4 leading zeros and no trailing zeros.
1330 // This is profitable if the "and" was to be lowered to
1331 // (srli (slli X, C4), C4) and not (andi X, C2).
1332 // For "LeadingZeros == 32":
1333 // - with Zba it's just (slli.uw X, C)
1334 // - without Zba a tablegen pattern applies the very same
1335 // transform as we would have done here
1336 SDNode *SLLI = CurDAG->getMachineNode(
1337 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1338 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1339 SDNode *SRLI = CurDAG->getMachineNode(
1340 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1341 Op2: CurDAG->getTargetConstant(Val: LeadingZeros - ShAmt, DL, VT));
1342 ReplaceNode(F: Node, T: SRLI);
1343 return;
1344 }
1345 }
1346 break;
1347 }
1348 case ISD::SRL: {
1349 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1350 if (!N1C)
1351 break;
1352 SDValue N0 = Node->getOperand(Num: 0);
1353 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1354 break;
1355 unsigned ShAmt = N1C->getZExtValue();
1356 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1357
1358 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1359 // 32 leading zeros and C3 trailing zeros.
1360 if (isShiftedMask_64(Value: Mask) && N0.hasOneUse()) {
1361 unsigned XLen = Subtarget->getXLen();
1362 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1363 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1364 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1365 SDNode *SRLIW = CurDAG->getMachineNode(
1366 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1367 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1368 SDNode *SLLI = CurDAG->getMachineNode(
1369 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1370 Op2: CurDAG->getTargetConstant(Val: TrailingZeros - ShAmt, DL, VT));
1371 ReplaceNode(F: Node, T: SLLI);
1372 return;
1373 }
1374 }
1375
1376 // Optimize (srl (and X, C2), C) ->
1377 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1378 // Where C2 is a mask with C3 trailing ones.
1379 // Taking into account that the C2 may have had lower bits unset by
1380 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1381 // This pattern occurs when type legalizing right shifts for types with
1382 // less than XLen bits.
1383 Mask |= maskTrailingOnes<uint64_t>(N: ShAmt);
1384 if (!isMask_64(Value: Mask))
1385 break;
1386 unsigned TrailingOnes = llvm::countr_one(Value: Mask);
1387 if (ShAmt >= TrailingOnes)
1388 break;
1389 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1390 if (TrailingOnes == 32) {
1391 SDNode *SRLI = CurDAG->getMachineNode(
1392 Opcode: Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, dl: DL, VT,
1393 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1394 ReplaceNode(F: Node, T: SRLI);
1395 return;
1396 }
1397
1398 // Only do the remaining transforms if the AND has one use.
1399 if (!N0.hasOneUse())
1400 break;
1401
1402 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1403 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1404 SDNode *BEXTI = CurDAG->getMachineNode(
1405 Opcode: Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, dl: DL, VT,
1406 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1407 ReplaceNode(F: Node, T: BEXTI);
1408 return;
1409 }
1410
1411 const unsigned Msb = TrailingOnes - 1;
1412 const unsigned Lsb = ShAmt;
1413 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0.getOperand(i: 0), Msb, Lsb))
1414 return;
1415
1416 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1417 SDNode *SLLI =
1418 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1419 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1420 SDNode *SRLI = CurDAG->getMachineNode(
1421 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1422 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1423 ReplaceNode(F: Node, T: SRLI);
1424 return;
1425 }
1426 case ISD::SRA: {
1427 if (trySignedBitfieldExtract(Node))
1428 return;
1429
1430 if (trySignedBitfieldInsertInSign(Node))
1431 return;
1432
1433 // Optimize (sra (sext_inreg X, i16), C) ->
1434 // (srai (slli X, (XLen-16), (XLen-16) + C)
1435 // And (sra (sext_inreg X, i8), C) ->
1436 // (srai (slli X, (XLen-8), (XLen-8) + C)
1437 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1438 // This transform matches the code we get without Zbb. The shifts are more
1439 // compressible, and this can help expose CSE opportunities in the sdiv by
1440 // constant optimization.
1441 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1442 if (!N1C)
1443 break;
1444 SDValue N0 = Node->getOperand(Num: 0);
1445 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1446 break;
1447 unsigned ShAmt = N1C->getZExtValue();
1448 unsigned ExtSize =
1449 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
1450 // ExtSize of 32 should use sraiw via tablegen pattern.
1451 if (ExtSize >= 32 || ShAmt >= ExtSize)
1452 break;
1453 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1454 SDNode *SLLI =
1455 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1456 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1457 SDNode *SRAI = CurDAG->getMachineNode(
1458 Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1459 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1460 ReplaceNode(F: Node, T: SRAI);
1461 return;
1462 }
1463 case ISD::SIGN_EXTEND_INREG: {
1464 // Optimize (sext_inreg (srl X, C), i8/i16) ->
1465 // (srai (slli X, XLen-ExtSize-C), XLen-ExtSize)
1466 // This is a bitfield extract pattern where we're extracting a signed
1467 // 8-bit or 16-bit field from position C.
1468 SDValue N0 = Node->getOperand(Num: 0);
1469 if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse())
1470 break;
1471
1472 auto *ShAmtC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
1473 if (!ShAmtC)
1474 break;
1475
1476 unsigned ExtSize =
1477 cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT().getSizeInBits();
1478 unsigned ShAmt = ShAmtC->getZExtValue();
1479 unsigned XLen = Subtarget->getXLen();
1480
1481 // Only handle types less than 32, and make sure the shift amount is valid.
1482 if (ExtSize >= 32 || ShAmt >= XLen - ExtSize)
1483 break;
1484
1485 unsigned LShAmt = XLen - ExtSize - ShAmt;
1486 SDNode *SLLI =
1487 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1488 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1489 SDNode *SRAI = CurDAG->getMachineNode(
1490 Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1491 Op2: CurDAG->getTargetConstant(Val: XLen - ExtSize, DL, VT));
1492 ReplaceNode(F: Node, T: SRAI);
1493 return;
1494 }
1495 case ISD::OR: {
1496 if (tryShrinkShlLogicImm(Node))
1497 return;
1498
1499 break;
1500 }
1501 case ISD::XOR:
1502 if (tryShrinkShlLogicImm(Node))
1503 return;
1504
1505 break;
1506 case ISD::AND: {
1507 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1508 if (!N1C)
1509 break;
1510
1511 SDValue N0 = Node->getOperand(Num: 0);
1512
1513 bool LeftShift = N0.getOpcode() == ISD::SHL;
1514 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1515 auto *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
1516 if (!C)
1517 break;
1518 unsigned C2 = C->getZExtValue();
1519 unsigned XLen = Subtarget->getXLen();
1520 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1521
1522 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1523 // shift pair might offer more compression opportunities.
1524 // TODO: We could check for C extension here, but we don't have many lit
1525 // tests with the C extension enabled so not checking gets better
1526 // coverage.
1527 // TODO: What if ANDI faster than shift?
1528 bool IsCANDI = isInt<6>(x: N1C->getSExtValue());
1529
1530 uint64_t C1 = N1C->getZExtValue();
1531
1532 // Clear irrelevant bits in the mask.
1533 if (LeftShift)
1534 C1 &= maskTrailingZeros<uint64_t>(N: C2);
1535 else
1536 C1 &= maskTrailingOnes<uint64_t>(N: XLen - C2);
1537
1538 // Some transforms should only be done if the shift has a single use or
1539 // the AND would become (srli (slli X, 32), 32)
1540 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1541
1542 SDValue X = N0.getOperand(i: 0);
1543
1544 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1545 // with c3 leading zeros.
1546 if (!LeftShift && isMask_64(Value: C1)) {
1547 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1548 if (C2 < Leading) {
1549 // If the number of leading zeros is C2+32 this can be SRLIW.
1550 if (C2 + 32 == Leading) {
1551 SDNode *SRLIW = CurDAG->getMachineNode(
1552 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1553 ReplaceNode(F: Node, T: SRLIW);
1554 return;
1555 }
1556
1557 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1558 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1559 //
1560 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1561 // legalized and goes through DAG combine.
1562 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1563 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1564 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32) {
1565 SDNode *SRAIW =
1566 CurDAG->getMachineNode(Opcode: RISCV::SRAIW, dl: DL, VT, Op1: X.getOperand(i: 0),
1567 Op2: CurDAG->getTargetConstant(Val: 31, DL, VT));
1568 SDNode *SRLIW = CurDAG->getMachineNode(
1569 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: SDValue(SRAIW, 0),
1570 Op2: CurDAG->getTargetConstant(Val: Leading - 32, DL, VT));
1571 ReplaceNode(F: Node, T: SRLIW);
1572 return;
1573 }
1574
1575 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1576 // available.
1577 // Transform (and (srl x, C2), C1)
1578 // -> (<bfextract> x, msb, lsb)
1579 //
1580 // Make sure to keep this below the SRLIW cases, as we always want to
1581 // prefer the more common instruction.
1582 const unsigned Msb = llvm::bit_width(Value: C1) + C2 - 1;
1583 const unsigned Lsb = C2;
1584 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1585 return;
1586
1587 // (srli (slli x, c3-c2), c3).
1588 // Skip if we could use (zext.w (sraiw X, C2)).
1589 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1590 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1591 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32;
1592 // Also Skip if we can use bexti or th.tst.
1593 Skip |= HasBitTest && Leading == XLen - 1;
1594 if (OneUseOrZExtW && !Skip) {
1595 SDNode *SLLI = CurDAG->getMachineNode(
1596 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1597 Op2: CurDAG->getTargetConstant(Val: Leading - C2, DL, VT));
1598 SDNode *SRLI = CurDAG->getMachineNode(
1599 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1600 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1601 ReplaceNode(F: Node, T: SRLI);
1602 return;
1603 }
1604 }
1605 }
1606
1607 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1608 // shifted by c2 bits with c3 leading zeros.
1609 if (LeftShift && isShiftedMask_64(Value: C1)) {
1610 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1611
1612 if (C2 + Leading < XLen &&
1613 C1 == (maskTrailingOnes<uint64_t>(N: XLen - (C2 + Leading)) << C2)) {
1614 // Use slli.uw when possible.
1615 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1616 SDNode *SLLI_UW =
1617 CurDAG->getMachineNode(Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: X,
1618 Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1619 ReplaceNode(F: Node, T: SLLI_UW);
1620 return;
1621 }
1622
1623 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1624 // available.
1625 // Transform (and (shl x, c2), c1)
1626 // -> (<bfinsert> x, msb, lsb)
1627 // e.g.
1628 // (and (shl x, 12), 0x00fff000)
1629 // If XLen = 32 and C2 = 12, then
1630 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1631 const unsigned Msb = XLen - Leading - 1;
1632 const unsigned Lsb = C2;
1633 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1634 return;
1635
1636 if (OneUseOrZExtW && !IsCANDI) {
1637 // (packh x0, X)
1638 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1639 SDNode *PACKH = CurDAG->getMachineNode(
1640 Opcode: RISCV::PACKH, dl: DL, VT,
1641 Op1: CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT()), Op2: X);
1642 ReplaceNode(F: Node, T: PACKH);
1643 return;
1644 }
1645 // (srli (slli c2+c3), c3)
1646 SDNode *SLLI = CurDAG->getMachineNode(
1647 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1648 Op2: CurDAG->getTargetConstant(Val: C2 + Leading, DL, VT));
1649 SDNode *SRLI = CurDAG->getMachineNode(
1650 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1651 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1652 ReplaceNode(F: Node, T: SRLI);
1653 return;
1654 }
1655 }
1656 }
1657
1658 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1659 // shifted mask with c2 leading zeros and c3 trailing zeros.
1660 if (!LeftShift && isShiftedMask_64(Value: C1)) {
1661 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1662 unsigned Trailing = llvm::countr_zero(Val: C1);
1663 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1664 !IsCANDI) {
1665 unsigned SrliOpc = RISCV::SRLI;
1666 // If the input is zexti32 we should use SRLIW.
1667 if (X.getOpcode() == ISD::AND &&
1668 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1669 X.getConstantOperandVal(i: 1) == UINT64_C(0xFFFFFFFF)) {
1670 SrliOpc = RISCV::SRLIW;
1671 X = X.getOperand(i: 0);
1672 }
1673 SDNode *SRLI = CurDAG->getMachineNode(
1674 Opcode: SrliOpc, dl: DL, VT, Op1: X,
1675 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1676 SDNode *SLLI = CurDAG->getMachineNode(
1677 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1678 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1679 ReplaceNode(F: Node, T: SLLI);
1680 return;
1681 }
1682 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1683 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1684 OneUseOrZExtW && !IsCANDI) {
1685 SDNode *SRLIW = CurDAG->getMachineNode(
1686 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1687 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1688 SDNode *SLLI = CurDAG->getMachineNode(
1689 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1690 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1691 ReplaceNode(F: Node, T: SLLI);
1692 return;
1693 }
1694 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1695 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1696 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1697 SDNode *SRLI = CurDAG->getMachineNode(
1698 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1699 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1700 SDNode *SLLI_UW = CurDAG->getMachineNode(
1701 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1702 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1703 ReplaceNode(F: Node, T: SLLI_UW);
1704 return;
1705 }
1706 }
1707
1708 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1709 // shifted mask with no leading zeros and c3 trailing zeros.
1710 if (LeftShift && isShiftedMask_64(Value: C1)) {
1711 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1712 unsigned Trailing = llvm::countr_zero(Val: C1);
1713 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1714 SDNode *SRLI = CurDAG->getMachineNode(
1715 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1716 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1717 SDNode *SLLI = CurDAG->getMachineNode(
1718 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1719 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1720 ReplaceNode(F: Node, T: SLLI);
1721 return;
1722 }
1723 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1724 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1725 SDNode *SRLIW = CurDAG->getMachineNode(
1726 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1727 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1728 SDNode *SLLI = CurDAG->getMachineNode(
1729 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1730 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1731 ReplaceNode(F: Node, T: SLLI);
1732 return;
1733 }
1734
1735 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1736 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1737 Subtarget->hasStdExtZba()) {
1738 SDNode *SRLI = CurDAG->getMachineNode(
1739 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1740 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1741 SDNode *SLLI_UW = CurDAG->getMachineNode(
1742 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1743 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1744 ReplaceNode(F: Node, T: SLLI_UW);
1745 return;
1746 }
1747 }
1748 }
1749
1750 const uint64_t C1 = N1C->getZExtValue();
1751
1752 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
1753 N0.hasOneUse()) {
1754 unsigned C2 = N0.getConstantOperandVal(i: 1);
1755 unsigned XLen = Subtarget->getXLen();
1756 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1757
1758 SDValue X = N0.getOperand(i: 0);
1759
1760 // Prefer SRAIW + ANDI when possible.
1761 bool Skip = C2 > 32 && isInt<12>(x: N1C->getSExtValue()) &&
1762 X.getOpcode() == ISD::SHL &&
1763 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1764 X.getConstantOperandVal(i: 1) == 32;
1765 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1766 // mask with c3 leading zeros and c2 is larger than c3.
1767 if (isMask_64(Value: C1) && !Skip) {
1768 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1769 if (C2 > Leading) {
1770 SDNode *SRAI = CurDAG->getMachineNode(
1771 Opcode: RISCV::SRAI, dl: DL, VT, Op1: X,
1772 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1773 SDNode *SRLI = CurDAG->getMachineNode(
1774 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1775 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1776 ReplaceNode(F: Node, T: SRLI);
1777 return;
1778 }
1779 }
1780
1781 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1782 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1783 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1784 if (isShiftedMask_64(Value: C1) && !Skip) {
1785 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1786 unsigned Trailing = llvm::countr_zero(Val: C1);
1787 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1788 SDNode *SRAI = CurDAG->getMachineNode(
1789 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1790 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1791 SDNode *SRLI = CurDAG->getMachineNode(
1792 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1793 Op2: CurDAG->getTargetConstant(Val: Leading + Trailing, DL, VT));
1794 SDNode *SLLI = CurDAG->getMachineNode(
1795 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1796 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1797 ReplaceNode(F: Node, T: SLLI);
1798 return;
1799 }
1800 }
1801 }
1802
1803 // If C1 masks off the upper bits only (but can't be formed as an
1804 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1805 // available.
1806 // Transform (and x, C1)
1807 // -> (<bfextract> x, msb, lsb)
1808 if (isMask_64(Value: C1) && !isInt<12>(x: N1C->getSExtValue()) &&
1809 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1810 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1811 const unsigned Msb = llvm::bit_width(Value: C1) - 1;
1812 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0, Msb, Lsb: 0))
1813 return;
1814 }
1815
1816 if (tryShrinkShlLogicImm(Node))
1817 return;
1818
1819 break;
1820 }
1821 case ISD::MUL: {
1822 // Special case for calculating (mul (and X, C2), C1) where the full product
1823 // fits in XLen bits. We can shift X left by the number of leading zeros in
1824 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1825 // product has XLen trailing zeros, putting it in the output of MULHU. This
1826 // can avoid materializing a constant in a register for C2.
1827
1828 // RHS should be a constant.
1829 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1830 if (!N1C || !N1C->hasOneUse())
1831 break;
1832
1833 // LHS should be an AND with constant.
1834 SDValue N0 = Node->getOperand(Num: 0);
1835 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1836 break;
1837
1838 uint64_t C2 = N0.getConstantOperandVal(i: 1);
1839
1840 // Constant should be a mask.
1841 if (!isMask_64(Value: C2))
1842 break;
1843
1844 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1845 // multiple users or the constant is a simm12. This prevents inserting a
1846 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1847 // make it more costly to materialize. Otherwise, using a SLLI might allow
1848 // it to be compressed.
1849 bool IsANDIOrZExt =
1850 isInt<12>(x: C2) ||
1851 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1852 // With XTHeadBb, we can use TH.EXTU.
1853 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1854 if (IsANDIOrZExt && (isInt<12>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1855 break;
1856 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1857 // the constant is a simm32.
1858 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1859 // With XTHeadBb, we can use TH.EXTU.
1860 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1861 if (IsZExtW && (isInt<32>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1862 break;
1863
1864 // We need to shift left the AND input and C1 by a total of XLen bits.
1865
1866 // How far left do we need to shift the AND input?
1867 unsigned XLen = Subtarget->getXLen();
1868 unsigned LeadingZeros = XLen - llvm::bit_width(Value: C2);
1869
1870 // The constant gets shifted by the remaining amount unless that would
1871 // shift bits out.
1872 uint64_t C1 = N1C->getZExtValue();
1873 unsigned ConstantShift = XLen - LeadingZeros;
1874 if (ConstantShift > (XLen - llvm::bit_width(Value: C1)))
1875 break;
1876
1877 uint64_t ShiftedC1 = C1 << ConstantShift;
1878 // If this is RV32, we need to sign extend the constant.
1879 if (XLen == 32)
1880 ShiftedC1 = SignExtend64<32>(x: ShiftedC1);
1881
1882 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1883 SDNode *Imm = selectImm(CurDAG, DL, VT, Imm: ShiftedC1, Subtarget: *Subtarget).getNode();
1884 SDNode *SLLI =
1885 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1886 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1887 SDNode *MULHU = CurDAG->getMachineNode(Opcode: RISCV::MULHU, dl: DL, VT,
1888 Op1: SDValue(SLLI, 0), Op2: SDValue(Imm, 0));
1889 ReplaceNode(F: Node, T: MULHU);
1890 return;
1891 }
1892 case ISD::SMUL_LOHI:
1893 case ISD::UMUL_LOHI:
1894 case RISCVISD::WMULSU:
1895 case RISCVISD::WADDU:
1896 case RISCVISD::WSUBU: {
1897 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1898 "Unexpected opcode");
1899
1900 unsigned Opc;
1901 switch (Node->getOpcode()) {
1902 default:
1903 llvm_unreachable("Unexpected opcode");
1904 case ISD::SMUL_LOHI:
1905 Opc = RISCV::WMUL;
1906 break;
1907 case ISD::UMUL_LOHI:
1908 Opc = RISCV::WMULU;
1909 break;
1910 case RISCVISD::WMULSU:
1911 Opc = RISCV::WMULSU;
1912 break;
1913 case RISCVISD::WADDU:
1914 Opc = RISCV::WADDU;
1915 break;
1916 case RISCVISD::WSUBU:
1917 Opc = RISCV::WSUBU;
1918 break;
1919 }
1920
1921 SDNode *Result = CurDAG->getMachineNode(
1922 Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Node->getOperand(Num: 0), Op2: Node->getOperand(Num: 1));
1923
1924 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(Result, 0));
1925 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1926 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1927 CurDAG->RemoveDeadNode(N: Node);
1928 return;
1929 }
1930 case RISCVISD::WSLL:
1931 case RISCVISD::WSLA: {
1932 // Custom select WSLL/WSLA for RV32P.
1933 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1934 "Unexpected opcode");
1935
1936 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1937
1938 SDValue ShAmt = Node->getOperand(Num: 1);
1939
1940 unsigned Opc;
1941
1942 auto *ShAmtC = dyn_cast<ConstantSDNode>(Val&: ShAmt);
1943 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1944 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1945 ShAmt = CurDAG->getTargetConstant(Val: ShAmtC->getZExtValue(), DL, VT: XLenVT);
1946 } else {
1947 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1948 }
1949
1950 SDNode *WShift = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped,
1951 Op1: Node->getOperand(Num: 0), Op2: ShAmt);
1952
1953 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(WShift, 0));
1954 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1955 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1956 CurDAG->RemoveDeadNode(N: Node);
1957 return;
1958 }
1959 case ISD::LOAD: {
1960 if (tryIndexedLoad(Node))
1961 return;
1962
1963 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1964 // We match post-incrementing load here
1965 LoadSDNode *Load = cast<LoadSDNode>(Val: Node);
1966 if (Load->getAddressingMode() != ISD::POST_INC)
1967 break;
1968
1969 SDValue Chain = Node->getOperand(Num: 0);
1970 SDValue Base = Node->getOperand(Num: 1);
1971 SDValue Offset = Node->getOperand(Num: 2);
1972
1973 bool Simm12 = false;
1974 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1975
1976 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Val&: Offset)) {
1977 int ConstantVal = ConstantOffset->getSExtValue();
1978 Simm12 = isInt<12>(x: ConstantVal);
1979 if (Simm12)
1980 Offset = CurDAG->getSignedTargetConstant(Val: ConstantVal, DL: SDLoc(Offset),
1981 VT: Offset.getValueType());
1982 }
1983
1984 unsigned Opcode = 0;
1985 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1986 case MVT::i8:
1987 if (Simm12 && SignExtend)
1988 Opcode = RISCV::CV_LB_ri_inc;
1989 else if (Simm12 && !SignExtend)
1990 Opcode = RISCV::CV_LBU_ri_inc;
1991 else if (!Simm12 && SignExtend)
1992 Opcode = RISCV::CV_LB_rr_inc;
1993 else
1994 Opcode = RISCV::CV_LBU_rr_inc;
1995 break;
1996 case MVT::i16:
1997 if (Simm12 && SignExtend)
1998 Opcode = RISCV::CV_LH_ri_inc;
1999 else if (Simm12 && !SignExtend)
2000 Opcode = RISCV::CV_LHU_ri_inc;
2001 else if (!Simm12 && SignExtend)
2002 Opcode = RISCV::CV_LH_rr_inc;
2003 else
2004 Opcode = RISCV::CV_LHU_rr_inc;
2005 break;
2006 case MVT::i32:
2007 if (Simm12)
2008 Opcode = RISCV::CV_LW_ri_inc;
2009 else
2010 Opcode = RISCV::CV_LW_rr_inc;
2011 break;
2012 default:
2013 break;
2014 }
2015 if (!Opcode)
2016 break;
2017
2018 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: XLenVT, VT2: XLenVT,
2019 VT3: Chain.getSimpleValueType(), Op1: Base,
2020 Op2: Offset, Op3: Chain));
2021 return;
2022 }
2023 break;
2024 }
2025 case RISCVISD::LD_RV32: {
2026 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
2027
2028 SDValue Base, Offset;
2029 SDValue Chain = Node->getOperand(Num: 0);
2030 SDValue Addr = Node->getOperand(Num: 1);
2031 SelectAddrRegImm(Addr, Base, Offset);
2032
2033 SDValue Ops[] = {Base, Offset, Chain};
2034 MachineSDNode *New = CurDAG->getMachineNode(
2035 Opcode: RISCV::LD_RV32, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2036 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
2037 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2038 ReplaceUses(F: SDValue(Node, 0), T: Lo);
2039 ReplaceUses(F: SDValue(Node, 1), T: Hi);
2040 ReplaceUses(F: SDValue(Node, 2), T: SDValue(New, 1));
2041 CurDAG->RemoveDeadNode(N: Node);
2042 return;
2043 }
2044 case RISCVISD::SD_RV32: {
2045 SDValue Base, Offset;
2046 SDValue Chain = Node->getOperand(Num: 0);
2047 SDValue Addr = Node->getOperand(Num: 3);
2048 SelectAddrRegImm(Addr, Base, Offset);
2049
2050 SDValue Lo = Node->getOperand(Num: 1);
2051 SDValue Hi = Node->getOperand(Num: 2);
2052
2053 SDValue RegPair;
2054 // Peephole to use X0_Pair for storing zero.
2055 if (isNullConstant(V: Lo) && isNullConstant(V: Hi)) {
2056 RegPair = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
2057 } else {
2058 RegPair = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo, Hi);
2059 }
2060
2061 MachineSDNode *New = CurDAG->getMachineNode(Opcode: RISCV::SD_RV32, dl: DL, VT: MVT::Other,
2062 Ops: {RegPair, Base, Offset, Chain});
2063 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2064 ReplaceUses(F: SDValue(Node, 0), T: SDValue(New, 0));
2065 CurDAG->RemoveDeadNode(N: Node);
2066 return;
2067 }
2068 case RISCVISD::ADDD:
2069 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2070 // widening multiply.
2071 if (tryWideningMulAcc(Node, DL))
2072 return;
2073
2074 // Fall through to regular ADDD selection.
2075 [[fallthrough]];
2076 case RISCVISD::SUBD:
2077 case RISCVISD::PPAIRE_DB:
2078 case RISCVISD::WADDAU:
2079 case RISCVISD::WSUBAU:
2080 case RISCVISD::WADDA:
2081 case RISCVISD::WSUBA: {
2082 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2083 assert(
2084 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2085 "Unexpected opcode");
2086
2087 SDValue Op0Lo = Node->getOperand(Num: 0);
2088 SDValue Op0Hi = Node->getOperand(Num: 1);
2089
2090 SDValue Op0;
2091 if (isNullConstant(V: Op0Lo) && isNullConstant(V: Op0Hi)) {
2092 Op0 = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
2093 } else {
2094 Op0 = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: Op0Lo, Hi: Op0Hi);
2095 }
2096
2097 SDValue Op1Lo = Node->getOperand(Num: 2);
2098 SDValue Op1Hi = Node->getOperand(Num: 3);
2099
2100 MachineSDNode *New;
2101 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU ||
2102 Opcode == RISCVISD::WADDA || Opcode == RISCVISD::WSUBA) {
2103 // Widening accumulate: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi
2104 // are the two 32-bit values.
2105 unsigned Opc;
2106 switch (Opcode) {
2107 default:
2108 llvm_unreachable("Unexpected opcode");
2109 case RISCVISD::WADDAU:
2110 Opc = RISCV::WADDAU;
2111 break;
2112 case RISCVISD::WSUBAU:
2113 Opc = RISCV::WSUBAU;
2114 break;
2115 case RISCVISD::WADDA:
2116 Opc = RISCV::WADDA;
2117 break;
2118 case RISCVISD::WSUBA:
2119 Opc = RISCV::WSUBA;
2120 break;
2121 }
2122 New = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Op0, Op2: Op1Lo, Op3: Op1Hi);
2123 } else {
2124 SDValue Op1 = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: Op1Lo, Hi: Op1Hi);
2125
2126 unsigned Opc;
2127 switch (Opcode) {
2128 default:
2129 llvm_unreachable("Unexpected opcode");
2130 case RISCVISD::ADDD:
2131 Opc = RISCV::ADDD;
2132 break;
2133 case RISCVISD::SUBD:
2134 Opc = RISCV::SUBD;
2135 break;
2136 case RISCVISD::PPAIRE_DB:
2137 Opc = RISCV::PPAIRE_DB;
2138 break;
2139 }
2140 New = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Op0, Op2: Op1);
2141 }
2142
2143 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
2144 ReplaceUses(F: SDValue(Node, 0), T: Lo);
2145 ReplaceUses(F: SDValue(Node, 1), T: Hi);
2146 CurDAG->RemoveDeadNode(N: Node);
2147 return;
2148 }
2149 case ISD::INTRINSIC_WO_CHAIN: {
2150 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
2151 switch (IntNo) {
2152 // By default we do not custom select any intrinsic.
2153 default:
2154 break;
2155 case Intrinsic::riscv_vmsgeu:
2156 case Intrinsic::riscv_vmsge: {
2157 SDValue Src1 = Node->getOperand(Num: 1);
2158 SDValue Src2 = Node->getOperand(Num: 2);
2159 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2160 bool IsCmpConstant = false;
2161 bool IsCmpMinimum = false;
2162 // Only custom select scalar second operand.
2163 if (Src2.getValueType() != XLenVT)
2164 break;
2165 // Small constants are handled with patterns.
2166 int64_t CVal = 0;
2167 MVT Src1VT = Src1.getSimpleValueType();
2168 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
2169 IsCmpConstant = true;
2170 CVal = C->getSExtValue();
2171 if (CVal >= -15 && CVal <= 16) {
2172 if (!IsUnsigned || CVal != 0)
2173 break;
2174 IsCmpMinimum = true;
2175 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2176 numBits: Src1VT.getScalarSizeInBits())
2177 .getSExtValue()) {
2178 IsCmpMinimum = true;
2179 }
2180 }
2181 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2182 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
2183 default:
2184 llvm_unreachable("Unexpected LMUL!");
2185#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2186 case RISCVVType::lmulenum: \
2187 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2188 : RISCV::PseudoVMSLT_VX_##suffix; \
2189 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2190 : RISCV::PseudoVMSGT_VX_##suffix; \
2191 break;
2192 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2193 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2194 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2195 CASE_VMSLT_OPCODES(LMUL_1, M1)
2196 CASE_VMSLT_OPCODES(LMUL_2, M2)
2197 CASE_VMSLT_OPCODES(LMUL_4, M4)
2198 CASE_VMSLT_OPCODES(LMUL_8, M8)
2199#undef CASE_VMSLT_OPCODES
2200 }
2201 // Mask operations use the LMUL from the mask type.
2202 switch (RISCVTargetLowering::getLMUL(VT)) {
2203 default:
2204 llvm_unreachable("Unexpected LMUL!");
2205#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2206 case RISCVVType::lmulenum: \
2207 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2208 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2209 break;
2210 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2211 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2212 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2213 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2214 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2215 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2216 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2217#undef CASE_VMNAND_VMSET_OPCODES
2218 }
2219 SDValue SEW = CurDAG->getTargetConstant(
2220 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
2221 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
2222 SDValue VL;
2223 selectVLOp(N: Node->getOperand(Num: 3), VL);
2224
2225 // If vmsge(u) with minimum value, expand it to vmset.
2226 if (IsCmpMinimum) {
2227 ReplaceNode(F: Node,
2228 T: CurDAG->getMachineNode(Opcode: VMSetOpcode, dl: DL, VT, Op1: VL, Op2: MaskSEW));
2229 return;
2230 }
2231
2232 if (IsCmpConstant) {
2233 SDValue Imm =
2234 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
2235
2236 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMSGTOpcode, dl: DL, VT,
2237 Ops: {Src1, Imm, VL, SEW}));
2238 return;
2239 }
2240
2241 // Expand to
2242 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2243 SDValue Cmp = SDValue(
2244 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
2245 0);
2246 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMNANDOpcode, dl: DL, VT,
2247 Ops: {Cmp, Cmp, VL, MaskSEW}));
2248 return;
2249 }
2250 case Intrinsic::riscv_vmsgeu_mask:
2251 case Intrinsic::riscv_vmsge_mask: {
2252 SDValue Src1 = Node->getOperand(Num: 2);
2253 SDValue Src2 = Node->getOperand(Num: 3);
2254 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2255 bool IsCmpConstant = false;
2256 bool IsCmpMinimum = false;
2257 // Only custom select scalar second operand.
2258 if (Src2.getValueType() != XLenVT)
2259 break;
2260 // Small constants are handled with patterns.
2261 MVT Src1VT = Src1.getSimpleValueType();
2262 int64_t CVal = 0;
2263 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
2264 IsCmpConstant = true;
2265 CVal = C->getSExtValue();
2266 if (CVal >= -15 && CVal <= 16) {
2267 if (!IsUnsigned || CVal != 0)
2268 break;
2269 IsCmpMinimum = true;
2270 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2271 numBits: Src1VT.getScalarSizeInBits())
2272 .getSExtValue()) {
2273 IsCmpMinimum = true;
2274 }
2275 }
2276 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2277 VMOROpcode, VMSGTMaskOpcode;
2278 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
2279 default:
2280 llvm_unreachable("Unexpected LMUL!");
2281#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2282 case RISCVVType::lmulenum: \
2283 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2284 : RISCV::PseudoVMSLT_VX_##suffix; \
2285 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2286 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2287 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2288 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2289 break;
2290 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2291 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2292 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2293 CASE_VMSLT_OPCODES(LMUL_1, M1)
2294 CASE_VMSLT_OPCODES(LMUL_2, M2)
2295 CASE_VMSLT_OPCODES(LMUL_4, M4)
2296 CASE_VMSLT_OPCODES(LMUL_8, M8)
2297#undef CASE_VMSLT_OPCODES
2298 }
2299 // Mask operations use the LMUL from the mask type.
2300 switch (RISCVTargetLowering::getLMUL(VT)) {
2301 default:
2302 llvm_unreachable("Unexpected LMUL!");
2303#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2304 case RISCVVType::lmulenum: \
2305 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2306 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2307 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2308 break;
2309 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2310 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2311 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2312 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2313 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2314 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2315 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2316#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2317 }
2318 SDValue SEW = CurDAG->getTargetConstant(
2319 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
2320 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
2321 SDValue VL;
2322 selectVLOp(N: Node->getOperand(Num: 5), VL);
2323 SDValue MaskedOff = Node->getOperand(Num: 1);
2324 SDValue Mask = Node->getOperand(Num: 4);
2325
2326 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2327 if (IsCmpMinimum) {
2328 // We don't need vmor if the MaskedOff and the Mask are the same
2329 // value.
2330 if (Mask == MaskedOff) {
2331 ReplaceUses(F: Node, T: Mask.getNode());
2332 return;
2333 }
2334 ReplaceNode(F: Node,
2335 T: CurDAG->getMachineNode(Opcode: VMOROpcode, dl: DL, VT,
2336 Ops: {Mask, MaskedOff, VL, MaskSEW}));
2337 return;
2338 }
2339
2340 // If the MaskedOff value and the Mask are the same value use
2341 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2342 // This avoids needing to copy v0 to vd before starting the next sequence.
2343 if (Mask == MaskedOff) {
2344 SDValue Cmp = SDValue(
2345 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
2346 0);
2347 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMANDNOpcode, dl: DL, VT,
2348 Ops: {Mask, Cmp, VL, MaskSEW}));
2349 return;
2350 }
2351
2352 SDValue PolicyOp =
2353 CurDAG->getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);
2354
2355 if (IsCmpConstant) {
2356 SDValue Imm =
2357 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
2358
2359 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
2360 Opcode: VMSGTMaskOpcode, dl: DL, VT,
2361 Ops: {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2362 return;
2363 }
2364
2365 // Otherwise use
2366 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2367 // The result is mask undisturbed.
2368 // We use the same instructions to emulate mask agnostic behavior, because
2369 // the agnostic result can be either undisturbed or all 1.
2370 SDValue Cmp = SDValue(CurDAG->getMachineNode(Opcode: VMSLTMaskOpcode, dl: DL, VT,
2371 Ops: {MaskedOff, Src1, Src2, Mask,
2372 VL, SEW, PolicyOp}),
2373 0);
2374 // vmxor.mm vd, vd, v0 is used to update active value.
2375 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMXOROpcode, dl: DL, VT,
2376 Ops: {Cmp, Mask, VL, MaskSEW}));
2377 return;
2378 }
2379 case Intrinsic::riscv_vsetvli:
2380 case Intrinsic::riscv_vsetvlimax:
2381 return selectVSETVLI(Node);
2382 case Intrinsic::riscv_sf_vsettnt:
2383 case Intrinsic::riscv_sf_vsettm:
2384 case Intrinsic::riscv_sf_vsettk:
2385 return selectXSfmmVSET(Node);
2386 }
2387 break;
2388 }
2389 case ISD::INTRINSIC_W_CHAIN: {
2390 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2391 switch (IntNo) {
2392 // By default we do not custom select any intrinsic.
2393 default:
2394 break;
2395 case Intrinsic::riscv_vlseg2:
2396 case Intrinsic::riscv_vlseg3:
2397 case Intrinsic::riscv_vlseg4:
2398 case Intrinsic::riscv_vlseg5:
2399 case Intrinsic::riscv_vlseg6:
2400 case Intrinsic::riscv_vlseg7:
2401 case Intrinsic::riscv_vlseg8: {
2402 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2403 /*IsStrided*/ false);
2404 return;
2405 }
2406 case Intrinsic::riscv_vlseg2_mask:
2407 case Intrinsic::riscv_vlseg3_mask:
2408 case Intrinsic::riscv_vlseg4_mask:
2409 case Intrinsic::riscv_vlseg5_mask:
2410 case Intrinsic::riscv_vlseg6_mask:
2411 case Intrinsic::riscv_vlseg7_mask:
2412 case Intrinsic::riscv_vlseg8_mask: {
2413 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2414 /*IsStrided*/ false);
2415 return;
2416 }
2417 case Intrinsic::riscv_vlsseg2:
2418 case Intrinsic::riscv_vlsseg3:
2419 case Intrinsic::riscv_vlsseg4:
2420 case Intrinsic::riscv_vlsseg5:
2421 case Intrinsic::riscv_vlsseg6:
2422 case Intrinsic::riscv_vlsseg7:
2423 case Intrinsic::riscv_vlsseg8: {
2424 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2425 /*IsStrided*/ true);
2426 return;
2427 }
2428 case Intrinsic::riscv_vlsseg2_mask:
2429 case Intrinsic::riscv_vlsseg3_mask:
2430 case Intrinsic::riscv_vlsseg4_mask:
2431 case Intrinsic::riscv_vlsseg5_mask:
2432 case Intrinsic::riscv_vlsseg6_mask:
2433 case Intrinsic::riscv_vlsseg7_mask:
2434 case Intrinsic::riscv_vlsseg8_mask: {
2435 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2436 /*IsStrided*/ true);
2437 return;
2438 }
2439 case Intrinsic::riscv_vloxseg2:
2440 case Intrinsic::riscv_vloxseg3:
2441 case Intrinsic::riscv_vloxseg4:
2442 case Intrinsic::riscv_vloxseg5:
2443 case Intrinsic::riscv_vloxseg6:
2444 case Intrinsic::riscv_vloxseg7:
2445 case Intrinsic::riscv_vloxseg8:
2446 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2447 /*IsOrdered*/ true);
2448 return;
2449 case Intrinsic::riscv_vluxseg2:
2450 case Intrinsic::riscv_vluxseg3:
2451 case Intrinsic::riscv_vluxseg4:
2452 case Intrinsic::riscv_vluxseg5:
2453 case Intrinsic::riscv_vluxseg6:
2454 case Intrinsic::riscv_vluxseg7:
2455 case Intrinsic::riscv_vluxseg8:
2456 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2457 /*IsOrdered*/ false);
2458 return;
2459 case Intrinsic::riscv_vloxseg2_mask:
2460 case Intrinsic::riscv_vloxseg3_mask:
2461 case Intrinsic::riscv_vloxseg4_mask:
2462 case Intrinsic::riscv_vloxseg5_mask:
2463 case Intrinsic::riscv_vloxseg6_mask:
2464 case Intrinsic::riscv_vloxseg7_mask:
2465 case Intrinsic::riscv_vloxseg8_mask:
2466 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2467 /*IsOrdered*/ true);
2468 return;
2469 case Intrinsic::riscv_vluxseg2_mask:
2470 case Intrinsic::riscv_vluxseg3_mask:
2471 case Intrinsic::riscv_vluxseg4_mask:
2472 case Intrinsic::riscv_vluxseg5_mask:
2473 case Intrinsic::riscv_vluxseg6_mask:
2474 case Intrinsic::riscv_vluxseg7_mask:
2475 case Intrinsic::riscv_vluxseg8_mask:
2476 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2477 /*IsOrdered*/ false);
2478 return;
2479 case Intrinsic::riscv_vlseg8ff:
2480 case Intrinsic::riscv_vlseg7ff:
2481 case Intrinsic::riscv_vlseg6ff:
2482 case Intrinsic::riscv_vlseg5ff:
2483 case Intrinsic::riscv_vlseg4ff:
2484 case Intrinsic::riscv_vlseg3ff:
2485 case Intrinsic::riscv_vlseg2ff: {
2486 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false);
2487 return;
2488 }
2489 case Intrinsic::riscv_vlseg8ff_mask:
2490 case Intrinsic::riscv_vlseg7ff_mask:
2491 case Intrinsic::riscv_vlseg6ff_mask:
2492 case Intrinsic::riscv_vlseg5ff_mask:
2493 case Intrinsic::riscv_vlseg4ff_mask:
2494 case Intrinsic::riscv_vlseg3ff_mask:
2495 case Intrinsic::riscv_vlseg2ff_mask: {
2496 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true);
2497 return;
2498 }
2499 case Intrinsic::riscv_vloxei:
2500 case Intrinsic::riscv_vloxei_mask:
2501 case Intrinsic::riscv_vluxei:
2502 case Intrinsic::riscv_vluxei_mask: {
2503 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2504 IntNo == Intrinsic::riscv_vluxei_mask;
2505 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2506 IntNo == Intrinsic::riscv_vloxei_mask;
2507
2508 MVT VT = Node->getSimpleValueType(ResNo: 0);
2509 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2510
2511 unsigned CurOp = 2;
2512 SmallVector<SDValue, 8> Operands;
2513 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2514
2515 MVT IndexVT;
2516 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2517 /*IsStridedOrIndexed*/ true, Operands,
2518 /*IsLoad=*/true, IndexVT: &IndexVT);
2519
2520 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2521 "Element count mismatch");
2522
2523 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2524 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2525 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2526 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2527 reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
2528 "index values when XLEN=32");
2529 }
2530 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2531 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
2532 IndexLMUL: static_cast<unsigned>(IndexLMUL));
2533 MachineSDNode *Load =
2534 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2535
2536 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2537
2538 ReplaceNode(F: Node, T: Load);
2539 return;
2540 }
2541 case Intrinsic::riscv_vlm:
2542 case Intrinsic::riscv_vle:
2543 case Intrinsic::riscv_vle_mask:
2544 case Intrinsic::riscv_vlse:
2545 case Intrinsic::riscv_vlse_mask: {
2546 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2547 IntNo == Intrinsic::riscv_vlse_mask;
2548 bool IsStrided =
2549 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2550
2551 MVT VT = Node->getSimpleValueType(ResNo: 0);
2552 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2553
2554 // The riscv_vlm intrinsic are always tail agnostic and no passthru
2555 // operand at the IR level. In pseudos, they have both policy and
2556 // passthru operand. The passthru operand is needed to track the
2557 // "tail undefined" state, and the policy is there just for
2558 // for consistency - it will always be "don't care" for the
2559 // unmasked form.
2560 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2561 unsigned CurOp = 2;
2562 SmallVector<SDValue, 8> Operands;
2563 if (HasPassthruOperand)
2564 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2565 else {
2566 // We eagerly lower to implicit_def (instead of undef), as we
2567 // otherwise fail to select nodes such as: nxv1i1 = undef
2568 SDNode *Passthru =
2569 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT);
2570 Operands.push_back(Elt: SDValue(Passthru, 0));
2571 }
2572 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2573 Operands, /*IsLoad=*/true);
2574
2575 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2576 const RISCV::VLEPseudo *P =
2577 RISCV::getVLEPseudo(Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
2578 LMUL: static_cast<unsigned>(LMUL));
2579 MachineSDNode *Load =
2580 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2581
2582 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2583
2584 ReplaceNode(F: Node, T: Load);
2585 return;
2586 }
2587 case Intrinsic::riscv_vleff:
2588 case Intrinsic::riscv_vleff_mask: {
2589 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2590
2591 MVT VT = Node->getSimpleValueType(ResNo: 0);
2592 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2593
2594 unsigned CurOp = 2;
2595 SmallVector<SDValue, 7> Operands;
2596 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2597 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2598 /*IsStridedOrIndexed*/ false, Operands,
2599 /*IsLoad=*/true);
2600
2601 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2602 const RISCV::VLEPseudo *P =
2603 RISCV::getVLEPseudo(Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
2604 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2605 MachineSDNode *Load = CurDAG->getMachineNode(
2606 Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2607 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2608
2609 ReplaceNode(F: Node, T: Load);
2610 return;
2611 }
2612 case Intrinsic::riscv_nds_vln:
2613 case Intrinsic::riscv_nds_vln_mask:
2614 case Intrinsic::riscv_nds_vlnu:
2615 case Intrinsic::riscv_nds_vlnu_mask: {
2616 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2617 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2618 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2619 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2620
2621 MVT VT = Node->getSimpleValueType(ResNo: 0);
2622 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2623 unsigned CurOp = 2;
2624 SmallVector<SDValue, 8> Operands;
2625
2626 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2627 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2628 /*IsStridedOrIndexed=*/false, Operands,
2629 /*IsLoad=*/true);
2630
2631 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2632 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2633 Masked: IsMasked, Unsigned: IsUnsigned, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2634 MachineSDNode *Load =
2635 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2636
2637 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2638 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
2639
2640 ReplaceNode(F: Node, T: Load);
2641 return;
2642 }
2643 }
2644 break;
2645 }
2646 case ISD::INTRINSIC_VOID: {
2647 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2648 switch (IntNo) {
2649 case Intrinsic::riscv_vsseg2:
2650 case Intrinsic::riscv_vsseg3:
2651 case Intrinsic::riscv_vsseg4:
2652 case Intrinsic::riscv_vsseg5:
2653 case Intrinsic::riscv_vsseg6:
2654 case Intrinsic::riscv_vsseg7:
2655 case Intrinsic::riscv_vsseg8: {
2656 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2657 /*IsStrided*/ false);
2658 return;
2659 }
2660 case Intrinsic::riscv_vsseg2_mask:
2661 case Intrinsic::riscv_vsseg3_mask:
2662 case Intrinsic::riscv_vsseg4_mask:
2663 case Intrinsic::riscv_vsseg5_mask:
2664 case Intrinsic::riscv_vsseg6_mask:
2665 case Intrinsic::riscv_vsseg7_mask:
2666 case Intrinsic::riscv_vsseg8_mask: {
2667 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2668 /*IsStrided*/ false);
2669 return;
2670 }
2671 case Intrinsic::riscv_vssseg2:
2672 case Intrinsic::riscv_vssseg3:
2673 case Intrinsic::riscv_vssseg4:
2674 case Intrinsic::riscv_vssseg5:
2675 case Intrinsic::riscv_vssseg6:
2676 case Intrinsic::riscv_vssseg7:
2677 case Intrinsic::riscv_vssseg8: {
2678 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2679 /*IsStrided*/ true);
2680 return;
2681 }
2682 case Intrinsic::riscv_vssseg2_mask:
2683 case Intrinsic::riscv_vssseg3_mask:
2684 case Intrinsic::riscv_vssseg4_mask:
2685 case Intrinsic::riscv_vssseg5_mask:
2686 case Intrinsic::riscv_vssseg6_mask:
2687 case Intrinsic::riscv_vssseg7_mask:
2688 case Intrinsic::riscv_vssseg8_mask: {
2689 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2690 /*IsStrided*/ true);
2691 return;
2692 }
2693 case Intrinsic::riscv_vsoxseg2:
2694 case Intrinsic::riscv_vsoxseg3:
2695 case Intrinsic::riscv_vsoxseg4:
2696 case Intrinsic::riscv_vsoxseg5:
2697 case Intrinsic::riscv_vsoxseg6:
2698 case Intrinsic::riscv_vsoxseg7:
2699 case Intrinsic::riscv_vsoxseg8:
2700 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2701 /*IsOrdered*/ true);
2702 return;
2703 case Intrinsic::riscv_vsuxseg2:
2704 case Intrinsic::riscv_vsuxseg3:
2705 case Intrinsic::riscv_vsuxseg4:
2706 case Intrinsic::riscv_vsuxseg5:
2707 case Intrinsic::riscv_vsuxseg6:
2708 case Intrinsic::riscv_vsuxseg7:
2709 case Intrinsic::riscv_vsuxseg8:
2710 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2711 /*IsOrdered*/ false);
2712 return;
2713 case Intrinsic::riscv_vsoxseg2_mask:
2714 case Intrinsic::riscv_vsoxseg3_mask:
2715 case Intrinsic::riscv_vsoxseg4_mask:
2716 case Intrinsic::riscv_vsoxseg5_mask:
2717 case Intrinsic::riscv_vsoxseg6_mask:
2718 case Intrinsic::riscv_vsoxseg7_mask:
2719 case Intrinsic::riscv_vsoxseg8_mask:
2720 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2721 /*IsOrdered*/ true);
2722 return;
2723 case Intrinsic::riscv_vsuxseg2_mask:
2724 case Intrinsic::riscv_vsuxseg3_mask:
2725 case Intrinsic::riscv_vsuxseg4_mask:
2726 case Intrinsic::riscv_vsuxseg5_mask:
2727 case Intrinsic::riscv_vsuxseg6_mask:
2728 case Intrinsic::riscv_vsuxseg7_mask:
2729 case Intrinsic::riscv_vsuxseg8_mask:
2730 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2731 /*IsOrdered*/ false);
2732 return;
2733 case Intrinsic::riscv_vsoxei:
2734 case Intrinsic::riscv_vsoxei_mask:
2735 case Intrinsic::riscv_vsuxei:
2736 case Intrinsic::riscv_vsuxei_mask: {
2737 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2738 IntNo == Intrinsic::riscv_vsuxei_mask;
2739 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2740 IntNo == Intrinsic::riscv_vsoxei_mask;
2741
2742 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2743 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2744
2745 unsigned CurOp = 2;
2746 SmallVector<SDValue, 8> Operands;
2747 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2748
2749 MVT IndexVT;
2750 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2751 /*IsStridedOrIndexed*/ true, Operands,
2752 /*IsLoad=*/false, IndexVT: &IndexVT);
2753
2754 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2755 "Element count mismatch");
2756
2757 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2758 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2759 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2760 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2761 reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
2762 "index values when XLEN=32");
2763 }
2764 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2765 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW,
2766 LMUL: static_cast<unsigned>(LMUL), IndexLMUL: static_cast<unsigned>(IndexLMUL));
2767 MachineSDNode *Store =
2768 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2769
2770 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2771
2772 ReplaceNode(F: Node, T: Store);
2773 return;
2774 }
2775 case Intrinsic::riscv_vsm:
2776 case Intrinsic::riscv_vse:
2777 case Intrinsic::riscv_vse_mask:
2778 case Intrinsic::riscv_vsse:
2779 case Intrinsic::riscv_vsse_mask: {
2780 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2781 IntNo == Intrinsic::riscv_vsse_mask;
2782 bool IsStrided =
2783 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2784
2785 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2786 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2787
2788 unsigned CurOp = 2;
2789 SmallVector<SDValue, 8> Operands;
2790 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2791
2792 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2793 Operands);
2794
2795 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2796 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2797 Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2798 MachineSDNode *Store =
2799 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2800 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2801
2802 ReplaceNode(F: Node, T: Store);
2803 return;
2804 }
2805 case Intrinsic::riscv_sf_vc_x_se:
2806 case Intrinsic::riscv_sf_vc_i_se:
2807 selectSF_VC_X_SE(Node);
2808 return;
2809 case Intrinsic::riscv_sf_vlte8:
2810 case Intrinsic::riscv_sf_vlte16:
2811 case Intrinsic::riscv_sf_vlte32:
2812 case Intrinsic::riscv_sf_vlte64: {
2813 unsigned Log2SEW;
2814 unsigned PseudoInst;
2815 switch (IntNo) {
2816 case Intrinsic::riscv_sf_vlte8:
2817 PseudoInst = RISCV::PseudoSF_VLTE8;
2818 Log2SEW = 3;
2819 break;
2820 case Intrinsic::riscv_sf_vlte16:
2821 PseudoInst = RISCV::PseudoSF_VLTE16;
2822 Log2SEW = 4;
2823 break;
2824 case Intrinsic::riscv_sf_vlte32:
2825 PseudoInst = RISCV::PseudoSF_VLTE32;
2826 Log2SEW = 5;
2827 break;
2828 case Intrinsic::riscv_sf_vlte64:
2829 PseudoInst = RISCV::PseudoSF_VLTE64;
2830 Log2SEW = 6;
2831 break;
2832 }
2833
2834 SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
2835 SDValue TWidenOp = CurDAG->getTargetConstant(Val: 1, DL, VT: XLenVT);
2836 SDValue Operands[] = {Node->getOperand(Num: 2),
2837 Node->getOperand(Num: 3),
2838 Node->getOperand(Num: 4),
2839 SEWOp,
2840 TWidenOp,
2841 Node->getOperand(Num: 0)};
2842
2843 MachineSDNode *TileLoad =
2844 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2845 CurDAG->setNodeMemRefs(N: TileLoad,
2846 NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2847
2848 ReplaceNode(F: Node, T: TileLoad);
2849 return;
2850 }
2851 case Intrinsic::riscv_sf_mm_s_s:
2852 case Intrinsic::riscv_sf_mm_s_u:
2853 case Intrinsic::riscv_sf_mm_u_s:
2854 case Intrinsic::riscv_sf_mm_u_u:
2855 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2856 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2857 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2858 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2859 case Intrinsic::riscv_sf_mm_f_f: {
2860 bool HasFRM = false;
2861 unsigned PseudoInst;
2862 switch (IntNo) {
2863 case Intrinsic::riscv_sf_mm_s_s:
2864 PseudoInst = RISCV::PseudoSF_MM_S_S;
2865 break;
2866 case Intrinsic::riscv_sf_mm_s_u:
2867 PseudoInst = RISCV::PseudoSF_MM_S_U;
2868 break;
2869 case Intrinsic::riscv_sf_mm_u_s:
2870 PseudoInst = RISCV::PseudoSF_MM_U_S;
2871 break;
2872 case Intrinsic::riscv_sf_mm_u_u:
2873 PseudoInst = RISCV::PseudoSF_MM_U_U;
2874 break;
2875 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2876 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2877 HasFRM = true;
2878 break;
2879 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2880 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2881 HasFRM = true;
2882 break;
2883 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2884 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2885 HasFRM = true;
2886 break;
2887 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2888 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2889 HasFRM = true;
2890 break;
2891 case Intrinsic::riscv_sf_mm_f_f:
2892 if (Node->getOperand(Num: 3).getValueType().getScalarType() == MVT::bf16)
2893 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2894 else
2895 PseudoInst = RISCV::PseudoSF_MM_F_F;
2896 HasFRM = true;
2897 break;
2898 }
2899 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2900 SDValue Op1 = Node->getOperand(Num: 3);
2901 SDValue Op2 = Node->getOperand(Num: 4);
2902 MVT VT = Op1->getSimpleValueType(ResNo: 0);
2903 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2904 SDValue TmOp = Node->getOperand(Num: 5);
2905 SDValue TnOp = Node->getOperand(Num: 6);
2906 SDValue TkOp = Node->getOperand(Num: 7);
2907 SDValue TWidenOp = Node->getOperand(Num: 8);
2908 SDValue Chain = Node->getOperand(Num: 0);
2909
2910 // sf.mm.f.f with sew=32, twiden=2 is invalid
2911 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2912 TWidenOp->getAsZExtVal() == 2)
2913 reportFatalUsageError(reason: "sf.mm.f.f doesn't support (sew=32, twiden=2)");
2914
2915 SmallVector<SDValue, 10> Operands(
2916 {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Op1, Op2});
2917 if (HasFRM)
2918 Operands.push_back(
2919 Elt: CurDAG->getTargetConstant(Val: RISCVFPRndMode::DYN, DL, VT: XLenVT));
2920 Operands.append(IL: {TmOp, TnOp, TkOp,
2921 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT), TWidenOp,
2922 Chain});
2923
2924 auto *NewNode =
2925 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2926
2927 ReplaceNode(F: Node, T: NewNode);
2928 return;
2929 }
2930 case Intrinsic::riscv_sf_vtzero_t: {
2931 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2932 SDValue Tm = Node->getOperand(Num: 3);
2933 SDValue Tn = Node->getOperand(Num: 4);
2934 SDValue Log2SEW = Node->getOperand(Num: 5);
2935 SDValue TWiden = Node->getOperand(Num: 6);
2936 SDValue Chain = Node->getOperand(Num: 0);
2937 auto *NewNode = CurDAG->getMachineNode(
2938 Opcode: RISCV::PseudoSF_VTZERO_T, dl: DL, VTs: Node->getVTList(),
2939 Ops: {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Tm, Tn, Log2SEW,
2940 TWiden, Chain});
2941
2942 ReplaceNode(F: Node, T: NewNode);
2943 return;
2944 }
2945 }
2946 break;
2947 }
2948 case ISD::BITCAST: {
2949 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2950 // Just drop bitcasts between vectors if both are fixed or both are
2951 // scalable.
2952 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2953 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2954 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2955 CurDAG->RemoveDeadNode(N: Node);
2956 return;
2957 }
2958 if (Subtarget->hasStdExtP()) {
2959 bool Is32BitCast =
2960 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2961 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2962 bool Is64BitCast =
2963 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2964 SrcVT == MVT::v2i32)) ||
2965 (SrcVT == MVT::i64 &&
2966 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2967 if (Is32BitCast || Is64BitCast) {
2968 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2969 CurDAG->RemoveDeadNode(N: Node);
2970 return;
2971 }
2972 }
2973 break;
2974 }
2975 case ISD::SPLAT_VECTOR: {
2976 if (!Subtarget->hasStdExtP())
2977 break;
2978 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 0))) {
2979 bool IsDoubleWide = Subtarget->isPExtPackedDoubleType(VT);
2980
2981 if (ConstNode->isZero()) {
2982 MCPhysReg X0Reg = IsDoubleWide ? RISCV::X0_Pair : RISCV::X0;
2983 SDValue New =
2984 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: X0Reg, VT);
2985 ReplaceNode(F: Node, T: New.getNode());
2986 return;
2987 }
2988
2989 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2990 APInt Val = ConstNode->getAPIntValue().trunc(width: EltSize);
2991
2992 // Use LI for all ones since it can be compressed to c.li.
2993 if (Val.isAllOnes() && !IsDoubleWide) {
2994 SDNode *NewNode = CurDAG->getMachineNode(
2995 Opcode: RISCV::ADDI, dl: DL, VT, Op1: CurDAG->getRegister(Reg: RISCV::X0, VT),
2996 Op2: CurDAG->getAllOnesConstant(DL, VT: XLenVT, /*IsTarget=*/true));
2997 ReplaceNode(F: Node, T: NewNode);
2998 return;
2999 }
3000
3001 // Find the smallest splat.
3002 if (Val.getBitWidth() > 16 && Val.isSplat(SplatSizeInBits: 16))
3003 Val = Val.trunc(width: 16);
3004 if (Val.getBitWidth() > 8 && Val.isSplat(SplatSizeInBits: 8))
3005 Val = Val.trunc(width: 8);
3006
3007 EltSize = Val.getBitWidth();
3008 int64_t Imm = Val.getSExtValue();
3009
3010 unsigned Opc = 0;
3011 if (EltSize == 8) {
3012 Opc = IsDoubleWide ? RISCV::PLI_DB : RISCV::PLI_B;
3013 } else if (EltSize == 16 && isInt<10>(x: Imm)) {
3014 Opc = IsDoubleWide ? RISCV::PLI_DH : RISCV::PLI_H;
3015 } else if (!IsDoubleWide && EltSize == 32 && isInt<10>(x: Imm)) {
3016 Opc = RISCV::PLI_W;
3017 } else if (EltSize == 16 && isShiftedInt<10, 6>(x: Imm)) {
3018 Opc = IsDoubleWide ? RISCV::PLUI_DH : RISCV::PLUI_H;
3019 Imm = Imm >> 6;
3020 } else if (!IsDoubleWide && EltSize == 32 && isShiftedInt<10, 22>(x: Imm)) {
3021 Opc = RISCV::PLUI_W;
3022 Imm = Imm >> 22;
3023 }
3024
3025 if (Opc) {
3026 SDNode *NewNode = CurDAG->getMachineNode(
3027 Opcode: Opc, dl: DL, VT, Op1: CurDAG->getSignedTargetConstant(Val: Imm, DL, VT: XLenVT));
3028 ReplaceNode(F: Node, T: NewNode);
3029 return;
3030 }
3031 }
3032
3033 break;
3034 }
3035 case ISD::SCALAR_TO_VECTOR:
3036 if (Subtarget->hasStdExtP()) {
3037 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
3038 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
3039 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
3040 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
3041 CurDAG->RemoveDeadNode(N: Node);
3042 return;
3043 }
3044 }
3045 break;
3046 case ISD::INSERT_SUBVECTOR:
3047 case RISCVISD::TUPLE_INSERT: {
3048 SDValue V = Node->getOperand(Num: 0);
3049 SDValue SubV = Node->getOperand(Num: 1);
3050 SDLoc DL(SubV);
3051 auto Idx = Node->getConstantOperandVal(Num: 2);
3052 MVT SubVecVT = SubV.getSimpleValueType();
3053
3054 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3055 MVT SubVecContainerVT = SubVecVT;
3056 // Establish the correct scalable-vector types for any fixed-length type.
3057 if (SubVecVT.isFixedLengthVector()) {
3058 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT: SubVecVT);
3059 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
3060 [[maybe_unused]] bool ExactlyVecRegSized =
3061 Subtarget->expandVScale(X: SubVecVT.getSizeInBits())
3062 .isKnownMultipleOf(RHS: Subtarget->expandVScale(X: VecRegSize));
3063 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
3064 .getKnownMinValue()));
3065 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
3066 }
3067 MVT ContainerVT = VT;
3068 if (VT.isFixedLengthVector())
3069 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
3070
3071 const auto *TRI = Subtarget->getRegisterInfo();
3072 unsigned SubRegIdx;
3073 std::tie(args&: SubRegIdx, args&: Idx) =
3074 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3075 VecVT: ContainerVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
3076
3077 // If the Idx hasn't been completely eliminated then this is a subvector
3078 // insert which doesn't naturally align to a vector register. These must
3079 // be handled using instructions to manipulate the vector registers.
3080 if (Idx != 0)
3081 break;
3082
3083 RISCVVType::VLMUL SubVecLMUL =
3084 RISCVTargetLowering::getLMUL(VT: SubVecContainerVT);
3085 [[maybe_unused]] bool IsSubVecPartReg =
3086 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3087 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3088 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3089 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3090 V.isUndef()) &&
3091 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3092 "the subvector is smaller than a full-sized register");
3093
3094 // If we haven't set a SubRegIdx, then we must be going between
3095 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3096 if (SubRegIdx == RISCV::NoSubRegister) {
3097 unsigned InRegClassID =
3098 RISCVTargetLowering::getRegClassIDForVecVT(VT: ContainerVT);
3099 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
3100 InRegClassID &&
3101 "Unexpected subvector extraction");
3102 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
3103 SDNode *NewNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
3104 dl: DL, VT, Op1: SubV, Op2: RC);
3105 ReplaceNode(F: Node, T: NewNode);
3106 return;
3107 }
3108
3109 SDValue Insert = CurDAG->getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V, Subreg: SubV);
3110 ReplaceNode(F: Node, T: Insert.getNode());
3111 return;
3112 }
3113 case ISD::EXTRACT_SUBVECTOR:
3114 case RISCVISD::TUPLE_EXTRACT: {
3115 SDValue V = Node->getOperand(Num: 0);
3116 auto Idx = Node->getConstantOperandVal(Num: 1);
3117 MVT InVT = V.getSimpleValueType();
3118
3119 // Handle P-extension extract_subvector for v2i16 from v4i16 and v4i8 from
3120 // v8i8
3121 if (Subtarget->hasStdExtP() && !Subtarget->is64Bit() &&
3122 ((InVT == MVT::v4i16 && VT == MVT::v2i16) ||
3123 (InVT == MVT::v8i8 && VT == MVT::v4i8))) {
3124 unsigned NumElts = VT.getVectorNumElements();
3125 if (Idx != 0 && Idx != NumElts)
3126 break;
3127
3128 unsigned SubRegIdx = Idx == 0 ? RISCV::sub_gpr_even : RISCV::sub_gpr_odd;
3129 SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
3130 ReplaceNode(F: Node, T: Extract.getNode());
3131 return;
3132 }
3133
3134 SDLoc DL(V);
3135
3136 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3137 MVT SubVecContainerVT = VT;
3138 // Establish the correct scalable-vector types for any fixed-length type.
3139 if (VT.isFixedLengthVector()) {
3140 assert(Idx == 0);
3141 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3142 }
3143 if (InVT.isFixedLengthVector())
3144 InVT = TLI.getContainerForFixedLengthVector(VT: InVT);
3145
3146 const auto *TRI = Subtarget->getRegisterInfo();
3147 unsigned SubRegIdx;
3148 std::tie(args&: SubRegIdx, args&: Idx) =
3149 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3150 VecVT: InVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
3151
3152 // If the Idx hasn't been completely eliminated then this is a subvector
3153 // extract which doesn't naturally align to a vector register. These must
3154 // be handled using instructions to manipulate the vector registers.
3155 if (Idx != 0)
3156 break;
3157
3158 // If we haven't set a SubRegIdx, then we must be going between
3159 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3160 if (SubRegIdx == RISCV::NoSubRegister) {
3161 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT: InVT);
3162 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
3163 InRegClassID &&
3164 "Unexpected subvector extraction");
3165 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
3166 SDNode *NewNode =
3167 CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT, Op1: V, Op2: RC);
3168 ReplaceNode(F: Node, T: NewNode);
3169 return;
3170 }
3171
3172 SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
3173 ReplaceNode(F: Node, T: Extract.getNode());
3174 return;
3175 }
3176 case RISCVISD::VMV_S_X_VL:
3177 case RISCVISD::VFMV_S_F_VL:
3178 case RISCVISD::VMV_V_X_VL:
3179 case RISCVISD::VFMV_V_F_VL: {
3180 // Try to match splat of a scalar load to a strided load with stride of x0.
3181 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3182 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3183 if (!Node->getOperand(Num: 0).isUndef())
3184 break;
3185 SDValue Src = Node->getOperand(Num: 1);
3186 auto *Ld = dyn_cast<LoadSDNode>(Val&: Src);
3187 // Can't fold load update node because the second
3188 // output is used so that load update node can't be removed.
3189 if (!Ld || Ld->isIndexed())
3190 break;
3191 EVT MemVT = Ld->getMemoryVT();
3192 // The memory VT should be the same size as the element type.
3193 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3194 break;
3195 if (!IsProfitableToFold(N: Src, U: Node, Root: Node) ||
3196 !IsLegalToFold(N: Src, U: Node, Root: Node, OptLevel: TM.getOptLevel()))
3197 break;
3198
3199 SDValue VL;
3200 if (IsScalarMove) {
3201 // We could deal with more VL if we update the VSETVLI insert pass to
3202 // avoid introducing more VSETVLI.
3203 if (!isOneConstant(V: Node->getOperand(Num: 2)))
3204 break;
3205 selectVLOp(N: Node->getOperand(Num: 2), VL);
3206 } else
3207 selectVLOp(N: Node->getOperand(Num: 2), VL);
3208
3209 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
3210 SDValue SEW = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
3211
3212 // If VL=1, then we don't need to do a strided load and can just do a
3213 // regular load.
3214 bool IsStrided = !isOneConstant(V: VL);
3215
3216 // Only do a strided load if we have optimized zero-stride vector load.
3217 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3218 break;
3219
3220 SmallVector<SDValue> Operands = {
3221 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT), 0),
3222 Ld->getBasePtr()};
3223 if (IsStrided)
3224 Operands.push_back(Elt: CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT));
3225 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
3226 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
3227 Operands.append(IL: {VL, SEW, PolicyOp, Ld->getChain()});
3228
3229 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
3230 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3231 /*IsMasked*/ Masked: false, Strided: IsStrided, /*FF*/ false,
3232 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
3233 MachineSDNode *Load =
3234 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, ResultTys: {VT, MVT::Other}, Ops: Operands);
3235 // Update the chain.
3236 ReplaceUses(F: Src.getValue(R: 1), T: SDValue(Load, 1));
3237 // Record the mem-refs
3238 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {Ld->getMemOperand()});
3239 // Replace the splat with the vlse.
3240 ReplaceNode(F: Node, T: Load);
3241 return;
3242 }
3243 case RISCVISD::LPAD_CALL:
3244 case RISCVISD::LPAD_CALL_INDIRECT: {
3245 bool IsIndirect = Opcode == RISCVISD::LPAD_CALL_INDIRECT;
3246 unsigned PseudoOpc = IsIndirect ? RISCV::PseudoCALLIndirectLpadAlign
3247 : RISCV::PseudoCALLLpadAlign;
3248
3249 uint32_t LpadLabel = 0;
3250 if (PreferredLandingPadLabel.getNumOccurrences() > 0) {
3251 if (!isUInt<20>(x: PreferredLandingPadLabel))
3252 report_fatal_error(reason: "riscv-landing-pad-label=<val>, <val> needs to fit "
3253 "in unsigned 20-bits");
3254 LpadLabel = PreferredLandingPadLabel;
3255 }
3256
3257 SmallVector<SDValue, 4> Ops;
3258 Ops.push_back(Elt: Node->getOperand(Num: 1));
3259 Ops.push_back(Elt: CurDAG->getTargetConstant(Val: LpadLabel, DL, VT: XLenVT));
3260 Ops.push_back(Elt: Node->getOperand(Num: 0));
3261 if (Node->getGluedNode())
3262 Ops.push_back(Elt: Node->getOperand(Num: Node->getNumOperands() - 1));
3263
3264 ReplaceNode(F: Node,
3265 T: CurDAG->getMachineNode(Opcode: PseudoOpc, dl: DL, VTs: Node->getVTList(), Ops));
3266 return;
3267 }
3268 case ISD::PREFETCH:
3269 // MIPS's prefetch instruction already encodes the hint within the
3270 // instruction itself, so no extra NTL hint is needed.
3271 if (Subtarget->hasVendorXMIPSCBOP())
3272 break;
3273
3274 unsigned Locality = Node->getConstantOperandVal(Num: 3);
3275 if (Locality > 2)
3276 break;
3277
3278 auto *LoadStoreMem = cast<MemSDNode>(Val: Node);
3279 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3280 MMO->setFlags(MachineMemOperand::MONonTemporal);
3281
3282 int NontemporalLevel = 0;
3283 switch (Locality) {
3284 case 0:
3285 NontemporalLevel = 3; // NTL.ALL
3286 break;
3287 case 1:
3288 NontemporalLevel = 1; // NTL.PALL
3289 break;
3290 case 2:
3291 NontemporalLevel = 0; // NTL.P1
3292 break;
3293 default:
3294 llvm_unreachable("unexpected locality value.");
3295 }
3296
3297 if (NontemporalLevel & 0b1)
3298 MMO->setFlags(MONontemporalBit0);
3299 if (NontemporalLevel & 0b10)
3300 MMO->setFlags(MONontemporalBit1);
3301 break;
3302 }
3303
3304 // Select the default instruction.
3305 SelectCode(N: Node);
3306}
3307
3308bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
3309 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3310 std::vector<SDValue> &OutOps) {
3311 // Always produce a register and immediate operand, as expected by
3312 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3313 switch (ConstraintID) {
3314 case InlineAsm::ConstraintCode::o:
3315 case InlineAsm::ConstraintCode::m: {
3316 SDValue Op0, Op1;
3317 [[maybe_unused]] bool Found = SelectAddrRegImm(Addr: Op, Base&: Op0, Offset&: Op1);
3318 assert(Found && "SelectAddrRegImm should always succeed");
3319 OutOps.push_back(x: Op0);
3320 OutOps.push_back(x: Op1);
3321 return false;
3322 }
3323 case InlineAsm::ConstraintCode::A:
3324 OutOps.push_back(x: Op);
3325 OutOps.push_back(
3326 x: CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget->getXLenVT()));
3327 return false;
3328 default:
3329 report_fatal_error(reason: "Unexpected asm memory constraint " +
3330 InlineAsm::getMemConstraintName(C: ConstraintID));
3331 }
3332
3333 return true;
3334}
3335
3336bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
3337 SDValue &Offset) {
3338 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Addr)) {
3339 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT: Subtarget->getXLenVT());
3340 Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT: Subtarget->getXLenVT());
3341 return true;
3342 }
3343
3344 return false;
3345}
3346
3347// Fold constant addresses.
3348static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3349 const MVT VT, const RISCVSubtarget *Subtarget,
3350 SDValue Addr, SDValue &Base, SDValue &Offset,
3351 bool IsPrefetch = false) {
3352 if (!isa<ConstantSDNode>(Val: Addr))
3353 return false;
3354
3355 int64_t CVal = cast<ConstantSDNode>(Val&: Addr)->getSExtValue();
3356
3357 // If the constant is a simm12, we can fold the whole constant and use X0 as
3358 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3359 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3360 int64_t Lo12 = SignExtend64<12>(x: CVal);
3361 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3362 if (!Subtarget->is64Bit() || isInt<32>(x: Hi)) {
3363 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3364 return false;
3365 if (Hi) {
3366 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3367 Base = SDValue(
3368 CurDAG->getMachineNode(Opcode: RISCV::LUI, dl: DL, VT,
3369 Op1: CurDAG->getTargetConstant(Val: Hi20, DL, VT)),
3370 0);
3371 } else {
3372 Base = CurDAG->getRegister(Reg: RISCV::X0, VT);
3373 }
3374 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3375 return true;
3376 }
3377
3378 // Ask how constant materialization would handle this constant.
3379 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: CVal, STI: *Subtarget);
3380
3381 // If the last instruction would be an ADDI, we can fold its immediate and
3382 // emit the rest of the sequence as the base.
3383 if (Seq.back().getOpcode() != RISCV::ADDI)
3384 return false;
3385 Lo12 = Seq.back().getImm();
3386 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3387 return false;
3388
3389 // Drop the last instruction.
3390 Seq.pop_back();
3391 assert(!Seq.empty() && "Expected more instructions in sequence");
3392
3393 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3394 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3395 return true;
3396}
3397
3398// Is this ADD instruction only used as the base pointer of scalar loads and
3399// stores?
3400static bool isWorthFoldingAdd(SDValue Add) {
3401 for (auto *User : Add->users()) {
3402 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3403 User->getOpcode() != RISCVISD::LD_RV32 &&
3404 User->getOpcode() != RISCVISD::SD_RV32 &&
3405 User->getOpcode() != ISD::ATOMIC_LOAD &&
3406 User->getOpcode() != ISD::ATOMIC_STORE)
3407 return false;
3408 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3409 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3410 VT != MVT::f64)
3411 return false;
3412 // Don't allow stores of the value. It must be used as the address.
3413 if (User->getOpcode() == ISD::STORE &&
3414 cast<StoreSDNode>(Val: User)->getValue() == Add)
3415 return false;
3416 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3417 cast<AtomicSDNode>(Val: User)->getVal() == Add)
3418 return false;
3419 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3420 (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add))
3421 return false;
3422 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
3423 return false;
3424 }
3425
3426 return true;
3427}
3428
3429bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3430 switch (User->getOpcode()) {
3431 default:
3432 return false;
3433 case ISD::LOAD:
3434 case RISCVISD::LD_RV32:
3435 case ISD::ATOMIC_LOAD:
3436 break;
3437 case ISD::STORE:
3438 // Don't allow stores of Add. It must only be used as the address.
3439 if (cast<StoreSDNode>(Val: User)->getValue() == Add)
3440 return false;
3441 break;
3442 case RISCVISD::SD_RV32:
3443 // Don't allow stores of Add. It must only be used as the address.
3444 if (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add)
3445 return false;
3446 break;
3447 case ISD::ATOMIC_STORE:
3448 // Don't allow stores of Add. It must only be used as the address.
3449 if (cast<AtomicSDNode>(Val: User)->getVal() == Add)
3450 return false;
3451 break;
3452 }
3453
3454 return true;
3455}
3456
3457// To prevent SelectAddrRegImm from folding offsets that conflict with the
3458// fusion of PseudoMovAddr, check if the offset of every use of a given address
3459// is within the alignment.
3460bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3461 Align Alignment) {
3462 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3463 for (auto *User : Addr->users()) {
3464 // If the user is a load or store, then the offset is 0 which is always
3465 // within alignment.
3466 if (isRegImmLoadOrStore(User, Add: Addr))
3467 continue;
3468
3469 if (CurDAG->isBaseWithConstantOffset(Op: SDValue(User, 0))) {
3470 int64_t CVal = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
3471 if (!isInt<12>(x: CVal) || Alignment <= CVal)
3472 return false;
3473
3474 // Make sure all uses are foldable load/stores.
3475 for (auto *AddUser : User->users())
3476 if (!isRegImmLoadOrStore(User: AddUser, Add: SDValue(User, 0)))
3477 return false;
3478
3479 continue;
3480 }
3481
3482 return false;
3483 }
3484
3485 return true;
3486}
3487
/// Select \p Addr as a (\p Base, \p Offset) pair for a register+immediate
/// memory access. Every return in this function returns true. The forms are
/// tried in this order:
///   1. a bare frame index,
///   2. an ADD_LO node, whose two operands become Base and Offset,
///   3. base + simm12 constant, optionally merged into an ADD_LO global,
///   4. an ADD with a larger constant, split into an extra instruction plus a
///      foldable low part,
///   5. a fully constant address,
///   6. fallback: Addr itself as the base with a zero offset.
3488bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3489 SDValue &Offset) {
3490 if (SelectAddrFrameIndex(Addr, Base, Offset))
3491 return true;
3492
3493 SDLoc DL(Addr);
3494 MVT VT = Addr.getSimpleValueType();
3495
// Form 2: use the operands of ADD_LO directly as Base and Offset.
3496 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3497 bool CanFold = true;
3498 // Unconditionally fold if operand 1 is not a global address (e.g.
3499 // externsymbol)
3500 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val: Addr.getOperand(i: 1))) {
// Note: this DataLayout reference shadows the SDLoc DL declared above.
3501 const DataLayout &DL = CurDAG->getDataLayout();
3502 Align Alignment = commonAlignment(
3503 A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
3504 if (!areOffsetsWithinAlignment(Addr, Alignment))
3505 CanFold = false;
3506 }
3507 if (CanFold) {
3508 Base = Addr.getOperand(i: 0);
3509 Offset = Addr.getOperand(i: 1);
3510 return true;
3511 }
3512 }
3513
// Form 3: base + constant where the constant fits in a simm12.
3514 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3515 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3516 if (isInt<12>(x: CVal)) {
3517 Base = Addr.getOperand(i: 0);
3518 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3519 SDValue LoOperand = Base.getOperand(i: 1);
3520 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: LoOperand)) {
3521 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3522 // (its low part, really), then we can rely on the alignment of that
3523 // variable to provide a margin of safety before low part can overflow
3524 // the 12 bits of the load/store offset. Check if CVal falls within
3525 // that margin; if so (low part + CVal) can't overflow.
// Note: this DataLayout reference shadows the SDLoc DL declared above.
3526 const DataLayout &DL = CurDAG->getDataLayout();
3527 Align Alignment = commonAlignment(
3528 A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
3529 if ((CVal == 0 || Alignment > CVal) &&
3530 areOffsetsWithinAlignment(Addr: Base, Alignment)) {
// Merge CVal into the global's own offset and emit a new target global
// address as the Offset operand; Base becomes the ADD_LO's first operand.
3531 int64_t CombinedOffset = CVal + GA->getOffset();
3532 Base = Base.getOperand(i: 0);
3533 Offset = CurDAG->getTargetGlobalAddress(
3534 GV: GA->getGlobal(), DL: SDLoc(LoOperand), VT: LoOperand.getValueType(),
3535 offset: CombinedOffset, TargetFlags: GA->getTargetFlags());
3536 return true;
3537 }
3538 }
3539 }
3540
// Plain base + simm12: convert a frame-index base to a target frame index.
3541 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3542 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3543 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3544 return true;
3545 }
3546 }
3547
3548 // Handle ADD with large immediates.
3549 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
3550 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3551 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3552
3553 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3554 // an ADDI for part of the offset and fold the rest into the load/store.
3555 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3556 if (CVal >= -4096 && CVal <= 4094) {
// Adj goes into the ADDI; the remainder (CVal - Adj) is the memory offset.
3557 int64_t Adj = CVal < 0 ? -2048 : 2047;
3558 Base = SDValue(
3559 CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
3560 Op2: CurDAG->getSignedTargetConstant(Val: Adj, DL, VT)),
3561 0);
3562 Offset = CurDAG->getSignedTargetConstant(Val: CVal - Adj, DL, VT);
3563 return true;
3564 }
3565
3566 // For larger immediates, we might be able to save one instruction from
3567 // constant materialization by folding the Lo12 bits of the immediate into
3568 // the address. We should only do this if the ADD is only used by loads and
3569 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3570 // separately with the full materialized immediate creating extra
3571 // instructions.
3572 if (isWorthFoldingAdd(Add: Addr) &&
3573 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
3574 Offset, /*IsPrefetch=*/false)) {
3575 // Insert an ADD instruction with the materialized Hi52 bits.
3576 Base = SDValue(
3577 CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
3578 0);
3579 return true;
3580 }
3581 }
3582
// Form 5: the whole address is a constant.
3583 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3584 /*IsPrefetch=*/false))
3585 return true;
3586
// Form 6: nothing could be folded; use Addr as the base with offset 0.
3587 Base = Addr;
3588 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3589 return true;
3590}
3591
3592/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3593bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
3594 SDValue &Offset) {
3595 if (SelectAddrFrameIndex(Addr, Base, Offset))
3596 return true;
3597
3598 SDLoc DL(Addr);
3599 MVT VT = Addr.getSimpleValueType();
3600
3601 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3602 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3603 if (isUInt<9>(x: CVal)) {
3604 Base = Addr.getOperand(i: 0);
3605
3606 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3607 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3608 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3609 return true;
3610 }
3611 }
3612
3613 Base = Addr;
3614 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3615 return true;
3616}
3617
3618/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3619/// Offset should be all zeros.
3620bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3621 SDValue &Offset) {
3622 if (SelectAddrFrameIndex(Addr, Base, Offset))
3623 return true;
3624
3625 SDLoc DL(Addr);
3626 MVT VT = Addr.getSimpleValueType();
3627
3628 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3629 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3630 if (isInt<12>(x: CVal)) {
3631 Base = Addr.getOperand(i: 0);
3632
3633 // Early-out if not a valid offset.
3634 if ((CVal & 0b11111) != 0) {
3635 Base = Addr;
3636 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3637 return true;
3638 }
3639
3640 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3641 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3642 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3643 return true;
3644 }
3645 }
3646
3647 // Handle ADD with large immediates.
3648 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
3649 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3650 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3651
3652 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3653 // one instruction by folding adjustment (-2048 or 2016) into the address.
3654 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3655 int64_t Adj = CVal < 0 ? -2048 : 2016;
3656 int64_t AdjustedOffset = CVal - Adj;
3657 Base =
3658 SDValue(CurDAG->getMachineNode(
3659 Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
3660 Op2: CurDAG->getSignedTargetConstant(Val: AdjustedOffset, DL, VT)),
3661 0);
3662 Offset = CurDAG->getSignedTargetConstant(Val: Adj, DL, VT);
3663 return true;
3664 }
3665
3666 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
3667 Offset, /*IsPrefetch=*/true)) {
3668 // Insert an ADD instruction with the materialized Hi52 bits.
3669 Base = SDValue(
3670 CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
3671 0);
3672 return true;
3673 }
3674 }
3675
3676 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3677 /*IsPrefetch=*/true))
3678 return true;
3679
3680 Base = Addr;
3681 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3682 return true;
3683}
3684
3685/// Return true if this a load/store that we have a RegRegScale instruction for.
3686static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3687 const RISCVSubtarget &Subtarget) {
3688 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3689 return false;
3690 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3691 if (!(VT.isScalarInteger() &&
3692 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3693 !((VT == MVT::f32 || VT == MVT::f64) &&
3694 Subtarget.hasVendorXTHeadFMemIdx()))
3695 return false;
3696 // Don't allow stores of the value. It must be used as the address.
3697 if (User->getOpcode() == ISD::STORE &&
3698 cast<StoreSDNode>(Val: User)->getValue() == Add)
3699 return false;
3700
3701 return true;
3702}
3703
3704/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3705/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3706/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3707/// single addi and we don't have a SHXADD instruction we could use.
3708/// FIXME: May still need to check how many and what kind of users the SHL has.
3709static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3710 SDValue Add,
3711 SDValue Shift = SDValue()) {
3712 bool FoundADDI = false;
3713 for (auto *User : Add->users()) {
3714 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3715 continue;
3716
3717 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3718 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3719 !isa<ConstantSDNode>(Val: User->getOperand(Num: 1)) ||
3720 !isInt<12>(x: cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue()))
3721 return false;
3722
3723 FoundADDI = true;
3724
3725 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3726 assert(Shift.getOpcode() == ISD::SHL);
3727 unsigned ShiftAmt = Shift.getConstantOperandVal(i: 1);
3728 if (Subtarget.hasShlAdd(ShAmt: ShiftAmt))
3729 return false;
3730
3731 // All users of the ADDI should be load/store.
3732 for (auto *ADDIUser : User->users())
3733 if (!isRegRegScaleLoadOrStore(User: ADDIUser, Add: SDValue(User, 0), Subtarget))
3734 return false;
3735 }
3736
3737 return true;
3738}
3739
3740bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3741 unsigned MaxShiftAmount,
3742 SDValue &Base, SDValue &Index,
3743 SDValue &Scale) {
3744 if (Addr.getOpcode() != ISD::ADD)
3745 return false;
3746 SDValue LHS = Addr.getOperand(i: 0);
3747 SDValue RHS = Addr.getOperand(i: 1);
3748
3749 EVT VT = Addr.getSimpleValueType();
3750 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3751 SDValue &Shift) {
3752 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
3753 return false;
3754
3755 // Only match shifts by a value in range [0, MaxShiftAmount].
3756 unsigned ShiftAmt = N.getConstantOperandVal(i: 1);
3757 if (ShiftAmt > MaxShiftAmount)
3758 return false;
3759
3760 Index = N.getOperand(i: 0);
3761 Shift = CurDAG->getTargetConstant(Val: ShiftAmt, DL: SDLoc(N), VT);
3762 return true;
3763 };
3764
3765 if (auto *C1 = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3766 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3767 if (LHS.getOpcode() == ISD::ADD &&
3768 !isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
3769 isInt<12>(x: C1->getSExtValue())) {
3770 if (SelectShl(LHS.getOperand(i: 1), Index, Scale) &&
3771 isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 1))) {
3772 SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
3773 DL: SDLoc(Addr), VT);
3774 Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
3775 Op1: LHS.getOperand(i: 0), Op2: C1Val),
3776 0);
3777 return true;
3778 }
3779
3780 // Add is commutative so we need to check both operands.
3781 if (SelectShl(LHS.getOperand(i: 0), Index, Scale) &&
3782 isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 0))) {
3783 SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
3784 DL: SDLoc(Addr), VT);
3785 Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
3786 Op1: LHS.getOperand(i: 1), Op2: C1Val),
3787 0);
3788 return true;
3789 }
3790 }
3791
3792 // Don't match add with constants.
3793 // FIXME: Is this profitable for large constants that have 0s in the lower
3794 // 12 bits that we can materialize with LUI?
3795 return false;
3796 }
3797
3798 // Try to match a shift on the RHS.
3799 if (SelectShl(RHS, Index, Scale)) {
3800 if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: RHS))
3801 return false;
3802 Base = LHS;
3803 return true;
3804 }
3805
3806 // Try to match a shift on the LHS.
3807 if (SelectShl(LHS, Index, Scale)) {
3808 if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: LHS))
3809 return false;
3810 Base = RHS;
3811 return true;
3812 }
3813
3814 if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr))
3815 return false;
3816
3817 Base = LHS;
3818 Index = RHS;
3819 Scale = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT);
3820 return true;
3821}
3822
3823bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3824 unsigned MaxShiftAmount,
3825 unsigned Bits, SDValue &Base,
3826 SDValue &Index,
3827 SDValue &Scale) {
3828 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3829 return false;
3830
3831 if (Index.getOpcode() == ISD::AND) {
3832 auto *C = dyn_cast<ConstantSDNode>(Val: Index.getOperand(i: 1));
3833 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3834 Index = Index.getOperand(i: 0);
3835 return true;
3836 }
3837 }
3838
3839 return false;
3840}
3841
3842bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3843 SDValue &Offset) {
3844 if (Addr.getOpcode() != ISD::ADD)
3845 return false;
3846
3847 if (isa<ConstantSDNode>(Val: Addr.getOperand(i: 1)))
3848 return false;
3849
3850 Base = Addr.getOperand(i: 0);
3851 Offset = Addr.getOperand(i: 1);
3852 return true;
3853}
3854
3855bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3856 SDValue &ShAmt) {
3857 ShAmt = N;
3858
3859 // Peek through zext.
3860 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3861 ShAmt = ShAmt.getOperand(i: 0);
3862
3863 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3864 // amount. If there is an AND on the shift amount, we can bypass it if it
3865 // doesn't affect any of those bits.
3866 if (ShAmt.getOpcode() == ISD::AND &&
3867 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
3868 const APInt &AndMask = ShAmt.getConstantOperandAPInt(i: 1);
3869
3870 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3871 // mask that covers the bits needed to represent all shift amounts.
3872 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3873 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3874
3875 if (ShMask.isSubsetOf(RHS: AndMask)) {
3876 ShAmt = ShAmt.getOperand(i: 0);
3877 } else {
3878 // SimplifyDemandedBits may have optimized the mask so try restoring any
3879 // bits that are known zero.
3880 KnownBits Known = CurDAG->computeKnownBits(Op: ShAmt.getOperand(i: 0));
3881 if (!ShMask.isSubsetOf(RHS: AndMask | Known.Zero))
3882 return true;
3883 ShAmt = ShAmt.getOperand(i: 0);
3884 }
3885 }
3886
3887 if (ShAmt.getOpcode() == ISD::ADD &&
3888 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
3889 uint64_t Imm = ShAmt.getConstantOperandVal(i: 1);
3890 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3891 // to avoid the ADD.
3892 if (Imm != 0 && Imm % ShiftWidth == 0) {
3893 ShAmt = ShAmt.getOperand(i: 0);
3894 return true;
3895 }
3896 } else if (ShAmt.getOpcode() == ISD::SUB &&
3897 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 0))) {
3898 uint64_t Imm = ShAmt.getConstantOperandVal(i: 0);
3899 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3900 // generate a NEG instead of a SUB of a constant.
3901 if (Imm != 0 && Imm % ShiftWidth == 0) {
3902 SDLoc DL(ShAmt);
3903 EVT VT = ShAmt.getValueType();
3904 SDValue Zero = CurDAG->getRegister(Reg: RISCV::X0, VT);
3905 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3906 MachineSDNode *Neg = CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT, Op1: Zero,
3907 Op2: ShAmt.getOperand(i: 1));
3908 ShAmt = SDValue(Neg, 0);
3909 return true;
3910 }
3911 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3912 // to generate a NOT instead of a SUB of a constant.
3913 if (Imm % ShiftWidth == ShiftWidth - 1) {
3914 SDLoc DL(ShAmt);
3915 EVT VT = ShAmt.getValueType();
3916 MachineSDNode *Not = CurDAG->getMachineNode(
3917 Opcode: RISCV::XORI, dl: DL, VT, Op1: ShAmt.getOperand(i: 1),
3918 Op2: CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/IsTarget: true));
3919 ShAmt = SDValue(Not, 0);
3920 return true;
3921 }
3922 }
3923
3924 return true;
3925}
3926
3927/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3928/// check for equality with 0. This function emits instructions that convert the
3929/// seteq/setne into something that can be compared with 0.
3930/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3931/// ISD::SETNE).
3932bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3933 SDValue &Val) {
3934 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3935 "Unexpected condition code!");
3936
3937 // We're looking for a setcc.
3938 if (N->getOpcode() != ISD::SETCC)
3939 return false;
3940
3941 // Must be an equality comparison.
3942 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
3943 if (CCVal != ExpectedCCVal)
3944 return false;
3945
3946 SDValue LHS = N->getOperand(Num: 0);
3947 SDValue RHS = N->getOperand(Num: 1);
3948
3949 if (!LHS.getValueType().isScalarInteger())
3950 return false;
3951
3952 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3953 if (isNullConstant(V: RHS)) {
3954 Val = LHS;
3955 return true;
3956 }
3957
3958 SDLoc DL(N);
3959
3960 if (auto *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3961 int64_t CVal = C->getSExtValue();
3962 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3963 // non-zero otherwise.
3964 if (CVal == -2048) {
3965 Val = SDValue(
3966 CurDAG->getMachineNode(
3967 Opcode: RISCV::XORI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3968 Op2: CurDAG->getSignedTargetConstant(Val: CVal, DL, VT: N->getValueType(ResNo: 0))),
3969 0);
3970 return true;
3971 }
3972 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3973 // if the LHS is equal to the RHS and non-zero otherwise.
3974 if (isInt<12>(x: CVal) || CVal == 2048) {
3975 unsigned Opc = RISCV::ADDI;
3976 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3977 cast<VTSDNode>(Val: LHS.getOperand(i: 1))->getVT() == MVT::i32) {
3978 Opc = RISCV::ADDIW;
3979 LHS = LHS.getOperand(i: 0);
3980 }
3981
3982 Val = SDValue(CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3983 Op2: CurDAG->getSignedTargetConstant(
3984 Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
3985 0);
3986 return true;
3987 }
3988 if (isPowerOf2_64(Value: CVal) && Subtarget->hasStdExtZbs()) {
3989 Val = SDValue(
3990 CurDAG->getMachineNode(
3991 Opcode: RISCV::BINVI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3992 Op2: CurDAG->getTargetConstant(Val: Log2_64(Value: CVal), DL, VT: N->getValueType(ResNo: 0))),
3993 0);
3994 return true;
3995 }
3996 // Same as the addi case above but for larger immediates (signed 26-bit) use
3997 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3998 // anything which can be done with a single lui as it might be compressible.
3999 if (Subtarget->hasVendorXqcilia() && isInt<26>(x: CVal) &&
4000 (CVal & 0xFFF) != 0) {
4001 Val = SDValue(
4002 CurDAG->getMachineNode(
4003 Opcode: RISCV::QC_E_ADDI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
4004 Op2: CurDAG->getSignedTargetConstant(Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
4005 0);
4006 return true;
4007 }
4008 }
4009
4010 // If nothing else we can XOR the LHS and RHS to produce zero if they are
4011 // equal and a non-zero value if they aren't.
4012 Val = SDValue(
4013 CurDAG->getMachineNode(Opcode: RISCV::XOR, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, Op2: RHS), 0);
4014 return true;
4015}
4016
4017bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
4018 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4019 cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT().getSizeInBits() == Bits) {
4020 Val = N.getOperand(i: 0);
4021 return true;
4022 }
4023
4024 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
4025 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
4026 return N;
4027
4028 SDValue N0 = N.getOperand(i: 0);
4029 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
4030 N.getConstantOperandVal(i: 1) == ShiftAmt &&
4031 N0.getConstantOperandVal(i: 1) == ShiftAmt)
4032 return N0.getOperand(i: 0);
4033
4034 return N;
4035 };
4036
4037 MVT VT = N.getSimpleValueType();
4038 if (CurDAG->ComputeNumSignBits(Op: N) > (VT.getSizeInBits() - Bits)) {
4039 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
4040 return true;
4041 }
4042
4043 return false;
4044}
4045
4046bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
4047 if (N.getOpcode() == ISD::AND) {
4048 auto *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
4049 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
4050 Val = N.getOperand(i: 0);
4051 return true;
4052 }
4053 }
4054 MVT VT = N.getSimpleValueType();
4055 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: Bits);
4056 if (CurDAG->MaskedValueIsZero(Op: N, Mask)) {
4057 Val = N;
4058 return true;
4059 }
4060
4061 return false;
4062}
4063
4064/// Look for various patterns that can be done with a SHL that can be folded
4065/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
4066/// SHXADD we are trying to match.
///
/// On success \p Val is set to a newly created shift machine node (SRLI, SRLIW,
/// or SRAI followed by SRLI) and true is returned. Three operand shapes are
/// examined:
///   - (and (shl/srl y, c2), c1)
///   - (and (sra y, c2), c1), only when the sra has a single use
///   - (shl/srl (and X, Mask), C1), only when the and has a single use
/// Returns false, without setting \p Val, when nothing matches.
4067bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
4068 SDValue &Val) {
4069 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
4070 SDValue N0 = N.getOperand(i: 0);
4071
4072 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
4073 (LeftShift || N0.getOpcode() == ISD::SRL) &&
4074 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4075 uint64_t Mask = N.getConstantOperandVal(i: 1);
4076 unsigned C2 = N0.getConstantOperandVal(i: 1);
4077
// Clear the mask bits that the inner shift has already forced to zero: the
// low C2 bits for a left shift, the high C2 bits (of XLen) for a right shift.
4078 unsigned XLen = Subtarget->getXLen();
4079 if (LeftShift)
4080 Mask &= maskTrailingZeros<uint64_t>(N: C2);
4081 else
4082 Mask &= maskTrailingOnes<uint64_t>(N: XLen - C2);
4083
4084 if (isShiftedMask_64(Value: Mask)) {
// Leading is counted relative to XLen rather than to 64 bits.
4085 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
4086 unsigned Trailing = llvm::countr_zero(Val: Mask);
// The trailing zero count must equal the SHXADD shift amount being matched.
4087 if (Trailing != ShAmt)
4088 return false;
4089
4090 unsigned Opcode;
4091 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
4092 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
4093 // followed by a SHXADD with c3 for the X amount.
4094 if (LeftShift && Leading == 0 && C2 < Trailing)
4095 Opcode = RISCV::SRLI;
4096 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
4097 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
4098 // followed by a SHXADD with c3 for the X amount.
4099 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
4100 Opcode = RISCV::SRLIW;
4101 // Look for (and (srl y, c2), c1) where c1 is a shifted mask with c2
4102 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
4103 // followed by a SHXADD using c3 for the X amount.
4104 else if (!LeftShift && Leading == C2)
4105 Opcode = RISCV::SRLI;
4106 // Look for (and (srl y, c2), c1) where c1 is a shifted mask with 32+c2
4107 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
4108 // followed by a SHXADD using c3 for the X amount.
4109 else if (!LeftShift && Leading == 32 + C2)
4110 Opcode = RISCV::SRLIW;
4111 else
4112 return false;
4113
4114 SDLoc DL(N);
4115 EVT VT = N.getValueType();
// ShAmt is a by-value parameter; from here on it is reused to hold the shift
// amount of the SRLI/SRLIW being emitted, not the SHXADD amount.
4116 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
4117 Val = SDValue(
4118 CurDAG->getMachineNode(Opcode, dl: DL, VT, Op1: N0.getOperand(i: 0),
4119 Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)),
4120 0);
4121 return true;
4122 }
4123 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
4124 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4125 uint64_t Mask = N.getConstantOperandVal(i: 1);
4126 unsigned C2 = N0.getConstantOperandVal(i: 1);
4127
4128 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
4129 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
4130 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
4131 // the X amount.
4132 if (isShiftedMask_64(Value: Mask)) {
4133 unsigned XLen = Subtarget->getXLen();
4134 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
4135 unsigned Trailing = llvm::countr_zero(Val: Mask);
4136 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
4137 SDLoc DL(N);
4138 EVT VT = N.getValueType();
// Two nodes are created: the SRAI first, then an SRLI that consumes it.
4139 Val = SDValue(CurDAG->getMachineNode(
4140 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
4141 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT)),
4142 0);
4143 Val = SDValue(CurDAG->getMachineNode(
4144 Opcode: RISCV::SRLI, dl: DL, VT, Op1: Val,
4145 Op2: CurDAG->getTargetConstant(Val: Leading + ShAmt, DL, VT)),
4146 0);
4147 return true;
4148 }
4149 }
4150 }
4151 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
4152 (LeftShift || N.getOpcode() == ISD::SRL) &&
4153 isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
// Here the shift is the outer node and the single-use AND is its operand.
4154 SDValue N0 = N.getOperand(i: 0);
4155 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
4156 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4157 uint64_t Mask = N0.getConstantOperandVal(i: 1);
4158 if (isShiftedMask_64(Value: Mask)) {
4159 unsigned C1 = N.getConstantOperandVal(i: 1);
4160 unsigned XLen = Subtarget->getXLen();
4161 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
4162 unsigned Trailing = llvm::countr_zero(Val: Mask);
4163 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
4164 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
4165 if (LeftShift && Leading == 32 && Trailing > 0 &&
4166 (Trailing + C1) == ShAmt) {
4167 SDLoc DL(N);
4168 EVT VT = N.getValueType();
4169 Val = SDValue(CurDAG->getMachineNode(
4170 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
4171 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
4172 0);
4173 return true;
4174 }
4175 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
4176 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
4177 if (!LeftShift && Leading == 32 && Trailing > C1 &&
4178 (Trailing - C1) == ShAmt) {
4179 SDLoc DL(N);
4180 EVT VT = N.getValueType();
4181 Val = SDValue(CurDAG->getMachineNode(
4182 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
4183 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
4184 0);
4185 return true;
4186 }
4187 }
4188 }
4189 }
4190
4191 return false;
4192}
4193
4194/// Look for various patterns that can be done with a SHL that can be folded
4195/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
4196/// SHXADD_UW we are trying to match.
4197bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
4198 SDValue &Val) {
4199 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1)) &&
4200 N.hasOneUse()) {
4201 SDValue N0 = N.getOperand(i: 0);
4202 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
4203 N0.hasOneUse()) {
4204 uint64_t Mask = N.getConstantOperandVal(i: 1);
4205 unsigned C2 = N0.getConstantOperandVal(i: 1);
4206
4207 Mask &= maskTrailingZeros<uint64_t>(N: C2);
4208
4209 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
4210 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
4211 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
4212 if (isShiftedMask_64(Value: Mask)) {
4213 unsigned Leading = llvm::countl_zero(Val: Mask);
4214 unsigned Trailing = llvm::countr_zero(Val: Mask);
4215 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
4216 SDLoc DL(N);
4217 EVT VT = N.getValueType();
4218 Val = SDValue(CurDAG->getMachineNode(
4219 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
4220 Op2: CurDAG->getTargetConstant(Val: C2 - ShAmt, DL, VT)),
4221 0);
4222 return true;
4223 }
4224 }
4225 }
4226 }
4227
4228 return false;
4229}
4230
4231bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
4232 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4233 if (N->getFlags().hasDisjoint())
4234 return true;
4235 return CurDAG->haveNoCommonBitsSet(A: N->getOperand(Num: 0), B: N->getOperand(Num: 1));
4236}
4237
4238bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4239 SDValue N, SDValue &Val) {
4240 int OrigCost = RISCVMatInt::getIntMatCost(Val: APInt(64, OrigImm), Size: 64, STI: *Subtarget,
4241 /*CompressionCost=*/true);
4242 int Cost = RISCVMatInt::getIntMatCost(Val: APInt(64, Imm), Size: 64, STI: *Subtarget,
4243 /*CompressionCost=*/true);
4244 if (OrigCost <= Cost)
4245 return false;
4246
4247 Val = selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm, Subtarget: *Subtarget);
4248 return true;
4249}
4250
4251bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
4252 if (!isa<ConstantSDNode>(Val: N))
4253 return false;
4254 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
4255 if ((Imm >> 31) != 1)
4256 return false;
4257
4258 for (const SDNode *U : N->users()) {
4259 switch (U->getOpcode()) {
4260 case ISD::ADD:
4261 break;
4262 case ISD::OR:
4263 if (orDisjoint(N: U))
4264 break;
4265 return false;
4266 default:
4267 return false;
4268 }
4269 }
4270
4271 return selectImm64IfCheaper(Imm: 0xffffffff00000000 | Imm, OrigImm: Imm, N, Val);
4272}
4273
4274bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
4275 if (!isa<ConstantSDNode>(Val: N))
4276 return false;
4277 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
4278 if (isInt<32>(x: Imm))
4279 return false;
4280 if (Imm == INT64_MIN)
4281 return false;
4282
4283 for (const SDNode *U : N->users()) {
4284 switch (U->getOpcode()) {
4285 case ISD::ADD:
4286 break;
4287 case RISCVISD::VMV_V_X_VL:
4288 if (!all_of(Range: U->users(), P: [](const SDNode *V) {
4289 return V->getOpcode() == ISD::ADD ||
4290 V->getOpcode() == RISCVISD::ADD_VL;
4291 }))
4292 return false;
4293 break;
4294 default:
4295 return false;
4296 }
4297 }
4298
4299 return selectImm64IfCheaper(Imm: -Imm, OrigImm: Imm, N, Val);
4300}
4301
4302bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
4303 if (!isa<ConstantSDNode>(Val: N))
4304 return false;
4305 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
4306
4307 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
4308 if (isInt<32>(x: Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
4309 return false;
4310
4311 // Abandon this transform if the constant is needed elsewhere.
4312 for (const SDNode *U : N->users()) {
4313 switch (U->getOpcode()) {
4314 case ISD::AND:
4315 case ISD::OR:
4316 case ISD::XOR:
4317 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
4318 return false;
4319 break;
4320 case RISCVISD::VMV_V_X_VL:
4321 if (!Subtarget->hasStdExtZvkb())
4322 return false;
4323 if (!all_of(Range: U->users(), P: [](const SDNode *V) {
4324 return V->getOpcode() == ISD::AND ||
4325 V->getOpcode() == RISCVISD::AND_VL;
4326 }))
4327 return false;
4328 break;
4329 default:
4330 return false;
4331 }
4332 }
4333
4334 if (isInt<32>(x: Imm)) {
4335 Val =
4336 selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm: ~Imm, Subtarget: *Subtarget);
4337 return true;
4338 }
4339
4340 // For 64-bit constants, the instruction sequences get complex,
4341 // so we select inverted only if it's cheaper.
4342 return selectImm64IfCheaper(Imm: ~Imm, OrigImm: Imm, N, Val);
4343}
4344
4345static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4346 unsigned Bits,
4347 const TargetInstrInfo *TII) {
4348 unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: User->getMachineOpcode());
4349
4350 if (!MCOpcode)
4351 return false;
4352
4353 const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
4354 const uint64_t TSFlags = MCID.TSFlags;
4355 if (!RISCVII::hasSEWOp(TSFlags))
4356 return false;
4357 assert(RISCVII::hasVLOp(TSFlags));
4358
4359 unsigned ChainOpIdx = User->getNumOperands() - 1;
4360 bool HasChainOp = User->getOperand(Num: ChainOpIdx).getValueType() == MVT::Other;
4361 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4362 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4363 const unsigned Log2SEW = User->getConstantOperandVal(Num: VLIdx + 1);
4364
4365 if (UserOpNo == VLIdx)
4366 return false;
4367
4368 auto NumDemandedBits =
4369 RISCV::getVectorLowDemandedScalarBits(Opcode: MCOpcode, Log2SEW);
4370 return NumDemandedBits && Bits >= *NumDemandedBits;
4371}
4372
4373// Return true if all users of this SDNode* only consume the lower \p Bits.
4374// This can be used to form W instructions for add/sub/mul/shl even when the
4375// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4376// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4377// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4378// the add/sub/mul/shl to become non-W instructions. By checking the users we
4379// may be able to use a W instruction and CSE with the other instruction if
4380// this has happened. We could try to detect that the CSE opportunity exists
4381// before doing this, but that would be more complicated.
4382bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
4383 const unsigned Depth) const {
4384 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4385 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4386 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4387 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4388 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4389 isa<ConstantSDNode>(Node) || Depth != 0) &&
4390 "Unexpected opcode");
4391
4392 if (Depth >= SelectionDAG::MaxRecursionDepth)
4393 return false;
4394
4395 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4396 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4397 if (Depth == 0 && !Node->getValueType(ResNo: 0).isScalarInteger())
4398 return false;
4399
4400 for (SDUse &Use : Node->uses()) {
4401 SDNode *User = Use.getUser();
4402 // Users of this node should have already been instruction selected
4403 if (!User->isMachineOpcode())
4404 return false;
4405
4406 // TODO: Add more opcodes?
4407 switch (User->getMachineOpcode()) {
4408 default:
4409 if (vectorPseudoHasAllNBitUsers(User, UserOpNo: Use.getOperandNo(), Bits, TII))
4410 break;
4411 return false;
4412 case RISCV::ADDW:
4413 case RISCV::ADDIW:
4414 case RISCV::SUBW:
4415 case RISCV::MULW:
4416 case RISCV::SLLW:
4417 case RISCV::SLLIW:
4418 case RISCV::SRAW:
4419 case RISCV::SRAIW:
4420 case RISCV::SRLW:
4421 case RISCV::SRLIW:
4422 case RISCV::DIVW:
4423 case RISCV::DIVUW:
4424 case RISCV::REMW:
4425 case RISCV::REMUW:
4426 case RISCV::ROLW:
4427 case RISCV::RORW:
4428 case RISCV::RORIW:
4429 case RISCV::CLSW:
4430 case RISCV::CLZW:
4431 case RISCV::CTZW:
4432 case RISCV::CPOPW:
4433 case RISCV::SLLI_UW:
4434 case RISCV::ABSW:
4435 case RISCV::FMV_W_X:
4436 case RISCV::FCVT_H_W:
4437 case RISCV::FCVT_H_W_INX:
4438 case RISCV::FCVT_H_WU:
4439 case RISCV::FCVT_H_WU_INX:
4440 case RISCV::FCVT_S_W:
4441 case RISCV::FCVT_S_W_INX:
4442 case RISCV::FCVT_S_WU:
4443 case RISCV::FCVT_S_WU_INX:
4444 case RISCV::FCVT_D_W:
4445 case RISCV::FCVT_D_W_INX:
4446 case RISCV::FCVT_D_WU:
4447 case RISCV::FCVT_D_WU_INX:
4448 case RISCV::TH_REVW:
4449 case RISCV::TH_SRRIW:
4450 if (Bits >= 32)
4451 break;
4452 return false;
4453 case RISCV::SLL:
4454 case RISCV::SRA:
4455 case RISCV::SRL:
4456 case RISCV::ROL:
4457 case RISCV::ROR:
4458 case RISCV::BSET:
4459 case RISCV::BCLR:
4460 case RISCV::BINV:
4461 // Shift amount operands only use log2(Xlen) bits.
4462 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Value: Subtarget->getXLen()))
4463 break;
4464 return false;
4465 case RISCV::SLLI:
4466 // SLLI only uses the lower (XLen - ShAmt) bits.
4467 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(Num: 1))
4468 break;
4469 return false;
4470 case RISCV::ANDI:
4471 if (Bits >= (unsigned)llvm::bit_width(Value: User->getConstantOperandVal(Num: 1)))
4472 break;
4473 goto RecCheck;
4474 case RISCV::ORI: {
4475 uint64_t Imm = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
4476 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm))
4477 break;
4478 [[fallthrough]];
4479 }
4480 case RISCV::AND:
4481 case RISCV::OR:
4482 case RISCV::XOR:
4483 case RISCV::XORI:
4484 case RISCV::ANDN:
4485 case RISCV::ORN:
4486 case RISCV::XNOR:
4487 case RISCV::SH1ADD:
4488 case RISCV::SH2ADD:
4489 case RISCV::SH3ADD:
4490 RecCheck:
4491 if (hasAllNBitUsers(Node: User, Bits, Depth: Depth + 1))
4492 break;
4493 return false;
4494 case RISCV::SRLI: {
4495 unsigned ShAmt = User->getConstantOperandVal(Num: 1);
4496 // If we are shifting right by less than Bits, and users don't demand any
4497 // bits that were shifted into [Bits-1:0], then we can consider this as an
4498 // N-Bit user.
4499 if (Bits > ShAmt && hasAllNBitUsers(Node: User, Bits: Bits - ShAmt, Depth: Depth + 1))
4500 break;
4501 return false;
4502 }
4503 case RISCV::SEXT_B:
4504 case RISCV::PACKH:
4505 if (Bits >= 8)
4506 break;
4507 return false;
4508 case RISCV::SEXT_H:
4509 case RISCV::FMV_H_X:
4510 case RISCV::ZEXT_H_RV32:
4511 case RISCV::ZEXT_H_RV64:
4512 case RISCV::PACKW:
4513 if (Bits >= 16)
4514 break;
4515 return false;
4516 case RISCV::PACK:
4517 if (Bits >= (Subtarget->getXLen() / 2))
4518 break;
4519 return false;
4520 case RISCV::PPAIRE_H:
4521 // If only the lower 32-bits of the result are used, then only the
4522 // lower 16 bits of the inputs are used.
4523 if (Bits >= 16 && hasAllNBitUsers(Node: User, Bits: 32, Depth: Depth + 1))
4524 break;
4525 return false;
4526 case RISCV::ADD_UW:
4527 case RISCV::SH1ADD_UW:
4528 case RISCV::SH2ADD_UW:
4529 case RISCV::SH3ADD_UW:
4530 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4531 // 32 bits.
4532 if (Use.getOperandNo() == 0 && Bits >= 32)
4533 break;
4534 return false;
4535 case RISCV::SB:
4536 if (Use.getOperandNo() == 0 && Bits >= 8)
4537 break;
4538 return false;
4539 case RISCV::SH:
4540 if (Use.getOperandNo() == 0 && Bits >= 16)
4541 break;
4542 return false;
4543 case RISCV::SW:
4544 if (Use.getOperandNo() == 0 && Bits >= 32)
4545 break;
4546 return false;
4547 case RISCV::TH_EXT:
4548 case RISCV::TH_EXTU: {
4549 unsigned Msb = User->getConstantOperandVal(Num: 1);
4550 unsigned Lsb = User->getConstantOperandVal(Num: 2);
4551 // Behavior of Msb < Lsb is not well documented.
4552 if (Msb >= Lsb && Bits > Msb)
4553 break;
4554 return false;
4555 }
4556 }
4557 }
4558
4559 return true;
4560}
4561
4562// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4563bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4564 SDValue &Shl2) {
4565 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4566 if (!C)
4567 return false;
4568
4569 int64_t Offset = C->getSExtValue();
4570 for (unsigned Shift = 0; Shift < 4; Shift++) {
4571 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4572 EVT VT = N->getValueType(ResNo: 0);
4573 Simm5 = CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(N), VT);
4574 Shl2 = CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(N), VT);
4575 return true;
4576 }
4577 }
4578
4579 return false;
4580}
4581
4582// Select VL as a 5 bit immediate or a value that will become a register. This
4583// allows us to choose between VSETIVLI or VSETVLI later.
4584bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4585 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4586 if (C && isUInt<5>(x: C->getZExtValue())) {
4587 VL = CurDAG->getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(N),
4588 VT: N->getValueType(ResNo: 0));
4589 } else if (C && C->isAllOnes()) {
4590 // Treat all ones as VLMax.
4591 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4592 VT: N->getValueType(ResNo: 0));
4593 } else if (isa<RegisterSDNode>(Val: N) &&
4594 cast<RegisterSDNode>(Val&: N)->getReg() == RISCV::X0) {
4595 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4596 // as the register class. Convert X0 to a special immediate to pass the
4597 // MachineVerifier. This is recognized specially by the vsetvli insertion
4598 // pass.
4599 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4600 VT: N->getValueType(ResNo: 0));
4601 } else {
4602 VL = N;
4603 }
4604
4605 return true;
4606}
4607
4608static SDValue findVSplat(SDValue N) {
4609 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4610 if (!N.getOperand(i: 0).isUndef())
4611 return SDValue();
4612 N = N.getOperand(i: 1);
4613 }
4614 SDValue Splat = N;
4615 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4616 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4617 !Splat.getOperand(i: 0).isUndef())
4618 return SDValue();
4619 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4620 return Splat;
4621}
4622
4623bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4624 SDValue Splat = findVSplat(N);
4625 if (!Splat)
4626 return false;
4627
4628 SplatVal = Splat.getOperand(i: 1);
4629 return true;
4630}
4631
4632static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4633 SelectionDAG &DAG,
4634 const RISCVSubtarget &Subtarget,
4635 std::function<bool(int64_t)> ValidateImm,
4636 bool Decrement = false) {
4637 SDValue Splat = findVSplat(N);
4638 if (!Splat || !isa<ConstantSDNode>(Val: Splat.getOperand(i: 1)))
4639 return false;
4640
4641 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4642 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4643 "Unexpected splat operand type");
4644
4645 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4646 // type is wider than the resulting vector element type: an implicit
4647 // truncation first takes place. Therefore, perform a manual
4648 // truncation/sign-extension in order to ignore any truncated bits and catch
4649 // any zero-extended immediate.
4650 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4651 // sign-extending to (XLenVT -1).
4652 APInt SplatConst = Splat.getConstantOperandAPInt(i: 1).sextOrTrunc(width: SplatEltSize);
4653
4654 int64_t SplatImm = SplatConst.getSExtValue();
4655
4656 if (!ValidateImm(SplatImm))
4657 return false;
4658
4659 if (Decrement)
4660 SplatImm -= 1;
4661
4662 SplatVal =
4663 DAG.getSignedTargetConstant(Val: SplatImm, DL: SDLoc(N), VT: Subtarget.getXLenVT());
4664 return true;
4665}
4666
4667bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4668 return selectVSplatImmHelper(N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4669 ValidateImm: [](int64_t Imm) { return isInt<5>(x: Imm); });
4670}
4671
4672bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4673 return selectVSplatImmHelper(
4674 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4675 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4676 /*Decrement=*/true);
4677}
4678
4679bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4680 return selectVSplatImmHelper(
4681 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4682 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4683 /*Decrement=*/false);
4684}
4685
4686bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4687 SDValue &SplatVal) {
4688 return selectVSplatImmHelper(
4689 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4690 ValidateImm: [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4691 /*Decrement=*/true);
4692}
4693
4694bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4695 SDValue &SplatVal) {
4696 return selectVSplatImmHelper(
4697 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4698 ValidateImm: [Bits](int64_t Imm) { return isUIntN(N: Bits, x: Imm); });
4699}
4700
4701bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4702 SDValue Splat = findVSplat(N);
4703 return Splat && selectNegImm(N: Splat.getOperand(i: 1), Val&: SplatVal);
4704}
4705
4706bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4707 auto IsExtOrTrunc = [](SDValue N) {
4708 switch (N->getOpcode()) {
4709 case ISD::SIGN_EXTEND:
4710 case ISD::ZERO_EXTEND:
4711 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4712 // inactive elements will be undef.
4713 case RISCVISD::TRUNCATE_VECTOR_VL:
4714 case RISCVISD::VSEXT_VL:
4715 case RISCVISD::VZEXT_VL:
4716 return true;
4717 default:
4718 return false;
4719 }
4720 };
4721
4722 // We can have multiple nested nodes, so unravel them all if needed.
4723 while (IsExtOrTrunc(N)) {
4724 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4725 return false;
4726 N = N->getOperand(Num: 0);
4727 }
4728
4729 return selectVSplat(N, SplatVal);
4730}
4731
4732bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4733 // Allow bitcasts from XLenVT -> FP.
4734 if (N.getOpcode() == ISD::BITCAST &&
4735 N.getOperand(i: 0).getValueType() == Subtarget->getXLenVT()) {
4736 Imm = N.getOperand(i: 0);
4737 return true;
4738 }
4739 // Allow moves from XLenVT to FP.
4740 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4741 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4742 Imm = N.getOperand(i: 0);
4743 return true;
4744 }
4745
4746 // Otherwise, look for FP constants that can materialized with scalar int.
4747 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val: N.getNode());
4748 if (!CFP)
4749 return false;
4750 const APFloat &APF = CFP->getValueAPF();
4751 // td can handle +0.0 already.
4752 if (APF.isPosZero())
4753 return false;
4754
4755 MVT VT = CFP->getSimpleValueType(ResNo: 0);
4756
4757 MVT XLenVT = Subtarget->getXLenVT();
4758 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4759 assert(APF.isNegZero() && "Unexpected constant.");
4760 return false;
4761 }
4762 SDLoc DL(N);
4763 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
4764 Subtarget: *Subtarget);
4765 return true;
4766}
4767
4768bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4769 SDValue &Imm) {
4770 if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) {
4771 int64_t ImmVal = SignExtend64(X: C->getSExtValue(), B: Width);
4772
4773 if (!isInt<5>(x: ImmVal))
4774 return false;
4775
4776 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL: SDLoc(N),
4777 VT: Subtarget->getXLenVT());
4778 return true;
4779 }
4780
4781 return false;
4782}
4783
4784// Match XOR with a VMSET_VL operand. Return the other operand.
4785bool RISCVDAGToDAGISel::selectVMNOTOp(SDValue N, SDValue &Res) {
4786 if (N.getOpcode() != ISD::XOR)
4787 return false;
4788
4789 if (N.getOperand(i: 0).getOpcode() == RISCVISD::VMSET_VL) {
4790 Res = N.getOperand(i: 1);
4791 return true;
4792 }
4793
4794 if (N.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL) {
4795 Res = N.getOperand(i: 0);
4796 return true;
4797 }
4798
4799 return false;
4800}
4801
4802// Match VMXOR_VL with a VMSET_VL operand. Making sure that that VL operand
4803// matches the parent's VL. Return the other operand of the VMXOR_VL.
4804bool RISCVDAGToDAGISel::selectVMNOT_VLOp(SDNode *Parent, SDValue N,
4805 SDValue &Res) {
4806 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4807 return false;
4808
4809 assert(Parent &&
4810 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4811 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4812 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4813 "Unexpected parent");
4814
4815 // The VL should match the parent.
4816 if (Parent->getOperand(Num: 2) != N->getOperand(Num: 2))
4817 return false;
4818
4819 if (N.getOperand(i: 0).getOpcode() == RISCVISD::VMSET_VL) {
4820 Res = N.getOperand(i: 1);
4821 return true;
4822 }
4823
4824 if (N.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL) {
4825 Res = N.getOperand(i: 0);
4826 return true;
4827 }
4828
4829 return false;
4830}
4831
4832// Try to remove sext.w if the input is a W instruction or can be made into
4833// a W instruction cheaply.
4834bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4835 // Look for the sext.w pattern, addiw rd, rs1, 0.
4836 if (N->getMachineOpcode() != RISCV::ADDIW ||
4837 !isNullConstant(V: N->getOperand(Num: 1)))
4838 return false;
4839
4840 SDValue N0 = N->getOperand(Num: 0);
4841 if (!N0.isMachineOpcode())
4842 return false;
4843
4844 switch (N0.getMachineOpcode()) {
4845 default:
4846 break;
4847 case RISCV::ADD:
4848 case RISCV::ADDI:
4849 case RISCV::SUB:
4850 case RISCV::MUL:
4851 case RISCV::SLLI: {
4852 // Convert sext.w+add/sub/mul to their W instructions. This will create
4853 // a new independent instruction. This improves latency.
4854 unsigned Opc;
4855 switch (N0.getMachineOpcode()) {
4856 default:
4857 llvm_unreachable("Unexpected opcode!");
4858 case RISCV::ADD: Opc = RISCV::ADDW; break;
4859 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4860 case RISCV::SUB: Opc = RISCV::SUBW; break;
4861 case RISCV::MUL: Opc = RISCV::MULW; break;
4862 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4863 }
4864
4865 SDValue N00 = N0.getOperand(i: 0);
4866 SDValue N01 = N0.getOperand(i: 1);
4867
4868 // Shift amount needs to be uimm5.
4869 if (N0.getMachineOpcode() == RISCV::SLLI &&
4870 !isUInt<5>(x: cast<ConstantSDNode>(Val&: N01)->getSExtValue()))
4871 break;
4872
4873 SDNode *Result =
4874 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: N->getValueType(ResNo: 0),
4875 Op1: N00, Op2: N01);
4876 ReplaceUses(F: N, T: Result);
4877 return true;
4878 }
4879 case RISCV::ADDW:
4880 case RISCV::ADDIW:
4881 case RISCV::SUBW:
4882 case RISCV::MULW:
4883 case RISCV::SLLIW:
4884 case RISCV::PACKW:
4885 case RISCV::TH_MULAW:
4886 case RISCV::TH_MULAH:
4887 case RISCV::TH_MULSW:
4888 case RISCV::TH_MULSH:
4889 if (N0.getValueType() == MVT::i32)
4890 break;
4891
4892 // Result is already sign extended just remove the sext.w.
4893 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4894 ReplaceUses(F: N, T: N0.getNode());
4895 return true;
4896 }
4897
4898 return false;
4899}
4900
4901static bool usesAllOnesMask(SDValue MaskOp) {
4902 const auto IsVMSet = [](unsigned Opc) {
4903 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4904 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4905 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4906 Opc == RISCV::PseudoVMSET_M_B8;
4907 };
4908
4909 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4910 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4911 // assume that it's all-ones? Same applies to its VL.
4912 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4913}
4914
4915static bool isImplicitDef(SDValue V) {
4916 if (!V.isMachineOpcode())
4917 return false;
4918 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4919 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4920 if (!isImplicitDef(V: V.getOperand(i: I)))
4921 return false;
4922 return true;
4923 }
4924 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4925}
4926
4927// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4928// corresponding "unmasked" pseudo versions.
4929bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4930 const RISCV::RISCVMaskedPseudoInfo *I =
4931 RISCV::getMaskedPseudoInfo(MaskedPseudo: N->getMachineOpcode());
4932 if (!I)
4933 return false;
4934
4935 unsigned MaskOpIdx = I->MaskOpIdx;
4936 if (!usesAllOnesMask(MaskOp: N->getOperand(Num: MaskOpIdx)))
4937 return false;
4938
4939 // There are two classes of pseudos in the table - compares and
4940 // everything else. See the comment on RISCVMaskedPseudo for details.
4941 const unsigned Opc = I->UnmaskedPseudo;
4942 const MCInstrDesc &MCID = TII->get(Opcode: Opc);
4943 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MCID);
4944
4945 const MCInstrDesc &MaskedMCID = TII->get(Opcode: N->getMachineOpcode());
4946 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MaskedMCID);
4947
4948 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4949 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4950 "Unmasked pseudo has policy but masked pseudo doesn't?");
4951 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4952 "Unexpected pseudo structure");
4953 assert(!(HasPassthru && !MaskedHasPassthru) &&
4954 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4955
4956 SmallVector<SDValue, 8> Ops;
4957 // Skip the passthru operand at index 0 if the unmasked don't have one.
4958 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4959 bool DropPolicy = !RISCVII::hasVecPolicyOp(TSFlags: MCID.TSFlags) &&
4960 RISCVII::hasVecPolicyOp(TSFlags: MaskedMCID.TSFlags);
4961 bool HasChainOp =
4962 N->getOperand(Num: N->getNumOperands() - 1).getValueType() == MVT::Other;
4963 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4964 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4965 // Skip the mask
4966 SDValue Op = N->getOperand(Num: I);
4967 if (I == MaskOpIdx)
4968 continue;
4969 if (DropPolicy && I == LastOpNum)
4970 continue;
4971 Ops.push_back(Elt: Op);
4972 }
4973
4974 MachineSDNode *Result =
4975 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
4976
4977 if (!N->memoperands_empty())
4978 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: N->memoperands());
4979
4980 Result->setFlags(N->getFlags());
4981 ReplaceUses(F: N, T: Result);
4982
4983 return true;
4984}
4985
4986/// If our passthru is an implicit_def, use noreg instead. This side
4987/// steps issues with MachineCSE not being able to CSE expressions with
4988/// IMPLICIT_DEF operands while preserving the semantic intent. See
4989/// pr64282 for context. Note that this transform is the last one
4990/// performed at ISEL DAG to DAG.
4991bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4992 bool MadeChange = false;
4993 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4994
4995 while (Position != CurDAG->allnodes_begin()) {
4996 SDNode *N = &*--Position;
4997 if (N->use_empty() || !N->isMachineOpcode())
4998 continue;
4999
5000 const unsigned Opc = N->getMachineOpcode();
5001 if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opc) ||
5002 !RISCVII::isFirstDefTiedToFirstUse(Desc: TII->get(Opcode: Opc)) ||
5003 !isImplicitDef(V: N->getOperand(Num: 0)))
5004 continue;
5005
5006 SmallVector<SDValue> Ops;
5007 Ops.push_back(Elt: CurDAG->getRegister(Reg: RISCV::NoRegister, VT: N->getValueType(ResNo: 0)));
5008 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
5009 SDValue Op = N->getOperand(Num: I);
5010 Ops.push_back(Elt: Op);
5011 }
5012
5013 MachineSDNode *Result =
5014 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
5015 Result->setFlags(N->getFlags());
5016 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val: N)->memoperands());
5017 ReplaceUses(F: N, T: Result);
5018 MadeChange = true;
5019 }
5020 return MadeChange;
5021}
5022
5023
5024// This pass converts a legalized DAG into a RISCV-specific DAG, ready
5025// for instruction scheduling.
5026FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
5027 CodeGenOptLevel OptLevel) {
5028 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
5029}
5030
5031char RISCVDAGToDAGISelLegacy::ID = 0;
5032
5033RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
5034 CodeGenOptLevel OptLevel)
5035 : SelectionDAGISelLegacy(
5036 ID, std::make_unique<RISCVDAGToDAGISel>(args&: TM, args&: OptLevel)) {}
5037
5038INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
5039