1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
19#include "RISCVSelectionDAGInfo.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/SDPatternMatch.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
23#include "llvm/Support/Alignment.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
// When a constant needs exactly two instructions to materialize, optionally
// emit a single rematerializable PseudoMovImm instead (off by default); see
// selectImm() below for the use.
static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(Val: false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
// Rewrite a few target-independent opcodes into RISC-V-specific VL nodes
// before instruction selection runs, reducing the number of isel patterns
// needed later. Nodes are visited bottom-up (reverse allnodes order).
void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes; RemoveDeadNodes at the end cleans them up.
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // With P-extension SIMD codegen, SPLAT_VECTOR is selected as-is.
      if (Subtarget->enablePExtSIMDCodeGen())
        break;
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(ResNo: 0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      // An X0 VL operand requests VLMAX.
      SDValue VL = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
      SDValue Src = N->getOperand(Num: 0);
      if (VT.isInteger())
        // The scalar operand of VMV_V_X_VL must be XLen wide; extend it.
        Src = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget->getXLenVT(),
                              Operand: N->getOperand(Num: 0));
      Result = CurDAG->getNode(Opcode: Opc, DL, VT, N1: CurDAG->getUNDEF(VT), N2: Src, N3: VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(ResNo: 0);
      SDValue Passthru = N->getOperand(Num: 0);
      SDValue Lo = N->getOperand(Num: 1);
      SDValue Hi = N->getOperand(Num: 2);
      SDValue VL = N->getOperand(Num: 3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create temporary stack for each expanding node.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(Bytes: TypeSize::getFixed(ExactSize: 8), Alignment: Align(8));
      int FI = cast<FrameIndexSDNode>(Val: StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      // Store Lo at the slot and Hi four bytes above it, then merge the two
      // store chains with a TokenFactor so the reload depends on both.
      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, dl: DL, Val: Lo, Ptr: StackSlot, PtrInfo: MPI, Alignment: Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(Base: StackSlot, Offset: TypeSize::getFixed(ExactSize: 4), DL);
      Hi = CurDAG->getStore(Chain, dl: DL, Val: Hi, Ptr: OffsetSlot, PtrInfo: MPI.getWithOffset(O: 4),
                            Alignment: Align(8));

      Chain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo, N2: Hi);

      // Reload the combined 64-bit value with a stride-0 (broadcast) vlse.
      SDVTList VTs = CurDAG->getVTList(VTs: {VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(Reg: RISCV::X0, VT: MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
                                           MemVT: MVT::i64, PtrInfo: MPI, Alignment: Align(8),
                                           Flags: MachineMemOperand::MOLoad);
      break;
    }
    case ISD::FP_EXTEND: {
      // We only have vector patterns for riscv_fpextend_vl in isel.
      SDLoc DL(N);
      MVT VT = N->getSimpleValueType(ResNo: 0);
      if (!VT.isVector())
        break;
      // Build an all-ones mask and a VLMAX VL so the VL form is equivalent
      // to the whole-vector FP_EXTEND being replaced.
      SDValue VLMAX = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
      SDValue TrueMask = CurDAG->getNode(
          Opcode: RISCVISD::VMSET_VL, DL, VT: VT.changeVectorElementType(EltVT: MVT::i1), Operand: VLMAX);
      Result = CurDAG->getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT, N1: N->getOperand(Num: 0),
                               N2: TrueMask, N3: VLMAX);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
147
// Run machine-node peepholes after instruction selection. The HandleSDNode
// keeps the DAG root alive while peepholes replace nodes underneath it.
void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // Peephole over sign-extension (sext.w-style) machine nodes; see
    // doPeepholeSExtW for the exact transforms performed.
    MadeChange |= doPeepholeSExtW(Node: N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs. Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(Node: cast<MachineSDNode>(Val: N));
  }

  // Re-establish the (possibly replaced) root.
  CurDAG->setRoot(Dummy.getValue());

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister. This is required to workaround
  // an optimization deficiency in MachineCSE. This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
182 RISCVMatInt::InstSeq &Seq) {
183 SDValue SrcReg = CurDAG->getRegister(Reg: RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Val: Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SDImm);
190 break;
191 case RISCVMatInt::RegX0:
192 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg,
193 Op2: CurDAG->getRegister(Reg: RISCV::X0, VT));
194 break;
195 case RISCVMatInt::RegReg:
196 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SrcReg);
197 break;
198 case RISCVMatInt::RegImm:
199 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
212 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget);
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(Opcode: RISCV::PseudoMovImm, dl: DL, VT,
218 Op1: CurDAG->getSignedTargetConstant(Val: Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bit 31 and 63 are set.
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
228 RISCVMatInt::InstSeq SeqLo =
229 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc);
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, Seq&: SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: Lo,
235 Op2: CurDAG->getTargetConstant(Val: ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(Opcode: AddOpc, dl: DL, VT, Op1: Lo, Op2: SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
// Append the operand list shared by RVV load/store pseudos to Operands, in
// this exact order: base pointer, [stride or index], [mask], VL, SEW,
// [policy (loads only)], chain. CurOp is the position of the base pointer
// within Node's operands; the chain is read from operand 0. If IndexVT is
// non-null it receives the value type of the stride/index operand.
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(Num: 0);

  Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(ResNo: 0);
  }

  if (IsMasked) {
    SDValue Mask = Node->getOperand(Num: CurOp++);
    Operands.push_back(Elt: Mask);
  }
  // Canonicalize the AVL operand into pseudo VL-operand form.
  SDValue VL;
  selectVLOp(N: Node->getOperand(Num: CurOp++), VL);
  Operands.push_back(Elt: VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
  Operands.push_back(Elt: SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do. All have passthru operands. For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(Num: CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
    Operands.push_back(Elt: PolicyOp);
  }

  Operands.push_back(Elt: Chain); // Chain.
}
284
// Select a unit-stride or strided segment-load intrinsic into the matching
// VLSEG pseudo. NF is the number of fields (segment size).
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  // SEW is encoded as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0/1 are the chain and intrinsic ID; useful operands start at 2
  // with the passthru.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
                            LMUL: static_cast<unsigned>(LMUL));
  // The pseudo produces an untyped tuple register plus the chain.
  MachineSDNode *Load =
      CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);

  // Carry the original intrinsic's memory operand over to the pseudo.
  CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
  ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N: Node);
}
312
// Select a fault-only-first segment-load intrinsic into the FF VLSEG pseudo.
// Unlike selectVLSEG, the pseudo additionally produces the updated VL
// (result 1), so three results are replaced.
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
                                      bool IsMasked) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  MVT XLenVT = Subtarget->getXLenVT();
  // SEW is encoded as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0/1 are the chain and intrinsic ID; passthru starts at 2.
  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, LMUL: static_cast<unsigned>(LMUL));
  // Results: tuple register, new VL (XLenVT), chain.
  MachineSDNode *Load = CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped,
                                               VT2: XLenVT, VT3: MVT::Other, Ops: Operands);

  // Carry the original intrinsic's memory operand over to the pseudo.
  CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0)); // Result
  ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1)); // VL
  ReplaceUses(F: SDValue(Node, 2), T: SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(N: Node);
}
343
// Select an indexed (ordered or unordered) segment-load intrinsic into the
// matching VLXSEG pseudo. The index vector's EEW/LMUL are derived from the
// index operand's type, independently of the data SEW/LMUL.
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  // SEW is encoded as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0/1 are the chain and intrinsic ID; passthru starts at 2.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, IndexVT: &IndexVT);

#ifndef NDEBUG
  // Sanity check: the index vector must have the same element count as the
  // data vector. Number of element = RVVBitsPerBlock * LMUL / SEW.
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
  unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
  // EEW=64 index values are illegal on RV32 per the V extension.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
      IndexLMUL: static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);

  // Carry the original intrinsic's memory operand over to the pseudo.
  CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
  ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N: Node);
}
391
// Select a unit-stride or strided segment-store intrinsic into the matching
// VSSEG pseudo. The data type (and thus SEW/LMUL) comes from the stored
// value at operand 2.
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
  // SEW is encoded as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0/1 are the chain and intrinsic ID; the stored tuple is at 2.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);

  // Carry the original intrinsic's memory operand over to the pseudo.
  CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  ReplaceNode(F: Node, T: Store);
}
416
// Select an indexed (ordered or unordered) segment-store intrinsic into the
// matching VSXSEG pseudo. The data type comes from the stored value at
// operand 2; the index EEW/LMUL are derived from the index operand's type.
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
  // SEW is encoded as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operands 0/1 are the chain and intrinsic ID; the stored tuple is at 2.
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, IndexVT: &IndexVT);

#ifndef NDEBUG
  // Sanity check: the index vector must have the same element count as the
  // data vector. Number of element = RVVBitsPerBlock * LMUL / SEW.
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
  unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
  // EEW=64 index values are illegal on RV32 per the V extension.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
      IndexLMUL: static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);

  // Carry the original intrinsic's memory operand over to the pseudo.
  CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  ReplaceNode(F: Node, T: Store);
}
462
// Select the riscv_vsetvli/riscv_vsetvlimax intrinsics into PseudoVSETVLI,
// PseudoVSETVLIX0 (AVL = VLMAX, encoded as X0) or PseudoVSETIVLI (AVL fits
// in uimm5). The vtype immediate is always built with TA/MA policy.
void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(Num: 0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  // vsetvlimax has no AVL operand, so its SEW/LMUL operands start one
  // position earlier than vsetvli's.
  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: Offset) & 0x7);
  RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
      Node->getConstantOperandVal(Num: Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL: VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  // If the exact VLEN is known and the constant AVL equals VLMAX for this
  // SEW/LMUL ratio, treat it as a VLMAX request.
  if (auto *C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) {
    if (auto VLEN = Subtarget->getRealVLen())
      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(V: Node->getOperand(Num: 1))) {
    // VLMAX is expressed by using X0 as the AVL.
    VLOperand = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(Num: 1);

    // A small constant AVL can use the immediate form (vsetivli).
    if (auto *C = dyn_cast<ConstantSDNode>(Val&: VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(x: AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(Val: AVL, DL, VT: XLenVT);
        ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: RISCV::PseudoVSETIVLI, dl: DL,
                                                 VT: XLenVT, Op1: VLImm, Op2: VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(F: Node,
              T: CurDAG->getMachineNode(Opcode, dl: DL, VT: XLenVT, Op1: VLOperand, Op2: VTypeIOp));
}
520
// Select the SiFive XSfmm vset intrinsics (sf.vsettnt/sf.vsettm/sf.vsettk)
// into their pseudos. sf.vsettnt takes an encoded vtype immediate; the other
// two take separate log2(SEW) and TWiden operands.
void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) {
  if (!Subtarget->hasVendorXSfmmbase())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(Num: 0);

  assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
          IntNo == Intrinsic::riscv_sf_vsettm ||
          IntNo == Intrinsic::riscv_sf_vsettk) &&
         "Unexpected XSfmm vset intrinsic");

  // Decode the encoded SEW/TWiden immediates from operands 2 and 3.
  unsigned SEW = RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: 2));
  unsigned Widen = RISCVVType::decodeTWiden(TWiden: Node->getConstantOperandVal(Num: 3));
  unsigned PseudoOpCode =
      IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
      : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
                                            : RISCV::PseudoSF_VSETTK;

  if (IntNo == Intrinsic::riscv_sf_vsettnt) {
    // sf.vsettnt: re-encode SEW/TWiden into a single XSfmm vtype immediate.
    unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, AltFmt: 0);
    SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);

    ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
                                             Op1: Node->getOperand(Num: 1), Op2: VTypeIOp));
  } else {
    // sf.vsettm/sf.vsettk: pass log2(SEW) and TWiden as separate operands.
    SDValue Log2SEW = CurDAG->getTargetConstant(Val: Log2_32(Value: SEW), DL, VT: XLenVT);
    SDValue TWiden = CurDAG->getTargetConstant(Val: Widen, DL, VT: XLenVT);
    ReplaceNode(F: Node,
                T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
                                       Op1: Node->getOperand(Num: 1), Op2: Log2SEW, Op3: TWiden));
  }
}
558
559bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
560 MVT VT = Node->getSimpleValueType(ResNo: 0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
568 SDValue N0 = Node->getOperand(Num: 0);
569 SDValue N1 = Node->getOperand(Num: 1);
570
571 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: N1);
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(x: Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(x: Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(i: 0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
596 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1));
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(N: ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(x: ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 Opcode: BinOpc, dl: DL, VT, Op1: Shift.getOperand(i: 0),
629 Op2: CurDAG->getSignedTargetConstant(Val: ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(Opcode: ShOpc, dl: DL, VT, Op1: SDValue(BinOp, 0),
632 Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
633 ReplaceNode(F: Node, T: SLLI);
634 return true;
635}
636
// Try to select Node — expected to be a right shift by a constant (see the
// sra transforms below) — as a vendor signed bitfield-extract instruction:
// TH.EXT (XTHeadBb), NDS.BFOS (XAndesPerf) or QC.EXT (Xqcibm). Returns true
// and replaces Node on success.
bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  unsigned Opc;

  if (Subtarget->hasVendorXTHeadBb())
    Opc = RISCV::TH_EXT;
  else if (Subtarget->hasVendorXAndesPerf())
    Opc = RISCV::NDS_BFOS;
  else if (Subtarget->hasVendorXqcibm())
    Opc = RISCV::QC_EXT;
  else
    // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(Num: 0);
  if (!N0.hasOneUse())
    return false;

  // Emit the extract. For QC.EXT the second operand is a width rather than
  // an MSB, so it is recomputed from Msb/Lsb.
  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                             const SDLoc &DL, MVT VT) {
    if (Opc == RISCV::QC_EXT) {
      // QC.EXT X, width, shamt
      // shamt is the same as Lsb
      // width is the number of bits to extract from the Lsb
      Msb = Msb - Lsb + 1;
    }
    return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
                                  Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
                                  Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 < C2
  //        -> (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount can not be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(F: Node, T: Sbe);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    // If the shift-right amount is greater than Msb, it means that extracts
    // the X[Msb] bit and sign-extend it.
    const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(F: Node, T: Sbe);
    return true;
  }

  return false;
}
719
720bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(Num: 0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
740 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
741 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(ResNo: 0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(F: Node, T: Sbi);
767 return true;
768 }
769
770 return false;
771}
772
773bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
795 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
796 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
797 ReplaceNode(F: Node, T: Ube);
798 return true;
799}
800
801bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
815 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
816 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
817 ReplaceNode(F: Node, T: Ubi);
818 return true;
819}
820
// Try to select a pre/post-incremented indexed load into an XTHeadMemIdx
// load instruction. Returns true and replaces Node on success.
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Val: Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  // Only constant offsets can be encoded.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
  bool IsPre = AM == ISD::PRE_INC;
  bool IsPost = AM == ISD::POST_INC;
  int64_t Offset = C->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2). Find the smallest shift
  // that makes the offset representable.
  unsigned Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  // Pick the opcode from the memory width, pre/post mode, and whether the
  // load zero- or sign-extends. i64 has no zero-extending variant.
  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  // Operands: base, scaled offset (imm5), scale (imm2), chain. Results:
  // loaded value, updated base pointer, chain.
  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {
      Ld->getBasePtr(),
      CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(Node), VT: Ty),
      CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(Node), VT: Ty), Ld->getChain()};
  SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(Node), VT1: Ld->getValueType(ResNo: 0),
                                       VT2: Ld->getValueType(ResNo: 1), VT3: MVT::Other, Ops);

  // Preserve the load's memory operand on the new machine node.
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: Node)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: New), NewMemRefs: {MemOp});

  ReplaceNode(F: Node, T: New);

  return true;
}
889
890static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
891 SDValue Lo, SDValue Hi) {
892 SDValue Ops[] = {
893 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), Lo,
894 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), Hi,
895 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
896
897 return SDValue(
898 CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT, Ops), 0);
899}
900
901// Helper to extract Lo and Hi values from a GPR pair.
902static std::pair<SDValue, SDValue>
903extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair) {
904 SDValue Lo =
905 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL, VT: MVT::i32, Operand: Pair);
906 SDValue Hi =
907 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_odd, DL, VT: MVT::i32, Operand: Pair);
908 return {Lo, Hi};
909}
910
// Try to match WMACC pattern: ADDD where one operand pair comes from a
// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
bool RISCVDAGToDAGISel::tryWideningMulAcc(SDNode *Node, const SDLoc &DL) {
  assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");

  // ADDD takes two lo/hi operand pairs.
  SDValue Op0Lo = Node->getOperand(Num: 0);
  SDValue Op0Hi = Node->getOperand(Num: 1);
  SDValue Op1Lo = Node->getOperand(Num: 2);
  SDValue Op1Hi = Node->getOperand(Num: 3);

  // A pair is foldable if both values are the two results (res 0 = lo,
  // res 1 = hi) of the same supported multiply node, each with one use.
  auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
    unsigned Opc = Lo.getOpcode();
    if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
        Opc != RISCVISD::WMULSU)
      return false;
    return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
           Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
  };

  SDNode *MulNode = nullptr;
  SDValue AddLo, AddHi;

  // Check if first operand pair is a supported multiply with single use.
  if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
    MulNode = Op0Lo.getNode();
    AddLo = Op1Lo;
    AddHi = Op1Hi;
  }
  // ADDD is commutative. Check if second operand pair is a supported multiply
  // with single use.
  else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
    MulNode = Op1Lo.getNode();
    AddLo = Op0Lo;
    AddHi = Op0Hi;
  } else {
    return false;
  }

  // Choose the accumulate instruction matching the multiply's signedness.
  unsigned Opc;
  switch (MulNode->getOpcode()) {
  default:
    llvm_unreachable("Unexpected multiply opcode");
  case ISD::UMUL_LOHI:
    Opc = RISCV::WMACCU;
    break;
  case ISD::SMUL_LOHI:
    Opc = RISCV::WMACC;
    break;
  case RISCVISD::WMULSU:
    Opc = RISCV::WMACCSU;
    break;
  }

  // Pack the addend pair into a GPR pair to serve as the accumulator.
  SDValue Acc = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: AddLo, Hi: AddHi);

  // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
  SDValue M0 = MulNode->getOperand(Num: 0);
  SDValue M1 = MulNode->getOperand(Num: 1);
  MachineSDNode *New =
      CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Acc, Op2: M0, Op3: M1);

  // Split the pair result back into lo/hi and replace both ADDD results.
  auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
  ReplaceUses(F: SDValue(Node, 0), T: Lo);
  ReplaceUses(F: SDValue(Node, 1), T: Hi);
  CurDAG->RemoveDeadNode(N: Node);
  return true;
}
978
979static Register getTileReg(uint64_t TileNum) {
980 assert(TileNum <= 15 && "Invalid tile number");
981 return RISCV::T0 + TileNum;
982}
983
// Select the SiFive sf.vc.x.se / sf.vc.i.se intrinsics to the per-LMUL
// pseudoinstructions.
void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");

  SDLoc DL(Node);
  unsigned IntNo = Node->getConstantOperandVal(Num: 1);

  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
         "Unexpected vsetvli intrinsic");

  // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
  // Operand 6 holds SEW in bits; the pseudo takes log2(SEW).
  unsigned Log2SEW = Log2_32(Value: Node->getConstantOperandVal(Num: 6));
  SDValue SEWOp =
      CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: Subtarget->getXLenVT());
  // Rebuild the operand list for the pseudo: the three imms, the
  // scalar/simm5, VL (operand 8), log2 SEW, and the chain.
  SmallVector<SDValue, 8> Operands = {Node->getOperand(Num: 2), Node->getOperand(Num: 3),
                                      Node->getOperand(Num: 4), Node->getOperand(Num: 5),
                                      Node->getOperand(Num: 8), SEWOp,
                                      Node->getOperand(Num: 0)};

  // Operand 7 is the log2lmul encoding: 5/6/7 select the fractional
  // MF8/MF4/MF2 pseudos, 0-3 select M1-M8.
  unsigned Opcode;
  auto *LMulSDNode = cast<ConstantSDNode>(Val: Node->getOperand(Num: 7));
  switch (LMulSDNode->getSExtValue()) {
  case 5:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
                                                  : RISCV::PseudoSF_VC_I_SE_MF8;
    break;
  case 6:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
                                                  : RISCV::PseudoSF_VC_I_SE_MF4;
    break;
  case 7:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
                                                  : RISCV::PseudoSF_VC_I_SE_MF2;
    break;
  case 0:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
                                                  : RISCV::PseudoSF_VC_I_SE_M1;
    break;
  case 1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
                                                  : RISCV::PseudoSF_VC_I_SE_M2;
    break;
  case 2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
                                                  : RISCV::PseudoSF_VC_I_SE_M4;
    break;
  case 3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
                                                  : RISCV::PseudoSF_VC_I_SE_M8;
    break;
  }

  ReplaceNode(F: Node, T: CurDAG->getMachineNode(
                         Opcode, dl: DL, VT: Node->getSimpleValueType(ResNo: 0), Ops: Operands));
}
1042
// Return the NF (number of fields, 2-8) encoded in the name of a RVV
// segment load/store intrinsic, covering the plain, masked, fault-first,
// and masked-fault-first variants.
static unsigned getSegInstNF(unsigned Intrinsic) {
// One case per intrinsic variant: plain, _mask, ff, and ff_mask.
#define INST_NF_CASE(NAME, NF)                                                 \
  case Intrinsic::riscv_##NAME##NF:                                            \
    return NF;
#define INST_NF_CASE_MASK(NAME, NF)                                            \
  case Intrinsic::riscv_##NAME##NF##_mask:                                     \
    return NF;
#define INST_NF_CASE_FF(NAME, NF)                                              \
  case Intrinsic::riscv_##NAME##NF##ff:                                        \
    return NF;
#define INST_NF_CASE_FF_MASK(NAME, NF)                                         \
  case Intrinsic::riscv_##NAME##NF##ff_mask:                                   \
    return NF;
// Expand one of the case macros above for every legal NF (2 through 8).
#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME)                                \
  MACRO_NAME(NAME, 2)                                                          \
  MACRO_NAME(NAME, 3)                                                          \
  MACRO_NAME(NAME, 4)                                                          \
  MACRO_NAME(NAME, 5)                                                          \
  MACRO_NAME(NAME, 6)                                                          \
  MACRO_NAME(NAME, 7)                                                          \
  MACRO_NAME(NAME, 8)
// Plain + masked variants for all NFs.
#define INST_ALL_NF_CASE(NAME)                                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME)                                    \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
// As above, plus the fault-first variants (only vlseg has them).
#define INST_ALL_NF_CASE_WITH_FF(NAME)                                         \
  INST_ALL_NF_CASE(NAME)                                                       \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME)                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
  switch (Intrinsic) {
  default:
    llvm_unreachable("Unexpected segment load/store intrinsic");
    INST_ALL_NF_CASE_WITH_FF(vlseg)
    INST_ALL_NF_CASE(vlsseg)
    INST_ALL_NF_CASE(vloxseg)
    INST_ALL_NF_CASE(vluxseg)
    INST_ALL_NF_CASE(vsseg)
    INST_ALL_NF_CASE(vssseg)
    INST_ALL_NF_CASE(vsoxseg)
    INST_ALL_NF_CASE(vsuxseg)
  }
}
1084
// Return true if Val can be materialized by PLI_B or PLI_H, i.e. it is a
// 32-bit value made of identical packed lanes: two equal 16-bit halves that
// fit in a signed 10-bit immediate, or four equal 8-bit bytes.
static bool isApplicableToPLI(int Val) {
  // Both 16-bit halves must be identical for either packed form.
  int16_t HiHalf = static_cast<int16_t>(Val >> 16);
  int16_t LoHalf = static_cast<int16_t>(Val);
  if (HiHalf != LoHalf)
    return false;

  // Packed i10 halfwords (PLI_H): the replicated half fits in simm10.
  if (HiHalf >= -512 && HiHalf <= 511)
    return true;

  // Packed i8 bytes (PLI_B): both bytes of the replicated half must match.
  int8_t HiByte = static_cast<int8_t>(LoHalf >> 8);
  int8_t LoByte = static_cast<int8_t>(Val);
  return HiByte == LoByte;
}
1096
1097void RISCVDAGToDAGISel::Select(SDNode *Node) {
1098 // If we have a custom node, we have already selected.
1099 if (Node->isMachineOpcode()) {
1100 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1101 Node->setNodeId(-1);
1102 return;
1103 }
1104
1105 // Instruction Selection not handled by the auto-generated tablegen selection
1106 // should be handled here.
1107 unsigned Opcode = Node->getOpcode();
1108 MVT XLenVT = Subtarget->getXLenVT();
1109 SDLoc DL(Node);
1110 MVT VT = Node->getSimpleValueType(ResNo: 0);
1111
1112 bool HasBitTest = Subtarget->hasBEXTILike();
1113
1114 switch (Opcode) {
1115 case ISD::Constant: {
1116 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1117 auto *ConstNode = cast<ConstantSDNode>(Val: Node);
1118 if (ConstNode->isZero()) {
1119 SDValue New =
1120 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: RISCV::X0, VT);
1121 ReplaceNode(F: Node, T: New.getNode());
1122 return;
1123 }
1124 int64_t Imm = ConstNode->getSExtValue();
1125 // If only the lower 8 bits are used, try to convert this to a simm6 by
1126 // sign-extending bit 7. This is neutral without the C extension, and
1127 // allows C.LI to be used if C is present.
1128 if (!isInt<8>(x: Imm) && isUInt<8>(x: Imm) && isInt<6>(x: SignExtend64<8>(x: Imm)) &&
1129 hasAllBUsers(Node))
1130 Imm = SignExtend64<8>(x: Imm);
1131 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1132 // by sign extending bit 15.
1133 else if (!isInt<16>(x: Imm) && isUInt<16>(x: Imm) &&
1134 isInt<12>(x: SignExtend64<16>(x: Imm)) && hasAllHUsers(Node))
1135 Imm = SignExtend64<16>(x: Imm);
1136 // If the upper 32-bits are not used try to convert this into a simm32 by
1137 // sign extending bit 32.
1138 else if (!isInt<32>(x: Imm) && isUInt<32>(x: Imm) && hasAllWUsers(Node))
1139 Imm = SignExtend64<32>(x: Imm);
1140
1141 if (VT == MVT::i64 && Subtarget->hasStdExtP() && isApplicableToPLI(Val: Imm) &&
1142 hasAllWUsers(Node)) {
1143 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1144 // can simply copy lower 32 bits to higher 32 bits to make it able to
1145 // rematerialize to PLI_B or PLI_H
1146 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1147 }
1148
1149 ReplaceNode(F: Node, T: selectImm(CurDAG, DL, VT, Imm, Subtarget: *Subtarget).getNode());
1150 return;
1151 }
1152 case ISD::ConstantFP: {
1153 const APFloat &APF = cast<ConstantFPSDNode>(Val: Node)->getValueAPF();
1154
1155 bool Is64Bit = Subtarget->is64Bit();
1156 bool HasZdinx = Subtarget->hasStdExtZdinx();
1157
1158 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1159 SDValue Imm;
1160 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1161 // create an integer immediate.
1162 if (APF.isPosZero() || NegZeroF64) {
1163 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1164 Imm = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::f64);
1165 else
1166 Imm = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
1167 } else {
1168 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
1169 Subtarget: *Subtarget);
1170 }
1171
1172 unsigned Opc;
1173 switch (VT.SimpleTy) {
1174 default:
1175 llvm_unreachable("Unexpected size");
1176 case MVT::bf16:
1177 assert(Subtarget->hasStdExtZfbfmin());
1178 Opc = RISCV::FMV_H_X;
1179 break;
1180 case MVT::f16:
1181 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1182 break;
1183 case MVT::f32:
1184 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1185 break;
1186 case MVT::f64:
1187 // For RV32, we can't move from a GPR, we need to convert instead. This
1188 // should only happen for +0.0 and -0.0.
1189 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1190 if (HasZdinx)
1191 Opc = RISCV::COPY;
1192 else
1193 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1194 break;
1195 }
1196
1197 SDNode *Res;
1198 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1199 Res =
1200 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_16, DL, VT, Operand: Imm).getNode();
1201 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1202 Res =
1203 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_32, DL, VT, Operand: Imm).getNode();
1204 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1205 Res = CurDAG->getMachineNode(
1206 Opcode: Opc, dl: DL, VT, Op1: Imm,
1207 Op2: CurDAG->getTargetConstant(Val: RISCVFPRndMode::RNE, DL, VT: XLenVT));
1208 else
1209 Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: Imm);
1210
1211 // For f64 -0.0, we need to insert a fneg.d idiom.
1212 if (NegZeroF64) {
1213 Opc = RISCV::FSGNJN_D;
1214 if (HasZdinx)
1215 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1216 Res =
1217 CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: SDValue(Res, 0), Op2: SDValue(Res, 0));
1218 }
1219
1220 ReplaceNode(F: Node, T: Res);
1221 return;
1222 }
1223 case RISCVISD::BuildGPRPair:
1224 case RISCVISD::BuildPairF64: {
1225 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1226 break;
1227
1228 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1229 "BuildPairF64 only handled here on rv32i_zdinx");
1230
1231 SDValue N =
1232 buildGPRPair(CurDAG, DL, VT, Lo: Node->getOperand(Num: 0), Hi: Node->getOperand(Num: 1));
1233 ReplaceNode(F: Node, T: N.getNode());
1234 return;
1235 }
1236 case RISCVISD::SplitGPRPair:
1237 case RISCVISD::SplitF64: {
1238 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1239 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1240 "SplitF64 only handled here on rv32i_zdinx");
1241
1242 if (!SDValue(Node, 0).use_empty()) {
1243 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1244 VT: Node->getValueType(ResNo: 0),
1245 Operand: Node->getOperand(Num: 0));
1246 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1247 }
1248
1249 if (!SDValue(Node, 1).use_empty()) {
1250 SDValue Hi = CurDAG->getTargetExtractSubreg(
1251 SRIdx: RISCV::sub_gpr_odd, DL, VT: Node->getValueType(ResNo: 1), Operand: Node->getOperand(Num: 0));
1252 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1253 }
1254
1255 CurDAG->RemoveDeadNode(N: Node);
1256 return;
1257 }
1258
1259 assert(Opcode != RISCVISD::SplitGPRPair &&
1260 "SplitGPRPair should already be handled");
1261
1262 if (!Subtarget->hasStdExtZfa())
1263 break;
1264 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1265 "Unexpected subtarget");
1266
1267 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1268 if (!SDValue(Node, 0).use_empty()) {
1269 SDNode *Lo = CurDAG->getMachineNode(Opcode: RISCV::FMV_X_W_FPR64, dl: DL, VT,
1270 Op1: Node->getOperand(Num: 0));
1271 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Lo, 0));
1272 }
1273 if (!SDValue(Node, 1).use_empty()) {
1274 SDNode *Hi = CurDAG->getMachineNode(Opcode: RISCV::FMVH_X_D, dl: DL, VT,
1275 Op1: Node->getOperand(Num: 0));
1276 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Hi, 0));
1277 }
1278
1279 CurDAG->RemoveDeadNode(N: Node);
1280 return;
1281 }
1282 case ISD::SHL: {
1283 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1284 if (!N1C)
1285 break;
1286 SDValue N0 = Node->getOperand(Num: 0);
1287 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1288 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1289 break;
1290 unsigned ShAmt = N1C->getZExtValue();
1291 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1292
1293 if (isShiftedMask_64(Value: Mask)) {
1294 unsigned XLen = Subtarget->getXLen();
1295 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1296 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1297 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1298 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1299 // where C2 has 32 leading zeros and C3 trailing zeros.
1300 SDNode *SRLIW = CurDAG->getMachineNode(
1301 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1302 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1303 SDNode *SLLI = CurDAG->getMachineNode(
1304 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1305 Op2: CurDAG->getTargetConstant(Val: TrailingZeros + ShAmt, DL, VT));
1306 ReplaceNode(F: Node, T: SLLI);
1307 return;
1308 }
1309 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1310 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1311 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1312 // where C2 has C4 leading zeros and no trailing zeros.
1313 // This is profitable if the "and" was to be lowered to
1314 // (srli (slli X, C4), C4) and not (andi X, C2).
1315 // For "LeadingZeros == 32":
1316 // - with Zba it's just (slli.uw X, C)
1317 // - without Zba a tablegen pattern applies the very same
1318 // transform as we would have done here
1319 SDNode *SLLI = CurDAG->getMachineNode(
1320 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1321 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1322 SDNode *SRLI = CurDAG->getMachineNode(
1323 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1324 Op2: CurDAG->getTargetConstant(Val: LeadingZeros - ShAmt, DL, VT));
1325 ReplaceNode(F: Node, T: SRLI);
1326 return;
1327 }
1328 }
1329 break;
1330 }
1331 case ISD::SRL: {
1332 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1333 if (!N1C)
1334 break;
1335 SDValue N0 = Node->getOperand(Num: 0);
1336 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1337 break;
1338 unsigned ShAmt = N1C->getZExtValue();
1339 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1340
1341 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1342 // 32 leading zeros and C3 trailing zeros.
1343 if (isShiftedMask_64(Value: Mask) && N0.hasOneUse()) {
1344 unsigned XLen = Subtarget->getXLen();
1345 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1346 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1347 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1348 SDNode *SRLIW = CurDAG->getMachineNode(
1349 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1350 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1351 SDNode *SLLI = CurDAG->getMachineNode(
1352 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1353 Op2: CurDAG->getTargetConstant(Val: TrailingZeros - ShAmt, DL, VT));
1354 ReplaceNode(F: Node, T: SLLI);
1355 return;
1356 }
1357 }
1358
1359 // Optimize (srl (and X, C2), C) ->
1360 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1361 // Where C2 is a mask with C3 trailing ones.
1362 // Taking into account that the C2 may have had lower bits unset by
1363 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1364 // This pattern occurs when type legalizing right shifts for types with
1365 // less than XLen bits.
1366 Mask |= maskTrailingOnes<uint64_t>(N: ShAmt);
1367 if (!isMask_64(Value: Mask))
1368 break;
1369 unsigned TrailingOnes = llvm::countr_one(Value: Mask);
1370 if (ShAmt >= TrailingOnes)
1371 break;
1372 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1373 if (TrailingOnes == 32) {
1374 SDNode *SRLI = CurDAG->getMachineNode(
1375 Opcode: Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, dl: DL, VT,
1376 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1377 ReplaceNode(F: Node, T: SRLI);
1378 return;
1379 }
1380
1381 // Only do the remaining transforms if the AND has one use.
1382 if (!N0.hasOneUse())
1383 break;
1384
1385 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1386 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1387 SDNode *BEXTI = CurDAG->getMachineNode(
1388 Opcode: Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, dl: DL, VT,
1389 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1390 ReplaceNode(F: Node, T: BEXTI);
1391 return;
1392 }
1393
1394 const unsigned Msb = TrailingOnes - 1;
1395 const unsigned Lsb = ShAmt;
1396 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0.getOperand(i: 0), Msb, Lsb))
1397 return;
1398
1399 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1400 SDNode *SLLI =
1401 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1402 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1403 SDNode *SRLI = CurDAG->getMachineNode(
1404 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1405 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1406 ReplaceNode(F: Node, T: SRLI);
1407 return;
1408 }
1409 case ISD::SRA: {
1410 if (trySignedBitfieldExtract(Node))
1411 return;
1412
1413 if (trySignedBitfieldInsertInSign(Node))
1414 return;
1415
1416 // Optimize (sra (sext_inreg X, i16), C) ->
1417 // (srai (slli X, (XLen-16), (XLen-16) + C)
1418 // And (sra (sext_inreg X, i8), C) ->
1419 // (srai (slli X, (XLen-8), (XLen-8) + C)
1420 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1421 // This transform matches the code we get without Zbb. The shifts are more
1422 // compressible, and this can help expose CSE opportunities in the sdiv by
1423 // constant optimization.
1424 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1425 if (!N1C)
1426 break;
1427 SDValue N0 = Node->getOperand(Num: 0);
1428 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1429 break;
1430 unsigned ShAmt = N1C->getZExtValue();
1431 unsigned ExtSize =
1432 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
1433 // ExtSize of 32 should use sraiw via tablegen pattern.
1434 if (ExtSize >= 32 || ShAmt >= ExtSize)
1435 break;
1436 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1437 SDNode *SLLI =
1438 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1439 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1440 SDNode *SRAI = CurDAG->getMachineNode(
1441 Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1442 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1443 ReplaceNode(F: Node, T: SRAI);
1444 return;
1445 }
1446 case ISD::OR: {
1447 if (tryShrinkShlLogicImm(Node))
1448 return;
1449
1450 break;
1451 }
1452 case ISD::XOR:
1453 if (tryShrinkShlLogicImm(Node))
1454 return;
1455
1456 break;
1457 case ISD::AND: {
1458 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1459 if (!N1C)
1460 break;
1461
1462 SDValue N0 = Node->getOperand(Num: 0);
1463
1464 bool LeftShift = N0.getOpcode() == ISD::SHL;
1465 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1466 auto *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
1467 if (!C)
1468 break;
1469 unsigned C2 = C->getZExtValue();
1470 unsigned XLen = Subtarget->getXLen();
1471 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1472
1473 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1474 // shift pair might offer more compression opportunities.
1475 // TODO: We could check for C extension here, but we don't have many lit
1476 // tests with the C extension enabled so not checking gets better
1477 // coverage.
1478 // TODO: What if ANDI faster than shift?
1479 bool IsCANDI = isInt<6>(x: N1C->getSExtValue());
1480
1481 uint64_t C1 = N1C->getZExtValue();
1482
1483 // Clear irrelevant bits in the mask.
1484 if (LeftShift)
1485 C1 &= maskTrailingZeros<uint64_t>(N: C2);
1486 else
1487 C1 &= maskTrailingOnes<uint64_t>(N: XLen - C2);
1488
1489 // Some transforms should only be done if the shift has a single use or
1490 // the AND would become (srli (slli X, 32), 32)
1491 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1492
1493 SDValue X = N0.getOperand(i: 0);
1494
1495 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1496 // with c3 leading zeros.
1497 if (!LeftShift && isMask_64(Value: C1)) {
1498 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1499 if (C2 < Leading) {
1500 // If the number of leading zeros is C2+32 this can be SRLIW.
1501 if (C2 + 32 == Leading) {
1502 SDNode *SRLIW = CurDAG->getMachineNode(
1503 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1504 ReplaceNode(F: Node, T: SRLIW);
1505 return;
1506 }
1507
1508 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1509 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1510 //
1511 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1512 // legalized and goes through DAG combine.
1513 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1514 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1515 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32) {
1516 SDNode *SRAIW =
1517 CurDAG->getMachineNode(Opcode: RISCV::SRAIW, dl: DL, VT, Op1: X.getOperand(i: 0),
1518 Op2: CurDAG->getTargetConstant(Val: 31, DL, VT));
1519 SDNode *SRLIW = CurDAG->getMachineNode(
1520 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: SDValue(SRAIW, 0),
1521 Op2: CurDAG->getTargetConstant(Val: Leading - 32, DL, VT));
1522 ReplaceNode(F: Node, T: SRLIW);
1523 return;
1524 }
1525
1526 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1527 // available.
1528 // Transform (and (srl x, C2), C1)
1529 // -> (<bfextract> x, msb, lsb)
1530 //
1531 // Make sure to keep this below the SRLIW cases, as we always want to
1532 // prefer the more common instruction.
1533 const unsigned Msb = llvm::bit_width(Value: C1) + C2 - 1;
1534 const unsigned Lsb = C2;
1535 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1536 return;
1537
1538 // (srli (slli x, c3-c2), c3).
1539 // Skip if we could use (zext.w (sraiw X, C2)).
1540 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1541 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1542 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32;
1543 // Also Skip if we can use bexti or th.tst.
1544 Skip |= HasBitTest && Leading == XLen - 1;
1545 if (OneUseOrZExtW && !Skip) {
1546 SDNode *SLLI = CurDAG->getMachineNode(
1547 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1548 Op2: CurDAG->getTargetConstant(Val: Leading - C2, DL, VT));
1549 SDNode *SRLI = CurDAG->getMachineNode(
1550 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1551 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1552 ReplaceNode(F: Node, T: SRLI);
1553 return;
1554 }
1555 }
1556 }
1557
1558 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1559 // shifted by c2 bits with c3 leading zeros.
1560 if (LeftShift && isShiftedMask_64(Value: C1)) {
1561 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1562
1563 if (C2 + Leading < XLen &&
1564 C1 == (maskTrailingOnes<uint64_t>(N: XLen - (C2 + Leading)) << C2)) {
1565 // Use slli.uw when possible.
1566 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1567 SDNode *SLLI_UW =
1568 CurDAG->getMachineNode(Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: X,
1569 Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1570 ReplaceNode(F: Node, T: SLLI_UW);
1571 return;
1572 }
1573
1574 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1575 // available.
1576 // Transform (and (shl x, c2), c1)
1577 // -> (<bfinsert> x, msb, lsb)
1578 // e.g.
1579 // (and (shl x, 12), 0x00fff000)
1580 // If XLen = 32 and C2 = 12, then
1581 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1582 const unsigned Msb = XLen - Leading - 1;
1583 const unsigned Lsb = C2;
1584 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1585 return;
1586
1587 if (OneUseOrZExtW && !IsCANDI) {
1588 // (packh x0, X)
1589 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1590 SDNode *PACKH = CurDAG->getMachineNode(
1591 Opcode: RISCV::PACKH, dl: DL, VT,
1592 Op1: CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT()), Op2: X);
1593 ReplaceNode(F: Node, T: PACKH);
1594 return;
1595 }
1596 // (srli (slli c2+c3), c3)
1597 SDNode *SLLI = CurDAG->getMachineNode(
1598 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1599 Op2: CurDAG->getTargetConstant(Val: C2 + Leading, DL, VT));
1600 SDNode *SRLI = CurDAG->getMachineNode(
1601 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1602 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1603 ReplaceNode(F: Node, T: SRLI);
1604 return;
1605 }
1606 }
1607 }
1608
1609 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1610 // shifted mask with c2 leading zeros and c3 trailing zeros.
1611 if (!LeftShift && isShiftedMask_64(Value: C1)) {
1612 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1613 unsigned Trailing = llvm::countr_zero(Val: C1);
1614 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1615 !IsCANDI) {
1616 unsigned SrliOpc = RISCV::SRLI;
1617 // If the input is zexti32 we should use SRLIW.
1618 if (X.getOpcode() == ISD::AND &&
1619 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1620 X.getConstantOperandVal(i: 1) == UINT64_C(0xFFFFFFFF)) {
1621 SrliOpc = RISCV::SRLIW;
1622 X = X.getOperand(i: 0);
1623 }
1624 SDNode *SRLI = CurDAG->getMachineNode(
1625 Opcode: SrliOpc, dl: DL, VT, Op1: X,
1626 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1627 SDNode *SLLI = CurDAG->getMachineNode(
1628 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1629 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1630 ReplaceNode(F: Node, T: SLLI);
1631 return;
1632 }
1633 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1634 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1635 OneUseOrZExtW && !IsCANDI) {
1636 SDNode *SRLIW = CurDAG->getMachineNode(
1637 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1638 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1639 SDNode *SLLI = CurDAG->getMachineNode(
1640 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1641 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1642 ReplaceNode(F: Node, T: SLLI);
1643 return;
1644 }
1645 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1646 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1647 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1648 SDNode *SRLI = CurDAG->getMachineNode(
1649 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1650 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1651 SDNode *SLLI_UW = CurDAG->getMachineNode(
1652 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1653 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1654 ReplaceNode(F: Node, T: SLLI_UW);
1655 return;
1656 }
1657 }
1658
1659 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1660 // shifted mask with no leading zeros and c3 trailing zeros.
1661 if (LeftShift && isShiftedMask_64(Value: C1)) {
1662 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1663 unsigned Trailing = llvm::countr_zero(Val: C1);
1664 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1665 SDNode *SRLI = CurDAG->getMachineNode(
1666 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1667 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1668 SDNode *SLLI = CurDAG->getMachineNode(
1669 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1670 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1671 ReplaceNode(F: Node, T: SLLI);
1672 return;
1673 }
1674 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1675 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1676 SDNode *SRLIW = CurDAG->getMachineNode(
1677 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1678 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1679 SDNode *SLLI = CurDAG->getMachineNode(
1680 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1681 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1682 ReplaceNode(F: Node, T: SLLI);
1683 return;
1684 }
1685
1686 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1687 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1688 Subtarget->hasStdExtZba()) {
1689 SDNode *SRLI = CurDAG->getMachineNode(
1690 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1691 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1692 SDNode *SLLI_UW = CurDAG->getMachineNode(
1693 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1694 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1695 ReplaceNode(F: Node, T: SLLI_UW);
1696 return;
1697 }
1698 }
1699 }
1700
1701 const uint64_t C1 = N1C->getZExtValue();
1702
1703 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
1704 N0.hasOneUse()) {
1705 unsigned C2 = N0.getConstantOperandVal(i: 1);
1706 unsigned XLen = Subtarget->getXLen();
1707 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1708
1709 SDValue X = N0.getOperand(i: 0);
1710
1711 // Prefer SRAIW + ANDI when possible.
1712 bool Skip = C2 > 32 && isInt<12>(x: N1C->getSExtValue()) &&
1713 X.getOpcode() == ISD::SHL &&
1714 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1715 X.getConstantOperandVal(i: 1) == 32;
1716 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1717 // mask with c3 leading zeros and c2 is larger than c3.
1718 if (isMask_64(Value: C1) && !Skip) {
1719 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1720 if (C2 > Leading) {
1721 SDNode *SRAI = CurDAG->getMachineNode(
1722 Opcode: RISCV::SRAI, dl: DL, VT, Op1: X,
1723 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1724 SDNode *SRLI = CurDAG->getMachineNode(
1725 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1726 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1727 ReplaceNode(F: Node, T: SRLI);
1728 return;
1729 }
1730 }
1731
1732 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1733 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1734 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1735 if (isShiftedMask_64(Value: C1) && !Skip) {
1736 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1737 unsigned Trailing = llvm::countr_zero(Val: C1);
1738 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1739 SDNode *SRAI = CurDAG->getMachineNode(
1740 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1741 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1742 SDNode *SRLI = CurDAG->getMachineNode(
1743 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1744 Op2: CurDAG->getTargetConstant(Val: Leading + Trailing, DL, VT));
1745 SDNode *SLLI = CurDAG->getMachineNode(
1746 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1747 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1748 ReplaceNode(F: Node, T: SLLI);
1749 return;
1750 }
1751 }
1752 }
1753
1754 // If C1 masks off the upper bits only (but can't be formed as an
1755 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1756 // available.
1757 // Transform (and x, C1)
1758 // -> (<bfextract> x, msb, lsb)
1759 if (isMask_64(Value: C1) && !isInt<12>(x: N1C->getSExtValue()) &&
1760 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1761 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1762 const unsigned Msb = llvm::bit_width(Value: C1) - 1;
1763 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0, Msb, Lsb: 0))
1764 return;
1765 }
1766
1767 if (tryShrinkShlLogicImm(Node))
1768 return;
1769
1770 break;
1771 }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full product
    // fits in XLen bits. We can shift X left by the number of leading zeros in
    // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
    // product has XLen trailing zeros, putting it in the output of MULHU. This
    // can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(Num: 0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
      break;

    uint64_t C2 = N0.getConstantOperandVal(i: 1);

    // Constant should be a mask (all-ones in the low bits) so that the AND is
    // a zero-extension of the low bit_width(C2) bits of X.
    if (!isMask_64(Value: C2))
      break;

    // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
    // multiple users or the constant is a simm12. This prevents inserting a
    // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
    // make it more costly to materialize. Otherwise, using a SLLI might allow
    // it to be compressed.
    bool IsANDIOrZExt =
        isInt<12>(x: C2) ||
        (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
    // With XTHeadBb, we can use TH.EXTU.
    IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsANDIOrZExt && (isInt<12>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
      break;
    // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
    // the constant is a simm32.
    bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
    // With XTHeadBb, we can use TH.EXTU.
    IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsZExtW && (isInt<32>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - llvm::bit_width(Value: C2);

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out (in which case the product would not fit in XLen bits
    // and the MULHU trick is invalid).
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - llvm::bit_width(Value: C1)))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(x: ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    SDNode *Imm = selectImm(CurDAG, DL, VT, Imm: ShiftedC1, Subtarget: *Subtarget).getNode();
    SDNode *SLLI =
        CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
                               Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(Opcode: RISCV::MULHU, dl: DL, VT,
                                           Op1: SDValue(SLLI, 0), Op2: SDValue(Imm, 0));
    ReplaceNode(F: Node, T: MULHU);
    return;
  }
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
  case RISCVISD::WMULSU: {
    // Custom select (S/U)MUL_LOHI to WMUL(U) for RV32P. The widening multiply
    // produces the full 64-bit product in a GPR pair (MVT::Untyped), which we
    // then split back into the node's two 32-bit results.
    assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
           "Unexpected opcode");

    // Pick the pseudo matching the signedness of the widening multiply.
    unsigned Opc;
    switch (Node->getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case ISD::SMUL_LOHI:
      Opc = RISCV::WMUL;
      break;
    case ISD::UMUL_LOHI:
      Opc = RISCV::WMULU;
      break;
    case RISCVISD::WMULSU:
      Opc = RISCV::WMULSU;
      break;
    }

    SDNode *WMUL = CurDAG->getMachineNode(
        Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Node->getOperand(Num: 0), Op2: Node->getOperand(Num: 1));

    // Extract the low/high halves of the register pair and wire them to the
    // node's two results.
    auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(WMUL, 0));
    ReplaceUses(F: SDValue(Node, 0), T: Lo);
    ReplaceUses(F: SDValue(Node, 1), T: Hi);
    CurDAG->RemoveDeadNode(N: Node);
    return;
  }
  case ISD::LOAD: {
    if (tryIndexedLoad(Node))
      return;

    if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
      // We match post-incrementing load here
      LoadSDNode *Load = cast<LoadSDNode>(Val: Node);
      if (Load->getAddressingMode() != ISD::POST_INC)
        break;

      SDValue Chain = Node->getOperand(Num: 0);
      SDValue Base = Node->getOperand(Num: 1);
      SDValue Offset = Node->getOperand(Num: 2);

      bool Simm12 = false;
      bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;

      // If the post-increment amount is a simm12, use the register+immediate
      // form; otherwise fall back to the register+register form below.
      if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Val&: Offset)) {
        int ConstantVal = ConstantOffset->getSExtValue();
        Simm12 = isInt<12>(x: ConstantVal);
        if (Simm12)
          Offset = CurDAG->getTargetConstant(Val: ConstantVal, DL: SDLoc(Offset),
                                             VT: Offset.getValueType());
      }

      // Select the CV_L* post-increment pseudo by memory width, sign/zero
      // extension, and immediate vs. register increment.
      unsigned Opcode = 0;
      switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
      case MVT::i8:
        if (Simm12 && SignExtend)
          Opcode = RISCV::CV_LB_ri_inc;
        else if (Simm12 && !SignExtend)
          Opcode = RISCV::CV_LBU_ri_inc;
        else if (!Simm12 && SignExtend)
          Opcode = RISCV::CV_LB_rr_inc;
        else
          Opcode = RISCV::CV_LBU_rr_inc;
        break;
      case MVT::i16:
        if (Simm12 && SignExtend)
          Opcode = RISCV::CV_LH_ri_inc;
        else if (Simm12 && !SignExtend)
          Opcode = RISCV::CV_LHU_ri_inc;
        else if (!Simm12 && SignExtend)
          Opcode = RISCV::CV_LH_rr_inc;
        else
          Opcode = RISCV::CV_LHU_rr_inc;
        break;
      case MVT::i32:
        // A full-width i32 load needs no extension distinction on RV32.
        if (Simm12)
          Opcode = RISCV::CV_LW_ri_inc;
        else
          Opcode = RISCV::CV_LW_rr_inc;
        break;
      default:
        break;
      }
      // Unsupported memory width; let normal selection handle it.
      if (!Opcode)
        break;

      ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: XLenVT, VT2: XLenVT,
                                                 VT3: Chain.getSimpleValueType(), Op1: Base,
                                                 Op2: Offset, Op3: Chain));
      return;
    }
    break;
  }
  case RISCVISD::LD_RV32: {
    // Zilsd 64-bit load on RV32: one LD_RV32 machine node produces a GPR pair
    // (MVT::Untyped) which is split into the node's two 32-bit results.
    assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");

    SDValue Base, Offset;
    SDValue Chain = Node->getOperand(Num: 0);
    SDValue Addr = Node->getOperand(Num: 1);
    // Fold a reg+imm addressing mode if possible.
    SelectAddrRegImm(Addr, Base, Offset);

    SDValue Ops[] = {Base, Offset, Chain};
    MachineSDNode *New = CurDAG->getMachineNode(
        Opcode: RISCV::LD_RV32, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
    auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
    // Preserve the memory operand for alias analysis / scheduling.
    CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
    ReplaceUses(F: SDValue(Node, 0), T: Lo);
    ReplaceUses(F: SDValue(Node, 1), T: Hi);
    ReplaceUses(F: SDValue(Node, 2), T: SDValue(New, 1));
    CurDAG->RemoveDeadNode(N: Node);
    return;
  }
  case RISCVISD::SD_RV32: {
    // Zilsd 64-bit store on RV32: combine the two 32-bit value operands into a
    // GPR pair and emit a single SD_RV32.
    SDValue Base, Offset;
    SDValue Chain = Node->getOperand(Num: 0);
    SDValue Addr = Node->getOperand(Num: 3);
    // Fold a reg+imm addressing mode if possible.
    SelectAddrRegImm(Addr, Base, Offset);

    SDValue Lo = Node->getOperand(Num: 1);
    SDValue Hi = Node->getOperand(Num: 2);

    SDValue RegPair;
    // Peephole to use X0_Pair for storing zero.
    if (isNullConstant(V: Lo) && isNullConstant(V: Hi)) {
      RegPair = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
    } else {
      RegPair = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo, Hi);
    }

    MachineSDNode *New = CurDAG->getMachineNode(Opcode: RISCV::SD_RV32, dl: DL, VT: MVT::Other,
                                                Ops: {RegPair, Base, Offset, Chain});
    // Preserve the memory operand for alias analysis / scheduling.
    CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
    ReplaceUses(F: SDValue(Node, 0), T: SDValue(New, 0));
    CurDAG->RemoveDeadNode(N: Node);
    return;
  }
  case RISCVISD::ADDD:
    // Try to match WMACC pattern: ADDD where one operand pair comes from a
    // widening multiply.
    if (tryWideningMulAcc(Node, DL))
      return;

    // Fall through to regular ADDD selection.
    [[fallthrough]];
  case RISCVISD::SUBD:
  case RISCVISD::PPAIRE_DB:
  case RISCVISD::WADDAU:
  case RISCVISD::WSUBAU: {
    // 64-bit pair operations on RV32: operands and result are modeled as
    // lo/hi i32 halves at the SDNode level and as a GPR pair (MVT::Untyped)
    // at the machine level.
    assert(!Subtarget->is64Bit() && "Unexpected opcode");
    assert((Node->getOpcode() != RISCVISD::PPAIRE_DB ||
            Subtarget->enablePExtSIMDCodeGen()) &&
           "Unexpected opcode");

    SDValue Op0Lo = Node->getOperand(Num: 0);
    SDValue Op0Hi = Node->getOperand(Num: 1);

    SDValue Op0;
    // Peephole: an all-zero first pair can use X0_Pair directly.
    if (isNullConstant(V: Op0Lo) && isNullConstant(V: Op0Hi)) {
      Op0 = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
    } else {
      Op0 = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: Op0Lo, Hi: Op0Hi);
    }

    SDValue Op1Lo = Node->getOperand(Num: 2);
    SDValue Op1Hi = Node->getOperand(Num: 3);

    MachineSDNode *New;
    if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU) {
      // WADDAU/WSUBAU: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi are
      // the two 32-bit values.
      unsigned Opc = Opcode == RISCVISD::WADDAU ? RISCV::WADDAU : RISCV::WSUBAU;
      New = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Op0, Op2: Op1Lo, Op3: Op1Hi);
    } else {
      // The remaining opcodes take two full GPR pairs.
      SDValue Op1 = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: Op1Lo, Hi: Op1Hi);

      unsigned Opc;
      switch (Opcode) {
      default:
        llvm_unreachable("Unexpected opcode");
      case RISCVISD::ADDD:
        Opc = RISCV::ADDD;
        break;
      case RISCVISD::SUBD:
        Opc = RISCV::SUBD;
        break;
      case RISCVISD::PPAIRE_DB:
        Opc = RISCV::PPAIRE_DB;
        break;
      }
      New = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Op0, Op2: Op1);
    }

    // Split the pair result back into the node's lo/hi i32 results.
    auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
    ReplaceUses(F: SDValue(Node, 0), T: Lo);
    ReplaceUses(F: SDValue(Node, 1), T: Hi);
    CurDAG->RemoveDeadNode(N: Node);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(Num: 0);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      // vmsge(u).vx has no single instruction; expand it here. See the
      // comments below for each expansion strategy.
      SDValue Src1 = Node->getOperand(Num: 1);
      SDValue Src2 = Node->getOperand(Num: 2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpConstant = false;
      bool IsCmpMinimum = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      int64_t CVal = 0;
      MVT Src1VT = Src1.getSimpleValueType();
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
        IsCmpConstant = true;
        CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          // vmsgeu.vx v, 0 is always true (x >= 0 unsigned); treat it like a
          // compare against the minimum value so it becomes vmset below.
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpMinimum = true;
        } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
                                              numBits: Src1VT.getScalarSizeInBits())
                                              .getSExtValue()) {
          // Signed compare against the minimum value is also always true.
          IsCmpMinimum = true;
        }
      }
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
      switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
  case RISCVVType::lmulenum: \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
                             : RISCV::PseudoVMSLT_VX_##suffix; \
    VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
                             : RISCV::PseudoVMSGT_VX_##suffix; \
    break;
      CASE_VMSLT_OPCODES(LMUL_F8, MF8)
      CASE_VMSLT_OPCODES(LMUL_F4, MF4)
      CASE_VMSLT_OPCODES(LMUL_F2, MF2)
      CASE_VMSLT_OPCODES(LMUL_1, M1)
      CASE_VMSLT_OPCODES(LMUL_2, M2)
      CASE_VMSLT_OPCODES(LMUL_4, M4)
      CASE_VMSLT_OPCODES(LMUL_8, M8)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
  case RISCVVType::lmulenum: \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
    break;
      CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
      CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
      CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
      CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
      CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
      CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
      CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
#undef CASE_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
      SDValue VL;
      selectVLOp(N: Node->getOperand(Num: 3), VL);

      // If vmsge(u) with minimum value, expand it to vmset.
      if (IsCmpMinimum) {
        ReplaceNode(F: Node,
                    T: CurDAG->getMachineNode(Opcode: VMSetOpcode, dl: DL, VT, Op1: VL, Op2: MaskSEW));
        return;
      }

      // For other constants, x >= c is equivalent to x > c-1, which vmsgt(u)
      // can do directly.
      if (IsCmpConstant) {
        SDValue Imm =
            selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);

        ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMSGTOpcode, dl: DL, VT,
                                                   Ops: {Src1, Imm, VL, SEW}));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMNANDOpcode, dl: DL, VT,
                                                 Ops: {Cmp, Cmp, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      // Masked form of the expansion above; additionally has to honor the
      // passthru (MaskedOff) and mask operands.
      SDValue Src1 = Node->getOperand(Num: 2);
      SDValue Src2 = Node->getOperand(Num: 3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpConstant = false;
      bool IsCmpMinimum = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      MVT Src1VT = Src1.getSimpleValueType();
      int64_t CVal = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
        IsCmpConstant = true;
        CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          // vmsgeu.vx v, 0 is always true; handle as compare-with-minimum.
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpMinimum = true;
        } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
                                              numBits: Src1VT.getScalarSizeInBits())
                                              .getSExtValue()) {
          IsCmpMinimum = true;
        }
      }
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode, VMSGTMaskOpcode;
      switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
  case RISCVVType::lmulenum: \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
                             : RISCV::PseudoVMSLT_VX_##suffix; \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
    VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
                                 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
    break;
      CASE_VMSLT_OPCODES(LMUL_F8, MF8)
      CASE_VMSLT_OPCODES(LMUL_F4, MF4)
      CASE_VMSLT_OPCODES(LMUL_F2, MF2)
      CASE_VMSLT_OPCODES(LMUL_1, M1)
      CASE_VMSLT_OPCODES(LMUL_2, M2)
      CASE_VMSLT_OPCODES(LMUL_4, M4)
      CASE_VMSLT_OPCODES(LMUL_8, M8)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
  case RISCVVType::lmulenum: \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
    break;
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
      SDValue VL;
      selectVLOp(N: Node->getOperand(Num: 5), VL);
      SDValue MaskedOff = Node->getOperand(Num: 1);
      SDValue Mask = Node->getOperand(Num: 4);

      // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
      if (IsCmpMinimum) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(F: Node, T: Mask.getNode());
          return;
        }
        ReplaceNode(F: Node,
                    T: CurDAG->getMachineNode(Opcode: VMOROpcode, dl: DL, VT,
                                            Ops: {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMANDNOpcode, dl: DL, VT,
                                                   Ops: {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      SDValue PolicyOp =
          CurDAG->getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);

      // x >= c is equivalent to x > c-1; use the masked vmsgt(u) pseudo.
      if (IsCmpConstant) {
        SDValue Imm =
            selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);

        ReplaceNode(F: Node, T: CurDAG->getMachineNode(
                               Opcode: VMSGTMaskOpcode, dl: DL, VT,
                               Ops: {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
        return;
      }

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior, because
      // the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(CurDAG->getMachineNode(Opcode: VMSLTMaskOpcode, dl: DL, VT,
                                                   Ops: {MaskedOff, Src1, Src2, Mask,
                                                        VL, SEW, PolicyOp}),
                            0);
      // vmxor.mm vd, vd, v0 is used to update active value.
      ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMXOROpcode, dl: DL, VT,
                                                 Ops: {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    case Intrinsic::riscv_sf_vsettnt:
    case Intrinsic::riscv_sf_vsettm:
    case Intrinsic::riscv_sf_vsettk:
      return selectXSfmmVSET(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(Num: 1);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      // Indexed loads: ordered (vloxei) vs. unordered (vluxei), optionally
      // masked. The pseudo is keyed on data LMUL, index LMUL and index EEW.
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(ResNo: 0);
      unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      // Passthru operand.
      Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, IndexVT: &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
      unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
                              "index values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
          IndexLMUL: static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);

      // Preserve the memory operand for alias analysis / scheduling.
      CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

      ReplaceNode(F: Node, T: Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      // Unit-stride (vle), strided (vlse) and mask (vlm) loads.
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(ResNo: 0);
      unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());

      // The riscv_vlm intrinsics are always tail agnostic and have no passthru
      // operand at the IR level. In pseudos, they have both policy and
      // passthru operand. The passthru operand is needed to track the
      // "tail undefined" state, and the policy is there just for
      // consistency - it will always be "don't care" for the
      // unmasked form.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      if (HasPassthruOperand)
        Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
      else {
        // We eagerly lower to implicit_def (instead of undef), as we
        // otherwise fail to select nodes such as: nxv1i1 = undef
        SDNode *Passthru =
            CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT);
        Operands.push_back(Elt: SDValue(Passthru, 0));
      }
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
                              LMUL: static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);

      // Preserve the memory operand for alias analysis / scheduling.
      CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

      ReplaceNode(F: Node, T: Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      // Fault-only-first loads; these have an extra scalar result for the
      // possibly-reduced VL.
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(ResNo: 0);
      unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 7> Operands;
      // Passthru operand.
      Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, LMUL: static_cast<unsigned>(LMUL));
      MachineSDNode *Load = CurDAG->getMachineNode(
          Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
      CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

      ReplaceNode(F: Node, T: Load);
      return;
    }
    case Intrinsic::riscv_nds_vln:
    case Intrinsic::riscv_nds_vln_mask:
    case Intrinsic::riscv_nds_vlnu:
    case Intrinsic::riscv_nds_vlnu_mask: {
      // Andes vendor load intrinsics, signed (vln) or unsigned (vlnu),
      // optionally masked.
      bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
                      IntNo == Intrinsic::riscv_nds_vlnu_mask;
      bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
                        IntNo == Intrinsic::riscv_nds_vlnu_mask;

      MVT VT = Node->getSimpleValueType(ResNo: 0);
      unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;

      // Passthru operand.
      Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed=*/false, Operands,
                                 /*IsLoad=*/true);

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
          Masked: IsMasked, Unsigned: IsUnsigned, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
        CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});

      ReplaceNode(F: Node, T: Load);
      return;
    }
    }
    break;
  }
2542 case ISD::INTRINSIC_VOID: {
2543 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2544 switch (IntNo) {
2545 case Intrinsic::riscv_vsseg2:
2546 case Intrinsic::riscv_vsseg3:
2547 case Intrinsic::riscv_vsseg4:
2548 case Intrinsic::riscv_vsseg5:
2549 case Intrinsic::riscv_vsseg6:
2550 case Intrinsic::riscv_vsseg7:
2551 case Intrinsic::riscv_vsseg8: {
2552 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2553 /*IsStrided*/ false);
2554 return;
2555 }
2556 case Intrinsic::riscv_vsseg2_mask:
2557 case Intrinsic::riscv_vsseg3_mask:
2558 case Intrinsic::riscv_vsseg4_mask:
2559 case Intrinsic::riscv_vsseg5_mask:
2560 case Intrinsic::riscv_vsseg6_mask:
2561 case Intrinsic::riscv_vsseg7_mask:
2562 case Intrinsic::riscv_vsseg8_mask: {
2563 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2564 /*IsStrided*/ false);
2565 return;
2566 }
2567 case Intrinsic::riscv_vssseg2:
2568 case Intrinsic::riscv_vssseg3:
2569 case Intrinsic::riscv_vssseg4:
2570 case Intrinsic::riscv_vssseg5:
2571 case Intrinsic::riscv_vssseg6:
2572 case Intrinsic::riscv_vssseg7:
2573 case Intrinsic::riscv_vssseg8: {
2574 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2575 /*IsStrided*/ true);
2576 return;
2577 }
2578 case Intrinsic::riscv_vssseg2_mask:
2579 case Intrinsic::riscv_vssseg3_mask:
2580 case Intrinsic::riscv_vssseg4_mask:
2581 case Intrinsic::riscv_vssseg5_mask:
2582 case Intrinsic::riscv_vssseg6_mask:
2583 case Intrinsic::riscv_vssseg7_mask:
2584 case Intrinsic::riscv_vssseg8_mask: {
2585 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2586 /*IsStrided*/ true);
2587 return;
2588 }
2589 case Intrinsic::riscv_vsoxseg2:
2590 case Intrinsic::riscv_vsoxseg3:
2591 case Intrinsic::riscv_vsoxseg4:
2592 case Intrinsic::riscv_vsoxseg5:
2593 case Intrinsic::riscv_vsoxseg6:
2594 case Intrinsic::riscv_vsoxseg7:
2595 case Intrinsic::riscv_vsoxseg8:
2596 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2597 /*IsOrdered*/ true);
2598 return;
2599 case Intrinsic::riscv_vsuxseg2:
2600 case Intrinsic::riscv_vsuxseg3:
2601 case Intrinsic::riscv_vsuxseg4:
2602 case Intrinsic::riscv_vsuxseg5:
2603 case Intrinsic::riscv_vsuxseg6:
2604 case Intrinsic::riscv_vsuxseg7:
2605 case Intrinsic::riscv_vsuxseg8:
2606 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2607 /*IsOrdered*/ false);
2608 return;
2609 case Intrinsic::riscv_vsoxseg2_mask:
2610 case Intrinsic::riscv_vsoxseg3_mask:
2611 case Intrinsic::riscv_vsoxseg4_mask:
2612 case Intrinsic::riscv_vsoxseg5_mask:
2613 case Intrinsic::riscv_vsoxseg6_mask:
2614 case Intrinsic::riscv_vsoxseg7_mask:
2615 case Intrinsic::riscv_vsoxseg8_mask:
2616 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2617 /*IsOrdered*/ true);
2618 return;
2619 case Intrinsic::riscv_vsuxseg2_mask:
2620 case Intrinsic::riscv_vsuxseg3_mask:
2621 case Intrinsic::riscv_vsuxseg4_mask:
2622 case Intrinsic::riscv_vsuxseg5_mask:
2623 case Intrinsic::riscv_vsuxseg6_mask:
2624 case Intrinsic::riscv_vsuxseg7_mask:
2625 case Intrinsic::riscv_vsuxseg8_mask:
2626 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2627 /*IsOrdered*/ false);
2628 return;
2629 case Intrinsic::riscv_vsoxei:
2630 case Intrinsic::riscv_vsoxei_mask:
2631 case Intrinsic::riscv_vsuxei:
2632 case Intrinsic::riscv_vsuxei_mask: {
2633 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2634 IntNo == Intrinsic::riscv_vsuxei_mask;
2635 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2636 IntNo == Intrinsic::riscv_vsoxei_mask;
2637
2638 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2639 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2640
2641 unsigned CurOp = 2;
2642 SmallVector<SDValue, 8> Operands;
2643 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2644
2645 MVT IndexVT;
2646 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2647 /*IsStridedOrIndexed*/ true, Operands,
2648 /*IsLoad=*/false, IndexVT: &IndexVT);
2649
2650 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2651 "Element count mismatch");
2652
2653 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2654 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2655 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2656 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2657 reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
2658 "index values when XLEN=32");
2659 }
2660 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2661 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW,
2662 LMUL: static_cast<unsigned>(LMUL), IndexLMUL: static_cast<unsigned>(IndexLMUL));
2663 MachineSDNode *Store =
2664 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2665
2666 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2667
2668 ReplaceNode(F: Node, T: Store);
2669 return;
2670 }
2671 case Intrinsic::riscv_vsm:
2672 case Intrinsic::riscv_vse:
2673 case Intrinsic::riscv_vse_mask:
2674 case Intrinsic::riscv_vsse:
2675 case Intrinsic::riscv_vsse_mask: {
2676 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2677 IntNo == Intrinsic::riscv_vsse_mask;
2678 bool IsStrided =
2679 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2680
2681 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2682 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2683
2684 unsigned CurOp = 2;
2685 SmallVector<SDValue, 8> Operands;
2686 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2687
2688 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2689 Operands);
2690
2691 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2692 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2693 Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2694 MachineSDNode *Store =
2695 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2696 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2697
2698 ReplaceNode(F: Node, T: Store);
2699 return;
2700 }
2701 case Intrinsic::riscv_sf_vc_x_se:
2702 case Intrinsic::riscv_sf_vc_i_se:
2703 selectSF_VC_X_SE(Node);
2704 return;
2705 case Intrinsic::riscv_sf_vlte8:
2706 case Intrinsic::riscv_sf_vlte16:
2707 case Intrinsic::riscv_sf_vlte32:
2708 case Intrinsic::riscv_sf_vlte64: {
2709 unsigned Log2SEW;
2710 unsigned PseudoInst;
2711 switch (IntNo) {
2712 case Intrinsic::riscv_sf_vlte8:
2713 PseudoInst = RISCV::PseudoSF_VLTE8;
2714 Log2SEW = 3;
2715 break;
2716 case Intrinsic::riscv_sf_vlte16:
2717 PseudoInst = RISCV::PseudoSF_VLTE16;
2718 Log2SEW = 4;
2719 break;
2720 case Intrinsic::riscv_sf_vlte32:
2721 PseudoInst = RISCV::PseudoSF_VLTE32;
2722 Log2SEW = 5;
2723 break;
2724 case Intrinsic::riscv_sf_vlte64:
2725 PseudoInst = RISCV::PseudoSF_VLTE64;
2726 Log2SEW = 6;
2727 break;
2728 }
2729
2730 SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
2731 SDValue TWidenOp = CurDAG->getTargetConstant(Val: 1, DL, VT: XLenVT);
2732 SDValue Operands[] = {Node->getOperand(Num: 2),
2733 Node->getOperand(Num: 3),
2734 Node->getOperand(Num: 4),
2735 SEWOp,
2736 TWidenOp,
2737 Node->getOperand(Num: 0)};
2738
2739 MachineSDNode *TileLoad =
2740 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2741 CurDAG->setNodeMemRefs(N: TileLoad,
2742 NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2743
2744 ReplaceNode(F: Node, T: TileLoad);
2745 return;
2746 }
2747 case Intrinsic::riscv_sf_mm_s_s:
2748 case Intrinsic::riscv_sf_mm_s_u:
2749 case Intrinsic::riscv_sf_mm_u_s:
2750 case Intrinsic::riscv_sf_mm_u_u:
2751 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2752 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2753 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2754 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2755 case Intrinsic::riscv_sf_mm_f_f: {
2756 bool HasFRM = false;
2757 unsigned PseudoInst;
2758 switch (IntNo) {
2759 case Intrinsic::riscv_sf_mm_s_s:
2760 PseudoInst = RISCV::PseudoSF_MM_S_S;
2761 break;
2762 case Intrinsic::riscv_sf_mm_s_u:
2763 PseudoInst = RISCV::PseudoSF_MM_S_U;
2764 break;
2765 case Intrinsic::riscv_sf_mm_u_s:
2766 PseudoInst = RISCV::PseudoSF_MM_U_S;
2767 break;
2768 case Intrinsic::riscv_sf_mm_u_u:
2769 PseudoInst = RISCV::PseudoSF_MM_U_U;
2770 break;
2771 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2772 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2773 HasFRM = true;
2774 break;
2775 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2776 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2777 HasFRM = true;
2778 break;
2779 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2780 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2781 HasFRM = true;
2782 break;
2783 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2784 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2785 HasFRM = true;
2786 break;
2787 case Intrinsic::riscv_sf_mm_f_f:
2788 if (Node->getOperand(Num: 3).getValueType().getScalarType() == MVT::bf16)
2789 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2790 else
2791 PseudoInst = RISCV::PseudoSF_MM_F_F;
2792 HasFRM = true;
2793 break;
2794 }
2795 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2796 SDValue Op1 = Node->getOperand(Num: 3);
2797 SDValue Op2 = Node->getOperand(Num: 4);
2798 MVT VT = Op1->getSimpleValueType(ResNo: 0);
2799 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2800 SDValue TmOp = Node->getOperand(Num: 5);
2801 SDValue TnOp = Node->getOperand(Num: 6);
2802 SDValue TkOp = Node->getOperand(Num: 7);
2803 SDValue TWidenOp = Node->getOperand(Num: 8);
2804 SDValue Chain = Node->getOperand(Num: 0);
2805
2806 // sf.mm.f.f with sew=32, twiden=2 is invalid
2807 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2808 TWidenOp->getAsZExtVal() == 2)
2809 reportFatalUsageError(reason: "sf.mm.f.f doesn't support (sew=32, twiden=2)");
2810
2811 SmallVector<SDValue, 10> Operands(
2812 {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Op1, Op2});
2813 if (HasFRM)
2814 Operands.push_back(
2815 Elt: CurDAG->getTargetConstant(Val: RISCVFPRndMode::DYN, DL, VT: XLenVT));
2816 Operands.append(IL: {TmOp, TnOp, TkOp,
2817 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT), TWidenOp,
2818 Chain});
2819
2820 auto *NewNode =
2821 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2822
2823 ReplaceNode(F: Node, T: NewNode);
2824 return;
2825 }
2826 case Intrinsic::riscv_sf_vtzero_t: {
2827 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2828 SDValue Tm = Node->getOperand(Num: 3);
2829 SDValue Tn = Node->getOperand(Num: 4);
2830 SDValue Log2SEW = Node->getOperand(Num: 5);
2831 SDValue TWiden = Node->getOperand(Num: 6);
2832 SDValue Chain = Node->getOperand(Num: 0);
2833 auto *NewNode = CurDAG->getMachineNode(
2834 Opcode: RISCV::PseudoSF_VTZERO_T, dl: DL, VTs: Node->getVTList(),
2835 Ops: {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Tm, Tn, Log2SEW,
2836 TWiden, Chain});
2837
2838 ReplaceNode(F: Node, T: NewNode);
2839 return;
2840 }
2841 }
2842 break;
2843 }
2844 case ISD::BITCAST: {
2845 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2846 // Just drop bitcasts between vectors if both are fixed or both are
2847 // scalable.
2848 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2849 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2850 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2851 CurDAG->RemoveDeadNode(N: Node);
2852 return;
2853 }
2854 if (Subtarget->enablePExtSIMDCodeGen()) {
2855 bool Is32BitCast =
2856 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2857 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2858 bool Is64BitCast =
2859 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2860 SrcVT == MVT::v2i32)) ||
2861 (SrcVT == MVT::i64 &&
2862 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2863 if (Is32BitCast || Is64BitCast) {
2864 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2865 CurDAG->RemoveDeadNode(N: Node);
2866 return;
2867 }
2868 }
2869 break;
2870 }
2871 case ISD::SCALAR_TO_VECTOR:
2872 if (Subtarget->enablePExtSIMDCodeGen()) {
2873 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2874 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2875 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2876 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2877 CurDAG->RemoveDeadNode(N: Node);
2878 return;
2879 }
2880 }
2881 break;
2882 case ISD::INSERT_SUBVECTOR:
2883 case RISCVISD::TUPLE_INSERT: {
2884 SDValue V = Node->getOperand(Num: 0);
2885 SDValue SubV = Node->getOperand(Num: 1);
2886 SDLoc DL(SubV);
2887 auto Idx = Node->getConstantOperandVal(Num: 2);
2888 MVT SubVecVT = SubV.getSimpleValueType();
2889
2890 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2891 MVT SubVecContainerVT = SubVecVT;
2892 // Establish the correct scalable-vector types for any fixed-length type.
2893 if (SubVecVT.isFixedLengthVector()) {
2894 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT: SubVecVT);
2895 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
2896 [[maybe_unused]] bool ExactlyVecRegSized =
2897 Subtarget->expandVScale(X: SubVecVT.getSizeInBits())
2898 .isKnownMultipleOf(RHS: Subtarget->expandVScale(X: VecRegSize));
2899 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2900 .getKnownMinValue()));
2901 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2902 }
2903 MVT ContainerVT = VT;
2904 if (VT.isFixedLengthVector())
2905 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2906
2907 const auto *TRI = Subtarget->getRegisterInfo();
2908 unsigned SubRegIdx;
2909 std::tie(args&: SubRegIdx, args&: Idx) =
2910 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2911 VecVT: ContainerVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
2912
2913 // If the Idx hasn't been completely eliminated then this is a subvector
2914 // insert which doesn't naturally align to a vector register. These must
2915 // be handled using instructions to manipulate the vector registers.
2916 if (Idx != 0)
2917 break;
2918
2919 RISCVVType::VLMUL SubVecLMUL =
2920 RISCVTargetLowering::getLMUL(VT: SubVecContainerVT);
2921 [[maybe_unused]] bool IsSubVecPartReg =
2922 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2923 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2924 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2925 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2926 V.isUndef()) &&
2927 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2928 "the subvector is smaller than a full-sized register");
2929
2930 // If we haven't set a SubRegIdx, then we must be going between
2931 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2932 if (SubRegIdx == RISCV::NoSubRegister) {
2933 unsigned InRegClassID =
2934 RISCVTargetLowering::getRegClassIDForVecVT(VT: ContainerVT);
2935 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2936 InRegClassID &&
2937 "Unexpected subvector extraction");
2938 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
2939 SDNode *NewNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
2940 dl: DL, VT, Op1: SubV, Op2: RC);
2941 ReplaceNode(F: Node, T: NewNode);
2942 return;
2943 }
2944
2945 SDValue Insert = CurDAG->getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V, Subreg: SubV);
2946 ReplaceNode(F: Node, T: Insert.getNode());
2947 return;
2948 }
2949 case ISD::EXTRACT_SUBVECTOR:
2950 case RISCVISD::TUPLE_EXTRACT: {
2951 SDValue V = Node->getOperand(Num: 0);
2952 auto Idx = Node->getConstantOperandVal(Num: 1);
2953 MVT InVT = V.getSimpleValueType();
2954 SDLoc DL(V);
2955
2956 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2957 MVT SubVecContainerVT = VT;
2958 // Establish the correct scalable-vector types for any fixed-length type.
2959 if (VT.isFixedLengthVector()) {
2960 assert(Idx == 0);
2961 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2962 }
2963 if (InVT.isFixedLengthVector())
2964 InVT = TLI.getContainerForFixedLengthVector(VT: InVT);
2965
2966 const auto *TRI = Subtarget->getRegisterInfo();
2967 unsigned SubRegIdx;
2968 std::tie(args&: SubRegIdx, args&: Idx) =
2969 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2970 VecVT: InVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
2971
2972 // If the Idx hasn't been completely eliminated then this is a subvector
2973 // extract which doesn't naturally align to a vector register. These must
2974 // be handled using instructions to manipulate the vector registers.
2975 if (Idx != 0)
2976 break;
2977
2978 // If we haven't set a SubRegIdx, then we must be going between
2979 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2980 if (SubRegIdx == RISCV::NoSubRegister) {
2981 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT: InVT);
2982 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2983 InRegClassID &&
2984 "Unexpected subvector extraction");
2985 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
2986 SDNode *NewNode =
2987 CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT, Op1: V, Op2: RC);
2988 ReplaceNode(F: Node, T: NewNode);
2989 return;
2990 }
2991
2992 SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
2993 ReplaceNode(F: Node, T: Extract.getNode());
2994 return;
2995 }
2996 case RISCVISD::VMV_S_X_VL:
2997 case RISCVISD::VFMV_S_F_VL:
2998 case RISCVISD::VMV_V_X_VL:
2999 case RISCVISD::VFMV_V_F_VL: {
3000 // Try to match splat of a scalar load to a strided load with stride of x0.
3001 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3002 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3003 if (!Node->getOperand(Num: 0).isUndef())
3004 break;
3005 SDValue Src = Node->getOperand(Num: 1);
3006 auto *Ld = dyn_cast<LoadSDNode>(Val&: Src);
3007 // Can't fold load update node because the second
3008 // output is used so that load update node can't be removed.
3009 if (!Ld || Ld->isIndexed())
3010 break;
3011 EVT MemVT = Ld->getMemoryVT();
3012 // The memory VT should be the same size as the element type.
3013 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3014 break;
3015 if (!IsProfitableToFold(N: Src, U: Node, Root: Node) ||
3016 !IsLegalToFold(N: Src, U: Node, Root: Node, OptLevel: TM.getOptLevel()))
3017 break;
3018
3019 SDValue VL;
3020 if (IsScalarMove) {
3021 // We could deal with more VL if we update the VSETVLI insert pass to
3022 // avoid introducing more VSETVLI.
3023 if (!isOneConstant(V: Node->getOperand(Num: 2)))
3024 break;
3025 selectVLOp(N: Node->getOperand(Num: 2), VL);
3026 } else
3027 selectVLOp(N: Node->getOperand(Num: 2), VL);
3028
3029 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
3030 SDValue SEW = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
3031
3032 // If VL=1, then we don't need to do a strided load and can just do a
3033 // regular load.
3034 bool IsStrided = !isOneConstant(V: VL);
3035
3036 // Only do a strided load if we have optimized zero-stride vector load.
3037 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3038 break;
3039
3040 SmallVector<SDValue> Operands = {
3041 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT), 0),
3042 Ld->getBasePtr()};
3043 if (IsStrided)
3044 Operands.push_back(Elt: CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT));
3045 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
3046 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
3047 Operands.append(IL: {VL, SEW, PolicyOp, Ld->getChain()});
3048
3049 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
3050 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3051 /*IsMasked*/ Masked: false, Strided: IsStrided, /*FF*/ false,
3052 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
3053 MachineSDNode *Load =
3054 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, ResultTys: {VT, MVT::Other}, Ops: Operands);
3055 // Update the chain.
3056 ReplaceUses(F: Src.getValue(R: 1), T: SDValue(Load, 1));
3057 // Record the mem-refs
3058 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {Ld->getMemOperand()});
3059 // Replace the splat with the vlse.
3060 ReplaceNode(F: Node, T: Load);
3061 return;
3062 }
3063 case ISD::PREFETCH:
3064 unsigned Locality = Node->getConstantOperandVal(Num: 3);
3065 if (Locality > 2)
3066 break;
3067
3068 auto *LoadStoreMem = cast<MemSDNode>(Val: Node);
3069 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3070 MMO->setFlags(MachineMemOperand::MONonTemporal);
3071
3072 int NontemporalLevel = 0;
3073 switch (Locality) {
3074 case 0:
3075 NontemporalLevel = 3; // NTL.ALL
3076 break;
3077 case 1:
3078 NontemporalLevel = 1; // NTL.PALL
3079 break;
3080 case 2:
3081 NontemporalLevel = 0; // NTL.P1
3082 break;
3083 default:
3084 llvm_unreachable("unexpected locality value.");
3085 }
3086
3087 if (NontemporalLevel & 0b1)
3088 MMO->setFlags(MONontemporalBit0);
3089 if (NontemporalLevel & 0b10)
3090 MMO->setFlags(MONontemporalBit1);
3091 break;
3092 }
3093
3094 // Select the default instruction.
3095 SelectCode(N: Node);
3096}
3097
3098bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
3099 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3100 std::vector<SDValue> &OutOps) {
3101 // Always produce a register and immediate operand, as expected by
3102 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3103 switch (ConstraintID) {
3104 case InlineAsm::ConstraintCode::o:
3105 case InlineAsm::ConstraintCode::m: {
3106 SDValue Op0, Op1;
3107 [[maybe_unused]] bool Found = SelectAddrRegImm(Addr: Op, Base&: Op0, Offset&: Op1);
3108 assert(Found && "SelectAddrRegImm should always succeed");
3109 OutOps.push_back(x: Op0);
3110 OutOps.push_back(x: Op1);
3111 return false;
3112 }
3113 case InlineAsm::ConstraintCode::A:
3114 OutOps.push_back(x: Op);
3115 OutOps.push_back(
3116 x: CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget->getXLenVT()));
3117 return false;
3118 default:
3119 report_fatal_error(reason: "Unexpected asm memory constraint " +
3120 InlineAsm::getMemConstraintName(C: ConstraintID));
3121 }
3122
3123 return true;
3124}
3125
3126bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
3127 SDValue &Offset) {
3128 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Addr)) {
3129 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT: Subtarget->getXLenVT());
3130 Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT: Subtarget->getXLenVT());
3131 return true;
3132 }
3133
3134 return false;
3135}
3136
3137// Fold constant addresses.
3138static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3139 const MVT VT, const RISCVSubtarget *Subtarget,
3140 SDValue Addr, SDValue &Base, SDValue &Offset,
3141 bool IsPrefetch = false) {
3142 if (!isa<ConstantSDNode>(Val: Addr))
3143 return false;
3144
3145 int64_t CVal = cast<ConstantSDNode>(Val&: Addr)->getSExtValue();
3146
3147 // If the constant is a simm12, we can fold the whole constant and use X0 as
3148 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3149 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3150 int64_t Lo12 = SignExtend64<12>(x: CVal);
3151 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3152 if (!Subtarget->is64Bit() || isInt<32>(x: Hi)) {
3153 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3154 return false;
3155 if (Hi) {
3156 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3157 Base = SDValue(
3158 CurDAG->getMachineNode(Opcode: RISCV::LUI, dl: DL, VT,
3159 Op1: CurDAG->getTargetConstant(Val: Hi20, DL, VT)),
3160 0);
3161 } else {
3162 Base = CurDAG->getRegister(Reg: RISCV::X0, VT);
3163 }
3164 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3165 return true;
3166 }
3167
3168 // Ask how constant materialization would handle this constant.
3169 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: CVal, STI: *Subtarget);
3170
3171 // If the last instruction would be an ADDI, we can fold its immediate and
3172 // emit the rest of the sequence as the base.
3173 if (Seq.back().getOpcode() != RISCV::ADDI)
3174 return false;
3175 Lo12 = Seq.back().getImm();
3176 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3177 return false;
3178
3179 // Drop the last instruction.
3180 Seq.pop_back();
3181 assert(!Seq.empty() && "Expected more instructions in sequence");
3182
3183 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3184 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3185 return true;
3186}
3187
3188// Is this ADD instruction only used as the base pointer of scalar loads and
3189// stores?
3190static bool isWorthFoldingAdd(SDValue Add) {
3191 for (auto *User : Add->users()) {
3192 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3193 User->getOpcode() != RISCVISD::LD_RV32 &&
3194 User->getOpcode() != RISCVISD::SD_RV32 &&
3195 User->getOpcode() != ISD::ATOMIC_LOAD &&
3196 User->getOpcode() != ISD::ATOMIC_STORE)
3197 return false;
3198 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3199 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3200 VT != MVT::f64)
3201 return false;
3202 // Don't allow stores of the value. It must be used as the address.
3203 if (User->getOpcode() == ISD::STORE &&
3204 cast<StoreSDNode>(Val: User)->getValue() == Add)
3205 return false;
3206 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3207 cast<AtomicSDNode>(Val: User)->getVal() == Add)
3208 return false;
3209 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3210 (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add))
3211 return false;
3212 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
3213 return false;
3214 }
3215
3216 return true;
3217}
3218
3219bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3220 switch (User->getOpcode()) {
3221 default:
3222 return false;
3223 case ISD::LOAD:
3224 case RISCVISD::LD_RV32:
3225 case ISD::ATOMIC_LOAD:
3226 break;
3227 case ISD::STORE:
3228 // Don't allow stores of Add. It must only be used as the address.
3229 if (cast<StoreSDNode>(Val: User)->getValue() == Add)
3230 return false;
3231 break;
3232 case RISCVISD::SD_RV32:
3233 // Don't allow stores of Add. It must only be used as the address.
3234 if (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add)
3235 return false;
3236 break;
3237 case ISD::ATOMIC_STORE:
3238 // Don't allow stores of Add. It must only be used as the address.
3239 if (cast<AtomicSDNode>(Val: User)->getVal() == Add)
3240 return false;
3241 break;
3242 }
3243
3244 return true;
3245}
3246
3247// To prevent SelectAddrRegImm from folding offsets that conflict with the
3248// fusion of PseudoMovAddr, check if the offset of every use of a given address
3249// is within the alignment.
3250bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3251 Align Alignment) {
3252 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3253 for (auto *User : Addr->users()) {
3254 // If the user is a load or store, then the offset is 0 which is always
3255 // within alignment.
3256 if (isRegImmLoadOrStore(User, Add: Addr))
3257 continue;
3258
3259 if (CurDAG->isBaseWithConstantOffset(Op: SDValue(User, 0))) {
3260 int64_t CVal = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
3261 if (!isInt<12>(x: CVal) || Alignment <= CVal)
3262 return false;
3263
3264 // Make sure all uses are foldable load/stores.
3265 for (auto *AddUser : User->users())
3266 if (!isRegImmLoadOrStore(User: AddUser, Add: SDValue(User, 0)))
3267 return false;
3268
3269 continue;
3270 }
3271
3272 return false;
3273 }
3274
3275 return true;
3276}
3277
// Select a (Base, simm12 Offset) pair for a scalar memory access rooted at
// Addr. Always succeeds: the final fallback uses Addr itself as the base with
// a zero offset. Folds are tried in order: frame index, ADD_LO (%hi/%lo
// fusion), base+simm12, ADD with large immediates, pure constant address.
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
                                         SDValue &Offset) {
  // Frame indices get a target FI base and a zero offset.
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  // (ADD_LO hi, lo): fold the %lo relocation directly into the memory
  // operand's offset field.
  if (Addr.getOpcode() == RISCVISD::ADD_LO) {
    bool CanFold = true;
    // Unconditionally fold if operand 1 is not a global address (e.g.
    // externsymbol)
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val: Addr.getOperand(i: 1))) {
      // For globals, only fold if every use's offset stays within the
      // global's alignment, so the fold can't conflict with the fusion of
      // PseudoMovAddr (see areOffsetsWithinAlignment).
      const DataLayout &DL = CurDAG->getDataLayout();
      Align Alignment = commonAlignment(
          A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
      if (!areOffsetsWithinAlignment(Addr, Alignment))
        CanFold = false;
    }
    if (CanFold) {
      Base = Addr.getOperand(i: 0);
      Offset = Addr.getOperand(i: 1);
      return true;
    }
  }

  // (add base, simm12): fold the constant into the offset field.
  if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    if (isInt<12>(x: CVal)) {
      Base = Addr.getOperand(i: 0);
      if (Base.getOpcode() == RISCVISD::ADD_LO) {
        SDValue LoOperand = Base.getOperand(i: 1);
        if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: LoOperand)) {
          // If the Lo in (ADD_LO hi, lo) is a global variable's address
          // (its low part, really), then we can rely on the alignment of that
          // variable to provide a margin of safety before low part can overflow
          // the 12 bits of the load/store offset. Check if CVal falls within
          // that margin; if so (low part + CVal) can't overflow.
          const DataLayout &DL = CurDAG->getDataLayout();
          Align Alignment = commonAlignment(
              A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
          if ((CVal == 0 || Alignment > CVal) &&
              areOffsetsWithinAlignment(Addr: Base, Alignment)) {
            // Fold CVal into the %lo relocation itself.
            int64_t CombinedOffset = CVal + GA->getOffset();
            Base = Base.getOperand(i: 0);
            Offset = CurDAG->getTargetGlobalAddress(
                GV: GA->getGlobal(), DL: SDLoc(LoOperand), VT: LoOperand.getValueType(),
                offset: CombinedOffset, TargetFlags: GA->getTargetFlags());
            return true;
          }
        }
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
        Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
    // an ADDI for part of the offset and fold the rest into the load/store.
    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
    if (CVal >= -4096 && CVal <= 4094) {
      int64_t Adj = CVal < 0 ? -2048 : 2047;
      Base = SDValue(
          CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
                                 Op2: CurDAG->getSignedTargetConstant(Val: Adj, DL, VT)),
          0);
      Offset = CurDAG->getSignedTargetConstant(Val: CVal - Adj, DL, VT);
      return true;
    }

    // For larger immediates, we might be able to save one instruction from
    // constant materialization by folding the Lo12 bits of the immediate into
    // the address. We should only do this if the ADD is only used by loads and
    // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
    // separately with the full materialized immediate creating extra
    // instructions.
    if (isWorthFoldingAdd(Add: Addr) &&
        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
                           Offset, /*IsPrefetch=*/false)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
          0);
      return true;
    }
  }

  // A bare constant address: materialize the high bits as the base and fold
  // the low 12 bits into the offset.
  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/false))
    return true;

  // Fallback: use the address as-is with a zero offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
  return true;
}
3381
3382/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3383bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
3384 SDValue &Offset) {
3385 if (SelectAddrFrameIndex(Addr, Base, Offset))
3386 return true;
3387
3388 SDLoc DL(Addr);
3389 MVT VT = Addr.getSimpleValueType();
3390
3391 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3392 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3393 if (isUInt<9>(x: CVal)) {
3394 Base = Addr.getOperand(i: 0);
3395
3396 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3397 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3398 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3399 return true;
3400 }
3401 }
3402
3403 Base = Addr;
3404 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3405 return true;
3406}
3407
/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
/// Offset should be all zeros.
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
                                                 SDValue &Offset) {
  // A bare frame index needs no offset adjustment.
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  // Try to fold a simm12 constant offset, but only when its low 5 bits are
  // zero; otherwise fall back to the whole address with a zero offset.
  if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    if (isInt<12>(x: CVal)) {
      Base = Addr.getOperand(i: 0);

      // Early-out if not a valid offset.
      if ((CVal & 0b11111) != 0) {
        Base = Addr;
        Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
        return true;
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
        Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
    // one instruction by folding adjustment (-2048 or 2016) into the address.
    // Note both adjustments have their low 5 bits clear, as required here.
    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj;
      // The ADDI absorbs the part of the immediate that can't go in Offset.
      Base =
          SDValue(CurDAG->getMachineNode(
                      Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
                      Op2: CurDAG->getSignedTargetConstant(Val: AdjustedOffset, DL, VT)),
                  0);
      Offset = CurDAG->getSignedTargetConstant(Val: Adj, DL, VT);
      return true;
    }

    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
                           Offset, /*IsPrefetch=*/true)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
          0);
      return true;
    }
  }

  // Try to materialize the whole address as a constant.
  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/true))
    return true;

  // Fallback: whole address as base, zero offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
  return true;
}
3474
3475/// Return true if this a load/store that we have a RegRegScale instruction for.
3476static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3477 const RISCVSubtarget &Subtarget) {
3478 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3479 return false;
3480 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3481 if (!(VT.isScalarInteger() &&
3482 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3483 !((VT == MVT::f32 || VT == MVT::f64) &&
3484 Subtarget.hasVendorXTHeadFMemIdx()))
3485 return false;
3486 // Don't allow stores of the value. It must be used as the address.
3487 if (User->getOpcode() == ISD::STORE &&
3488 cast<StoreSDNode>(Val: User)->getValue() == Add)
3489 return false;
3490
3491 return true;
3492}
3493
/// Is it profitable to fold this Add into RegRegScale load/store. If \p
/// Shift is non-null, then we have matched a shl+add. We allow reassociating
/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
/// single addi and we don't have a SHXADD instruction we could use.
/// FIXME: May still need to check how many and what kind of users the SHL has.
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
                                          SDValue Add,
                                          SDValue Shift = SDValue()) {
  bool FoundADDI = false;
  for (auto *User : Add->users()) {
    // A RegRegScale load/store user can always fold the Add.
    if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
      continue;

    // Allow a single ADDI that is used by loads/stores if we matched a shift.
    if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
        !isa<ConstantSDNode>(Val: User->getOperand(Num: 1)) ||
        !isInt<12>(x: cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue()))
      return false;

    FoundADDI = true;

    // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
    assert(Shift.getOpcode() == ISD::SHL);
    unsigned ShiftAmt = Shift.getConstantOperandVal(i: 1);
    if (Subtarget.hasShlAdd(ShAmt: ShiftAmt))
      return false;

    // All users of the ADDI should be load/store.
    for (auto *ADDIUser : User->users())
      if (!isRegRegScaleLoadOrStore(User: ADDIUser, Add: SDValue(User, 0), Subtarget))
        return false;
  }

  return true;
}
3529
/// Match an address of the form (add Base, (shl Index, Scale)) for a
/// reg+scaled-reg addressing mode, with Scale limited to [0, MaxShiftAmount].
/// A plain (add Base, Index) matches with Scale 0.
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                              unsigned MaxShiftAmount,
                                              SDValue &Base, SDValue &Index,
                                              SDValue &Scale) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = Addr.getOperand(i: 0);
  SDValue RHS = Addr.getOperand(i: 1);

  EVT VT = Addr.getSimpleValueType();
  // Helper: match N = (shl X, C) with C in range; capture X and C.
  auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
                                              SDValue &Shift) {
    if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
      return false;

    // Only match shifts by a value in range [0, MaxShiftAmount].
    unsigned ShiftAmt = N.getConstantOperandVal(i: 1);
    if (ShiftAmt > MaxShiftAmount)
      return false;

    Index = N.getOperand(i: 0);
    Shift = CurDAG->getTargetConstant(Val: ShiftAmt, DL: SDLoc(N), VT);
    return true;
  };

  if (auto *C1 = dyn_cast<ConstantSDNode>(Val&: RHS)) {
    // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
    // Reassociate so the simm12 C1 is absorbed by a new ADDI and the shl
    // becomes the scaled index.
    if (LHS.getOpcode() == ISD::ADD &&
        !isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
        isInt<12>(x: C1->getSExtValue())) {
      if (SelectShl(LHS.getOperand(i: 1), Index, Scale) &&
          isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 1))) {
        SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
                                                  DL: SDLoc(Addr), VT);
        Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
                                              Op1: LHS.getOperand(i: 0), Op2: C1Val),
                       0);
        return true;
      }

      // Add is commutative so we need to check both operands.
      if (SelectShl(LHS.getOperand(i: 0), Index, Scale) &&
          isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 0))) {
        SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
                                                  DL: SDLoc(Addr), VT);
        Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
                                              Op1: LHS.getOperand(i: 1), Op2: C1Val),
                       0);
        return true;
      }
    }

    // Don't match add with constants.
    // FIXME: Is this profitable for large constants that have 0s in the lower
    // 12 bits that we can materialize with LUI?
    return false;
  }

  // Try to match a shift on the RHS.
  if (SelectShl(RHS, Index, Scale)) {
    if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: RHS))
      return false;
    Base = LHS;
    return true;
  }

  // Try to match a shift on the LHS.
  if (SelectShl(LHS, Index, Scale)) {
    if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: LHS))
      return false;
    Base = RHS;
    return true;
  }

  // No shift matched: plain reg+reg with a scale of zero.
  if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr))
    return false;

  Base = LHS;
  Index = RHS;
  Scale = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT);
  return true;
}
3612
3613bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3614 unsigned MaxShiftAmount,
3615 unsigned Bits, SDValue &Base,
3616 SDValue &Index,
3617 SDValue &Scale) {
3618 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3619 return false;
3620
3621 if (Index.getOpcode() == ISD::AND) {
3622 auto *C = dyn_cast<ConstantSDNode>(Val: Index.getOperand(i: 1));
3623 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3624 Index = Index.getOperand(i: 0);
3625 return true;
3626 }
3627 }
3628
3629 return false;
3630}
3631
3632bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3633 SDValue &Offset) {
3634 if (Addr.getOpcode() != ISD::ADD)
3635 return false;
3636
3637 if (isa<ConstantSDNode>(Val: Addr.getOperand(i: 1)))
3638 return false;
3639
3640 Base = Addr.getOperand(i: 0);
3641 Offset = Addr.getOperand(i: 1);
3642 return true;
3643}
3644
/// Select a shift-amount operand for a shift of width \p ShiftWidth,
/// bypassing nodes (zext, redundant AND masks, ADD/SUB of multiples of the
/// width) that cannot change the low bits the shift instruction reads.
/// Always returns true; in the worst case ShAmt is N itself.
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  ShAmt = N;

  // Peek through zext.
  if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
    ShAmt = ShAmt.getOperand(i: 0);

  // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
  // amount. If there is an AND on the shift amount, we can bypass it if it
  // doesn't affect any of those bits.
  if (ShAmt.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
    const APInt &AndMask = ShAmt.getConstantOperandAPInt(i: 1);

    // Since the max shift amount is a power of 2 we can subtract 1 to make a
    // mask that covers the bits needed to represent all shift amounts.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);

    if (ShMask.isSubsetOf(RHS: AndMask)) {
      // The AND keeps every bit the shift reads, so it is a no-op here.
      ShAmt = ShAmt.getOperand(i: 0);
    } else {
      // SimplifyDemandedBits may have optimized the mask so try restoring any
      // bits that are known zero.
      KnownBits Known = CurDAG->computeKnownBits(Op: ShAmt.getOperand(i: 0));
      if (!ShMask.isSubsetOf(RHS: AndMask | Known.Zero))
        return true;
      ShAmt = ShAmt.getOperand(i: 0);
    }
  }

  if (ShAmt.getOpcode() == ISD::ADD &&
      isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(i: 1);
    // If we are shifting by X+N where N == 0 mod Size, then just shift by X
    // to avoid the ADD.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      ShAmt = ShAmt.getOperand(i: 0);
      return true;
    }
  } else if (ShAmt.getOpcode() == ISD::SUB &&
             isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 0))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(i: 0);
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
    // generate a NEG instead of a SUB of a constant.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      SDValue Zero = CurDAG->getRegister(Reg: RISCV::X0, VT);
      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
      MachineSDNode *Neg = CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT, Op1: Zero,
                                                  Op2: ShAmt.getOperand(i: 1));
      ShAmt = SDValue(Neg, 0);
      return true;
    }
    // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
    // to generate a NOT instead of a SUB of a constant.
    if (Imm % ShiftWidth == ShiftWidth - 1) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      MachineSDNode *Not = CurDAG->getMachineNode(
          Opcode: RISCV::XORI, dl: DL, VT, Op1: ShAmt.getOperand(i: 1),
          Op2: CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/IsTarget: true));
      ShAmt = SDValue(Not, 0);
      return true;
    }
  }

  return true;
}
3716
/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
/// check for equality with 0. This function emits instructions that convert the
/// seteq/setne into something that can be compared with 0.
/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
/// ISD::SETNE).
bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
                                    SDValue &Val) {
  assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
         "Unexpected condition code!");

  // We're looking for a setcc.
  if (N->getOpcode() != ISD::SETCC)
    return false;

  // Must be an equality comparison.
  ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
  if (CCVal != ExpectedCCVal)
    return false;

  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  // Only scalar integer comparisons are handled here.
  if (!LHS.getValueType().isScalarInteger())
    return false;

  // If the RHS side is 0, we don't need any extra instructions, return the LHS.
  if (isNullConstant(V: RHS)) {
    Val = LHS;
    return true;
  }

  SDLoc DL(N);

  if (auto *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
    int64_t CVal = C->getSExtValue();
    // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
    // non-zero otherwise.
    if (CVal == -2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              Opcode: RISCV::XORI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
              Op2: CurDAG->getSignedTargetConstant(Val: CVal, DL, VT: N->getValueType(ResNo: 0))),
          0);
      return true;
    }
    // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
    // if the LHS is equal to the RHS and non-zero otherwise.
    if (isInt<12>(x: CVal) || CVal == 2048) {
      unsigned Opc = RISCV::ADDI;
      // The LHS is a sign_extend_inreg from i32: use ADDIW and look through
      // the extension.
      if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
          cast<VTSDNode>(Val: LHS.getOperand(i: 1))->getVT() == MVT::i32) {
        Opc = RISCV::ADDIW;
        LHS = LHS.getOperand(i: 0);
      }

      Val = SDValue(CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
                                           Op2: CurDAG->getSignedTargetConstant(
                                               Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
                    0);
      return true;
    }
    // A power-of-2 RHS can be cleared with a single BINVI, producing 0 exactly
    // when LHS == RHS.
    if (isPowerOf2_64(Value: CVal) && Subtarget->hasStdExtZbs()) {
      Val = SDValue(
          CurDAG->getMachineNode(
              Opcode: RISCV::BINVI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
              Op2: CurDAG->getTargetConstant(Val: Log2_64(Value: CVal), DL, VT: N->getValueType(ResNo: 0))),
          0);
      return true;
    }
    // Same as the addi case above but for larger immediates (signed 26-bit) use
    // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
    // anything which can be done with a single lui as it might be compressible.
    if (Subtarget->hasVendorXqcilia() && isInt<26>(x: CVal) &&
        (CVal & 0xFFF) != 0) {
      Val = SDValue(
          CurDAG->getMachineNode(
              Opcode: RISCV::QC_E_ADDI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
              Op2: CurDAG->getSignedTargetConstant(Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
          0);
      return true;
    }
  }

  // If nothing else we can XOR the LHS and RHS to produce zero if they are
  // equal and a non-zero value if they aren't.
  Val = SDValue(
      CurDAG->getMachineNode(Opcode: RISCV::XOR, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, Op2: RHS), 0);
  return true;
}
3806
3807bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3808 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3809 cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT().getSizeInBits() == Bits) {
3810 Val = N.getOperand(i: 0);
3811 return true;
3812 }
3813
3814 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3815 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
3816 return N;
3817
3818 SDValue N0 = N.getOperand(i: 0);
3819 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
3820 N.getConstantOperandVal(i: 1) == ShiftAmt &&
3821 N0.getConstantOperandVal(i: 1) == ShiftAmt)
3822 return N0.getOperand(i: 0);
3823
3824 return N;
3825 };
3826
3827 MVT VT = N.getSimpleValueType();
3828 if (CurDAG->ComputeNumSignBits(Op: N) > (VT.getSizeInBits() - Bits)) {
3829 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3830 return true;
3831 }
3832
3833 return false;
3834}
3835
3836bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3837 if (N.getOpcode() == ISD::AND) {
3838 auto *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
3839 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3840 Val = N.getOperand(i: 0);
3841 return true;
3842 }
3843 }
3844 MVT VT = N.getSimpleValueType();
3845 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: Bits);
3846 if (CurDAG->MaskedValueIsZero(Op: N, Mask)) {
3847 Val = N;
3848 return true;
3849 }
3850
3851 return false;
3852}
3853
/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  // Case 1: the mask is outermost -- (and (shl/srl/sra X, C2), C1).
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    SDValue N0 = N.getOperand(i: 0);

    if (bool LeftShift = N0.getOpcode() == ISD::SHL;
        (LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
      uint64_t Mask = N.getConstantOperandVal(i: 1);
      unsigned C2 = N0.getConstantOperandVal(i: 1);

      // Drop the mask bits the inner shift already forces to zero so Mask
      // reflects only the bits the AND actually constrains.
      unsigned XLen = Subtarget->getXLen();
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(N: C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(N: XLen - C2);

      if (isShiftedMask_64(Value: Mask)) {
        unsigned Leading = XLen - llvm::bit_width(Value: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        // The mask's trailing zero count must match the SHXADD shift amount.
        if (Trailing != ShAmt)
          return false;

        unsigned Opcode;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
        // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
        // followed by a SHXADD with c3 for the X amount.
        if (LeftShift && Leading == 0 && C2 < Trailing)
          Opcode = RISCV::SRLI;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
        // followed by a SHXADD with c3 for the X amount.
        else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
          Opcode = RISCV::SRLIW;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == C2)
          Opcode = RISCV::SRLI;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == 32 + C2)
          Opcode = RISCV::SRLIW;
        else
          return false;

        SDLoc DL(N);
        EVT VT = N.getValueType();
        // Reuse ShAmt as the new shift amount for the SRLI/SRLIW.
        ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
        Val = SDValue(
            CurDAG->getMachineNode(Opcode, dl: DL, VT, Op1: N0.getOperand(i: 0),
                                   Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)),
            0);
        return true;
      }
    } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
               isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
      uint64_t Mask = N.getConstantOperandVal(i: 1);
      unsigned C2 = N0.getConstantOperandVal(i: 1);

      // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
      // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
      // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
      // the X amount.
      if (isShiftedMask_64(Value: Mask)) {
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Value: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT)),
                        0);
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRLI, dl: DL, VT, Op1: Val,
                            Op2: CurDAG->getTargetConstant(Val: Leading + ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
             (LeftShift || N.getOpcode() == ISD::SRL) &&
             isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    // Case 2: the mask is innermost -- (shl/srl (and X, Mask), C1).
    SDValue N0 = N.getOperand(i: 0);
    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
        isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
      uint64_t Mask = N0.getConstantOperandVal(i: 1);
      if (isShiftedMask_64(Value: Mask)) {
        unsigned C1 = N.getConstantOperandVal(i: 1);
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Value: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
        if (LeftShift && Leading == 32 && Trailing > 0 &&
            (Trailing + C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
                        0);
          return true;
        }
        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
        if (!LeftShift && Leading == 32 && Trailing > C1 &&
            (Trailing - C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}
3983
/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD_UW we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
                                          SDValue &Val) {
  // Match (and (shl y, c2), c1). The one-use checks avoid duplicating the
  // folded nodes for other users.
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1)) &&
      N.hasOneUse()) {
    SDValue N0 = N.getOperand(i: 0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
        N0.hasOneUse()) {
      uint64_t Mask = N.getConstantOperandVal(i: 1);
      unsigned C2 = N0.getConstantOperandVal(i: 1);

      // Drop the mask bits the shift already forces to zero.
      Mask &= maskTrailingZeros<uint64_t>(N: C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
      // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
      // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
      if (isShiftedMask_64(Value: Mask)) {
        unsigned Leading = llvm::countl_zero(Val: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: C2 - ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}
4020
4021bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
4022 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4023 if (N->getFlags().hasDisjoint())
4024 return true;
4025 return CurDAG->haveNoCommonBitsSet(A: N->getOperand(Num: 0), B: N->getOperand(Num: 1));
4026}
4027
4028bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4029 SDValue N, SDValue &Val) {
4030 int OrigCost = RISCVMatInt::getIntMatCost(Val: APInt(64, OrigImm), Size: 64, STI: *Subtarget,
4031 /*CompressionCost=*/true);
4032 int Cost = RISCVMatInt::getIntMatCost(Val: APInt(64, Imm), Size: 64, STI: *Subtarget,
4033 /*CompressionCost=*/true);
4034 if (OrigCost <= Cost)
4035 return false;
4036
4037 Val = selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm, Subtarget: *Subtarget);
4038 return true;
4039}
4040
4041bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
4042 if (!isa<ConstantSDNode>(Val: N))
4043 return false;
4044 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
4045 if ((Imm >> 31) != 1)
4046 return false;
4047
4048 for (const SDNode *U : N->users()) {
4049 switch (U->getOpcode()) {
4050 case ISD::ADD:
4051 break;
4052 case ISD::OR:
4053 if (orDisjoint(N: U))
4054 break;
4055 return false;
4056 default:
4057 return false;
4058 }
4059 }
4060
4061 return selectImm64IfCheaper(Imm: 0xffffffff00000000 | Imm, OrigImm: Imm, N, Val);
4062}
4063
/// Try to select the negated form of a 64-bit constant when it is cheaper to
/// materialize. Only attempted when every user is an add, or a vector splat
/// whose users are all (vector) adds.
bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(Val: N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
  // Skip 32-bit constants; negating INT64_MIN would overflow.
  if (isInt<32>(x: Imm))
    return false;
  if (Imm == INT64_MIN)
    return false;

  // Abandon the transform unless every user can absorb the negation.
  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::ADD:
      break;
    case RISCVISD::VMV_V_X_VL:
      // A splat is acceptable only if all of its users are adds.
      if (!all_of(Range: U->users(), P: [](const SDNode *V) {
            return V->getOpcode() == ISD::ADD ||
                   V->getOpcode() == RISCVISD::ADD_VL;
          }))
        return false;
      break;
    default:
      return false;
    }
  }

  return selectImm64IfCheaper(Imm: -Imm, OrigImm: Imm, N, Val);
}
4091
/// Try to select the bitwise-inverted form of a constant when it is cheaper
/// to materialize. Only attempted when every user is a scalar AND/OR/XOR
/// (with Zbb/Zbkb) or a splat feeding ANDs (with Zvkb) -- presumably so the
/// inverted operand folds into an inverting logic op; verify against the
/// matching patterns.
bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(Val: N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();

  // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
  if (isInt<32>(x: Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
    return false;

  // Abandon this transform if the constant is needed elsewhere.
  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
        return false;
      break;
    case RISCVISD::VMV_V_X_VL:
      if (!Subtarget->hasStdExtZvkb())
        return false;
      // The splat's own users must all be (vector) ANDs.
      if (!all_of(Range: U->users(), P: [](const SDNode *V) {
            return V->getOpcode() == ISD::AND ||
                   V->getOpcode() == RISCVISD::AND_VL;
          }))
        return false;
      break;
    default:
      return false;
    }
  }

  // 32-bit case: the early-out above limited this to constants whose low 12
  // bits are all ones, so ~Imm needs no ADDI.
  if (isInt<32>(x: Imm)) {
    Val =
        selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm: ~Imm, Subtarget: *Subtarget);
    return true;
  }

  // For 64-bit constants, the instruction sequences get complex,
  // so we select inverted only if it's cheaper.
  return selectImm64IfCheaper(Imm: ~Imm, OrigImm: Imm, N, Val);
}
4134
/// Return true if the RVV pseudo \p User only demands the low \p Bits of its
/// scalar operand \p UserOpNo, as reported by
/// RISCV::getVectorLowDemandedScalarBits for the instruction's SEW.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
                                        unsigned Bits,
                                        const TargetInstrInfo *TII) {
  // Only pseudos with an associated MC opcode are handled.
  unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: User->getMachineOpcode());

  if (!MCOpcode)
    return false;

  const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
  const uint64_t TSFlags = MCID.TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return false;
  assert(RISCVII::hasVLOp(TSFlags));

  // Locate the VL operand by stepping back over the optional chain and
  // policy operands; the SEW operand immediately follows VL.
  unsigned ChainOpIdx = User->getNumOperands() - 1;
  bool HasChainOp = User->getOperand(Num: ChainOpIdx).getValueType() == MVT::Other;
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
  unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
  const unsigned Log2SEW = User->getConstantOperandVal(Num: VLIdx + 1);

  // The VL operand itself is not a narrowable scalar use.
  if (UserOpNo == VLIdx)
    return false;

  auto NumDemandedBits =
      RISCV::getVectorLowDemandedScalarBits(Opcode: MCOpcode, Log2SEW);
  return NumDemandedBits && Bits >= *NumDemandedBits;
}
4162
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
                                        const unsigned Depth) const {
  // At the root (Depth == 0) only the opcodes below are expected from the
  // calling PatFrags; recursive calls (Depth != 0) may visit any opcode, so
  // the assert is relaxed for them.
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node) || Depth != 0) &&
         "Unexpected opcode");

  // Conservatively answer "no" rather than recurse without bound.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
  if (Depth == 0 && !Node->getValueType(ResNo: 0).isScalarInteger())
    return false;

  for (SDUse &Use : Node->uses()) {
    SDNode *User = Use.getUser();
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      // Unknown opcode: still OK if the user is a vector pseudo whose scalar
      // operand only demands the low SEW (or fewer) bits.
      if (vectorPseudoHasAllNBitUsers(User, UserOpNo: Use.getOperandNo(), Bits, TII))
        break;
      return false;
    // All of these *W instructions (and the FP conversions from 32-bit ints)
    // only read the low 32 bits of their GPR source operands.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLSW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::ABSW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_W_INX:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_H_WU_INX:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_W_INX:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_S_WU_INX:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_W_INX:
    case RISCV::FCVT_D_WU:
    case RISCV::FCVT_D_WU_INX:
    case RISCV::TH_REVW:
    case RISCV::TH_SRRIW:
      if (Bits >= 32)
        break;
      return false;
    case RISCV::SLL:
    case RISCV::SRA:
    case RISCV::SRL:
    case RISCV::ROL:
    case RISCV::ROR:
    case RISCV::BSET:
    case RISCV::BCLR:
    case RISCV::BINV:
      // Shift amount operands only use log2(Xlen) bits.
      if (Use.getOperandNo() == 1 && Bits >= Log2_32(Value: Subtarget->getXLen()))
        break;
      return false;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(Num: 1))
        break;
      return false;
    case RISCV::ANDI:
      // The AND immediate clears every bit above its bit width, so only the
      // low bit_width(imm) bits of the input are observable.
      if (Bits >= (unsigned)llvm::bit_width(Value: User->getConstantOperandVal(Num: 1)))
        break;
      // Otherwise, the ANDI may still be fine if all of ITS users only look
      // at the low Bits; check recursively.
      goto RecCheck;
    case RISCV::ORI: {
      // Bits set by the OR immediate don't depend on the input, so only the
      // low bit_width(~imm) input bits are observable.
      uint64_t Imm = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm))
        break;
      [[fallthrough]];
    }
    // Bitwise ops and shXadd propagate low bits to low bits, so they are
    // N-bit users iff all of their users are.
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(Node: User, Bits, Depth: Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(Num: 1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as an
      // N-Bit user.
      if (Bits > ShAmt && hasAllNBitUsers(Node: User, Bits: Bits - ShAmt, Depth: Depth + 1))
        break;
      return false;
    }
    // sext.b and packh only read the low 8 bits of this operand.
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits >= 8)
        break;
      return false;
    // These only read the low 16 bits.
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits >= 16)
        break;
      return false;
    case RISCV::PACK:
      // pack reads the low XLen/2 bits of each source.
      if (Bits >= (Subtarget->getXLen() / 2))
        break;
      return false;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    // Stores only write the low 8/16/32 bits of the value operand (operand 0).
    case RISCV::SB:
      if (Use.getOperandNo() == 0 && Bits >= 8)
        break;
      return false;
    case RISCV::SH:
      if (Use.getOperandNo() == 0 && Bits >= 16)
        break;
      return false;
    case RISCV::SW:
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    case RISCV::TH_EXT:
    case RISCV::TH_EXTU: {
      // th.ext/th.extu extract bits [Msb:Lsb]; only the low Msb+1 bits of the
      // input are read.
      unsigned Msb = User->getConstantOperandVal(Num: 1);
      unsigned Lsb = User->getConstantOperandVal(Num: 2);
      // Behavior of Msb < Lsb is not well documented.
      if (Msb >= Lsb && Bits > Msb)
        break;
      return false;
    }
    }
  }

  return true;
}
4345
4346// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4347bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4348 SDValue &Shl2) {
4349 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4350 if (!C)
4351 return false;
4352
4353 int64_t Offset = C->getSExtValue();
4354 for (unsigned Shift = 0; Shift < 4; Shift++) {
4355 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4356 EVT VT = N->getValueType(ResNo: 0);
4357 Simm5 = CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(N), VT);
4358 Shl2 = CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(N), VT);
4359 return true;
4360 }
4361 }
4362
4363 return false;
4364}
4365
4366// Select VL as a 5 bit immediate or a value that will become a register. This
4367// allows us to choose between VSETIVLI or VSETVLI later.
4368bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4369 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4370 if (C && isUInt<5>(x: C->getZExtValue())) {
4371 VL = CurDAG->getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(N),
4372 VT: N->getValueType(ResNo: 0));
4373 } else if (C && C->isAllOnes()) {
4374 // Treat all ones as VLMax.
4375 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4376 VT: N->getValueType(ResNo: 0));
4377 } else if (isa<RegisterSDNode>(Val: N) &&
4378 cast<RegisterSDNode>(Val&: N)->getReg() == RISCV::X0) {
4379 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4380 // as the register class. Convert X0 to a special immediate to pass the
4381 // MachineVerifier. This is recognized specially by the vsetvli insertion
4382 // pass.
4383 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4384 VT: N->getValueType(ResNo: 0));
4385 } else {
4386 VL = N;
4387 }
4388
4389 return true;
4390}
4391
4392static SDValue findVSplat(SDValue N) {
4393 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4394 if (!N.getOperand(i: 0).isUndef())
4395 return SDValue();
4396 N = N.getOperand(i: 1);
4397 }
4398 SDValue Splat = N;
4399 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4400 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4401 !Splat.getOperand(i: 0).isUndef())
4402 return SDValue();
4403 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4404 return Splat;
4405}
4406
4407bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4408 SDValue Splat = findVSplat(N);
4409 if (!Splat)
4410 return false;
4411
4412 SplatVal = Splat.getOperand(i: 1);
4413 return true;
4414}
4415
4416static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4417 SelectionDAG &DAG,
4418 const RISCVSubtarget &Subtarget,
4419 std::function<bool(int64_t)> ValidateImm,
4420 bool Decrement = false) {
4421 SDValue Splat = findVSplat(N);
4422 if (!Splat || !isa<ConstantSDNode>(Val: Splat.getOperand(i: 1)))
4423 return false;
4424
4425 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4426 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4427 "Unexpected splat operand type");
4428
4429 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4430 // type is wider than the resulting vector element type: an implicit
4431 // truncation first takes place. Therefore, perform a manual
4432 // truncation/sign-extension in order to ignore any truncated bits and catch
4433 // any zero-extended immediate.
4434 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4435 // sign-extending to (XLenVT -1).
4436 APInt SplatConst = Splat.getConstantOperandAPInt(i: 1).sextOrTrunc(width: SplatEltSize);
4437
4438 int64_t SplatImm = SplatConst.getSExtValue();
4439
4440 if (!ValidateImm(SplatImm))
4441 return false;
4442
4443 if (Decrement)
4444 SplatImm -= 1;
4445
4446 SplatVal =
4447 DAG.getSignedTargetConstant(Val: SplatImm, DL: SDLoc(N), VT: Subtarget.getXLenVT());
4448 return true;
4449}
4450
4451bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4452 return selectVSplatImmHelper(N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4453 ValidateImm: [](int64_t Imm) { return isInt<5>(x: Imm); });
4454}
4455
4456bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4457 return selectVSplatImmHelper(
4458 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4459 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4460 /*Decrement=*/true);
4461}
4462
4463bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4464 return selectVSplatImmHelper(
4465 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4466 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4467 /*Decrement=*/false);
4468}
4469
4470bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4471 SDValue &SplatVal) {
4472 return selectVSplatImmHelper(
4473 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4474 ValidateImm: [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4475 /*Decrement=*/true);
4476}
4477
4478bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4479 SDValue &SplatVal) {
4480 return selectVSplatImmHelper(
4481 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4482 ValidateImm: [Bits](int64_t Imm) { return isUIntN(N: Bits, x: Imm); });
4483}
4484
4485bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4486 SDValue Splat = findVSplat(N);
4487 return Splat && selectNegImm(N: Splat.getOperand(i: 1), Val&: SplatVal);
4488}
4489
4490bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4491 auto IsExtOrTrunc = [](SDValue N) {
4492 switch (N->getOpcode()) {
4493 case ISD::SIGN_EXTEND:
4494 case ISD::ZERO_EXTEND:
4495 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4496 // inactive elements will be undef.
4497 case RISCVISD::TRUNCATE_VECTOR_VL:
4498 case RISCVISD::VSEXT_VL:
4499 case RISCVISD::VZEXT_VL:
4500 return true;
4501 default:
4502 return false;
4503 }
4504 };
4505
4506 // We can have multiple nested nodes, so unravel them all if needed.
4507 while (IsExtOrTrunc(N)) {
4508 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4509 return false;
4510 N = N->getOperand(Num: 0);
4511 }
4512
4513 return selectVSplat(N, SplatVal);
4514}
4515
4516bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4517 // Allow bitcasts from XLenVT -> FP.
4518 if (N.getOpcode() == ISD::BITCAST &&
4519 N.getOperand(i: 0).getValueType() == Subtarget->getXLenVT()) {
4520 Imm = N.getOperand(i: 0);
4521 return true;
4522 }
4523 // Allow moves from XLenVT to FP.
4524 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4525 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4526 Imm = N.getOperand(i: 0);
4527 return true;
4528 }
4529
4530 // Otherwise, look for FP constants that can materialized with scalar int.
4531 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val: N.getNode());
4532 if (!CFP)
4533 return false;
4534 const APFloat &APF = CFP->getValueAPF();
4535 // td can handle +0.0 already.
4536 if (APF.isPosZero())
4537 return false;
4538
4539 MVT VT = CFP->getSimpleValueType(ResNo: 0);
4540
4541 MVT XLenVT = Subtarget->getXLenVT();
4542 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4543 assert(APF.isNegZero() && "Unexpected constant.");
4544 return false;
4545 }
4546 SDLoc DL(N);
4547 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
4548 Subtarget: *Subtarget);
4549 return true;
4550}
4551
4552bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4553 SDValue &Imm) {
4554 if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) {
4555 int64_t ImmVal = SignExtend64(X: C->getSExtValue(), B: Width);
4556
4557 if (!isInt<5>(x: ImmVal))
4558 return false;
4559
4560 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL: SDLoc(N),
4561 VT: Subtarget->getXLenVT());
4562 return true;
4563 }
4564
4565 return false;
4566}
4567
4568// Match XOR with a VMSET_VL operand. Return the other operand.
4569bool RISCVDAGToDAGISel::selectVMNOTOp(SDValue N, SDValue &Res) {
4570 if (N.getOpcode() != ISD::XOR)
4571 return false;
4572
4573 if (N.getOperand(i: 0).getOpcode() == RISCVISD::VMSET_VL) {
4574 Res = N.getOperand(i: 1);
4575 return true;
4576 }
4577
4578 if (N.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL) {
4579 Res = N.getOperand(i: 0);
4580 return true;
4581 }
4582
4583 return false;
4584}
4585
4586// Match VMXOR_VL with a VMSET_VL operand. Making sure that that VL operand
4587// matches the parent's VL. Return the other operand of the VMXOR_VL.
4588bool RISCVDAGToDAGISel::selectVMNOT_VLOp(SDNode *Parent, SDValue N,
4589 SDValue &Res) {
4590 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4591 return false;
4592
4593 assert(Parent &&
4594 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4595 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4596 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4597 "Unexpected parent");
4598
4599 // The VL should match the parent.
4600 if (Parent->getOperand(Num: 2) != N->getOperand(Num: 2))
4601 return false;
4602
4603 if (N.getOperand(i: 0).getOpcode() == RISCVISD::VMSET_VL) {
4604 Res = N.getOperand(i: 1);
4605 return true;
4606 }
4607
4608 if (N.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL) {
4609 Res = N.getOperand(i: 0);
4610 return true;
4611 }
4612
4613 return false;
4614}
4615
// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(V: N->getOperand(Num: 1)))
    return false;

  // The input must itself already be a selected machine instruction.
  SDValue N0 = N->getOperand(Num: 0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(i: 0);
    SDValue N01 = N0.getOperand(i: 1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(x: cast<ConstantSDNode>(Val&: N01)->getSExtValue()))
      break;

    // Build the W form with the same operands and replace all uses of the
    // sext.w (the ADDIW node N) with it.
    SDNode *Result =
        CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: N->getValueType(ResNo: 0),
                               Op1: N00, Op2: N01);
    ReplaceUses(F: N, T: Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    // NOTE(review): i32-typed results are skipped here — presumably the
    // elision below is only valid for the i64-typed forms of these nodes;
    // confirm against the RV64 legal-i32 handling.
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(F: N, T: N0.getNode());
    return true;
  }

  return false;
}
4684
4685static bool usesAllOnesMask(SDValue MaskOp) {
4686 const auto IsVMSet = [](unsigned Opc) {
4687 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4688 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4689 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4690 Opc == RISCV::PseudoVMSET_M_B8;
4691 };
4692
4693 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4694 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4695 // assume that it's all-ones? Same applies to its VL.
4696 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4697}
4698
4699static bool isImplicitDef(SDValue V) {
4700 if (!V.isMachineOpcode())
4701 return false;
4702 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4703 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4704 if (!isImplicitDef(V: V.getOperand(i: I)))
4705 return false;
4706 return true;
4707 }
4708 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4709}
4710
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  // Only pseudos listed in the masked-pseudo table have an unmasked twin.
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(MaskedPseudo: N->getMachineOpcode());
  if (!I)
    return false;

  // The transform is only valid when the mask is known all-ones.
  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(MaskOp: N->getOperand(Num: MaskOpIdx)))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opcode: Opc);
  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MCID);

  const MCInstrDesc &MaskedMCID = TII->get(Opcode: N->getMachineOpcode());
  const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MaskedMCID);

  // Sanity-check the structural relationship between the masked and the
  // unmasked form before rebuilding the operand list.
  assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
          !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
         "Unmasked pseudo has policy but masked pseudo doesn't?");
  assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
         "Unexpected pseudo structure");
  assert(!(HasPassthru && !MaskedHasPassthru) &&
         "Unmasked pseudo has passthru but masked pseudo doesn't?");

  SmallVector<SDValue, 8> Ops;
  // Skip the passthru operand at index 0 if the unmasked don't have one.
  bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
  // Likewise drop the trailing policy operand if the unmasked form lacks it.
  bool DropPolicy = !RISCVII::hasVecPolicyOp(TSFlags: MCID.TSFlags) &&
                    RISCVII::hasVecPolicyOp(TSFlags: MaskedMCID.TSFlags);
  // If a chain is present, it is always the very last operand; the policy
  // operand (if any) sits just before it.
  bool HasChainOp =
      N->getOperand(Num: N->getNumOperands() - 1).getValueType() == MVT::Other;
  unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
  for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask
    SDValue Op = N->getOperand(Num: I);
    if (I == MaskOpIdx)
      continue;
    if (DropPolicy && I == LastOpNum)
      continue;
    Ops.push_back(Elt: Op);
  }

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);

  // Carry over memory operands so alias analysis still sees the access.
  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(N: Result, NewMemRefs: N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(F: N, T: Result);

  return true;
}
4769
4770/// If our passthru is an implicit_def, use noreg instead. This side
4771/// steps issues with MachineCSE not being able to CSE expressions with
4772/// IMPLICIT_DEF operands while preserving the semantic intent. See
4773/// pr64282 for context. Note that this transform is the last one
4774/// performed at ISEL DAG to DAG.
4775bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4776 bool MadeChange = false;
4777 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4778
4779 while (Position != CurDAG->allnodes_begin()) {
4780 SDNode *N = &*--Position;
4781 if (N->use_empty() || !N->isMachineOpcode())
4782 continue;
4783
4784 const unsigned Opc = N->getMachineOpcode();
4785 if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opc) ||
4786 !RISCVII::isFirstDefTiedToFirstUse(Desc: TII->get(Opcode: Opc)) ||
4787 !isImplicitDef(V: N->getOperand(Num: 0)))
4788 continue;
4789
4790 SmallVector<SDValue> Ops;
4791 Ops.push_back(Elt: CurDAG->getRegister(Reg: RISCV::NoRegister, VT: N->getValueType(ResNo: 0)));
4792 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4793 SDValue Op = N->getOperand(Num: I);
4794 Ops.push_back(Elt: Op);
4795 }
4796
4797 MachineSDNode *Result =
4798 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
4799 Result->setFlags(N->getFlags());
4800 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val: N)->memoperands());
4801 ReplaceUses(F: N, T: Result);
4802 MadeChange = true;
4803 }
4804 return MadeChange;
4805}
4806
4807
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling. The caller takes ownership of the returned
// pass.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
}
4814
// Pass identification for the legacy pass manager.
char RISCVDAGToDAGISelLegacy::ID = 0;

// Legacy-pass-manager wrapper: owns a RISCVDAGToDAGISel instance and
// delegates to it via SelectionDAGISelLegacy.
RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(args&: TM, args&: OptLevel)) {}

INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4823