1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
19#include "RISCVSelectionDAGInfo.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/SDPatternMatch.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
23#include "llvm/Support/Alignment.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
// Hidden option: when enabled, constants that would take exactly two
// instructions to materialize are emitted as a single rematerializable
// PseudoMovImm instead (consumed by selectImm below).
static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(Val: false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
// Pre-isel DAG rewrite: convert a few generic/target nodes into VL-carrying
// forms that the RISC-V isel patterns can match directly.
void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  // Walk the node list backwards so replacement nodes appended at the end of
  // the list are not revisited.
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Dead nodes are cleaned up in bulk at the end.
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // NOTE(review): with the P extension the generic SPLAT_VECTOR is left
      // alone -- presumably matched by P-specific patterns elsewhere; confirm.
      if (Subtarget->hasStdExtP())
        break;
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(ResNo: 0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      // X0 as the VL operand encodes VLMAX.
      SDValue VL = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
      SDValue Src = N->getOperand(Num: 0);
      // Integer scalars are widened to XLen for VMV_V_X_VL.
      if (VT.isInteger())
        Src = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget->getXLenVT(),
                              Operand: N->getOperand(Num: 0));
      Result = CurDAG->getNode(Opcode: Opc, DL, VT, N1: CurDAG->getUNDEF(VT), N2: Src, N3: VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(ResNo: 0);
      SDValue Passthru = N->getOperand(Num: 0);
      SDValue Lo = N->getOperand(Num: 1);
      SDValue Hi = N->getOperand(Num: 2);
      SDValue VL = N->getOperand(Num: 3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create temporary stack for each expanding node.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(Bytes: TypeSize::getFixed(ExactSize: 8), Alignment: Align(8));
      int FI = cast<FrameIndexSDNode>(Val: StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      // Store both 32-bit halves into the slot. Both stores hang off the
      // entry node; they are joined by a TokenFactor below.
      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, dl: DL, Val: Lo, Ptr: StackSlot, PtrInfo: MPI, Alignment: Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(Base: StackSlot, Offset: TypeSize::getFixed(ExactSize: 4), DL);
      Hi = CurDAG->getStore(Chain, dl: DL, Val: Hi, Ptr: OffsetSlot, PtrInfo: MPI.getWithOffset(O: 4),
                            Alignment: Align(8));

      Chain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo, N2: Hi);

      // Reload the combined 64-bit value as a stride-0 (broadcast) vlse.
      SDVTList VTs = CurDAG->getVTList(VTs: {VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(Reg: RISCV::X0, VT: MVT::i64), // Stride 0.
                       VL};

      Result = CurDAG->getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
                                           MemVT: MVT::i64, PtrInfo: MPI, Alignment: Align(8),
                                           Flags: MachineMemOperand::MOLoad);
      break;
    }
    case ISD::FP_EXTEND: {
      // We only have vector patterns for riscv_fpextend_vl in isel.
      SDLoc DL(N);
      MVT VT = N->getSimpleValueType(ResNo: 0);
      if (!VT.isVector())
        break;
      // Build an all-ones mask and a VLMAX VL for the _VL variant.
      SDValue VLMAX = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
      SDValue TrueMask = CurDAG->getNode(
          Opcode: RISCVISD::VMSET_VL, DL, VT: VT.changeVectorElementType(EltVT: MVT::i1), Operand: VLMAX);
      Result = CurDAG->getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT, N1: N->getOperand(Num: 0),
                               N2: TrueMask, N3: VLMAX);
      break;
    }
    }

    // Only result 0 is rewritten; replace its uses and record the change.
    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
147
// Post-isel peepholes over the machine-node DAG.
void RISCVDAGToDAGISel::PostprocessISelDAG() {
  // Keep the root alive across peepholes that may replace nodes it reaches.
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  // Iterate backwards so replacement nodes appended to the list are not
  // reprocessed.
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // Peephole over sign-extend-to-word instructions (see doPeepholeSExtW).
    MadeChange |= doPeepholeSExtW(Node: N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs. Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(Node: cast<MachineSDNode>(Val: N));
  }

  CurDAG->setRoot(Dummy.getValue());

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister. This is required to workaround
  // an optimization deficiency in MachineCSE. This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
182 RISCVMatInt::InstSeq &Seq) {
183 SDValue SrcReg = CurDAG->getRegister(Reg: RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Val: Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SDImm);
190 break;
191 case RISCVMatInt::RegX0:
192 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg,
193 Op2: CurDAG->getRegister(Reg: RISCV::X0, VT));
194 break;
195 case RISCVMatInt::RegReg:
196 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SrcReg);
197 break;
198 case RISCVMatInt::RegImm:
199 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
212 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget);
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(Opcode: RISCV::PseudoMovImm, dl: DL, VT,
218 Op1: CurDAG->getSignedTargetConstant(Val: Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bit 31 and 63 are set.
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
228 RISCVMatInt::InstSeq SeqLo =
229 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc);
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, Seq&: SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: Lo,
235 Op2: CurDAG->getTargetConstant(Val: ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(Opcode: AddOpc, dl: DL, VT, Op1: Lo, Op2: SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
// Append the trailing operands common to RVV load/store pseudos to
// \p Operands, consuming intrinsic operands starting at \p CurOp.
// Produced order: base pointer, [stride/index], [mask], VL, SEW,
// [policy -- loads only], chain.
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(Num: 0);

  Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Index.
    // Report the stride/index operand's type to the caller if requested.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(ResNo: 0);
  }

  if (IsMasked) {
    SDValue Mask = Node->getOperand(Num: CurOp++);
    Operands.push_back(Elt: Mask);
  }
  // VL operand, canonicalized via selectVLOp.
  SDValue VL;
  selectVLOp(N: Node->getOperand(Num: CurOp++), VL);
  Operands.push_back(Elt: VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
  Operands.push_back(Elt: SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do. All have passthru operands. For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(Num: CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
    Operands.push_back(Elt: PolicyOp);
  }

  Operands.push_back(Elt: Chain); // Chain.
}
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(ResNo: 0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
290 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
291
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
294
295 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
302 LMUL: static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);
305
306 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
307
308 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
309 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(N: Node);
311}
312
// Select a unit-stride fault-only-first segment-load intrinsic into a
// VLSEG<NF>FF pseudo. Results: 0 = segment tuple, 1 = output VL, 2 = chain.
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
                                      bool IsMasked) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  MVT XLenVT = Subtarget->getXLenVT();
  // Log2SEW is always the trailing operand of the intrinsic.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  // Passthru tuple operand.
  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, LMUL: static_cast<unsigned>(LMUL));
  // FF loads produce an extra XLenVT result: the updated VL.
  MachineSDNode *Load = CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped,
                                               VT2: XLenVT, VT3: MVT::Other, Ops: Operands);

  CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0)); // Result
  ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1)); // VL
  ReplaceUses(F: SDValue(Node, 2), T: SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(N: Node);
}
343
// Select an indexed segment-load intrinsic into a VLXSEG pseudo
// (ordered or unordered per \p IsOrdered).
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  // Log2SEW is always the trailing operand of the intrinsic.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  // Passthru tuple operand.
  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, IndexVT: &IndexVT);

#ifndef NDEBUG
  // Sanity-check that the index vector has the same element count as the
  // data vector. Number of element = RVVBitsPerBlock * LMUL / SEW.
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
  unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
  // 64-bit index elements are illegal on RV32.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
      IndexLMUL: static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);

  CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  // Result 0 is the loaded tuple, result 1 the chain.
  ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
  ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N: Node);
}
391
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393 bool IsStrided) {
394 SDLoc DL(Node);
395 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
396 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
397 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
398
399 unsigned CurOp = 2;
400 SmallVector<SDValue, 8> Operands;
401
402 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
403
404 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
405 Operands);
406
407 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408 NF, Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
409 MachineSDNode *Store =
410 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);
411
412 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
413
414 ReplaceNode(F: Node, T: Store);
415}
416
// Select an indexed segment-store intrinsic into a VSXSEG pseudo
// (ordered or unordered per \p IsOrdered).
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  // The stored tuple's type comes from the value operand, not the node.
  MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
  // Log2SEW is always the trailing operand of the intrinsic.
  unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  // Tuple value to store.
  Operands.push_back(Elt: Node->getOperand(Num: CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, IndexVT: &IndexVT);

#ifndef NDEBUG
  // Sanity-check that the index vector has the same element count as the
  // data vector. Number of element = RVVBitsPerBlock * LMUL / SEW.
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
  unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
  // 64-bit index elements are illegal on RV32.
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
      IndexLMUL: static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);

  CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});

  ReplaceNode(F: Node, T: Store);
}
462
// Lower the riscv_vsetvli / riscv_vsetvlimax intrinsics to PseudoVSETVLI,
// PseudoVSETVLIX0 (AVL = VLMAX) or PseudoVSETIVLI (uimm5 AVL).
void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(Num: 0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  // vsetvlimax has no AVL operand, so its SEW/LMUL operands start one
  // position earlier than vsetvli's.
  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: Offset) & 0x7);
  RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
      Node->getConstantOperandVal(Num: Offset + 1) & 0x7);

  // Encode vtype with tail-agnostic/mask-agnostic policy.
  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL: VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  // If the exact VLEN is known and the requested AVL equals VLMAX for this
  // SEW/LMUL, prefer the X0 (VLMAX) form.
  if (auto *C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) {
    if (auto VLEN = Subtarget->getRealVLen())
      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(V: Node->getOperand(Num: 1))) {
    // AVL of X0 encodes VLMAX.
    VLOperand = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(Num: 1);

    // A small constant AVL can use the immediate form vsetivli.
    if (auto *C = dyn_cast<ConstantSDNode>(Val&: VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(x: AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(Val: AVL, DL, VT: XLenVT);
        ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: RISCV::PseudoVSETIVLI, dl: DL,
                                                  VT: XLenVT, Op1: VLImm, Op2: VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(F: Node,
              T: CurDAG->getMachineNode(Opcode, dl: DL, VT: XLenVT, Op1: VLOperand, Op2: VTypeIOp));
}
520
521void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) {
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: 2));
538 unsigned Widen = RISCVVType::decodeTWiden(TWiden: Node->getConstantOperandVal(Num: 3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, AltFmt: 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);
547
548 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
549 Op1: Node->getOperand(Num: 1), Op2: VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Val: Log2_32(Value: SEW), DL, VT: XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Val: Widen, DL, VT: XLenVT);
553 ReplaceNode(F: Node,
554 T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
555 Op1: Node->getOperand(Num: 1), Op2: Log2SEW, Op3: TWiden));
556 }
557}
558
// Try to rewrite (shl X, C1) op C2 (op in {AND, OR, XOR}) as
// ((X op (C2 >> C1)) << C1) so the logic immediate fits ANDI/ORI/XORI.
// Returns true and replaces the node on success.
bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
  SDValue N0 = Node->getOperand(Num: 0);
  SDValue N1 = Node->getOperand(Num: 1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(x: Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(x: Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(i: 0);
  }

  // Require a single-use SHL so the transform doesn't duplicate work.
  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR, AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(N: ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  // The shifted-down immediate must fit the 12-bit I-type field.
  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(x: ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR: BinOpc = RISCV::ORI; break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  // Emit the logic op on the pre-shift value, then re-apply the shift.
  SDNode *BinOp = CurDAG->getMachineNode(
      Opcode: BinOpc, dl: DL, VT, Op1: Shift.getOperand(i: 0),
      Op2: CurDAG->getSignedTargetConstant(Val: ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(Opcode: ShOpc, dl: DL, VT, Op1: SDValue(BinOp, 0),
                             Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
  ReplaceNode(F: Node, T: SLLI);
  return true;
}
636
// Try to select an SRA-rooted pattern as a vendor signed bitfield-extract
// instruction (TH.EXT / NDS.BFOS / QC.EXT). Returns true on success.
bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  unsigned Opc;

  if (Subtarget->hasVendorXTHeadBb())
    Opc = RISCV::TH_EXT;
  else if (Subtarget->hasVendorXAndesPerf())
    Opc = RISCV::NDS_BFOS;
  else if (Subtarget->hasVendorXqcibm())
    Opc = RISCV::QC_EXT;
  else
    // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
    return false;

  // The right-shift amount must be constant.
  auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
  if (!N1C)
    return false;

  // Only fold a single-use operand so we don't duplicate work.
  SDValue N0 = Node->getOperand(Num: 0);
  if (!N0.hasOneUse())
    return false;

  // Emit the extract; QC.EXT uses a (width, shamt) encoding instead of
  // (msb, lsb), so translate for it.
  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                             const SDLoc &DL, MVT VT) {
    if (Opc == RISCV::QC_EXT) {
      // QC.EXT X, width, shamt
      // shamt is the same as Lsb
      // width is the number of bits to extract from the Lsb
      Msb = Msb - Lsb + 1;
    }
    return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
                                  Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
                                  Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 < C2
  //        -> (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount can not be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(F: Node, T: Sbe);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    // If the shift-right amount is greater than Msb, it means that extracts
    // the X[Msb] bit and sign-extend it.
    const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(F: Node, T: Sbe);
    return true;
  }

  return false;
}
719
// Try to select (sra (shl X, C1), C2) with C1 > C2 as an NDS.BFOS signed
// bitfield insert. Returns true on success.
bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
  // Only supported with XAndesPerf at the moment.
  if (!Subtarget->hasVendorXAndesPerf())
    return false;

  // The right-shift amount must be constant.
  auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
  if (!N1C)
    return false;

  // Only fold a single-use operand so we don't duplicate work.
  SDValue N0 = Node->getOperand(Num: 0);
  if (!N0.hasOneUse())
    return false;

  // Emit the NDS.BFOS machine node for the given field bounds.
  auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                            const SDLoc &DL, MVT VT) {
    unsigned Opc = RISCV::NDS_BFOS;
    // If the Lsb is equal to the Msb, then the Lsb should be 0.
    if (Lsb == Msb)
      Lsb = 0;
    return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
                                  Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
                                  Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 > C2
  //        -> (NDS.BFOS X, lsb, msb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield insertion (i.e., the shift-right
    // amount should be less than the left-shift).
    if (LeftShAmt <= RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = LeftShAmt - RightShAmt;

    SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
    ReplaceNode(F: Node, T: Sbi);
    return true;
  }

  return false;
}
772
773bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
795 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
796 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
797 ReplaceNode(F: Node, T: Ube);
798 return true;
799}
800
801bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
815 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
816 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
817 ReplaceNode(F: Node, T: Ubi);
818 return true;
819}
820
// Try to select a pre/post-indexed load as a T-Head XTHeadMemIdx
// auto-increment load. Returns true and replaces the node on success.
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Val: Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  // The increment must be a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
  bool IsPre = AM == ISD::PRE_INC;
  bool IsPost = AM == ISD::POST_INC;
  int64_t Offset = C->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
  // Find the smallest shift (0..3) such that Offset >> Shift fits simm5
  // and no low bits are lost.
  unsigned Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  // Pick the opcode for the memory type, addressing mode and extension kind.
  // i64 has no zero-extending variant.
  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {
      Ld->getBasePtr(),
      CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(Node), VT: Ty),
      CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(Node), VT: Ty), Ld->getChain()};
  // Results: loaded value, updated base pointer, chain.
  SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(Node), VT1: Ld->getValueType(ResNo: 0),
                                       VT2: Ld->getValueType(ResNo: 1), VT3: MVT::Other, Ops);

  // Preserve the original memory operand.
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: Node)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: New), NewMemRefs: {MemOp});

  ReplaceNode(F: Node, T: New);

  return true;
}
889
890static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
891 SDValue Lo, SDValue Hi) {
892 SDValue Ops[] = {
893 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), Lo,
894 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), Hi,
895 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
896
897 return SDValue(
898 CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT, Ops), 0);
899}
900
901// Helper to extract Lo and Hi values from a GPR pair.
902static std::pair<SDValue, SDValue>
903extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair) {
904 SDValue Lo =
905 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL, VT: MVT::i32, Operand: Pair);
906 SDValue Hi =
907 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_odd, DL, VT: MVT::i32, Operand: Pair);
908 return {Lo, Hi};
909}
910
// Try to match WMACC pattern: ADDD where one operand pair comes from a
// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
bool RISCVDAGToDAGISel::tryWideningMulAcc(SDNode *Node, const SDLoc &DL) {
  assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");

  // ADDD adds two lo/hi operand pairs: (Op0Lo, Op0Hi) + (Op1Lo, Op1Hi).
  SDValue Op0Lo = Node->getOperand(Num: 0);
  SDValue Op0Hi = Node->getOperand(Num: 1);
  SDValue Op1Lo = Node->getOperand(Num: 2);
  SDValue Op1Hi = Node->getOperand(Num: 3);

  // A pair qualifies only if both halves are results 0 and 1 of the same
  // widening-multiply node and this ADDD is the only user of each result;
  // otherwise the multiply has to be materialized anyway and fusing it into
  // a WMACC is not profitable.
  auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
    unsigned Opc = Lo.getOpcode();
    if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
        Opc != RISCVISD::WMULSU)
      return false;
    return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
           Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
  };

  SDNode *MulNode = nullptr;
  SDValue AddLo, AddHi;

  // Check if first operand pair is a supported multiply with single use.
  if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
    MulNode = Op0Lo.getNode();
    AddLo = Op1Lo;
    AddHi = Op1Hi;
  }
  // ADDD is commutative. Check if second operand pair is a supported multiply
  // with single use.
  else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
    MulNode = Op1Lo.getNode();
    AddLo = Op0Lo;
    AddHi = Op0Hi;
  } else {
    return false;
  }

  // Pick the accumulate opcode matching the multiply's signedness.
  unsigned Opc;
  switch (MulNode->getOpcode()) {
  default:
    llvm_unreachable("Unexpected multiply opcode");
  case ISD::UMUL_LOHI:
    Opc = RISCV::WMACCU;
    break;
  case ISD::SMUL_LOHI:
    Opc = RISCV::WMACC;
    break;
  case RISCVISD::WMULSU:
    Opc = RISCV::WMACCSU;
    break;
  }

  // The addend pair becomes the accumulator register pair.
  SDValue Acc = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: AddLo, Hi: AddHi);

  // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
  SDValue M0 = MulNode->getOperand(Num: 0);
  SDValue M1 = MulNode->getOperand(Num: 1);
  MachineSDNode *New =
      CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Acc, Op2: M0, Op3: M1);

  // Split the pair result back into the two scalar results of the ADDD.
  auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
  ReplaceUses(F: SDValue(Node, 0), T: Lo);
  ReplaceUses(F: SDValue(Node, 1), T: Hi);
  CurDAG->RemoveDeadNode(N: Node);
  return true;
}
978
979static Register getTileReg(uint64_t TileNum) {
980 assert(TileNum <= 15 && "Invalid tile number");
981 return RISCV::T0 + TileNum;
982}
983
984void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
985 if (!Subtarget->hasVInstructions())
986 return;
987
988 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
989
990 SDLoc DL(Node);
991 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
992
993 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
994 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
995 "Unexpected vsetvli intrinsic");
996
997 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
998 unsigned Log2SEW = Log2_32(Value: Node->getConstantOperandVal(Num: 6));
999 SDValue SEWOp =
1000 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: Subtarget->getXLenVT());
1001 SmallVector<SDValue, 8> Operands = {Node->getOperand(Num: 2), Node->getOperand(Num: 3),
1002 Node->getOperand(Num: 4), Node->getOperand(Num: 5),
1003 Node->getOperand(Num: 8), SEWOp,
1004 Node->getOperand(Num: 0)};
1005
1006 unsigned Opcode;
1007 auto *LMulSDNode = cast<ConstantSDNode>(Val: Node->getOperand(Num: 7));
1008 switch (LMulSDNode->getSExtValue()) {
1009 case 5:
1010 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
1011 : RISCV::PseudoSF_VC_I_SE_MF8;
1012 break;
1013 case 6:
1014 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
1015 : RISCV::PseudoSF_VC_I_SE_MF4;
1016 break;
1017 case 7:
1018 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
1019 : RISCV::PseudoSF_VC_I_SE_MF2;
1020 break;
1021 case 0:
1022 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
1023 : RISCV::PseudoSF_VC_I_SE_M1;
1024 break;
1025 case 1:
1026 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
1027 : RISCV::PseudoSF_VC_I_SE_M2;
1028 break;
1029 case 2:
1030 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
1031 : RISCV::PseudoSF_VC_I_SE_M4;
1032 break;
1033 case 3:
1034 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
1035 : RISCV::PseudoSF_VC_I_SE_M8;
1036 break;
1037 }
1038
1039 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
1040 Opcode, dl: DL, VT: Node->getSimpleValueType(ResNo: 0), Ops: Operands));
1041}
1042
// Return the NF (number of fields, 2..8) encoded in the name of a RISC-V
// segment load/store intrinsic. The macros below expand to the case labels
// for every NF variant of a base name, including the masked and
// fault-only-first forms where applicable.
static unsigned getSegInstNF(unsigned Intrinsic) {
// One case label for the unmasked NF variant of NAME.
#define INST_NF_CASE(NAME, NF) \
 case Intrinsic::riscv_##NAME##NF: \
 return NF;
// One case label for the masked NF variant of NAME.
#define INST_NF_CASE_MASK(NAME, NF) \
 case Intrinsic::riscv_##NAME##NF##_mask: \
 return NF;
// One case label for the fault-only-first NF variant of NAME.
#define INST_NF_CASE_FF(NAME, NF) \
 case Intrinsic::riscv_##NAME##NF##ff: \
 return NF;
// One case label for the masked fault-only-first NF variant of NAME.
#define INST_NF_CASE_FF_MASK(NAME, NF) \
 case Intrinsic::riscv_##NAME##NF##ff_mask: \
 return NF;
// Instantiate MACRO_NAME for every legal NF (2 through 8).
#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
 MACRO_NAME(NAME, 2) \
 MACRO_NAME(NAME, 3) \
 MACRO_NAME(NAME, 4) \
 MACRO_NAME(NAME, 5) \
 MACRO_NAME(NAME, 6) \
 MACRO_NAME(NAME, 7) \
 MACRO_NAME(NAME, 8)
// All unmasked + masked NF variants of NAME.
#define INST_ALL_NF_CASE(NAME) \
 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
// As above, plus the fault-only-first forms (only unit-stride loads have
// them).
#define INST_ALL_NF_CASE_WITH_FF(NAME) \
 INST_ALL_NF_CASE(NAME) \
 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
  switch (Intrinsic) {
  default:
    llvm_unreachable("Unexpected segment load/store intrinsic");
    INST_ALL_NF_CASE_WITH_FF(vlseg)
    INST_ALL_NF_CASE(vlsseg)
    INST_ALL_NF_CASE(vloxseg)
    INST_ALL_NF_CASE(vluxseg)
    INST_ALL_NF_CASE(vsseg)
    INST_ALL_NF_CASE(vssseg)
    INST_ALL_NF_CASE(vsoxseg)
    INST_ALL_NF_CASE(vsuxseg)
  }
}
1084
// Return true if Val can be materialized with a single P-extension PLI
// instruction: the value must be one signed 16-bit pattern repeated in both
// halves, and that pattern must either fit in 10 signed bits (PLI_H) or be
// the same 8-bit pattern repeated in both bytes (PLI_B).
static bool isApplicableToPLI(int Val) {
  int16_t UpperHalf = static_cast<int16_t>(Val >> 16);
  int16_t LowerHalf = static_cast<int16_t>(Val);
  if (UpperHalf != LowerHalf)
    return false;

  // PLI_H replicates a simm10 into every halfword.
  if (UpperHalf >= -512 && UpperHalf <= 511)
    return true;

  // PLI_B replicates one byte, so both bytes of a half must match.
  int8_t UpperByte = static_cast<int8_t>(LowerHalf >> 8);
  int8_t LowerByte = static_cast<int8_t>(Val);
  return UpperByte == LowerByte;
}
1096
1097void RISCVDAGToDAGISel::Select(SDNode *Node) {
1098 // If we have a custom node, we have already selected.
1099 if (Node->isMachineOpcode()) {
1100 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1101 Node->setNodeId(-1);
1102 return;
1103 }
1104
1105 // Instruction Selection not handled by the auto-generated tablegen selection
1106 // should be handled here.
1107 unsigned Opcode = Node->getOpcode();
1108 MVT XLenVT = Subtarget->getXLenVT();
1109 SDLoc DL(Node);
1110 MVT VT = Node->getSimpleValueType(ResNo: 0);
1111
1112 bool HasBitTest = Subtarget->hasBEXTILike();
1113
1114 switch (Opcode) {
1115 case ISD::Constant: {
1116 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1117 auto *ConstNode = cast<ConstantSDNode>(Val: Node);
1118 if (ConstNode->isZero()) {
1119 SDValue New =
1120 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: RISCV::X0, VT);
1121 ReplaceNode(F: Node, T: New.getNode());
1122 return;
1123 }
1124 int64_t Imm = ConstNode->getSExtValue();
1125 // If only the lower 8 bits are used, try to convert this to a simm6 by
1126 // sign-extending bit 7. This is neutral without the C extension, and
1127 // allows C.LI to be used if C is present.
1128 if (!isInt<8>(x: Imm) && isUInt<8>(x: Imm) && isInt<6>(x: SignExtend64<8>(x: Imm)) &&
1129 hasAllBUsers(Node))
1130 Imm = SignExtend64<8>(x: Imm);
1131 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1132 // by sign extending bit 15.
1133 else if (!isInt<16>(x: Imm) && isUInt<16>(x: Imm) &&
1134 isInt<12>(x: SignExtend64<16>(x: Imm)) && hasAllHUsers(Node))
1135 Imm = SignExtend64<16>(x: Imm);
1136 // If the upper 32-bits are not used try to convert this into a simm32 by
1137 // sign extending bit 32.
1138 else if (!isInt<32>(x: Imm) && isUInt<32>(x: Imm) && hasAllWUsers(Node))
1139 Imm = SignExtend64<32>(x: Imm);
1140
1141 if (VT == MVT::i64 && Subtarget->hasStdExtP() && isApplicableToPLI(Val: Imm) &&
1142 hasAllWUsers(Node)) {
1143 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1144 // can simply copy lower 32 bits to higher 32 bits to make it able to
1145 // rematerialize to PLI_B or PLI_H
1146 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1147 }
1148
1149 ReplaceNode(F: Node, T: selectImm(CurDAG, DL, VT, Imm, Subtarget: *Subtarget).getNode());
1150 return;
1151 }
1152 case ISD::ConstantFP: {
1153 const APFloat &APF = cast<ConstantFPSDNode>(Val: Node)->getValueAPF();
1154
1155 bool Is64Bit = Subtarget->is64Bit();
1156 bool HasZdinx = Subtarget->hasStdExtZdinx();
1157
1158 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1159 SDValue Imm;
1160 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1161 // create an integer immediate.
1162 if (APF.isPosZero() || NegZeroF64) {
1163 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1164 Imm = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::f64);
1165 else
1166 Imm = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
1167 } else {
1168 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
1169 Subtarget: *Subtarget);
1170 }
1171
1172 unsigned Opc;
1173 switch (VT.SimpleTy) {
1174 default:
1175 llvm_unreachable("Unexpected size");
1176 case MVT::bf16:
1177 assert(Subtarget->hasStdExtZfbfmin());
1178 Opc = RISCV::FMV_H_X;
1179 break;
1180 case MVT::f16:
1181 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1182 break;
1183 case MVT::f32:
1184 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1185 break;
1186 case MVT::f64:
1187 // For RV32, we can't move from a GPR, we need to convert instead. This
1188 // should only happen for +0.0 and -0.0.
1189 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1190 if (HasZdinx)
1191 Opc = RISCV::COPY;
1192 else
1193 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1194 break;
1195 }
1196
1197 SDNode *Res;
1198 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1199 Res =
1200 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_16, DL, VT, Operand: Imm).getNode();
1201 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1202 Res =
1203 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_32, DL, VT, Operand: Imm).getNode();
1204 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1205 Res = CurDAG->getMachineNode(
1206 Opcode: Opc, dl: DL, VT, Op1: Imm,
1207 Op2: CurDAG->getTargetConstant(Val: RISCVFPRndMode::RNE, DL, VT: XLenVT));
1208 else
1209 Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: Imm);
1210
1211 // For f64 -0.0, we need to insert a fneg.d idiom.
1212 if (NegZeroF64) {
1213 Opc = RISCV::FSGNJN_D;
1214 if (HasZdinx)
1215 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1216 Res =
1217 CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: SDValue(Res, 0), Op2: SDValue(Res, 0));
1218 }
1219
1220 ReplaceNode(F: Node, T: Res);
1221 return;
1222 }
1223 case RISCVISD::BuildGPRPair:
1224 case RISCVISD::BuildPairF64: {
1225 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1226 break;
1227
1228 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1229 "BuildPairF64 only handled here on rv32i_zdinx");
1230
1231 SDValue N =
1232 buildGPRPair(CurDAG, DL, VT, Lo: Node->getOperand(Num: 0), Hi: Node->getOperand(Num: 1));
1233 ReplaceNode(F: Node, T: N.getNode());
1234 return;
1235 }
1236 case RISCVISD::SplitGPRPair:
1237 case RISCVISD::SplitF64: {
1238 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1239 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1240 "SplitF64 only handled here on rv32i_zdinx");
1241
1242 if (!SDValue(Node, 0).use_empty()) {
1243 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1244 VT: Node->getValueType(ResNo: 0),
1245 Operand: Node->getOperand(Num: 0));
1246 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1247 }
1248
1249 if (!SDValue(Node, 1).use_empty()) {
1250 SDValue Hi = CurDAG->getTargetExtractSubreg(
1251 SRIdx: RISCV::sub_gpr_odd, DL, VT: Node->getValueType(ResNo: 1), Operand: Node->getOperand(Num: 0));
1252 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1253 }
1254
1255 CurDAG->RemoveDeadNode(N: Node);
1256 return;
1257 }
1258
1259 assert(Opcode != RISCVISD::SplitGPRPair &&
1260 "SplitGPRPair should already be handled");
1261
1262 if (!Subtarget->hasStdExtZfa())
1263 break;
1264 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1265 "Unexpected subtarget");
1266
1267 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1268 if (!SDValue(Node, 0).use_empty()) {
1269 SDNode *Lo = CurDAG->getMachineNode(Opcode: RISCV::FMV_X_W_FPR64, dl: DL, VT,
1270 Op1: Node->getOperand(Num: 0));
1271 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Lo, 0));
1272 }
1273 if (!SDValue(Node, 1).use_empty()) {
1274 SDNode *Hi = CurDAG->getMachineNode(Opcode: RISCV::FMVH_X_D, dl: DL, VT,
1275 Op1: Node->getOperand(Num: 0));
1276 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Hi, 0));
1277 }
1278
1279 CurDAG->RemoveDeadNode(N: Node);
1280 return;
1281 }
1282 case ISD::SHL: {
1283 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1284 if (!N1C)
1285 break;
1286 SDValue N0 = Node->getOperand(Num: 0);
1287 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1288 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1289 break;
1290 unsigned ShAmt = N1C->getZExtValue();
1291 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1292
1293 if (isShiftedMask_64(Value: Mask)) {
1294 unsigned XLen = Subtarget->getXLen();
1295 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1296 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1297 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1298 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1299 // where C2 has 32 leading zeros and C3 trailing zeros.
1300 SDNode *SRLIW = CurDAG->getMachineNode(
1301 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1302 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1303 SDNode *SLLI = CurDAG->getMachineNode(
1304 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1305 Op2: CurDAG->getTargetConstant(Val: TrailingZeros + ShAmt, DL, VT));
1306 ReplaceNode(F: Node, T: SLLI);
1307 return;
1308 }
1309 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1310 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1311 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1312 // where C2 has C4 leading zeros and no trailing zeros.
1313 // This is profitable if the "and" was to be lowered to
1314 // (srli (slli X, C4), C4) and not (andi X, C2).
1315 // For "LeadingZeros == 32":
1316 // - with Zba it's just (slli.uw X, C)
1317 // - without Zba a tablegen pattern applies the very same
1318 // transform as we would have done here
1319 SDNode *SLLI = CurDAG->getMachineNode(
1320 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1321 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1322 SDNode *SRLI = CurDAG->getMachineNode(
1323 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1324 Op2: CurDAG->getTargetConstant(Val: LeadingZeros - ShAmt, DL, VT));
1325 ReplaceNode(F: Node, T: SRLI);
1326 return;
1327 }
1328 }
1329 break;
1330 }
1331 case ISD::SRL: {
1332 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1333 if (!N1C)
1334 break;
1335 SDValue N0 = Node->getOperand(Num: 0);
1336 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1337 break;
1338 unsigned ShAmt = N1C->getZExtValue();
1339 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1340
1341 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1342 // 32 leading zeros and C3 trailing zeros.
1343 if (isShiftedMask_64(Value: Mask) && N0.hasOneUse()) {
1344 unsigned XLen = Subtarget->getXLen();
1345 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1346 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1347 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1348 SDNode *SRLIW = CurDAG->getMachineNode(
1349 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1350 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1351 SDNode *SLLI = CurDAG->getMachineNode(
1352 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1353 Op2: CurDAG->getTargetConstant(Val: TrailingZeros - ShAmt, DL, VT));
1354 ReplaceNode(F: Node, T: SLLI);
1355 return;
1356 }
1357 }
1358
1359 // Optimize (srl (and X, C2), C) ->
1360 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1361 // Where C2 is a mask with C3 trailing ones.
1362 // Taking into account that the C2 may have had lower bits unset by
1363 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1364 // This pattern occurs when type legalizing right shifts for types with
1365 // less than XLen bits.
1366 Mask |= maskTrailingOnes<uint64_t>(N: ShAmt);
1367 if (!isMask_64(Value: Mask))
1368 break;
1369 unsigned TrailingOnes = llvm::countr_one(Value: Mask);
1370 if (ShAmt >= TrailingOnes)
1371 break;
1372 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1373 if (TrailingOnes == 32) {
1374 SDNode *SRLI = CurDAG->getMachineNode(
1375 Opcode: Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, dl: DL, VT,
1376 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1377 ReplaceNode(F: Node, T: SRLI);
1378 return;
1379 }
1380
1381 // Only do the remaining transforms if the AND has one use.
1382 if (!N0.hasOneUse())
1383 break;
1384
1385 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1386 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1387 SDNode *BEXTI = CurDAG->getMachineNode(
1388 Opcode: Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, dl: DL, VT,
1389 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1390 ReplaceNode(F: Node, T: BEXTI);
1391 return;
1392 }
1393
1394 const unsigned Msb = TrailingOnes - 1;
1395 const unsigned Lsb = ShAmt;
1396 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0.getOperand(i: 0), Msb, Lsb))
1397 return;
1398
1399 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1400 SDNode *SLLI =
1401 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1402 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1403 SDNode *SRLI = CurDAG->getMachineNode(
1404 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1405 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1406 ReplaceNode(F: Node, T: SRLI);
1407 return;
1408 }
1409 case ISD::SRA: {
1410 if (trySignedBitfieldExtract(Node))
1411 return;
1412
1413 if (trySignedBitfieldInsertInSign(Node))
1414 return;
1415
1416 // Optimize (sra (sext_inreg X, i16), C) ->
1417 // (srai (slli X, (XLen-16), (XLen-16) + C)
1418 // And (sra (sext_inreg X, i8), C) ->
1419 // (srai (slli X, (XLen-8), (XLen-8) + C)
1420 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1421 // This transform matches the code we get without Zbb. The shifts are more
1422 // compressible, and this can help expose CSE opportunities in the sdiv by
1423 // constant optimization.
1424 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1425 if (!N1C)
1426 break;
1427 SDValue N0 = Node->getOperand(Num: 0);
1428 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1429 break;
1430 unsigned ShAmt = N1C->getZExtValue();
1431 unsigned ExtSize =
1432 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
1433 // ExtSize of 32 should use sraiw via tablegen pattern.
1434 if (ExtSize >= 32 || ShAmt >= ExtSize)
1435 break;
1436 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1437 SDNode *SLLI =
1438 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1439 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1440 SDNode *SRAI = CurDAG->getMachineNode(
1441 Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1442 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1443 ReplaceNode(F: Node, T: SRAI);
1444 return;
1445 }
1446 case ISD::SIGN_EXTEND_INREG: {
1447 // Optimize (sext_inreg (srl X, C), i8/i16) ->
1448 // (srai (slli X, XLen-ExtSize-C), XLen-ExtSize)
1449 // This is a bitfield extract pattern where we're extracting a signed
1450 // 8-bit or 16-bit field from position C.
1451 SDValue N0 = Node->getOperand(Num: 0);
1452 if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse())
1453 break;
1454
1455 auto *ShAmtC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
1456 if (!ShAmtC)
1457 break;
1458
1459 unsigned ExtSize =
1460 cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT().getSizeInBits();
1461 unsigned ShAmt = ShAmtC->getZExtValue();
1462 unsigned XLen = Subtarget->getXLen();
1463
1464 // Only handle types less than 32, and make sure the shift amount is valid.
1465 if (ExtSize >= 32 || ShAmt >= XLen - ExtSize)
1466 break;
1467
1468 unsigned LShAmt = XLen - ExtSize - ShAmt;
1469 SDNode *SLLI =
1470 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1471 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1472 SDNode *SRAI = CurDAG->getMachineNode(
1473 Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1474 Op2: CurDAG->getTargetConstant(Val: XLen - ExtSize, DL, VT));
1475 ReplaceNode(F: Node, T: SRAI);
1476 return;
1477 }
1478 case ISD::OR: {
1479 if (tryShrinkShlLogicImm(Node))
1480 return;
1481
1482 break;
1483 }
1484 case ISD::XOR:
1485 if (tryShrinkShlLogicImm(Node))
1486 return;
1487
1488 break;
1489 case ISD::AND: {
1490 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1491 if (!N1C)
1492 break;
1493
1494 SDValue N0 = Node->getOperand(Num: 0);
1495
1496 bool LeftShift = N0.getOpcode() == ISD::SHL;
1497 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1498 auto *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
1499 if (!C)
1500 break;
1501 unsigned C2 = C->getZExtValue();
1502 unsigned XLen = Subtarget->getXLen();
1503 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1504
1505 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1506 // shift pair might offer more compression opportunities.
1507 // TODO: We could check for C extension here, but we don't have many lit
1508 // tests with the C extension enabled so not checking gets better
1509 // coverage.
1510 // TODO: What if ANDI faster than shift?
1511 bool IsCANDI = isInt<6>(x: N1C->getSExtValue());
1512
1513 uint64_t C1 = N1C->getZExtValue();
1514
1515 // Clear irrelevant bits in the mask.
1516 if (LeftShift)
1517 C1 &= maskTrailingZeros<uint64_t>(N: C2);
1518 else
1519 C1 &= maskTrailingOnes<uint64_t>(N: XLen - C2);
1520
1521 // Some transforms should only be done if the shift has a single use or
1522 // the AND would become (srli (slli X, 32), 32)
1523 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1524
1525 SDValue X = N0.getOperand(i: 0);
1526
1527 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1528 // with c3 leading zeros.
1529 if (!LeftShift && isMask_64(Value: C1)) {
1530 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1531 if (C2 < Leading) {
1532 // If the number of leading zeros is C2+32 this can be SRLIW.
1533 if (C2 + 32 == Leading) {
1534 SDNode *SRLIW = CurDAG->getMachineNode(
1535 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1536 ReplaceNode(F: Node, T: SRLIW);
1537 return;
1538 }
1539
1540 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1541 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1542 //
1543 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1544 // legalized and goes through DAG combine.
1545 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1546 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1547 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32) {
1548 SDNode *SRAIW =
1549 CurDAG->getMachineNode(Opcode: RISCV::SRAIW, dl: DL, VT, Op1: X.getOperand(i: 0),
1550 Op2: CurDAG->getTargetConstant(Val: 31, DL, VT));
1551 SDNode *SRLIW = CurDAG->getMachineNode(
1552 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: SDValue(SRAIW, 0),
1553 Op2: CurDAG->getTargetConstant(Val: Leading - 32, DL, VT));
1554 ReplaceNode(F: Node, T: SRLIW);
1555 return;
1556 }
1557
1558 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1559 // available.
1560 // Transform (and (srl x, C2), C1)
1561 // -> (<bfextract> x, msb, lsb)
1562 //
1563 // Make sure to keep this below the SRLIW cases, as we always want to
1564 // prefer the more common instruction.
1565 const unsigned Msb = llvm::bit_width(Value: C1) + C2 - 1;
1566 const unsigned Lsb = C2;
1567 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1568 return;
1569
1570 // (srli (slli x, c3-c2), c3).
1571 // Skip if we could use (zext.w (sraiw X, C2)).
1572 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1573 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1574 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32;
1575 // Also Skip if we can use bexti or th.tst.
1576 Skip |= HasBitTest && Leading == XLen - 1;
1577 if (OneUseOrZExtW && !Skip) {
1578 SDNode *SLLI = CurDAG->getMachineNode(
1579 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1580 Op2: CurDAG->getTargetConstant(Val: Leading - C2, DL, VT));
1581 SDNode *SRLI = CurDAG->getMachineNode(
1582 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1583 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1584 ReplaceNode(F: Node, T: SRLI);
1585 return;
1586 }
1587 }
1588 }
1589
1590 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1591 // shifted by c2 bits with c3 leading zeros.
1592 if (LeftShift && isShiftedMask_64(Value: C1)) {
1593 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1594
1595 if (C2 + Leading < XLen &&
1596 C1 == (maskTrailingOnes<uint64_t>(N: XLen - (C2 + Leading)) << C2)) {
1597 // Use slli.uw when possible.
1598 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1599 SDNode *SLLI_UW =
1600 CurDAG->getMachineNode(Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: X,
1601 Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1602 ReplaceNode(F: Node, T: SLLI_UW);
1603 return;
1604 }
1605
1606 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1607 // available.
1608 // Transform (and (shl x, c2), c1)
1609 // -> (<bfinsert> x, msb, lsb)
1610 // e.g.
1611 // (and (shl x, 12), 0x00fff000)
1612 // If XLen = 32 and C2 = 12, then
1613 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1614 const unsigned Msb = XLen - Leading - 1;
1615 const unsigned Lsb = C2;
1616 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1617 return;
1618
1619 if (OneUseOrZExtW && !IsCANDI) {
1620 // (packh x0, X)
1621 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1622 SDNode *PACKH = CurDAG->getMachineNode(
1623 Opcode: RISCV::PACKH, dl: DL, VT,
1624 Op1: CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT()), Op2: X);
1625 ReplaceNode(F: Node, T: PACKH);
1626 return;
1627 }
1628 // (srli (slli c2+c3), c3)
1629 SDNode *SLLI = CurDAG->getMachineNode(
1630 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1631 Op2: CurDAG->getTargetConstant(Val: C2 + Leading, DL, VT));
1632 SDNode *SRLI = CurDAG->getMachineNode(
1633 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1634 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1635 ReplaceNode(F: Node, T: SRLI);
1636 return;
1637 }
1638 }
1639 }
1640
1641 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1642 // shifted mask with c2 leading zeros and c3 trailing zeros.
1643 if (!LeftShift && isShiftedMask_64(Value: C1)) {
1644 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1645 unsigned Trailing = llvm::countr_zero(Val: C1);
1646 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1647 !IsCANDI) {
1648 unsigned SrliOpc = RISCV::SRLI;
1649 // If the input is zexti32 we should use SRLIW.
1650 if (X.getOpcode() == ISD::AND &&
1651 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1652 X.getConstantOperandVal(i: 1) == UINT64_C(0xFFFFFFFF)) {
1653 SrliOpc = RISCV::SRLIW;
1654 X = X.getOperand(i: 0);
1655 }
1656 SDNode *SRLI = CurDAG->getMachineNode(
1657 Opcode: SrliOpc, dl: DL, VT, Op1: X,
1658 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1659 SDNode *SLLI = CurDAG->getMachineNode(
1660 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1661 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1662 ReplaceNode(F: Node, T: SLLI);
1663 return;
1664 }
1665 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1666 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1667 OneUseOrZExtW && !IsCANDI) {
1668 SDNode *SRLIW = CurDAG->getMachineNode(
1669 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1670 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1671 SDNode *SLLI = CurDAG->getMachineNode(
1672 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1673 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1674 ReplaceNode(F: Node, T: SLLI);
1675 return;
1676 }
1677 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1678 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1679 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1680 SDNode *SRLI = CurDAG->getMachineNode(
1681 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1682 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1683 SDNode *SLLI_UW = CurDAG->getMachineNode(
1684 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1685 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1686 ReplaceNode(F: Node, T: SLLI_UW);
1687 return;
1688 }
1689 }
1690
1691 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1692 // shifted mask with no leading zeros and c3 trailing zeros.
1693 if (LeftShift && isShiftedMask_64(Value: C1)) {
1694 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1695 unsigned Trailing = llvm::countr_zero(Val: C1);
1696 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1697 SDNode *SRLI = CurDAG->getMachineNode(
1698 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1699 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1700 SDNode *SLLI = CurDAG->getMachineNode(
1701 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1702 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1703 ReplaceNode(F: Node, T: SLLI);
1704 return;
1705 }
1706 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1707 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1708 SDNode *SRLIW = CurDAG->getMachineNode(
1709 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1710 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1711 SDNode *SLLI = CurDAG->getMachineNode(
1712 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1713 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1714 ReplaceNode(F: Node, T: SLLI);
1715 return;
1716 }
1717
1718 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1719 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1720 Subtarget->hasStdExtZba()) {
1721 SDNode *SRLI = CurDAG->getMachineNode(
1722 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1723 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1724 SDNode *SLLI_UW = CurDAG->getMachineNode(
1725 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1726 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1727 ReplaceNode(F: Node, T: SLLI_UW);
1728 return;
1729 }
1730 }
1731 }
1732
1733 const uint64_t C1 = N1C->getZExtValue();
1734
1735 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
1736 N0.hasOneUse()) {
1737 unsigned C2 = N0.getConstantOperandVal(i: 1);
1738 unsigned XLen = Subtarget->getXLen();
1739 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1740
1741 SDValue X = N0.getOperand(i: 0);
1742
1743 // Prefer SRAIW + ANDI when possible.
1744 bool Skip = C2 > 32 && isInt<12>(x: N1C->getSExtValue()) &&
1745 X.getOpcode() == ISD::SHL &&
1746 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1747 X.getConstantOperandVal(i: 1) == 32;
1748 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1749 // mask with c3 leading zeros and c2 is larger than c3.
1750 if (isMask_64(Value: C1) && !Skip) {
1751 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1752 if (C2 > Leading) {
1753 SDNode *SRAI = CurDAG->getMachineNode(
1754 Opcode: RISCV::SRAI, dl: DL, VT, Op1: X,
1755 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1756 SDNode *SRLI = CurDAG->getMachineNode(
1757 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1758 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1759 ReplaceNode(F: Node, T: SRLI);
1760 return;
1761 }
1762 }
1763
1764 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1765 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1766 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1767 if (isShiftedMask_64(Value: C1) && !Skip) {
1768 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1769 unsigned Trailing = llvm::countr_zero(Val: C1);
1770 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1771 SDNode *SRAI = CurDAG->getMachineNode(
1772 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1773 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1774 SDNode *SRLI = CurDAG->getMachineNode(
1775 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1776 Op2: CurDAG->getTargetConstant(Val: Leading + Trailing, DL, VT));
1777 SDNode *SLLI = CurDAG->getMachineNode(
1778 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1779 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1780 ReplaceNode(F: Node, T: SLLI);
1781 return;
1782 }
1783 }
1784 }
1785
1786 // If C1 masks off the upper bits only (but can't be formed as an
1787 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1788 // available.
1789 // Transform (and x, C1)
1790 // -> (<bfextract> x, msb, lsb)
1791 if (isMask_64(Value: C1) && !isInt<12>(x: N1C->getSExtValue()) &&
1792 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1793 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1794 const unsigned Msb = llvm::bit_width(Value: C1) - 1;
1795 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0, Msb, Lsb: 0))
1796 return;
1797 }
1798
1799 if (tryShrinkShlLogicImm(Node))
1800 return;
1801
1802 break;
1803 }
1804 case ISD::MUL: {
1805 // Special case for calculating (mul (and X, C2), C1) where the full product
1806 // fits in XLen bits. We can shift X left by the number of leading zeros in
1807 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1808 // product has XLen trailing zeros, putting it in the output of MULHU. This
1809 // can avoid materializing a constant in a register for C2.
1810
1811 // RHS should be a constant.
1812 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1813 if (!N1C || !N1C->hasOneUse())
1814 break;
1815
1816 // LHS should be an AND with constant.
1817 SDValue N0 = Node->getOperand(Num: 0);
1818 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1819 break;
1820
1821 uint64_t C2 = N0.getConstantOperandVal(i: 1);
1822
1823 // Constant should be a mask.
1824 if (!isMask_64(Value: C2))
1825 break;
1826
1827 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1828 // multiple users or the constant is a simm12. This prevents inserting a
1829 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1830 // make it more costly to materialize. Otherwise, using a SLLI might allow
1831 // it to be compressed.
1832 bool IsANDIOrZExt =
1833 isInt<12>(x: C2) ||
1834 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1835 // With XTHeadBb, we can use TH.EXTU.
1836 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1837 if (IsANDIOrZExt && (isInt<12>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1838 break;
1839 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1840 // the constant is a simm32.
1841 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1842 // With XTHeadBb, we can use TH.EXTU.
1843 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1844 if (IsZExtW && (isInt<32>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1845 break;
1846
1847 // We need to shift left the AND input and C1 by a total of XLen bits.
1848
1849 // How far left do we need to shift the AND input?
1850 unsigned XLen = Subtarget->getXLen();
1851 unsigned LeadingZeros = XLen - llvm::bit_width(Value: C2);
1852
1853 // The constant gets shifted by the remaining amount unless that would
1854 // shift bits out.
1855 uint64_t C1 = N1C->getZExtValue();
1856 unsigned ConstantShift = XLen - LeadingZeros;
1857 if (ConstantShift > (XLen - llvm::bit_width(Value: C1)))
1858 break;
1859
1860 uint64_t ShiftedC1 = C1 << ConstantShift;
1861 // If this is RV32, we need to sign extend the constant.
1862 if (XLen == 32)
1863 ShiftedC1 = SignExtend64<32>(x: ShiftedC1);
1864
1865 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1866 SDNode *Imm = selectImm(CurDAG, DL, VT, Imm: ShiftedC1, Subtarget: *Subtarget).getNode();
1867 SDNode *SLLI =
1868 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1869 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1870 SDNode *MULHU = CurDAG->getMachineNode(Opcode: RISCV::MULHU, dl: DL, VT,
1871 Op1: SDValue(SLLI, 0), Op2: SDValue(Imm, 0));
1872 ReplaceNode(F: Node, T: MULHU);
1873 return;
1874 }
1875 case ISD::SMUL_LOHI:
1876 case ISD::UMUL_LOHI:
1877 case RISCVISD::WMULSU:
1878 case RISCVISD::WADDU:
1879 case RISCVISD::WSUBU: {
1880 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1881 "Unexpected opcode");
1882
1883 unsigned Opc;
1884 switch (Node->getOpcode()) {
1885 default:
1886 llvm_unreachable("Unexpected opcode");
1887 case ISD::SMUL_LOHI:
1888 Opc = RISCV::WMUL;
1889 break;
1890 case ISD::UMUL_LOHI:
1891 Opc = RISCV::WMULU;
1892 break;
1893 case RISCVISD::WMULSU:
1894 Opc = RISCV::WMULSU;
1895 break;
1896 case RISCVISD::WADDU:
1897 Opc = RISCV::WADDU;
1898 break;
1899 case RISCVISD::WSUBU:
1900 Opc = RISCV::WSUBU;
1901 break;
1902 }
1903
1904 SDNode *Result = CurDAG->getMachineNode(
1905 Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Node->getOperand(Num: 0), Op2: Node->getOperand(Num: 1));
1906
1907 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(Result, 0));
1908 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1909 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1910 CurDAG->RemoveDeadNode(N: Node);
1911 return;
1912 }
1913 case RISCVISD::WSLL:
1914 case RISCVISD::WSLA: {
1915 // Custom select WSLL/WSLA for RV32P.
1916 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1917 "Unexpected opcode");
1918
1919 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1920
1921 SDValue ShAmt = Node->getOperand(Num: 1);
1922
1923 unsigned Opc;
1924
1925 auto *ShAmtC = dyn_cast<ConstantSDNode>(Val&: ShAmt);
1926 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1927 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1928 ShAmt = CurDAG->getTargetConstant(Val: ShAmtC->getZExtValue(), DL, VT: XLenVT);
1929 } else {
1930 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1931 }
1932
1933 SDNode *WShift = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped,
1934 Op1: Node->getOperand(Num: 0), Op2: ShAmt);
1935
1936 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(WShift, 0));
1937 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1938 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1939 CurDAG->RemoveDeadNode(N: Node);
1940 return;
1941 }
1942 case ISD::LOAD: {
1943 if (tryIndexedLoad(Node))
1944 return;
1945
1946 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1947 // We match post-incrementing load here
1948 LoadSDNode *Load = cast<LoadSDNode>(Val: Node);
1949 if (Load->getAddressingMode() != ISD::POST_INC)
1950 break;
1951
1952 SDValue Chain = Node->getOperand(Num: 0);
1953 SDValue Base = Node->getOperand(Num: 1);
1954 SDValue Offset = Node->getOperand(Num: 2);
1955
1956 bool Simm12 = false;
1957 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1958
1959 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Val&: Offset)) {
1960 int ConstantVal = ConstantOffset->getSExtValue();
1961 Simm12 = isInt<12>(x: ConstantVal);
1962 if (Simm12)
1963 Offset = CurDAG->getTargetConstant(Val: ConstantVal, DL: SDLoc(Offset),
1964 VT: Offset.getValueType());
1965 }
1966
1967 unsigned Opcode = 0;
1968 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1969 case MVT::i8:
1970 if (Simm12 && SignExtend)
1971 Opcode = RISCV::CV_LB_ri_inc;
1972 else if (Simm12 && !SignExtend)
1973 Opcode = RISCV::CV_LBU_ri_inc;
1974 else if (!Simm12 && SignExtend)
1975 Opcode = RISCV::CV_LB_rr_inc;
1976 else
1977 Opcode = RISCV::CV_LBU_rr_inc;
1978 break;
1979 case MVT::i16:
1980 if (Simm12 && SignExtend)
1981 Opcode = RISCV::CV_LH_ri_inc;
1982 else if (Simm12 && !SignExtend)
1983 Opcode = RISCV::CV_LHU_ri_inc;
1984 else if (!Simm12 && SignExtend)
1985 Opcode = RISCV::CV_LH_rr_inc;
1986 else
1987 Opcode = RISCV::CV_LHU_rr_inc;
1988 break;
1989 case MVT::i32:
1990 if (Simm12)
1991 Opcode = RISCV::CV_LW_ri_inc;
1992 else
1993 Opcode = RISCV::CV_LW_rr_inc;
1994 break;
1995 default:
1996 break;
1997 }
1998 if (!Opcode)
1999 break;
2000
2001 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: XLenVT, VT2: XLenVT,
2002 VT3: Chain.getSimpleValueType(), Op1: Base,
2003 Op2: Offset, Op3: Chain));
2004 return;
2005 }
2006 break;
2007 }
2008 case RISCVISD::LD_RV32: {
2009 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
2010
2011 SDValue Base, Offset;
2012 SDValue Chain = Node->getOperand(Num: 0);
2013 SDValue Addr = Node->getOperand(Num: 1);
2014 SelectAddrRegImm(Addr, Base, Offset);
2015
2016 SDValue Ops[] = {Base, Offset, Chain};
2017 MachineSDNode *New = CurDAG->getMachineNode(
2018 Opcode: RISCV::LD_RV32, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2019 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
2020 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2021 ReplaceUses(F: SDValue(Node, 0), T: Lo);
2022 ReplaceUses(F: SDValue(Node, 1), T: Hi);
2023 ReplaceUses(F: SDValue(Node, 2), T: SDValue(New, 1));
2024 CurDAG->RemoveDeadNode(N: Node);
2025 return;
2026 }
2027 case RISCVISD::SD_RV32: {
2028 SDValue Base, Offset;
2029 SDValue Chain = Node->getOperand(Num: 0);
2030 SDValue Addr = Node->getOperand(Num: 3);
2031 SelectAddrRegImm(Addr, Base, Offset);
2032
2033 SDValue Lo = Node->getOperand(Num: 1);
2034 SDValue Hi = Node->getOperand(Num: 2);
2035
2036 SDValue RegPair;
2037 // Peephole to use X0_Pair for storing zero.
2038 if (isNullConstant(V: Lo) && isNullConstant(V: Hi)) {
2039 RegPair = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
2040 } else {
2041 RegPair = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo, Hi);
2042 }
2043
2044 MachineSDNode *New = CurDAG->getMachineNode(Opcode: RISCV::SD_RV32, dl: DL, VT: MVT::Other,
2045 Ops: {RegPair, Base, Offset, Chain});
2046 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2047 ReplaceUses(F: SDValue(Node, 0), T: SDValue(New, 0));
2048 CurDAG->RemoveDeadNode(N: Node);
2049 return;
2050 }
2051 case RISCVISD::ADDD:
2052 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2053 // widening multiply.
2054 if (tryWideningMulAcc(Node, DL))
2055 return;
2056
2057 // Fall through to regular ADDD selection.
2058 [[fallthrough]];
2059 case RISCVISD::SUBD:
2060 case RISCVISD::PPAIRE_DB:
2061 case RISCVISD::WADDAU:
2062 case RISCVISD::WSUBAU: {
2063 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2064 assert(
2065 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2066 "Unexpected opcode");
2067
2068 SDValue Op0Lo = Node->getOperand(Num: 0);
2069 SDValue Op0Hi = Node->getOperand(Num: 1);
2070
2071 SDValue Op0;
2072 if (isNullConstant(V: Op0Lo) && isNullConstant(V: Op0Hi)) {
2073 Op0 = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
2074 } else {
2075 Op0 = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: Op0Lo, Hi: Op0Hi);
2076 }
2077
2078 SDValue Op1Lo = Node->getOperand(Num: 2);
2079 SDValue Op1Hi = Node->getOperand(Num: 3);
2080
2081 MachineSDNode *New;
2082 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU) {
2083 // WADDAU/WSUBAU: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi are
2084 // the two 32-bit values.
2085 unsigned Opc = Opcode == RISCVISD::WADDAU ? RISCV::WADDAU : RISCV::WSUBAU;
2086 New = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Op0, Op2: Op1Lo, Op3: Op1Hi);
2087 } else {
2088 SDValue Op1 = buildGPRPair(CurDAG, DL, VT: MVT::Untyped, Lo: Op1Lo, Hi: Op1Hi);
2089
2090 unsigned Opc;
2091 switch (Opcode) {
2092 default:
2093 llvm_unreachable("Unexpected opcode");
2094 case RISCVISD::ADDD:
2095 Opc = RISCV::ADDD;
2096 break;
2097 case RISCVISD::SUBD:
2098 Opc = RISCV::SUBD;
2099 break;
2100 case RISCVISD::PPAIRE_DB:
2101 Opc = RISCV::PPAIRE_DB;
2102 break;
2103 }
2104 New = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Op1: Op0, Op2: Op1);
2105 }
2106
2107 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, Pair: SDValue(New, 0));
2108 ReplaceUses(F: SDValue(Node, 0), T: Lo);
2109 ReplaceUses(F: SDValue(Node, 1), T: Hi);
2110 CurDAG->RemoveDeadNode(N: Node);
2111 return;
2112 }
2113 case ISD::INTRINSIC_WO_CHAIN: {
2114 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
2115 switch (IntNo) {
2116 // By default we do not custom select any intrinsic.
2117 default:
2118 break;
2119 case Intrinsic::riscv_vmsgeu:
2120 case Intrinsic::riscv_vmsge: {
2121 SDValue Src1 = Node->getOperand(Num: 1);
2122 SDValue Src2 = Node->getOperand(Num: 2);
2123 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2124 bool IsCmpConstant = false;
2125 bool IsCmpMinimum = false;
2126 // Only custom select scalar second operand.
2127 if (Src2.getValueType() != XLenVT)
2128 break;
2129 // Small constants are handled with patterns.
2130 int64_t CVal = 0;
2131 MVT Src1VT = Src1.getSimpleValueType();
2132 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
2133 IsCmpConstant = true;
2134 CVal = C->getSExtValue();
2135 if (CVal >= -15 && CVal <= 16) {
2136 if (!IsUnsigned || CVal != 0)
2137 break;
2138 IsCmpMinimum = true;
2139 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2140 numBits: Src1VT.getScalarSizeInBits())
2141 .getSExtValue()) {
2142 IsCmpMinimum = true;
2143 }
2144 }
2145 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2146 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
2147 default:
2148 llvm_unreachable("Unexpected LMUL!");
2149#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2150 case RISCVVType::lmulenum: \
2151 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2152 : RISCV::PseudoVMSLT_VX_##suffix; \
2153 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2154 : RISCV::PseudoVMSGT_VX_##suffix; \
2155 break;
2156 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2157 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2158 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2159 CASE_VMSLT_OPCODES(LMUL_1, M1)
2160 CASE_VMSLT_OPCODES(LMUL_2, M2)
2161 CASE_VMSLT_OPCODES(LMUL_4, M4)
2162 CASE_VMSLT_OPCODES(LMUL_8, M8)
2163#undef CASE_VMSLT_OPCODES
2164 }
2165 // Mask operations use the LMUL from the mask type.
2166 switch (RISCVTargetLowering::getLMUL(VT)) {
2167 default:
2168 llvm_unreachable("Unexpected LMUL!");
2169#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2170 case RISCVVType::lmulenum: \
2171 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2172 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2173 break;
2174 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2175 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2176 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2177 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2178 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2179 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2180 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2181#undef CASE_VMNAND_VMSET_OPCODES
2182 }
2183 SDValue SEW = CurDAG->getTargetConstant(
2184 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
2185 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
2186 SDValue VL;
2187 selectVLOp(N: Node->getOperand(Num: 3), VL);
2188
2189 // If vmsge(u) with minimum value, expand it to vmset.
2190 if (IsCmpMinimum) {
2191 ReplaceNode(F: Node,
2192 T: CurDAG->getMachineNode(Opcode: VMSetOpcode, dl: DL, VT, Op1: VL, Op2: MaskSEW));
2193 return;
2194 }
2195
2196 if (IsCmpConstant) {
2197 SDValue Imm =
2198 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
2199
2200 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMSGTOpcode, dl: DL, VT,
2201 Ops: {Src1, Imm, VL, SEW}));
2202 return;
2203 }
2204
2205 // Expand to
2206 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2207 SDValue Cmp = SDValue(
2208 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
2209 0);
2210 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMNANDOpcode, dl: DL, VT,
2211 Ops: {Cmp, Cmp, VL, MaskSEW}));
2212 return;
2213 }
2214 case Intrinsic::riscv_vmsgeu_mask:
2215 case Intrinsic::riscv_vmsge_mask: {
2216 SDValue Src1 = Node->getOperand(Num: 2);
2217 SDValue Src2 = Node->getOperand(Num: 3);
2218 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2219 bool IsCmpConstant = false;
2220 bool IsCmpMinimum = false;
2221 // Only custom select scalar second operand.
2222 if (Src2.getValueType() != XLenVT)
2223 break;
2224 // Small constants are handled with patterns.
2225 MVT Src1VT = Src1.getSimpleValueType();
2226 int64_t CVal = 0;
2227 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
2228 IsCmpConstant = true;
2229 CVal = C->getSExtValue();
2230 if (CVal >= -15 && CVal <= 16) {
2231 if (!IsUnsigned || CVal != 0)
2232 break;
2233 IsCmpMinimum = true;
2234 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2235 numBits: Src1VT.getScalarSizeInBits())
2236 .getSExtValue()) {
2237 IsCmpMinimum = true;
2238 }
2239 }
2240 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2241 VMOROpcode, VMSGTMaskOpcode;
2242 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
2243 default:
2244 llvm_unreachable("Unexpected LMUL!");
2245#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2246 case RISCVVType::lmulenum: \
2247 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2248 : RISCV::PseudoVMSLT_VX_##suffix; \
2249 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2250 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2251 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2252 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2253 break;
2254 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2255 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2256 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2257 CASE_VMSLT_OPCODES(LMUL_1, M1)
2258 CASE_VMSLT_OPCODES(LMUL_2, M2)
2259 CASE_VMSLT_OPCODES(LMUL_4, M4)
2260 CASE_VMSLT_OPCODES(LMUL_8, M8)
2261#undef CASE_VMSLT_OPCODES
2262 }
2263 // Mask operations use the LMUL from the mask type.
2264 switch (RISCVTargetLowering::getLMUL(VT)) {
2265 default:
2266 llvm_unreachable("Unexpected LMUL!");
2267#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2268 case RISCVVType::lmulenum: \
2269 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2270 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2271 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2272 break;
2273 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2274 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2275 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2276 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2277 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2278 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2279 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2280#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2281 }
2282 SDValue SEW = CurDAG->getTargetConstant(
2283 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
2284 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
2285 SDValue VL;
2286 selectVLOp(N: Node->getOperand(Num: 5), VL);
2287 SDValue MaskedOff = Node->getOperand(Num: 1);
2288 SDValue Mask = Node->getOperand(Num: 4);
2289
2290 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2291 if (IsCmpMinimum) {
2292 // We don't need vmor if the MaskedOff and the Mask are the same
2293 // value.
2294 if (Mask == MaskedOff) {
2295 ReplaceUses(F: Node, T: Mask.getNode());
2296 return;
2297 }
2298 ReplaceNode(F: Node,
2299 T: CurDAG->getMachineNode(Opcode: VMOROpcode, dl: DL, VT,
2300 Ops: {Mask, MaskedOff, VL, MaskSEW}));
2301 return;
2302 }
2303
2304 // If the MaskedOff value and the Mask are the same value use
2305 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2306 // This avoids needing to copy v0 to vd before starting the next sequence.
2307 if (Mask == MaskedOff) {
2308 SDValue Cmp = SDValue(
2309 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
2310 0);
2311 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMANDNOpcode, dl: DL, VT,
2312 Ops: {Mask, Cmp, VL, MaskSEW}));
2313 return;
2314 }
2315
2316 SDValue PolicyOp =
2317 CurDAG->getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);
2318
2319 if (IsCmpConstant) {
2320 SDValue Imm =
2321 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
2322
2323 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
2324 Opcode: VMSGTMaskOpcode, dl: DL, VT,
2325 Ops: {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2326 return;
2327 }
2328
2329 // Otherwise use
2330 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2331 // The result is mask undisturbed.
2332 // We use the same instructions to emulate mask agnostic behavior, because
2333 // the agnostic result can be either undisturbed or all 1.
2334 SDValue Cmp = SDValue(CurDAG->getMachineNode(Opcode: VMSLTMaskOpcode, dl: DL, VT,
2335 Ops: {MaskedOff, Src1, Src2, Mask,
2336 VL, SEW, PolicyOp}),
2337 0);
2338 // vmxor.mm vd, vd, v0 is used to update active value.
2339 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMXOROpcode, dl: DL, VT,
2340 Ops: {Cmp, Mask, VL, MaskSEW}));
2341 return;
2342 }
2343 case Intrinsic::riscv_vsetvli:
2344 case Intrinsic::riscv_vsetvlimax:
2345 return selectVSETVLI(Node);
2346 case Intrinsic::riscv_sf_vsettnt:
2347 case Intrinsic::riscv_sf_vsettm:
2348 case Intrinsic::riscv_sf_vsettk:
2349 return selectXSfmmVSET(Node);
2350 }
2351 break;
2352 }
2353 case ISD::INTRINSIC_W_CHAIN: {
2354 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2355 switch (IntNo) {
2356 // By default we do not custom select any intrinsic.
2357 default:
2358 break;
2359 case Intrinsic::riscv_vlseg2:
2360 case Intrinsic::riscv_vlseg3:
2361 case Intrinsic::riscv_vlseg4:
2362 case Intrinsic::riscv_vlseg5:
2363 case Intrinsic::riscv_vlseg6:
2364 case Intrinsic::riscv_vlseg7:
2365 case Intrinsic::riscv_vlseg8: {
2366 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2367 /*IsStrided*/ false);
2368 return;
2369 }
2370 case Intrinsic::riscv_vlseg2_mask:
2371 case Intrinsic::riscv_vlseg3_mask:
2372 case Intrinsic::riscv_vlseg4_mask:
2373 case Intrinsic::riscv_vlseg5_mask:
2374 case Intrinsic::riscv_vlseg6_mask:
2375 case Intrinsic::riscv_vlseg7_mask:
2376 case Intrinsic::riscv_vlseg8_mask: {
2377 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2378 /*IsStrided*/ false);
2379 return;
2380 }
2381 case Intrinsic::riscv_vlsseg2:
2382 case Intrinsic::riscv_vlsseg3:
2383 case Intrinsic::riscv_vlsseg4:
2384 case Intrinsic::riscv_vlsseg5:
2385 case Intrinsic::riscv_vlsseg6:
2386 case Intrinsic::riscv_vlsseg7:
2387 case Intrinsic::riscv_vlsseg8: {
2388 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2389 /*IsStrided*/ true);
2390 return;
2391 }
2392 case Intrinsic::riscv_vlsseg2_mask:
2393 case Intrinsic::riscv_vlsseg3_mask:
2394 case Intrinsic::riscv_vlsseg4_mask:
2395 case Intrinsic::riscv_vlsseg5_mask:
2396 case Intrinsic::riscv_vlsseg6_mask:
2397 case Intrinsic::riscv_vlsseg7_mask:
2398 case Intrinsic::riscv_vlsseg8_mask: {
2399 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2400 /*IsStrided*/ true);
2401 return;
2402 }
2403 case Intrinsic::riscv_vloxseg2:
2404 case Intrinsic::riscv_vloxseg3:
2405 case Intrinsic::riscv_vloxseg4:
2406 case Intrinsic::riscv_vloxseg5:
2407 case Intrinsic::riscv_vloxseg6:
2408 case Intrinsic::riscv_vloxseg7:
2409 case Intrinsic::riscv_vloxseg8:
2410 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2411 /*IsOrdered*/ true);
2412 return;
2413 case Intrinsic::riscv_vluxseg2:
2414 case Intrinsic::riscv_vluxseg3:
2415 case Intrinsic::riscv_vluxseg4:
2416 case Intrinsic::riscv_vluxseg5:
2417 case Intrinsic::riscv_vluxseg6:
2418 case Intrinsic::riscv_vluxseg7:
2419 case Intrinsic::riscv_vluxseg8:
2420 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2421 /*IsOrdered*/ false);
2422 return;
2423 case Intrinsic::riscv_vloxseg2_mask:
2424 case Intrinsic::riscv_vloxseg3_mask:
2425 case Intrinsic::riscv_vloxseg4_mask:
2426 case Intrinsic::riscv_vloxseg5_mask:
2427 case Intrinsic::riscv_vloxseg6_mask:
2428 case Intrinsic::riscv_vloxseg7_mask:
2429 case Intrinsic::riscv_vloxseg8_mask:
2430 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2431 /*IsOrdered*/ true);
2432 return;
2433 case Intrinsic::riscv_vluxseg2_mask:
2434 case Intrinsic::riscv_vluxseg3_mask:
2435 case Intrinsic::riscv_vluxseg4_mask:
2436 case Intrinsic::riscv_vluxseg5_mask:
2437 case Intrinsic::riscv_vluxseg6_mask:
2438 case Intrinsic::riscv_vluxseg7_mask:
2439 case Intrinsic::riscv_vluxseg8_mask:
2440 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2441 /*IsOrdered*/ false);
2442 return;
2443 case Intrinsic::riscv_vlseg8ff:
2444 case Intrinsic::riscv_vlseg7ff:
2445 case Intrinsic::riscv_vlseg6ff:
2446 case Intrinsic::riscv_vlseg5ff:
2447 case Intrinsic::riscv_vlseg4ff:
2448 case Intrinsic::riscv_vlseg3ff:
2449 case Intrinsic::riscv_vlseg2ff: {
2450 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false);
2451 return;
2452 }
2453 case Intrinsic::riscv_vlseg8ff_mask:
2454 case Intrinsic::riscv_vlseg7ff_mask:
2455 case Intrinsic::riscv_vlseg6ff_mask:
2456 case Intrinsic::riscv_vlseg5ff_mask:
2457 case Intrinsic::riscv_vlseg4ff_mask:
2458 case Intrinsic::riscv_vlseg3ff_mask:
2459 case Intrinsic::riscv_vlseg2ff_mask: {
2460 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true);
2461 return;
2462 }
2463 case Intrinsic::riscv_vloxei:
2464 case Intrinsic::riscv_vloxei_mask:
2465 case Intrinsic::riscv_vluxei:
2466 case Intrinsic::riscv_vluxei_mask: {
2467 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2468 IntNo == Intrinsic::riscv_vluxei_mask;
2469 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2470 IntNo == Intrinsic::riscv_vloxei_mask;
2471
2472 MVT VT = Node->getSimpleValueType(ResNo: 0);
2473 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2474
2475 unsigned CurOp = 2;
2476 SmallVector<SDValue, 8> Operands;
2477 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2478
2479 MVT IndexVT;
2480 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2481 /*IsStridedOrIndexed*/ true, Operands,
2482 /*IsLoad=*/true, IndexVT: &IndexVT);
2483
2484 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2485 "Element count mismatch");
2486
2487 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2488 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2489 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2490 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2491 reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
2492 "index values when XLEN=32");
2493 }
2494 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2495 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
2496 IndexLMUL: static_cast<unsigned>(IndexLMUL));
2497 MachineSDNode *Load =
2498 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2499
2500 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2501
2502 ReplaceNode(F: Node, T: Load);
2503 return;
2504 }
2505 case Intrinsic::riscv_vlm:
2506 case Intrinsic::riscv_vle:
2507 case Intrinsic::riscv_vle_mask:
2508 case Intrinsic::riscv_vlse:
2509 case Intrinsic::riscv_vlse_mask: {
2510 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2511 IntNo == Intrinsic::riscv_vlse_mask;
2512 bool IsStrided =
2513 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2514
2515 MVT VT = Node->getSimpleValueType(ResNo: 0);
2516 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2517
2518 // The riscv_vlm intrinsic are always tail agnostic and no passthru
2519 // operand at the IR level. In pseudos, they have both policy and
2520 // passthru operand. The passthru operand is needed to track the
2521 // "tail undefined" state, and the policy is there just for
2522 // for consistency - it will always be "don't care" for the
2523 // unmasked form.
2524 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2525 unsigned CurOp = 2;
2526 SmallVector<SDValue, 8> Operands;
2527 if (HasPassthruOperand)
2528 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2529 else {
2530 // We eagerly lower to implicit_def (instead of undef), as we
2531 // otherwise fail to select nodes such as: nxv1i1 = undef
2532 SDNode *Passthru =
2533 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT);
2534 Operands.push_back(Elt: SDValue(Passthru, 0));
2535 }
2536 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2537 Operands, /*IsLoad=*/true);
2538
2539 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2540 const RISCV::VLEPseudo *P =
2541 RISCV::getVLEPseudo(Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
2542 LMUL: static_cast<unsigned>(LMUL));
2543 MachineSDNode *Load =
2544 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2545
2546 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2547
2548 ReplaceNode(F: Node, T: Load);
2549 return;
2550 }
2551 case Intrinsic::riscv_vleff:
2552 case Intrinsic::riscv_vleff_mask: {
2553 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2554
2555 MVT VT = Node->getSimpleValueType(ResNo: 0);
2556 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2557
2558 unsigned CurOp = 2;
2559 SmallVector<SDValue, 7> Operands;
2560 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2561 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2562 /*IsStridedOrIndexed*/ false, Operands,
2563 /*IsLoad=*/true);
2564
2565 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2566 const RISCV::VLEPseudo *P =
2567 RISCV::getVLEPseudo(Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
2568 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2569 MachineSDNode *Load = CurDAG->getMachineNode(
2570 Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2571 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2572
2573 ReplaceNode(F: Node, T: Load);
2574 return;
2575 }
2576 case Intrinsic::riscv_nds_vln:
2577 case Intrinsic::riscv_nds_vln_mask:
2578 case Intrinsic::riscv_nds_vlnu:
2579 case Intrinsic::riscv_nds_vlnu_mask: {
2580 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2581 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2582 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2583 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2584
2585 MVT VT = Node->getSimpleValueType(ResNo: 0);
2586 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2587 unsigned CurOp = 2;
2588 SmallVector<SDValue, 8> Operands;
2589
2590 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2591 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2592 /*IsStridedOrIndexed=*/false, Operands,
2593 /*IsLoad=*/true);
2594
2595 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2596 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2597 Masked: IsMasked, Unsigned: IsUnsigned, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2598 MachineSDNode *Load =
2599 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2600
2601 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2602 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
2603
2604 ReplaceNode(F: Node, T: Load);
2605 return;
2606 }
2607 }
2608 break;
2609 }
2610 case ISD::INTRINSIC_VOID: {
2611 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2612 switch (IntNo) {
2613 case Intrinsic::riscv_vsseg2:
2614 case Intrinsic::riscv_vsseg3:
2615 case Intrinsic::riscv_vsseg4:
2616 case Intrinsic::riscv_vsseg5:
2617 case Intrinsic::riscv_vsseg6:
2618 case Intrinsic::riscv_vsseg7:
2619 case Intrinsic::riscv_vsseg8: {
2620 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2621 /*IsStrided*/ false);
2622 return;
2623 }
2624 case Intrinsic::riscv_vsseg2_mask:
2625 case Intrinsic::riscv_vsseg3_mask:
2626 case Intrinsic::riscv_vsseg4_mask:
2627 case Intrinsic::riscv_vsseg5_mask:
2628 case Intrinsic::riscv_vsseg6_mask:
2629 case Intrinsic::riscv_vsseg7_mask:
2630 case Intrinsic::riscv_vsseg8_mask: {
2631 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2632 /*IsStrided*/ false);
2633 return;
2634 }
2635 case Intrinsic::riscv_vssseg2:
2636 case Intrinsic::riscv_vssseg3:
2637 case Intrinsic::riscv_vssseg4:
2638 case Intrinsic::riscv_vssseg5:
2639 case Intrinsic::riscv_vssseg6:
2640 case Intrinsic::riscv_vssseg7:
2641 case Intrinsic::riscv_vssseg8: {
2642 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2643 /*IsStrided*/ true);
2644 return;
2645 }
2646 case Intrinsic::riscv_vssseg2_mask:
2647 case Intrinsic::riscv_vssseg3_mask:
2648 case Intrinsic::riscv_vssseg4_mask:
2649 case Intrinsic::riscv_vssseg5_mask:
2650 case Intrinsic::riscv_vssseg6_mask:
2651 case Intrinsic::riscv_vssseg7_mask:
2652 case Intrinsic::riscv_vssseg8_mask: {
2653 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2654 /*IsStrided*/ true);
2655 return;
2656 }
2657 case Intrinsic::riscv_vsoxseg2:
2658 case Intrinsic::riscv_vsoxseg3:
2659 case Intrinsic::riscv_vsoxseg4:
2660 case Intrinsic::riscv_vsoxseg5:
2661 case Intrinsic::riscv_vsoxseg6:
2662 case Intrinsic::riscv_vsoxseg7:
2663 case Intrinsic::riscv_vsoxseg8:
2664 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2665 /*IsOrdered*/ true);
2666 return;
2667 case Intrinsic::riscv_vsuxseg2:
2668 case Intrinsic::riscv_vsuxseg3:
2669 case Intrinsic::riscv_vsuxseg4:
2670 case Intrinsic::riscv_vsuxseg5:
2671 case Intrinsic::riscv_vsuxseg6:
2672 case Intrinsic::riscv_vsuxseg7:
2673 case Intrinsic::riscv_vsuxseg8:
2674 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2675 /*IsOrdered*/ false);
2676 return;
2677 case Intrinsic::riscv_vsoxseg2_mask:
2678 case Intrinsic::riscv_vsoxseg3_mask:
2679 case Intrinsic::riscv_vsoxseg4_mask:
2680 case Intrinsic::riscv_vsoxseg5_mask:
2681 case Intrinsic::riscv_vsoxseg6_mask:
2682 case Intrinsic::riscv_vsoxseg7_mask:
2683 case Intrinsic::riscv_vsoxseg8_mask:
2684 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2685 /*IsOrdered*/ true);
2686 return;
2687 case Intrinsic::riscv_vsuxseg2_mask:
2688 case Intrinsic::riscv_vsuxseg3_mask:
2689 case Intrinsic::riscv_vsuxseg4_mask:
2690 case Intrinsic::riscv_vsuxseg5_mask:
2691 case Intrinsic::riscv_vsuxseg6_mask:
2692 case Intrinsic::riscv_vsuxseg7_mask:
2693 case Intrinsic::riscv_vsuxseg8_mask:
2694 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2695 /*IsOrdered*/ false);
2696 return;
2697 case Intrinsic::riscv_vsoxei:
2698 case Intrinsic::riscv_vsoxei_mask:
2699 case Intrinsic::riscv_vsuxei:
2700 case Intrinsic::riscv_vsuxei_mask: {
2701 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2702 IntNo == Intrinsic::riscv_vsuxei_mask;
2703 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2704 IntNo == Intrinsic::riscv_vsoxei_mask;
2705
2706 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2707 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2708
2709 unsigned CurOp = 2;
2710 SmallVector<SDValue, 8> Operands;
2711 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2712
2713 MVT IndexVT;
2714 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2715 /*IsStridedOrIndexed*/ true, Operands,
2716 /*IsLoad=*/false, IndexVT: &IndexVT);
2717
2718 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2719 "Element count mismatch");
2720
2721 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2722 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2723 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2724 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2725 reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
2726 "index values when XLEN=32");
2727 }
2728 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2729 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW,
2730 LMUL: static_cast<unsigned>(LMUL), IndexLMUL: static_cast<unsigned>(IndexLMUL));
2731 MachineSDNode *Store =
2732 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2733
2734 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2735
2736 ReplaceNode(F: Node, T: Store);
2737 return;
2738 }
2739 case Intrinsic::riscv_vsm:
2740 case Intrinsic::riscv_vse:
2741 case Intrinsic::riscv_vse_mask:
2742 case Intrinsic::riscv_vsse:
2743 case Intrinsic::riscv_vsse_mask: {
2744 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2745 IntNo == Intrinsic::riscv_vsse_mask;
2746 bool IsStrided =
2747 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2748
2749 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2750 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2751
2752 unsigned CurOp = 2;
2753 SmallVector<SDValue, 8> Operands;
2754 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2755
2756 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2757 Operands);
2758
2759 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2760 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2761 Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2762 MachineSDNode *Store =
2763 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2764 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2765
2766 ReplaceNode(F: Node, T: Store);
2767 return;
2768 }
2769 case Intrinsic::riscv_sf_vc_x_se:
2770 case Intrinsic::riscv_sf_vc_i_se:
2771 selectSF_VC_X_SE(Node);
2772 return;
2773 case Intrinsic::riscv_sf_vlte8:
2774 case Intrinsic::riscv_sf_vlte16:
2775 case Intrinsic::riscv_sf_vlte32:
2776 case Intrinsic::riscv_sf_vlte64: {
2777 unsigned Log2SEW;
2778 unsigned PseudoInst;
2779 switch (IntNo) {
2780 case Intrinsic::riscv_sf_vlte8:
2781 PseudoInst = RISCV::PseudoSF_VLTE8;
2782 Log2SEW = 3;
2783 break;
2784 case Intrinsic::riscv_sf_vlte16:
2785 PseudoInst = RISCV::PseudoSF_VLTE16;
2786 Log2SEW = 4;
2787 break;
2788 case Intrinsic::riscv_sf_vlte32:
2789 PseudoInst = RISCV::PseudoSF_VLTE32;
2790 Log2SEW = 5;
2791 break;
2792 case Intrinsic::riscv_sf_vlte64:
2793 PseudoInst = RISCV::PseudoSF_VLTE64;
2794 Log2SEW = 6;
2795 break;
2796 }
2797
2798 SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
2799 SDValue TWidenOp = CurDAG->getTargetConstant(Val: 1, DL, VT: XLenVT);
2800 SDValue Operands[] = {Node->getOperand(Num: 2),
2801 Node->getOperand(Num: 3),
2802 Node->getOperand(Num: 4),
2803 SEWOp,
2804 TWidenOp,
2805 Node->getOperand(Num: 0)};
2806
2807 MachineSDNode *TileLoad =
2808 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2809 CurDAG->setNodeMemRefs(N: TileLoad,
2810 NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2811
2812 ReplaceNode(F: Node, T: TileLoad);
2813 return;
2814 }
2815 case Intrinsic::riscv_sf_mm_s_s:
2816 case Intrinsic::riscv_sf_mm_s_u:
2817 case Intrinsic::riscv_sf_mm_u_s:
2818 case Intrinsic::riscv_sf_mm_u_u:
2819 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2820 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2821 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2822 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2823 case Intrinsic::riscv_sf_mm_f_f: {
2824 bool HasFRM = false;
2825 unsigned PseudoInst;
2826 switch (IntNo) {
2827 case Intrinsic::riscv_sf_mm_s_s:
2828 PseudoInst = RISCV::PseudoSF_MM_S_S;
2829 break;
2830 case Intrinsic::riscv_sf_mm_s_u:
2831 PseudoInst = RISCV::PseudoSF_MM_S_U;
2832 break;
2833 case Intrinsic::riscv_sf_mm_u_s:
2834 PseudoInst = RISCV::PseudoSF_MM_U_S;
2835 break;
2836 case Intrinsic::riscv_sf_mm_u_u:
2837 PseudoInst = RISCV::PseudoSF_MM_U_U;
2838 break;
2839 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2840 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2841 HasFRM = true;
2842 break;
2843 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2844 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2845 HasFRM = true;
2846 break;
2847 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2848 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2849 HasFRM = true;
2850 break;
2851 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2852 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2853 HasFRM = true;
2854 break;
2855 case Intrinsic::riscv_sf_mm_f_f:
2856 if (Node->getOperand(Num: 3).getValueType().getScalarType() == MVT::bf16)
2857 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2858 else
2859 PseudoInst = RISCV::PseudoSF_MM_F_F;
2860 HasFRM = true;
2861 break;
2862 }
2863 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2864 SDValue Op1 = Node->getOperand(Num: 3);
2865 SDValue Op2 = Node->getOperand(Num: 4);
2866 MVT VT = Op1->getSimpleValueType(ResNo: 0);
2867 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2868 SDValue TmOp = Node->getOperand(Num: 5);
2869 SDValue TnOp = Node->getOperand(Num: 6);
2870 SDValue TkOp = Node->getOperand(Num: 7);
2871 SDValue TWidenOp = Node->getOperand(Num: 8);
2872 SDValue Chain = Node->getOperand(Num: 0);
2873
2874 // sf.mm.f.f with sew=32, twiden=2 is invalid
2875 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2876 TWidenOp->getAsZExtVal() == 2)
2877 reportFatalUsageError(reason: "sf.mm.f.f doesn't support (sew=32, twiden=2)");
2878
2879 SmallVector<SDValue, 10> Operands(
2880 {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Op1, Op2});
2881 if (HasFRM)
2882 Operands.push_back(
2883 Elt: CurDAG->getTargetConstant(Val: RISCVFPRndMode::DYN, DL, VT: XLenVT));
2884 Operands.append(IL: {TmOp, TnOp, TkOp,
2885 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT), TWidenOp,
2886 Chain});
2887
2888 auto *NewNode =
2889 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2890
2891 ReplaceNode(F: Node, T: NewNode);
2892 return;
2893 }
2894 case Intrinsic::riscv_sf_vtzero_t: {
2895 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2896 SDValue Tm = Node->getOperand(Num: 3);
2897 SDValue Tn = Node->getOperand(Num: 4);
2898 SDValue Log2SEW = Node->getOperand(Num: 5);
2899 SDValue TWiden = Node->getOperand(Num: 6);
2900 SDValue Chain = Node->getOperand(Num: 0);
2901 auto *NewNode = CurDAG->getMachineNode(
2902 Opcode: RISCV::PseudoSF_VTZERO_T, dl: DL, VTs: Node->getVTList(),
2903 Ops: {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Tm, Tn, Log2SEW,
2904 TWiden, Chain});
2905
2906 ReplaceNode(F: Node, T: NewNode);
2907 return;
2908 }
2909 }
2910 break;
2911 }
2912 case ISD::BITCAST: {
2913 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2914 // Just drop bitcasts between vectors if both are fixed or both are
2915 // scalable.
2916 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2917 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2918 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2919 CurDAG->RemoveDeadNode(N: Node);
2920 return;
2921 }
2922 if (Subtarget->hasStdExtP()) {
2923 bool Is32BitCast =
2924 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2925 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2926 bool Is64BitCast =
2927 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2928 SrcVT == MVT::v2i32)) ||
2929 (SrcVT == MVT::i64 &&
2930 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2931 if (Is32BitCast || Is64BitCast) {
2932 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2933 CurDAG->RemoveDeadNode(N: Node);
2934 return;
2935 }
2936 }
2937 break;
2938 }
2939 case ISD::SPLAT_VECTOR: {
2940 if (!Subtarget->hasStdExtP())
2941 break;
2942 auto *ConstNode = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 0));
2943 if (!ConstNode)
2944 break;
2945
2946 if (ConstNode->isZero()) {
2947 SDValue New =
2948 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: RISCV::X0, VT);
2949 ReplaceNode(F: Node, T: New.getNode());
2950 return;
2951 }
2952
2953 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2954 APInt Val = ConstNode->getAPIntValue().trunc(width: EltSize);
2955
2956 // Find the smallest splat.
2957 if (Val.getBitWidth() > 16 && Val.isSplat(SplatSizeInBits: 16))
2958 Val = Val.trunc(width: 16);
2959 if (Val.getBitWidth() > 8 && Val.isSplat(SplatSizeInBits: 8))
2960 Val = Val.trunc(width: 8);
2961
2962 EltSize = Val.getBitWidth();
2963 int64_t Imm = Val.getSExtValue();
2964
2965 unsigned Opc = 0;
2966 if (EltSize == 8) {
2967 Opc = RISCV::PLI_B;
2968 } else if (isInt<10>(x: Imm)) {
2969 Opc = EltSize == 32 ? RISCV::PLI_W : RISCV::PLI_H;
2970 } else if (EltSize == 16 && isShiftedInt<10, 6>(x: Imm)) {
2971 Opc = RISCV::PLUI_H;
2972 Imm = Imm >> 6;
2973 } else if (EltSize == 32 && isShiftedInt<10, 22>(x: Imm)) {
2974 Opc = RISCV::PLUI_W;
2975 Imm = Imm >> 22;
2976 }
2977
2978 if (Opc) {
2979 SDNode *NewNode = CurDAG->getMachineNode(
2980 Opcode: Opc, dl: DL, VT, Op1: CurDAG->getSignedTargetConstant(Val: Imm, DL, VT: XLenVT));
2981 ReplaceNode(F: Node, T: NewNode);
2982 return;
2983 }
2984
2985 break;
2986 }
2987 case ISD::SCALAR_TO_VECTOR:
2988 if (Subtarget->hasStdExtP()) {
2989 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2990 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2991 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2992 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2993 CurDAG->RemoveDeadNode(N: Node);
2994 return;
2995 }
2996 }
2997 break;
2998 case ISD::INSERT_SUBVECTOR:
2999 case RISCVISD::TUPLE_INSERT: {
3000 SDValue V = Node->getOperand(Num: 0);
3001 SDValue SubV = Node->getOperand(Num: 1);
3002 SDLoc DL(SubV);
3003 auto Idx = Node->getConstantOperandVal(Num: 2);
3004 MVT SubVecVT = SubV.getSimpleValueType();
3005
3006 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3007 MVT SubVecContainerVT = SubVecVT;
3008 // Establish the correct scalable-vector types for any fixed-length type.
3009 if (SubVecVT.isFixedLengthVector()) {
3010 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT: SubVecVT);
3011 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
3012 [[maybe_unused]] bool ExactlyVecRegSized =
3013 Subtarget->expandVScale(X: SubVecVT.getSizeInBits())
3014 .isKnownMultipleOf(RHS: Subtarget->expandVScale(X: VecRegSize));
3015 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
3016 .getKnownMinValue()));
3017 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
3018 }
3019 MVT ContainerVT = VT;
3020 if (VT.isFixedLengthVector())
3021 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
3022
3023 const auto *TRI = Subtarget->getRegisterInfo();
3024 unsigned SubRegIdx;
3025 std::tie(args&: SubRegIdx, args&: Idx) =
3026 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3027 VecVT: ContainerVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
3028
3029 // If the Idx hasn't been completely eliminated then this is a subvector
3030 // insert which doesn't naturally align to a vector register. These must
3031 // be handled using instructions to manipulate the vector registers.
3032 if (Idx != 0)
3033 break;
3034
3035 RISCVVType::VLMUL SubVecLMUL =
3036 RISCVTargetLowering::getLMUL(VT: SubVecContainerVT);
3037 [[maybe_unused]] bool IsSubVecPartReg =
3038 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3039 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3040 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3041 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3042 V.isUndef()) &&
3043 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3044 "the subvector is smaller than a full-sized register");
3045
3046 // If we haven't set a SubRegIdx, then we must be going between
3047 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3048 if (SubRegIdx == RISCV::NoSubRegister) {
3049 unsigned InRegClassID =
3050 RISCVTargetLowering::getRegClassIDForVecVT(VT: ContainerVT);
3051 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
3052 InRegClassID &&
3053 "Unexpected subvector extraction");
3054 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
3055 SDNode *NewNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
3056 dl: DL, VT, Op1: SubV, Op2: RC);
3057 ReplaceNode(F: Node, T: NewNode);
3058 return;
3059 }
3060
3061 SDValue Insert = CurDAG->getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V, Subreg: SubV);
3062 ReplaceNode(F: Node, T: Insert.getNode());
3063 return;
3064 }
3065 case ISD::EXTRACT_SUBVECTOR:
3066 case RISCVISD::TUPLE_EXTRACT: {
3067 SDValue V = Node->getOperand(Num: 0);
3068 auto Idx = Node->getConstantOperandVal(Num: 1);
3069 MVT InVT = V.getSimpleValueType();
3070 SDLoc DL(V);
3071
3072 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3073 MVT SubVecContainerVT = VT;
3074 // Establish the correct scalable-vector types for any fixed-length type.
3075 if (VT.isFixedLengthVector()) {
3076 assert(Idx == 0);
3077 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3078 }
3079 if (InVT.isFixedLengthVector())
3080 InVT = TLI.getContainerForFixedLengthVector(VT: InVT);
3081
3082 const auto *TRI = Subtarget->getRegisterInfo();
3083 unsigned SubRegIdx;
3084 std::tie(args&: SubRegIdx, args&: Idx) =
3085 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3086 VecVT: InVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
3087
3088 // If the Idx hasn't been completely eliminated then this is a subvector
3089 // extract which doesn't naturally align to a vector register. These must
3090 // be handled using instructions to manipulate the vector registers.
3091 if (Idx != 0)
3092 break;
3093
3094 // If we haven't set a SubRegIdx, then we must be going between
3095 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3096 if (SubRegIdx == RISCV::NoSubRegister) {
3097 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT: InVT);
3098 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
3099 InRegClassID &&
3100 "Unexpected subvector extraction");
3101 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
3102 SDNode *NewNode =
3103 CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT, Op1: V, Op2: RC);
3104 ReplaceNode(F: Node, T: NewNode);
3105 return;
3106 }
3107
3108 SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
3109 ReplaceNode(F: Node, T: Extract.getNode());
3110 return;
3111 }
3112 case RISCVISD::VMV_S_X_VL:
3113 case RISCVISD::VFMV_S_F_VL:
3114 case RISCVISD::VMV_V_X_VL:
3115 case RISCVISD::VFMV_V_F_VL: {
3116 // Try to match splat of a scalar load to a strided load with stride of x0.
3117 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3118 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3119 if (!Node->getOperand(Num: 0).isUndef())
3120 break;
3121 SDValue Src = Node->getOperand(Num: 1);
3122 auto *Ld = dyn_cast<LoadSDNode>(Val&: Src);
3123 // Can't fold load update node because the second
3124 // output is used so that load update node can't be removed.
3125 if (!Ld || Ld->isIndexed())
3126 break;
3127 EVT MemVT = Ld->getMemoryVT();
3128 // The memory VT should be the same size as the element type.
3129 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3130 break;
3131 if (!IsProfitableToFold(N: Src, U: Node, Root: Node) ||
3132 !IsLegalToFold(N: Src, U: Node, Root: Node, OptLevel: TM.getOptLevel()))
3133 break;
3134
3135 SDValue VL;
3136 if (IsScalarMove) {
3137 // We could deal with more VL if we update the VSETVLI insert pass to
3138 // avoid introducing more VSETVLI.
3139 if (!isOneConstant(V: Node->getOperand(Num: 2)))
3140 break;
3141 selectVLOp(N: Node->getOperand(Num: 2), VL);
3142 } else
3143 selectVLOp(N: Node->getOperand(Num: 2), VL);
3144
3145 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
3146 SDValue SEW = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
3147
3148 // If VL=1, then we don't need to do a strided load and can just do a
3149 // regular load.
3150 bool IsStrided = !isOneConstant(V: VL);
3151
3152 // Only do a strided load if we have optimized zero-stride vector load.
3153 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3154 break;
3155
3156 SmallVector<SDValue> Operands = {
3157 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT), 0),
3158 Ld->getBasePtr()};
3159 if (IsStrided)
3160 Operands.push_back(Elt: CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT));
3161 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
3162 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
3163 Operands.append(IL: {VL, SEW, PolicyOp, Ld->getChain()});
3164
3165 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
3166 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3167 /*IsMasked*/ Masked: false, Strided: IsStrided, /*FF*/ false,
3168 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
3169 MachineSDNode *Load =
3170 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, ResultTys: {VT, MVT::Other}, Ops: Operands);
3171 // Update the chain.
3172 ReplaceUses(F: Src.getValue(R: 1), T: SDValue(Load, 1));
3173 // Record the mem-refs
3174 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {Ld->getMemOperand()});
3175 // Replace the splat with the vlse.
3176 ReplaceNode(F: Node, T: Load);
3177 return;
3178 }
3179 case ISD::PREFETCH:
3180 // MIPS's prefetch instruction already encodes the hint within the
3181 // instruction itself, so no extra NTL hint is needed.
3182 if (Subtarget->hasVendorXMIPSCBOP())
3183 break;
3184
3185 unsigned Locality = Node->getConstantOperandVal(Num: 3);
3186 if (Locality > 2)
3187 break;
3188
3189 auto *LoadStoreMem = cast<MemSDNode>(Val: Node);
3190 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3191 MMO->setFlags(MachineMemOperand::MONonTemporal);
3192
3193 int NontemporalLevel = 0;
3194 switch (Locality) {
3195 case 0:
3196 NontemporalLevel = 3; // NTL.ALL
3197 break;
3198 case 1:
3199 NontemporalLevel = 1; // NTL.PALL
3200 break;
3201 case 2:
3202 NontemporalLevel = 0; // NTL.P1
3203 break;
3204 default:
3205 llvm_unreachable("unexpected locality value.");
3206 }
3207
3208 if (NontemporalLevel & 0b1)
3209 MMO->setFlags(MONontemporalBit0);
3210 if (NontemporalLevel & 0b10)
3211 MMO->setFlags(MONontemporalBit1);
3212 break;
3213 }
3214
3215 // Select the default instruction.
3216 SelectCode(N: Node);
3217}
3218
3219bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
3220 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3221 std::vector<SDValue> &OutOps) {
3222 // Always produce a register and immediate operand, as expected by
3223 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3224 switch (ConstraintID) {
3225 case InlineAsm::ConstraintCode::o:
3226 case InlineAsm::ConstraintCode::m: {
3227 SDValue Op0, Op1;
3228 [[maybe_unused]] bool Found = SelectAddrRegImm(Addr: Op, Base&: Op0, Offset&: Op1);
3229 assert(Found && "SelectAddrRegImm should always succeed");
3230 OutOps.push_back(x: Op0);
3231 OutOps.push_back(x: Op1);
3232 return false;
3233 }
3234 case InlineAsm::ConstraintCode::A:
3235 OutOps.push_back(x: Op);
3236 OutOps.push_back(
3237 x: CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget->getXLenVT()));
3238 return false;
3239 default:
3240 report_fatal_error(reason: "Unexpected asm memory constraint " +
3241 InlineAsm::getMemConstraintName(C: ConstraintID));
3242 }
3243
3244 return true;
3245}
3246
3247bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
3248 SDValue &Offset) {
3249 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Addr)) {
3250 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT: Subtarget->getXLenVT());
3251 Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT: Subtarget->getXLenVT());
3252 return true;
3253 }
3254
3255 return false;
3256}
3257
3258// Fold constant addresses.
3259static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3260 const MVT VT, const RISCVSubtarget *Subtarget,
3261 SDValue Addr, SDValue &Base, SDValue &Offset,
3262 bool IsPrefetch = false) {
3263 if (!isa<ConstantSDNode>(Val: Addr))
3264 return false;
3265
3266 int64_t CVal = cast<ConstantSDNode>(Val&: Addr)->getSExtValue();
3267
3268 // If the constant is a simm12, we can fold the whole constant and use X0 as
3269 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3270 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3271 int64_t Lo12 = SignExtend64<12>(x: CVal);
3272 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3273 if (!Subtarget->is64Bit() || isInt<32>(x: Hi)) {
3274 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3275 return false;
3276 if (Hi) {
3277 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3278 Base = SDValue(
3279 CurDAG->getMachineNode(Opcode: RISCV::LUI, dl: DL, VT,
3280 Op1: CurDAG->getTargetConstant(Val: Hi20, DL, VT)),
3281 0);
3282 } else {
3283 Base = CurDAG->getRegister(Reg: RISCV::X0, VT);
3284 }
3285 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3286 return true;
3287 }
3288
3289 // Ask how constant materialization would handle this constant.
3290 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: CVal, STI: *Subtarget);
3291
3292 // If the last instruction would be an ADDI, we can fold its immediate and
3293 // emit the rest of the sequence as the base.
3294 if (Seq.back().getOpcode() != RISCV::ADDI)
3295 return false;
3296 Lo12 = Seq.back().getImm();
3297 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3298 return false;
3299
3300 // Drop the last instruction.
3301 Seq.pop_back();
3302 assert(!Seq.empty() && "Expected more instructions in sequence");
3303
3304 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3305 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3306 return true;
3307}
3308
3309// Is this ADD instruction only used as the base pointer of scalar loads and
3310// stores?
3311static bool isWorthFoldingAdd(SDValue Add) {
3312 for (auto *User : Add->users()) {
3313 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3314 User->getOpcode() != RISCVISD::LD_RV32 &&
3315 User->getOpcode() != RISCVISD::SD_RV32 &&
3316 User->getOpcode() != ISD::ATOMIC_LOAD &&
3317 User->getOpcode() != ISD::ATOMIC_STORE)
3318 return false;
3319 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3320 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3321 VT != MVT::f64)
3322 return false;
3323 // Don't allow stores of the value. It must be used as the address.
3324 if (User->getOpcode() == ISD::STORE &&
3325 cast<StoreSDNode>(Val: User)->getValue() == Add)
3326 return false;
3327 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3328 cast<AtomicSDNode>(Val: User)->getVal() == Add)
3329 return false;
3330 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3331 (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add))
3332 return false;
3333 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
3334 return false;
3335 }
3336
3337 return true;
3338}
3339
3340bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3341 switch (User->getOpcode()) {
3342 default:
3343 return false;
3344 case ISD::LOAD:
3345 case RISCVISD::LD_RV32:
3346 case ISD::ATOMIC_LOAD:
3347 break;
3348 case ISD::STORE:
3349 // Don't allow stores of Add. It must only be used as the address.
3350 if (cast<StoreSDNode>(Val: User)->getValue() == Add)
3351 return false;
3352 break;
3353 case RISCVISD::SD_RV32:
3354 // Don't allow stores of Add. It must only be used as the address.
3355 if (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add)
3356 return false;
3357 break;
3358 case ISD::ATOMIC_STORE:
3359 // Don't allow stores of Add. It must only be used as the address.
3360 if (cast<AtomicSDNode>(Val: User)->getVal() == Add)
3361 return false;
3362 break;
3363 }
3364
3365 return true;
3366}
3367
// To prevent SelectAddrRegImm from folding offsets that conflict with the
// fusion of PseudoMovAddr, check if the offset of every use of a given address
// is within the alignment.
bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
                                                  Align Alignment) {
  assert(Addr->getOpcode() == RISCVISD::ADD_LO);
  for (auto *User : Addr->users()) {
    // If the user is a load or store, then the offset is 0 which is always
    // within alignment.
    if (isRegImmLoadOrStore(User, Add: Addr))
      continue;

    // Otherwise the user must be (add Addr, C): require 0 <= C < Alignment
    // and that every use of that add is itself a foldable load/store.
    if (CurDAG->isBaseWithConstantOffset(Op: SDValue(User, 0))) {
      int64_t CVal = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
      // Negative CVal converts to a huge unsigned value and so also fails
      // the Alignment <= CVal compare, rejecting negative offsets.
      if (!isInt<12>(x: CVal) || Alignment <= CVal)
        return false;

      // Make sure all uses are foldable load/stores.
      for (auto *AddUser : User->users())
        if (!isRegImmLoadOrStore(User: AddUser, Add: SDValue(User, 0)))
          return false;

      continue;
    }

    // Any other kind of user defeats the analysis.
    return false;
  }

  return true;
}
3398
/// Select a (Base, simm12 Offset) pair for a scalar memory address. Always
/// returns true; the final fallback is the whole address as Base with a zero
/// Offset.
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
                                         SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  // (ADD_LO hi, lo): use hi as the base and the lo part as the offset, unless
  // folding a global's lo part could conflict with other offset uses (see
  // areOffsetsWithinAlignment).
  if (Addr.getOpcode() == RISCVISD::ADD_LO) {
    bool CanFold = true;
    // Unconditionally fold if operand 1 is not a global address (e.g.
    // externsymbol)
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val: Addr.getOperand(i: 1))) {
      const DataLayout &DL = CurDAG->getDataLayout();
      Align Alignment = commonAlignment(
          A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
      if (!areOffsetsWithinAlignment(Addr, Alignment))
        CanFold = false;
    }
    if (CanFold) {
      Base = Addr.getOperand(i: 0);
      Offset = Addr.getOperand(i: 1);
      return true;
    }
  }

  // (add base, simm12): fold the constant into the offset field.
  if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    if (isInt<12>(x: CVal)) {
      Base = Addr.getOperand(i: 0);
      if (Base.getOpcode() == RISCVISD::ADD_LO) {
        SDValue LoOperand = Base.getOperand(i: 1);
        if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: LoOperand)) {
          // If the Lo in (ADD_LO hi, lo) is a global variable's address
          // (its low part, really), then we can rely on the alignment of that
          // variable to provide a margin of safety before low part can overflow
          // the 12 bits of the load/store offset. Check if CVal falls within
          // that margin; if so (low part + CVal) can't overflow.
          const DataLayout &DL = CurDAG->getDataLayout();
          Align Alignment = commonAlignment(
              A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
          if ((CVal == 0 || Alignment > CVal) &&
              areOffsetsWithinAlignment(Addr: Base, Alignment)) {
            // Fold CVal into the global's target offset.
            int64_t CombinedOffset = CVal + GA->getOffset();
            Base = Base.getOperand(i: 0);
            Offset = CurDAG->getTargetGlobalAddress(
                GV: GA->getGlobal(), DL: SDLoc(LoOperand), VT: LoOperand.getValueType(),
                offset: CombinedOffset, TargetFlags: GA->getTargetFlags());
            return true;
          }
        }
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
        Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
    // an ADDI for part of the offset and fold the rest into the load/store.
    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
    if (CVal >= -4096 && CVal <= 4094) {
      int64_t Adj = CVal < 0 ? -2048 : 2047;
      Base = SDValue(
          CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
                                 Op2: CurDAG->getSignedTargetConstant(Val: Adj, DL, VT)),
          0);
      Offset = CurDAG->getSignedTargetConstant(Val: CVal - Adj, DL, VT);
      return true;
    }

    // For larger immediates, we might be able to save one instruction from
    // constant materialization by folding the Lo12 bits of the immediate into
    // the address. We should only do this if the ADD is only used by loads and
    // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
    // separately with the full materialized immediate creating extra
    // instructions.
    if (isWorthFoldingAdd(Add: Addr) &&
        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
                           Offset, /*IsPrefetch=*/false)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
          0);
      return true;
    }
  }

  // A bare constant address may itself be selectable as base+offset.
  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/false))
    return true;

  // Fallback: whole address in a register, zero offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
  return true;
}
3502
3503/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3504bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
3505 SDValue &Offset) {
3506 if (SelectAddrFrameIndex(Addr, Base, Offset))
3507 return true;
3508
3509 SDLoc DL(Addr);
3510 MVT VT = Addr.getSimpleValueType();
3511
3512 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3513 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3514 if (isUInt<9>(x: CVal)) {
3515 Base = Addr.getOperand(i: 0);
3516
3517 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3518 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3519 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3520 return true;
3521 }
3522 }
3523
3524 Base = Addr;
3525 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3526 return true;
3527}
3528
/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
/// Offset should be all zeros.
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
                                                 SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    if (isInt<12>(x: CVal)) {
      Base = Addr.getOperand(i: 0);

      // Early-out if not a valid offset.
      // A nonzero low-5-bit part cannot be encoded at all, so keep the whole
      // address as the base with a zero offset.
      if ((CVal & 0b11111) != 0) {
        Base = Addr;
        Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
        return true;
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
        Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
    int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
    // one instruction by folding adjustment (-2048 or 2016) into the address.
    // Note both -2048 and 2016 have their low 5 bits clear, satisfying the
    // Lsb00000 requirement on the folded Offset.
    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj;
      Base =
          SDValue(CurDAG->getMachineNode(
                      Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
                      Op2: CurDAG->getSignedTargetConstant(Val: AdjustedOffset, DL, VT)),
                  0);
      Offset = CurDAG->getSignedTargetConstant(Val: Adj, DL, VT);
      return true;
    }

    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
                           Offset, /*IsPrefetch=*/true)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
          0);
      return true;
    }
  }

  // A bare constant address; IsPrefetch enforces the low-bit restriction.
  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/true))
    return true;

  // Fallback: zero offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
  return true;
}
3595
3596/// Return true if this a load/store that we have a RegRegScale instruction for.
3597static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3598 const RISCVSubtarget &Subtarget) {
3599 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3600 return false;
3601 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3602 if (!(VT.isScalarInteger() &&
3603 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3604 !((VT == MVT::f32 || VT == MVT::f64) &&
3605 Subtarget.hasVendorXTHeadFMemIdx()))
3606 return false;
3607 // Don't allow stores of the value. It must be used as the address.
3608 if (User->getOpcode() == ISD::STORE &&
3609 cast<StoreSDNode>(Val: User)->getValue() == Add)
3610 return false;
3611
3612 return true;
3613}
3614
/// Is it profitable to fold this Add into RegRegScale load/store. If \p
/// Shift is non-null, then we have matched a shl+add. We allow reassociating
/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
/// single addi and we don't have a SHXADD instruction we could use.
/// FIXME: May still need to check how many and what kind of users the SHL has.
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
                                          SDValue Add,
                                          SDValue Shift = SDValue()) {
  bool FoundADDI = false;
  for (auto *User : Add->users()) {
    // Users that are RegRegScale loads/stores can fold the Add directly.
    if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
      continue;

    // Allow a single ADDI that is used by loads/stores if we matched a shift.
    if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
        !isa<ConstantSDNode>(Val: User->getOperand(Num: 1)) ||
        !isInt<12>(x: cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue()))
      return false;

    FoundADDI = true;

    // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
    assert(Shift.getOpcode() == ISD::SHL);
    unsigned ShiftAmt = Shift.getConstantOperandVal(i: 1);
    if (Subtarget.hasShlAdd(ShAmt: ShiftAmt))
      return false;

    // All users of the ADDI should be load/store.
    for (auto *ADDIUser : User->users())
      if (!isRegRegScaleLoadOrStore(User: ADDIUser, Add: SDValue(User, 0), Subtarget))
        return false;
  }

  return true;
}
3650
/// Match Addr as (Base + (Index << Scale)) for scaled reg+reg addressing.
/// \p MaxShiftAmount bounds the scale; Scale is emitted as a target constant
/// (0 when no shift was matched).
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                              unsigned MaxShiftAmount,
                                              SDValue &Base, SDValue &Index,
                                              SDValue &Scale) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = Addr.getOperand(i: 0);
  SDValue RHS = Addr.getOperand(i: 1);

  EVT VT = Addr.getSimpleValueType();
  // Helper: match N as (shl X, C) with C in [0, MaxShiftAmount], setting
  // Index/Shift outputs on success.
  auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
                                              SDValue &Shift) {
    if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
      return false;

    // Only match shifts by a value in range [0, MaxShiftAmount].
    unsigned ShiftAmt = N.getConstantOperandVal(i: 1);
    if (ShiftAmt > MaxShiftAmount)
      return false;

    Index = N.getOperand(i: 0);
    Shift = CurDAG->getTargetConstant(Val: ShiftAmt, DL: SDLoc(N), VT);
    return true;
  };

  if (auto *C1 = dyn_cast<ConstantSDNode>(Val&: RHS)) {
    // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
    // The reassociated ADDI is emitted here as a machine node.
    if (LHS.getOpcode() == ISD::ADD &&
        !isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
        isInt<12>(x: C1->getSExtValue())) {
      if (SelectShl(LHS.getOperand(i: 1), Index, Scale) &&
          isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 1))) {
        SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
                                                  DL: SDLoc(Addr), VT);
        Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
                                              Op1: LHS.getOperand(i: 0), Op2: C1Val),
                       0);
        return true;
      }

      // Add is commutative so we need to check both operands.
      if (SelectShl(LHS.getOperand(i: 0), Index, Scale) &&
          isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 0))) {
        SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
                                                  DL: SDLoc(Addr), VT);
        Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
                                              Op1: LHS.getOperand(i: 1), Op2: C1Val),
                       0);
        return true;
      }
    }

    // Don't match add with constants.
    // FIXME: Is this profitable for large constants that have 0s in the lower
    // 12 bits that we can materialize with LUI?
    return false;
  }

  // Try to match a shift on the RHS.
  if (SelectShl(RHS, Index, Scale)) {
    if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: RHS))
      return false;
    Base = LHS;
    return true;
  }

  // Try to match a shift on the LHS.
  if (SelectShl(LHS, Index, Scale)) {
    if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: LHS))
      return false;
    Base = RHS;
    return true;
  }

  // No shift: plain reg+reg with a scale of zero.
  if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr))
    return false;

  Base = LHS;
  Index = RHS;
  Scale = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT);
  return true;
}
3733
3734bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3735 unsigned MaxShiftAmount,
3736 unsigned Bits, SDValue &Base,
3737 SDValue &Index,
3738 SDValue &Scale) {
3739 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3740 return false;
3741
3742 if (Index.getOpcode() == ISD::AND) {
3743 auto *C = dyn_cast<ConstantSDNode>(Val: Index.getOperand(i: 1));
3744 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3745 Index = Index.getOperand(i: 0);
3746 return true;
3747 }
3748 }
3749
3750 return false;
3751}
3752
3753bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3754 SDValue &Offset) {
3755 if (Addr.getOpcode() != ISD::ADD)
3756 return false;
3757
3758 if (isa<ConstantSDNode>(Val: Addr.getOperand(i: 1)))
3759 return false;
3760
3761 Base = Addr.getOperand(i: 0);
3762 Offset = Addr.getOperand(i: 1);
3763 return true;
3764}
3765
/// Select the shift-amount operand \p ShAmt for a shift that only reads the
/// low log2(\p ShiftWidth) bits of its amount, stripping redundant
/// zext/AND wrappers and simplifying ADD/SUB-based amounts. Always returns
/// true; in the worst case ShAmt is just N.
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  ShAmt = N;

  // Peek through zext.
  if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
    ShAmt = ShAmt.getOperand(i: 0);

  // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
  // amount. If there is an AND on the shift amount, we can bypass it if it
  // doesn't affect any of those bits.
  if (ShAmt.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
    const APInt &AndMask = ShAmt.getConstantOperandAPInt(i: 1);

    // Since the max shift amount is a power of 2 we can subtract 1 to make a
    // mask that covers the bits needed to represent all shift amounts.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);

    if (ShMask.isSubsetOf(RHS: AndMask)) {
      // The AND keeps all bits the shift reads; drop it.
      ShAmt = ShAmt.getOperand(i: 0);
    } else {
      // SimplifyDemandedBits may have optimized the mask so try restoring any
      // bits that are known zero.
      KnownBits Known = CurDAG->computeKnownBits(Op: ShAmt.getOperand(i: 0));
      if (!ShMask.isSubsetOf(RHS: AndMask | Known.Zero))
        return true;
      ShAmt = ShAmt.getOperand(i: 0);
    }
  }

  if (ShAmt.getOpcode() == ISD::ADD &&
      isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(i: 1);
    // If we are shifting by X+N where N == 0 mod Size, then just shift by X
    // to avoid the ADD.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      ShAmt = ShAmt.getOperand(i: 0);
      return true;
    }
  } else if (ShAmt.getOpcode() == ISD::SUB &&
             isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 0))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(i: 0);
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
    // generate a NEG instead of a SUB of a constant.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      SDValue Zero = CurDAG->getRegister(Reg: RISCV::X0, VT);
      // SUBW on i64, SUB otherwise; the shift only reads the low bits so the
      // high bits of the negation are irrelevant.
      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
      MachineSDNode *Neg = CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT, Op1: Zero,
                                                  Op2: ShAmt.getOperand(i: 1));
      ShAmt = SDValue(Neg, 0);
      return true;
    }
    // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
    // to generate a NOT instead of a SUB of a constant.
    if (Imm % ShiftWidth == ShiftWidth - 1) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      // NOT is XORI with an all-ones immediate.
      MachineSDNode *Not = CurDAG->getMachineNode(
          Opcode: RISCV::XORI, dl: DL, VT, Op1: ShAmt.getOperand(i: 1),
          Op2: CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/IsTarget: true));
      ShAmt = SDValue(Not, 0);
      return true;
    }
  }

  return true;
}
3837
/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
/// check for equality with 0. This function emits instructions that convert the
/// seteq/setne into something that can be compared with 0.
/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
/// ISD::SETNE).
/// On success \p Val is a value that is zero iff LHS == RHS.
bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
                                    SDValue &Val) {
  assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
         "Unexpected condition code!");

  // We're looking for a setcc.
  if (N->getOpcode() != ISD::SETCC)
    return false;

  // Must be an equality comparison.
  ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
  if (CCVal != ExpectedCCVal)
    return false;

  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  if (!LHS.getValueType().isScalarInteger())
    return false;

  // If the RHS side is 0, we don't need any extra instructions, return the LHS.
  if (isNullConstant(V: RHS)) {
    Val = LHS;
    return true;
  }

  SDLoc DL(N);

  if (auto *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
    int64_t CVal = C->getSExtValue();
    // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
    // non-zero otherwise.
    if (CVal == -2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              Opcode: RISCV::XORI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
              Op2: CurDAG->getSignedTargetConstant(Val: CVal, DL, VT: N->getValueType(ResNo: 0))),
          0);
      return true;
    }
    // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
    // if the LHS is equal to the RHS and non-zero otherwise.
    if (isInt<12>(x: CVal) || CVal == 2048) {
      unsigned Opc = RISCV::ADDI;
      // Fold an existing sext-from-i32 of the LHS into an ADDIW.
      if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
          cast<VTSDNode>(Val: LHS.getOperand(i: 1))->getVT() == MVT::i32) {
        Opc = RISCV::ADDIW;
        LHS = LHS.getOperand(i: 0);
      }

      Val = SDValue(CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
                                           Op2: CurDAG->getSignedTargetConstant(
                                               Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
                    0);
      return true;
    }
    // For a power-of-2 RHS with Zbs, flipping that single bit (BINVI) gives
    // zero iff LHS == RHS.
    if (isPowerOf2_64(Value: CVal) && Subtarget->hasStdExtZbs()) {
      Val = SDValue(
          CurDAG->getMachineNode(
              Opcode: RISCV::BINVI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
              Op2: CurDAG->getTargetConstant(Val: Log2_64(Value: CVal), DL, VT: N->getValueType(ResNo: 0))),
          0);
      return true;
    }
    // Same as the addi case above but for larger immediates (signed 26-bit) use
    // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
    // anything which can be done with a single lui as it might be compressible.
    if (Subtarget->hasVendorXqcilia() && isInt<26>(x: CVal) &&
        (CVal & 0xFFF) != 0) {
      Val = SDValue(
          CurDAG->getMachineNode(
              Opcode: RISCV::QC_E_ADDI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
              Op2: CurDAG->getSignedTargetConstant(Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
          0);
      return true;
    }
  }

  // If nothing else we can XOR the LHS and RHS to produce zero if they are
  // equal and a non-zero value if they aren't.
  Val = SDValue(
      CurDAG->getMachineNode(Opcode: RISCV::XOR, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, Op2: RHS), 0);
  return true;
}
3927
/// Select \p Val for a pattern whose consumer only needs the low \p Bits of a
/// sign-extended value: either N is an explicit sign_extend_inreg from Bits
/// (return its input), or N is provably already sign-extended from Bits
/// (return N, stripping a redundant shl+sra sign-extension pair if present).
bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT().getSizeInBits() == Bits) {
    Val = N.getOperand(i: 0);
    return true;
  }

  // (sra (shl X, C), C) is a manual sign extension from (width - C) bits;
  // peel it when both shift amounts equal the expected ShiftAmt.
  auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
    if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
      return N;

    SDValue N0 = N.getOperand(i: 0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
        N.getConstantOperandVal(i: 1) == ShiftAmt &&
        N0.getConstantOperandVal(i: 1) == ShiftAmt)
      return N0.getOperand(i: 0);

    return N;
  };

  // Otherwise prove the extension via known sign bits.
  MVT VT = N.getSimpleValueType();
  if (CurDAG->ComputeNumSignBits(Op: N) > (VT.getSizeInBits() - Bits)) {
    Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
    return true;
  }

  return false;
}
3956
3957bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3958 if (N.getOpcode() == ISD::AND) {
3959 auto *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
3960 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3961 Val = N.getOperand(i: 0);
3962 return true;
3963 }
3964 }
3965 MVT VT = N.getSimpleValueType();
3966 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: Bits);
3967 if (CurDAG->MaskedValueIsZero(Op: N, Mask)) {
3968 Val = N;
3969 return true;
3970 }
3971
3972 return false;
3973}
3974
/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    SDValue N0 = N.getOperand(i: 0);

    // Case 1: (and (shl/srl y, c2), c1).
    if (bool LeftShift = N0.getOpcode() == ISD::SHL;
        (LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
      uint64_t Mask = N.getConstantOperandVal(i: 1);
      unsigned C2 = N0.getConstantOperandVal(i: 1);

      unsigned XLen = Subtarget->getXLen();
      // Clear mask bits that the inner shift already forces to zero so the
      // shifted-mask analysis below sees the effective mask.
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(N: C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(N: XLen - C2);

      if (isShiftedMask_64(Value: Mask)) {
        unsigned Leading = XLen - llvm::bit_width(Value: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        // The mask's trailing zero count must match the SHXADD shift amount.
        if (Trailing != ShAmt)
          return false;

        unsigned Opcode;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
        // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
        // followed by a SHXADD with c3 for the X amount.
        if (LeftShift && Leading == 0 && C2 < Trailing)
          Opcode = RISCV::SRLI;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
        // followed by a SHXADD with c3 for the X amount.
        else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
          Opcode = RISCV::SRLIW;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == C2)
          Opcode = RISCV::SRLI;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == 32 + C2)
          Opcode = RISCV::SRLIW;
        else
          return false;

        SDLoc DL(N);
        EVT VT = N.getValueType();
        // ShAmt is repurposed as the new right-shift amount computed above.
        ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
        Val = SDValue(
            CurDAG->getMachineNode(Opcode, dl: DL, VT, Op1: N0.getOperand(i: 0),
                                   Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)),
            0);
        return true;
      }
    } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
               isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
      // Case 2: (and (sra y, c2), c1).
      uint64_t Mask = N.getConstantOperandVal(i: 1);
      unsigned C2 = N0.getConstantOperandVal(i: 1);

      // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
      // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
      // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
      // the X amount.
      if (isShiftedMask_64(Value: Mask)) {
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Value: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          // First reduce the arithmetic shift to c2 - c3 ...
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT)),
                        0);
          // ... then apply the logical shift c3 + c4.
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRLI, dl: DL, VT, Op1: Val,
                            Op2: CurDAG->getTargetConstant(Val: Leading + ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
             (LeftShift || N.getOpcode() == ISD::SRL) &&
             isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
    // Case 3: (shl/srl (and X, Mask), C1).
    SDValue N0 = N.getOperand(i: 0);
    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
        isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
      uint64_t Mask = N0.getConstantOperandVal(i: 1);
      if (isShiftedMask_64(Value: Mask)) {
        unsigned C1 = N.getConstantOperandVal(i: 1);
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Value: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
        if (LeftShift && Leading == 32 && Trailing > 0 &&
            (Trailing + C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
                        0);
          return true;
        }
        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
        if (!LeftShift && Leading == 32 && Trailing > C1 &&
            (Trailing - C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}
4104
/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD_UW we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
                                          SDValue &Val) {
  // Only handle single-use (and (shl y, c2), c1) chains so the rewrite does
  // not duplicate work for other users.
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1)) &&
      N.hasOneUse()) {
    SDValue N0 = N.getOperand(i: 0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
        N0.hasOneUse()) {
      uint64_t Mask = N.getConstantOperandVal(i: 1);
      unsigned C2 = N0.getConstantOperandVal(i: 1);

      // Drop mask bits the shift already zeroed so the shifted-mask test sees
      // the effective mask.
      Mask &= maskTrailingZeros<uint64_t>(N: C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
      // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
      // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
      if (isShiftedMask_64(Value: Mask)) {
        unsigned Leading = llvm::countl_zero(Val: Mask);
        unsigned Trailing = llvm::countr_zero(Val: Mask);
        if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
                            Op2: CurDAG->getTargetConstant(Val: C2 - ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}
4141
4142bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
4143 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4144 if (N->getFlags().hasDisjoint())
4145 return true;
4146 return CurDAG->haveNoCommonBitsSet(A: N->getOperand(Num: 0), B: N->getOperand(Num: 1));
4147}
4148
4149bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4150 SDValue N, SDValue &Val) {
4151 int OrigCost = RISCVMatInt::getIntMatCost(Val: APInt(64, OrigImm), Size: 64, STI: *Subtarget,
4152 /*CompressionCost=*/true);
4153 int Cost = RISCVMatInt::getIntMatCost(Val: APInt(64, Imm), Size: 64, STI: *Subtarget,
4154 /*CompressionCost=*/true);
4155 if (OrigCost <= Cost)
4156 return false;
4157
4158 Val = selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm, Subtarget: *Subtarget);
4159 return true;
4160}
4161
// Match a constant in the u32 range [0x80000000, 0xffffffff] (bit 31 set,
// bits 63..32 clear) and try to substitute its sign-extended-from-32 twin
// when that is cheaper to materialize. Only done when every user tolerates
// the changed upper bits: ADDs and provably disjoint ORs — presumably these
// are matched to W-form/.uw instructions by the calling patterns; confirm
// against the TableGen uses of this ComplexPattern.
bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(Val: N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
  // Require (Imm >> 31) == 1: bit 31 set and all higher bits zero.
  if ((Imm >> 31) != 1)
    return false;

  // Abandon the transform if any user could observe the upper-bit change.
  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::ADD:
      break;
    case ISD::OR:
      if (orDisjoint(N: U))
        break;
      return false;
    default:
      return false;
    }
  }

  // Sign-extend from bit 31 and select it only if strictly cheaper.
  return selectImm64IfCheaper(Imm: 0xffffffff00000000 | Imm, OrigImm: Imm, N, Val);
}
4184
// Try to materialize -Imm in place of a 64-bit-only constant Imm when the
// negated value is cheaper. Only allowed when every user is an ADD (or a
// VMV_V_X_VL splat whose uses are all scalar/vector adds) — presumably the
// calling patterns rewrite those adds into subtracts; confirm against the
// TableGen uses of this ComplexPattern.
bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(Val: N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
  // Skip simm32 constants; only larger 64-bit materializations are targeted.
  if (isInt<32>(x: Imm))
    return false;
  // -INT64_MIN would overflow.
  if (Imm == INT64_MIN)
    return false;

  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::ADD:
      break;
    case RISCVISD::VMV_V_X_VL:
      // A splat of the constant is acceptable only if every splat use is an
      // add (scalar ISD::ADD or vector ADD_VL).
      if (!all_of(Range: U->users(), P: [](const SDNode *V) {
            return V->getOpcode() == ISD::ADD ||
                   V->getOpcode() == RISCVISD::ADD_VL;
          }))
        return false;
      break;
    default:
      return false;
    }
  }

  // Select -Imm only if strictly cheaper to materialize.
  return selectImm64IfCheaper(Imm: -Imm, OrigImm: Imm, N, Val);
}
4212
// ComplexPattern: select the bitwise inverse (~Imm) of a constant when
// materializing ~Imm is cheaper and every user can absorb the inversion
// (ANDN/ORN/XNOR from Zbb/Zbkb for scalars, VANDN from Zvkb for splats).
bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(Val: N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();

  // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
  // That requires the low 12 bits of Imm to be all-ones (so ~Imm has a zero
  // low 12 bits); Imm == -1 is excluded since its inverse is 0.
  if (isInt<32>(x: Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
    return false;

  // Abandon this transform if the constant is needed elsewhere.
  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      // Scalar logic ops need ANDN/ORN/XNOR, provided by Zbb or Zbkb.
      if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
        return false;
      break;
    case RISCVISD::VMV_V_X_VL:
      // Vector splats can only absorb the inversion via VANDN (Zvkb), and
      // then only when every use of the splat is an AND.
      if (!Subtarget->hasStdExtZvkb())
        return false;
      if (!all_of(Range: U->users(), P: [](const SDNode *V) {
            return V->getOpcode() == ISD::AND ||
                   V->getOpcode() == RISCVISD::AND_VL;
          }))
        return false;
      break;
    default:
      return false;
    }
  }

  if (isInt<32>(x: Imm)) {
    // 32-bit case: the guard above ensured ~Imm is a bare LUI, always a win.
    Val =
        selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm: ~Imm, Subtarget: *Subtarget);
    return true;
  }

  // For 64-bit constants, the instruction sequences get complex,
  // so we select inverted only if it's cheaper.
  return selectImm64IfCheaper(Imm: ~Imm, OrigImm: Imm, N, Val);
}
4255
// Return true if \p User, an already-selected RVV pseudo, only demands the
// low \p Bits of its scalar operand at index \p UserOpNo.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
                                        unsigned Bits,
                                        const TargetInstrInfo *TII) {
  unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: User->getMachineOpcode());

  // Not an RVV pseudo with a corresponding MC opcode.
  if (!MCOpcode)
    return false;

  const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
  const uint64_t TSFlags = MCID.TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return false;
  assert(RISCVII::hasVLOp(TSFlags));

  // Locate the VL operand: operand order is ... VL, SEW, [policy], [chain],
  // so index back from the end past the optional chain and policy operands.
  unsigned ChainOpIdx = User->getNumOperands() - 1;
  bool HasChainOp = User->getOperand(Num: ChainOpIdx).getValueType() == MVT::Other;
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
  unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
  const unsigned Log2SEW = User->getConstantOperandVal(Num: VLIdx + 1);

  // The VL operand itself is fully consumed; it is never a narrow use.
  if (UserOpNo == VLIdx)
    return false;

  auto NumDemandedBits =
      RISCV::getVectorLowDemandedScalarBits(Opcode: MCOpcode, Log2SEW);
  return NumDemandedBits && Bits >= *NumDemandedBits;
}
4283
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
                                        const unsigned Depth) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node) || Depth != 0) &&
         "Unexpected opcode");

  // Bound the recursion performed through the logic-op cases (RecCheck/SRLI)
  // below.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
  if (Depth == 0 && !Node->getValueType(ResNo: 0).isScalarInteger())
    return false;

  for (SDUse &Use : Node->uses()) {
    SDNode *User = Use.getUser();
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      // RVV pseudos may only demand the low SEW bits of a scalar operand.
      if (vectorPseudoHasAllNBitUsers(User, UserOpNo: Use.getOperandNo(), Bits, TII))
        break;
      return false;
    // These W instructions (and conversions from a 32-bit source) only read
    // the low 32 bits of their GPR inputs.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLSW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::ABSW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_W_INX:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_H_WU_INX:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_W_INX:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_S_WU_INX:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_W_INX:
    case RISCV::FCVT_D_WU:
    case RISCV::FCVT_D_WU_INX:
    case RISCV::TH_REVW:
    case RISCV::TH_SRRIW:
      if (Bits >= 32)
        break;
      return false;
    case RISCV::SLL:
    case RISCV::SRA:
    case RISCV::SRL:
    case RISCV::ROL:
    case RISCV::ROR:
    case RISCV::BSET:
    case RISCV::BCLR:
    case RISCV::BINV:
      // Shift amount operands only use log2(Xlen) bits.
      if (Use.getOperandNo() == 1 && Bits >= Log2_32(Value: Subtarget->getXLen()))
        break;
      return false;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(Num: 1))
        break;
      return false;
    case RISCV::ANDI:
      // ANDI with a mask inside [Bits-1:0] only reads those bits.
      if (Bits >= (unsigned)llvm::bit_width(Value: User->getConstantOperandVal(Num: 1)))
        break;
      // Otherwise the ANDI may still be OK if its own users only demand the
      // low Bits of its result.
      goto RecCheck;
    case RISCV::ORI: {
      // ORI forces the bits covered by the immediate to one, so only the
      // bits where ~Imm is set are read from the input.
      uint64_t Imm = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm))
        break;
      [[fallthrough]];
    }
    // These ops propagate demanded bits bit-for-bit; recurse into their
    // users to see whether the upper bits ever matter.
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(Node: User, Bits, Depth: Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(Num: 1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as an
      // N-Bit user.
      if (Bits > ShAmt && hasAllNBitUsers(Node: User, Bits: Bits - ShAmt, Depth: Depth + 1))
        break;
      return false;
    }
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits >= 8)
        break;
      return false;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits >= 16)
        break;
      return false;
    case RISCV::PACK:
      // PACK reads only the low half of each source register.
      if (Bits >= (Subtarget->getXLen() / 2))
        break;
      return false;
    case RISCV::PPAIRE_H:
      // If only the lower 32-bits of the result are used, then only the
      // lower 16 bits of the inputs are used.
      if (Bits >= 16 && hasAllNBitUsers(Node: User, Bits: 32, Depth: Depth + 1))
        break;
      return false;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    // Stores only read as many bits as they store (operand 0 is the value).
    case RISCV::SB:
      if (Use.getOperandNo() == 0 && Bits >= 8)
        break;
      return false;
    case RISCV::SH:
      if (Use.getOperandNo() == 0 && Bits >= 16)
        break;
      return false;
    case RISCV::SW:
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    case RISCV::TH_EXT:
    case RISCV::TH_EXTU: {
      unsigned Msb = User->getConstantOperandVal(Num: 1);
      unsigned Lsb = User->getConstantOperandVal(Num: 2);
      // Behavior of Msb < Lsb is not well documented.
      if (Msb >= Lsb && Bits > Msb)
        break;
      return false;
    }
    }
  }

  return true;
}
4472
4473// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4474bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4475 SDValue &Shl2) {
4476 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4477 if (!C)
4478 return false;
4479
4480 int64_t Offset = C->getSExtValue();
4481 for (unsigned Shift = 0; Shift < 4; Shift++) {
4482 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4483 EVT VT = N->getValueType(ResNo: 0);
4484 Simm5 = CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(N), VT);
4485 Shl2 = CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(N), VT);
4486 return true;
4487 }
4488 }
4489
4490 return false;
4491}
4492
4493// Select VL as a 5 bit immediate or a value that will become a register. This
4494// allows us to choose between VSETIVLI or VSETVLI later.
4495bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4496 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4497 if (C && isUInt<5>(x: C->getZExtValue())) {
4498 VL = CurDAG->getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(N),
4499 VT: N->getValueType(ResNo: 0));
4500 } else if (C && C->isAllOnes()) {
4501 // Treat all ones as VLMax.
4502 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4503 VT: N->getValueType(ResNo: 0));
4504 } else if (isa<RegisterSDNode>(Val: N) &&
4505 cast<RegisterSDNode>(Val&: N)->getReg() == RISCV::X0) {
4506 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4507 // as the register class. Convert X0 to a special immediate to pass the
4508 // MachineVerifier. This is recognized specially by the vsetvli insertion
4509 // pass.
4510 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4511 VT: N->getValueType(ResNo: 0));
4512 } else {
4513 VL = N;
4514 }
4515
4516 return true;
4517}
4518
4519static SDValue findVSplat(SDValue N) {
4520 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4521 if (!N.getOperand(i: 0).isUndef())
4522 return SDValue();
4523 N = N.getOperand(i: 1);
4524 }
4525 SDValue Splat = N;
4526 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4527 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4528 !Splat.getOperand(i: 0).isUndef())
4529 return SDValue();
4530 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4531 return Splat;
4532}
4533
4534bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4535 SDValue Splat = findVSplat(N);
4536 if (!Splat)
4537 return false;
4538
4539 SplatVal = Splat.getOperand(i: 1);
4540 return true;
4541}
4542
// Shared helper for the selectVSplat* immediate ComplexPatterns: match a
// constant splat, validate the (element-sized, sign-extended) immediate with
// \p ValidateImm, and optionally emit Imm - 1 (\p Decrement) for the
// "plus 1" patterns.
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget,
                                  std::function<bool(int64_t)> ValidateImm,
                                  bool Decrement = false) {
  SDValue Splat = findVSplat(N);
  if (!Splat || !isa<ConstantSDNode>(Val: Splat.getOperand(i: 1)))
    return false;

  const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
  assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  APInt SplatConst = Splat.getConstantOperandAPInt(i: 1).sextOrTrunc(width: SplatEltSize);

  int64_t SplatImm = SplatConst.getSExtValue();

  // Validation runs on the pre-decrement value.
  if (!ValidateImm(SplatImm))
    return false;

  if (Decrement)
    SplatImm -= 1;

  SplatVal =
      DAG.getSignedTargetConstant(Val: SplatImm, DL: SDLoc(N), VT: Subtarget.getXLenVT());
  return true;
}
4577
4578bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4579 return selectVSplatImmHelper(N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4580 ValidateImm: [](int64_t Imm) { return isInt<5>(x: Imm); });
4581}
4582
4583bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4584 return selectVSplatImmHelper(
4585 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4586 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4587 /*Decrement=*/true);
4588}
4589
4590bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4591 return selectVSplatImmHelper(
4592 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4593 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4594 /*Decrement=*/false);
4595}
4596
4597bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4598 SDValue &SplatVal) {
4599 return selectVSplatImmHelper(
4600 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4601 ValidateImm: [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4602 /*Decrement=*/true);
4603}
4604
4605bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4606 SDValue &SplatVal) {
4607 return selectVSplatImmHelper(
4608 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4609 ValidateImm: [Bits](int64_t Imm) { return isUIntN(N: Bits, x: Imm); });
4610}
4611
4612bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4613 SDValue Splat = findVSplat(N);
4614 return Splat && selectNegImm(N: Splat.getOperand(i: 1), Val&: SplatVal);
4615}
4616
4617bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4618 auto IsExtOrTrunc = [](SDValue N) {
4619 switch (N->getOpcode()) {
4620 case ISD::SIGN_EXTEND:
4621 case ISD::ZERO_EXTEND:
4622 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4623 // inactive elements will be undef.
4624 case RISCVISD::TRUNCATE_VECTOR_VL:
4625 case RISCVISD::VSEXT_VL:
4626 case RISCVISD::VZEXT_VL:
4627 return true;
4628 default:
4629 return false;
4630 }
4631 };
4632
4633 // We can have multiple nested nodes, so unravel them all if needed.
4634 while (IsExtOrTrunc(N)) {
4635 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4636 return false;
4637 N = N->getOperand(Num: 0);
4638 }
4639
4640 return selectVSplat(N, SplatVal);
4641}
4642
// ComplexPattern: match an FP value that can be produced by moving a scalar
// integer into an FP register; \p Imm receives that XLenVT integer value.
bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
  // Allow bitcasts from XLenVT -> FP.
  if (N.getOpcode() == ISD::BITCAST &&
      N.getOperand(i: 0).getValueType() == Subtarget->getXLenVT()) {
    Imm = N.getOperand(i: 0);
    return true;
  }
  // Allow moves from XLenVT to FP.
  if (N.getOpcode() == RISCVISD::FMV_H_X ||
      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
    Imm = N.getOperand(i: 0);
    return true;
  }

  // Otherwise, look for FP constants that can materialized with scalar int.
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val: N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(ResNo: 0);

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    // On RV32 an f64 bit pattern does not fit in one GPR; the only constant
    // expected to reach here is -0.0, which we reject.
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  // Materialize the raw bit pattern of the float as a scalar integer.
  Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
                  Subtarget: *Subtarget);
  return true;
}
4678
4679bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4680 SDValue &Imm) {
4681 if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) {
4682 int64_t ImmVal = SignExtend64(X: C->getSExtValue(), B: Width);
4683
4684 if (!isInt<5>(x: ImmVal))
4685 return false;
4686
4687 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL: SDLoc(N),
4688 VT: Subtarget->getXLenVT());
4689 return true;
4690 }
4691
4692 return false;
4693}
4694
4695// Match XOR with a VMSET_VL operand. Return the other operand.
4696bool RISCVDAGToDAGISel::selectVMNOTOp(SDValue N, SDValue &Res) {
4697 if (N.getOpcode() != ISD::XOR)
4698 return false;
4699
4700 if (N.getOperand(i: 0).getOpcode() == RISCVISD::VMSET_VL) {
4701 Res = N.getOperand(i: 1);
4702 return true;
4703 }
4704
4705 if (N.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL) {
4706 Res = N.getOperand(i: 0);
4707 return true;
4708 }
4709
4710 return false;
4711}
4712
4713// Match VMXOR_VL with a VMSET_VL operand. Making sure that that VL operand
4714// matches the parent's VL. Return the other operand of the VMXOR_VL.
4715bool RISCVDAGToDAGISel::selectVMNOT_VLOp(SDNode *Parent, SDValue N,
4716 SDValue &Res) {
4717 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4718 return false;
4719
4720 assert(Parent &&
4721 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4722 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4723 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4724 "Unexpected parent");
4725
4726 // The VL should match the parent.
4727 if (Parent->getOperand(Num: 2) != N->getOperand(Num: 2))
4728 return false;
4729
4730 if (N.getOperand(i: 0).getOpcode() == RISCVISD::VMSET_VL) {
4731 Res = N.getOperand(i: 1);
4732 return true;
4733 }
4734
4735 if (N.getOperand(i: 1).getOpcode() == RISCVISD::VMSET_VL) {
4736 Res = N.getOperand(i: 0);
4737 return true;
4738 }
4739
4740 return false;
4741}
4742
// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(V: N->getOperand(Num: 1)))
    return false;

  SDValue N0 = N->getOperand(Num: 0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(i: 0);
    SDValue N01 = N0.getOperand(i: 1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(x: cast<ConstantSDNode>(Val&: N01)->getSExtValue()))
      break;

    // Build the W form with the same operands and replace all uses of the
    // sext.w with it.
    SDNode *Result =
        CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: N->getValueType(ResNo: 0),
                               Op1: N00, Op2: N01);
    ReplaceUses(F: N, T: Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    // The replacement must produce the same value type as the sext.w node;
    // an i32-typed input cannot stand in for the i64 result.
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(F: N, T: N0.getNode());
    return true;
  }

  return false;
}
4811
4812static bool usesAllOnesMask(SDValue MaskOp) {
4813 const auto IsVMSet = [](unsigned Opc) {
4814 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4815 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4816 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4817 Opc == RISCV::PseudoVMSET_M_B8;
4818 };
4819
4820 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4821 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4822 // assume that it's all-ones? Same applies to its VL.
4823 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4824}
4825
4826static bool isImplicitDef(SDValue V) {
4827 if (!V.isMachineOpcode())
4828 return false;
4829 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4830 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4831 if (!isImplicitDef(V: V.getOperand(i: I)))
4832 return false;
4833 return true;
4834 }
4835 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4836}
4837
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(MaskedPseudo: N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(MaskOp: N->getOperand(Num: MaskOpIdx)))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opcode: Opc);
  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MCID);

  const MCInstrDesc &MaskedMCID = TII->get(Opcode: N->getMachineOpcode());
  const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MaskedMCID);

  assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
          !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
         "Unmasked pseudo has policy but masked pseudo doesn't?");
  assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
         "Unexpected pseudo structure");
  assert(!(HasPassthru && !MaskedHasPassthru) &&
         "Unmasked pseudo has passthru but masked pseudo doesn't?");

  // Rebuild the operand list for the unmasked form: possibly drop the
  // passthru (operand 0), always drop the mask, possibly drop the policy.
  SmallVector<SDValue, 8> Ops;
  // Skip the passthru operand at index 0 if the unmasked don't have one.
  bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
  bool DropPolicy = !RISCVII::hasVecPolicyOp(TSFlags: MCID.TSFlags) &&
                    RISCVII::hasVecPolicyOp(TSFlags: MaskedMCID.TSFlags);
  bool HasChainOp =
      N->getOperand(Num: N->getNumOperands() - 1).getValueType() == MVT::Other;
  // The policy operand, when present, is the last operand before the chain.
  unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
  for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask
    SDValue Op = N->getOperand(Num: I);
    if (I == MaskOpIdx)
      continue;
    if (DropPolicy && I == LastOpNum)
      continue;
    Ops.push_back(Elt: Op);
  }

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);

  // Preserve memory operands and node flags on the replacement.
  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(N: Result, NewMemRefs: N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(F: N, T: Result);

  return true;
}
4896
4897/// If our passthru is an implicit_def, use noreg instead. This side
4898/// steps issues with MachineCSE not being able to CSE expressions with
4899/// IMPLICIT_DEF operands while preserving the semantic intent. See
4900/// pr64282 for context. Note that this transform is the last one
4901/// performed at ISEL DAG to DAG.
4902bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4903 bool MadeChange = false;
4904 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4905
4906 while (Position != CurDAG->allnodes_begin()) {
4907 SDNode *N = &*--Position;
4908 if (N->use_empty() || !N->isMachineOpcode())
4909 continue;
4910
4911 const unsigned Opc = N->getMachineOpcode();
4912 if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opc) ||
4913 !RISCVII::isFirstDefTiedToFirstUse(Desc: TII->get(Opcode: Opc)) ||
4914 !isImplicitDef(V: N->getOperand(Num: 0)))
4915 continue;
4916
4917 SmallVector<SDValue> Ops;
4918 Ops.push_back(Elt: CurDAG->getRegister(Reg: RISCV::NoRegister, VT: N->getValueType(ResNo: 0)));
4919 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4920 SDValue Op = N->getOperand(Num: I);
4921 Ops.push_back(Elt: Op);
4922 }
4923
4924 MachineSDNode *Result =
4925 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
4926 Result->setFlags(N->getFlags());
4927 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val: N)->memoperands());
4928 ReplaceUses(F: N, T: Result);
4929 MadeChange = true;
4930 }
4931 return MadeChange;
4932}
4933
4934
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
//
// Factory used by the RISC-V target machine to create the legacy-PM
// instruction-selection pass at the given optimization level.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
}
4941
// Unique pass identifier used by the legacy pass manager.
char RISCVDAGToDAGISelLegacy::ID = 0;

// Legacy-PM wrapper: owns a RISCVDAGToDAGISel instance and forwards to the
// common SelectionDAGISelLegacy machinery.
RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(args&: TM, args&: OptLevel)) {}

// Register the pass with the legacy pass registry.
INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4950