1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
19#include "RISCVSelectionDAGInfo.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/SDPatternMatch.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
23#include "llvm/Support/Alignment.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
33static cl::opt<bool> UsePseudoMovImm(
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(Val: false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
42void RISCVDAGToDAGISel::PreprocessISelDAG() {
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->enablePExtSIMDCodeGen())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(ResNo: 0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(Num: 0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget->getXLenVT(),
66 Operand: N->getOperand(Num: 0));
67 Result = CurDAG->getNode(Opcode: Opc, DL, VT, N1: CurDAG->getUNDEF(VT), N2: Src, N3: VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(ResNo: 0);
76 SDValue Passthru = N->getOperand(Num: 0);
77 SDValue Lo = N->getOperand(Num: 1);
78 SDValue Hi = N->getOperand(Num: 2);
79 SDValue VL = N->getOperand(Num: 3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(Bytes: TypeSize::getFixed(ExactSize: 8), Alignment: Align(8));
89 int FI = cast<FrameIndexSDNode>(Val: StackSlot.getNode())->getIndex();
90 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, dl: DL, Val: Lo, Ptr: StackSlot, PtrInfo: MPI, Alignment: Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(Base: StackSlot, Offset: TypeSize::getFixed(ExactSize: 4), DL);
97 Hi = CurDAG->getStore(Chain, dl: DL, Val: Hi, Ptr: OffsetSlot, PtrInfo: MPI.getWithOffset(O: 4),
98 Alignment: Align(8));
99
100 Chain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo, N2: Hi);
101
102 SDVTList VTs = CurDAG->getVTList(VTs: {VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(Reg: RISCV::X0, VT: MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
113 MemVT: MVT::i64, PtrInfo: MPI, Alignment: Align(8),
114 Flags: MachineMemOperand::MOLoad);
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(ResNo: 0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 Opcode: RISCVISD::VMSET_VL, DL, VT: VT.changeVectorElementType(EltVT: MVT::i1), Operand: VLMAX);
126 Result = CurDAG->getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT, N1: N->getOperand(Num: 0),
127 N2: TrueMask, N3: VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
148void RISCVDAGToDAGISel::PostprocessISelDAG() {
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(Node: N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(Node: cast<MachineSDNode>(Val: N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to workaround
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
182 RISCVMatInt::InstSeq &Seq) {
183 SDValue SrcReg = CurDAG->getRegister(Reg: RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Val: Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SDImm);
190 break;
191 case RISCVMatInt::RegX0:
192 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg,
193 Op2: CurDAG->getRegister(Reg: RISCV::X0, VT));
194 break;
195 case RISCVMatInt::RegReg:
196 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SrcReg);
197 break;
198 case RISCVMatInt::RegImm:
199 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
212 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget);
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(Opcode: RISCV::PseudoMovImm, dl: DL, VT,
218 Op1: CurDAG->getSignedTargetConstant(Val: Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bit 31 and 63 are set.
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
228 RISCVMatInt::InstSeq SeqLo =
229 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc);
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, Seq&: SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: Lo,
235 Op2: CurDAG->getTargetConstant(Val: ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(Opcode: AddOpc, dl: DL, VT, Op1: Lo, Op2: SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
245void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(Num: 0);
250
251 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(ResNo: 0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(Num: CurOp++);
261 Operands.push_back(Elt: Mask);
262 }
263 SDValue VL;
264 selectVLOp(N: Node->getOperand(Num: CurOp++), VL);
265 Operands.push_back(Elt: VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
269 Operands.push_back(Elt: SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
275 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(Num: CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
279 Operands.push_back(Elt: PolicyOp);
280 }
281
282 Operands.push_back(Elt: Chain); // Chain.
283}
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(ResNo: 0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
290 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
291
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
294
295 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
302 LMUL: static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);
305
306 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
307
308 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
309 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(N: Node);
311}
312
313void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(ResNo: 0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
319 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
320
321 unsigned CurOp = 2;
322 SmallVector<SDValue, 7> Operands;
323
324 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped,
334 VT2: XLenVT, VT3: MVT::Other, Ops: Operands);
335
336 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
337
338 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0)); // Result
339 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1)); // VL
340 ReplaceUses(F: SDValue(Node, 2), T: SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(N: Node);
342}
343
344void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
345 bool IsOrdered) {
346 SDLoc DL(Node);
347 MVT VT = Node->getSimpleValueType(ResNo: 0);
348 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
349 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
350
351 unsigned CurOp = 2;
352 SmallVector<SDValue, 8> Operands;
353
354 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
355
356 MVT IndexVT;
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
358 /*IsStridedOrIndexed*/ true, Operands,
359 /*IsLoad=*/true, IndexVT: &IndexVT);
360
361#ifndef NDEBUG
362 // Number of element = RVVBitsPerBlock * LMUL / SEW
363 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
364 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
365 if (DecodedLMUL.second)
366 ContainedTyNumElts /= DecodedLMUL.first;
367 else
368 ContainedTyNumElts *= DecodedLMUL.first;
369 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
370 "Element count mismatch");
371#endif
372
373 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
374 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
375 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
376 reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
377 "values when XLEN=32");
378 }
379 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
380 NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
381 IndexLMUL: static_cast<unsigned>(IndexLMUL));
382 MachineSDNode *Load =
383 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);
384
385 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
386
387 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
388 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
389 CurDAG->RemoveDeadNode(N: Node);
390}
391
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393 bool IsStrided) {
394 SDLoc DL(Node);
395 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
396 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
397 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
398
399 unsigned CurOp = 2;
400 SmallVector<SDValue, 8> Operands;
401
402 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
403
404 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
405 Operands);
406
407 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408 NF, Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
409 MachineSDNode *Store =
410 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);
411
412 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
413
414 ReplaceNode(F: Node, T: Store);
415}
416
417void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
418 bool IsOrdered) {
419 SDLoc DL(Node);
420 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
421 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
422 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
423
424 unsigned CurOp = 2;
425 SmallVector<SDValue, 8> Operands;
426
427 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
428
429 MVT IndexVT;
430 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
431 /*IsStridedOrIndexed*/ true, Operands,
432 /*IsLoad=*/false, IndexVT: &IndexVT);
433
434#ifndef NDEBUG
435 // Number of element = RVVBitsPerBlock * LMUL / SEW
436 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
437 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
438 if (DecodedLMUL.second)
439 ContainedTyNumElts /= DecodedLMUL.first;
440 else
441 ContainedTyNumElts *= DecodedLMUL.first;
442 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
443 "Element count mismatch");
444#endif
445
446 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
447 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 reportFatalUsageError(reason: "The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
453 NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
454 IndexLMUL: static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Store =
456 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);
457
458 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
459
460 ReplaceNode(F: Node, T: Store);
461}
462
463void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Num: Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL: VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
491 SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
500 if (VLMax || isAllOnesConstant(V: Node->getOperand(Num: 1))) {
501 VLOperand = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(Num: 1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(Val&: VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(x: AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(Val: AVL, DL, VT: XLenVT);
510 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: RISCV::PseudoVSETIVLI, dl: DL,
511 VT: XLenVT, Op1: VLImm, Op2: VTypeIOp));
512 return;
513 }
514 }
515 }
516
517 ReplaceNode(F: Node,
518 T: CurDAG->getMachineNode(Opcode, dl: DL, VT: XLenVT, Op1: VLOperand, Op2: VTypeIOp));
519}
520
521void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) {
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: 2));
538 unsigned Widen = RISCVVType::decodeTWiden(TWiden: Node->getConstantOperandVal(Num: 3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, AltFmt: 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);
547
548 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
549 Op1: Node->getOperand(Num: 1), Op2: VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Val: Log2_32(Value: SEW), DL, VT: XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Val: Widen, DL, VT: XLenVT);
553 ReplaceNode(F: Node,
554 T: CurDAG->getMachineNode(Opcode: PseudoOpCode, dl: DL, VT: XLenVT,
555 Op1: Node->getOperand(Num: 1), Op2: Log2SEW, Op3: TWiden));
556 }
557}
558
559bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
560 MVT VT = Node->getSimpleValueType(ResNo: 0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
568 SDValue N0 = Node->getOperand(Num: 0);
569 SDValue N1 = Node->getOperand(Num: 1);
570
571 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: N1);
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(x: Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(x: Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(i: 0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
596 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1));
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(N: ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(x: ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 Opcode: BinOpc, dl: DL, VT, Op1: Shift.getOperand(i: 0),
629 Op2: CurDAG->getSignedTargetConstant(Val: ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(Opcode: ShOpc, dl: DL, VT, Op1: SDValue(BinOp, 0),
632 Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
633 ReplaceNode(F: Node, T: SLLI);
634 return true;
635}
636
637bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(Num: 0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
667 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
668 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(ResNo: 0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
684 // amount can not be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(F: Node, T: Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, it means that extracts
709 // the X[Msb] bit and sign-extend it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(F: Node, T: Sbe);
714 return true;
715 }
716
717 return false;
718}
719
720bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(Num: 0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
740 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
741 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(ResNo: 0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(F: Node, T: Sbi);
767 return true;
768 }
769
770 return false;
771}
772
773bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
795 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
796 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
797 ReplaceNode(F: Node, T: Ube);
798 return true;
799}
800
801bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
815 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
816 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
817 ReplaceNode(F: Node, T: Ubi);
818 return true;
819}
820
821bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
826 LoadSDNode *Ld = cast<LoadSDNode>(Val: Node);
827 ISD::MemIndexedMode AM = Ld->getAddressingMode();
828 if (AM == ISD::UNINDEXED)
829 return false;
830
831 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Ld->getOffset());
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(Node), VT: Ty),
878 CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(Node), VT: Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(Node), VT1: Ld->getValueType(ResNo: 0),
880 VT2: Ld->getValueType(ResNo: 1), VT3: MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Val: Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: New), NewMemRefs: {MemOp});
884
885 ReplaceNode(F: Node, T: New);
886
887 return true;
888}
889
890static Register getTileReg(uint64_t TileNum) {
891 assert(TileNum <= 15 && "Invalid tile number");
892 return RISCV::T0 + TileNum;
893}
894
895void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
896 if (!Subtarget->hasVInstructions())
897 return;
898
899 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
900
901 SDLoc DL(Node);
902 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
903
904 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
905 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
906 "Unexpected vsetvli intrinsic");
907
908 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
909 unsigned Log2SEW = Log2_32(Value: Node->getConstantOperandVal(Num: 6));
910 SDValue SEWOp =
911 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: Subtarget->getXLenVT());
912 SmallVector<SDValue, 8> Operands = {Node->getOperand(Num: 2), Node->getOperand(Num: 3),
913 Node->getOperand(Num: 4), Node->getOperand(Num: 5),
914 Node->getOperand(Num: 8), SEWOp,
915 Node->getOperand(Num: 0)};
916
917 unsigned Opcode;
918 auto *LMulSDNode = cast<ConstantSDNode>(Val: Node->getOperand(Num: 7));
919 switch (LMulSDNode->getSExtValue()) {
920 case 5:
921 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
922 : RISCV::PseudoSF_VC_I_SE_MF8;
923 break;
924 case 6:
925 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
926 : RISCV::PseudoSF_VC_I_SE_MF4;
927 break;
928 case 7:
929 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
930 : RISCV::PseudoSF_VC_I_SE_MF2;
931 break;
932 case 0:
933 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
934 : RISCV::PseudoSF_VC_I_SE_M1;
935 break;
936 case 1:
937 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
938 : RISCV::PseudoSF_VC_I_SE_M2;
939 break;
940 case 2:
941 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
942 : RISCV::PseudoSF_VC_I_SE_M4;
943 break;
944 case 3:
945 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
946 : RISCV::PseudoSF_VC_I_SE_M8;
947 break;
948 }
949
950 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
951 Opcode, dl: DL, VT: Node->getSimpleValueType(ResNo: 0), Ops: Operands));
952}
953
954static unsigned getSegInstNF(unsigned Intrinsic) {
955#define INST_NF_CASE(NAME, NF) \
956 case Intrinsic::riscv_##NAME##NF: \
957 return NF;
958#define INST_NF_CASE_MASK(NAME, NF) \
959 case Intrinsic::riscv_##NAME##NF##_mask: \
960 return NF;
961#define INST_NF_CASE_FF(NAME, NF) \
962 case Intrinsic::riscv_##NAME##NF##ff: \
963 return NF;
964#define INST_NF_CASE_FF_MASK(NAME, NF) \
965 case Intrinsic::riscv_##NAME##NF##ff_mask: \
966 return NF;
967#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
968 MACRO_NAME(NAME, 2) \
969 MACRO_NAME(NAME, 3) \
970 MACRO_NAME(NAME, 4) \
971 MACRO_NAME(NAME, 5) \
972 MACRO_NAME(NAME, 6) \
973 MACRO_NAME(NAME, 7) \
974 MACRO_NAME(NAME, 8)
975#define INST_ALL_NF_CASE(NAME) \
976 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
977 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
978#define INST_ALL_NF_CASE_WITH_FF(NAME) \
979 INST_ALL_NF_CASE(NAME) \
980 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
981 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
982 switch (Intrinsic) {
983 default:
984 llvm_unreachable("Unexpected segment load/store intrinsic");
985 INST_ALL_NF_CASE_WITH_FF(vlseg)
986 INST_ALL_NF_CASE(vlsseg)
987 INST_ALL_NF_CASE(vloxseg)
988 INST_ALL_NF_CASE(vluxseg)
989 INST_ALL_NF_CASE(vsseg)
990 INST_ALL_NF_CASE(vssseg)
991 INST_ALL_NF_CASE(vsoxseg)
992 INST_ALL_NF_CASE(vsuxseg)
993 }
994}
995
996static bool isApplicableToPLI(int Val) {
997 // Check if the immediate is packed i8 or i10
998 int16_t Bit31To16 = Val >> 16;
999 int16_t Bit15To0 = Val;
1000 int8_t Bit15To8 = Bit15To0 >> 8;
1001 int8_t Bit7To0 = Val;
1002 if (Bit31To16 != Bit15To0)
1003 return false;
1004
1005 return isInt<10>(x: Bit31To16) || Bit15To8 == Bit7To0;
1006}
1007
1008void RISCVDAGToDAGISel::Select(SDNode *Node) {
1009 // If we have a custom node, we have already selected.
1010 if (Node->isMachineOpcode()) {
1011 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1012 Node->setNodeId(-1);
1013 return;
1014 }
1015
1016 // Instruction Selection not handled by the auto-generated tablegen selection
1017 // should be handled here.
1018 unsigned Opcode = Node->getOpcode();
1019 MVT XLenVT = Subtarget->getXLenVT();
1020 SDLoc DL(Node);
1021 MVT VT = Node->getSimpleValueType(ResNo: 0);
1022
1023 bool HasBitTest = Subtarget->hasBEXTILike();
1024
1025 switch (Opcode) {
1026 case ISD::Constant: {
1027 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1028 auto *ConstNode = cast<ConstantSDNode>(Val: Node);
1029 if (ConstNode->isZero()) {
1030 SDValue New =
1031 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: RISCV::X0, VT);
1032 ReplaceNode(F: Node, T: New.getNode());
1033 return;
1034 }
1035 int64_t Imm = ConstNode->getSExtValue();
1036 // If only the lower 8 bits are used, try to convert this to a simm6 by
1037 // sign-extending bit 7. This is neutral without the C extension, and
1038 // allows C.LI to be used if C is present.
1039 if (!isInt<8>(x: Imm) && isUInt<8>(x: Imm) && isInt<6>(x: SignExtend64<8>(x: Imm)) &&
1040 hasAllBUsers(Node))
1041 Imm = SignExtend64<8>(x: Imm);
1042 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1043 // by sign extending bit 15.
1044 else if (!isInt<16>(x: Imm) && isUInt<16>(x: Imm) &&
1045 isInt<12>(x: SignExtend64<16>(x: Imm)) && hasAllHUsers(Node))
1046 Imm = SignExtend64<16>(x: Imm);
1047 // If the upper 32-bits are not used try to convert this into a simm32 by
1048 // sign extending bit 32.
1049 else if (!isInt<32>(x: Imm) && isUInt<32>(x: Imm) && hasAllWUsers(Node))
1050 Imm = SignExtend64<32>(x: Imm);
1051
1052 if (VT == MVT::i64 && Subtarget->hasStdExtP() && isApplicableToPLI(Val: Imm) &&
1053 hasAllWUsers(Node)) {
1054 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1055 // can simply copy lower 32 bits to higher 32 bits to make it able to
1056 // rematerialize to PLI_B or PLI_H
1057 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1058 }
1059
1060 ReplaceNode(F: Node, T: selectImm(CurDAG, DL, VT, Imm, Subtarget: *Subtarget).getNode());
1061 return;
1062 }
1063 case ISD::ConstantFP: {
1064 const APFloat &APF = cast<ConstantFPSDNode>(Val: Node)->getValueAPF();
1065
1066 bool Is64Bit = Subtarget->is64Bit();
1067 bool HasZdinx = Subtarget->hasStdExtZdinx();
1068
1069 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1070 SDValue Imm;
1071 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1072 // create an integer immediate.
1073 if (APF.isPosZero() || NegZeroF64) {
1074 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1075 Imm = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::f64);
1076 else
1077 Imm = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
1078 } else {
1079 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
1080 Subtarget: *Subtarget);
1081 }
1082
1083 unsigned Opc;
1084 switch (VT.SimpleTy) {
1085 default:
1086 llvm_unreachable("Unexpected size");
1087 case MVT::bf16:
1088 assert(Subtarget->hasStdExtZfbfmin());
1089 Opc = RISCV::FMV_H_X;
1090 break;
1091 case MVT::f16:
1092 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1093 break;
1094 case MVT::f32:
1095 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1096 break;
1097 case MVT::f64:
1098 // For RV32, we can't move from a GPR, we need to convert instead. This
1099 // should only happen for +0.0 and -0.0.
1100 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1101 if (HasZdinx)
1102 Opc = RISCV::COPY;
1103 else
1104 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1105 break;
1106 }
1107
1108 SDNode *Res;
1109 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1110 Res =
1111 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_16, DL, VT, Operand: Imm).getNode();
1112 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1113 Res =
1114 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_32, DL, VT, Operand: Imm).getNode();
1115 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1116 Res = CurDAG->getMachineNode(
1117 Opcode: Opc, dl: DL, VT, Op1: Imm,
1118 Op2: CurDAG->getTargetConstant(Val: RISCVFPRndMode::RNE, DL, VT: XLenVT));
1119 else
1120 Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: Imm);
1121
1122 // For f64 -0.0, we need to insert a fneg.d idiom.
1123 if (NegZeroF64) {
1124 Opc = RISCV::FSGNJN_D;
1125 if (HasZdinx)
1126 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1127 Res =
1128 CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: SDValue(Res, 0), Op2: SDValue(Res, 0));
1129 }
1130
1131 ReplaceNode(F: Node, T: Res);
1132 return;
1133 }
1134 case RISCVISD::BuildGPRPair:
1135 case RISCVISD::BuildPairF64: {
1136 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1137 break;
1138
1139 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1140 "BuildPairF64 only handled here on rv32i_zdinx");
1141
1142 SDValue Ops[] = {
1143 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32),
1144 Node->getOperand(Num: 0),
1145 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32),
1146 Node->getOperand(Num: 1),
1147 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
1148
1149 SDNode *N = CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT, Ops);
1150 ReplaceNode(F: Node, T: N);
1151 return;
1152 }
1153 case RISCVISD::SplitGPRPair:
1154 case RISCVISD::SplitF64: {
1155 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1156 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1157 "SplitF64 only handled here on rv32i_zdinx");
1158
1159 if (!SDValue(Node, 0).use_empty()) {
1160 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1161 VT: Node->getValueType(ResNo: 0),
1162 Operand: Node->getOperand(Num: 0));
1163 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1164 }
1165
1166 if (!SDValue(Node, 1).use_empty()) {
1167 SDValue Hi = CurDAG->getTargetExtractSubreg(
1168 SRIdx: RISCV::sub_gpr_odd, DL, VT: Node->getValueType(ResNo: 1), Operand: Node->getOperand(Num: 0));
1169 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1170 }
1171
1172 CurDAG->RemoveDeadNode(N: Node);
1173 return;
1174 }
1175
1176 assert(Opcode != RISCVISD::SplitGPRPair &&
1177 "SplitGPRPair should already be handled");
1178
1179 if (!Subtarget->hasStdExtZfa())
1180 break;
1181 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1182 "Unexpected subtarget");
1183
1184 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1185 if (!SDValue(Node, 0).use_empty()) {
1186 SDNode *Lo = CurDAG->getMachineNode(Opcode: RISCV::FMV_X_W_FPR64, dl: DL, VT,
1187 Op1: Node->getOperand(Num: 0));
1188 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Lo, 0));
1189 }
1190 if (!SDValue(Node, 1).use_empty()) {
1191 SDNode *Hi = CurDAG->getMachineNode(Opcode: RISCV::FMVH_X_D, dl: DL, VT,
1192 Op1: Node->getOperand(Num: 0));
1193 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Hi, 0));
1194 }
1195
1196 CurDAG->RemoveDeadNode(N: Node);
1197 return;
1198 }
1199 case ISD::SHL: {
1200 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1201 if (!N1C)
1202 break;
1203 SDValue N0 = Node->getOperand(Num: 0);
1204 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1205 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1206 break;
1207 unsigned ShAmt = N1C->getZExtValue();
1208 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1209
1210 if (isShiftedMask_64(Value: Mask)) {
1211 unsigned XLen = Subtarget->getXLen();
1212 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1213 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1214 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1215 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1216 // where C2 has 32 leading zeros and C3 trailing zeros.
1217 SDNode *SRLIW = CurDAG->getMachineNode(
1218 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1219 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1220 SDNode *SLLI = CurDAG->getMachineNode(
1221 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1222 Op2: CurDAG->getTargetConstant(Val: TrailingZeros + ShAmt, DL, VT));
1223 ReplaceNode(F: Node, T: SLLI);
1224 return;
1225 }
1226 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1227 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1228 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1229 // where C2 has C4 leading zeros and no trailing zeros.
1230 // This is profitable if the "and" was to be lowered to
1231 // (srli (slli X, C4), C4) and not (andi X, C2).
1232 // For "LeadingZeros == 32":
1233 // - with Zba it's just (slli.uw X, C)
1234 // - without Zba a tablegen pattern applies the very same
1235 // transform as we would have done here
1236 SDNode *SLLI = CurDAG->getMachineNode(
1237 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1238 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1239 SDNode *SRLI = CurDAG->getMachineNode(
1240 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1241 Op2: CurDAG->getTargetConstant(Val: LeadingZeros - ShAmt, DL, VT));
1242 ReplaceNode(F: Node, T: SRLI);
1243 return;
1244 }
1245 }
1246 break;
1247 }
1248 case ISD::SRL: {
1249 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1250 if (!N1C)
1251 break;
1252 SDValue N0 = Node->getOperand(Num: 0);
1253 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1254 break;
1255 unsigned ShAmt = N1C->getZExtValue();
1256 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1257
1258 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1259 // 32 leading zeros and C3 trailing zeros.
1260 if (isShiftedMask_64(Value: Mask) && N0.hasOneUse()) {
1261 unsigned XLen = Subtarget->getXLen();
1262 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1263 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1264 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1265 SDNode *SRLIW = CurDAG->getMachineNode(
1266 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
1267 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1268 SDNode *SLLI = CurDAG->getMachineNode(
1269 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1270 Op2: CurDAG->getTargetConstant(Val: TrailingZeros - ShAmt, DL, VT));
1271 ReplaceNode(F: Node, T: SLLI);
1272 return;
1273 }
1274 }
1275
1276 // Optimize (srl (and X, C2), C) ->
1277 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1278 // Where C2 is a mask with C3 trailing ones.
1279 // Taking into account that the C2 may have had lower bits unset by
1280 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1281 // This pattern occurs when type legalizing right shifts for types with
1282 // less than XLen bits.
1283 Mask |= maskTrailingOnes<uint64_t>(N: ShAmt);
1284 if (!isMask_64(Value: Mask))
1285 break;
1286 unsigned TrailingOnes = llvm::countr_one(Value: Mask);
1287 if (ShAmt >= TrailingOnes)
1288 break;
1289 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1290 if (TrailingOnes == 32) {
1291 SDNode *SRLI = CurDAG->getMachineNode(
1292 Opcode: Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, dl: DL, VT,
1293 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1294 ReplaceNode(F: Node, T: SRLI);
1295 return;
1296 }
1297
1298 // Only do the remaining transforms if the AND has one use.
1299 if (!N0.hasOneUse())
1300 break;
1301
1302 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1303 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1304 SDNode *BEXTI = CurDAG->getMachineNode(
1305 Opcode: Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, dl: DL, VT,
1306 Op1: N0.getOperand(i: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1307 ReplaceNode(F: Node, T: BEXTI);
1308 return;
1309 }
1310
1311 const unsigned Msb = TrailingOnes - 1;
1312 const unsigned Lsb = ShAmt;
1313 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0.getOperand(i: 0), Msb, Lsb))
1314 return;
1315
1316 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1317 SDNode *SLLI =
1318 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1319 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1320 SDNode *SRLI = CurDAG->getMachineNode(
1321 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1322 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1323 ReplaceNode(F: Node, T: SRLI);
1324 return;
1325 }
1326 case ISD::SRA: {
1327 if (trySignedBitfieldExtract(Node))
1328 return;
1329
1330 if (trySignedBitfieldInsertInSign(Node))
1331 return;
1332
1333 // Optimize (sra (sext_inreg X, i16), C) ->
1334 // (srai (slli X, (XLen-16), (XLen-16) + C)
1335 // And (sra (sext_inreg X, i8), C) ->
1336 // (srai (slli X, (XLen-8), (XLen-8) + C)
1337 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1338 // This transform matches the code we get without Zbb. The shifts are more
1339 // compressible, and this can help expose CSE opportunities in the sdiv by
1340 // constant optimization.
1341 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1342 if (!N1C)
1343 break;
1344 SDValue N0 = Node->getOperand(Num: 0);
1345 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1346 break;
1347 unsigned ShAmt = N1C->getZExtValue();
1348 unsigned ExtSize =
1349 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
1350 // ExtSize of 32 should use sraiw via tablegen pattern.
1351 if (ExtSize >= 32 || ShAmt >= ExtSize)
1352 break;
1353 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1354 SDNode *SLLI =
1355 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1356 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1357 SDNode *SRAI = CurDAG->getMachineNode(
1358 Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1359 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1360 ReplaceNode(F: Node, T: SRAI);
1361 return;
1362 }
1363 case ISD::OR: {
1364 if (tryShrinkShlLogicImm(Node))
1365 return;
1366
1367 break;
1368 }
1369 case ISD::XOR:
1370 if (tryShrinkShlLogicImm(Node))
1371 return;
1372
1373 break;
1374 case ISD::AND: {
1375 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1376 if (!N1C)
1377 break;
1378
1379 SDValue N0 = Node->getOperand(Num: 0);
1380
1381 bool LeftShift = N0.getOpcode() == ISD::SHL;
1382 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1383 auto *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
1384 if (!C)
1385 break;
1386 unsigned C2 = C->getZExtValue();
1387 unsigned XLen = Subtarget->getXLen();
1388 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1389
1390 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1391 // shift pair might offer more compression opportunities.
1392 // TODO: We could check for C extension here, but we don't have many lit
1393 // tests with the C extension enabled so not checking gets better
1394 // coverage.
1395 // TODO: What if ANDI faster than shift?
1396 bool IsCANDI = isInt<6>(x: N1C->getSExtValue());
1397
1398 uint64_t C1 = N1C->getZExtValue();
1399
1400 // Clear irrelevant bits in the mask.
1401 if (LeftShift)
1402 C1 &= maskTrailingZeros<uint64_t>(N: C2);
1403 else
1404 C1 &= maskTrailingOnes<uint64_t>(N: XLen - C2);
1405
1406 // Some transforms should only be done if the shift has a single use or
1407 // the AND would become (srli (slli X, 32), 32)
1408 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1409
1410 SDValue X = N0.getOperand(i: 0);
1411
1412 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1413 // with c3 leading zeros.
1414 if (!LeftShift && isMask_64(Value: C1)) {
1415 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1416 if (C2 < Leading) {
1417 // If the number of leading zeros is C2+32 this can be SRLIW.
1418 if (C2 + 32 == Leading) {
1419 SDNode *SRLIW = CurDAG->getMachineNode(
1420 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1421 ReplaceNode(F: Node, T: SRLIW);
1422 return;
1423 }
1424
1425 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1426 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1427 //
1428 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1429 // legalized and goes through DAG combine.
1430 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1431 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1432 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32) {
1433 SDNode *SRAIW =
1434 CurDAG->getMachineNode(Opcode: RISCV::SRAIW, dl: DL, VT, Op1: X.getOperand(i: 0),
1435 Op2: CurDAG->getTargetConstant(Val: 31, DL, VT));
1436 SDNode *SRLIW = CurDAG->getMachineNode(
1437 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: SDValue(SRAIW, 0),
1438 Op2: CurDAG->getTargetConstant(Val: Leading - 32, DL, VT));
1439 ReplaceNode(F: Node, T: SRLIW);
1440 return;
1441 }
1442
1443 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1444 // available.
1445 // Transform (and (srl x, C2), C1)
1446 // -> (<bfextract> x, msb, lsb)
1447 //
1448 // Make sure to keep this below the SRLIW cases, as we always want to
1449 // prefer the more common instruction.
1450 const unsigned Msb = llvm::bit_width(Value: C1) + C2 - 1;
1451 const unsigned Lsb = C2;
1452 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1453 return;
1454
1455 // (srli (slli x, c3-c2), c3).
1456 // Skip if we could use (zext.w (sraiw X, C2)).
1457 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1458 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1459 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32;
1460 // Also Skip if we can use bexti or th.tst.
1461 Skip |= HasBitTest && Leading == XLen - 1;
1462 if (OneUseOrZExtW && !Skip) {
1463 SDNode *SLLI = CurDAG->getMachineNode(
1464 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1465 Op2: CurDAG->getTargetConstant(Val: Leading - C2, DL, VT));
1466 SDNode *SRLI = CurDAG->getMachineNode(
1467 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1468 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1469 ReplaceNode(F: Node, T: SRLI);
1470 return;
1471 }
1472 }
1473 }
1474
1475 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1476 // shifted by c2 bits with c3 leading zeros.
1477 if (LeftShift && isShiftedMask_64(Value: C1)) {
1478 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1479
1480 if (C2 + Leading < XLen &&
1481 C1 == (maskTrailingOnes<uint64_t>(N: XLen - (C2 + Leading)) << C2)) {
1482 // Use slli.uw when possible.
1483 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1484 SDNode *SLLI_UW =
1485 CurDAG->getMachineNode(Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: X,
1486 Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1487 ReplaceNode(F: Node, T: SLLI_UW);
1488 return;
1489 }
1490
1491 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1492 // available.
1493 // Transform (and (shl x, c2), c1)
1494 // -> (<bfinsert> x, msb, lsb)
1495 // e.g.
1496 // (and (shl x, 12), 0x00fff000)
1497 // If XLen = 32 and C2 = 12, then
1498 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1499 const unsigned Msb = XLen - Leading - 1;
1500 const unsigned Lsb = C2;
1501 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1502 return;
1503
1504 if (OneUseOrZExtW && !IsCANDI) {
1505 // (packh x0, X)
1506 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1507 SDNode *PACKH = CurDAG->getMachineNode(
1508 Opcode: RISCV::PACKH, dl: DL, VT,
1509 Op1: CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT()), Op2: X);
1510 ReplaceNode(F: Node, T: PACKH);
1511 return;
1512 }
1513 // (srli (slli x, c2+c3), c3)
1514 SDNode *SLLI = CurDAG->getMachineNode(
1515 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1516 Op2: CurDAG->getTargetConstant(Val: C2 + Leading, DL, VT));
1517 SDNode *SRLI = CurDAG->getMachineNode(
1518 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1519 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1520 ReplaceNode(F: Node, T: SRLI);
1521 return;
1522 }
1523 }
1524 }
1525
1526 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1527 // shifted mask with c2 leading zeros and c3 trailing zeros.
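// Illustrative example (assuming XLen=64): c2=8 and c1=0x00fffffffffffff0
// (8 leading, 4 trailing zeros) become (slli (srli x, 12), 4); both place
// x[63:12] at bit positions [55:4].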
1528 if (!LeftShift && isShiftedMask_64(Value: C1)) {
1529 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1530 unsigned Trailing = llvm::countr_zero(Val: C1);
1531 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1532 !IsCANDI) {
1533 unsigned SrliOpc = RISCV::SRLI;
1534 // If the input is zexti32 we should use SRLIW.
1535 if (X.getOpcode() == ISD::AND &&
1536 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1537 X.getConstantOperandVal(i: 1) == UINT64_C(0xFFFFFFFF)) {
1538 SrliOpc = RISCV::SRLIW;
1539 X = X.getOperand(i: 0);
1540 }
1541 SDNode *SRLI = CurDAG->getMachineNode(
1542 Opcode: SrliOpc, dl: DL, VT, Op1: X,
1543 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1544 SDNode *SLLI = CurDAG->getMachineNode(
1545 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1546 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1547 ReplaceNode(F: Node, T: SLLI);
1548 return;
1549 }
1550 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1551 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1552 OneUseOrZExtW && !IsCANDI) {
1553 SDNode *SRLIW = CurDAG->getMachineNode(
1554 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1555 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1556 SDNode *SLLI = CurDAG->getMachineNode(
1557 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1558 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1559 ReplaceNode(F: Node, T: SLLI);
1560 return;
1561 }
1562 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1563 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1564 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1565 SDNode *SRLI = CurDAG->getMachineNode(
1566 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1567 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1568 SDNode *SLLI_UW = CurDAG->getMachineNode(
1569 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1570 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1571 ReplaceNode(F: Node, T: SLLI_UW);
1572 return;
1573 }
1574 }
1575
1576 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1577 // shifted mask with no leading zeros and c3 trailing zeros.
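// Illustrative example (assuming XLen=64): c2=4 and c1=0xffffffffffff0000
// (no leading, 16 trailing zeros) become (slli (srli x, 12), 16); both place
// x[59:12] at bit positions [63:16].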
1578 if (LeftShift && isShiftedMask_64(Value: C1)) {
1579 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1580 unsigned Trailing = llvm::countr_zero(Val: C1);
1581 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1582 SDNode *SRLI = CurDAG->getMachineNode(
1583 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1584 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1585 SDNode *SLLI = CurDAG->getMachineNode(
1586 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1587 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1588 ReplaceNode(F: Node, T: SLLI);
1589 return;
1590 }
1591 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1592 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1593 SDNode *SRLIW = CurDAG->getMachineNode(
1594 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1595 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1596 SDNode *SLLI = CurDAG->getMachineNode(
1597 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1598 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1599 ReplaceNode(F: Node, T: SLLI);
1600 return;
1601 }
1602
1603 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1604 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1605 Subtarget->hasStdExtZba()) {
1606 SDNode *SRLI = CurDAG->getMachineNode(
1607 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1608 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1609 SDNode *SLLI_UW = CurDAG->getMachineNode(
1610 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1611 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1612 ReplaceNode(F: Node, T: SLLI_UW);
1613 return;
1614 }
1615 }
1616 }
1617
1618 const uint64_t C1 = N1C->getZExtValue();
1619
1620 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
1621 N0.hasOneUse()) {
1622 unsigned C2 = N0.getConstantOperandVal(i: 1);
1623 unsigned XLen = Subtarget->getXLen();
1624 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1625
1626 SDValue X = N0.getOperand(i: 0);
1627
1628 // Prefer SRAIW + ANDI when possible.
1629 bool Skip = C2 > 32 && isInt<12>(x: N1C->getSExtValue()) &&
1630 X.getOpcode() == ISD::SHL &&
1631 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1632 X.getConstantOperandVal(i: 1) == 32;
1633 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1634 // mask with c3 leading zeros and c2 is larger than c3.
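// Illustrative example (assuming XLen=64): c2=40 and c1=0xffffffff (c3=32)
// become (srli (srai x, 8), 32); both place x[63:40] in bits [23:0],
// replicate x's sign bit into bits [31:24], and zero the rest.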
1635 if (isMask_64(Value: C1) && !Skip) {
1636 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1637 if (C2 > Leading) {
1638 SDNode *SRAI = CurDAG->getMachineNode(
1639 Opcode: RISCV::SRAI, dl: DL, VT, Op1: X,
1640 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1641 SDNode *SRLI = CurDAG->getMachineNode(
1642 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1643 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1644 ReplaceNode(F: Node, T: SRLI);
1645 return;
1646 }
1647 }
1648
1649 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1650 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1651 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
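// Illustrative example (assuming XLen=64): c2=16 and c1=0x00fffffffffffff0
// (c3=8, c4=4) become (slli (srli (srai y, 8), 12), 4); both place y[63:20]
// at bit positions [47:4] and replicate y's sign bit into bits [55:48].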
1652 if (isShiftedMask_64(Value: C1) && !Skip) {
1653 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1654 unsigned Trailing = llvm::countr_zero(Val: C1);
1655 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1656 SDNode *SRAI = CurDAG->getMachineNode(
1657 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1658 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1659 SDNode *SRLI = CurDAG->getMachineNode(
1660 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1661 Op2: CurDAG->getTargetConstant(Val: Leading + Trailing, DL, VT));
1662 SDNode *SLLI = CurDAG->getMachineNode(
1663 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1664 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1665 ReplaceNode(F: Node, T: SLLI);
1666 return;
1667 }
1668 }
1669 }
1670
1671 // If C1 masks off the upper bits only (but can't be formed as an
1672 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1673 // available.
1674 // Transform (and x, C1)
1675 // -> (<bfextract> x, msb, lsb)
1676 if (isMask_64(Value: C1) && !isInt<12>(x: N1C->getSExtValue()) &&
1677 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1678 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1679 const unsigned Msb = llvm::bit_width(Value: C1) - 1;
1680 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0, Msb, Lsb: 0))
1681 return;
1682 }
1683
1684 if (tryShrinkShlLogicImm(Node))
1685 return;
1686
1687 break;
1688 }
1689 case ISD::MUL: {
1690 // Special case for calculating (mul (and X, C2), C1) where the full product
1691 // fits in XLen bits. We can shift X left by the number of leading zeros in
1692 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1693 // product has XLen trailing zeros, putting it in the output of MULHU. This
1694 // can avoid materializing a constant in a register for C2.
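// Illustrative example (assuming XLen=64, subject to the checks below):
// (mul (and X, 0xff), C1) with bit_width(C1) <= 56 becomes
// (mulhu (slli X, 56), C1 << 8); the 128-bit product then has its low 64 bits
// equal to zero, so MULHU returns (X & 0xff) * C1 directly.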
1695
1696 // RHS should be a constant.
1697 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1698 if (!N1C || !N1C->hasOneUse())
1699 break;
1700
1701 // LHS should be an AND with constant.
1702 SDValue N0 = Node->getOperand(Num: 0);
1703 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1704 break;
1705
1706 uint64_t C2 = N0.getConstantOperandVal(i: 1);
1707
1708 // Constant should be a mask.
1709 if (!isMask_64(Value: C2))
1710 break;
1711
1712 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1713 // multiple users or the constant is a simm12. This prevents inserting a
1714 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will
1715 // likely make it more costly to materialize. Otherwise, using a SLLI
1716 // might allow it to be compressed.
1717 bool IsANDIOrZExt =
1718 isInt<12>(x: C2) ||
1719 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1720 // With XTHeadBb, we can use TH.EXTU.
1721 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1722 if (IsANDIOrZExt && (isInt<12>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1723 break;
1724 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1725 // the constant is a simm32.
1726 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1727 // With XTHeadBb, we can use TH.EXTU.
1728 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1729 if (IsZExtW && (isInt<32>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1730 break;
1731
1732 // We need to shift left the AND input and C1 by a total of XLen bits.
1733
1734 // How far left do we need to shift the AND input?
1735 unsigned XLen = Subtarget->getXLen();
1736 unsigned LeadingZeros = XLen - llvm::bit_width(Value: C2);
1737
1738 // The constant gets shifted by the remaining amount unless that would
1739 // shift bits out.
1740 uint64_t C1 = N1C->getZExtValue();
1741 unsigned ConstantShift = XLen - LeadingZeros;
1742 if (ConstantShift > (XLen - llvm::bit_width(Value: C1)))
1743 break;
1744
1745 uint64_t ShiftedC1 = C1 << ConstantShift;
1746 // If this is RV32, we need to sign extend the constant.
1747 if (XLen == 32)
1748 ShiftedC1 = SignExtend64<32>(x: ShiftedC1);
1749
1750 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1751 SDNode *Imm = selectImm(CurDAG, DL, VT, Imm: ShiftedC1, Subtarget: *Subtarget).getNode();
1752 SDNode *SLLI =
1753 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1754 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1755 SDNode *MULHU = CurDAG->getMachineNode(Opcode: RISCV::MULHU, dl: DL, VT,
1756 Op1: SDValue(SLLI, 0), Op2: SDValue(Imm, 0));
1757 ReplaceNode(F: Node, T: MULHU);
1758 return;
1759 }
1760 case ISD::LOAD: {
1761 if (tryIndexedLoad(Node))
1762 return;
1763
1764 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1765 // We match post-incrementing loads here.
1766 LoadSDNode *Load = cast<LoadSDNode>(Val: Node);
1767 if (Load->getAddressingMode() != ISD::POST_INC)
1768 break;
1769
1770 SDValue Chain = Node->getOperand(Num: 0);
1771 SDValue Base = Node->getOperand(Num: 1);
1772 SDValue Offset = Node->getOperand(Num: 2);
1773
1774 bool Simm12 = false;
1775 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1776
1777 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Val&: Offset)) {
1778 int ConstantVal = ConstantOffset->getSExtValue();
1779 Simm12 = isInt<12>(x: ConstantVal);
1780 if (Simm12)
1781 Offset = CurDAG->getTargetConstant(Val: ConstantVal, DL: SDLoc(Offset),
1782 VT: Offset.getValueType());
1783 }
1784
1785 unsigned Opcode = 0;
1786 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1787 case MVT::i8:
1788 if (Simm12 && SignExtend)
1789 Opcode = RISCV::CV_LB_ri_inc;
1790 else if (Simm12 && !SignExtend)
1791 Opcode = RISCV::CV_LBU_ri_inc;
1792 else if (!Simm12 && SignExtend)
1793 Opcode = RISCV::CV_LB_rr_inc;
1794 else
1795 Opcode = RISCV::CV_LBU_rr_inc;
1796 break;
1797 case MVT::i16:
1798 if (Simm12 && SignExtend)
1799 Opcode = RISCV::CV_LH_ri_inc;
1800 else if (Simm12 && !SignExtend)
1801 Opcode = RISCV::CV_LHU_ri_inc;
1802 else if (!Simm12 && SignExtend)
1803 Opcode = RISCV::CV_LH_rr_inc;
1804 else
1805 Opcode = RISCV::CV_LHU_rr_inc;
1806 break;
1807 case MVT::i32:
1808 if (Simm12)
1809 Opcode = RISCV::CV_LW_ri_inc;
1810 else
1811 Opcode = RISCV::CV_LW_rr_inc;
1812 break;
1813 default:
1814 break;
1815 }
1816 if (!Opcode)
1817 break;
1818
1819 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: XLenVT, VT2: XLenVT,
1820 VT3: Chain.getSimpleValueType(), Op1: Base,
1821 Op2: Offset, Op3: Chain));
1822 return;
1823 }
1824 break;
1825 }
1826 case RISCVISD::LD_RV32: {
1827 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1828
1829 SDValue Base, Offset;
1830 SDValue Chain = Node->getOperand(Num: 0);
1831 SDValue Addr = Node->getOperand(Num: 1);
1832 SelectAddrRegImm(Addr, Base, Offset);
1833
1834 SDValue Ops[] = {Base, Offset, Chain};
1835 MachineSDNode *New = CurDAG->getMachineNode(
1836 Opcode: RISCV::LD_RV32, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
1837 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1838 VT: MVT::i32, Operand: SDValue(New, 0));
1839 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_odd, DL,
1840 VT: MVT::i32, Operand: SDValue(New, 0));
1841 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
1842 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1843 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1844 ReplaceUses(F: SDValue(Node, 2), T: SDValue(New, 1));
1845 CurDAG->RemoveDeadNode(N: Node);
1846 return;
1847 }
1848 case RISCVISD::SD_RV32: {
1849 SDValue Base, Offset;
1850 SDValue Chain = Node->getOperand(Num: 0);
1851 SDValue Addr = Node->getOperand(Num: 3);
1852 SelectAddrRegImm(Addr, Base, Offset);
1853
1854 SDValue Lo = Node->getOperand(Num: 1);
1855 SDValue Hi = Node->getOperand(Num: 2);
1856
1857 SDValue RegPair;
1858 // Peephole to use X0_Pair for storing zero.
1859 if (isNullConstant(V: Lo) && isNullConstant(V: Hi)) {
1860 RegPair = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
1861 } else {
1862 SDValue Ops[] = {
1863 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), Lo,
1864 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), Hi,
1865 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
1866
1867 RegPair = SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL,
1868 VT: MVT::Untyped, Ops),
1869 0);
1870 }
1871
1872 MachineSDNode *New = CurDAG->getMachineNode(Opcode: RISCV::SD_RV32, dl: DL, VT: MVT::Other,
1873 Ops: {RegPair, Base, Offset, Chain});
1874 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
1875 ReplaceUses(F: SDValue(Node, 0), T: SDValue(New, 0));
1876 CurDAG->RemoveDeadNode(N: Node);
1877 return;
1878 }
1879 case RISCVISD::PPACK_DH: {
1880 assert(Subtarget->enablePExtSIMDCodeGen() && Subtarget->isRV32());
1881
1882 SDValue Val0 = Node->getOperand(Num: 0);
1883 SDValue Val1 = Node->getOperand(Num: 1);
1884 SDValue Val2 = Node->getOperand(Num: 2);
1885 SDValue Val3 = Node->getOperand(Num: 3);
1886
1887 SDValue Ops[] = {
1888 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), Val0,
1889 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), Val2,
1890 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
1891 SDValue RegPair0 =
1892 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL,
1893 VT: MVT::Untyped, Ops),
1894 0);
1895 SDValue Ops1[] = {
1896 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), Val1,
1897 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), Val3,
1898 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
1899 SDValue RegPair1 =
1900 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL,
1901 VT: MVT::Untyped, Ops: Ops1),
1902 0);
1903
1904 MachineSDNode *PackDH = CurDAG->getMachineNode(
1905 Opcode: RISCV::PPAIRE_DB, dl: DL, VT: MVT::Untyped, Ops: {RegPair0, RegPair1});
1906
1907 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1908 VT: MVT::i32, Operand: SDValue(PackDH, 0));
1909 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_odd, DL,
1910 VT: MVT::i32, Operand: SDValue(PackDH, 0));
1911 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1912 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1913 CurDAG->RemoveDeadNode(N: Node);
1914 return;
1915 }
1916 case ISD::INTRINSIC_WO_CHAIN: {
1917 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
1918 switch (IntNo) {
1919 // By default we do not custom select any intrinsic.
1920 default:
1921 break;
1922 case Intrinsic::riscv_vmsgeu:
1923 case Intrinsic::riscv_vmsge: {
1924 SDValue Src1 = Node->getOperand(Num: 1);
1925 SDValue Src2 = Node->getOperand(Num: 2);
1926 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1927 bool IsCmpConstant = false;
1928 bool IsCmpMinimum = false;
1929 // Only custom select scalar second operand.
1930 if (Src2.getValueType() != XLenVT)
1931 break;
1932 // Small constants are handled with patterns.
1933 int64_t CVal = 0;
1934 MVT Src1VT = Src1.getSimpleValueType();
1935 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
1936 IsCmpConstant = true;
1937 CVal = C->getSExtValue();
1938 if (CVal >= -15 && CVal <= 16) {
1939 if (!IsUnsigned || CVal != 0)
1940 break;
1941 IsCmpMinimum = true;
1942 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1943 numBits: Src1VT.getScalarSizeInBits())
1944 .getSExtValue()) {
1945 IsCmpMinimum = true;
1946 }
1947 }
1948 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1949 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
1950 default:
1951 llvm_unreachable("Unexpected LMUL!");
1952#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1953 case RISCVVType::lmulenum: \
1954 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1955 : RISCV::PseudoVMSLT_VX_##suffix; \
1956 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1957 : RISCV::PseudoVMSGT_VX_##suffix; \
1958 break;
1959 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1960 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1961 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1962 CASE_VMSLT_OPCODES(LMUL_1, M1)
1963 CASE_VMSLT_OPCODES(LMUL_2, M2)
1964 CASE_VMSLT_OPCODES(LMUL_4, M4)
1965 CASE_VMSLT_OPCODES(LMUL_8, M8)
1966#undef CASE_VMSLT_OPCODES
1967 }
1968 // Mask operations use the LMUL from the mask type.
1969 switch (RISCVTargetLowering::getLMUL(VT)) {
1970 default:
1971 llvm_unreachable("Unexpected LMUL!");
1972#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1973 case RISCVVType::lmulenum: \
1974 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1975 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1976 break;
1977 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1978 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1979 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1980 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1981 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1982 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1983 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1984#undef CASE_VMNAND_VMSET_OPCODES
1985 }
1986 SDValue SEW = CurDAG->getTargetConstant(
1987 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
1988 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
1989 SDValue VL;
1990 selectVLOp(N: Node->getOperand(Num: 3), VL);
1991
1992 // If vmsge(u) with minimum value, expand it to vmset.
1993 if (IsCmpMinimum) {
1994 ReplaceNode(F: Node,
1995 T: CurDAG->getMachineNode(Opcode: VMSetOpcode, dl: DL, VT, Op1: VL, Op2: MaskSEW));
1996 return;
1997 }
1998
1999 if (IsCmpConstant) {
2000 SDValue Imm =
2001 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
2002
2003 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMSGTOpcode, dl: DL, VT,
2004 Ops: {Src1, Imm, VL, SEW}));
2005 return;
2006 }
2007
2008 // Expand to
2009 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2010 SDValue Cmp = SDValue(
2011 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
2012 0);
2013 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMNANDOpcode, dl: DL, VT,
2014 Ops: {Cmp, Cmp, VL, MaskSEW}));
2015 return;
2016 }
2017 case Intrinsic::riscv_vmsgeu_mask:
2018 case Intrinsic::riscv_vmsge_mask: {
2019 SDValue Src1 = Node->getOperand(Num: 2);
2020 SDValue Src2 = Node->getOperand(Num: 3);
2021 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2022 bool IsCmpConstant = false;
2023 bool IsCmpMinimum = false;
2024 // Only custom select scalar second operand.
2025 if (Src2.getValueType() != XLenVT)
2026 break;
2027 // Small constants are handled with patterns.
2028 MVT Src1VT = Src1.getSimpleValueType();
2029 int64_t CVal = 0;
2030 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
2031 IsCmpConstant = true;
2032 CVal = C->getSExtValue();
2033 if (CVal >= -15 && CVal <= 16) {
2034 if (!IsUnsigned || CVal != 0)
2035 break;
2036 IsCmpMinimum = true;
2037 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2038 numBits: Src1VT.getScalarSizeInBits())
2039 .getSExtValue()) {
2040 IsCmpMinimum = true;
2041 }
2042 }
2043 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2044 VMOROpcode, VMSGTMaskOpcode;
2045 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
2046 default:
2047 llvm_unreachable("Unexpected LMUL!");
2048#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2049 case RISCVVType::lmulenum: \
2050 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2051 : RISCV::PseudoVMSLT_VX_##suffix; \
2052 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2053 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2054 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2055 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2056 break;
2057 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2058 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2059 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2060 CASE_VMSLT_OPCODES(LMUL_1, M1)
2061 CASE_VMSLT_OPCODES(LMUL_2, M2)
2062 CASE_VMSLT_OPCODES(LMUL_4, M4)
2063 CASE_VMSLT_OPCODES(LMUL_8, M8)
2064#undef CASE_VMSLT_OPCODES
2065 }
2066 // Mask operations use the LMUL from the mask type.
2067 switch (RISCVTargetLowering::getLMUL(VT)) {
2068 default:
2069 llvm_unreachable("Unexpected LMUL!");
2070#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2071 case RISCVVType::lmulenum: \
2072 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2073 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2074 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2075 break;
2076 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2077 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2078 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2079 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2080 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2081 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2082 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2083#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2084 }
2085 SDValue SEW = CurDAG->getTargetConstant(
2086 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
2087 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
2088 SDValue VL;
2089 selectVLOp(N: Node->getOperand(Num: 5), VL);
2090 SDValue MaskedOff = Node->getOperand(Num: 1);
2091 SDValue Mask = Node->getOperand(Num: 4);
2092
2093 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2094 if (IsCmpMinimum) {
2095 // We don't need vmor if the MaskedOff and the Mask are the same
2096 // value.
2097 if (Mask == MaskedOff) {
2098 ReplaceUses(F: Node, T: Mask.getNode());
2099 return;
2100 }
2101 ReplaceNode(F: Node,
2102 T: CurDAG->getMachineNode(Opcode: VMOROpcode, dl: DL, VT,
2103 Ops: {Mask, MaskedOff, VL, MaskSEW}));
2104 return;
2105 }
2106
2107 // If the MaskedOff value and the Mask are the same value use
2108 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2109 // This avoids needing to copy v0 to vd before starting the next sequence.
2110 if (Mask == MaskedOff) {
2111 SDValue Cmp = SDValue(
2112 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
2113 0);
2114 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMANDNOpcode, dl: DL, VT,
2115 Ops: {Mask, Cmp, VL, MaskSEW}));
2116 return;
2117 }
2118
2119 SDValue PolicyOp =
2120 CurDAG->getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);
2121
2122 if (IsCmpConstant) {
2123 SDValue Imm =
2124 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
2125
2126 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
2127 Opcode: VMSGTMaskOpcode, dl: DL, VT,
2128 Ops: {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2129 return;
2130 }
2131
2132 // Otherwise use
2133 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2134 // The result is mask undisturbed.
2135 // We use the same instructions to emulate mask agnostic behavior, because
2136 // the agnostic result can be either undisturbed or all 1.
2137 SDValue Cmp = SDValue(CurDAG->getMachineNode(Opcode: VMSLTMaskOpcode, dl: DL, VT,
2138 Ops: {MaskedOff, Src1, Src2, Mask,
2139 VL, SEW, PolicyOp}),
2140 0);
2141 // vmxor.mm vd, vd, v0 is used to update active value.
2142 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMXOROpcode, dl: DL, VT,
2143 Ops: {Cmp, Mask, VL, MaskSEW}));
2144 return;
2145 }
2146 case Intrinsic::riscv_vsetvli:
2147 case Intrinsic::riscv_vsetvlimax:
2148 return selectVSETVLI(Node);
2149 case Intrinsic::riscv_sf_vsettnt:
2150 case Intrinsic::riscv_sf_vsettm:
2151 case Intrinsic::riscv_sf_vsettk:
2152 return selectXSfmmVSET(Node);
2153 }
2154 break;
2155 }
2156 case ISD::INTRINSIC_W_CHAIN: {
2157 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2158 switch (IntNo) {
2159 // By default we do not custom select any intrinsic.
2160 default:
2161 break;
2162 case Intrinsic::riscv_vlseg2:
2163 case Intrinsic::riscv_vlseg3:
2164 case Intrinsic::riscv_vlseg4:
2165 case Intrinsic::riscv_vlseg5:
2166 case Intrinsic::riscv_vlseg6:
2167 case Intrinsic::riscv_vlseg7:
2168 case Intrinsic::riscv_vlseg8: {
2169 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2170 /*IsStrided*/ false);
2171 return;
2172 }
2173 case Intrinsic::riscv_vlseg2_mask:
2174 case Intrinsic::riscv_vlseg3_mask:
2175 case Intrinsic::riscv_vlseg4_mask:
2176 case Intrinsic::riscv_vlseg5_mask:
2177 case Intrinsic::riscv_vlseg6_mask:
2178 case Intrinsic::riscv_vlseg7_mask:
2179 case Intrinsic::riscv_vlseg8_mask: {
2180 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2181 /*IsStrided*/ false);
2182 return;
2183 }
2184 case Intrinsic::riscv_vlsseg2:
2185 case Intrinsic::riscv_vlsseg3:
2186 case Intrinsic::riscv_vlsseg4:
2187 case Intrinsic::riscv_vlsseg5:
2188 case Intrinsic::riscv_vlsseg6:
2189 case Intrinsic::riscv_vlsseg7:
2190 case Intrinsic::riscv_vlsseg8: {
2191 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2192 /*IsStrided*/ true);
2193 return;
2194 }
2195 case Intrinsic::riscv_vlsseg2_mask:
2196 case Intrinsic::riscv_vlsseg3_mask:
2197 case Intrinsic::riscv_vlsseg4_mask:
2198 case Intrinsic::riscv_vlsseg5_mask:
2199 case Intrinsic::riscv_vlsseg6_mask:
2200 case Intrinsic::riscv_vlsseg7_mask:
2201 case Intrinsic::riscv_vlsseg8_mask: {
2202 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2203 /*IsStrided*/ true);
2204 return;
2205 }
2206 case Intrinsic::riscv_vloxseg2:
2207 case Intrinsic::riscv_vloxseg3:
2208 case Intrinsic::riscv_vloxseg4:
2209 case Intrinsic::riscv_vloxseg5:
2210 case Intrinsic::riscv_vloxseg6:
2211 case Intrinsic::riscv_vloxseg7:
2212 case Intrinsic::riscv_vloxseg8:
2213 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2214 /*IsOrdered*/ true);
2215 return;
2216 case Intrinsic::riscv_vluxseg2:
2217 case Intrinsic::riscv_vluxseg3:
2218 case Intrinsic::riscv_vluxseg4:
2219 case Intrinsic::riscv_vluxseg5:
2220 case Intrinsic::riscv_vluxseg6:
2221 case Intrinsic::riscv_vluxseg7:
2222 case Intrinsic::riscv_vluxseg8:
2223 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2224 /*IsOrdered*/ false);
2225 return;
2226 case Intrinsic::riscv_vloxseg2_mask:
2227 case Intrinsic::riscv_vloxseg3_mask:
2228 case Intrinsic::riscv_vloxseg4_mask:
2229 case Intrinsic::riscv_vloxseg5_mask:
2230 case Intrinsic::riscv_vloxseg6_mask:
2231 case Intrinsic::riscv_vloxseg7_mask:
2232 case Intrinsic::riscv_vloxseg8_mask:
2233 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2234 /*IsOrdered*/ true);
2235 return;
2236 case Intrinsic::riscv_vluxseg2_mask:
2237 case Intrinsic::riscv_vluxseg3_mask:
2238 case Intrinsic::riscv_vluxseg4_mask:
2239 case Intrinsic::riscv_vluxseg5_mask:
2240 case Intrinsic::riscv_vluxseg6_mask:
2241 case Intrinsic::riscv_vluxseg7_mask:
2242 case Intrinsic::riscv_vluxseg8_mask:
2243 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2244 /*IsOrdered*/ false);
2245 return;
2246 case Intrinsic::riscv_vlseg8ff:
2247 case Intrinsic::riscv_vlseg7ff:
2248 case Intrinsic::riscv_vlseg6ff:
2249 case Intrinsic::riscv_vlseg5ff:
2250 case Intrinsic::riscv_vlseg4ff:
2251 case Intrinsic::riscv_vlseg3ff:
2252 case Intrinsic::riscv_vlseg2ff: {
2253 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false);
2254 return;
2255 }
2256 case Intrinsic::riscv_vlseg8ff_mask:
2257 case Intrinsic::riscv_vlseg7ff_mask:
2258 case Intrinsic::riscv_vlseg6ff_mask:
2259 case Intrinsic::riscv_vlseg5ff_mask:
2260 case Intrinsic::riscv_vlseg4ff_mask:
2261 case Intrinsic::riscv_vlseg3ff_mask:
2262 case Intrinsic::riscv_vlseg2ff_mask: {
2263 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true);
2264 return;
2265 }
2266 case Intrinsic::riscv_vloxei:
2267 case Intrinsic::riscv_vloxei_mask:
2268 case Intrinsic::riscv_vluxei:
2269 case Intrinsic::riscv_vluxei_mask: {
2270 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2271 IntNo == Intrinsic::riscv_vluxei_mask;
2272 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2273 IntNo == Intrinsic::riscv_vloxei_mask;
2274
2275 MVT VT = Node->getSimpleValueType(ResNo: 0);
2276 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2277
2278 unsigned CurOp = 2;
2279 SmallVector<SDValue, 8> Operands;
2280 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2281
2282 MVT IndexVT;
2283 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2284 /*IsStridedOrIndexed*/ true, Operands,
2285 /*IsLoad=*/true, IndexVT: &IndexVT);
2286
2287 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2288 "Element count mismatch");
2289
2290 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2291 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2292 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2293 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2294 reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
2295 "index values when XLEN=32");
2296 }
2297 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2298 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
2299 IndexLMUL: static_cast<unsigned>(IndexLMUL));
2300 MachineSDNode *Load =
2301 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2302
2303 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2304
2305 ReplaceNode(F: Node, T: Load);
2306 return;
2307 }
2308 case Intrinsic::riscv_vlm:
2309 case Intrinsic::riscv_vle:
2310 case Intrinsic::riscv_vle_mask:
2311 case Intrinsic::riscv_vlse:
2312 case Intrinsic::riscv_vlse_mask: {
2313 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2314 IntNo == Intrinsic::riscv_vlse_mask;
2315 bool IsStrided =
2316 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2317
2318 MVT VT = Node->getSimpleValueType(ResNo: 0);
2319 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2320
2321 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2322 // operand at the IR level. In pseudos, it has both a policy and a
2323 // passthru operand. The passthru operand is needed to track the
2324 // "tail undefined" state, and the policy is there just for
2325 // consistency - it will always be "don't care" for the
2326 // unmasked form.
2327 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2328 unsigned CurOp = 2;
2329 SmallVector<SDValue, 8> Operands;
2330 if (HasPassthruOperand)
2331 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2332 else {
2333 // We eagerly lower to implicit_def (instead of undef), as we
2334 // otherwise fail to select nodes such as: nxv1i1 = undef
2335 SDNode *Passthru =
2336 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT);
2337 Operands.push_back(Elt: SDValue(Passthru, 0));
2338 }
2339 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2340 Operands, /*IsLoad=*/true);
2341
2342 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2343 const RISCV::VLEPseudo *P =
2344 RISCV::getVLEPseudo(Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
2345 LMUL: static_cast<unsigned>(LMUL));
2346 MachineSDNode *Load =
2347 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2348
2349 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2350
2351 ReplaceNode(F: Node, T: Load);
2352 return;
2353 }
2354 case Intrinsic::riscv_vleff:
2355 case Intrinsic::riscv_vleff_mask: {
2356 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2357
2358 MVT VT = Node->getSimpleValueType(ResNo: 0);
2359 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2360
2361 unsigned CurOp = 2;
2362 SmallVector<SDValue, 7> Operands;
2363 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2364 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2365 /*IsStridedOrIndexed*/ false, Operands,
2366 /*IsLoad=*/true);
2367
2368 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2369 const RISCV::VLEPseudo *P =
2370 RISCV::getVLEPseudo(Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
2371 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2372 MachineSDNode *Load = CurDAG->getMachineNode(
2373 Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2374 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2375
2376 ReplaceNode(F: Node, T: Load);
2377 return;
2378 }
2379 case Intrinsic::riscv_nds_vln:
2380 case Intrinsic::riscv_nds_vln_mask:
2381 case Intrinsic::riscv_nds_vlnu:
2382 case Intrinsic::riscv_nds_vlnu_mask: {
2383 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2384 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2385 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2386 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2387
2388 MVT VT = Node->getSimpleValueType(ResNo: 0);
2389 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2390 unsigned CurOp = 2;
2391 SmallVector<SDValue, 8> Operands;
2392
2393 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2394 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2395 /*IsStridedOrIndexed=*/false, Operands,
2396 /*IsLoad=*/true);
2397
2398 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2399 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2400 Masked: IsMasked, Unsigned: IsUnsigned, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2401 MachineSDNode *Load =
2402 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2403
2404 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2405 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
2406
2407 ReplaceNode(F: Node, T: Load);
2408 return;
2409 }
2410 }
2411 break;
2412 }
2413 case ISD::INTRINSIC_VOID: {
2414 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2415 switch (IntNo) {
2416 case Intrinsic::riscv_vsseg2:
2417 case Intrinsic::riscv_vsseg3:
2418 case Intrinsic::riscv_vsseg4:
2419 case Intrinsic::riscv_vsseg5:
2420 case Intrinsic::riscv_vsseg6:
2421 case Intrinsic::riscv_vsseg7:
2422 case Intrinsic::riscv_vsseg8: {
2423 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2424 /*IsStrided*/ false);
2425 return;
2426 }
2427 case Intrinsic::riscv_vsseg2_mask:
2428 case Intrinsic::riscv_vsseg3_mask:
2429 case Intrinsic::riscv_vsseg4_mask:
2430 case Intrinsic::riscv_vsseg5_mask:
2431 case Intrinsic::riscv_vsseg6_mask:
2432 case Intrinsic::riscv_vsseg7_mask:
2433 case Intrinsic::riscv_vsseg8_mask: {
2434 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2435 /*IsStrided*/ false);
2436 return;
2437 }
2438 case Intrinsic::riscv_vssseg2:
2439 case Intrinsic::riscv_vssseg3:
2440 case Intrinsic::riscv_vssseg4:
2441 case Intrinsic::riscv_vssseg5:
2442 case Intrinsic::riscv_vssseg6:
2443 case Intrinsic::riscv_vssseg7:
2444 case Intrinsic::riscv_vssseg8: {
2445 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2446 /*IsStrided*/ true);
2447 return;
2448 }
2449 case Intrinsic::riscv_vssseg2_mask:
2450 case Intrinsic::riscv_vssseg3_mask:
2451 case Intrinsic::riscv_vssseg4_mask:
2452 case Intrinsic::riscv_vssseg5_mask:
2453 case Intrinsic::riscv_vssseg6_mask:
2454 case Intrinsic::riscv_vssseg7_mask:
2455 case Intrinsic::riscv_vssseg8_mask: {
2456 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2457 /*IsStrided*/ true);
2458 return;
2459 }
2460 case Intrinsic::riscv_vsoxseg2:
2461 case Intrinsic::riscv_vsoxseg3:
2462 case Intrinsic::riscv_vsoxseg4:
2463 case Intrinsic::riscv_vsoxseg5:
2464 case Intrinsic::riscv_vsoxseg6:
2465 case Intrinsic::riscv_vsoxseg7:
2466 case Intrinsic::riscv_vsoxseg8:
2467 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2468 /*IsOrdered*/ true);
2469 return;
2470 case Intrinsic::riscv_vsuxseg2:
2471 case Intrinsic::riscv_vsuxseg3:
2472 case Intrinsic::riscv_vsuxseg4:
2473 case Intrinsic::riscv_vsuxseg5:
2474 case Intrinsic::riscv_vsuxseg6:
2475 case Intrinsic::riscv_vsuxseg7:
2476 case Intrinsic::riscv_vsuxseg8:
2477 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2478 /*IsOrdered*/ false);
2479 return;
2480 case Intrinsic::riscv_vsoxseg2_mask:
2481 case Intrinsic::riscv_vsoxseg3_mask:
2482 case Intrinsic::riscv_vsoxseg4_mask:
2483 case Intrinsic::riscv_vsoxseg5_mask:
2484 case Intrinsic::riscv_vsoxseg6_mask:
2485 case Intrinsic::riscv_vsoxseg7_mask:
2486 case Intrinsic::riscv_vsoxseg8_mask:
2487 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2488 /*IsOrdered*/ true);
2489 return;
2490 case Intrinsic::riscv_vsuxseg2_mask:
2491 case Intrinsic::riscv_vsuxseg3_mask:
2492 case Intrinsic::riscv_vsuxseg4_mask:
2493 case Intrinsic::riscv_vsuxseg5_mask:
2494 case Intrinsic::riscv_vsuxseg6_mask:
2495 case Intrinsic::riscv_vsuxseg7_mask:
2496 case Intrinsic::riscv_vsuxseg8_mask:
2497 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2498 /*IsOrdered*/ false);
2499 return;
2500 case Intrinsic::riscv_vsoxei:
2501 case Intrinsic::riscv_vsoxei_mask:
2502 case Intrinsic::riscv_vsuxei:
2503 case Intrinsic::riscv_vsuxei_mask: {
2504 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2505 IntNo == Intrinsic::riscv_vsuxei_mask;
2506 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2507 IntNo == Intrinsic::riscv_vsoxei_mask;
2508
2509 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2510 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2511
2512 unsigned CurOp = 2;
2513 SmallVector<SDValue, 8> Operands;
2514 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2515
2516 MVT IndexVT;
2517 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2518 /*IsStridedOrIndexed*/ true, Operands,
2519 /*IsLoad=*/false, IndexVT: &IndexVT);
2520
2521 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2522 "Element count mismatch");
2523
2524 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2525 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2526 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2527 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2528 reportFatalUsageError(reason: "The V extension does not support EEW=64 for "
2529 "index values when XLEN=32");
2530 }
2531 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2532 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW,
2533 LMUL: static_cast<unsigned>(LMUL), IndexLMUL: static_cast<unsigned>(IndexLMUL));
2534 MachineSDNode *Store =
2535 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2536
2537 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2538
2539 ReplaceNode(F: Node, T: Store);
2540 return;
2541 }
2542 case Intrinsic::riscv_vsm:
2543 case Intrinsic::riscv_vse:
2544 case Intrinsic::riscv_vse_mask:
2545 case Intrinsic::riscv_vsse:
2546 case Intrinsic::riscv_vsse_mask: {
2547 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2548 IntNo == Intrinsic::riscv_vsse_mask;
2549 bool IsStrided =
2550 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2551
2552 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2553 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2554
2555 unsigned CurOp = 2;
2556 SmallVector<SDValue, 8> Operands;
2557 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2558
2559 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2560 Operands);
2561
2562 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2563 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2564 Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2565 MachineSDNode *Store =
2566 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2567 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2568
2569 ReplaceNode(F: Node, T: Store);
2570 return;
2571 }
2572 case Intrinsic::riscv_sf_vc_x_se:
2573 case Intrinsic::riscv_sf_vc_i_se:
2574 selectSF_VC_X_SE(Node);
2575 return;
2576 case Intrinsic::riscv_sf_vlte8:
2577 case Intrinsic::riscv_sf_vlte16:
2578 case Intrinsic::riscv_sf_vlte32:
2579 case Intrinsic::riscv_sf_vlte64: {
2580 unsigned Log2SEW;
2581 unsigned PseudoInst;
2582 switch (IntNo) {
2583 case Intrinsic::riscv_sf_vlte8:
2584 PseudoInst = RISCV::PseudoSF_VLTE8;
2585 Log2SEW = 3;
2586 break;
2587 case Intrinsic::riscv_sf_vlte16:
2588 PseudoInst = RISCV::PseudoSF_VLTE16;
2589 Log2SEW = 4;
2590 break;
2591 case Intrinsic::riscv_sf_vlte32:
2592 PseudoInst = RISCV::PseudoSF_VLTE32;
2593 Log2SEW = 5;
2594 break;
2595 case Intrinsic::riscv_sf_vlte64:
2596 PseudoInst = RISCV::PseudoSF_VLTE64;
2597 Log2SEW = 6;
2598 break;
2599 }
2600
2601 SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
2602 SDValue TWidenOp = CurDAG->getTargetConstant(Val: 1, DL, VT: XLenVT);
2603 SDValue Operands[] = {Node->getOperand(Num: 2),
2604 Node->getOperand(Num: 3),
2605 Node->getOperand(Num: 4),
2606 SEWOp,
2607 TWidenOp,
2608 Node->getOperand(Num: 0)};
2609
2610 MachineSDNode *TileLoad =
2611 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2612 CurDAG->setNodeMemRefs(N: TileLoad,
2613 NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
2614
2615 ReplaceNode(F: Node, T: TileLoad);
2616 return;
2617 }
2618 case Intrinsic::riscv_sf_mm_s_s:
2619 case Intrinsic::riscv_sf_mm_s_u:
2620 case Intrinsic::riscv_sf_mm_u_s:
2621 case Intrinsic::riscv_sf_mm_u_u:
2622 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2623 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2624 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2625 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2626 case Intrinsic::riscv_sf_mm_f_f: {
2627 bool HasFRM = false;
2628 unsigned PseudoInst;
2629 switch (IntNo) {
2630 case Intrinsic::riscv_sf_mm_s_s:
2631 PseudoInst = RISCV::PseudoSF_MM_S_S;
2632 break;
2633 case Intrinsic::riscv_sf_mm_s_u:
2634 PseudoInst = RISCV::PseudoSF_MM_S_U;
2635 break;
2636 case Intrinsic::riscv_sf_mm_u_s:
2637 PseudoInst = RISCV::PseudoSF_MM_U_S;
2638 break;
2639 case Intrinsic::riscv_sf_mm_u_u:
2640 PseudoInst = RISCV::PseudoSF_MM_U_U;
2641 break;
2642 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2643 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2644 HasFRM = true;
2645 break;
2646 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2647 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2648 HasFRM = true;
2649 break;
2650 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2651 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2652 HasFRM = true;
2653 break;
2654 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2655 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2656 HasFRM = true;
2657 break;
2658 case Intrinsic::riscv_sf_mm_f_f:
2659 if (Node->getOperand(Num: 3).getValueType().getScalarType() == MVT::bf16)
2660 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2661 else
2662 PseudoInst = RISCV::PseudoSF_MM_F_F;
2663 HasFRM = true;
2664 break;
2665 }
2666 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2667 SDValue Op1 = Node->getOperand(Num: 3);
2668 SDValue Op2 = Node->getOperand(Num: 4);
2669 MVT VT = Op1->getSimpleValueType(ResNo: 0);
2670 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2671 SDValue TmOp = Node->getOperand(Num: 5);
2672 SDValue TnOp = Node->getOperand(Num: 6);
2673 SDValue TkOp = Node->getOperand(Num: 7);
2674 SDValue TWidenOp = Node->getOperand(Num: 8);
2675 SDValue Chain = Node->getOperand(Num: 0);
2676
2677 // sf.mm.f.f with sew=32, twiden=2 is invalid
2678 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2679 TWidenOp->getAsZExtVal() == 2)
2680 reportFatalUsageError(reason: "sf.mm.f.f doesn't support (sew=32, twiden=2)");
2681
2682 SmallVector<SDValue, 10> Operands(
2683 {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Op1, Op2});
2684 if (HasFRM)
2685 Operands.push_back(
2686 Elt: CurDAG->getTargetConstant(Val: RISCVFPRndMode::DYN, DL, VT: XLenVT));
2687 Operands.append(IL: {TmOp, TnOp, TkOp,
2688 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT), TWidenOp,
2689 Chain});
2690
2691 auto *NewNode =
2692 CurDAG->getMachineNode(Opcode: PseudoInst, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2693
2694 ReplaceNode(F: Node, T: NewNode);
2695 return;
2696 }
2697 case Intrinsic::riscv_sf_vtzero_t: {
2698 uint64_t TileNum = Node->getConstantOperandVal(Num: 2);
2699 SDValue Tm = Node->getOperand(Num: 3);
2700 SDValue Tn = Node->getOperand(Num: 4);
2701 SDValue Log2SEW = Node->getOperand(Num: 5);
2702 SDValue TWiden = Node->getOperand(Num: 6);
2703 SDValue Chain = Node->getOperand(Num: 0);
2704 auto *NewNode = CurDAG->getMachineNode(
2705 Opcode: RISCV::PseudoSF_VTZERO_T, dl: DL, VTs: Node->getVTList(),
2706 Ops: {CurDAG->getRegister(Reg: getTileReg(TileNum), VT: XLenVT), Tm, Tn, Log2SEW,
2707 TWiden, Chain});
2708
2709 ReplaceNode(F: Node, T: NewNode);
2710 return;
2711 }
2712 }
2713 break;
2714 }
2715 case ISD::BITCAST: {
2716 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2717 // Just drop bitcasts between vectors if both are fixed or both are
2718 // scalable.
2719 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2720 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2721 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2722 CurDAG->RemoveDeadNode(N: Node);
2723 return;
2724 }
2725 if (Subtarget->enablePExtSIMDCodeGen()) {
2726 bool Is32BitCast =
2727 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2728 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2729 bool Is64BitCast =
2730 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2731 SrcVT == MVT::v2i32)) ||
2732 (SrcVT == MVT::i64 &&
2733 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2734 if (Is32BitCast || Is64BitCast) {
2735 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2736 CurDAG->RemoveDeadNode(N: Node);
2737 return;
2738 }
2739 }
2740 break;
2741 }
2742 case ISD::SCALAR_TO_VECTOR:
2743 if (Subtarget->enablePExtSIMDCodeGen()) {
2744 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2745 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2746 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2747 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2748 CurDAG->RemoveDeadNode(N: Node);
2749 return;
2750 }
2751 }
2752 break;
2753 case ISD::INSERT_SUBVECTOR:
2754 case RISCVISD::TUPLE_INSERT: {
2755 SDValue V = Node->getOperand(Num: 0);
2756 SDValue SubV = Node->getOperand(Num: 1);
2757 SDLoc DL(SubV);
2758 auto Idx = Node->getConstantOperandVal(Num: 2);
2759 MVT SubVecVT = SubV.getSimpleValueType();
2760
2761 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2762 MVT SubVecContainerVT = SubVecVT;
2763 // Establish the correct scalable-vector types for any fixed-length type.
2764 if (SubVecVT.isFixedLengthVector()) {
2765 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT: SubVecVT);
2766 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
2767 [[maybe_unused]] bool ExactlyVecRegSized =
2768 Subtarget->expandVScale(X: SubVecVT.getSizeInBits())
2769 .isKnownMultipleOf(RHS: Subtarget->expandVScale(X: VecRegSize));
2770 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2771 .getKnownMinValue()));
2772 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2773 }
2774 MVT ContainerVT = VT;
2775 if (VT.isFixedLengthVector())
2776 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2777
2778 const auto *TRI = Subtarget->getRegisterInfo();
2779 unsigned SubRegIdx;
2780 std::tie(args&: SubRegIdx, args&: Idx) =
2781 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2782 VecVT: ContainerVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
2783
2784 // If the Idx hasn't been completely eliminated then this is a subvector
2785 // insert which doesn't naturally align to a vector register. These must
2786 // be handled using instructions to manipulate the vector registers.
2787 if (Idx != 0)
2788 break;
2789
2790 RISCVVType::VLMUL SubVecLMUL =
2791 RISCVTargetLowering::getLMUL(VT: SubVecContainerVT);
2792 [[maybe_unused]] bool IsSubVecPartReg =
2793 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2794 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2795 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2796 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2797 V.isUndef()) &&
2798 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2799 "the subvector is smaller than a full-sized register");
2800
2801 // If we haven't set a SubRegIdx, then we must be going between
2802 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2803 if (SubRegIdx == RISCV::NoSubRegister) {
2804 unsigned InRegClassID =
2805 RISCVTargetLowering::getRegClassIDForVecVT(VT: ContainerVT);
2806 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2807 InRegClassID &&
2808 "Unexpected subvector extraction");
2809 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
2810 SDNode *NewNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
2811 dl: DL, VT, Op1: SubV, Op2: RC);
2812 ReplaceNode(F: Node, T: NewNode);
2813 return;
2814 }
2815
2816 SDValue Insert = CurDAG->getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V, Subreg: SubV);
2817 ReplaceNode(F: Node, T: Insert.getNode());
2818 return;
2819 }
2820 case ISD::EXTRACT_SUBVECTOR:
2821 case RISCVISD::TUPLE_EXTRACT: {
2822 SDValue V = Node->getOperand(Num: 0);
2823 auto Idx = Node->getConstantOperandVal(Num: 1);
2824 MVT InVT = V.getSimpleValueType();
2825 SDLoc DL(V);
2826
2827 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2828 MVT SubVecContainerVT = VT;
2829 // Establish the correct scalable-vector types for any fixed-length type.
2830 if (VT.isFixedLengthVector()) {
2831 assert(Idx == 0);
2832 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2833 }
2834 if (InVT.isFixedLengthVector())
2835 InVT = TLI.getContainerForFixedLengthVector(VT: InVT);
2836
2837 const auto *TRI = Subtarget->getRegisterInfo();
2838 unsigned SubRegIdx;
2839 std::tie(args&: SubRegIdx, args&: Idx) =
2840 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2841 VecVT: InVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
2842
2843 // If the Idx hasn't been completely eliminated then this is a subvector
2844 // extract which doesn't naturally align to a vector register. These must
2845 // be handled using instructions to manipulate the vector registers.
2846 if (Idx != 0)
2847 break;
2848
2849 // If we haven't set a SubRegIdx, then we must be going between
2850 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2851 if (SubRegIdx == RISCV::NoSubRegister) {
2852 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT: InVT);
2853 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2854 InRegClassID &&
2855 "Unexpected subvector extraction");
2856 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
2857 SDNode *NewNode =
2858 CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT, Op1: V, Op2: RC);
2859 ReplaceNode(F: Node, T: NewNode);
2860 return;
2861 }
2862
2863 SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
2864 ReplaceNode(F: Node, T: Extract.getNode());
2865 return;
2866 }
2867 case RISCVISD::VMV_S_X_VL:
2868 case RISCVISD::VFMV_S_F_VL:
2869 case RISCVISD::VMV_V_X_VL:
2870 case RISCVISD::VFMV_V_F_VL: {
2871 // Try to match splat of a scalar load to a strided load with stride of x0.
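// For example (a sketch, independent of the exact VL): a splat such as
// (vmv.v.x vd, (load p)) can be selected as the zero-strided load
// vlse<eew> vd, (p), x0, and when VL=1 a plain unit-stride load suffices.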
2872 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2873 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2874 if (!Node->getOperand(Num: 0).isUndef())
2875 break;
2876 SDValue Src = Node->getOperand(Num: 1);
2877 auto *Ld = dyn_cast<LoadSDNode>(Val&: Src);
2878 // Can't fold an indexed load (load-update node) because its second
2879 // output is used, so the load-update node can't be removed.
2880 if (!Ld || Ld->isIndexed())
2881 break;
2882 EVT MemVT = Ld->getMemoryVT();
2883 // The memory VT should be the same size as the element type.
2884 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2885 break;
2886 if (!IsProfitableToFold(N: Src, U: Node, Root: Node) ||
2887 !IsLegalToFold(N: Src, U: Node, Root: Node, OptLevel: TM.getOptLevel()))
2888 break;
2889
2890 SDValue VL;
2891 if (IsScalarMove) {
2892 // We could handle more VL values if we updated the VSETVLI insertion
2893 // pass to avoid introducing extra VSETVLIs.
2894 if (!isOneConstant(V: Node->getOperand(Num: 2)))
2895 break;
2896 selectVLOp(N: Node->getOperand(Num: 2), VL);
2897 } else
2898 selectVLOp(N: Node->getOperand(Num: 2), VL);
2899
2900 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2901 SDValue SEW = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
2902
2903 // If VL=1, then we don't need to do a strided load and can just do a
2904 // regular load.
2905 bool IsStrided = !isOneConstant(V: VL);
2906
2907 // Only do a strided load if we have optimized zero-stride vector load.
2908 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2909 break;
2910
2911 SmallVector<SDValue> Operands = {
2912 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT), 0),
2913 Ld->getBasePtr()};
2914 if (IsStrided)
2915 Operands.push_back(Elt: CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT));
2916 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
2917 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
2918 Operands.append(IL: {VL, SEW, PolicyOp, Ld->getChain()});
2919
2920 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2921 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2922 /*IsMasked*/ Masked: false, Strided: IsStrided, /*FF*/ false,
2923 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2924 MachineSDNode *Load =
2925 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, ResultTys: {VT, MVT::Other}, Ops: Operands);
2926 // Update the chain.
2927 ReplaceUses(F: Src.getValue(R: 1), T: SDValue(Load, 1));
2928 // Record the mem-refs
2929 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {Ld->getMemOperand()});
2930 // Replace the splat with the vlse.
2931 ReplaceNode(F: Node, T: Load);
2932 return;
2933 }
2934 case ISD::PREFETCH:
2935 unsigned Locality = Node->getConstantOperandVal(Num: 3);
2936 if (Locality > 2)
2937 break;
2938
2939 auto *LoadStoreMem = cast<MemSDNode>(Val: Node);
2940 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2941 MMO->setFlags(MachineMemOperand::MONonTemporal);
2942
2943 int NontemporalLevel = 0;
2944 switch (Locality) {
2945 case 0:
2946 NontemporalLevel = 3; // NTL.ALL
2947 break;
2948 case 1:
2949 NontemporalLevel = 1; // NTL.PALL
2950 break;
2951 case 2:
2952 NontemporalLevel = 0; // NTL.P1
2953 break;
2954 default:
2955 llvm_unreachable("unexpected locality value.");
2956 }
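// For example, __builtin_prefetch(p, /*rw=*/0, /*locality=*/0) maps to
// NontemporalLevel 3, so both nontemporal bits are set on the MMO and,
// assuming Zihintntl and Zicbop, the emitted code looks roughly like:
//   ntl.all
//   prefetch.r 0(p)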
2957
2958 if (NontemporalLevel & 0b1)
2959 MMO->setFlags(MONontemporalBit0);
2960 if (NontemporalLevel & 0b10)
2961 MMO->setFlags(MONontemporalBit1);
2962 break;
2963 }
2964
2965 // Select the default instruction.
2966 SelectCode(N: Node);
2967}
2968
2969bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2970 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2971 std::vector<SDValue> &OutOps) {
2972 // Always produce a register and immediate operand, as expected by
2973 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2974 switch (ConstraintID) {
2975 case InlineAsm::ConstraintCode::o:
2976 case InlineAsm::ConstraintCode::m: {
2977 SDValue Op0, Op1;
2978 [[maybe_unused]] bool Found = SelectAddrRegImm(Addr: Op, Base&: Op0, Offset&: Op1);
2979 assert(Found && "SelectAddrRegImm should always succeed");
2980 OutOps.push_back(x: Op0);
2981 OutOps.push_back(x: Op1);
2982 return false;
2983 }
2984 case InlineAsm::ConstraintCode::A:
2985 OutOps.push_back(x: Op);
2986 OutOps.push_back(
2987 x: CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget->getXLenVT()));
2988 return false;
2989 default:
2990 report_fatal_error(reason: "Unexpected asm memory constraint " +
2991 InlineAsm::getMemConstraintName(C: ConstraintID));
2992 }
2993
2994 return true;
2995}
2996
2997bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2998 SDValue &Offset) {
2999 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Addr)) {
3000 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT: Subtarget->getXLenVT());
3001 Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT: Subtarget->getXLenVT());
3002 return true;
3003 }
3004
3005 return false;
3006}
3007
3008// Fold constant addresses.
3009static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3010 const MVT VT, const RISCVSubtarget *Subtarget,
3011 SDValue Addr, SDValue &Base, SDValue &Offset,
3012 bool IsPrefetch = false) {
3013 if (!isa<ConstantSDNode>(Val: Addr))
3014 return false;
3015
3016 int64_t CVal = cast<ConstantSDNode>(Val&: Addr)->getSExtValue();
3017
3018 // If the constant is a simm12, we can fold the whole constant and use X0 as
3019 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3020 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3021 int64_t Lo12 = SignExtend64<12>(x: CVal);
3022 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3023 if (!Subtarget->is64Bit() || isInt<32>(x: Hi)) {
3024 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3025 return false;
3026 if (Hi) {
3027 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3028 Base = SDValue(
3029 CurDAG->getMachineNode(Opcode: RISCV::LUI, dl: DL, VT,
3030 Op1: CurDAG->getTargetConstant(Val: Hi20, DL, VT)),
3031 0);
3032 } else {
3033 Base = CurDAG->getRegister(Reg: RISCV::X0, VT);
3034 }
3035 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3036 return true;
3037 }
3038
3039 // Ask how constant materialization would handle this constant.
3040 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: CVal, STI: *Subtarget);
3041
3042 // If the last instruction would be an ADDI, we can fold its immediate and
3043 // emit the rest of the sequence as the base.
3044 if (Seq.back().getOpcode() != RISCV::ADDI)
3045 return false;
3046 Lo12 = Seq.back().getImm();
3047 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3048 return false;
3049
3050 // Drop the last instruction.
3051 Seq.pop_back();
3052 assert(!Seq.empty() && "Expected more instructions in sequence");
3053
3054 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3055 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
3056 return true;
3057}
3058
3059// Is this ADD instruction only used as the base pointer of scalar loads and
3060// stores?
3061static bool isWorthFoldingAdd(SDValue Add) {
3062 for (auto *User : Add->users()) {
3063 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3064 User->getOpcode() != RISCVISD::LD_RV32 &&
3065 User->getOpcode() != RISCVISD::SD_RV32 &&
3066 User->getOpcode() != ISD::ATOMIC_LOAD &&
3067 User->getOpcode() != ISD::ATOMIC_STORE)
3068 return false;
3069 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3070 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3071 VT != MVT::f64)
3072 return false;
3073 // Don't allow stores of the value. It must be used as the address.
3074 if (User->getOpcode() == ISD::STORE &&
3075 cast<StoreSDNode>(Val: User)->getValue() == Add)
3076 return false;
3077 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3078 cast<AtomicSDNode>(Val: User)->getVal() == Add)
3079 return false;
3080 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3081 (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add))
3082 return false;
3083 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
3084 return false;
3085 }
3086
3087 return true;
3088}
3089
3090 static bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3091 switch (User->getOpcode()) {
3092 default:
3093 return false;
3094 case ISD::LOAD:
3095 case RISCVISD::LD_RV32:
3096 case ISD::ATOMIC_LOAD:
3097 break;
3098 case ISD::STORE:
3099 // Don't allow stores of Add. It must only be used as the address.
3100 if (cast<StoreSDNode>(Val: User)->getValue() == Add)
3101 return false;
3102 break;
3103 case RISCVISD::SD_RV32:
3104 // Don't allow stores of Add. It must only be used as the address.
3105 if (User->getOperand(Num: 0) == Add || User->getOperand(Num: 1) == Add)
3106 return false;
3107 break;
3108 case ISD::ATOMIC_STORE:
3109 // Don't allow stores of Add. It must only be used as the address.
3110 if (cast<AtomicSDNode>(Val: User)->getVal() == Add)
3111 return false;
3112 break;
3113 }
3114
3115 return true;
3116}
3117
3118// To prevent SelectAddrRegImm from folding offsets that conflict with the
3119// fusion of PseudoMovAddr, check if the offset of every use of a given address
3120// is within the alignment.
3121bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3122 Align Alignment) {
3123 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3124 for (auto *User : Addr->users()) {
3125 // If the user is a load or store, then the offset is 0, which is always
3126 // within the alignment.
3127 if (isRegImmLoadOrStore(User, Add: Addr))
3128 continue;
3129
3130 if (CurDAG->isBaseWithConstantOffset(Op: SDValue(User, 0))) {
3131 int64_t CVal = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
3132 if (!isInt<12>(x: CVal) || Alignment <= CVal)
3133 return false;
3134
3135 // Make sure all uses are foldable load/stores.
3136 for (auto *AddUser : User->users())
3137 if (!isRegImmLoadOrStore(User: AddUser, Add: SDValue(User, 0)))
3138 return false;
3139
3140 continue;
3141 }
3142
3143 return false;
3144 }
3145
3146 return true;
3147}
3148
3149bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3150 SDValue &Offset) {
3151 if (SelectAddrFrameIndex(Addr, Base, Offset))
3152 return true;
3153
3154 SDLoc DL(Addr);
3155 MVT VT = Addr.getSimpleValueType();
3156
3157 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3158 bool CanFold = true;
3159 // Unconditionally fold if operand 1 is not a global address (e.g. an
3160 // external symbol).
3161 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val: Addr.getOperand(i: 1))) {
3162 const DataLayout &DL = CurDAG->getDataLayout();
3163 Align Alignment = commonAlignment(
3164 A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
3165 if (!areOffsetsWithinAlignment(Addr, Alignment))
3166 CanFold = false;
3167 }
3168 if (CanFold) {
3169 Base = Addr.getOperand(i: 0);
3170 Offset = Addr.getOperand(i: 1);
3171 return true;
3172 }
3173 }
3174
3175 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3176 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3177 if (isInt<12>(x: CVal)) {
3178 Base = Addr.getOperand(i: 0);
3179 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3180 SDValue LoOperand = Base.getOperand(i: 1);
3181 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: LoOperand)) {
3182 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3183 // (its low part, really), then we can rely on the alignment of that
3184 // variable to provide a margin of safety before the low part can overflow
3185 // the 12 bits of the load/store offset. Check if CVal falls within
3186 // that margin; if so (low part + CVal) can't overflow.
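// For example, if the variable is 8-byte aligned, its low part is a
// multiple of 8 no larger than 2040, so adding any CVal in [0, 7] cannot
// push the combined offset out of the simm12 range.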
3187 const DataLayout &DL = CurDAG->getDataLayout();
3188 Align Alignment = commonAlignment(
3189 A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
3190 if ((CVal == 0 || Alignment > CVal) &&
3191 areOffsetsWithinAlignment(Addr: Base, Alignment)) {
3192 int64_t CombinedOffset = CVal + GA->getOffset();
3193 Base = Base.getOperand(i: 0);
3194 Offset = CurDAG->getTargetGlobalAddress(
3195 GV: GA->getGlobal(), DL: SDLoc(LoOperand), VT: LoOperand.getValueType(),
3196 offset: CombinedOffset, TargetFlags: GA->getTargetFlags());
3197 return true;
3198 }
3199 }
3200 }
3201
3202 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3203 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3204 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3205 return true;
3206 }
3207 }
3208
3209 // Handle ADD with large immediates.
3210 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
3211 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3212 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3213
3214 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3215 // an ADDI for part of the offset and fold the rest into the load/store.
3216 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
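// For example, a load at base+3000 becomes (a sketch):
//   addi t0, base, 2047
//   lw   a0, 953(t0)
// instead of materializing 3000 into a register first.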
3217 if (CVal >= -4096 && CVal <= 4094) {
3218 int64_t Adj = CVal < 0 ? -2048 : 2047;
3219 Base = SDValue(
3220 CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
3221 Op2: CurDAG->getSignedTargetConstant(Val: Adj, DL, VT)),
3222 0);
3223 Offset = CurDAG->getSignedTargetConstant(Val: CVal - Adj, DL, VT);
3224 return true;
3225 }
3226
3227 // For larger immediates, we might be able to save one instruction from
3228 // constant materialization by folding the Lo12 bits of the immediate into
3229 // the address. We should only do this if the ADD is only used by loads and
3230 // stores that can fold the lo12 bits. Otherwise, the ADD will be selected
3231 // separately with the fully materialized immediate, creating extra
3232 // instructions.
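// For example, a load at base + 0x12345678 gives roughly (a sketch):
//   lui  t0, 0x12345
//   add  t0, base, t0
//   lw   a0, 0x678(t0)
// instead of LUI+ADDI to materialize the full constant followed by an ADD.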
3233 if (isWorthFoldingAdd(Add: Addr) &&
3234 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
3235 Offset, /*IsPrefetch=*/false)) {
3236 // Insert an ADD instruction with the materialized Hi52 bits.
3237 Base = SDValue(
3238 CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
3239 0);
3240 return true;
3241 }
3242 }
3243
3244 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3245 /*IsPrefetch=*/false))
3246 return true;
3247
3248 Base = Addr;
3249 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3250 return true;
3251}
3252
3253/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3254bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
3255 SDValue &Offset) {
3256 if (SelectAddrFrameIndex(Addr, Base, Offset))
3257 return true;
3258
3259 SDLoc DL(Addr);
3260 MVT VT = Addr.getSimpleValueType();
3261
3262 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3263 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3264 if (isUInt<9>(x: CVal)) {
3265 Base = Addr.getOperand(i: 0);
3266
3267 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3268 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3269 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3270 return true;
3271 }
3272 }
3273
3274 Base = Addr;
3275 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3276 return true;
3277}
3278
3279/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3280/// Offset should be all zeros.
3281bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3282 SDValue &Offset) {
3283 if (SelectAddrFrameIndex(Addr, Base, Offset))
3284 return true;
3285
3286 SDLoc DL(Addr);
3287 MVT VT = Addr.getSimpleValueType();
3288
3289 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
3290 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3291 if (isInt<12>(x: CVal)) {
3292 Base = Addr.getOperand(i: 0);
3293
3294 // Early-out if not a valid offset.
3295 if ((CVal & 0b11111) != 0) {
3296 Base = Addr;
3297 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3298 return true;
3299 }
3300
3301 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
3302 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
3303 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
3304 return true;
3305 }
3306 }
3307
3308 // Handle ADD with large immediates.
3309 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
3310 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
3311 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3312
3313 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3314 // one instruction by folding the adjustment (-2048 or 2016) into the offset.
3315 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3316 int64_t Adj = CVal < 0 ? -2048 : 2016;
3317 int64_t AdjustedOffset = CVal - Adj;
3318 Base =
3319 SDValue(CurDAG->getMachineNode(
3320 Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
3321 Op2: CurDAG->getSignedTargetConstant(Val: AdjustedOffset, DL, VT)),
3322 0);
3323 Offset = CurDAG->getSignedTargetConstant(Val: Adj, DL, VT);
3324 return true;
3325 }
3326
3327 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
3328 Offset, /*IsPrefetch=*/true)) {
3329 // Insert an ADD instruction with the materialized Hi52 bits.
3330 Base = SDValue(
3331 CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
3332 0);
3333 return true;
3334 }
3335 }
3336
3337 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3338 /*IsPrefetch=*/true))
3339 return true;
3340
3341 Base = Addr;
3342 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3343 return true;
3344}
3345
3346 /// Return true if this is a load/store for which we have a RegRegScale instruction.
3347static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3348 const RISCVSubtarget &Subtarget) {
3349 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3350 return false;
3351 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
3352 if (!(VT.isScalarInteger() &&
3353 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3354 !((VT == MVT::f32 || VT == MVT::f64) &&
3355 Subtarget.hasVendorXTHeadFMemIdx()))
3356 return false;
3357 // Don't allow stores of the value. It must be used as the address.
3358 if (User->getOpcode() == ISD::STORE &&
3359 cast<StoreSDNode>(Val: User)->getValue() == Add)
3360 return false;
3361
3362 return true;
3363}
3364
3365 /// Is it profitable to fold this Add into a RegRegScale load/store? If \p
3366/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3367/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3368/// single addi and we don't have a SHXADD instruction we could use.
3369/// FIXME: May still need to check how many and what kind of users the SHL has.
3370static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3371 SDValue Add,
3372 SDValue Shift = SDValue()) {
3373 bool FoundADDI = false;
3374 for (auto *User : Add->users()) {
3375 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3376 continue;
3377
3378 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3379 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3380 !isa<ConstantSDNode>(Val: User->getOperand(Num: 1)) ||
3381 !isInt<12>(x: cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue()))
3382 return false;
3383
3384 FoundADDI = true;
3385
3386 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3387 assert(Shift.getOpcode() == ISD::SHL);
3388 unsigned ShiftAmt = Shift.getConstantOperandVal(i: 1);
3389 if (Subtarget.hasShlAdd(ShAmt: ShiftAmt))
3390 return false;
3391
3392 // All users of the ADDI should be load/store.
3393 for (auto *ADDIUser : User->users())
3394 if (!isRegRegScaleLoadOrStore(User: ADDIUser, Add: SDValue(User, 0), Subtarget))
3395 return false;
3396 }
3397
3398 return true;
3399}
3400
3401bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3402 unsigned MaxShiftAmount,
3403 SDValue &Base, SDValue &Index,
3404 SDValue &Scale) {
3405 if (Addr.getOpcode() != ISD::ADD)
3406 return false;
3407 SDValue LHS = Addr.getOperand(i: 0);
3408 SDValue RHS = Addr.getOperand(i: 1);
3409
3410 EVT VT = Addr.getSimpleValueType();
3411 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3412 SDValue &Shift) {
3413 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
3414 return false;
3415
3416 // Only match shifts by a value in range [0, MaxShiftAmount].
3417 unsigned ShiftAmt = N.getConstantOperandVal(i: 1);
3418 if (ShiftAmt > MaxShiftAmount)
3419 return false;
3420
3421 Index = N.getOperand(i: 0);
3422 Shift = CurDAG->getTargetConstant(Val: ShiftAmt, DL: SDLoc(N), VT);
3423 return true;
3424 };
3425
3426 if (auto *C1 = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3427 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3428 if (LHS.getOpcode() == ISD::ADD &&
3429 !isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
3430 isInt<12>(x: C1->getSExtValue())) {
3431 if (SelectShl(LHS.getOperand(i: 1), Index, Scale) &&
3432 isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 1))) {
3433 SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
3434 DL: SDLoc(Addr), VT);
3435 Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
3436 Op1: LHS.getOperand(i: 0), Op2: C1Val),
3437 0);
3438 return true;
3439 }
3440
3441 // Add is commutative so we need to check both operands.
3442 if (SelectShl(LHS.getOperand(i: 0), Index, Scale) &&
3443 isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: LHS, Shift: LHS.getOperand(i: 0))) {
3444 SDValue C1Val = CurDAG->getTargetConstant(Val: *C1->getConstantIntValue(),
3445 DL: SDLoc(Addr), VT);
3446 Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
3447 Op1: LHS.getOperand(i: 1), Op2: C1Val),
3448 0);
3449 return true;
3450 }
3451 }
3452
3453 // Don't match add with constants.
3454 // FIXME: Is this profitable for large constants that have 0s in the lower
3455 // 12 bits that we can materialize with LUI?
3456 return false;
3457 }
3458
3459 // Try to match a shift on the RHS.
3460 if (SelectShl(RHS, Index, Scale)) {
3461 if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: RHS))
3462 return false;
3463 Base = LHS;
3464 return true;
3465 }
3466
3467 // Try to match a shift on the LHS.
3468 if (SelectShl(LHS, Index, Scale)) {
3469 if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr, Shift: LHS))
3470 return false;
3471 Base = RHS;
3472 return true;
3473 }
3474
3475 if (!isWorthFoldingIntoRegRegScale(Subtarget: *Subtarget, Add: Addr))
3476 return false;
3477
3478 Base = LHS;
3479 Index = RHS;
3480 Scale = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT);
3481 return true;
3482}
3483
3484bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3485 unsigned MaxShiftAmount,
3486 unsigned Bits, SDValue &Base,
3487 SDValue &Index,
3488 SDValue &Scale) {
3489 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3490 return false;
3491
3492 if (Index.getOpcode() == ISD::AND) {
3493 auto *C = dyn_cast<ConstantSDNode>(Val: Index.getOperand(i: 1));
3494 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3495 Index = Index.getOperand(i: 0);
3496 return true;
3497 }
3498 }
3499
3500 return false;
3501}
3502
3503bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3504 SDValue &Offset) {
3505 if (Addr.getOpcode() != ISD::ADD)
3506 return false;
3507
3508 if (isa<ConstantSDNode>(Val: Addr.getOperand(i: 1)))
3509 return false;
3510
3511 Base = Addr.getOperand(i: 0);
3512 Offset = Addr.getOperand(i: 1);
3513 return true;
3514}
3515
3516bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3517 SDValue &ShAmt) {
3518 ShAmt = N;
3519
3520 // Peek through zext.
3521 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3522 ShAmt = ShAmt.getOperand(i: 0);
3523
3524 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3525 // amount. If there is an AND on the shift amount, we can bypass it if it
3526 // doesn't affect any of those bits.
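// For example, on RV64 (srl X, (and Y, 63)) can shift directly by Y, since
// SRL already ignores all but the low 6 bits of the shift amount.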
3527 if (ShAmt.getOpcode() == ISD::AND &&
3528 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
3529 const APInt &AndMask = ShAmt.getConstantOperandAPInt(i: 1);
3530
3531 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3532 // mask that covers the bits needed to represent all shift amounts.
3533 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3534 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3535
3536 if (ShMask.isSubsetOf(RHS: AndMask)) {
3537 ShAmt = ShAmt.getOperand(i: 0);
3538 } else {
3539 // SimplifyDemandedBits may have optimized the mask so try restoring any
3540 // bits that are known zero.
3541 KnownBits Known = CurDAG->computeKnownBits(Op: ShAmt.getOperand(i: 0));
3542 if (!ShMask.isSubsetOf(RHS: AndMask | Known.Zero))
3543 return true;
3544 ShAmt = ShAmt.getOperand(i: 0);
3545 }
3546 }
3547
3548 if (ShAmt.getOpcode() == ISD::ADD &&
3549 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
3550 uint64_t Imm = ShAmt.getConstantOperandVal(i: 1);
3551 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3552 // to avoid the ADD.
3553 if (Imm != 0 && Imm % ShiftWidth == 0) {
3554 ShAmt = ShAmt.getOperand(i: 0);
3555 return true;
3556 }
3557 } else if (ShAmt.getOpcode() == ISD::SUB &&
3558 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 0))) {
3559 uint64_t Imm = ShAmt.getConstantOperandVal(i: 0);
3560 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3561 // generate a NEG instead of a SUB of a constant.
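// For example, with ShiftWidth == 64, (srl X, (sub 64, Y)) becomes roughly
// (a sketch):
//   negw t0, Y
//   srl  a0, X, t0
// which is correct because only the low 6 bits of the amount are read.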
3562 if (Imm != 0 && Imm % ShiftWidth == 0) {
3563 SDLoc DL(ShAmt);
3564 EVT VT = ShAmt.getValueType();
3565 SDValue Zero = CurDAG->getRegister(Reg: RISCV::X0, VT);
3566 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3567 MachineSDNode *Neg = CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT, Op1: Zero,
3568 Op2: ShAmt.getOperand(i: 1));
3569 ShAmt = SDValue(Neg, 0);
3570 return true;
3571 }
3572 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3573 // to generate a NOT instead of a SUB of a constant.
3574 if (Imm % ShiftWidth == ShiftWidth - 1) {
3575 SDLoc DL(ShAmt);
3576 EVT VT = ShAmt.getValueType();
3577 MachineSDNode *Not = CurDAG->getMachineNode(
3578 Opcode: RISCV::XORI, dl: DL, VT, Op1: ShAmt.getOperand(i: 1),
3579 Op2: CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/IsTarget: true));
3580 ShAmt = SDValue(Not, 0);
3581 return true;
3582 }
3583 }
3584
3585 return true;
3586}
3587
3588/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3589/// check for equality with 0. This function emits instructions that convert the
3590/// seteq/setne into something that can be compared with 0.
3591/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3592/// ISD::SETNE).
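/// For example, (seteq X, 5) can use this helper to produce "addi t0, X, -5",
/// which the calling pattern then compares against zero, roughly (a sketch):
///   addi t0, X, -5
///   seqz a0, t0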
3593bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3594 SDValue &Val) {
3595 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3596 "Unexpected condition code!");
3597
3598 // We're looking for a setcc.
3599 if (N->getOpcode() != ISD::SETCC)
3600 return false;
3601
3602 // Must be an equality comparison.
3603 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
3604 if (CCVal != ExpectedCCVal)
3605 return false;
3606
3607 SDValue LHS = N->getOperand(Num: 0);
3608 SDValue RHS = N->getOperand(Num: 1);
3609
3610 if (!LHS.getValueType().isScalarInteger())
3611 return false;
3612
3613 // If the RHS is 0, we don't need any extra instructions; return the LHS.
3614 if (isNullConstant(V: RHS)) {
3615 Val = LHS;
3616 return true;
3617 }
3618
3619 SDLoc DL(N);
3620
3621 if (auto *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3622 int64_t CVal = C->getSExtValue();
3623 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3624 // non-zero otherwise.
3625 if (CVal == -2048) {
3626 Val = SDValue(
3627 CurDAG->getMachineNode(
3628 Opcode: RISCV::XORI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3629 Op2: CurDAG->getSignedTargetConstant(Val: CVal, DL, VT: N->getValueType(ResNo: 0))),
3630 0);
3631 return true;
3632 }
3633 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3634 // if the LHS is equal to the RHS and non-zero otherwise.
3635 if (isInt<12>(x: CVal) || CVal == 2048) {
3636 unsigned Opc = RISCV::ADDI;
3637 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3638 cast<VTSDNode>(Val: LHS.getOperand(i: 1))->getVT() == MVT::i32) {
3639 Opc = RISCV::ADDIW;
3640 LHS = LHS.getOperand(i: 0);
3641 }
3642
3643 Val = SDValue(CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3644 Op2: CurDAG->getSignedTargetConstant(
3645 Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
3646 0);
3647 return true;
3648 }
3649 if (isPowerOf2_64(Value: CVal) && Subtarget->hasStdExtZbs()) {
3650 Val = SDValue(
3651 CurDAG->getMachineNode(
3652 Opcode: RISCV::BINVI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3653 Op2: CurDAG->getTargetConstant(Val: Log2_64(Value: CVal), DL, VT: N->getValueType(ResNo: 0))),
3654 0);
3655 return true;
3656 }
3657 // Same as the addi case above, but for larger immediates (signed 26-bit),
3658 // use the QC_E_ADDI instruction from the Xqcilia extension, if available.
3659 // Avoid anything that can be done with a single lui, as it might be compressible.
3660 if (Subtarget->hasVendorXqcilia() && isInt<26>(x: CVal) &&
3661 (CVal & 0xFFF) != 0) {
3662 Val = SDValue(
3663 CurDAG->getMachineNode(
3664 Opcode: RISCV::QC_E_ADDI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3665 Op2: CurDAG->getSignedTargetConstant(Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
3666 0);
3667 return true;
3668 }
3669 }
3670
3671 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3672 // equal and a non-zero value if they aren't.
3673 Val = SDValue(
3674 CurDAG->getMachineNode(Opcode: RISCV::XOR, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, Op2: RHS), 0);
3675 return true;
3676}
3677
3678bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3679 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3680 cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT().getSizeInBits() == Bits) {
3681 Val = N.getOperand(i: 0);
3682 return true;
3683 }
3684
3685 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3686 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
3687 return N;
3688
3689 SDValue N0 = N.getOperand(i: 0);
3690 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
3691 N.getConstantOperandVal(i: 1) == ShiftAmt &&
3692 N0.getConstantOperandVal(i: 1) == ShiftAmt)
3693 return N0.getOperand(i: 0);
3694
3695 return N;
3696 };
3697
3698 MVT VT = N.getSimpleValueType();
3699 if (CurDAG->ComputeNumSignBits(Op: N) > (VT.getSizeInBits() - Bits)) {
3700 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3701 return true;
3702 }
3703
3704 return false;
3705}
3706
3707bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3708 if (N.getOpcode() == ISD::AND) {
3709 auto *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
3710 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3711 Val = N.getOperand(i: 0);
3712 return true;
3713 }
3714 }
3715 MVT VT = N.getSimpleValueType();
3716 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: Bits);
3717 if (CurDAG->MaskedValueIsZero(Op: N, Mask)) {
3718 Val = N;
3719 return true;
3720 }
3721
3722 return false;
3723}
3724
3725/// Look for various patterns that can be done with a SHL that can be folded
3726/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3727/// SHXADD we are trying to match.
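/// For example, with ShAmt == 3 (matching SH3ADD) on RV64 (a sketch):
///   (add (and (shl y, 1), 0xFFFFFFFFFFFFFFF8), z)
/// can be selected as:
///   srli   t0, y, 2
///   sh3add a0, t0, z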
3728bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3729 SDValue &Val) {
3730 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
3731 SDValue N0 = N.getOperand(i: 0);
3732
3733 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3734 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3735 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
3736 uint64_t Mask = N.getConstantOperandVal(i: 1);
3737 unsigned C2 = N0.getConstantOperandVal(i: 1);
3738
3739 unsigned XLen = Subtarget->getXLen();
3740 if (LeftShift)
3741 Mask &= maskTrailingZeros<uint64_t>(N: C2);
3742 else
3743 Mask &= maskTrailingOnes<uint64_t>(N: XLen - C2);
3744
3745 if (isShiftedMask_64(Value: Mask)) {
3746 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
3747 unsigned Trailing = llvm::countr_zero(Val: Mask);
3748 if (Trailing != ShAmt)
3749 return false;
3750
3751 unsigned Opcode;
3752 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3753 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3754 // followed by a SHXADD with c3 for the X amount.
3755 if (LeftShift && Leading == 0 && C2 < Trailing)
3756 Opcode = RISCV::SRLI;
3757 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3758 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3759 // followed by a SHXADD with c3 for the X amount.
3760 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3761 Opcode = RISCV::SRLIW;
3762 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3763 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3764 // followed by a SHXADD using c3 for the X amount.
3765 else if (!LeftShift && Leading == C2)
3766 Opcode = RISCV::SRLI;
3767 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3768 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3769 // followed by a SHXADD using c3 for the X amount.
3770 else if (!LeftShift && Leading == 32 + C2)
3771 Opcode = RISCV::SRLIW;
3772 else
3773 return false;
3774
3775 SDLoc DL(N);
3776 EVT VT = N.getValueType();
3777 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3778 Val = SDValue(
3779 CurDAG->getMachineNode(Opcode, dl: DL, VT, Op1: N0.getOperand(i: 0),
3780 Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)),
3781 0);
3782 return true;
3783 }
3784 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3785 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
3786 uint64_t Mask = N.getConstantOperandVal(i: 1);
3787 unsigned C2 = N0.getConstantOperandVal(i: 1);
3788
3789 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3790 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3791 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3792 // the X amount.
3793 if (isShiftedMask_64(Value: Mask)) {
3794 unsigned XLen = Subtarget->getXLen();
3795 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
3796 unsigned Trailing = llvm::countr_zero(Val: Mask);
3797 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3798 SDLoc DL(N);
3799 EVT VT = N.getValueType();
3800 Val = SDValue(CurDAG->getMachineNode(
3801 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
3802 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT)),
3803 0);
3804 Val = SDValue(CurDAG->getMachineNode(
3805 Opcode: RISCV::SRLI, dl: DL, VT, Op1: Val,
3806 Op2: CurDAG->getTargetConstant(Val: Leading + ShAmt, DL, VT)),
3807 0);
3808 return true;
3809 }
3810 }
3811 }
3812 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3813 (LeftShift || N.getOpcode() == ISD::SRL) &&
3814 isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
3815 SDValue N0 = N.getOperand(i: 0);
3816 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3817 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
3818 uint64_t Mask = N0.getConstantOperandVal(i: 1);
3819 if (isShiftedMask_64(Value: Mask)) {
3820 unsigned C1 = N.getConstantOperandVal(i: 1);
3821 unsigned XLen = Subtarget->getXLen();
3822 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
3823 unsigned Trailing = llvm::countr_zero(Val: Mask);
3824 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3825 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3826 if (LeftShift && Leading == 32 && Trailing > 0 &&
3827 (Trailing + C1) == ShAmt) {
3828 SDLoc DL(N);
3829 EVT VT = N.getValueType();
3830 Val = SDValue(CurDAG->getMachineNode(
3831 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
3832 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
3833 0);
3834 return true;
3835 }
3836 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3837 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3838 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3839 (Trailing - C1) == ShAmt) {
3840 SDLoc DL(N);
3841 EVT VT = N.getValueType();
3842 Val = SDValue(CurDAG->getMachineNode(
3843 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
3844 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
3845 0);
3846 return true;
3847 }
3848 }
3849 }
3850 }
3851
3852 return false;
3853}
3854
3855/// Look for various patterns that can be done with a SHL that can be folded
3856/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3857/// SHXADD_UW we are trying to match.
3858bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3859 SDValue &Val) {
3860 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1)) &&
3861 N.hasOneUse()) {
3862 SDValue N0 = N.getOperand(i: 0);
3863 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
3864 N0.hasOneUse()) {
3865 uint64_t Mask = N.getConstantOperandVal(i: 1);
3866 unsigned C2 = N0.getConstantOperandVal(i: 1);
3867
3868 Mask &= maskTrailingZeros<uint64_t>(N: C2);
3869
3870 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3871 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3872 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
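// For example, with ShAmt == 2 (SH2ADD_UW), c2 == 4 and c1 == 0x3FFFFFFF0,
// (add (and (shl y, 4), 0x3FFFFFFF0), z) becomes roughly (a sketch):
//   slli      t0, y, 2
//   sh2add.uw a0, t0, z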
3873 if (isShiftedMask_64(Value: Mask)) {
3874 unsigned Leading = llvm::countl_zero(Val: Mask);
3875 unsigned Trailing = llvm::countr_zero(Val: Mask);
3876 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3877 SDLoc DL(N);
3878 EVT VT = N.getValueType();
3879 Val = SDValue(CurDAG->getMachineNode(
3880 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
3881 Op2: CurDAG->getTargetConstant(Val: C2 - ShAmt, DL, VT)),
3882 0);
3883 return true;
3884 }
3885 }
3886 }
3887 }
3888
3889 return false;
3890}
3891
3892bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3893 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3894 if (N->getFlags().hasDisjoint())
3895 return true;
3896 return CurDAG->haveNoCommonBitsSet(A: N->getOperand(Num: 0), B: N->getOperand(Num: 1));
3897}
3898
3899bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3900 SDValue N, SDValue &Val) {
3901 int OrigCost = RISCVMatInt::getIntMatCost(Val: APInt(64, OrigImm), Size: 64, STI: *Subtarget,
3902 /*CompressionCost=*/true);
3903 int Cost = RISCVMatInt::getIntMatCost(Val: APInt(64, Imm), Size: 64, STI: *Subtarget,
3904 /*CompressionCost=*/true);
3905 if (OrigCost <= Cost)
3906 return false;
3907
3908 Val = selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm, Subtarget: *Subtarget);
3909 return true;
3910}
3911
3912bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3913 if (!isa<ConstantSDNode>(Val: N))
3914 return false;
3915 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
3916 if ((Imm >> 31) != 1)
3917 return false;
3918
3919 for (const SDNode *U : N->users()) {
3920 switch (U->getOpcode()) {
3921 case ISD::ADD:
3922 break;
3923 case ISD::OR:
3924 if (orDisjoint(N: U))
3925 break;
3926 return false;
3927 default:
3928 return false;
3929 }
3930 }
3931
3932 return selectImm64IfCheaper(Imm: 0xffffffff00000000 | Imm, OrigImm: Imm, N, Val);
3933}
3934
3935bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3936 if (!isa<ConstantSDNode>(Val: N))
3937 return false;
3938 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
3939 if (isInt<32>(x: Imm))
3940 return false;
3941
3942 for (const SDNode *U : N->users()) {
3943 switch (U->getOpcode()) {
3944 case ISD::ADD:
3945 break;
3946 case RISCVISD::VMV_V_X_VL:
3947 if (!all_of(Range: U->users(), P: [](const SDNode *V) {
3948 return V->getOpcode() == ISD::ADD ||
3949 V->getOpcode() == RISCVISD::ADD_VL;
3950 }))
3951 return false;
3952 break;
3953 default:
3954 return false;
3955 }
3956 }
3957
3958 return selectImm64IfCheaper(Imm: -Imm, OrigImm: Imm, N, Val);
3959}
3960
3961bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3962 if (!isa<ConstantSDNode>(Val: N))
3963 return false;
3964 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
3965
3966 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3967 if (isInt<32>(x: Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3968 return false;
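// For example, with Zbb on RV64, (and x, 0x7ffff) can be selected as a
// single LUI of the inverted constant plus an ANDN (a sketch):
//   lui  t0, 0xfff80      ; t0 = ~0x7ffff
//   andn a0, x, t0
// saving the ADDI that materializing 0x7ffff directly would need.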
3969
3970 // Abandon this transform if the constant is needed elsewhere.
3971 for (const SDNode *U : N->users()) {
3972 switch (U->getOpcode()) {
3973 case ISD::AND:
3974 case ISD::OR:
3975 case ISD::XOR:
3976 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3977 return false;
3978 break;
3979 case RISCVISD::VMV_V_X_VL:
3980 if (!Subtarget->hasStdExtZvkb())
3981 return false;
3982 if (!all_of(Range: U->users(), P: [](const SDNode *V) {
3983 return V->getOpcode() == ISD::AND ||
3984 V->getOpcode() == RISCVISD::AND_VL;
3985 }))
3986 return false;
3987 break;
3988 default:
3989 return false;
3990 }
3991 }
3992
3993 if (isInt<32>(x: Imm)) {
3994 Val =
3995 selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm: ~Imm, Subtarget: *Subtarget);
3996 return true;
3997 }
3998
3999 // For 64-bit constants, the instruction sequences get complex,
4000 // so we select inverted only if it's cheaper.
4001 return selectImm64IfCheaper(Imm: ~Imm, OrigImm: Imm, N, Val);
4002}
4003
4004static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4005 unsigned Bits,
4006 const TargetInstrInfo *TII) {
4007 unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: User->getMachineOpcode());
4008
4009 if (!MCOpcode)
4010 return false;
4011
4012 const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
4013 const uint64_t TSFlags = MCID.TSFlags;
4014 if (!RISCVII::hasSEWOp(TSFlags))
4015 return false;
4016 assert(RISCVII::hasVLOp(TSFlags));
4017
4018 unsigned ChainOpIdx = User->getNumOperands() - 1;
4019 bool HasChainOp = User->getOperand(Num: ChainOpIdx).getValueType() == MVT::Other;
4020 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4021 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4022 const unsigned Log2SEW = User->getConstantOperandVal(Num: VLIdx + 1);
4023
4024 if (UserOpNo == VLIdx)
4025 return false;
4026
4027 auto NumDemandedBits =
4028 RISCV::getVectorLowDemandedScalarBits(Opcode: MCOpcode, Log2SEW);
4029 return NumDemandedBits && Bits >= *NumDemandedBits;
4030}
4031
4032// Return true if all users of this SDNode* only consume the lower \p Bits.
4033// This can be used to form W instructions for add/sub/mul/shl even when the
4034// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4035// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4036// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4037// the add/sub/mul/shl to become non-W instructions. By checking the users we
4038// may be able to use a W instruction and CSE with the other instruction if
4039// this has happened. We could try to detect that the CSE opportunity exists
4040// before doing this, but that would be more complicated.
4041bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
4042 const unsigned Depth) const {
4043 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4044 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4045 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4046 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4047 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4048 isa<ConstantSDNode>(Node) || Depth != 0) &&
4049 "Unexpected opcode");
4050
4051 if (Depth >= SelectionDAG::MaxRecursionDepth)
4052 return false;
4053
4054 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4055 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4056 if (Depth == 0 && !Node->getValueType(ResNo: 0).isScalarInteger())
4057 return false;
4058
4059 for (SDUse &Use : Node->uses()) {
4060 SDNode *User = Use.getUser();
4061 // Users of this node should have already been instruction selected
4062 if (!User->isMachineOpcode())
4063 return false;
4064
4065 // TODO: Add more opcodes?
4066 switch (User->getMachineOpcode()) {
4067 default:
4068 if (vectorPseudoHasAllNBitUsers(User, UserOpNo: Use.getOperandNo(), Bits, TII))
4069 break;
4070 return false;
4071 case RISCV::ADDW:
4072 case RISCV::ADDIW:
4073 case RISCV::SUBW:
4074 case RISCV::MULW:
4075 case RISCV::SLLW:
4076 case RISCV::SLLIW:
4077 case RISCV::SRAW:
4078 case RISCV::SRAIW:
4079 case RISCV::SRLW:
4080 case RISCV::SRLIW:
4081 case RISCV::DIVW:
4082 case RISCV::DIVUW:
4083 case RISCV::REMW:
4084 case RISCV::REMUW:
4085 case RISCV::ROLW:
4086 case RISCV::RORW:
4087 case RISCV::RORIW:
4088 case RISCV::CLSW:
4089 case RISCV::CLZW:
4090 case RISCV::CTZW:
4091 case RISCV::CPOPW:
4092 case RISCV::SLLI_UW:
4093 case RISCV::ABSW:
4094 case RISCV::FMV_W_X:
4095 case RISCV::FCVT_H_W:
4096 case RISCV::FCVT_H_W_INX:
4097 case RISCV::FCVT_H_WU:
4098 case RISCV::FCVT_H_WU_INX:
4099 case RISCV::FCVT_S_W:
4100 case RISCV::FCVT_S_W_INX:
4101 case RISCV::FCVT_S_WU:
4102 case RISCV::FCVT_S_WU_INX:
4103 case RISCV::FCVT_D_W:
4104 case RISCV::FCVT_D_W_INX:
4105 case RISCV::FCVT_D_WU:
4106 case RISCV::FCVT_D_WU_INX:
4107 case RISCV::TH_REVW:
4108 case RISCV::TH_SRRIW:
4109 if (Bits >= 32)
4110 break;
4111 return false;
4112 case RISCV::SLL:
4113 case RISCV::SRA:
4114 case RISCV::SRL:
4115 case RISCV::ROL:
4116 case RISCV::ROR:
4117 case RISCV::BSET:
4118 case RISCV::BCLR:
4119 case RISCV::BINV:
4120 // Shift amount operands only use log2(Xlen) bits.
4121 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Value: Subtarget->getXLen()))
4122 break;
4123 return false;
4124 case RISCV::SLLI:
4125 // SLLI only uses the lower (XLen - ShAmt) bits.
4126 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(Num: 1))
4127 break;
4128 return false;
4129 case RISCV::ANDI:
4130 if (Bits >= (unsigned)llvm::bit_width(Value: User->getConstantOperandVal(Num: 1)))
4131 break;
4132 goto RecCheck;
4133 case RISCV::ORI: {
4134 uint64_t Imm = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
4135 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm))
4136 break;
4137 [[fallthrough]];
4138 }
4139 case RISCV::AND:
4140 case RISCV::OR:
4141 case RISCV::XOR:
4142 case RISCV::XORI:
4143 case RISCV::ANDN:
4144 case RISCV::ORN:
4145 case RISCV::XNOR:
4146 case RISCV::SH1ADD:
4147 case RISCV::SH2ADD:
4148 case RISCV::SH3ADD:
4149 RecCheck:
4150 if (hasAllNBitUsers(Node: User, Bits, Depth: Depth + 1))
4151 break;
4152 return false;
4153 case RISCV::SRLI: {
4154 unsigned ShAmt = User->getConstantOperandVal(Num: 1);
4155 // If we are shifting right by less than Bits, and users don't demand any
4156 // bits that were shifted into [Bits-1:0], then we can consider this as an
4157 // N-Bit user.
4158 if (Bits > ShAmt && hasAllNBitUsers(Node: User, Bits: Bits - ShAmt, Depth: Depth + 1))
4159 break;
4160 return false;
4161 }
4162 case RISCV::SEXT_B:
4163 case RISCV::PACKH:
4164 if (Bits >= 8)
4165 break;
4166 return false;
4167 case RISCV::SEXT_H:
4168 case RISCV::FMV_H_X:
4169 case RISCV::ZEXT_H_RV32:
4170 case RISCV::ZEXT_H_RV64:
4171 case RISCV::PACKW:
4172 if (Bits >= 16)
4173 break;
4174 return false;
4175 case RISCV::PACK:
4176 if (Bits >= (Subtarget->getXLen() / 2))
4177 break;
4178 return false;
4179 case RISCV::ADD_UW:
4180 case RISCV::SH1ADD_UW:
4181 case RISCV::SH2ADD_UW:
4182 case RISCV::SH3ADD_UW:
4183 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4184 // 32 bits.
4185 if (Use.getOperandNo() == 0 && Bits >= 32)
4186 break;
4187 return false;
4188 case RISCV::SB:
4189 if (Use.getOperandNo() == 0 && Bits >= 8)
4190 break;
4191 return false;
4192 case RISCV::SH:
4193 if (Use.getOperandNo() == 0 && Bits >= 16)
4194 break;
4195 return false;
4196 case RISCV::SW:
4197 if (Use.getOperandNo() == 0 && Bits >= 32)
4198 break;
4199 return false;
4200 case RISCV::TH_EXT:
4201 case RISCV::TH_EXTU: {
4202 unsigned Msb = User->getConstantOperandVal(Num: 1);
4203 unsigned Lsb = User->getConstantOperandVal(Num: 2);
4204 // Behavior of Msb < Lsb is not well documented.
4205 if (Msb >= Lsb && Bits > Msb)
4206 break;
4207 return false;
4208 }
4209 }
4210 }
4211
4212 return true;
4213}
4214
4215// Select a constant that can be represented as (sign_extend(imm5) << imm2).
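// For example, 48 is matched with Simm5 = 12 and Shl2 = 2 (48 == 12 << 2),
// while 100 == 25 << 2 is rejected because 25 does not fit in a simm5 and
// 100 is not a multiple of 8.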
4216bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4217 SDValue &Shl2) {
4218 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4219 if (!C)
4220 return false;
4221
4222 int64_t Offset = C->getSExtValue();
4223 for (unsigned Shift = 0; Shift < 4; Shift++) {
4224 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4225 EVT VT = N->getValueType(ResNo: 0);
4226 Simm5 = CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(N), VT);
4227 Shl2 = CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(N), VT);
4228 return true;
4229 }
4230 }
4231
4232 return false;
4233}
4234
4235// Select VL as a 5 bit immediate or a value that will become a register. This
4236// allows us to choose between VSETIVLI or VSETVLI later.
4237bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4238 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
4239 if (C && isUInt<5>(x: C->getZExtValue())) {
4240 VL = CurDAG->getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(N),
4241 VT: N->getValueType(ResNo: 0));
4242 } else if (C && C->isAllOnes()) {
4243 // Treat all ones as VLMax.
4244 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4245 VT: N->getValueType(ResNo: 0));
4246 } else if (isa<RegisterSDNode>(Val: N) &&
4247 cast<RegisterSDNode>(Val&: N)->getReg() == RISCV::X0) {
4248 // All our VL operands use an operand class that allows either GPRNoX0 or
4249 // an immediate. Convert X0 to a special immediate to pass the
4250 // MachineVerifier. This is recognized specially by the vsetvli insertion
4251 // pass.
4252 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
4253 VT: N->getValueType(ResNo: 0));
4254 } else {
4255 VL = N;
4256 }
4257
4258 return true;
4259}
4260
4261static SDValue findVSplat(SDValue N) {
4262 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4263 if (!N.getOperand(i: 0).isUndef())
4264 return SDValue();
4265 N = N.getOperand(i: 1);
4266 }
4267 SDValue Splat = N;
4268 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4269 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4270 !Splat.getOperand(i: 0).isUndef())
4271 return SDValue();
4272 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4273 return Splat;
4274}
4275
4276bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4277 SDValue Splat = findVSplat(N);
4278 if (!Splat)
4279 return false;
4280
4281 SplatVal = Splat.getOperand(i: 1);
4282 return true;
4283}
4284
4285static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4286 SelectionDAG &DAG,
4287 const RISCVSubtarget &Subtarget,
4288 std::function<bool(int64_t)> ValidateImm,
4289 bool Decrement = false) {
4290 SDValue Splat = findVSplat(N);
4291 if (!Splat || !isa<ConstantSDNode>(Val: Splat.getOperand(i: 1)))
4292 return false;
4293
4294 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4295 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4296 "Unexpected splat operand type");
4297
4298 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4299 // type is wider than the resulting vector element type: an implicit
4300 // truncation first takes place. Therefore, perform a manual
4301 // truncation/sign-extension in order to ignore any truncated bits and catch
4302 // any zero-extended immediate.
4303 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4304 // sign-extending to (XLenVT -1).
4305 APInt SplatConst = Splat.getConstantOperandAPInt(i: 1).sextOrTrunc(width: SplatEltSize);
4306
4307 int64_t SplatImm = SplatConst.getSExtValue();
4308
4309 if (!ValidateImm(SplatImm))
4310 return false;
4311
4312 if (Decrement)
4313 SplatImm -= 1;
4314
4315 SplatVal =
4316 DAG.getSignedTargetConstant(Val: SplatImm, DL: SDLoc(N), VT: Subtarget.getXLenVT());
4317 return true;
4318}
4319
4320bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4321 return selectVSplatImmHelper(N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4322 ValidateImm: [](int64_t Imm) { return isInt<5>(x: Imm); });
4323}
4324
4325bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4326 return selectVSplatImmHelper(
4327 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4328 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4329 /*Decrement=*/true);
4330}
4331
4332bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4333 return selectVSplatImmHelper(
4334 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4335 ValidateImm: [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4336 /*Decrement=*/false);
4337}
4338
4339bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4340 SDValue &SplatVal) {
4341 return selectVSplatImmHelper(
4342 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4343 ValidateImm: [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4344 /*Decrement=*/true);
4345}
4346
4347bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4348 SDValue &SplatVal) {
4349 return selectVSplatImmHelper(
4350 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
4351 ValidateImm: [Bits](int64_t Imm) { return isUIntN(N: Bits, x: Imm); });
4352}
4353
4354bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4355 SDValue Splat = findVSplat(N);
4356 return Splat && selectNegImm(N: Splat.getOperand(i: 1), Val&: SplatVal);
4357}
4358
4359bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4360 auto IsExtOrTrunc = [](SDValue N) {
4361 switch (N->getOpcode()) {
4362 case ISD::SIGN_EXTEND:
4363 case ISD::ZERO_EXTEND:
4364 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4365 // inactive elements will be undef.
4366 case RISCVISD::TRUNCATE_VECTOR_VL:
4367 case RISCVISD::VSEXT_VL:
4368 case RISCVISD::VZEXT_VL:
4369 return true;
4370 default:
4371 return false;
4372 }
4373 };
4374
4375 // We can have multiple nested nodes, so unravel them all if needed.
4376 while (IsExtOrTrunc(N)) {
4377 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4378 return false;
4379 N = N->getOperand(Num: 0);
4380 }
4381
4382 return selectVSplat(N, SplatVal);
4383}
4384
4385bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4386 // Allow bitcasts from XLenVT -> FP.
4387 if (N.getOpcode() == ISD::BITCAST &&
4388 N.getOperand(i: 0).getValueType() == Subtarget->getXLenVT()) {
4389 Imm = N.getOperand(i: 0);
4390 return true;
4391 }
4392 // Allow moves from XLenVT to FP.
4393 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4394 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4395 Imm = N.getOperand(i: 0);
4396 return true;
4397 }
4398
4399 // Otherwise, look for FP constants that can be materialized with a scalar integer.
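// For example, float -0.0 (bit pattern 0x80000000) can be produced roughly
// as (a sketch):
//   lui     t0, 0x80000
//   fmv.w.x fa0, t0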
4400 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val: N.getNode());
4401 if (!CFP)
4402 return false;
4403 const APFloat &APF = CFP->getValueAPF();
4404 // td can handle +0.0 already.
4405 if (APF.isPosZero())
4406 return false;
4407
4408 MVT VT = CFP->getSimpleValueType(ResNo: 0);
4409
4410 MVT XLenVT = Subtarget->getXLenVT();
4411 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4412 assert(APF.isNegZero() && "Unexpected constant.");
4413 return false;
4414 }
4415 SDLoc DL(N);
4416 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
4417 Subtarget: *Subtarget);
4418 return true;
4419}
4420
4421bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4422 SDValue &Imm) {
4423 if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) {
4424 int64_t ImmVal = SignExtend64(X: C->getSExtValue(), B: Width);
4425
4426 if (!isInt<5>(x: ImmVal))
4427 return false;
4428
4429 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL: SDLoc(N),
4430 VT: Subtarget->getXLenVT());
4431 return true;
4432 }
4433
4434 return false;
4435}
4436
4437// Try to remove sext.w if the input is a W instruction or can be made into
4438// a W instruction cheaply.
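// For example (a sketch):
//   add    a0, a1, a2
//   sext.w a0, a0        ; i.e. addiw a0, a0, 0
// becomes
//   addw   a0, a1, a2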
4439bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4440 // Look for the sext.w pattern, addiw rd, rs1, 0.
4441 if (N->getMachineOpcode() != RISCV::ADDIW ||
4442 !isNullConstant(V: N->getOperand(Num: 1)))
4443 return false;
4444
4445 SDValue N0 = N->getOperand(Num: 0);
4446 if (!N0.isMachineOpcode())
4447 return false;
4448
4449 switch (N0.getMachineOpcode()) {
4450 default:
4451 break;
4452 case RISCV::ADD:
4453 case RISCV::ADDI:
4454 case RISCV::SUB:
4455 case RISCV::MUL:
4456 case RISCV::SLLI: {
4457 // Convert sext.w+add/sub/mul to their W instructions. This will create
4458 // a new independent instruction. This improves latency.
4459 unsigned Opc;
4460 switch (N0.getMachineOpcode()) {
4461 default:
4462 llvm_unreachable("Unexpected opcode!");
4463 case RISCV::ADD: Opc = RISCV::ADDW; break;
4464 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4465 case RISCV::SUB: Opc = RISCV::SUBW; break;
4466 case RISCV::MUL: Opc = RISCV::MULW; break;
4467 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4468 }
4469
4470 SDValue N00 = N0.getOperand(i: 0);
4471 SDValue N01 = N0.getOperand(i: 1);
4472
4473 // Shift amount needs to be uimm5.
4474 if (N0.getMachineOpcode() == RISCV::SLLI &&
4475 !isUInt<5>(x: cast<ConstantSDNode>(Val&: N01)->getSExtValue()))
4476 break;
4477
4478 SDNode *Result =
4479 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: N->getValueType(ResNo: 0),
4480 Op1: N00, Op2: N01);
4481 ReplaceUses(F: N, T: Result);
4482 return true;
4483 }
4484 case RISCV::ADDW:
4485 case RISCV::ADDIW:
4486 case RISCV::SUBW:
4487 case RISCV::MULW:
4488 case RISCV::SLLIW:
4489 case RISCV::PACKW:
4490 case RISCV::TH_MULAW:
4491 case RISCV::TH_MULAH:
4492 case RISCV::TH_MULSW:
4493 case RISCV::TH_MULSH:
4494 if (N0.getValueType() == MVT::i32)
4495 break;
4496
4497 // The result is already sign extended; just remove the sext.w.
4498 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4499 ReplaceUses(F: N, T: N0.getNode());
4500 return true;
4501 }
4502
4503 return false;
4504}
4505
4506static bool usesAllOnesMask(SDValue MaskOp) {
4507 const auto IsVMSet = [](unsigned Opc) {
4508 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4509 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4510 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4511 Opc == RISCV::PseudoVMSET_M_B8;
4512 };
4513
4514 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4515 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4516 // assume that it's all-ones? Same applies to its VL.
4517 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4518}
4519
4520static bool isImplicitDef(SDValue V) {
4521 if (!V.isMachineOpcode())
4522 return false;
4523 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4524 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4525 if (!isImplicitDef(V: V.getOperand(i: I)))
4526 return false;
4527 return true;
4528 }
4529 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4530}
4531
4532// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4533// corresponding "unmasked" pseudo versions.
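// For example (illustrative; the actual mapping comes from the generated
// RISCVMaskedPseudo table): a PseudoVADD_VV_M1_MASK whose mask operand is
// produced by a PseudoVMSET_M_* can be rewritten as PseudoVADD_VV_M1,
// dropping the mask operand (and the policy operand if the unmasked form
// does not take one).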
4534bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4535 const RISCV::RISCVMaskedPseudoInfo *I =
4536 RISCV::getMaskedPseudoInfo(MaskedPseudo: N->getMachineOpcode());
4537 if (!I)
4538 return false;
4539
4540 unsigned MaskOpIdx = I->MaskOpIdx;
4541 if (!usesAllOnesMask(MaskOp: N->getOperand(Num: MaskOpIdx)))
4542 return false;
4543
4544 // There are two classes of pseudos in the table - compares and
4545 // everything else. See the comment on RISCVMaskedPseudo for details.
4546 const unsigned Opc = I->UnmaskedPseudo;
4547 const MCInstrDesc &MCID = TII->get(Opcode: Opc);
4548 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MCID);
4549
4550 const MCInstrDesc &MaskedMCID = TII->get(Opcode: N->getMachineOpcode());
4551 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MaskedMCID);
4552
4553 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4554 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4555 "Unmasked pseudo has policy but masked pseudo doesn't?");
4556 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4557 "Unexpected pseudo structure");
4558 assert(!(HasPassthru && !MaskedHasPassthru) &&
4559 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4560
4561 SmallVector<SDValue, 8> Ops;
4562 // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have one.
4563 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4564 bool DropPolicy = !RISCVII::hasVecPolicyOp(TSFlags: MCID.TSFlags) &&
4565 RISCVII::hasVecPolicyOp(TSFlags: MaskedMCID.TSFlags);
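  // The optional chain operand, if present, is last; when only the masked
  // pseudo has a policy operand, that policy is the last non-chain operand
  // and is dropped in the loop below.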
4566 bool HasChainOp =
4567 N->getOperand(Num: N->getNumOperands() - 1).getValueType() == MVT::Other;
4568 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4569 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4570 // Skip the mask operand.
4571 SDValue Op = N->getOperand(Num: I);
4572 if (I == MaskOpIdx)
4573 continue;
4574 if (DropPolicy && I == LastOpNum)
4575 continue;
4576 Ops.push_back(Elt: Op);
4577 }
4578
4579 MachineSDNode *Result =
4580 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
4581
4582 if (!N->memoperands_empty())
4583 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: N->memoperands());
4584
4585 Result->setFlags(N->getFlags());
4586 ReplaceUses(F: N, T: Result);
4587
4588 return true;
4589}
4590
4591/// If our passthru is an implicit_def, use noreg instead. This sidesteps
4592/// issues with MachineCSE not being able to CSE expressions with
4593/// IMPLICIT_DEF operands while preserving the semantic intent. See
4594/// pr64282 for context. Note that this transform is the last one
4595/// performed during ISel DAG-to-DAG.
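/// For example (illustrative): a vector pseudo whose tied passthru operand is
/// an IMPLICIT_DEF is rebuilt with NoRegister as the passthru, so two
/// otherwise identical instructions that differ only in their IMPLICIT_DEF
/// virtual registers can be CSE'd.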
4596bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4597 bool MadeChange = false;
4598 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4599
4600 while (Position != CurDAG->allnodes_begin()) {
4601 SDNode *N = &*--Position;
4602 if (N->use_empty() || !N->isMachineOpcode())
4603 continue;
4604
4605 const unsigned Opc = N->getMachineOpcode();
4606 if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opc) ||
4607 !RISCVII::isFirstDefTiedToFirstUse(Desc: TII->get(Opcode: Opc)) ||
4608 !isImplicitDef(V: N->getOperand(Num: 0)))
4609 continue;
4610
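    // Rebuild the operand list, replacing the IMPLICIT_DEF passthru
    // (operand 0) with NoRegister and keeping the remaining operands as-is.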
4611 SmallVector<SDValue> Ops;
4612 Ops.push_back(Elt: CurDAG->getRegister(Reg: RISCV::NoRegister, VT: N->getValueType(ResNo: 0)));
4613 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4614 SDValue Op = N->getOperand(Num: I);
4615 Ops.push_back(Elt: Op);
4616 }
4617
4618 MachineSDNode *Result =
4619 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
4620 Result->setFlags(N->getFlags());
4621 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val: N)->memoperands());
4622 ReplaceUses(F: N, T: Result);
4623 MadeChange = true;
4624 }
4625 return MadeChange;
4626}
4627
4629// This pass converts a legalized DAG into a RISC-V-specific DAG, ready
4630// for instruction scheduling.
4631FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4632 CodeGenOptLevel OptLevel) {
4633 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4634}
4635
4636char RISCVDAGToDAGISelLegacy::ID = 0;
4637
4638RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4639 CodeGenOptLevel OptLevel)
4640 : SelectionDAGISelLegacy(
4641 ID, std::make_unique<RISCVDAGToDAGISel>(args&: TM, args&: OptLevel)) {}
4642
4643INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4644