//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
#include "RISCVSelectionDAGInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"

static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));

#define GET_DAGISEL_BODY RISCVDAGToDAGISel
#include "RISCVGenDAGISel.inc"

41void RISCVDAGToDAGISel::PreprocessISelDAG() {
42 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
43
44 bool MadeChange = false;
45 while (Position != CurDAG->allnodes_begin()) {
46 SDNode *N = &*--Position;
47 if (N->use_empty())
48 continue;
49
50 SDValue Result;
51 switch (N->getOpcode()) {
52 case ISD::SPLAT_VECTOR: {
53 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
54 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
55 MVT VT = N->getSimpleValueType(ResNo: 0);
56 unsigned Opc =
57 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
58 SDLoc DL(N);
59 SDValue VL = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
60 SDValue Src = N->getOperand(Num: 0);
61 if (VT.isInteger())
62 Src = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget->getXLenVT(),
63 Operand: N->getOperand(Num: 0));
64 Result = CurDAG->getNode(Opcode: Opc, DL, VT, N1: CurDAG->getUNDEF(VT), N2: Src, N3: VL);
65 break;
66 }
67 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
68 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
69 // load. Done after lowering and combining so that we have a chance to
70 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
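      // The expansion below stores Lo at offset 0 and Hi at offset 4 of an
      // 8-byte stack slot, then issues a riscv_vlse whose stride operand is
      // X0 (i.e. stride 0), broadcasting the reassembled i64 into every
      // element of the result vector.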
71 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
72 MVT VT = N->getSimpleValueType(ResNo: 0);
73 SDValue Passthru = N->getOperand(Num: 0);
74 SDValue Lo = N->getOperand(Num: 1);
75 SDValue Hi = N->getOperand(Num: 2);
76 SDValue VL = N->getOperand(Num: 3);
77 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
78 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
79 "Unexpected VTs!");
80 MachineFunction &MF = CurDAG->getMachineFunction();
81 SDLoc DL(N);
82
      // Create a temporary stack slot for each node being expanded.
84 SDValue StackSlot =
85 CurDAG->CreateStackTemporary(Bytes: TypeSize::getFixed(ExactSize: 8), Alignment: Align(8));
86 int FI = cast<FrameIndexSDNode>(Val: StackSlot.getNode())->getIndex();
87 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
88
89 SDValue Chain = CurDAG->getEntryNode();
90 Lo = CurDAG->getStore(Chain, dl: DL, Val: Lo, Ptr: StackSlot, PtrInfo: MPI, Alignment: Align(8));
91
92 SDValue OffsetSlot =
93 CurDAG->getMemBasePlusOffset(Base: StackSlot, Offset: TypeSize::getFixed(ExactSize: 4), DL);
94 Hi = CurDAG->getStore(Chain, dl: DL, Val: Hi, Ptr: OffsetSlot, PtrInfo: MPI.getWithOffset(O: 4),
95 Alignment: Align(8));
96
97 Chain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo, N2: Hi);
98
99 SDVTList VTs = CurDAG->getVTList(VTs: {VT, MVT::Other});
100 SDValue IntID =
101 CurDAG->getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: MVT::i64);
102 SDValue Ops[] = {Chain,
103 IntID,
104 Passthru,
105 StackSlot,
106 CurDAG->getRegister(Reg: RISCV::X0, VT: MVT::i64),
107 VL};
108
109 Result = CurDAG->getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
110 MemVT: MVT::i64, PtrInfo: MPI, Alignment: Align(8),
111 Flags: MachineMemOperand::MOLoad);
112 break;
113 }
114 case ISD::FP_EXTEND: {
115 // We only have vector patterns for riscv_fpextend_vl in isel.
116 SDLoc DL(N);
117 MVT VT = N->getSimpleValueType(ResNo: 0);
118 if (!VT.isVector())
119 break;
120 SDValue VLMAX = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT());
121 SDValue TrueMask = CurDAG->getNode(
122 Opcode: RISCVISD::VMSET_VL, DL, VT: VT.changeVectorElementType(EltVT: MVT::i1), Operand: VLMAX);
123 Result = CurDAG->getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT, N1: N->getOperand(Num: 0),
124 N2: TrueMask, N3: VLMAX);
125 break;
126 }
127 }
128
129 if (Result) {
130 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
131 LLVM_DEBUG(N->dump(CurDAG));
132 LLVM_DEBUG(dbgs() << "\nNew: ");
133 LLVM_DEBUG(Result->dump(CurDAG));
134 LLVM_DEBUG(dbgs() << "\n");
135
136 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result);
137 MadeChange = true;
138 }
139 }
140
141 if (MadeChange)
142 CurDAG->RemoveDeadNodes();
143}
144
145void RISCVDAGToDAGISel::PostprocessISelDAG() {
146 HandleSDNode Dummy(CurDAG->getRoot());
147 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
148
149 bool MadeChange = false;
150 while (Position != CurDAG->allnodes_begin()) {
151 SDNode *N = &*--Position;
152 // Skip dead nodes and any non-machine opcodes.
153 if (N->use_empty() || !N->isMachineOpcode())
154 continue;
155
156 MadeChange |= doPeepholeSExtW(Node: N);
157
158 // FIXME: This is here only because the VMerge transform doesn't
159 // know how to handle masked true inputs. Once that has been moved
160 // to post-ISEL, this can be deleted as well.
161 MadeChange |= doPeepholeMaskedRVV(Node: cast<MachineSDNode>(Val: N));
162 }
163
164 CurDAG->setRoot(Dummy.getValue());
165
166 MadeChange |= doPeepholeMergeVVMFold();
167
168 // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister. This is required to work around
170 // an optimization deficiency in MachineCSE. This really should
171 // be merged back into each of the patterns (i.e. there's no good
172 // reason not to go directly to NoReg), but is being done this way
173 // to allow easy backporting.
174 MadeChange |= doPeepholeNoRegPassThru();
175
176 if (MadeChange)
177 CurDAG->RemoveDeadNodes();
178}
179
180static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
181 RISCVMatInt::InstSeq &Seq) {
182 SDValue SrcReg = CurDAG->getRegister(Reg: RISCV::X0, VT);
183 for (const RISCVMatInt::Inst &Inst : Seq) {
184 SDValue SDImm = CurDAG->getSignedTargetConstant(Val: Inst.getImm(), DL, VT);
185 SDNode *Result = nullptr;
186 switch (Inst.getOpndKind()) {
187 case RISCVMatInt::Imm:
188 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SDImm);
189 break;
190 case RISCVMatInt::RegX0:
191 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg,
192 Op2: CurDAG->getRegister(Reg: RISCV::X0, VT));
193 break;
194 case RISCVMatInt::RegReg:
195 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SrcReg);
196 break;
197 case RISCVMatInt::RegImm:
198 Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SDImm);
199 break;
200 }
201
202 // Only the first instruction has X0 as its source.
203 SrcReg = SDValue(Result, 0);
204 }
205
206 return SrcReg;
207}
208
209static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
210 int64_t Imm, const RISCVSubtarget &Subtarget) {
211 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget);
212
213 // Use a rematerializable pseudo instruction for short sequences if enabled.
214 if (Seq.size() == 2 && UsePseudoMovImm)
215 return SDValue(
216 CurDAG->getMachineNode(Opcode: RISCV::PseudoMovImm, dl: DL, VT,
217 Op1: CurDAG->getSignedTargetConstant(Val: Imm, DL, VT)),
218 0);
219
220 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
221 // worst an LUI+ADDIW. This will require an extra register, but avoids a
222 // constant pool.
223 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
224 // low and high 32 bits are the same and bit 31 and 63 are set.
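  // For example (illustrative), 0x000ABCDE000ABCDE == (0xABCDE << 32) +
  // 0xABCDE, so once 0xABCDE has been materialized into a register X, the
  // full constant is just (ADD (SLLI X, 32), X).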
225 if (Seq.size() > 3) {
226 unsigned ShiftAmt, AddOpc;
227 RISCVMatInt::InstSeq SeqLo =
228 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc);
229 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
230 SDValue Lo = selectImmSeq(CurDAG, DL, VT, Seq&: SeqLo);
231
232 SDValue SLLI = SDValue(
233 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: Lo,
234 Op2: CurDAG->getTargetConstant(Val: ShiftAmt, DL, VT)),
235 0);
236 return SDValue(CurDAG->getMachineNode(Opcode: AddOpc, dl: DL, VT, Op1: Lo, Op2: SLLI), 0);
237 }
238 }
239
240 // Otherwise, use the original sequence.
241 return selectImmSeq(CurDAG, DL, VT, Seq);
242}
243
244void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
245 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
246 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
247 bool IsLoad, MVT *IndexVT) {
248 SDValue Chain = Node->getOperand(Num: 0);
249
250 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Base pointer.
251
252 if (IsStridedOrIndexed) {
253 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Index.
254 if (IndexVT)
255 *IndexVT = Operands.back()->getSimpleValueType(ResNo: 0);
256 }
257
258 if (IsMasked) {
259 SDValue Mask = Node->getOperand(Num: CurOp++);
260 Operands.push_back(Elt: Mask);
261 }
262 SDValue VL;
263 selectVLOp(N: Node->getOperand(Num: CurOp++), VL);
264 Operands.push_back(Elt: VL);
265
266 MVT XLenVT = Subtarget->getXLenVT();
267 SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
268 Operands.push_back(Elt: SEWOp);
269
270 // At the IR layer, all the masked load intrinsics have policy operands,
271 // none of the others do. All have passthru operands. For our pseudos,
272 // all loads have policy operands.
273 if (IsLoad) {
274 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
275 if (IsMasked)
276 Policy = Node->getConstantOperandVal(Num: CurOp++);
277 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
278 Operands.push_back(Elt: PolicyOp);
279 }
280
281 Operands.push_back(Elt: Chain); // Chain.
282}
283
284void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
285 bool IsStrided) {
286 SDLoc DL(Node);
287 MVT VT = Node->getSimpleValueType(ResNo: 0);
288 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
289 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
290
291 unsigned CurOp = 2;
292 SmallVector<SDValue, 8> Operands;
293
294 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
295
296 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
297 Operands, /*IsLoad=*/true);
298
299 const RISCV::VLSEGPseudo *P =
300 RISCV::getVLSEGPseudo(NF, Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
301 LMUL: static_cast<unsigned>(LMUL));
302 MachineSDNode *Load =
303 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);
304
305 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
306 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
307
308 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
309 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(N: Node);
311}
312
313void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(ResNo: 0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
319 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
320
321 unsigned CurOp = 2;
322 SmallVector<SDValue, 7> Operands;
323
324 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped,
334 VT2: XLenVT, VT3: MVT::Other, Ops: Operands);
335
336 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
337 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
338
339 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0)); // Result
340 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1)); // VL
341 ReplaceUses(F: SDValue(Node, 2), T: SDValue(Load, 2)); // Chain
342 CurDAG->RemoveDeadNode(N: Node);
343}
344
345void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
346 bool IsOrdered) {
347 SDLoc DL(Node);
348 MVT VT = Node->getSimpleValueType(ResNo: 0);
349 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
350 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
351
352 unsigned CurOp = 2;
353 SmallVector<SDValue, 8> Operands;
354
355 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
356
357 MVT IndexVT;
358 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
359 /*IsStridedOrIndexed*/ true, Operands,
360 /*IsLoad=*/true, IndexVT: &IndexVT);
361
362#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
364 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
365 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
366 if (DecodedLMUL.second)
367 ContainedTyNumElts /= DecodedLMUL.first;
368 else
369 ContainedTyNumElts *= DecodedLMUL.first;
370 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
371 "Element count mismatch");
372#endif
373
374 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
375 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
376 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
377 report_fatal_error(reason: "The V extension does not support EEW=64 for index "
378 "values when XLEN=32");
379 }
380 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
381 NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
382 IndexLMUL: static_cast<unsigned>(IndexLMUL));
383 MachineSDNode *Load =
384 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands);
385
386 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
387 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
388
389 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Load, 0));
390 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Load, 1));
391 CurDAG->RemoveDeadNode(N: Node);
392}
393
394void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
395 bool IsStrided) {
396 SDLoc DL(Node);
397 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
398 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
399 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
400
401 unsigned CurOp = 2;
402 SmallVector<SDValue, 8> Operands;
403
404 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
405
406 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
407 Operands);
408
409 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
410 NF, Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
411 MachineSDNode *Store =
412 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);
413
414 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
415 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()});
416
417 ReplaceNode(F: Node, T: Store);
418}
419
420void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
421 bool IsOrdered) {
422 SDLoc DL(Node);
423 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
424 unsigned Log2SEW = Node->getConstantOperandVal(Num: Node->getNumOperands() - 1);
425 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
426
427 unsigned CurOp = 2;
428 SmallVector<SDValue, 8> Operands;
429
430 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
431
432 MVT IndexVT;
433 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
434 /*IsStridedOrIndexed*/ true, Operands,
435 /*IsLoad=*/false, IndexVT: &IndexVT);
436
437#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
439 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
440 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
441 if (DecodedLMUL.second)
442 ContainedTyNumElts /= DecodedLMUL.first;
443 else
444 ContainedTyNumElts *= DecodedLMUL.first;
445 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
446 "Element count mismatch");
447#endif
448
449 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
450 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
451 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
452 report_fatal_error(reason: "The V extension does not support EEW=64 for index "
453 "values when XLEN=32");
454 }
455 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
456 NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
457 IndexLMUL: static_cast<unsigned>(IndexLMUL));
458 MachineSDNode *Store =
459 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands);
460
461 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
462 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()});
463
464 ReplaceNode(F: Node, T: Store);
465}
466
467void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
468 if (!Subtarget->hasVInstructions())
469 return;
470
471 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
472
473 SDLoc DL(Node);
474 MVT XLenVT = Subtarget->getXLenVT();
475
476 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
477
478 assert((IntNo == Intrinsic::riscv_vsetvli ||
479 IntNo == Intrinsic::riscv_vsetvlimax) &&
480 "Unexpected vsetvli intrinsic");
481
482 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
483 unsigned Offset = (VLMax ? 1 : 2);
484
485 assert(Node->getNumOperands() == Offset + 2 &&
486 "Unexpected number of operands");
487
488 unsigned SEW =
489 RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: Offset) & 0x7);
490 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
491 Node->getConstantOperandVal(Num: Offset + 1) & 0x7);
492
493 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL: VLMul, SEW, /*TailAgnostic*/ true,
494 /*MaskAgnostic*/ true);
495 SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT);
496
497 SDValue VLOperand;
498 unsigned Opcode = RISCV::PseudoVSETVLI;
499 if (auto *C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) {
500 if (auto VLEN = Subtarget->getRealVLen())
501 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
502 VLMax = true;
503 }
504 if (VLMax || isAllOnesConstant(V: Node->getOperand(Num: 1))) {
505 VLOperand = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
506 Opcode = RISCV::PseudoVSETVLIX0;
507 } else {
508 VLOperand = Node->getOperand(Num: 1);
509
510 if (auto *C = dyn_cast<ConstantSDNode>(Val&: VLOperand)) {
511 uint64_t AVL = C->getZExtValue();
512 if (isUInt<5>(x: AVL)) {
513 SDValue VLImm = CurDAG->getTargetConstant(Val: AVL, DL, VT: XLenVT);
514 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: RISCV::PseudoVSETIVLI, dl: DL,
515 VT: XLenVT, Op1: VLImm, Op2: VTypeIOp));
516 return;
517 }
518 }
519 }
520
521 ReplaceNode(F: Node,
522 T: CurDAG->getMachineNode(Opcode, dl: DL, VT: XLenVT, Op1: VLOperand, Op2: VTypeIOp));
523}
524
525bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
526 MVT VT = Node->getSimpleValueType(ResNo: 0);
527 unsigned Opcode = Node->getOpcode();
528 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
529 "Unexpected opcode");
530 SDLoc DL(Node);
531
532 // For operations of the form (x << C1) op C2, check if we can use
533 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
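  // For example, in (and (shl x, 4), 0xFF0) the constant 0xFF0 does not fit
  // in a simm12, but the equivalent (shl (and x, 0xFF), 4) only needs a plain
  // ANDI followed by an SLLI.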
534 SDValue N0 = Node->getOperand(Num: 0);
535 SDValue N1 = Node->getOperand(Num: 1);
536
537 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: N1);
538 if (!Cst)
539 return false;
540
541 int64_t Val = Cst->getSExtValue();
542
543 // Check if immediate can already use ANDI/ORI/XORI.
544 if (isInt<12>(x: Val))
545 return false;
546
547 SDValue Shift = N0;
548
549 // If Val is simm32 and we have a sext_inreg from i32, then the binop
550 // produces at least 33 sign bits. We can peek through the sext_inreg and use
551 // a SLLIW at the end.
552 bool SignExt = false;
553 if (isInt<32>(x: Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
554 N0.hasOneUse() && cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT() == MVT::i32) {
555 SignExt = true;
556 Shift = N0.getOperand(i: 0);
557 }
558
559 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
560 return false;
561
562 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1));
563 if (!ShlCst)
564 return false;
565
566 uint64_t ShAmt = ShlCst->getZExtValue();
567
568 // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR; AND is unaffected.
570 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(N: ShAmt);
571 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
572 return false;
573
574 int64_t ShiftedVal = Val >> ShAmt;
575 if (!isInt<12>(x: ShiftedVal))
576 return false;
577
578 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
579 if (SignExt && ShAmt >= 32)
580 return false;
581
582 // Ok, we can reorder to get a smaller immediate.
583 unsigned BinOpc;
584 switch (Opcode) {
585 default: llvm_unreachable("Unexpected opcode");
586 case ISD::AND: BinOpc = RISCV::ANDI; break;
587 case ISD::OR: BinOpc = RISCV::ORI; break;
588 case ISD::XOR: BinOpc = RISCV::XORI; break;
589 }
590
591 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
592
593 SDNode *BinOp = CurDAG->getMachineNode(
594 Opcode: BinOpc, dl: DL, VT, Op1: Shift.getOperand(i: 0),
595 Op2: CurDAG->getSignedTargetConstant(Val: ShiftedVal, DL, VT));
596 SDNode *SLLI =
597 CurDAG->getMachineNode(Opcode: ShOpc, dl: DL, VT, Op1: SDValue(BinOp, 0),
598 Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
599 ReplaceNode(F: Node, T: SLLI);
600 return true;
601}
602
603bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
604 unsigned Opc;
605
606 if (Subtarget->hasVendorXTHeadBb())
607 Opc = RISCV::TH_EXT;
608 else if (Subtarget->hasVendorXAndesPerf())
609 Opc = RISCV::NDS_BFOS;
610 else if (Subtarget->hasVendorXqcibm())
611 Opc = RISCV::QC_EXT;
612 else
613 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
614 return false;
615
616 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
617 if (!N1C)
618 return false;
619
620 SDValue N0 = Node->getOperand(Num: 0);
621 if (!N0.hasOneUse())
622 return false;
623
624 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
625 const SDLoc &DL, MVT VT) {
626 if (Opc == RISCV::QC_EXT) {
627 // QC.EXT X, width, shamt
628 // shamt is the same as Lsb
629 // width is the number of bits to extract from the Lsb
630 Msb = Msb - Lsb + 1;
631 }
632 return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
633 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
634 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
635 };
636
637 SDLoc DL(Node);
638 MVT VT = Node->getSimpleValueType(ResNo: 0);
639 const unsigned RightShAmt = N1C->getZExtValue();
640
641 // Transform (sra (shl X, C1) C2) with C1 < C2
642 // -> (SignedBitfieldExtract X, msb, lsb)
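  // For example, on a 32-bit target (sra (shl X, 24), 28) extracts bits
  // X[7:4] and sign-extends from bit 7: msb = 32 - 24 - 1 = 7 and
  // lsb = 28 - 24 = 4.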
643 if (N0.getOpcode() == ISD::SHL) {
644 auto *N01C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
645 if (!N01C)
646 return false;
647
648 const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount cannot be less than the left-shift amount).
651 if (LeftShAmt > RightShAmt)
652 return false;
653
654 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
655 const unsigned Msb = MsbPlusOne - 1;
656 const unsigned Lsb = RightShAmt - LeftShAmt;
657
658 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
659 ReplaceNode(F: Node, T: Sbe);
660 return true;
661 }
662
663 // Transform (sra (sext_inreg X, _), C) ->
664 // (SignedBitfieldExtract X, msb, lsb)
665 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
666 unsigned ExtSize =
667 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
668
669 // ExtSize of 32 should use sraiw via tablegen pattern.
670 if (ExtSize == 32)
671 return false;
672
673 const unsigned Msb = ExtSize - 1;
    // If the shift-right amount is greater than Msb, the operation extracts
    // the sign bit X[Msb] and sign-extends it.
676 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
677
678 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
679 ReplaceNode(F: Node, T: Sbe);
680 return true;
681 }
682
683 return false;
684}
685
686bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
687 // Only supported with XAndesPerf at the moment.
688 if (!Subtarget->hasVendorXAndesPerf())
689 return false;
690
691 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
692 if (!N1C)
693 return false;
694
695 SDValue N0 = Node->getOperand(Num: 0);
696 if (!N0.hasOneUse())
697 return false;
698
699 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
700 const SDLoc &DL, MVT VT) {
701 unsigned Opc = RISCV::NDS_BFOS;
702 // If the Lsb is equal to the Msb, then the Lsb should be 0.
703 if (Lsb == Msb)
704 Lsb = 0;
705 return CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: N0.getOperand(i: 0),
706 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
707 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
708 };
709
710 SDLoc DL(Node);
711 MVT VT = Node->getSimpleValueType(ResNo: 0);
712 const unsigned RightShAmt = N1C->getZExtValue();
713
714 // Transform (sra (shl X, C1) C2) with C1 > C2
715 // -> (NDS.BFOS X, lsb, msb)
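  // For example, on a 32-bit target (sra (shl X, 20), 8) places X[11:0] into
  // bits [23:12] and sign-extends from bit 23: msb = 32 - 8 - 1 = 23 and
  // lsb = 20 - 8 = 12.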
716 if (N0.getOpcode() == ISD::SHL) {
717 auto *N01C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
718 if (!N01C)
719 return false;
720
721 const unsigned LeftShAmt = N01C->getZExtValue();
722 // Make sure that this is a bitfield insertion (i.e., the shift-right
723 // amount should be less than the left-shift).
724 if (LeftShAmt <= RightShAmt)
725 return false;
726
727 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
728 const unsigned Msb = MsbPlusOne - 1;
729 const unsigned Lsb = LeftShAmt - RightShAmt;
730
731 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
732 ReplaceNode(F: Node, T: Sbi);
733 return true;
734 }
735
736 return false;
737}
738
739bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
740 const SDLoc &DL, MVT VT,
741 SDValue X, unsigned Msb,
742 unsigned Lsb) {
743 unsigned Opc;
744
745 if (Subtarget->hasVendorXTHeadBb()) {
746 Opc = RISCV::TH_EXTU;
747 } else if (Subtarget->hasVendorXAndesPerf()) {
748 Opc = RISCV::NDS_BFOZ;
749 } else if (Subtarget->hasVendorXqcibm()) {
750 Opc = RISCV::QC_EXTU;
751 // QC.EXTU X, width, shamt
752 // shamt is the same as Lsb
753 // width is the number of bits to extract from the Lsb
754 Msb = Msb - Lsb + 1;
755 } else {
756 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
757 return false;
758 }
759
760 SDNode *Ube = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
761 Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT),
762 Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT));
763 ReplaceNode(F: Node, T: Ube);
764 return true;
765}
766
767bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
768 const SDLoc &DL, MVT VT,
769 SDValue X, unsigned Msb,
770 unsigned Lsb) {
771 // Only supported with XAndesPerf at the moment.
772 if (!Subtarget->hasVendorXAndesPerf())
773 return false;
774
775 unsigned Opc = RISCV::NDS_BFOZ;
776
777 // If the Lsb is equal to the Msb, then the Lsb should be 0.
778 if (Lsb == Msb)
779 Lsb = 0;
780 SDNode *Ubi = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: X,
781 Op2: CurDAG->getTargetConstant(Val: Lsb, DL, VT),
782 Op3: CurDAG->getTargetConstant(Val: Msb, DL, VT));
783 ReplaceNode(F: Node, T: Ubi);
784 return true;
785}
786
787bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
788 // Target does not support indexed loads.
789 if (!Subtarget->hasVendorXTHeadMemIdx())
790 return false;
791
792 LoadSDNode *Ld = cast<LoadSDNode>(Val: Node);
793 ISD::MemIndexedMode AM = Ld->getAddressingMode();
794 if (AM == ISD::UNINDEXED)
795 return false;
796
797 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Ld->getOffset());
798 if (!C)
799 return false;
800
801 EVT LoadVT = Ld->getMemoryVT();
802 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
803 "Unexpected addressing mode");
804 bool IsPre = AM == ISD::PRE_INC;
805 bool IsPost = AM == ISD::POST_INC;
806 int64_t Offset = C->getSExtValue();
807
808 // The constants that can be encoded in the THeadMemIdx instructions
809 // are of the form (sign_extend(imm5) << imm2).
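  // For example, an offset of 48 is encodable as imm5 = 12 with imm2 = 2
  // (12 << 2 == 48), while an offset such as 17 cannot be expressed as
  // sign_extend(imm5) << imm2 and is rejected below.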
810 unsigned Shift;
811 for (Shift = 0; Shift < 4; Shift++)
812 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
813 break;
814
815 // Constant cannot be encoded.
816 if (Shift == 4)
817 return false;
818
819 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
820 unsigned Opcode;
821 if (LoadVT == MVT::i8 && IsPre)
822 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
823 else if (LoadVT == MVT::i8 && IsPost)
824 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
825 else if (LoadVT == MVT::i16 && IsPre)
826 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
827 else if (LoadVT == MVT::i16 && IsPost)
828 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
829 else if (LoadVT == MVT::i32 && IsPre)
830 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
831 else if (LoadVT == MVT::i32 && IsPost)
832 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
833 else if (LoadVT == MVT::i64 && IsPre)
834 Opcode = RISCV::TH_LDIB;
835 else if (LoadVT == MVT::i64 && IsPost)
836 Opcode = RISCV::TH_LDIA;
837 else
838 return false;
839
840 EVT Ty = Ld->getOffset().getValueType();
841 SDValue Ops[] = {
842 Ld->getBasePtr(),
843 CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(Node), VT: Ty),
844 CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(Node), VT: Ty), Ld->getChain()};
845 SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(Node), VT1: Ld->getValueType(ResNo: 0),
846 VT2: Ld->getValueType(ResNo: 1), VT3: MVT::Other, Ops);
847
848 MachineMemOperand *MemOp = cast<MemSDNode>(Val: Node)->getMemOperand();
849 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: New), NewMemRefs: {MemOp});
850
851 ReplaceNode(F: Node, T: New);
852
853 return true;
854}
855
856void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
857 if (!Subtarget->hasVInstructions())
858 return;
859
860 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
861
862 SDLoc DL(Node);
863 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
864
865 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
866 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
867 "Unexpected vsetvli intrinsic");
868
869 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
870 unsigned Log2SEW = Log2_32(Value: Node->getConstantOperandVal(Num: 6));
871 SDValue SEWOp =
872 CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: Subtarget->getXLenVT());
873 SmallVector<SDValue, 8> Operands = {Node->getOperand(Num: 2), Node->getOperand(Num: 3),
874 Node->getOperand(Num: 4), Node->getOperand(Num: 5),
875 Node->getOperand(Num: 8), SEWOp,
876 Node->getOperand(Num: 0)};
877
878 unsigned Opcode;
879 auto *LMulSDNode = cast<ConstantSDNode>(Val: Node->getOperand(Num: 7));
880 switch (LMulSDNode->getSExtValue()) {
881 case 5:
882 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
883 : RISCV::PseudoSF_VC_I_SE_MF8;
884 break;
885 case 6:
886 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
887 : RISCV::PseudoSF_VC_I_SE_MF4;
888 break;
889 case 7:
890 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
891 : RISCV::PseudoSF_VC_I_SE_MF2;
892 break;
893 case 0:
894 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
895 : RISCV::PseudoSF_VC_I_SE_M1;
896 break;
897 case 1:
898 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
899 : RISCV::PseudoSF_VC_I_SE_M2;
900 break;
901 case 2:
902 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
903 : RISCV::PseudoSF_VC_I_SE_M4;
904 break;
905 case 3:
906 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
907 : RISCV::PseudoSF_VC_I_SE_M8;
908 break;
909 }
910
911 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
912 Opcode, dl: DL, VT: Node->getSimpleValueType(ResNo: 0), Ops: Operands));
913}
914
915static unsigned getSegInstNF(unsigned Intrinsic) {
916#define INST_NF_CASE(NAME, NF) \
917 case Intrinsic::riscv_##NAME##NF: \
918 return NF;
919#define INST_NF_CASE_MASK(NAME, NF) \
920 case Intrinsic::riscv_##NAME##NF##_mask: \
921 return NF;
922#define INST_NF_CASE_FF(NAME, NF) \
923 case Intrinsic::riscv_##NAME##NF##ff: \
924 return NF;
925#define INST_NF_CASE_FF_MASK(NAME, NF) \
926 case Intrinsic::riscv_##NAME##NF##ff_mask: \
927 return NF;
928#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
929 MACRO_NAME(NAME, 2) \
930 MACRO_NAME(NAME, 3) \
931 MACRO_NAME(NAME, 4) \
932 MACRO_NAME(NAME, 5) \
933 MACRO_NAME(NAME, 6) \
934 MACRO_NAME(NAME, 7) \
935 MACRO_NAME(NAME, 8)
936#define INST_ALL_NF_CASE(NAME) \
937 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
938 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
939#define INST_ALL_NF_CASE_WITH_FF(NAME) \
940 INST_ALL_NF_CASE(NAME) \
941 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
942 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
943 switch (Intrinsic) {
944 default:
945 llvm_unreachable("Unexpected segment load/store intrinsic");
946 INST_ALL_NF_CASE_WITH_FF(vlseg)
947 INST_ALL_NF_CASE(vlsseg)
948 INST_ALL_NF_CASE(vloxseg)
949 INST_ALL_NF_CASE(vluxseg)
950 INST_ALL_NF_CASE(vsseg)
951 INST_ALL_NF_CASE(vssseg)
952 INST_ALL_NF_CASE(vsoxseg)
953 INST_ALL_NF_CASE(vsuxseg)
954 }
955}
956
957void RISCVDAGToDAGISel::Select(SDNode *Node) {
958 // If we have a custom node, we have already selected.
959 if (Node->isMachineOpcode()) {
960 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
961 Node->setNodeId(-1);
962 return;
963 }
964
965 // Instruction Selection not handled by the auto-generated tablegen selection
966 // should be handled here.
967 unsigned Opcode = Node->getOpcode();
968 MVT XLenVT = Subtarget->getXLenVT();
969 SDLoc DL(Node);
970 MVT VT = Node->getSimpleValueType(ResNo: 0);
971
972 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
973
974 switch (Opcode) {
975 case ISD::Constant: {
976 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
977 auto *ConstNode = cast<ConstantSDNode>(Val: Node);
978 if (ConstNode->isZero()) {
979 SDValue New =
980 CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: RISCV::X0, VT);
981 ReplaceNode(F: Node, T: New.getNode());
982 return;
983 }
984 int64_t Imm = ConstNode->getSExtValue();
985 // If only the lower 8 bits are used, try to convert this to a simm6 by
986 // sign-extending bit 7. This is neutral without the C extension, and
987 // allows C.LI to be used if C is present.
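    // For example, if Imm is 0xFF and every user only reads the low byte,
    // Imm becomes -1, which a single (compressible) ADDI can materialize.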
988 if (isUInt<8>(x: Imm) && isInt<6>(x: SignExtend64<8>(x: Imm)) && hasAllBUsers(Node))
989 Imm = SignExtend64<8>(x: Imm);
990 // If the upper XLen-16 bits are not used, try to convert this to a simm12
991 // by sign extending bit 15.
992 if (isUInt<16>(x: Imm) && isInt<12>(x: SignExtend64<16>(x: Imm)) &&
993 hasAllHUsers(Node))
994 Imm = SignExtend64<16>(x: Imm);
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
997 if (!isInt<32>(x: Imm) && isUInt<32>(x: Imm) && hasAllWUsers(Node))
998 Imm = SignExtend64<32>(x: Imm);
999
1000 ReplaceNode(F: Node, T: selectImm(CurDAG, DL, VT, Imm, Subtarget: *Subtarget).getNode());
1001 return;
1002 }
1003 case ISD::ConstantFP: {
1004 const APFloat &APF = cast<ConstantFPSDNode>(Val: Node)->getValueAPF();
1005
1006 bool Is64Bit = Subtarget->is64Bit();
1007 bool HasZdinx = Subtarget->hasStdExtZdinx();
1008
1009 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1010 SDValue Imm;
1011 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1012 // create an integer immediate.
1013 if (APF.isPosZero() || NegZeroF64) {
1014 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1015 Imm = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::f64);
1016 else
1017 Imm = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT);
1018 } else {
1019 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
1020 Subtarget: *Subtarget);
1021 }
1022
1023 unsigned Opc;
1024 switch (VT.SimpleTy) {
1025 default:
1026 llvm_unreachable("Unexpected size");
1027 case MVT::bf16:
1028 assert(Subtarget->hasStdExtZfbfmin());
1029 Opc = RISCV::FMV_H_X;
1030 break;
1031 case MVT::f16:
1032 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1033 break;
1034 case MVT::f32:
1035 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1036 break;
1037 case MVT::f64:
1038 // For RV32, we can't move from a GPR, we need to convert instead. This
1039 // should only happen for +0.0 and -0.0.
1040 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1041 if (HasZdinx)
1042 Opc = RISCV::COPY;
1043 else
1044 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1045 break;
1046 }
1047
1048 SDNode *Res;
1049 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1050 Res =
1051 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_16, DL, VT, Operand: Imm).getNode();
1052 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1053 Res =
1054 CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_32, DL, VT, Operand: Imm).getNode();
1055 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1056 Res = CurDAG->getMachineNode(
1057 Opcode: Opc, dl: DL, VT, Op1: Imm,
1058 Op2: CurDAG->getTargetConstant(Val: RISCVFPRndMode::RNE, DL, VT: XLenVT));
1059 else
1060 Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: Imm);
1061
1062 // For f64 -0.0, we need to insert a fneg.d idiom.
1063 if (NegZeroF64) {
1064 Opc = RISCV::FSGNJN_D;
1065 if (HasZdinx)
1066 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1067 Res =
1068 CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: SDValue(Res, 0), Op2: SDValue(Res, 0));
1069 }
1070
1071 ReplaceNode(F: Node, T: Res);
1072 return;
1073 }
1074 case RISCVISD::BuildGPRPair:
1075 case RISCVISD::BuildPairF64: {
1076 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1077 break;
1078
1079 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1080 "BuildPairF64 only handled here on rv32i_zdinx");
1081
1082 SDValue Ops[] = {
1083 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32),
1084 Node->getOperand(Num: 0),
1085 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32),
1086 Node->getOperand(Num: 1),
1087 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
1088
1089 SDNode *N = CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT, Ops);
1090 ReplaceNode(F: Node, T: N);
1091 return;
1092 }
1093 case RISCVISD::SplitGPRPair:
1094 case RISCVISD::SplitF64: {
1095 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1096 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1097 "SplitF64 only handled here on rv32i_zdinx");
1098
1099 if (!SDValue(Node, 0).use_empty()) {
1100 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1101 VT: Node->getValueType(ResNo: 0),
1102 Operand: Node->getOperand(Num: 0));
1103 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1104 }
1105
1106 if (!SDValue(Node, 1).use_empty()) {
1107 SDValue Hi = CurDAG->getTargetExtractSubreg(
1108 SRIdx: RISCV::sub_gpr_odd, DL, VT: Node->getValueType(ResNo: 1), Operand: Node->getOperand(Num: 0));
1109 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1110 }
1111
1112 CurDAG->RemoveDeadNode(N: Node);
1113 return;
1114 }
1115
1116 assert(Opcode != RISCVISD::SplitGPRPair &&
1117 "SplitGPRPair should already be handled");
1118
1119 if (!Subtarget->hasStdExtZfa())
1120 break;
1121 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1122 "Unexpected subtarget");
1123
1124 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1125 if (!SDValue(Node, 0).use_empty()) {
1126 SDNode *Lo = CurDAG->getMachineNode(Opcode: RISCV::FMV_X_W_FPR64, dl: DL, VT,
1127 Op1: Node->getOperand(Num: 0));
1128 ReplaceUses(F: SDValue(Node, 0), T: SDValue(Lo, 0));
1129 }
1130 if (!SDValue(Node, 1).use_empty()) {
1131 SDNode *Hi = CurDAG->getMachineNode(Opcode: RISCV::FMVH_X_D, dl: DL, VT,
1132 Op1: Node->getOperand(Num: 0));
1133 ReplaceUses(F: SDValue(Node, 1), T: SDValue(Hi, 0));
1134 }
1135
1136 CurDAG->RemoveDeadNode(N: Node);
1137 return;
1138 }
1139 case ISD::SHL: {
1140 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1141 if (!N1C)
1142 break;
1143 SDValue N0 = Node->getOperand(Num: 0);
1144 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1145 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1146 break;
1147 unsigned ShAmt = N1C->getZExtValue();
1148 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1149
1150 if (isShiftedMask_64(Value: Mask)) {
1151 unsigned XLen = Subtarget->getXLen();
1152 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1153 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1154 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1155 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1156 // where C2 has 32 leading zeros and C3 trailing zeros.
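        // For example, on RV64 (shl (and X, 0xFFFF0000), 4) becomes
        // (slli (srliw X, 16), 20): the SRLIW isolates X[31:16] and the SLLI
        // shifts it back up by 16 + 4.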
1157 SDNode *SRLIW = CurDAG->getMachineNode(
1158 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0->getOperand(Num: 0),
1159 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1160 SDNode *SLLI = CurDAG->getMachineNode(
1161 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1162 Op2: CurDAG->getTargetConstant(Val: TrailingZeros + ShAmt, DL, VT));
1163 ReplaceNode(F: Node, T: SLLI);
1164 return;
1165 }
1166 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1167 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1168 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1169 // where C2 has C4 leading zeros and no trailing zeros.
1170 // This is profitable if the "and" was to be lowered to
1171 // (srli (slli X, C4), C4) and not (andi X, C2).
1172 // For "LeadingZeros == 32":
1173 // - with Zba it's just (slli.uw X, C)
1174 // - without Zba a tablegen pattern applies the very same
1175 // transform as we would have done here
1176 SDNode *SLLI = CurDAG->getMachineNode(
1177 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0->getOperand(Num: 0),
1178 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1179 SDNode *SRLI = CurDAG->getMachineNode(
1180 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1181 Op2: CurDAG->getTargetConstant(Val: LeadingZeros - ShAmt, DL, VT));
1182 ReplaceNode(F: Node, T: SRLI);
1183 return;
1184 }
1185 }
1186 break;
1187 }
1188 case ISD::SRL: {
1189 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1190 if (!N1C)
1191 break;
1192 SDValue N0 = Node->getOperand(Num: 0);
1193 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1194 break;
1195 unsigned ShAmt = N1C->getZExtValue();
1196 uint64_t Mask = N0.getConstantOperandVal(i: 1);
1197
1198 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1199 // 32 leading zeros and C3 trailing zeros.
1200 if (isShiftedMask_64(Value: Mask) && N0.hasOneUse()) {
1201 unsigned XLen = Subtarget->getXLen();
1202 unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask);
1203 unsigned TrailingZeros = llvm::countr_zero(Val: Mask);
1204 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1205 SDNode *SRLIW = CurDAG->getMachineNode(
1206 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0->getOperand(Num: 0),
1207 Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT));
1208 SDNode *SLLI = CurDAG->getMachineNode(
1209 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1210 Op2: CurDAG->getTargetConstant(Val: TrailingZeros - ShAmt, DL, VT));
1211 ReplaceNode(F: Node, T: SLLI);
1212 return;
1213 }
1214 }
1215
1216 // Optimize (srl (and X, C2), C) ->
    // (srli (slli X, XLen-C3), (XLen-C3) + C)
1218 // Where C2 is a mask with C3 trailing ones.
1219 // Taking into account that the C2 may have had lower bits unset by
1220 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1221 // This pattern occurs when type legalizing right shifts for types with
1222 // less than XLen bits.
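    // For example, on RV64 (srl (and X, 0xFFF0), 4) becomes
    // (srli (slli X, 48), 52) when no vendor bitfield-extract instruction is
    // available: the shift pair extracts bits X[15:4] without materializing
    // the mask.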
1223 Mask |= maskTrailingOnes<uint64_t>(N: ShAmt);
1224 if (!isMask_64(Value: Mask))
1225 break;
1226 unsigned TrailingOnes = llvm::countr_one(Value: Mask);
1227 if (ShAmt >= TrailingOnes)
1228 break;
1229 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1230 if (TrailingOnes == 32) {
1231 SDNode *SRLI = CurDAG->getMachineNode(
1232 Opcode: Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, dl: DL, VT,
1233 Op1: N0->getOperand(Num: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1234 ReplaceNode(F: Node, T: SRLI);
1235 return;
1236 }
1237
1238 // Only do the remaining transforms if the AND has one use.
1239 if (!N0.hasOneUse())
1240 break;
1241
1242 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1243 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1244 SDNode *BEXTI = CurDAG->getMachineNode(
1245 Opcode: Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, dl: DL, VT,
1246 Op1: N0->getOperand(Num: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT));
1247 ReplaceNode(F: Node, T: BEXTI);
1248 return;
1249 }
1250
1251 const unsigned Msb = TrailingOnes - 1;
1252 const unsigned Lsb = ShAmt;
1253 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0->getOperand(Num: 0), Msb, Lsb))
1254 return;
1255
1256 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1257 SDNode *SLLI =
1258 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0->getOperand(Num: 0),
1259 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1260 SDNode *SRLI = CurDAG->getMachineNode(
1261 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1262 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1263 ReplaceNode(F: Node, T: SRLI);
1264 return;
1265 }
1266 case ISD::SRA: {
1267 if (trySignedBitfieldExtract(Node))
1268 return;
1269
1270 if (trySignedBitfieldInsertInSign(Node))
1271 return;
1272
1273 // Optimize (sra (sext_inreg X, i16), C) ->
    // (srai (slli X, XLen-16), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    // (srai (slli X, XLen-8), (XLen-8) + C)
1277 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1278 // This transform matches the code we get without Zbb. The shifts are more
1279 // compressible, and this can help expose CSE opportunities in the sdiv by
1280 // constant optimization.
1281 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1282 if (!N1C)
1283 break;
1284 SDValue N0 = Node->getOperand(Num: 0);
1285 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1286 break;
1287 unsigned ShAmt = N1C->getZExtValue();
1288 unsigned ExtSize =
1289 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
1290 // ExtSize of 32 should use sraiw via tablegen pattern.
1291 if (ExtSize >= 32 || ShAmt >= ExtSize)
1292 break;
1293 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1294 SDNode *SLLI =
1295 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0->getOperand(Num: 0),
1296 Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT));
1297 SDNode *SRAI = CurDAG->getMachineNode(
1298 Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1299 Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT));
1300 ReplaceNode(F: Node, T: SRAI);
1301 return;
1302 }
1303 case ISD::OR:
1304 case ISD::XOR:
1305 if (tryShrinkShlLogicImm(Node))
1306 return;
1307
1308 break;
1309 case ISD::AND: {
1310 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1311 if (!N1C)
1312 break;
1313
1314 SDValue N0 = Node->getOperand(Num: 0);
1315
1316 bool LeftShift = N0.getOpcode() == ISD::SHL;
1317 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1318 auto *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
1319 if (!C)
1320 break;
1321 unsigned C2 = C->getZExtValue();
1322 unsigned XLen = Subtarget->getXLen();
1323 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1324
1325 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1326 // shift pair might offer more compression opportunities.
1327 // TODO: We could check for C extension here, but we don't have many lit
1328 // tests with the C extension enabled so not checking gets better
1329 // coverage.
      // TODO: What if ANDI is faster than the shift?
1331 bool IsCANDI = isInt<6>(x: N1C->getSExtValue());
1332
1333 uint64_t C1 = N1C->getZExtValue();
1334
1335 // Clear irrelevant bits in the mask.
1336 if (LeftShift)
1337 C1 &= maskTrailingZeros<uint64_t>(N: C2);
1338 else
1339 C1 &= maskTrailingOnes<uint64_t>(N: XLen - C2);
1340
1341 // Some transforms should only be done if the shift has a single use or
1342 // the AND would become (srli (slli X, 32), 32)
1343 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1344
1345 SDValue X = N0.getOperand(i: 0);
1346
1347 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1348 // with c3 leading zeros.
1349 if (!LeftShift && isMask_64(Value: C1)) {
1350 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1351 if (C2 < Leading) {
1352 // If the number of leading zeros is C2+32 this can be SRLIW.
1353 if (C2 + 32 == Leading) {
1354 SDNode *SRLIW = CurDAG->getMachineNode(
1355 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1356 ReplaceNode(F: Node, T: SRLIW);
1357 return;
1358 }
1359
1360 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1361 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1362 //
          // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
1364 // legalized and goes through DAG combine.
1365 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1366 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1367 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32) {
1368 SDNode *SRAIW =
1369 CurDAG->getMachineNode(Opcode: RISCV::SRAIW, dl: DL, VT, Op1: X.getOperand(i: 0),
1370 Op2: CurDAG->getTargetConstant(Val: 31, DL, VT));
1371 SDNode *SRLIW = CurDAG->getMachineNode(
1372 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: SDValue(SRAIW, 0),
1373 Op2: CurDAG->getTargetConstant(Val: Leading - 32, DL, VT));
1374 ReplaceNode(F: Node, T: SRLIW);
1375 return;
1376 }
1377
1378 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1379 // available.
1380 // Transform (and (srl x, C2), C1)
1381 // -> (<bfextract> x, msb, lsb)
1382 //
1383 // Make sure to keep this below the SRLIW cases, as we always want to
1384 // prefer the more common instruction.
1385 const unsigned Msb = llvm::bit_width(Value: C1) + C2 - 1;
1386 const unsigned Lsb = C2;
1387 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1388 return;
1389
1390 // (srli (slli x, c3-c2), c3).
1391 // Skip if we could use (zext.w (sraiw X, C2)).
1392 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1393 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1394 cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32;
1395 // Also Skip if we can use bexti or th.tst.
1396 Skip |= HasBitTest && Leading == XLen - 1;
1397 if (OneUseOrZExtW && !Skip) {
1398 SDNode *SLLI = CurDAG->getMachineNode(
1399 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1400 Op2: CurDAG->getTargetConstant(Val: Leading - C2, DL, VT));
1401 SDNode *SRLI = CurDAG->getMachineNode(
1402 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1403 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1404 ReplaceNode(F: Node, T: SRLI);
1405 return;
1406 }
1407 }
1408 }
1409
      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a
      // mask shifted by c2 bits with c3 leading zeros.
1412 if (LeftShift && isShiftedMask_64(Value: C1)) {
1413 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1414
1415 if (C2 + Leading < XLen &&
1416 C1 == (maskTrailingOnes<uint64_t>(N: XLen - (C2 + Leading)) << C2)) {
1417 // Use slli.uw when possible.
1418 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1419 SDNode *SLLI_UW =
1420 CurDAG->getMachineNode(Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: X,
1421 Op2: CurDAG->getTargetConstant(Val: C2, DL, VT));
1422 ReplaceNode(F: Node, T: SLLI_UW);
1423 return;
1424 }
1425
1426 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1427 // available.
1428 // Transform (and (shl x, c2), c1)
1429 // -> (<bfinsert> x, msb, lsb)
1430 // e.g.
1431 // (and (shl x, 12), 0x00fff000)
1432 // If XLen = 32 and C2 = 12, then
1433 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1434 const unsigned Msb = XLen - Leading - 1;
1435 const unsigned Lsb = C2;
1436 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1437 return;
1438
          // (srli (slli x, c2+c3), c3)
1440 if (OneUseOrZExtW && !IsCANDI) {
1441 SDNode *SLLI = CurDAG->getMachineNode(
1442 Opcode: RISCV::SLLI, dl: DL, VT, Op1: X,
1443 Op2: CurDAG->getTargetConstant(Val: C2 + Leading, DL, VT));
1444 SDNode *SRLI = CurDAG->getMachineNode(
1445 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0),
1446 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1447 ReplaceNode(F: Node, T: SRLI);
1448 return;
1449 }
1450 }
1451 }
1452
1453 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1454 // shifted mask with c2 leading zeros and c3 trailing zeros.
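      // For example, with XLEN=32 and a single-use AND,
      // (and (srl x, 4), 0x0FFFFFF0) becomes (slli (srli x, 8), 4): the mask
      // has 4 leading and 4 trailing zeros, so shifting right by 4 + 4 and
      // left by 4 reproduces it.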
1455 if (!LeftShift && isShiftedMask_64(Value: C1)) {
1456 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1457 unsigned Trailing = llvm::countr_zero(Val: C1);
1458 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1459 !IsCANDI) {
1460 unsigned SrliOpc = RISCV::SRLI;
1461 // If the input is zexti32 we should use SRLIW.
1462 if (X.getOpcode() == ISD::AND &&
1463 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1464 X.getConstantOperandVal(i: 1) == UINT64_C(0xFFFFFFFF)) {
1465 SrliOpc = RISCV::SRLIW;
1466 X = X.getOperand(i: 0);
1467 }
1468 SDNode *SRLI = CurDAG->getMachineNode(
1469 Opcode: SrliOpc, dl: DL, VT, Op1: X,
1470 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1471 SDNode *SLLI = CurDAG->getMachineNode(
1472 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1473 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1474 ReplaceNode(F: Node, T: SLLI);
1475 return;
1476 }
1477 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1478 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1479 OneUseOrZExtW && !IsCANDI) {
1480 SDNode *SRLIW = CurDAG->getMachineNode(
1481 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1482 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1483 SDNode *SLLI = CurDAG->getMachineNode(
1484 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1485 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1486 ReplaceNode(F: Node, T: SLLI);
1487 return;
1488 }
1489 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1490 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1491 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1492 SDNode *SRLI = CurDAG->getMachineNode(
1493 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1494 Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT));
1495 SDNode *SLLI_UW = CurDAG->getMachineNode(
1496 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1497 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1498 ReplaceNode(F: Node, T: SLLI_UW);
1499 return;
1500 }
1501 }
1502
1503 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1504 // shifted mask with no leading zeros and c3 trailing zeros.
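    // e.g. on RV64: (and (shl x, 4), 0xfffffffffffff000)
    //   -> (slli (srli x, 8), 12)
    // since C1 has no leading zeros and Trailing = 12 > C2 = 4.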
1505 if (LeftShift && isShiftedMask_64(Value: C1)) {
1506 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1507 unsigned Trailing = llvm::countr_zero(Val: C1);
1508 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1509 SDNode *SRLI = CurDAG->getMachineNode(
1510 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1511 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1512 SDNode *SLLI = CurDAG->getMachineNode(
1513 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1514 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1515 ReplaceNode(F: Node, T: SLLI);
1516 return;
1517 }
1518 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1519 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1520 SDNode *SRLIW = CurDAG->getMachineNode(
1521 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X,
1522 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1523 SDNode *SLLI = CurDAG->getMachineNode(
1524 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0),
1525 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1526 ReplaceNode(F: Node, T: SLLI);
1527 return;
1528 }
1529
1530 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1531 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1532 Subtarget->hasStdExtZba()) {
1533 SDNode *SRLI = CurDAG->getMachineNode(
1534 Opcode: RISCV::SRLI, dl: DL, VT, Op1: X,
1535 Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT));
1536 SDNode *SLLI_UW = CurDAG->getMachineNode(
1537 Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0),
1538 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1539 ReplaceNode(F: Node, T: SLLI_UW);
1540 return;
1541 }
1542 }
1543 }
1544
1545 const uint64_t C1 = N1C->getZExtValue();
1546
1547 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
1548 N0.hasOneUse()) {
1549 unsigned C2 = N0.getConstantOperandVal(i: 1);
1550 unsigned XLen = Subtarget->getXLen();
1551 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1552
1553 SDValue X = N0.getOperand(i: 0);
1554
1555 // Prefer SRAIW + ANDI when possible.
1556 bool Skip = C2 > 32 && isInt<12>(x: N1C->getSExtValue()) &&
1557 X.getOpcode() == ISD::SHL &&
1558 isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
1559 X.getConstantOperandVal(i: 1) == 32;
1560 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1561 // mask with c3 leading zeros and c2 is larger than c3.
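      // e.g. on RV64: (and (sra x, 16), 0x00ffffffffffffff)
      //   -> (srli (srai x, 8), 8)
      // since C1 has Leading = 8 leading zeros and C2 = 16 > 8.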
1562 if (isMask_64(Value: C1) && !Skip) {
1563 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1564 if (C2 > Leading) {
1565 SDNode *SRAI = CurDAG->getMachineNode(
1566 Opcode: RISCV::SRAI, dl: DL, VT, Op1: X,
1567 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1568 SDNode *SRLI = CurDAG->getMachineNode(
1569 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1570 Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT));
1571 ReplaceNode(F: Node, T: SRLI);
1572 return;
1573 }
1574 }
1575
1576 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1577 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1578 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
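      // e.g. on RV64: (and (sra y, 16), 0x00ffffffffffff00) with c3 = c4 = 8
      //   -> (slli (srli (srai y, 8), 16), 8)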
1579 if (isShiftedMask_64(Value: C1) && !Skip) {
1580 unsigned Leading = XLen - llvm::bit_width(Value: C1);
1581 unsigned Trailing = llvm::countr_zero(Val: C1);
1582 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1583 SDNode *SRAI = CurDAG->getMachineNode(
1584 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1585 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT));
1586 SDNode *SRLI = CurDAG->getMachineNode(
1587 Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SRAI, 0),
1588 Op2: CurDAG->getTargetConstant(Val: Leading + Trailing, DL, VT));
1589 SDNode *SLLI = CurDAG->getMachineNode(
1590 Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0),
1591 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT));
1592 ReplaceNode(F: Node, T: SLLI);
1593 return;
1594 }
1595 }
1596 }
1597
1598 // If C1 masks off the upper bits only (but can't be formed as an
1599 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1600 // available.
1601 // Transform (and x, C1)
1602 // -> (<bfextract> x, msb, lsb)
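    // e.g. (and x, 0x3ffff) -> (<bfextract> x, 17, 0)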
1603 if (isMask_64(Value: C1) && !isInt<12>(x: N1C->getSExtValue())) {
1604 const unsigned Msb = llvm::bit_width(Value: C1) - 1;
1605 if (tryUnsignedBitfieldExtract(Node, DL, VT, X: N0, Msb, Lsb: 0))
1606 return;
1607 }
1608
1609 if (tryShrinkShlLogicImm(Node))
1610 return;
1611
1612 break;
1613 }
1614 case ISD::MUL: {
1615 // Special case for calculating (mul (and X, C2), C1) where the full product
1616 // fits in XLen bits. We can shift X left by the number of leading zeros in
1617 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1618 // product has XLen trailing zeros, putting it in the output of MULHU. This
1619 // can avoid materializing a constant in a register for C2.
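    // e.g. on RV64: (mul (and X, 0xff), 0x12345)
    //   -> (mulhu (slli X, 56), 0x12345 << 8)
    // The 128-bit product is (X & 0xff) * 0x12345 * 2^64, so MULHU returns
    // exactly (X & 0xff) * 0x12345.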
1620
1621 // RHS should be a constant.
1622 auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
1623 if (!N1C || !N1C->hasOneUse())
1624 break;
1625
1626 // LHS should be an AND with constant.
1627 SDValue N0 = Node->getOperand(Num: 0);
1628 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)))
1629 break;
1630
1631 uint64_t C2 = N0.getConstantOperandVal(i: 1);
1632
1633 // Constant should be a mask.
1634 if (!isMask_64(Value: C2))
1635 break;
1636
1637 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1638     // multiple users or the constant is a simm12. This avoids inserting a
1639     // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1640 // make it more costly to materialize. Otherwise, using a SLLI might allow
1641 // it to be compressed.
1642 bool IsANDIOrZExt =
1643 isInt<12>(x: C2) ||
1644 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1645 // With XTHeadBb, we can use TH.EXTU.
1646 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1647 if (IsANDIOrZExt && (isInt<12>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1648 break;
1649 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1650 // the constant is a simm32.
1651 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1652 // With XTHeadBb, we can use TH.EXTU.
1653 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1654 if (IsZExtW && (isInt<32>(x: N1C->getSExtValue()) || !N0.hasOneUse()))
1655 break;
1656
1657 // We need to shift left the AND input and C1 by a total of XLen bits.
1658
1659 // How far left do we need to shift the AND input?
1660 unsigned XLen = Subtarget->getXLen();
1661 unsigned LeadingZeros = XLen - llvm::bit_width(Value: C2);
1662
1663 // The constant gets shifted by the remaining amount unless that would
1664 // shift bits out.
1665 uint64_t C1 = N1C->getZExtValue();
1666 unsigned ConstantShift = XLen - LeadingZeros;
1667 if (ConstantShift > (XLen - llvm::bit_width(Value: C1)))
1668 break;
1669
1670 uint64_t ShiftedC1 = C1 << ConstantShift;
1671     // If this is RV32, we need to sign extend the constant.
1672 if (XLen == 32)
1673 ShiftedC1 = SignExtend64<32>(x: ShiftedC1);
1674
1675 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1676 SDNode *Imm = selectImm(CurDAG, DL, VT, Imm: ShiftedC1, Subtarget: *Subtarget).getNode();
1677 SDNode *SLLI =
1678 CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
1679 Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT));
1680 SDNode *MULHU = CurDAG->getMachineNode(Opcode: RISCV::MULHU, dl: DL, VT,
1681 Op1: SDValue(SLLI, 0), Op2: SDValue(Imm, 0));
1682 ReplaceNode(F: Node, T: MULHU);
1683 return;
1684 }
1685 case ISD::LOAD: {
1686 if (tryIndexedLoad(Node))
1687 return;
1688
1689 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1690       // We match a post-incrementing load here.
1691 LoadSDNode *Load = cast<LoadSDNode>(Val: Node);
1692 if (Load->getAddressingMode() != ISD::POST_INC)
1693 break;
1694
1695 SDValue Chain = Node->getOperand(Num: 0);
1696 SDValue Base = Node->getOperand(Num: 1);
1697 SDValue Offset = Node->getOperand(Num: 2);
1698
1699 bool Simm12 = false;
1700 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1701
1702 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Val&: Offset)) {
1703 int ConstantVal = ConstantOffset->getSExtValue();
1704 Simm12 = isInt<12>(x: ConstantVal);
1705 if (Simm12)
1706 Offset = CurDAG->getTargetConstant(Val: ConstantVal, DL: SDLoc(Offset),
1707 VT: Offset.getValueType());
1708 }
1709
1710 unsigned Opcode = 0;
1711 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1712 case MVT::i8:
1713 if (Simm12 && SignExtend)
1714 Opcode = RISCV::CV_LB_ri_inc;
1715 else if (Simm12 && !SignExtend)
1716 Opcode = RISCV::CV_LBU_ri_inc;
1717 else if (!Simm12 && SignExtend)
1718 Opcode = RISCV::CV_LB_rr_inc;
1719 else
1720 Opcode = RISCV::CV_LBU_rr_inc;
1721 break;
1722 case MVT::i16:
1723 if (Simm12 && SignExtend)
1724 Opcode = RISCV::CV_LH_ri_inc;
1725 else if (Simm12 && !SignExtend)
1726 Opcode = RISCV::CV_LHU_ri_inc;
1727 else if (!Simm12 && SignExtend)
1728 Opcode = RISCV::CV_LH_rr_inc;
1729 else
1730 Opcode = RISCV::CV_LHU_rr_inc;
1731 break;
1732 case MVT::i32:
1733 if (Simm12)
1734 Opcode = RISCV::CV_LW_ri_inc;
1735 else
1736 Opcode = RISCV::CV_LW_rr_inc;
1737 break;
1738 default:
1739 break;
1740 }
1741 if (!Opcode)
1742 break;
1743
1744 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: XLenVT, VT2: XLenVT,
1745 VT3: Chain.getSimpleValueType(), Op1: Base,
1746 Op2: Offset, Op3: Chain));
1747 return;
1748 }
1749 break;
1750 }
1751 case RISCVISD::LD_RV32: {
1752 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1753
1754 SDValue Base, Offset;
1755 SDValue Chain = Node->getOperand(Num: 0);
1756 SDValue Addr = Node->getOperand(Num: 1);
1757 SelectAddrRegImm(Addr, Base, Offset);
1758
1759 SDValue Ops[] = {Base, Offset, Chain};
1760 MachineSDNode *New = CurDAG->getMachineNode(
1761 Opcode: RISCV::LD_RV32, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
1762 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL,
1763 VT: MVT::i32, Operand: SDValue(New, 0));
1764 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_odd, DL,
1765 VT: MVT::i32, Operand: SDValue(New, 0));
1766 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
1767 ReplaceUses(F: SDValue(Node, 0), T: Lo);
1768 ReplaceUses(F: SDValue(Node, 1), T: Hi);
1769 ReplaceUses(F: SDValue(Node, 2), T: SDValue(New, 1));
1770 CurDAG->RemoveDeadNode(N: Node);
1771 return;
1772 }
1773 case RISCVISD::SD_RV32: {
1774 SDValue Base, Offset;
1775 SDValue Chain = Node->getOperand(Num: 0);
1776 SDValue Addr = Node->getOperand(Num: 3);
1777 SelectAddrRegImm(Addr, Base, Offset);
1778
1779 SDValue Lo = Node->getOperand(Num: 1);
1780 SDValue Hi = Node->getOperand(Num: 2);
1781
1782 SDValue RegPair;
1783 // Peephole to use X0_Pair for storing zero.
1784 if (isNullConstant(V: Lo) && isNullConstant(V: Hi)) {
1785 RegPair = CurDAG->getRegister(Reg: RISCV::X0_Pair, VT: MVT::Untyped);
1786 } else {
1787 SDValue Ops[] = {
1788 CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), Lo,
1789 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), Hi,
1790 CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)};
1791
1792 RegPair = SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL,
1793 VT: MVT::Untyped, Ops),
1794 0);
1795 }
1796
1797 MachineSDNode *New = CurDAG->getMachineNode(Opcode: RISCV::SD_RV32, dl: DL, VT: MVT::Other,
1798 Ops: {RegPair, Base, Offset, Chain});
1799 CurDAG->setNodeMemRefs(N: New, NewMemRefs: {cast<MemSDNode>(Val: Node)->getMemOperand()});
1800 ReplaceUses(F: SDValue(Node, 0), T: SDValue(New, 0));
1801 CurDAG->RemoveDeadNode(N: Node);
1802 return;
1803 }
1804 case ISD::INTRINSIC_WO_CHAIN: {
1805 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
1806 switch (IntNo) {
1807 // By default we do not custom select any intrinsic.
1808 default:
1809 break;
1810 case Intrinsic::riscv_vmsgeu:
1811 case Intrinsic::riscv_vmsge: {
1812 SDValue Src1 = Node->getOperand(Num: 1);
1813 SDValue Src2 = Node->getOperand(Num: 2);
1814 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1815 bool IsCmpConstant = false;
1816 bool IsCmpMinimum = false;
1817 // Only custom select scalar second operand.
1818 if (Src2.getValueType() != XLenVT)
1819 break;
1820 // Small constants are handled with patterns.
1821 int64_t CVal = 0;
1822 MVT Src1VT = Src1.getSimpleValueType();
1823 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
1824 IsCmpConstant = true;
1825 CVal = C->getSExtValue();
1826 if (CVal >= -15 && CVal <= 16) {
1827 if (!IsUnsigned || CVal != 0)
1828 break;
1829 IsCmpMinimum = true;
1830 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1831 numBits: Src1VT.getScalarSizeInBits())
1832 .getSExtValue()) {
1833 IsCmpMinimum = true;
1834 }
1835 }
1836 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1837 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
1838 default:
1839 llvm_unreachable("Unexpected LMUL!");
1840#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1841 case RISCVVType::lmulenum: \
1842 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1843 : RISCV::PseudoVMSLT_VX_##suffix; \
1844 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1845 : RISCV::PseudoVMSGT_VX_##suffix; \
1846 break;
1847 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1848 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1849 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1850 CASE_VMSLT_OPCODES(LMUL_1, M1)
1851 CASE_VMSLT_OPCODES(LMUL_2, M2)
1852 CASE_VMSLT_OPCODES(LMUL_4, M4)
1853 CASE_VMSLT_OPCODES(LMUL_8, M8)
1854#undef CASE_VMSLT_OPCODES
1855 }
1856 // Mask operations use the LMUL from the mask type.
1857 switch (RISCVTargetLowering::getLMUL(VT)) {
1858 default:
1859 llvm_unreachable("Unexpected LMUL!");
1860#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1861 case RISCVVType::lmulenum: \
1862 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1863 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1864 break;
1865 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1866 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1867 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1868 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1869 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1870 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1871 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1872#undef CASE_VMNAND_VMSET_OPCODES
1873 }
1874 SDValue SEW = CurDAG->getTargetConstant(
1875 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
1876 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
1877 SDValue VL;
1878 selectVLOp(N: Node->getOperand(Num: 3), VL);
1879
1880 // If vmsge(u) with minimum value, expand it to vmset.
1881 if (IsCmpMinimum) {
1882 ReplaceNode(F: Node,
1883 T: CurDAG->getMachineNode(Opcode: VMSetOpcode, dl: DL, VT, Op1: VL, Op2: MaskSEW));
1884 return;
1885 }
1886
1887 if (IsCmpConstant) {
1888 SDValue Imm =
1889 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
1890
1891 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMSGTOpcode, dl: DL, VT,
1892 Ops: {Src1, Imm, VL, SEW}));
1893 return;
1894 }
1895
1896 // Expand to
1897 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1898 SDValue Cmp = SDValue(
1899 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
1900 0);
1901 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMNANDOpcode, dl: DL, VT,
1902 Ops: {Cmp, Cmp, VL, MaskSEW}));
1903 return;
1904 }
1905 case Intrinsic::riscv_vmsgeu_mask:
1906 case Intrinsic::riscv_vmsge_mask: {
1907 SDValue Src1 = Node->getOperand(Num: 2);
1908 SDValue Src2 = Node->getOperand(Num: 3);
1909 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1910 bool IsCmpConstant = false;
1911 bool IsCmpMinimum = false;
1912 // Only custom select scalar second operand.
1913 if (Src2.getValueType() != XLenVT)
1914 break;
1915 // Small constants are handled with patterns.
1916 MVT Src1VT = Src1.getSimpleValueType();
1917 int64_t CVal = 0;
1918 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) {
1919 IsCmpConstant = true;
1920 CVal = C->getSExtValue();
1921 if (CVal >= -15 && CVal <= 16) {
1922 if (!IsUnsigned || CVal != 0)
1923 break;
1924 IsCmpMinimum = true;
1925 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1926 numBits: Src1VT.getScalarSizeInBits())
1927 .getSExtValue()) {
1928 IsCmpMinimum = true;
1929 }
1930 }
1931 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1932 VMOROpcode, VMSGTMaskOpcode;
1933 switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) {
1934 default:
1935 llvm_unreachable("Unexpected LMUL!");
1936#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1937 case RISCVVType::lmulenum: \
1938 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1939 : RISCV::PseudoVMSLT_VX_##suffix; \
1940 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1941 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1942 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
1943 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
1944 break;
1945 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1946 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1947 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1948 CASE_VMSLT_OPCODES(LMUL_1, M1)
1949 CASE_VMSLT_OPCODES(LMUL_2, M2)
1950 CASE_VMSLT_OPCODES(LMUL_4, M4)
1951 CASE_VMSLT_OPCODES(LMUL_8, M8)
1952#undef CASE_VMSLT_OPCODES
1953 }
1954 // Mask operations use the LMUL from the mask type.
1955 switch (RISCVTargetLowering::getLMUL(VT)) {
1956 default:
1957 llvm_unreachable("Unexpected LMUL!");
1958#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1959 case RISCVVType::lmulenum: \
1960 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1961 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1962 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1963 break;
1964 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
1965 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
1966 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
1967 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
1968 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
1969 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
1970 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
1971#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1972 }
1973 SDValue SEW = CurDAG->getTargetConstant(
1974 Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT);
1975 SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT);
1976 SDValue VL;
1977 selectVLOp(N: Node->getOperand(Num: 5), VL);
1978 SDValue MaskedOff = Node->getOperand(Num: 1);
1979 SDValue Mask = Node->getOperand(Num: 4);
1980
1981 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
1982 if (IsCmpMinimum) {
1983 // We don't need vmor if the MaskedOff and the Mask are the same
1984 // value.
1985 if (Mask == MaskedOff) {
1986 ReplaceUses(F: Node, T: Mask.getNode());
1987 return;
1988 }
1989 ReplaceNode(F: Node,
1990 T: CurDAG->getMachineNode(Opcode: VMOROpcode, dl: DL, VT,
1991 Ops: {Mask, MaskedOff, VL, MaskSEW}));
1992 return;
1993 }
1994
1995 // If the MaskedOff value and the Mask are the same value use
1996 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1997 // This avoids needing to copy v0 to vd before starting the next sequence.
1998 if (Mask == MaskedOff) {
1999 SDValue Cmp = SDValue(
2000 CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}),
2001 0);
2002 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMANDNOpcode, dl: DL, VT,
2003 Ops: {Mask, Cmp, VL, MaskSEW}));
2004 return;
2005 }
2006
2007 SDValue PolicyOp =
2008 CurDAG->getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);
2009
2010 if (IsCmpConstant) {
2011 SDValue Imm =
2012 selectImm(CurDAG, DL: SDLoc(Src2), VT: XLenVT, Imm: CVal - 1, Subtarget: *Subtarget);
2013
2014 ReplaceNode(F: Node, T: CurDAG->getMachineNode(
2015 Opcode: VMSGTMaskOpcode, dl: DL, VT,
2016 Ops: {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2017 return;
2018 }
2019
2020 // Otherwise use
2021 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2022 // The result is mask undisturbed.
2023 // We use the same instructions to emulate mask agnostic behavior, because
2024 // the agnostic result can be either undisturbed or all 1.
2025 SDValue Cmp = SDValue(CurDAG->getMachineNode(Opcode: VMSLTMaskOpcode, dl: DL, VT,
2026 Ops: {MaskedOff, Src1, Src2, Mask,
2027 VL, SEW, PolicyOp}),
2028 0);
2029       // vmxor.mm vd, vd, v0 is used to update the active values.
2030 ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMXOROpcode, dl: DL, VT,
2031 Ops: {Cmp, Mask, VL, MaskSEW}));
2032 return;
2033 }
2034 case Intrinsic::riscv_vsetvli:
2035 case Intrinsic::riscv_vsetvlimax:
2036 return selectVSETVLI(Node);
2037 }
2038 break;
2039 }
2040 case ISD::INTRINSIC_W_CHAIN: {
2041 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2042 switch (IntNo) {
2043 // By default we do not custom select any intrinsic.
2044 default:
2045 break;
2046 case Intrinsic::riscv_vlseg2:
2047 case Intrinsic::riscv_vlseg3:
2048 case Intrinsic::riscv_vlseg4:
2049 case Intrinsic::riscv_vlseg5:
2050 case Intrinsic::riscv_vlseg6:
2051 case Intrinsic::riscv_vlseg7:
2052 case Intrinsic::riscv_vlseg8: {
2053 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2054 /*IsStrided*/ false);
2055 return;
2056 }
2057 case Intrinsic::riscv_vlseg2_mask:
2058 case Intrinsic::riscv_vlseg3_mask:
2059 case Intrinsic::riscv_vlseg4_mask:
2060 case Intrinsic::riscv_vlseg5_mask:
2061 case Intrinsic::riscv_vlseg6_mask:
2062 case Intrinsic::riscv_vlseg7_mask:
2063 case Intrinsic::riscv_vlseg8_mask: {
2064 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2065 /*IsStrided*/ false);
2066 return;
2067 }
2068 case Intrinsic::riscv_vlsseg2:
2069 case Intrinsic::riscv_vlsseg3:
2070 case Intrinsic::riscv_vlsseg4:
2071 case Intrinsic::riscv_vlsseg5:
2072 case Intrinsic::riscv_vlsseg6:
2073 case Intrinsic::riscv_vlsseg7:
2074 case Intrinsic::riscv_vlsseg8: {
2075 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2076 /*IsStrided*/ true);
2077 return;
2078 }
2079 case Intrinsic::riscv_vlsseg2_mask:
2080 case Intrinsic::riscv_vlsseg3_mask:
2081 case Intrinsic::riscv_vlsseg4_mask:
2082 case Intrinsic::riscv_vlsseg5_mask:
2083 case Intrinsic::riscv_vlsseg6_mask:
2084 case Intrinsic::riscv_vlsseg7_mask:
2085 case Intrinsic::riscv_vlsseg8_mask: {
2086 selectVLSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2087 /*IsStrided*/ true);
2088 return;
2089 }
2090 case Intrinsic::riscv_vloxseg2:
2091 case Intrinsic::riscv_vloxseg3:
2092 case Intrinsic::riscv_vloxseg4:
2093 case Intrinsic::riscv_vloxseg5:
2094 case Intrinsic::riscv_vloxseg6:
2095 case Intrinsic::riscv_vloxseg7:
2096 case Intrinsic::riscv_vloxseg8:
2097 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2098 /*IsOrdered*/ true);
2099 return;
2100 case Intrinsic::riscv_vluxseg2:
2101 case Intrinsic::riscv_vluxseg3:
2102 case Intrinsic::riscv_vluxseg4:
2103 case Intrinsic::riscv_vluxseg5:
2104 case Intrinsic::riscv_vluxseg6:
2105 case Intrinsic::riscv_vluxseg7:
2106 case Intrinsic::riscv_vluxseg8:
2107 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2108 /*IsOrdered*/ false);
2109 return;
2110 case Intrinsic::riscv_vloxseg2_mask:
2111 case Intrinsic::riscv_vloxseg3_mask:
2112 case Intrinsic::riscv_vloxseg4_mask:
2113 case Intrinsic::riscv_vloxseg5_mask:
2114 case Intrinsic::riscv_vloxseg6_mask:
2115 case Intrinsic::riscv_vloxseg7_mask:
2116 case Intrinsic::riscv_vloxseg8_mask:
2117 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2118 /*IsOrdered*/ true);
2119 return;
2120 case Intrinsic::riscv_vluxseg2_mask:
2121 case Intrinsic::riscv_vluxseg3_mask:
2122 case Intrinsic::riscv_vluxseg4_mask:
2123 case Intrinsic::riscv_vluxseg5_mask:
2124 case Intrinsic::riscv_vluxseg6_mask:
2125 case Intrinsic::riscv_vluxseg7_mask:
2126 case Intrinsic::riscv_vluxseg8_mask:
2127 selectVLXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2128 /*IsOrdered*/ false);
2129 return;
2130 case Intrinsic::riscv_vlseg8ff:
2131 case Intrinsic::riscv_vlseg7ff:
2132 case Intrinsic::riscv_vlseg6ff:
2133 case Intrinsic::riscv_vlseg5ff:
2134 case Intrinsic::riscv_vlseg4ff:
2135 case Intrinsic::riscv_vlseg3ff:
2136 case Intrinsic::riscv_vlseg2ff: {
2137 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false);
2138 return;
2139 }
2140 case Intrinsic::riscv_vlseg8ff_mask:
2141 case Intrinsic::riscv_vlseg7ff_mask:
2142 case Intrinsic::riscv_vlseg6ff_mask:
2143 case Intrinsic::riscv_vlseg5ff_mask:
2144 case Intrinsic::riscv_vlseg4ff_mask:
2145 case Intrinsic::riscv_vlseg3ff_mask:
2146 case Intrinsic::riscv_vlseg2ff_mask: {
2147 selectVLSEGFF(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true);
2148 return;
2149 }
2150 case Intrinsic::riscv_vloxei:
2151 case Intrinsic::riscv_vloxei_mask:
2152 case Intrinsic::riscv_vluxei:
2153 case Intrinsic::riscv_vluxei_mask: {
2154 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2155 IntNo == Intrinsic::riscv_vluxei_mask;
2156 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2157 IntNo == Intrinsic::riscv_vloxei_mask;
2158
2159 MVT VT = Node->getSimpleValueType(ResNo: 0);
2160 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2161
2162 unsigned CurOp = 2;
2163 SmallVector<SDValue, 8> Operands;
2164 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2165
2166 MVT IndexVT;
2167 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2168 /*IsStridedOrIndexed*/ true, Operands,
2169 /*IsLoad=*/true, IndexVT: &IndexVT);
2170
2171 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2172 "Element count mismatch");
2173
2174 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2175 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2176 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2177 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2178 report_fatal_error(reason: "The V extension does not support EEW=64 for index "
2179 "values when XLEN=32");
2180 }
2181 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2182 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL),
2183 IndexLMUL: static_cast<unsigned>(IndexLMUL));
2184 MachineSDNode *Load =
2185 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2186
2187 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2188 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
2189
2190 ReplaceNode(F: Node, T: Load);
2191 return;
2192 }
2193 case Intrinsic::riscv_vlm:
2194 case Intrinsic::riscv_vle:
2195 case Intrinsic::riscv_vle_mask:
2196 case Intrinsic::riscv_vlse:
2197 case Intrinsic::riscv_vlse_mask: {
2198 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2199 IntNo == Intrinsic::riscv_vlse_mask;
2200 bool IsStrided =
2201 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2202
2203 MVT VT = Node->getSimpleValueType(ResNo: 0);
2204 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2205
2206       // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2207       // operand at the IR level. In pseudos, it has both policy and
2208       // passthru operands. The passthru operand is needed to track the
2209       // "tail undefined" state, and the policy is there just for
2210       // consistency - it will always be "don't care" for the
2211       // unmasked form.
2212 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2213 unsigned CurOp = 2;
2214 SmallVector<SDValue, 8> Operands;
2215 if (HasPassthruOperand)
2216 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2217 else {
2218 // We eagerly lower to implicit_def (instead of undef), as we
2219 // otherwise fail to select nodes such as: nxv1i1 = undef
2220 SDNode *Passthru =
2221 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT);
2222 Operands.push_back(Elt: SDValue(Passthru, 0));
2223 }
2224 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2225 Operands, /*IsLoad=*/true);
2226
2227 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2228 const RISCV::VLEPseudo *P =
2229 RISCV::getVLEPseudo(Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW,
2230 LMUL: static_cast<unsigned>(LMUL));
2231 MachineSDNode *Load =
2232 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2233
2234 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2235 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
2236
2237 ReplaceNode(F: Node, T: Load);
2238 return;
2239 }
2240 case Intrinsic::riscv_vleff:
2241 case Intrinsic::riscv_vleff_mask: {
2242 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2243
2244 MVT VT = Node->getSimpleValueType(ResNo: 0);
2245 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2246
2247 unsigned CurOp = 2;
2248 SmallVector<SDValue, 7> Operands;
2249 Operands.push_back(Elt: Node->getOperand(Num: CurOp++));
2250 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2251 /*IsStridedOrIndexed*/ false, Operands,
2252 /*IsLoad=*/true);
2253
2254 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2255 const RISCV::VLEPseudo *P =
2256 RISCV::getVLEPseudo(Masked: IsMasked, /*Strided*/ false, /*FF*/ true,
2257 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2258 MachineSDNode *Load = CurDAG->getMachineNode(
2259 Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2260 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2261 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()});
2262
2263 ReplaceNode(F: Node, T: Load);
2264 return;
2265 }
2266 }
2267 break;
2268 }
2269 case ISD::INTRINSIC_VOID: {
2270 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
2271 switch (IntNo) {
2272 case Intrinsic::riscv_vsseg2:
2273 case Intrinsic::riscv_vsseg3:
2274 case Intrinsic::riscv_vsseg4:
2275 case Intrinsic::riscv_vsseg5:
2276 case Intrinsic::riscv_vsseg6:
2277 case Intrinsic::riscv_vsseg7:
2278 case Intrinsic::riscv_vsseg8: {
2279 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2280 /*IsStrided*/ false);
2281 return;
2282 }
2283 case Intrinsic::riscv_vsseg2_mask:
2284 case Intrinsic::riscv_vsseg3_mask:
2285 case Intrinsic::riscv_vsseg4_mask:
2286 case Intrinsic::riscv_vsseg5_mask:
2287 case Intrinsic::riscv_vsseg6_mask:
2288 case Intrinsic::riscv_vsseg7_mask:
2289 case Intrinsic::riscv_vsseg8_mask: {
2290 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2291 /*IsStrided*/ false);
2292 return;
2293 }
2294 case Intrinsic::riscv_vssseg2:
2295 case Intrinsic::riscv_vssseg3:
2296 case Intrinsic::riscv_vssseg4:
2297 case Intrinsic::riscv_vssseg5:
2298 case Intrinsic::riscv_vssseg6:
2299 case Intrinsic::riscv_vssseg7:
2300 case Intrinsic::riscv_vssseg8: {
2301 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2302 /*IsStrided*/ true);
2303 return;
2304 }
2305 case Intrinsic::riscv_vssseg2_mask:
2306 case Intrinsic::riscv_vssseg3_mask:
2307 case Intrinsic::riscv_vssseg4_mask:
2308 case Intrinsic::riscv_vssseg5_mask:
2309 case Intrinsic::riscv_vssseg6_mask:
2310 case Intrinsic::riscv_vssseg7_mask:
2311 case Intrinsic::riscv_vssseg8_mask: {
2312 selectVSSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2313 /*IsStrided*/ true);
2314 return;
2315 }
2316 case Intrinsic::riscv_vsoxseg2:
2317 case Intrinsic::riscv_vsoxseg3:
2318 case Intrinsic::riscv_vsoxseg4:
2319 case Intrinsic::riscv_vsoxseg5:
2320 case Intrinsic::riscv_vsoxseg6:
2321 case Intrinsic::riscv_vsoxseg7:
2322 case Intrinsic::riscv_vsoxseg8:
2323 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2324 /*IsOrdered*/ true);
2325 return;
2326 case Intrinsic::riscv_vsuxseg2:
2327 case Intrinsic::riscv_vsuxseg3:
2328 case Intrinsic::riscv_vsuxseg4:
2329 case Intrinsic::riscv_vsuxseg5:
2330 case Intrinsic::riscv_vsuxseg6:
2331 case Intrinsic::riscv_vsuxseg7:
2332 case Intrinsic::riscv_vsuxseg8:
2333 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ false,
2334 /*IsOrdered*/ false);
2335 return;
2336 case Intrinsic::riscv_vsoxseg2_mask:
2337 case Intrinsic::riscv_vsoxseg3_mask:
2338 case Intrinsic::riscv_vsoxseg4_mask:
2339 case Intrinsic::riscv_vsoxseg5_mask:
2340 case Intrinsic::riscv_vsoxseg6_mask:
2341 case Intrinsic::riscv_vsoxseg7_mask:
2342 case Intrinsic::riscv_vsoxseg8_mask:
2343 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2344 /*IsOrdered*/ true);
2345 return;
2346 case Intrinsic::riscv_vsuxseg2_mask:
2347 case Intrinsic::riscv_vsuxseg3_mask:
2348 case Intrinsic::riscv_vsuxseg4_mask:
2349 case Intrinsic::riscv_vsuxseg5_mask:
2350 case Intrinsic::riscv_vsuxseg6_mask:
2351 case Intrinsic::riscv_vsuxseg7_mask:
2352 case Intrinsic::riscv_vsuxseg8_mask:
2353 selectVSXSEG(Node, NF: getSegInstNF(Intrinsic: IntNo), /*IsMasked*/ true,
2354 /*IsOrdered*/ false);
2355 return;
2356 case Intrinsic::riscv_vsoxei:
2357 case Intrinsic::riscv_vsoxei_mask:
2358 case Intrinsic::riscv_vsuxei:
2359 case Intrinsic::riscv_vsuxei_mask: {
2360 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2361 IntNo == Intrinsic::riscv_vsuxei_mask;
2362 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2363 IntNo == Intrinsic::riscv_vsoxei_mask;
2364
2365 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2366 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2367
2368 unsigned CurOp = 2;
2369 SmallVector<SDValue, 8> Operands;
2370 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2371
2372 MVT IndexVT;
2373 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2374 /*IsStridedOrIndexed*/ true, Operands,
2375 /*IsLoad=*/false, IndexVT: &IndexVT);
2376
2377 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2378 "Element count mismatch");
2379
2380 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2381 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT);
2382 unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits());
2383 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2384 report_fatal_error(reason: "The V extension does not support EEW=64 for index "
2385 "values when XLEN=32");
2386 }
2387 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2388 Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW,
2389 LMUL: static_cast<unsigned>(LMUL), IndexLMUL: static_cast<unsigned>(IndexLMUL));
2390 MachineSDNode *Store =
2391 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2392
2393 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2394 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()});
2395
2396 ReplaceNode(F: Node, T: Store);
2397 return;
2398 }
2399 case Intrinsic::riscv_vsm:
2400 case Intrinsic::riscv_vse:
2401 case Intrinsic::riscv_vse_mask:
2402 case Intrinsic::riscv_vsse:
2403 case Intrinsic::riscv_vsse_mask: {
2404 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2405 IntNo == Intrinsic::riscv_vsse_mask;
2406 bool IsStrided =
2407 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2408
2409 MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0);
2410 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2411
2412 unsigned CurOp = 2;
2413 SmallVector<SDValue, 8> Operands;
2414 Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value.
2415
2416 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided,
2417 Operands);
2418
2419 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2420 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2421 Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2422 MachineSDNode *Store =
2423 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands);
2424 if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node))
2425 CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()});
2426
2427 ReplaceNode(F: Node, T: Store);
2428 return;
2429 }
2430 case Intrinsic::riscv_sf_vc_x_se:
2431 case Intrinsic::riscv_sf_vc_i_se:
2432 selectSF_VC_X_SE(Node);
2433 return;
2434 }
2435 break;
2436 }
2437 case ISD::BITCAST: {
2438 MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType();
2439 // Just drop bitcasts between vectors if both are fixed or both are
2440 // scalable.
2441 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2442 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2443 ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0));
2444 CurDAG->RemoveDeadNode(N: Node);
2445 return;
2446 }
2447 break;
2448 }
2449 case ISD::INSERT_SUBVECTOR:
2450 case RISCVISD::TUPLE_INSERT: {
2451 SDValue V = Node->getOperand(Num: 0);
2452 SDValue SubV = Node->getOperand(Num: 1);
2453 SDLoc DL(SubV);
2454 auto Idx = Node->getConstantOperandVal(Num: 2);
2455 MVT SubVecVT = SubV.getSimpleValueType();
2456
2457 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2458 MVT SubVecContainerVT = SubVecVT;
2459 // Establish the correct scalable-vector types for any fixed-length type.
2460 if (SubVecVT.isFixedLengthVector()) {
2461 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT: SubVecVT);
2462 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
2463 [[maybe_unused]] bool ExactlyVecRegSized =
2464 Subtarget->expandVScale(X: SubVecVT.getSizeInBits())
2465 .isKnownMultipleOf(RHS: Subtarget->expandVScale(X: VecRegSize));
2466 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2467 .getKnownMinValue()));
2468 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2469 }
2470 MVT ContainerVT = VT;
2471 if (VT.isFixedLengthVector())
2472 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2473
2474 const auto *TRI = Subtarget->getRegisterInfo();
2475 unsigned SubRegIdx;
2476 std::tie(args&: SubRegIdx, args&: Idx) =
2477 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2478 VecVT: ContainerVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
2479
2480 // If the Idx hasn't been completely eliminated then this is a subvector
2481 // insert which doesn't naturally align to a vector register. These must
2482 // be handled using instructions to manipulate the vector registers.
2483 if (Idx != 0)
2484 break;
2485
2486 RISCVVType::VLMUL SubVecLMUL =
2487 RISCVTargetLowering::getLMUL(VT: SubVecContainerVT);
2488 [[maybe_unused]] bool IsSubVecPartReg =
2489 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2490 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2491 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2492 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2493 V.isUndef()) &&
2494 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2495 "the subvector is smaller than a full-sized register");
2496
2497 // If we haven't set a SubRegIdx, then we must be going between
2498 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2499 if (SubRegIdx == RISCV::NoSubRegister) {
2500 unsigned InRegClassID =
2501 RISCVTargetLowering::getRegClassIDForVecVT(VT: ContainerVT);
2502 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2503 InRegClassID &&
2504 "Unexpected subvector extraction");
2505 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
2506 SDNode *NewNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
2507 dl: DL, VT, Op1: SubV, Op2: RC);
2508 ReplaceNode(F: Node, T: NewNode);
2509 return;
2510 }
2511
2512 SDValue Insert = CurDAG->getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V, Subreg: SubV);
2513 ReplaceNode(F: Node, T: Insert.getNode());
2514 return;
2515 }
2516 case ISD::EXTRACT_SUBVECTOR:
2517 case RISCVISD::TUPLE_EXTRACT: {
2518 SDValue V = Node->getOperand(Num: 0);
2519 auto Idx = Node->getConstantOperandVal(Num: 1);
2520 MVT InVT = V.getSimpleValueType();
2521 SDLoc DL(V);
2522
2523 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2524 MVT SubVecContainerVT = VT;
2525 // Establish the correct scalable-vector types for any fixed-length type.
2526 if (VT.isFixedLengthVector()) {
2527 assert(Idx == 0);
2528 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2529 }
2530 if (InVT.isFixedLengthVector())
2531 InVT = TLI.getContainerForFixedLengthVector(VT: InVT);
2532
2533 const auto *TRI = Subtarget->getRegisterInfo();
2534 unsigned SubRegIdx;
2535 std::tie(args&: SubRegIdx, args&: Idx) =
2536 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2537 VecVT: InVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI);
2538
2539 // If the Idx hasn't been completely eliminated then this is a subvector
2540 // extract which doesn't naturally align to a vector register. These must
2541 // be handled using instructions to manipulate the vector registers.
2542 if (Idx != 0)
2543 break;
2544
2545 // If we haven't set a SubRegIdx, then we must be going between
2546 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2547 if (SubRegIdx == RISCV::NoSubRegister) {
2548 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT: InVT);
2549 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2550 InRegClassID &&
2551 "Unexpected subvector extraction");
2552 SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT);
2553 SDNode *NewNode =
2554 CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT, Op1: V, Op2: RC);
2555 ReplaceNode(F: Node, T: NewNode);
2556 return;
2557 }
2558
2559 SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
2560 ReplaceNode(F: Node, T: Extract.getNode());
2561 return;
2562 }
2563 case RISCVISD::VMV_S_X_VL:
2564 case RISCVISD::VFMV_S_F_VL:
2565 case RISCVISD::VMV_V_X_VL:
2566 case RISCVISD::VFMV_V_F_VL: {
2567 // Try to match splat of a scalar load to a strided load with stride of x0.
2568 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2569 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2570 if (!Node->getOperand(Num: 0).isUndef())
2571 break;
2572 SDValue Src = Node->getOperand(Num: 1);
2573 auto *Ld = dyn_cast<LoadSDNode>(Val&: Src);
2574     // Can't fold a load update node because its second output is used,
2575     // so the load update node can't be removed.
2576 if (!Ld || Ld->isIndexed())
2577 break;
2578 EVT MemVT = Ld->getMemoryVT();
2579 // The memory VT should be the same size as the element type.
2580 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2581 break;
2582 if (!IsProfitableToFold(N: Src, U: Node, Root: Node) ||
2583 !IsLegalToFold(N: Src, U: Node, Root: Node, OptLevel: TM.getOptLevel()))
2584 break;
2585
2586 SDValue VL;
2587 if (IsScalarMove) {
2588 // We could deal with more VL if we update the VSETVLI insert pass to
2589 // avoid introducing more VSETVLI.
2590 if (!isOneConstant(V: Node->getOperand(Num: 2)))
2591 break;
2592 selectVLOp(N: Node->getOperand(Num: 2), VL);
2593 } else
2594 selectVLOp(N: Node->getOperand(Num: 2), VL);
2595
2596 unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits());
2597 SDValue SEW = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT);
2598
2599 // If VL=1, then we don't need to do a strided load and can just do a
2600 // regular load.
2601 bool IsStrided = !isOneConstant(V: VL);
2602
2603     // Only do a strided load if the subtarget has optimized zero-stride vector loads.
2604 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2605 break;
2606
2607 SmallVector<SDValue> Operands = {
2608 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT), 0),
2609 Ld->getBasePtr()};
2610 if (IsStrided)
2611 Operands.push_back(Elt: CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT));
2612 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
2613 SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT);
2614 Operands.append(IL: {VL, SEW, PolicyOp, Ld->getChain()});
2615
2616 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2617 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2618 /*IsMasked*/ Masked: false, Strided: IsStrided, /*FF*/ false,
2619 Log2SEW, LMUL: static_cast<unsigned>(LMUL));
2620 MachineSDNode *Load =
2621 CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, ResultTys: {VT, MVT::Other}, Ops: Operands);
2622 // Update the chain.
2623 ReplaceUses(F: Src.getValue(R: 1), T: SDValue(Load, 1));
2624 // Record the mem-refs
2625 CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {Ld->getMemOperand()});
2626 // Replace the splat with the vlse.
2627 ReplaceNode(F: Node, T: Load);
2628 return;
2629 }
2630 case ISD::PREFETCH:
2631 unsigned Locality = Node->getConstantOperandVal(Num: 3);
2632 if (Locality > 2)
2633 break;
2634
2635 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Val: Node)) {
2636 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2637 MMO->setFlags(MachineMemOperand::MONonTemporal);
2638
2639 int NontemporalLevel = 0;
2640 switch (Locality) {
2641 case 0:
2642 NontemporalLevel = 3; // NTL.ALL
2643 break;
2644 case 1:
2645 NontemporalLevel = 1; // NTL.PALL
2646 break;
2647 case 2:
2648 NontemporalLevel = 0; // NTL.P1
2649 break;
2650 default:
2651 llvm_unreachable("unexpected locality value.");
2652 }
2653
2654 if (NontemporalLevel & 0b1)
2655 MMO->setFlags(MONontemporalBit0);
2656 if (NontemporalLevel & 0b10)
2657 MMO->setFlags(MONontemporalBit1);
2658 }
2659 break;
2660 }
2661
2662 // Select the default instruction.
2663 SelectCode(N: Node);
2664}
2665
2666bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2667 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2668 std::vector<SDValue> &OutOps) {
2669 // Always produce a register and immediate operand, as expected by
2670 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2671 switch (ConstraintID) {
2672 case InlineAsm::ConstraintCode::o:
2673 case InlineAsm::ConstraintCode::m: {
2674 SDValue Op0, Op1;
2675 [[maybe_unused]] bool Found = SelectAddrRegImm(Addr: Op, Base&: Op0, Offset&: Op1);
2676 assert(Found && "SelectAddrRegImm should always succeed");
2677 OutOps.push_back(x: Op0);
2678 OutOps.push_back(x: Op1);
2679 return false;
2680 }
2681 case InlineAsm::ConstraintCode::A:
2682 OutOps.push_back(x: Op);
2683 OutOps.push_back(
2684 x: CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget->getXLenVT()));
2685 return false;
2686 default:
2687 report_fatal_error(reason: "Unexpected asm memory constraint " +
2688 InlineAsm::getMemConstraintName(C: ConstraintID));
2689 }
2690
2691 return true;
2692}
2693
2694bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2695 SDValue &Offset) {
2696 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Addr)) {
2697 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT: Subtarget->getXLenVT());
2698 Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT: Subtarget->getXLenVT());
2699 return true;
2700 }
2701
2702 return false;
2703}
2704
2705// Fold constant addresses.
2706static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2707 const MVT VT, const RISCVSubtarget *Subtarget,
2708 SDValue Addr, SDValue &Base, SDValue &Offset,
2709 bool IsPrefetch = false) {
2710 if (!isa<ConstantSDNode>(Val: Addr))
2711 return false;
2712
2713 int64_t CVal = cast<ConstantSDNode>(Val&: Addr)->getSExtValue();
2714
2715 // If the constant is a simm12, we can fold the whole constant and use X0 as
2716 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2717 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
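  // e.g. CVal = 0x12345678 splits into LUI 0x12345 as the base and a folded
  // simm12 offset of 0x678.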
2718 int64_t Lo12 = SignExtend64<12>(x: CVal);
2719 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2720 if (!Subtarget->is64Bit() || isInt<32>(x: Hi)) {
2721 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2722 return false;
2723 if (Hi) {
2724 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2725 Base = SDValue(
2726 CurDAG->getMachineNode(Opcode: RISCV::LUI, dl: DL, VT,
2727 Op1: CurDAG->getTargetConstant(Val: Hi20, DL, VT)),
2728 0);
2729 } else {
2730 Base = CurDAG->getRegister(Reg: RISCV::X0, VT);
2731 }
2732 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
2733 return true;
2734 }
2735
2736 // Ask how constant materialization would handle this constant.
2737 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: CVal, STI: *Subtarget);
2738
2739 // If the last instruction would be an ADDI, we can fold its immediate and
2740 // emit the rest of the sequence as the base.
2741 if (Seq.back().getOpcode() != RISCV::ADDI)
2742 return false;
2743 Lo12 = Seq.back().getImm();
2744 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2745 return false;
2746
2747 // Drop the last instruction.
2748 Seq.pop_back();
2749 assert(!Seq.empty() && "Expected more instructions in sequence");
2750
2751 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2752 Offset = CurDAG->getSignedTargetConstant(Val: Lo12, DL, VT);
2753 return true;
2754}
2755
2756// Is this ADD instruction only used as the base pointer of scalar loads and
2757// stores?
2758static bool isWorthFoldingAdd(SDValue Add) {
2759 for (auto *User : Add->users()) {
2760 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2761 User->getOpcode() != ISD::ATOMIC_LOAD &&
2762 User->getOpcode() != ISD::ATOMIC_STORE)
2763 return false;
2764 EVT VT = cast<MemSDNode>(Val: User)->getMemoryVT();
2765 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2766 VT != MVT::f64)
2767 return false;
2768 // Don't allow stores of the value. It must be used as the address.
2769 if (User->getOpcode() == ISD::STORE &&
2770 cast<StoreSDNode>(Val: User)->getValue() == Add)
2771 return false;
2772 if (User->getOpcode() == ISD::ATOMIC_STORE &&
2773 cast<AtomicSDNode>(Val: User)->getVal() == Add)
2774 return false;
2775 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
2776 return false;
2777 }
2778
2779 return true;
2780}
2781
2782bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2783 unsigned MaxShiftAmount,
2784 SDValue &Base, SDValue &Index,
2785 SDValue &Scale) {
2786 EVT VT = Addr.getSimpleValueType();
2787 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2788 SDValue &Shift) {
2789 uint64_t ShiftAmt = 0;
2790 Index = N;
2791
2792 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
2793 // Only match shifts by a value in range [0, MaxShiftAmount].
2794 if (N.getConstantOperandVal(i: 1) <= MaxShiftAmount) {
2795 Index = N.getOperand(i: 0);
2796 ShiftAmt = N.getConstantOperandVal(i: 1);
2797 }
2798 }
2799
2800 Shift = CurDAG->getTargetConstant(Val: ShiftAmt, DL: SDLoc(N), VT);
2801 return ShiftAmt != 0;
2802 };
2803
2804 if (Addr.getOpcode() == ISD::ADD) {
2805 if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
2806 SDValue AddrB = Addr.getOperand(i: 0);
2807 if (AddrB.getOpcode() == ISD::ADD &&
2808 UnwrapShl(AddrB.getOperand(i: 0), Index, Scale) &&
2809 !isa<ConstantSDNode>(Val: AddrB.getOperand(i: 1)) &&
2810 isInt<12>(x: C1->getSExtValue())) {
2811 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2812 SDValue C1Val =
2813 CurDAG->getTargetConstant(Val: C1->getZExtValue(), DL: SDLoc(Addr), VT);
2814 Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT,
2815 Op1: AddrB.getOperand(i: 1), Op2: C1Val),
2816 0);
2817 return true;
2818 }
2819 } else if (UnwrapShl(Addr.getOperand(i: 0), Index, Scale)) {
2820 Base = Addr.getOperand(i: 1);
2821 return true;
2822 } else {
2823 UnwrapShl(Addr.getOperand(i: 1), Index, Scale);
2824 Base = Addr.getOperand(i: 0);
2825 return true;
2826 }
2827 }
2828
2829 return false;
2830}
2831
2832bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2833 SDValue &Offset) {
2834 if (SelectAddrFrameIndex(Addr, Base, Offset))
2835 return true;
2836
2837 SDLoc DL(Addr);
2838 MVT VT = Addr.getSimpleValueType();
2839
2840 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2841 Base = Addr.getOperand(i: 0);
2842 Offset = Addr.getOperand(i: 1);
2843 return true;
2844 }
2845
2846 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
2847 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
2848     if (isInt<12>(x: CVal)) {
2849 Base = Addr.getOperand(i: 0);
2850 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2851 SDValue LoOperand = Base.getOperand(i: 1);
2852 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: LoOperand)) {
2853 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2854 // (its low part, really), then we can rely on the alignment of that
2855 // variable to provide a margin of safety before low part can overflow
2856 // the 12 bits of the load/store offset. Check if CVal falls within
2857 // that margin; if so (low part + CVal) can't overflow.
2858 const DataLayout &DL = CurDAG->getDataLayout();
2859 Align Alignment = commonAlignment(
2860 A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset());
2861 if ((CVal == 0 || Alignment > CVal)) {
2862 int64_t CombinedOffset = CVal + GA->getOffset();
2863 Base = Base.getOperand(i: 0);
2864 Offset = CurDAG->getTargetGlobalAddress(
2865 GV: GA->getGlobal(), DL: SDLoc(LoOperand), VT: LoOperand.getValueType(),
2866 offset: CombinedOffset, TargetFlags: GA->getTargetFlags());
2867 return true;
2868 }
2869 }
2870 }
2871
2872 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
2873 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
2874 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
2875 return true;
2876 }
2877 }
2878
2879 // Handle ADD with large immediates.
2880 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
2881 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
2882     assert(!isInt<12>(CVal) &&
2883            "simm12 not already handled?");
2884
2885 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2886 // an ADDI for part of the offset and fold the rest into the load/store.
2887 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
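    // e.g. an offset of 3000 becomes (ADDI base, 2047) plus a folded
    // load/store offset of 953.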
2888 if (CVal >= -4096 && CVal <= 4094) {
2889 int64_t Adj = CVal < 0 ? -2048 : 2047;
2890 Base = SDValue(
2891 CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
2892 Op2: CurDAG->getSignedTargetConstant(Val: Adj, DL, VT)),
2893 0);
2894 Offset = CurDAG->getSignedTargetConstant(Val: CVal - Adj, DL, VT);
2895 return true;
2896 }
2897
2898 // For larger immediates, we might be able to save one instruction from
2899 // constant materialization by folding the Lo12 bits of the immediate into
2900 // the address. We should only do this if the ADD is only used by loads and
2901 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2902 // separately with the full materialized immediate creating extra
2903 // instructions.
2904 if (isWorthFoldingAdd(Add: Addr) &&
2905 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
2906 Offset, /*IsPrefetch=*/false)) {
2907 // Insert an ADD instruction with the materialized Hi52 bits.
2908 Base = SDValue(
2909 CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
2910 0);
2911 return true;
2912 }
2913 }
2914
2915 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2916 /*IsPrefetch=*/false))
2917 return true;
2918
2919 Base = Addr;
2920 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
2921 return true;
2922}
2923
2924/// Similar to SelectAddrRegImm, except that the offset is restricted to an
2925/// unsigned nine-bit immediate.
2926bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
2927 SDValue &Offset) {
2928 if (SelectAddrFrameIndex(Addr, Base, Offset))
2929 return true;
2930
2931 SDLoc DL(Addr);
2932 MVT VT = Addr.getSimpleValueType();
2933
2934 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
2935 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
2936 if (isUInt<9>(x: CVal)) {
2937 Base = Addr.getOperand(i: 0);
2938
2939 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
2940 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
2941 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
2942 return true;
2943 }
2944 }
2945
2946 Base = Addr;
2947 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
2948 return true;
2949}
2950
2951/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2952/// Offset should be all zeros.
2953bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2954 SDValue &Offset) {
2955 if (SelectAddrFrameIndex(Addr, Base, Offset))
2956 return true;
2957
2958 SDLoc DL(Addr);
2959 MVT VT = Addr.getSimpleValueType();
2960
2961 if (CurDAG->isBaseWithConstantOffset(Op: Addr)) {
2962 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
2963 if (isInt<12>(x: CVal)) {
2964 Base = Addr.getOperand(i: 0);
2965
2966 // Early-out if not a valid offset.
2967 if ((CVal & 0b11111) != 0) {
2968 Base = Addr;
2969 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
2970 return true;
2971 }
2972
2973 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base))
2974 Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT);
2975 Offset = CurDAG->getSignedTargetConstant(Val: CVal, DL, VT);
2976 return true;
2977 }
2978 }
2979
2980 // Handle ADD with large immediates.
2981 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) {
2982 int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue();
2983 assert(!isInt<12>(CVal) && "simm12 not already handled?");
2984
2985 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2986 // one instruction by folding an adjustment of -2048 or 2016 into the address.
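// For example, CVal == 3000 becomes ADDI base, 984 with a folded offset of
// 2016, and CVal == -3000 becomes ADDI base, -952 with a folded offset of
// -2048; both folded offsets keep their low 5 bits zero, as required here.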
2987 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2988 int64_t Adj = CVal < 0 ? -2048 : 2016;
2989 int64_t AdjustedOffset = CVal - Adj;
2990 Base =
2991 SDValue(CurDAG->getMachineNode(
2992 Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0),
2993 Op2: CurDAG->getSignedTargetConstant(Val: AdjustedOffset, DL, VT)),
2994 0);
2995 Offset = CurDAG->getSignedTargetConstant(Val: Adj, DL, VT);
2996 return true;
2997 }
2998
2999 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base,
3000 Offset, /*IsPrefetch=*/true)) {
3001 // Insert an ADD instruction with the materialized Hi52 bits.
3002 Base = SDValue(
3003 CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base),
3004 0);
3005 return true;
3006 }
3007 }
3008
3009 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3010 /*IsPrefetch=*/true))
3011 return true;
3012
3013 Base = Addr;
3014 Offset = CurDAG->getTargetConstant(Val: 0, DL, VT);
3015 return true;
3016}
3017
3018bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3019 SDValue &Offset) {
3020 if (Addr.getOpcode() != ISD::ADD)
3021 return false;
3022
3023 if (isa<ConstantSDNode>(Val: Addr.getOperand(i: 1)))
3024 return false;
3025
3026 Base = Addr.getOperand(i: 0);
3027 Offset = Addr.getOperand(i: 1);
3028 return true;
3029}
3030
3031bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3032 SDValue &ShAmt) {
3033 ShAmt = N;
3034
3035 // Peek through zext.
3036 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3037 ShAmt = ShAmt.getOperand(i: 0);
3038
3039 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3040 // amount. If there is an AND on the shift amount, we can bypass it if it
3041 // doesn't affect any of those bits.
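// For example, on RV64 (srl X, (and Y, 63)) only needs the low 6 bits of the
// shift amount, which SRL reads anyway, so the AND is bypassed and Y is used
// directly as the shift amount.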
3042 if (ShAmt.getOpcode() == ISD::AND &&
3043 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
3044 const APInt &AndMask = ShAmt.getConstantOperandAPInt(i: 1);
3045
3046 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3047 // mask that covers the bits needed to represent all shift amounts.
3048 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3049 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3050
3051 if (ShMask.isSubsetOf(RHS: AndMask)) {
3052 ShAmt = ShAmt.getOperand(i: 0);
3053 } else {
3054 // SimplifyDemandedBits may have optimized the mask so try restoring any
3055 // bits that are known zero.
3056 KnownBits Known = CurDAG->computeKnownBits(Op: ShAmt.getOperand(i: 0));
3057 if (!ShMask.isSubsetOf(RHS: AndMask | Known.Zero))
3058 return true;
3059 ShAmt = ShAmt.getOperand(i: 0);
3060 }
3061 }
3062
3063 if (ShAmt.getOpcode() == ISD::ADD &&
3064 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) {
3065 uint64_t Imm = ShAmt.getConstantOperandVal(i: 1);
3066 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3067 // to avoid the ADD.
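// For example, (sll X, (add Y, 64)) on RV64 shifts by the same amount as
// (sll X, Y), because only the low 6 bits of the amount are read.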
3068 if (Imm != 0 && Imm % ShiftWidth == 0) {
3069 ShAmt = ShAmt.getOperand(i: 0);
3070 return true;
3071 }
3072 } else if (ShAmt.getOpcode() == ISD::SUB &&
3073 isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 0))) {
3074 uint64_t Imm = ShAmt.getConstantOperandVal(i: 0);
3075 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3076 // generate a NEG instead of a SUB of a constant.
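// For example, (srl X, (sub 64, Y)) on RV64 can shift by (neg Y) instead,
// since 64 - Y and -Y agree in their low 6 bits.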
3077 if (Imm != 0 && Imm % ShiftWidth == 0) {
3078 SDLoc DL(ShAmt);
3079 EVT VT = ShAmt.getValueType();
3080 SDValue Zero = CurDAG->getRegister(Reg: RISCV::X0, VT);
3081 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3082 MachineSDNode *Neg = CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT, Op1: Zero,
3083 Op2: ShAmt.getOperand(i: 1));
3084 ShAmt = SDValue(Neg, 0);
3085 return true;
3086 }
3087 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3088 // to generate a NOT instead of a SUB of a constant.
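// For example, (sll X, (sub 63, Y)) on RV64 can shift by (not Y) instead,
// since 63 - Y and ~Y (== -1 - Y) agree in their low 6 bits.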
3089 if (Imm % ShiftWidth == ShiftWidth - 1) {
3090 SDLoc DL(ShAmt);
3091 EVT VT = ShAmt.getValueType();
3092 MachineSDNode *Not = CurDAG->getMachineNode(
3093 Opcode: RISCV::XORI, dl: DL, VT, Op1: ShAmt.getOperand(i: 1),
3094 Op2: CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/IsTarget: true));
3095 ShAmt = SDValue(Not, 0);
3096 return true;
3097 }
3098 }
3099
3100 return true;
3101}
3102
3103/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3104/// check for equality with 0. This function emits instructions that convert the
3105/// seteq/setne into something that can be compared with 0.
3106/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3107/// ISD::SETNE).
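/// For example, (setne X, 5) is selected here as (ADDI X, -5), which is zero
/// iff X == 5; the user of this value then only needs a compare against zero
/// (e.g. BNEZ).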
3108bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3109 SDValue &Val) {
3110 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3111 "Unexpected condition code!");
3112
3113 // We're looking for a setcc.
3114 if (N->getOpcode() != ISD::SETCC)
3115 return false;
3116
3117 // Must be an equality comparison.
3118 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
3119 if (CCVal != ExpectedCCVal)
3120 return false;
3121
3122 SDValue LHS = N->getOperand(Num: 0);
3123 SDValue RHS = N->getOperand(Num: 1);
3124
3125 if (!LHS.getValueType().isScalarInteger())
3126 return false;
3127
3128 // If the RHS is 0, we don't need any extra instructions; return the LHS.
3129 if (isNullConstant(V: RHS)) {
3130 Val = LHS;
3131 return true;
3132 }
3133
3134 SDLoc DL(N);
3135
3136 if (auto *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3137 int64_t CVal = C->getSExtValue();
3138 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3139 // non-zero otherwise.
3140 if (CVal == -2048) {
3141 Val = SDValue(
3142 CurDAG->getMachineNode(
3143 Opcode: RISCV::XORI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3144 Op2: CurDAG->getSignedTargetConstant(Val: CVal, DL, VT: N->getValueType(ResNo: 0))),
3145 0);
3146 return true;
3147 }
3148 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
3149 // LHS is equal to the RHS and non-zero otherwise.
3150 if (isInt<12>(x: CVal) || CVal == 2048) {
3151 Val = SDValue(
3152 CurDAG->getMachineNode(
3153 Opcode: RISCV::ADDI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3154 Op2: CurDAG->getSignedTargetConstant(Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
3155 0);
3156 return true;
3157 }
3158 if (isPowerOf2_64(Value: CVal) && Subtarget->hasStdExtZbs()) {
3159 Val = SDValue(
3160 CurDAG->getMachineNode(
3161 Opcode: RISCV::BINVI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3162 Op2: CurDAG->getTargetConstant(Val: Log2_64(Value: CVal), DL, VT: N->getValueType(ResNo: 0))),
3163 0);
3164 return true;
3165 }
3166 // Same as the addi case above but for larger immediates (signed 26-bit) use
3167 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3168 // anything which can be done with a single lui as it might be compressible.
3169 if (Subtarget->hasVendorXqcilia() && isInt<26>(x: CVal) &&
3170 (CVal & 0xFFF) != 0) {
3171 Val = SDValue(
3172 CurDAG->getMachineNode(
3173 Opcode: RISCV::QC_E_ADDI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS,
3174 Op2: CurDAG->getSignedTargetConstant(Val: -CVal, DL, VT: N->getValueType(ResNo: 0))),
3175 0);
3176 return true;
3177 }
3178 }
3179
3180 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3181 // equal and a non-zero value if they aren't.
3182 Val = SDValue(
3183 CurDAG->getMachineNode(Opcode: RISCV::XOR, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, Op2: RHS), 0);
3184 return true;
3185}
3186
3187bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3188 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3189 cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT().getSizeInBits() == Bits) {
3190 Val = N.getOperand(i: 0);
3191 return true;
3192 }
3193
3194 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3195 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(Val: N.getOperand(i: 1)))
3196 return N;
3197
3198 SDValue N0 = N.getOperand(i: 0);
3199 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
3200 N.getConstantOperandVal(i: 1) == ShiftAmt &&
3201 N0.getConstantOperandVal(i: 1) == ShiftAmt)
3202 return N0.getOperand(i: 0);
3203
3204 return N;
3205 };
3206
3207 MVT VT = N.getSimpleValueType();
3208 if (CurDAG->ComputeNumSignBits(Op: N) > (VT.getSizeInBits() - Bits)) {
3209 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3210 return true;
3211 }
3212
3213 return false;
3214}
3215
3216bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3217 if (N.getOpcode() == ISD::AND) {
3218 auto *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
3219 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) {
3220 Val = N.getOperand(i: 0);
3221 return true;
3222 }
3223 }
3224 MVT VT = N.getSimpleValueType();
3225 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: Bits);
3226 if (CurDAG->MaskedValueIsZero(Op: N, Mask)) {
3227 Val = N;
3228 return true;
3229 }
3230
3231 return false;
3232}
3233
3234/// Look for various patterns that can be done with a SHL that can be folded
3235/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3236/// SHXADD we are trying to match.
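/// For example, for sh3add (ShAmt == 3), (and (shl y, 1), 0xFFFFFFFFFFFFFFF8)
/// can be selected as (srli y, 2) with the sh3add supplying the remaining
/// shift by 3, since ((y >> 2) << 3) == ((y << 1) & ~7).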
3237bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3238 SDValue &Val) {
3239 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
3240 SDValue N0 = N.getOperand(i: 0);
3241
3242 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3243 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3244 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
3245 uint64_t Mask = N.getConstantOperandVal(i: 1);
3246 unsigned C2 = N0.getConstantOperandVal(i: 1);
3247
3248 unsigned XLen = Subtarget->getXLen();
3249 if (LeftShift)
3250 Mask &= maskTrailingZeros<uint64_t>(N: C2);
3251 else
3252 Mask &= maskTrailingOnes<uint64_t>(N: XLen - C2);
3253
3254 if (isShiftedMask_64(Value: Mask)) {
3255 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
3256 unsigned Trailing = llvm::countr_zero(Val: Mask);
3257 if (Trailing != ShAmt)
3258 return false;
3259
3260 unsigned Opcode;
3261 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3262 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3263 // followed by a SHXADD with c3 for the X amount.
3264 if (LeftShift && Leading == 0 && C2 < Trailing)
3265 Opcode = RISCV::SRLI;
3266 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3267 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3268 // followed by a SHXADD with c3 for the X amount.
3269 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3270 Opcode = RISCV::SRLIW;
3271 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3272 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3273 // followed by a SHXADD using c3 for the X amount.
3274 else if (!LeftShift && Leading == C2)
3275 Opcode = RISCV::SRLI;
3276 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3277 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3278 // followed by a SHXADD using c3 for the X amount.
3279 else if (!LeftShift && Leading == 32 + C2)
3280 Opcode = RISCV::SRLIW;
3281 else
3282 return false;
3283
3284 SDLoc DL(N);
3285 EVT VT = N.getValueType();
3286 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3287 Val = SDValue(
3288 CurDAG->getMachineNode(Opcode, dl: DL, VT, Op1: N0.getOperand(i: 0),
3289 Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)),
3290 0);
3291 return true;
3292 }
3293 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3294 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
3295 uint64_t Mask = N.getConstantOperandVal(i: 1);
3296 unsigned C2 = N0.getConstantOperandVal(i: 1);
3297
3298 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3299 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3300 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3301 // the X amount.
3302 if (isShiftedMask_64(Value: Mask)) {
3303 unsigned XLen = Subtarget->getXLen();
3304 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
3305 unsigned Trailing = llvm::countr_zero(Val: Mask);
3306 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3307 SDLoc DL(N);
3308 EVT VT = N.getValueType();
3309 Val = SDValue(CurDAG->getMachineNode(
3310 Opcode: RISCV::SRAI, dl: DL, VT, Op1: N0.getOperand(i: 0),
3311 Op2: CurDAG->getTargetConstant(Val: C2 - Leading, DL, VT)),
3312 0);
3313 Val = SDValue(CurDAG->getMachineNode(
3314 Opcode: RISCV::SRLI, dl: DL, VT, Op1: Val,
3315 Op2: CurDAG->getTargetConstant(Val: Leading + ShAmt, DL, VT)),
3316 0);
3317 return true;
3318 }
3319 }
3320 }
3321 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3322 (LeftShift || N.getOpcode() == ISD::SRL) &&
3323 isa<ConstantSDNode>(Val: N.getOperand(i: 1))) {
3324 SDValue N0 = N.getOperand(i: 0);
3325 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3326 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
3327 uint64_t Mask = N0.getConstantOperandVal(i: 1);
3328 if (isShiftedMask_64(Value: Mask)) {
3329 unsigned C1 = N.getConstantOperandVal(i: 1);
3330 unsigned XLen = Subtarget->getXLen();
3331 unsigned Leading = XLen - llvm::bit_width(Value: Mask);
3332 unsigned Trailing = llvm::countr_zero(Val: Mask);
3333 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3334 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3335 if (LeftShift && Leading == 32 && Trailing > 0 &&
3336 (Trailing + C1) == ShAmt) {
3337 SDLoc DL(N);
3338 EVT VT = N.getValueType();
3339 Val = SDValue(CurDAG->getMachineNode(
3340 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
3341 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
3342 0);
3343 return true;
3344 }
3345 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3346 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3347 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3348 (Trailing - C1) == ShAmt) {
3349 SDLoc DL(N);
3350 EVT VT = N.getValueType();
3351 Val = SDValue(CurDAG->getMachineNode(
3352 Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0),
3353 Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)),
3354 0);
3355 return true;
3356 }
3357 }
3358 }
3359 }
3360
3361 return false;
3362}
3363
3364/// Look for various patterns that can be done with a SHL that can be folded
3365/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3366/// SHXADD_UW we are trying to match.
3367bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3368 SDValue &Val) {
3369 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1)) &&
3370 N.hasOneUse()) {
3371 SDValue N0 = N.getOperand(i: 0);
3372 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
3373 N0.hasOneUse()) {
3374 uint64_t Mask = N.getConstantOperandVal(i: 1);
3375 unsigned C2 = N0.getConstantOperandVal(i: 1);
3376
3377 Mask &= maskTrailingZeros<uint64_t>(N: C2);
3378
3379 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3380 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3381 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
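// For example, for sh2add.uw (ShAmt == 2), (and (shl y, 4), 0x3FFFFFFF0) has
// 30 leading zeros and 4 trailing zeros, so it becomes (slli y, 2) with the
// sh2add.uw supplying the zero-extension and the remaining shift by 2.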
3382 if (isShiftedMask_64(Value: Mask)) {
3383 unsigned Leading = llvm::countl_zero(Val: Mask);
3384 unsigned Trailing = llvm::countr_zero(Val: Mask);
3385 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3386 SDLoc DL(N);
3387 EVT VT = N.getValueType();
3388 Val = SDValue(CurDAG->getMachineNode(
3389 Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0),
3390 Op2: CurDAG->getTargetConstant(Val: C2 - ShAmt, DL, VT)),
3391 0);
3392 return true;
3393 }
3394 }
3395 }
3396 }
3397
3398 return false;
3399}
3400
3401bool RISCVDAGToDAGISel::orIsAdd(const SDNode *N) const {
3402 if (N->getFlags().hasDisjoint())
3403 return true;
3404 KnownBits Known0 = CurDAG->computeKnownBits(Op: N->getOperand(Num: 0), Depth: 0);
3405 KnownBits Known1 = CurDAG->computeKnownBits(Op: N->getOperand(Num: 1), Depth: 0);
3406 return KnownBits::haveNoCommonBitsSet(LHS: Known0, RHS: Known1);
3407}
3408
3409bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3410 SDValue N, SDValue &Val) {
3411 int OrigCost = RISCVMatInt::getIntMatCost(Val: APInt(64, OrigImm), Size: 64, STI: *Subtarget,
3412 /*CompressionCost=*/true);
3413 int Cost = RISCVMatInt::getIntMatCost(Val: APInt(64, Imm), Size: 64, STI: *Subtarget,
3414 /*CompressionCost=*/true);
3415 if (OrigCost <= Cost)
3416 return false;
3417
3418 Val = selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm, Subtarget: *Subtarget);
3419 return true;
3420}
3421
3422bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3423 if (!isa<ConstantSDNode>(Val: N))
3424 return false;
3425 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
3426 if ((Imm >> 31) != 1)
3427 return false;
3428
3429 for (const SDNode *U : N->users()) {
3430 switch (U->getOpcode()) {
3431 case ISD::ADD:
3432 break;
3433 case ISD::OR:
3434 if (orIsAdd(N: U))
3435 break;
3436 return false;
3437 default:
3438 return false;
3439 }
3440 }
3441
3442 return selectImm64IfCheaper(Imm: 0xffffffff00000000 | Imm, OrigImm: Imm, N, Val);
3443}
3444
3445bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3446 if (!isa<ConstantSDNode>(Val: N))
3447 return false;
3448 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
3449 if (isInt<32>(x: Imm))
3450 return false;
3451
3452 for (const SDNode *U : N->users()) {
3453 switch (U->getOpcode()) {
3454 case ISD::ADD:
3455 break;
3456 case RISCVISD::VMV_V_X_VL:
3457 if (!all_of(Range: U->users(), P: [](const SDNode *V) {
3458 return V->getOpcode() == ISD::ADD ||
3459 V->getOpcode() == RISCVISD::ADD_VL;
3460 }))
3461 return false;
3462 break;
3463 default:
3464 return false;
3465 }
3466 }
3467
3468 return selectImm64IfCheaper(Imm: -Imm, OrigImm: Imm, N, Val);
3469}
3470
3471bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3472 if (!isa<ConstantSDNode>(Val: N))
3473 return false;
3474 int64_t Imm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
3475
3476 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
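// For example, Imm == 0x12fff takes LUI+ADDI(W), while its bitwise inverse
// (0xfffffffffffed000) is a single LUI; ANDN/ORN/XNOR then undo the inversion.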
3477 if (isInt<32>(x: Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3478 return false;
3479
3480 // Abandon this transform if the constant is needed elsewhere.
3481 for (const SDNode *U : N->users()) {
3482 switch (U->getOpcode()) {
3483 case ISD::AND:
3484 case ISD::OR:
3485 case ISD::XOR:
3486 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3487 return false;
3488 break;
3489 case RISCVISD::VMV_V_X_VL:
3490 if (!Subtarget->hasStdExtZvkb())
3491 return false;
3492 if (!all_of(Range: U->users(), P: [](const SDNode *V) {
3493 return V->getOpcode() == ISD::AND ||
3494 V->getOpcode() == RISCVISD::AND_VL;
3495 }))
3496 return false;
3497 break;
3498 default:
3499 return false;
3500 }
3501 }
3502
3503 if (isInt<32>(x: Imm)) {
3504 Val =
3505 selectImm(CurDAG, DL: SDLoc(N), VT: N->getSimpleValueType(ResNo: 0), Imm: ~Imm, Subtarget: *Subtarget);
3506 return true;
3507 }
3508
3509 // For 64-bit constants, the instruction sequences get complex,
3510 // so we select inverted only if it's cheaper.
3511 return selectImm64IfCheaper(Imm: ~Imm, OrigImm: Imm, N, Val);
3512}
3513
3514static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3515 unsigned Bits,
3516 const TargetInstrInfo *TII) {
3517 unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: User->getMachineOpcode());
3518
3519 if (!MCOpcode)
3520 return false;
3521
3522 const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode());
3523 const uint64_t TSFlags = MCID.TSFlags;
3524 if (!RISCVII::hasSEWOp(TSFlags))
3525 return false;
3526 assert(RISCVII::hasVLOp(TSFlags));
3527
3528 unsigned ChainOpIdx = User->getNumOperands() - 1;
3529 bool HasChainOp = User->getOperand(Num: ChainOpIdx).getValueType() == MVT::Other;
3530 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3531 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
3532 const unsigned Log2SEW = User->getConstantOperandVal(Num: VLIdx + 1);
3533
3534 if (UserOpNo == VLIdx)
3535 return false;
3536
3537 auto NumDemandedBits =
3538 RISCV::getVectorLowDemandedScalarBits(Opcode: MCOpcode, Log2SEW);
3539 return NumDemandedBits && Bits >= *NumDemandedBits;
3540}
3541
3542// Return true if all users of this SDNode* only consume the lower \p Bits.
3543// This can be used to form W instructions for add/sub/mul/shl even when the
3544// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3545// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3546 // don't. The sext_inreg+add/sub/mul/shl will get selected as a W instruction,
3547 // but the plain add/sub/mul/shl would still be selected as a non-W instruction.
3548 // By checking the users we may be able to use a W instruction and CSE with
3549 // the other instruction if this has happened. We could try to detect that the
3550 // CSE opportunity exists before doing this, but that would be more complicated.
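// For example, an i64 ADD whose only user is an SW (which stores only the low
// 32 bits) can be selected as ADDW, letting it CSE with an ADDW selected for a
// sext_inreg user.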
3551bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3552 const unsigned Depth) const {
3553 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3554 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3555 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3556 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3557 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3558 isa<ConstantSDNode>(Node) || Depth != 0) &&
3559 "Unexpected opcode");
3560
3561 if (Depth >= SelectionDAG::MaxRecursionDepth)
3562 return false;
3563
3564 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3565 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3566 if (Depth == 0 && !Node->getValueType(ResNo: 0).isScalarInteger())
3567 return false;
3568
3569 for (SDUse &Use : Node->uses()) {
3570 SDNode *User = Use.getUser();
3571 // Users of this node should have already been instruction selected
3572 if (!User->isMachineOpcode())
3573 return false;
3574
3575 // TODO: Add more opcodes?
3576 switch (User->getMachineOpcode()) {
3577 default:
3578 if (vectorPseudoHasAllNBitUsers(User, UserOpNo: Use.getOperandNo(), Bits, TII))
3579 break;
3580 return false;
3581 case RISCV::ADDW:
3582 case RISCV::ADDIW:
3583 case RISCV::SUBW:
3584 case RISCV::MULW:
3585 case RISCV::SLLW:
3586 case RISCV::SLLIW:
3587 case RISCV::SRAW:
3588 case RISCV::SRAIW:
3589 case RISCV::SRLW:
3590 case RISCV::SRLIW:
3591 case RISCV::DIVW:
3592 case RISCV::DIVUW:
3593 case RISCV::REMW:
3594 case RISCV::REMUW:
3595 case RISCV::ROLW:
3596 case RISCV::RORW:
3597 case RISCV::RORIW:
3598 case RISCV::CLZW:
3599 case RISCV::CTZW:
3600 case RISCV::CPOPW:
3601 case RISCV::SLLI_UW:
3602 case RISCV::FMV_W_X:
3603 case RISCV::FCVT_H_W:
3604 case RISCV::FCVT_H_W_INX:
3605 case RISCV::FCVT_H_WU:
3606 case RISCV::FCVT_H_WU_INX:
3607 case RISCV::FCVT_S_W:
3608 case RISCV::FCVT_S_W_INX:
3609 case RISCV::FCVT_S_WU:
3610 case RISCV::FCVT_S_WU_INX:
3611 case RISCV::FCVT_D_W:
3612 case RISCV::FCVT_D_W_INX:
3613 case RISCV::FCVT_D_WU:
3614 case RISCV::FCVT_D_WU_INX:
3615 case RISCV::TH_REVW:
3616 case RISCV::TH_SRRIW:
3617 if (Bits >= 32)
3618 break;
3619 return false;
3620 case RISCV::SLL:
3621 case RISCV::SRA:
3622 case RISCV::SRL:
3623 case RISCV::ROL:
3624 case RISCV::ROR:
3625 case RISCV::BSET:
3626 case RISCV::BCLR:
3627 case RISCV::BINV:
3628 // Shift amount operands only use log2(Xlen) bits.
3629 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Value: Subtarget->getXLen()))
3630 break;
3631 return false;
3632 case RISCV::SLLI:
3633 // SLLI only uses the lower (XLen - ShAmt) bits.
3634 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(Num: 1))
3635 break;
3636 return false;
3637 case RISCV::ANDI:
3638 if (Bits >= (unsigned)llvm::bit_width(Value: User->getConstantOperandVal(Num: 1)))
3639 break;
3640 goto RecCheck;
3641 case RISCV::ORI: {
3642 uint64_t Imm = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue();
3643 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm))
3644 break;
3645 [[fallthrough]];
3646 }
3647 case RISCV::AND:
3648 case RISCV::OR:
3649 case RISCV::XOR:
3650 case RISCV::XORI:
3651 case RISCV::ANDN:
3652 case RISCV::ORN:
3653 case RISCV::XNOR:
3654 case RISCV::SH1ADD:
3655 case RISCV::SH2ADD:
3656 case RISCV::SH3ADD:
3657 RecCheck:
3658 if (hasAllNBitUsers(Node: User, Bits, Depth: Depth + 1))
3659 break;
3660 return false;
3661 case RISCV::SRLI: {
3662 unsigned ShAmt = User->getConstantOperandVal(Num: 1);
3663 // If we are shifting right by less than Bits, and users don't demand any
3664 // bits that were shifted into [Bits-1:0], then we can consider this as an
3665 // N-Bit user.
3666 if (Bits > ShAmt && hasAllNBitUsers(Node: User, Bits: Bits - ShAmt, Depth: Depth + 1))
3667 break;
3668 return false;
3669 }
3670 case RISCV::SEXT_B:
3671 case RISCV::PACKH:
3672 if (Bits >= 8)
3673 break;
3674 return false;
3675 case RISCV::SEXT_H:
3676 case RISCV::FMV_H_X:
3677 case RISCV::ZEXT_H_RV32:
3678 case RISCV::ZEXT_H_RV64:
3679 case RISCV::PACKW:
3680 if (Bits >= 16)
3681 break;
3682 return false;
3683 case RISCV::PACK:
3684 if (Bits >= (Subtarget->getXLen() / 2))
3685 break;
3686 return false;
3687 case RISCV::ADD_UW:
3688 case RISCV::SH1ADD_UW:
3689 case RISCV::SH2ADD_UW:
3690 case RISCV::SH3ADD_UW:
3691 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3692 // 32 bits.
3693 if (Use.getOperandNo() == 0 && Bits >= 32)
3694 break;
3695 return false;
3696 case RISCV::SB:
3697 if (Use.getOperandNo() == 0 && Bits >= 8)
3698 break;
3699 return false;
3700 case RISCV::SH:
3701 if (Use.getOperandNo() == 0 && Bits >= 16)
3702 break;
3703 return false;
3704 case RISCV::SW:
3705 if (Use.getOperandNo() == 0 && Bits >= 32)
3706 break;
3707 return false;
3708 }
3709 }
3710
3711 return true;
3712}
3713
3714// Select a constant that can be represented as (sign_extend(imm5) << imm2).
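// For example, 48 is encodable as 12 << 2 and -40 as -5 << 3, while 50 cannot
// be encoded (for every shift in [0, 3] either the quotient is not a simm5 or
// the shifted-out bits are not zero).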
3715bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3716 SDValue &Shl2) {
3717 if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) {
3718 int64_t Offset = C->getSExtValue();
3719 unsigned Shift;
3720 for (Shift = 0; Shift < 4; Shift++)
3721 if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3722 break;
3723
3724 // Constant cannot be encoded.
3725 if (Shift == 4)
3726 return false;
3727
3728 EVT Ty = N->getValueType(ResNo: 0);
3729 Simm5 = CurDAG->getSignedTargetConstant(Val: Offset >> Shift, DL: SDLoc(N), VT: Ty);
3730 Shl2 = CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(N), VT: Ty);
3731 return true;
3732 }
3733
3734 return false;
3735}
3736
3737// Select VL as a 5 bit immediate or a value that will become a register. This
3738// allows us to choose between VSETIVLI or VSETVLI later.
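// For example, a constant VL of 8 is kept as an immediate so a VSETIVLI can
// encode it, a constant of 40 or a non-constant VL ends up in a register for
// VSETVLI, and an all-ones constant or X0 becomes the VLMax sentinel.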
3739bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3740 auto *C = dyn_cast<ConstantSDNode>(Val&: N);
3741 if (C && isUInt<5>(x: C->getZExtValue())) {
3742 VL = CurDAG->getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(N),
3743 VT: N->getValueType(ResNo: 0));
3744 } else if (C && C->isAllOnes()) {
3745 // Treat all ones as VLMax.
3746 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
3747 VT: N->getValueType(ResNo: 0));
3748 } else if (isa<RegisterSDNode>(Val: N) &&
3749 cast<RegisterSDNode>(Val&: N)->getReg() == RISCV::X0) {
3750 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3751 // as the register class. Convert X0 to a special immediate to pass the
3752 // MachineVerifier. This is recognized specially by the vsetvli insertion
3753 // pass.
3754 VL = CurDAG->getSignedTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N),
3755 VT: N->getValueType(ResNo: 0));
3756 } else {
3757 VL = N;
3758 }
3759
3760 return true;
3761}
3762
3763static SDValue findVSplat(SDValue N) {
3764 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3765 if (!N.getOperand(i: 0).isUndef())
3766 return SDValue();
3767 N = N.getOperand(i: 1);
3768 }
3769 SDValue Splat = N;
3770 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3771 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3772 !Splat.getOperand(i: 0).isUndef())
3773 return SDValue();
3774 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3775 return Splat;
3776}
3777
3778bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3779 SDValue Splat = findVSplat(N);
3780 if (!Splat)
3781 return false;
3782
3783 SplatVal = Splat.getOperand(i: 1);
3784 return true;
3785}
3786
3787static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3788 SelectionDAG &DAG,
3789 const RISCVSubtarget &Subtarget,
3790 std::function<bool(int64_t)> ValidateImm,
3791 bool Decrement = false) {
3792 SDValue Splat = findVSplat(N);
3793 if (!Splat || !isa<ConstantSDNode>(Val: Splat.getOperand(i: 1)))
3794 return false;
3795
3796 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3797 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3798 "Unexpected splat operand type");
3799
3800 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3801 // type is wider than the resulting vector element type: an implicit
3802 // truncation first takes place. Therefore, perform a manual
3803 // truncation/sign-extension in order to ignore any truncated bits and catch
3804 // any zero-extended immediate.
3805 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3806 // sign-extending to (XLenVT -1).
3807 APInt SplatConst = Splat.getConstantOperandAPInt(i: 1).sextOrTrunc(width: SplatEltSize);
3808
3809 int64_t SplatImm = SplatConst.getSExtValue();
3810
3811 if (!ValidateImm(SplatImm))
3812 return false;
3813
3814 if (Decrement)
3815 SplatImm -= 1;
3816
3817 SplatVal =
3818 DAG.getSignedTargetConstant(Val: SplatImm, DL: SDLoc(N), VT: Subtarget.getXLenVT());
3819 return true;
3820}
3821
3822bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3823 return selectVSplatImmHelper(N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
3824 ValidateImm: [](int64_t Imm) { return isInt<5>(x: Imm); });
3825}
3826
3827bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3828 return selectVSplatImmHelper(
3829 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
3830 ValidateImm: [](int64_t Imm) { return (isInt<5>(x: Imm) && Imm != -16) || Imm == 16; },
3831 /*Decrement=*/true);
3832}
3833
3834bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
3835 return selectVSplatImmHelper(
3836 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
3837 ValidateImm: [](int64_t Imm) { return (isInt<5>(x: Imm) && Imm != -16) || Imm == 16; },
3838 /*Decrement=*/false);
3839}
3840
3841bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3842 SDValue &SplatVal) {
3843 return selectVSplatImmHelper(
3844 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
3845 ValidateImm: [](int64_t Imm) {
3846 return Imm != 0 && ((isInt<5>(x: Imm) && Imm != -16) || Imm == 16);
3847 },
3848 /*Decrement=*/true);
3849}
3850
3851bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3852 SDValue &SplatVal) {
3853 return selectVSplatImmHelper(
3854 N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget,
3855 ValidateImm: [Bits](int64_t Imm) { return isUIntN(N: Bits, x: Imm); });
3856}
3857
3858bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
3859 SDValue Splat = findVSplat(N);
3860 return Splat && selectNegImm(N: Splat.getOperand(i: 1), Val&: SplatVal);
3861}
3862
3863bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3864 auto IsExtOrTrunc = [](SDValue N) {
3865 switch (N->getOpcode()) {
3866 case ISD::SIGN_EXTEND:
3867 case ISD::ZERO_EXTEND:
3868 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3869 // inactive elements will be undef.
3870 case RISCVISD::TRUNCATE_VECTOR_VL:
3871 case RISCVISD::VSEXT_VL:
3872 case RISCVISD::VZEXT_VL:
3873 return true;
3874 default:
3875 return false;
3876 }
3877 };
3878
3879 // We can have multiple nested nodes, so unravel them all if needed.
3880 while (IsExtOrTrunc(N)) {
3881 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3882 return false;
3883 N = N->getOperand(Num: 0);
3884 }
3885
3886 return selectVSplat(N, SplatVal);
3887}
3888
3889bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
3890 // Allow bitcasts from XLenVT -> FP.
3891 if (N.getOpcode() == ISD::BITCAST &&
3892 N.getOperand(i: 0).getValueType() == Subtarget->getXLenVT()) {
3893 Imm = N.getOperand(i: 0);
3894 return true;
3895 }
3896 // Allow moves from XLenVT to FP.
3897 if (N.getOpcode() == RISCVISD::FMV_H_X ||
3898 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
3899 Imm = N.getOperand(i: 0);
3900 return true;
3901 }
3902
3903 // Otherwise, look for FP constants that can materialized with scalar int.
3904 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val: N.getNode());
3905 if (!CFP)
3906 return false;
3907 const APFloat &APF = CFP->getValueAPF();
3908 // td can handle +0.0 already.
3909 if (APF.isPosZero())
3910 return false;
3911
3912 MVT VT = CFP->getSimpleValueType(ResNo: 0);
3913
3914 MVT XLenVT = Subtarget->getXLenVT();
3915 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3916 assert(APF.isNegZero() && "Unexpected constant.");
3917 return false;
3918 }
3919 SDLoc DL(N);
3920 Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(),
3921 Subtarget: *Subtarget);
3922 return true;
3923}
3924
3925bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3926 SDValue &Imm) {
3927 if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) {
3928 int64_t ImmVal = SignExtend64(X: C->getSExtValue(), B: Width);
3929
3930 if (!isInt<5>(x: ImmVal))
3931 return false;
3932
3933 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL: SDLoc(N),
3934 VT: Subtarget->getXLenVT());
3935 return true;
3936 }
3937
3938 return false;
3939}
3940
3941// Try to remove sext.w if the input is a W instruction or can be made into
3942// a W instruction cheaply.
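// For example, (ADDIW (ADD X, Y), 0) is replaced by (ADDW X, Y), and an
// ADDIW rd, rs1, 0 whose input is already an ADDW is removed entirely.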
3943bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3944 // Look for the sext.w pattern, addiw rd, rs1, 0.
3945 if (N->getMachineOpcode() != RISCV::ADDIW ||
3946 !isNullConstant(V: N->getOperand(Num: 1)))
3947 return false;
3948
3949 SDValue N0 = N->getOperand(Num: 0);
3950 if (!N0.isMachineOpcode())
3951 return false;
3952
3953 switch (N0.getMachineOpcode()) {
3954 default:
3955 break;
3956 case RISCV::ADD:
3957 case RISCV::ADDI:
3958 case RISCV::SUB:
3959 case RISCV::MUL:
3960 case RISCV::SLLI: {
3961 // Convert sext.w+add/sub/mul to their W instructions. This will create
3962 // a new independent instruction. This improves latency.
3963 unsigned Opc;
3964 switch (N0.getMachineOpcode()) {
3965 default:
3966 llvm_unreachable("Unexpected opcode!");
3967 case RISCV::ADD: Opc = RISCV::ADDW; break;
3968 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3969 case RISCV::SUB: Opc = RISCV::SUBW; break;
3970 case RISCV::MUL: Opc = RISCV::MULW; break;
3971 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3972 }
3973
3974 SDValue N00 = N0.getOperand(i: 0);
3975 SDValue N01 = N0.getOperand(i: 1);
3976
3977 // Shift amount needs to be uimm5.
3978 if (N0.getMachineOpcode() == RISCV::SLLI &&
3979 !isUInt<5>(x: cast<ConstantSDNode>(Val&: N01)->getSExtValue()))
3980 break;
3981
3982 SDNode *Result =
3983 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: N->getValueType(ResNo: 0),
3984 Op1: N00, Op2: N01);
3985 ReplaceUses(F: N, T: Result);
3986 return true;
3987 }
3988 case RISCV::ADDW:
3989 case RISCV::ADDIW:
3990 case RISCV::SUBW:
3991 case RISCV::MULW:
3992 case RISCV::SLLIW:
3993 case RISCV::PACKW:
3994 case RISCV::TH_MULAW:
3995 case RISCV::TH_MULAH:
3996 case RISCV::TH_MULSW:
3997 case RISCV::TH_MULSH:
3998 if (N0.getValueType() == MVT::i32)
3999 break;
4000
4001 // Result is already sign extended; just remove the sext.w.
4002 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4003 ReplaceUses(F: N, T: N0.getNode());
4004 return true;
4005 }
4006
4007 return false;
4008}
4009
4010static bool usesAllOnesMask(SDValue MaskOp) {
4011 const auto IsVMSet = [](unsigned Opc) {
4012 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4013 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4014 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4015 Opc == RISCV::PseudoVMSET_M_B8;
4016 };
4017
4018 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4019 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4020 // assume that it's all-ones? Same applies to its VL.
4021 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4022}
4023
4024static bool isImplicitDef(SDValue V) {
4025 if (!V.isMachineOpcode())
4026 return false;
4027 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4028 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4029 if (!isImplicitDef(V: V.getOperand(i: I)))
4030 return false;
4031 return true;
4032 }
4033 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4034}
4035
4036// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4037// corresponding "unmasked" pseudo versions.
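// For example, a PseudoVADD_VV_M1_MASK whose mask operand is a
// PseudoVMSET_M_B* (i.e. all ones) is rewritten to PseudoVADD_VV_M1,
// dropping the mask operand and, when the unmasked form lacks them, the
// passthru and policy operands as well.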
4038bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4039 const RISCV::RISCVMaskedPseudoInfo *I =
4040 RISCV::getMaskedPseudoInfo(MaskedPseudo: N->getMachineOpcode());
4041 if (!I)
4042 return false;
4043
4044 unsigned MaskOpIdx = I->MaskOpIdx;
4045 if (!usesAllOnesMask(MaskOp: N->getOperand(Num: MaskOpIdx)))
4046 return false;
4047
4048 // There are two classes of pseudos in the table - compares and
4049 // everything else. See the comment on RISCVMaskedPseudo for details.
4050 const unsigned Opc = I->UnmaskedPseudo;
4051 const MCInstrDesc &MCID = TII->get(Opcode: Opc);
4052 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MCID);
4053
4054 const MCInstrDesc &MaskedMCID = TII->get(Opcode: N->getMachineOpcode());
4055 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc: MaskedMCID);
4056
4057 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4058 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4059 "Unmasked pseudo has policy but masked pseudo doesn't?");
4060 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4061 "Unexpected pseudo structure");
4062 assert(!(HasPassthru && !MaskedHasPassthru) &&
4063 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4064
4065 SmallVector<SDValue, 8> Ops;
4066 // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have one.
4067 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4068 bool DropPolicy = !RISCVII::hasVecPolicyOp(TSFlags: MCID.TSFlags) &&
4069 RISCVII::hasVecPolicyOp(TSFlags: MaskedMCID.TSFlags);
4070 bool HasChainOp =
4071 N->getOperand(Num: N->getNumOperands() - 1).getValueType() == MVT::Other;
4072 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4073 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4074 // Skip the mask
4075 SDValue Op = N->getOperand(Num: I);
4076 if (I == MaskOpIdx)
4077 continue;
4078 if (DropPolicy && I == LastOpNum)
4079 continue;
4080 Ops.push_back(Elt: Op);
4081 }
4082
4083 MachineSDNode *Result =
4084 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
4085
4086 if (!N->memoperands_empty())
4087 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: N->memoperands());
4088
4089 Result->setFlags(N->getFlags());
4090 ReplaceUses(F: N, T: Result);
4091
4092 return true;
4093}
4094
4095static bool IsVMerge(SDNode *N) {
4096 return RISCV::getRVVMCOpcode(RVVPseudoOpcode: N->getMachineOpcode()) == RISCV::VMERGE_VVM;
4097}
4098
4099// Try to fold away VMERGE_VVM instructions into their true operands:
4100//
4101// %true = PseudoVADD_VV ...
4102// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
4103// ->
4104// %x = PseudoVADD_VV_MASK %false, ..., %mask
4105//
4106// We can only fold if vmerge's passthru operand, vmerge's false operand and
4107// %true's passthru operand (if it has one) are the same. This is because we
4108// have to consolidate them into one passthru operand in the result.
4109//
4110// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
4111// mask is all ones.
4112//
4113// The resulting VL is the minimum of the two VLs.
4114//
4115// The resulting policy is the effective policy the vmerge would have had,
4116 // i.e. whether or not its passthru operand was implicit-def.
4117bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
4118 SDValue Passthru, False, True, VL, Mask;
4119 assert(IsVMerge(N));
4120 Passthru = N->getOperand(Num: 0);
4121 False = N->getOperand(Num: 1);
4122 True = N->getOperand(Num: 2);
4123 Mask = N->getOperand(Num: 3);
4124 VL = N->getOperand(Num: 4);
4125
4126 // If the EEW of True is different from vmerge's SEW, then we can't fold.
4127 if (True.getSimpleValueType() != N->getSimpleValueType(ResNo: 0))
4128 return false;
4129
4130 // We require that either passthru and false are the same, or that passthru
4131 // is undefined.
4132 if (Passthru != False && !isImplicitDef(V: Passthru))
4133 return false;
4134
4135 assert(True.getResNo() == 0 &&
4136 "Expect True is the first output of an instruction.");
4137
4138 // N must be the only user of True.
4139 if (!True.hasOneUse())
4140 return false;
4141
4142 if (!True.isMachineOpcode())
4143 return false;
4144
4145 unsigned TrueOpc = True.getMachineOpcode();
4146 const MCInstrDesc &TrueMCID = TII->get(Opcode: TrueOpc);
4147 uint64_t TrueTSFlags = TrueMCID.TSFlags;
4148 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(Desc: TrueMCID);
4149
4150 const RISCV::RISCVMaskedPseudoInfo *Info =
4151 RISCV::lookupMaskedIntrinsicByUnmasked(UnmaskedPseudo: TrueOpc);
4152 if (!Info)
4153 return false;
4154
4155 // If True has a passthru operand then it needs to be the same as vmerge's
4156 // False, since False will be used for the result's passthru operand.
4157 if (HasTiedDest && !isImplicitDef(V: True->getOperand(Num: 0))) {
4158 SDValue PassthruOpTrue = True->getOperand(Num: 0);
4159 if (False != PassthruOpTrue)
4160 return false;
4161 }
4162
4163 // Skip if True has side effect.
4164 if (TII->get(Opcode: TrueOpc).hasUnmodeledSideEffects())
4165 return false;
4166
4167 unsigned TrueChainOpIdx = True.getNumOperands() - 1;
4168 bool HasChainOp =
4169 True.getOperand(i: TrueChainOpIdx).getValueType() == MVT::Other;
4170
4171 if (HasChainOp) {
4172 // Avoid creating cycles in the DAG. We must ensure that none of the other
4173 // operands depend on True through its Chain.
4174 SmallVector<const SDNode *, 4> LoopWorklist;
4175 SmallPtrSet<const SDNode *, 16> Visited;
4176 LoopWorklist.push_back(Elt: False.getNode());
4177 LoopWorklist.push_back(Elt: Mask.getNode());
4178 LoopWorklist.push_back(Elt: VL.getNode());
4179 if (SDNode::hasPredecessorHelper(N: True.getNode(), Visited, Worklist&: LoopWorklist))
4180 return false;
4181 }
4182
4183 // The vector policy operand may be present for masked intrinsics
4184 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags: TrueTSFlags);
4185 unsigned TrueVLIndex =
4186 True.getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4187 SDValue TrueVL = True.getOperand(i: TrueVLIndex);
4188 SDValue SEW = True.getOperand(i: TrueVLIndex + 1);
4189
4190 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
4191 if (LHS == RHS)
4192 return LHS;
4193 if (isAllOnesConstant(V: LHS))
4194 return RHS;
4195 if (isAllOnesConstant(V: RHS))
4196 return LHS;
4197 auto *CLHS = dyn_cast<ConstantSDNode>(Val&: LHS);
4198 auto *CRHS = dyn_cast<ConstantSDNode>(Val&: RHS);
4199 if (!CLHS || !CRHS)
4200 return SDValue();
4201 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
4202 };
4203
4204 // Because N and True must have the same passthru operand (or True's operand
4205 // is implicit_def), the "effective" body is the minimum of their VLs.
4206 SDValue OrigVL = VL;
4207 VL = GetMinVL(TrueVL, VL);
4208 if (!VL)
4209 return false;
4210
4211 // Some operations produce different elementwise results depending on the
4212 // active elements, like viota.m or vredsum. This transformation is illegal
4213 // for these if we change the active elements (i.e. mask or VL).
4214 const MCInstrDesc &TrueBaseMCID = TII->get(Opcode: RISCV::getRVVMCOpcode(RVVPseudoOpcode: TrueOpc));
4215 if (RISCVII::elementsDependOnVL(TSFlags: TrueBaseMCID.TSFlags) && (TrueVL != VL))
4216 return false;
4217 if (RISCVII::elementsDependOnMask(TSFlags: TrueBaseMCID.TSFlags) &&
4218 (Mask && !usesAllOnesMask(MaskOp: Mask)))
4219 return false;
4220
4221 // Make sure it doesn't raise any observable fp exceptions, since changing the
4222 // active elements will affect how fflags is set.
4223 if (mayRaiseFPException(Node: True.getNode()) && !True->getFlags().hasNoFPExcept())
4224 return false;
4225
4226 SDLoc DL(N);
4227
4228 unsigned MaskedOpc = Info->MaskedPseudo;
4229#ifndef NDEBUG
4230 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
4231 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
4232 "Expected instructions with mask have policy operand.");
4233 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
4234 MCOI::TIED_TO) == 0 &&
4235 "Expected instructions with mask have a tied dest.");
4236#endif
4237
4238 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
4239 // operand is undefined.
4240 //
4241 // However, if the VL became smaller than what the vmerge had originally, then
4242 // elements past VL that were previously in the vmerge's body will have moved
4243 // to the tail. In that case we always need to use tail undisturbed to
4244 // preserve them.
4245 bool MergeVLShrunk = VL != OrigVL;
4246 uint64_t Policy = (isImplicitDef(V: Passthru) && !MergeVLShrunk)
4247 ? RISCVVType::TAIL_AGNOSTIC
4248 : /*TUMU*/ 0;
4249 SDValue PolicyOp =
4250 CurDAG->getTargetConstant(Val: Policy, DL, VT: Subtarget->getXLenVT());
4251
4252
4253 SmallVector<SDValue, 8> Ops;
4254 Ops.push_back(Elt: False);
4255
4256 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TSFlags: TrueTSFlags);
4257 const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
4258 Ops.append(in_start: True->op_begin() + HasTiedDest, in_end: True->op_begin() + NormalOpsEnd);
4259
4260 Ops.push_back(Elt: Mask);
4261
4262 // For unmasked "VOp" with rounding mode operand, that is interfaces like
4263 // (..., rm, vl) or (..., rm, vl, policy).
4264 // Its masked version is (..., vm, rm, vl, policy).
4265 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
4266 if (HasRoundingMode)
4267 Ops.push_back(Elt: True->getOperand(Num: TrueVLIndex - 1));
4268
4269 Ops.append(IL: {VL, SEW, PolicyOp});
4270
4271 // Result node should have chain operand of True.
4272 if (HasChainOp)
4273 Ops.push_back(Elt: True.getOperand(i: TrueChainOpIdx));
4274
4275 MachineSDNode *Result =
4276 CurDAG->getMachineNode(Opcode: MaskedOpc, dl: DL, VTs: True->getVTList(), Ops);
4277 Result->setFlags(True->getFlags());
4278
4279 if (!cast<MachineSDNode>(Val&: True)->memoperands_empty())
4280 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val&: True)->memoperands());
4281
4282 // Replace vmerge.vvm node by Result.
4283 ReplaceUses(F: SDValue(N, 0), T: SDValue(Result, 0));
4284
4285 // Replace another value of True. E.g. chain and VL.
4286 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
4287 ReplaceUses(F: True.getValue(R: Idx), T: SDValue(Result, Idx));
4288
4289 return true;
4290}
4291
4292bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4293 bool MadeChange = false;
4294 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4295
4296 while (Position != CurDAG->allnodes_begin()) {
4297 SDNode *N = &*--Position;
4298 if (N->use_empty() || !N->isMachineOpcode())
4299 continue;
4300
4301 if (IsVMerge(N))
4302 MadeChange |= performCombineVMergeAndVOps(N);
4303 }
4304 return MadeChange;
4305}
4306
4307 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
4308 /// issues with MachineCSE not being able to CSE expressions with
4309/// IMPLICIT_DEF operands while preserving the semantic intent. See
4310/// pr64282 for context. Note that this transform is the last one
4311/// performed at ISEL DAG to DAG.
4312bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4313 bool MadeChange = false;
4314 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4315
4316 while (Position != CurDAG->allnodes_begin()) {
4317 SDNode *N = &*--Position;
4318 if (N->use_empty() || !N->isMachineOpcode())
4319 continue;
4320
4321 const unsigned Opc = N->getMachineOpcode();
4322 if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opc) ||
4323 !RISCVII::isFirstDefTiedToFirstUse(Desc: TII->get(Opcode: Opc)) ||
4324 !isImplicitDef(V: N->getOperand(Num: 0)))
4325 continue;
4326
4327 SmallVector<SDValue> Ops;
4328 Ops.push_back(Elt: CurDAG->getRegister(Reg: RISCV::NoRegister, VT: N->getValueType(ResNo: 0)));
4329 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4330 SDValue Op = N->getOperand(Num: I);
4331 Ops.push_back(Elt: Op);
4332 }
4333
4334 MachineSDNode *Result =
4335 CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops);
4336 Result->setFlags(N->getFlags());
4337 CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val: N)->memoperands());
4338 ReplaceUses(F: N, T: Result);
4339 MadeChange = true;
4340 }
4341 return MadeChange;
4342}
4343
4344
4345// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4346// for instruction scheduling.
4347FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4348 CodeGenOptLevel OptLevel) {
4349 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4350}
4351
4352char RISCVDAGToDAGISelLegacy::ID = 0;
4353
4354RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4355 CodeGenOptLevel OptLevel)
4356 : SelectionDAGISelLegacy(
4357 ID, std::make_unique<RISCVDAGToDAGISel>(args&: TM, args&: OptLevel)) {}
4358
4359INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4360