1 | //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines an instruction selector for the RISC-V target. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "RISCVISelDAGToDAG.h" |
14 | #include "MCTargetDesc/RISCVBaseInfo.h" |
15 | #include "MCTargetDesc/RISCVMCTargetDesc.h" |
16 | #include "MCTargetDesc/RISCVMatInt.h" |
17 | #include "RISCVISelLowering.h" |
18 | #include "RISCVMachineFunctionInfo.h" |
19 | #include "llvm/CodeGen/MachineFrameInfo.h" |
20 | #include "llvm/IR/IntrinsicsRISCV.h" |
21 | #include "llvm/Support/Alignment.h" |
22 | #include "llvm/Support/Debug.h" |
23 | #include "llvm/Support/MathExtras.h" |
24 | #include "llvm/Support/raw_ostream.h" |
25 | |
26 | using namespace llvm; |
27 | |
28 | #define DEBUG_TYPE "riscv-isel" |
29 | #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection" |
30 | |
31 | static cl::opt<bool> UsePseudoMovImm( |
32 | "riscv-use-rematerializable-movimm" , cl::Hidden, |
33 | cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " |
34 | "constant materialization" ), |
35 | cl::init(Val: false)); |
36 | |
37 | namespace llvm::RISCV { |
38 | #define GET_RISCVVSSEGTable_IMPL |
39 | #define GET_RISCVVLSEGTable_IMPL |
40 | #define GET_RISCVVLXSEGTable_IMPL |
41 | #define GET_RISCVVSXSEGTable_IMPL |
42 | #define GET_RISCVVLETable_IMPL |
43 | #define GET_RISCVVSETable_IMPL |
44 | #define GET_RISCVVLXTable_IMPL |
45 | #define GET_RISCVVSXTable_IMPL |
46 | #include "RISCVGenSearchableTables.inc" |
47 | } // namespace llvm::RISCV |
48 | |
49 | void RISCVDAGToDAGISel::PreprocessISelDAG() { |
50 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
51 | |
52 | bool MadeChange = false; |
53 | while (Position != CurDAG->allnodes_begin()) { |
54 | SDNode *N = &*--Position; |
55 | if (N->use_empty()) |
56 | continue; |
57 | |
58 | SDValue Result; |
59 | switch (N->getOpcode()) { |
60 | case ISD::SPLAT_VECTOR: { |
61 | // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point |
62 | // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. |
63 | MVT VT = N->getSimpleValueType(ResNo: 0); |
64 | unsigned Opc = |
65 | VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; |
66 | SDLoc DL(N); |
67 | SDValue VL = CurDAG->getRegister(Reg: RISCV::X0, VT: Subtarget->getXLenVT()); |
68 | SDValue Src = N->getOperand(Num: 0); |
69 | if (VT.isInteger()) |
70 | Src = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget->getXLenVT(), |
71 | Operand: N->getOperand(Num: 0)); |
72 | Result = CurDAG->getNode(Opcode: Opc, DL, VT, N1: CurDAG->getUNDEF(VT), N2: Src, N3: VL); |
73 | break; |
74 | } |
75 | case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { |
76 | // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector |
77 | // load. Done after lowering and combining so that we have a chance to |
78 | // optimize this to VMV_V_X_VL when the upper bits aren't needed. |
79 | assert(N->getNumOperands() == 4 && "Unexpected number of operands" ); |
80 | MVT VT = N->getSimpleValueType(ResNo: 0); |
81 | SDValue Passthru = N->getOperand(Num: 0); |
82 | SDValue Lo = N->getOperand(Num: 1); |
83 | SDValue Hi = N->getOperand(Num: 2); |
84 | SDValue VL = N->getOperand(Num: 3); |
85 | assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && |
86 | Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && |
87 | "Unexpected VTs!" ); |
88 | MachineFunction &MF = CurDAG->getMachineFunction(); |
89 | SDLoc DL(N); |
90 | |
91 | // Create temporary stack for each expanding node. |
92 | SDValue StackSlot = |
93 | CurDAG->CreateStackTemporary(Bytes: TypeSize::getFixed(ExactSize: 8), Alignment: Align(8)); |
94 | int FI = cast<FrameIndexSDNode>(Val: StackSlot.getNode())->getIndex(); |
95 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
96 | |
97 | SDValue Chain = CurDAG->getEntryNode(); |
98 | Lo = CurDAG->getStore(Chain, dl: DL, Val: Lo, Ptr: StackSlot, PtrInfo: MPI, Alignment: Align(8)); |
99 | |
100 | SDValue OffsetSlot = |
101 | CurDAG->getMemBasePlusOffset(Base: StackSlot, Offset: TypeSize::getFixed(ExactSize: 4), DL); |
102 | Hi = CurDAG->getStore(Chain, dl: DL, Val: Hi, Ptr: OffsetSlot, PtrInfo: MPI.getWithOffset(O: 4), |
103 | Alignment: Align(8)); |
104 | |
105 | Chain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo, N2: Hi); |
106 | |
107 | SDVTList VTs = CurDAG->getVTList(VTs: {VT, MVT::Other}); |
108 | SDValue IntID = |
109 | CurDAG->getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: MVT::i64); |
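      // The stride operand is X0, i.e. a stride of zero, so the vlse64 below
      // reads the same 8-byte stack slot for every element and thereby
      // broadcasts the reconstructed 64-bit value.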
110 | SDValue Ops[] = {Chain, |
111 | IntID, |
112 | Passthru, |
113 | StackSlot, |
114 | CurDAG->getRegister(Reg: RISCV::X0, VT: MVT::i64), |
115 | VL}; |
116 | |
117 | Result = CurDAG->getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
118 | MemVT: MVT::i64, PtrInfo: MPI, Alignment: Align(8), |
119 | Flags: MachineMemOperand::MOLoad); |
120 | break; |
121 | } |
122 | } |
123 | |
124 | if (Result) { |
125 | LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: " ); |
126 | LLVM_DEBUG(N->dump(CurDAG)); |
127 | LLVM_DEBUG(dbgs() << "\nNew: " ); |
128 | LLVM_DEBUG(Result->dump(CurDAG)); |
129 | LLVM_DEBUG(dbgs() << "\n" ); |
130 | |
131 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Result); |
132 | MadeChange = true; |
133 | } |
134 | } |
135 | |
136 | if (MadeChange) |
137 | CurDAG->RemoveDeadNodes(); |
138 | } |
139 | |
140 | void RISCVDAGToDAGISel::PostprocessISelDAG() { |
141 | HandleSDNode Dummy(CurDAG->getRoot()); |
142 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
143 | |
144 | bool MadeChange = false; |
145 | while (Position != CurDAG->allnodes_begin()) { |
146 | SDNode *N = &*--Position; |
147 | // Skip dead nodes and any non-machine opcodes. |
148 | if (N->use_empty() || !N->isMachineOpcode()) |
149 | continue; |
150 | |
151 | MadeChange |= doPeepholeSExtW(Node: N); |
152 | |
153 | // FIXME: This is here only because the VMerge transform doesn't |
154 | // know how to handle masked true inputs. Once that has been moved |
155 | // to post-ISEL, this can be deleted as well. |
156 | MadeChange |= doPeepholeMaskedRVV(Node: cast<MachineSDNode>(Val: N)); |
157 | } |
158 | |
159 | CurDAG->setRoot(Dummy.getValue()); |
160 | |
161 | MadeChange |= doPeepholeMergeVVMFold(); |
162 | |
163 | // After we're done with everything else, convert IMPLICIT_DEF |
  // passthru operands to NoRegister. This is required to work around
165 | // an optimization deficiency in MachineCSE. This really should |
166 | // be merged back into each of the patterns (i.e. there's no good |
167 | // reason not to go directly to NoReg), but is being done this way |
168 | // to allow easy backporting. |
169 | MadeChange |= doPeepholeNoRegPassThru(); |
170 | |
171 | if (MadeChange) |
172 | CurDAG->RemoveDeadNodes(); |
173 | } |
174 | |
175 | static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, |
176 | RISCVMatInt::InstSeq &Seq) { |
177 | SDValue SrcReg = CurDAG->getRegister(Reg: RISCV::X0, VT); |
178 | for (const RISCVMatInt::Inst &Inst : Seq) { |
179 | SDValue SDImm = CurDAG->getTargetConstant(Val: Inst.getImm(), DL, VT); |
180 | SDNode *Result = nullptr; |
181 | switch (Inst.getOpndKind()) { |
182 | case RISCVMatInt::Imm: |
183 | Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SDImm); |
184 | break; |
185 | case RISCVMatInt::RegX0: |
186 | Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, |
187 | Op2: CurDAG->getRegister(Reg: RISCV::X0, VT)); |
188 | break; |
189 | case RISCVMatInt::RegReg: |
190 | Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SrcReg); |
191 | break; |
192 | case RISCVMatInt::RegImm: |
193 | Result = CurDAG->getMachineNode(Opcode: Inst.getOpcode(), dl: DL, VT, Op1: SrcReg, Op2: SDImm); |
194 | break; |
195 | } |
196 | |
197 | // Only the first instruction has X0 as its source. |
198 | SrcReg = SDValue(Result, 0); |
199 | } |
200 | |
201 | return SrcReg; |
202 | } |
203 | |
204 | static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, |
205 | int64_t Imm, const RISCVSubtarget &Subtarget) { |
206 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget); |
207 | |
208 | // Use a rematerializable pseudo instruction for short sequences if enabled. |
209 | if (Seq.size() == 2 && UsePseudoMovImm) |
210 | return SDValue( |
211 | CurDAG->getMachineNode(Opcode: RISCV::PseudoMovImm, dl: DL, VT, |
212 | Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT)), |
213 | 0); |
214 | |
215 | // See if we can create this constant as (ADD (SLLI X, C), X) where X is at |
216 | // worst an LUI+ADDIW. This will require an extra register, but avoids a |
217 | // constant pool. |
218 | // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where |
219 | // low and high 32 bits are the same and bit 31 and 63 are set. |
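  // For example, with Zba, 0x8000000180000001 can be built as
  //   X = 0xffffffff80000001 (LUI+ADDIW); ADD_UW X, (SLLI X, 32)
  // The two-register form is only used below when it is strictly shorter than
  // the single-register sequence.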
220 | if (Seq.size() > 3) { |
221 | unsigned ShiftAmt, AddOpc; |
222 | RISCVMatInt::InstSeq SeqLo = |
223 | RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc); |
224 | if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) { |
225 | SDValue Lo = selectImmSeq(CurDAG, DL, VT, Seq&: SeqLo); |
226 | |
227 | SDValue SLLI = SDValue( |
228 | CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: Lo, |
229 | Op2: CurDAG->getTargetConstant(Val: ShiftAmt, DL, VT)), |
230 | 0); |
231 | return SDValue(CurDAG->getMachineNode(Opcode: AddOpc, dl: DL, VT, Op1: Lo, Op2: SLLI), 0); |
232 | } |
233 | } |
234 | |
235 | // Otherwise, use the original sequence. |
236 | return selectImmSeq(CurDAG, DL, VT, Seq); |
237 | } |
238 | |
239 | static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, |
240 | unsigned NF, RISCVII::VLMUL LMUL) { |
241 | static const unsigned M1TupleRegClassIDs[] = { |
242 | RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, |
243 | RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, |
244 | RISCV::VRN8M1RegClassID}; |
245 | static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, |
246 | RISCV::VRN3M2RegClassID, |
247 | RISCV::VRN4M2RegClassID}; |
248 | |
249 | assert(Regs.size() >= 2 && Regs.size() <= 8); |
250 | |
251 | unsigned RegClassID; |
252 | unsigned SubReg0; |
253 | switch (LMUL) { |
254 | default: |
255 | llvm_unreachable("Invalid LMUL." ); |
256 | case RISCVII::VLMUL::LMUL_F8: |
257 | case RISCVII::VLMUL::LMUL_F4: |
258 | case RISCVII::VLMUL::LMUL_F2: |
259 | case RISCVII::VLMUL::LMUL_1: |
260 | static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, |
261 | "Unexpected subreg numbering" ); |
262 | SubReg0 = RISCV::sub_vrm1_0; |
263 | RegClassID = M1TupleRegClassIDs[NF - 2]; |
264 | break; |
265 | case RISCVII::VLMUL::LMUL_2: |
266 | static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, |
267 | "Unexpected subreg numbering" ); |
268 | SubReg0 = RISCV::sub_vrm2_0; |
269 | RegClassID = M2TupleRegClassIDs[NF - 2]; |
270 | break; |
271 | case RISCVII::VLMUL::LMUL_4: |
272 | static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, |
273 | "Unexpected subreg numbering" ); |
274 | SubReg0 = RISCV::sub_vrm4_0; |
275 | RegClassID = RISCV::VRN2M4RegClassID; |
276 | break; |
277 | } |
278 | |
279 | SDLoc DL(Regs[0]); |
280 | SmallVector<SDValue, 8> Ops; |
281 | |
282 | Ops.push_back(Elt: CurDAG.getTargetConstant(Val: RegClassID, DL, VT: MVT::i32)); |
283 | |
284 | for (unsigned I = 0; I < Regs.size(); ++I) { |
285 | Ops.push_back(Elt: Regs[I]); |
286 | Ops.push_back(Elt: CurDAG.getTargetConstant(Val: SubReg0 + I, DL, VT: MVT::i32)); |
287 | } |
288 | SDNode *N = |
289 | CurDAG.getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped, Ops); |
290 | return SDValue(N, 0); |
291 | } |
292 | |
293 | void RISCVDAGToDAGISel::addVectorLoadStoreOperands( |
294 | SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, |
295 | bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, |
296 | bool IsLoad, MVT *IndexVT) { |
297 | SDValue Chain = Node->getOperand(Num: 0); |
298 | SDValue Glue; |
299 | |
300 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Base pointer. |
301 | |
302 | if (IsStridedOrIndexed) { |
303 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Index. |
304 | if (IndexVT) |
305 | *IndexVT = Operands.back()->getSimpleValueType(ResNo: 0); |
306 | } |
307 | |
308 | if (IsMasked) { |
309 | // Mask needs to be copied to V0. |
310 | SDValue Mask = Node->getOperand(Num: CurOp++); |
311 | Chain = CurDAG->getCopyToReg(Chain, dl: DL, Reg: RISCV::V0, N: Mask, Glue: SDValue()); |
312 | Glue = Chain.getValue(R: 1); |
313 | Operands.push_back(Elt: CurDAG->getRegister(Reg: RISCV::V0, VT: Mask.getValueType())); |
314 | } |
315 | SDValue VL; |
316 | selectVLOp(N: Node->getOperand(Num: CurOp++), VL); |
317 | Operands.push_back(Elt: VL); |
318 | |
319 | MVT XLenVT = Subtarget->getXLenVT(); |
320 | SDValue SEWOp = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT); |
321 | Operands.push_back(Elt: SEWOp); |
322 | |
323 | // At the IR layer, all the masked load intrinsics have policy operands, |
324 | // none of the others do. All have passthru operands. For our pseudos, |
325 | // all loads have policy operands. |
326 | if (IsLoad) { |
327 | uint64_t Policy = RISCVII::MASK_AGNOSTIC; |
328 | if (IsMasked) |
329 | Policy = Node->getConstantOperandVal(Num: CurOp++); |
330 | SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT); |
331 | Operands.push_back(Elt: PolicyOp); |
332 | } |
333 | |
334 | Operands.push_back(Elt: Chain); // Chain. |
335 | if (Glue) |
336 | Operands.push_back(Elt: Glue); |
337 | } |
338 | |
339 | void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, |
340 | bool IsStrided) { |
341 | SDLoc DL(Node); |
342 | unsigned NF = Node->getNumValues() - 1; |
343 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
344 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
345 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
346 | |
347 | unsigned CurOp = 2; |
348 | SmallVector<SDValue, 8> Operands; |
349 | |
350 | SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, |
351 | Node->op_begin() + CurOp + NF); |
352 | SDValue Merge = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
353 | Operands.push_back(Elt: Merge); |
354 | CurOp += NF; |
355 | |
356 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
357 | Operands, /*IsLoad=*/true); |
358 | |
359 | const RISCV::VLSEGPseudo *P = |
360 | RISCV::getVLSEGPseudo(NF, Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW, |
361 | LMUL: static_cast<unsigned>(LMUL)); |
362 | MachineSDNode *Load = |
363 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands); |
364 | |
365 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
366 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
367 | |
368 | SDValue SuperReg = SDValue(Load, 0); |
369 | for (unsigned I = 0; I < NF; ++I) { |
370 | unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, Index: I); |
371 | ReplaceUses(F: SDValue(Node, I), |
372 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: SuperReg)); |
373 | } |
374 | |
375 | ReplaceUses(F: SDValue(Node, NF), T: SDValue(Load, 1)); |
376 | CurDAG->RemoveDeadNode(N: Node); |
377 | } |
378 | |
379 | void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { |
380 | SDLoc DL(Node); |
381 | unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. |
382 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
383 | MVT XLenVT = Subtarget->getXLenVT(); |
384 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
385 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
386 | |
387 | unsigned CurOp = 2; |
388 | SmallVector<SDValue, 7> Operands; |
389 | |
390 | SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, |
391 | Node->op_begin() + CurOp + NF); |
392 | SDValue MaskedOff = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
393 | Operands.push_back(Elt: MaskedOff); |
394 | CurOp += NF; |
395 | |
396 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
397 | /*IsStridedOrIndexed*/ false, Operands, |
398 | /*IsLoad=*/true); |
399 | |
400 | const RISCV::VLSEGPseudo *P = |
401 | RISCV::getVLSEGPseudo(NF, Masked: IsMasked, /*Strided*/ false, /*FF*/ true, |
402 | Log2SEW, LMUL: static_cast<unsigned>(LMUL)); |
403 | MachineSDNode *Load = CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, |
404 | VT2: XLenVT, VT3: MVT::Other, Ops: Operands); |
405 | |
406 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
407 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
408 | |
409 | SDValue SuperReg = SDValue(Load, 0); |
410 | for (unsigned I = 0; I < NF; ++I) { |
411 | unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, Index: I); |
412 | ReplaceUses(F: SDValue(Node, I), |
413 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: SuperReg)); |
414 | } |
415 | |
416 | ReplaceUses(F: SDValue(Node, NF), T: SDValue(Load, 1)); // VL |
417 | ReplaceUses(F: SDValue(Node, NF + 1), T: SDValue(Load, 2)); // Chain |
418 | CurDAG->RemoveDeadNode(N: Node); |
419 | } |
420 | |
421 | void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, |
422 | bool IsOrdered) { |
423 | SDLoc DL(Node); |
424 | unsigned NF = Node->getNumValues() - 1; |
425 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
426 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
427 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
428 | |
429 | unsigned CurOp = 2; |
430 | SmallVector<SDValue, 8> Operands; |
431 | |
432 | SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, |
433 | Node->op_begin() + CurOp + NF); |
434 | SDValue MaskedOff = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
435 | Operands.push_back(Elt: MaskedOff); |
436 | CurOp += NF; |
437 | |
438 | MVT IndexVT; |
439 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
440 | /*IsStridedOrIndexed*/ true, Operands, |
441 | /*IsLoad=*/true, IndexVT: &IndexVT); |
442 | |
443 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
444 | "Element count mismatch" ); |
445 | |
446 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
447 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
448 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
449 | report_fatal_error(reason: "The V extension does not support EEW=64 for index " |
450 | "values when XLEN=32" ); |
451 | } |
452 | const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( |
453 | NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL), |
454 | IndexLMUL: static_cast<unsigned>(IndexLMUL)); |
455 | MachineSDNode *Load = |
456 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT1: MVT::Untyped, VT2: MVT::Other, Ops: Operands); |
457 | |
458 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
459 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
460 | |
461 | SDValue SuperReg = SDValue(Load, 0); |
462 | for (unsigned I = 0; I < NF; ++I) { |
463 | unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, Index: I); |
464 | ReplaceUses(F: SDValue(Node, I), |
465 | T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: SuperReg)); |
466 | } |
467 | |
468 | ReplaceUses(F: SDValue(Node, NF), T: SDValue(Load, 1)); |
469 | CurDAG->RemoveDeadNode(N: Node); |
470 | } |
471 | |
472 | void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, |
473 | bool IsStrided) { |
474 | SDLoc DL(Node); |
475 | unsigned NF = Node->getNumOperands() - 4; |
476 | if (IsStrided) |
477 | NF--; |
478 | if (IsMasked) |
479 | NF--; |
480 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
481 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
482 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
483 | SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); |
484 | SDValue StoreVal = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
485 | |
486 | SmallVector<SDValue, 8> Operands; |
487 | Operands.push_back(Elt: StoreVal); |
488 | unsigned CurOp = 2 + NF; |
489 | |
490 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
491 | Operands); |
492 | |
493 | const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( |
494 | NF, Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL)); |
495 | MachineSDNode *Store = |
496 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
497 | |
498 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
499 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
500 | |
501 | ReplaceNode(F: Node, T: Store); |
502 | } |
503 | |
504 | void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, |
505 | bool IsOrdered) { |
506 | SDLoc DL(Node); |
507 | unsigned NF = Node->getNumOperands() - 5; |
508 | if (IsMasked) |
509 | --NF; |
510 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
511 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
512 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
513 | SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); |
514 | SDValue StoreVal = createTuple(CurDAG&: *CurDAG, Regs, NF, LMUL); |
515 | |
516 | SmallVector<SDValue, 8> Operands; |
517 | Operands.push_back(Elt: StoreVal); |
518 | unsigned CurOp = 2 + NF; |
519 | |
520 | MVT IndexVT; |
521 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
522 | /*IsStridedOrIndexed*/ true, Operands, |
523 | /*IsLoad=*/false, IndexVT: &IndexVT); |
524 | |
525 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
526 | "Element count mismatch" ); |
527 | |
528 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
529 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
530 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
531 | report_fatal_error(reason: "The V extension does not support EEW=64 for index " |
532 | "values when XLEN=32" ); |
533 | } |
534 | const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( |
535 | NF, Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL), |
536 | IndexLMUL: static_cast<unsigned>(IndexLMUL)); |
537 | MachineSDNode *Store = |
538 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
539 | |
540 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
541 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
542 | |
543 | ReplaceNode(F: Node, T: Store); |
544 | } |
545 | |
546 | void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { |
547 | if (!Subtarget->hasVInstructions()) |
548 | return; |
549 | |
550 | assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode" ); |
551 | |
552 | SDLoc DL(Node); |
553 | MVT XLenVT = Subtarget->getXLenVT(); |
554 | |
555 | unsigned IntNo = Node->getConstantOperandVal(Num: 0); |
556 | |
557 | assert((IntNo == Intrinsic::riscv_vsetvli || |
558 | IntNo == Intrinsic::riscv_vsetvlimax) && |
559 | "Unexpected vsetvli intrinsic" ); |
560 | |
561 | bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax; |
562 | unsigned Offset = (VLMax ? 1 : 2); |
563 | |
564 | assert(Node->getNumOperands() == Offset + 2 && |
565 | "Unexpected number of operands" ); |
566 | |
567 | unsigned SEW = |
568 | RISCVVType::decodeVSEW(VSEW: Node->getConstantOperandVal(Num: Offset) & 0x7); |
569 | RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( |
570 | Node->getConstantOperandVal(Num: Offset + 1) & 0x7); |
571 | |
572 | unsigned VTypeI = RISCVVType::encodeVTYPE(VLMUL: VLMul, SEW, /*TailAgnostic*/ true, |
573 | /*MaskAgnostic*/ true); |
574 | SDValue VTypeIOp = CurDAG->getTargetConstant(Val: VTypeI, DL, VT: XLenVT); |
575 | |
576 | SDValue VLOperand; |
577 | unsigned Opcode = RISCV::PseudoVSETVLI; |
578 | if (auto *C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) { |
579 | if (auto VLEN = Subtarget->getRealVLen()) |
580 | if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) |
581 | VLMax = true; |
582 | } |
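  // An all-ones AVL is also a request for VLMAX: "vsetvli rd, x0, vtype" with
  // a non-x0 rd sets VL to VLMAX, so both cases can use the X0 AVL form.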
583 | if (VLMax || isAllOnesConstant(V: Node->getOperand(Num: 1))) { |
584 | VLOperand = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT); |
585 | Opcode = RISCV::PseudoVSETVLIX0; |
586 | } else { |
587 | VLOperand = Node->getOperand(Num: 1); |
588 | |
589 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: VLOperand)) { |
590 | uint64_t AVL = C->getZExtValue(); |
591 | if (isUInt<5>(x: AVL)) { |
592 | SDValue VLImm = CurDAG->getTargetConstant(Val: AVL, DL, VT: XLenVT); |
593 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: RISCV::PseudoVSETIVLI, dl: DL, |
594 | VT: XLenVT, Op1: VLImm, Op2: VTypeIOp)); |
595 | return; |
596 | } |
597 | } |
598 | } |
599 | |
600 | ReplaceNode(F: Node, |
601 | T: CurDAG->getMachineNode(Opcode, dl: DL, VT: XLenVT, Op1: VLOperand, Op2: VTypeIOp)); |
602 | } |
603 | |
604 | bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { |
605 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
606 | unsigned Opcode = Node->getOpcode(); |
607 | assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) && |
608 | "Unexpected opcode" ); |
609 | SDLoc DL(Node); |
610 | |
611 | // For operations of the form (x << C1) op C2, check if we can use |
612 | // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1. |
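  // For example, ((x << 4) | 0x4320) becomes ((x | 0x432) << 4); 0x432 fits
  // in ORI's simm12 range while 0x4320 does not.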
613 | SDValue N0 = Node->getOperand(Num: 0); |
614 | SDValue N1 = Node->getOperand(Num: 1); |
615 | |
616 | ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: N1); |
617 | if (!Cst) |
618 | return false; |
619 | |
620 | int64_t Val = Cst->getSExtValue(); |
621 | |
622 | // Check if immediate can already use ANDI/ORI/XORI. |
623 | if (isInt<12>(x: Val)) |
624 | return false; |
625 | |
626 | SDValue Shift = N0; |
627 | |
628 | // If Val is simm32 and we have a sext_inreg from i32, then the binop |
629 | // produces at least 33 sign bits. We can peek through the sext_inreg and use |
630 | // a SLLIW at the end. |
631 | bool SignExt = false; |
632 | if (isInt<32>(x: Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && |
633 | N0.hasOneUse() && cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT() == MVT::i32) { |
634 | SignExt = true; |
635 | Shift = N0.getOperand(i: 0); |
636 | } |
637 | |
638 | if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) |
639 | return false; |
640 | |
641 | ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Val: Shift.getOperand(i: 1)); |
642 | if (!ShlCst) |
643 | return false; |
644 | |
645 | uint64_t ShAmt = ShlCst->getZExtValue(); |
646 | |
647 | // Make sure that we don't change the operation by removing bits. |
648 | // This only matters for OR and XOR, AND is unaffected. |
649 | uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(N: ShAmt); |
650 | if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) |
651 | return false; |
652 | |
653 | int64_t ShiftedVal = Val >> ShAmt; |
654 | if (!isInt<12>(x: ShiftedVal)) |
655 | return false; |
656 | |
657 | // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW. |
658 | if (SignExt && ShAmt >= 32) |
659 | return false; |
660 | |
661 | // Ok, we can reorder to get a smaller immediate. |
662 | unsigned BinOpc; |
663 | switch (Opcode) { |
664 | default: llvm_unreachable("Unexpected opcode" ); |
665 | case ISD::AND: BinOpc = RISCV::ANDI; break; |
666 | case ISD::OR: BinOpc = RISCV::ORI; break; |
667 | case ISD::XOR: BinOpc = RISCV::XORI; break; |
668 | } |
669 | |
670 | unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI; |
671 | |
672 | SDNode *BinOp = |
673 | CurDAG->getMachineNode(Opcode: BinOpc, dl: DL, VT, Op1: Shift.getOperand(i: 0), |
674 | Op2: CurDAG->getTargetConstant(Val: ShiftedVal, DL, VT)); |
675 | SDNode *SLLI = |
676 | CurDAG->getMachineNode(Opcode: ShOpc, dl: DL, VT, Op1: SDValue(BinOp, 0), |
677 | Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)); |
678 | ReplaceNode(F: Node, T: SLLI); |
679 | return true; |
680 | } |
681 | |
bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683 | // Only supported with XTHeadBb at the moment. |
684 | if (!Subtarget->hasVendorXTHeadBb()) |
685 | return false; |
686 | |
687 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
688 | if (!N1C) |
689 | return false; |
690 | |
691 | SDValue N0 = Node->getOperand(Num: 0); |
692 | if (!N0.hasOneUse()) |
693 | return false; |
694 | |
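  // TH.EXT rd, rs1, msb, lsb extracts bits [msb:lsb] of rs1 and sign-extends
  // the result into rd.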
  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696 | MVT VT) { |
697 | return CurDAG->getMachineNode(Opcode: RISCV::TH_EXT, dl: DL, VT, Op1: N0.getOperand(i: 0), |
698 | Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT), |
699 | Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT)); |
700 | }; |
701 | |
702 | SDLoc DL(Node); |
703 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
704 | const unsigned RightShAmt = N1C->getZExtValue(); |
705 | |
706 | // Transform (sra (shl X, C1) C2) with C1 < C2 |
707 | // -> (TH.EXT X, msb, lsb) |
708 | if (N0.getOpcode() == ISD::SHL) { |
709 | auto *N01C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
710 | if (!N01C) |
711 | return false; |
712 | |
713 | const unsigned LeftShAmt = N01C->getZExtValue(); |
714 | // Make sure that this is a bitfield extraction (i.e., the shift-right |
    // amount cannot be less than the left-shift).
716 | if (LeftShAmt > RightShAmt) |
717 | return false; |
718 | |
719 | const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt; |
720 | const unsigned Msb = MsbPlusOne - 1; |
721 | const unsigned Lsb = RightShAmt - LeftShAmt; |
722 | |
723 | SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); |
724 | ReplaceNode(F: Node, T: TH_EXT); |
725 | return true; |
726 | } |
727 | |
728 | // Transform (sra (sext_inreg X, _), C) -> |
729 | // (TH.EXT X, msb, lsb) |
730 | if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
731 | unsigned ExtSize = |
732 | cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits(); |
733 | |
734 | // ExtSize of 32 should use sraiw via tablegen pattern. |
735 | if (ExtSize == 32) |
736 | return false; |
737 | |
738 | const unsigned Msb = ExtSize - 1; |
739 | const unsigned Lsb = RightShAmt; |
740 | |
741 | SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); |
742 | ReplaceNode(F: Node, T: TH_EXT); |
743 | return true; |
744 | } |
745 | |
746 | return false; |
747 | } |
748 | |
749 | bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { |
750 | // Target does not support indexed loads. |
751 | if (!Subtarget->hasVendorXTHeadMemIdx()) |
752 | return false; |
753 | |
754 | LoadSDNode *Ld = cast<LoadSDNode>(Val: Node); |
755 | ISD::MemIndexedMode AM = Ld->getAddressingMode(); |
756 | if (AM == ISD::UNINDEXED) |
757 | return false; |
758 | |
759 | const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Ld->getOffset()); |
760 | if (!C) |
761 | return false; |
762 | |
763 | EVT LoadVT = Ld->getMemoryVT(); |
764 | assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) && |
765 | "Unexpected addressing mode" ); |
766 | bool IsPre = AM == ISD::PRE_INC; |
767 | bool IsPost = AM == ISD::POST_INC; |
768 | int64_t Offset = C->getSExtValue(); |
769 | |
770 | // The constants that can be encoded in the THeadMemIdx instructions |
771 | // are of the form (sign_extend(imm5) << imm2). |
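  // For example, an offset of -24 is encoded as imm5 = -3 with imm2 = 3
  // (-3 << 3 == -24).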
772 | int64_t Shift; |
773 | for (Shift = 0; Shift < 4; Shift++) |
774 | if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) |
775 | break; |
776 | |
777 | // Constant cannot be encoded. |
778 | if (Shift == 4) |
779 | return false; |
780 | |
781 | bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD); |
782 | unsigned Opcode; |
783 | if (LoadVT == MVT::i8 && IsPre) |
784 | Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB; |
785 | else if (LoadVT == MVT::i8 && IsPost) |
786 | Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA; |
787 | else if (LoadVT == MVT::i16 && IsPre) |
788 | Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB; |
789 | else if (LoadVT == MVT::i16 && IsPost) |
790 | Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA; |
791 | else if (LoadVT == MVT::i32 && IsPre) |
792 | Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB; |
793 | else if (LoadVT == MVT::i32 && IsPost) |
794 | Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA; |
795 | else if (LoadVT == MVT::i64 && IsPre) |
796 | Opcode = RISCV::TH_LDIB; |
797 | else if (LoadVT == MVT::i64 && IsPost) |
798 | Opcode = RISCV::TH_LDIA; |
799 | else |
800 | return false; |
801 | |
802 | EVT Ty = Ld->getOffset().getValueType(); |
803 | SDValue Ops[] = {Ld->getBasePtr(), |
804 | CurDAG->getTargetConstant(Val: Offset >> Shift, DL: SDLoc(Node), VT: Ty), |
805 | CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(Node), VT: Ty), |
806 | Ld->getChain()}; |
807 | SDNode *New = CurDAG->getMachineNode(Opcode, dl: SDLoc(Node), VT1: Ld->getValueType(ResNo: 0), |
808 | VT2: Ld->getValueType(ResNo: 1), VT3: MVT::Other, Ops); |
809 | |
810 | MachineMemOperand *MemOp = cast<MemSDNode>(Val: Node)->getMemOperand(); |
811 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: New), NewMemRefs: {MemOp}); |
812 | |
813 | ReplaceNode(F: Node, T: New); |
814 | |
815 | return true; |
816 | } |
817 | |
818 | void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) { |
819 | if (!Subtarget->hasVInstructions()) |
820 | return; |
821 | |
822 | assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode" ); |
823 | |
824 | SDLoc DL(Node); |
825 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); |
826 | |
827 | assert((IntNo == Intrinsic::riscv_sf_vc_x_se || |
828 | IntNo == Intrinsic::riscv_sf_vc_i_se) && |
         "Unexpected SiFive vector control intrinsic");
830 | |
831 | // imm, imm, imm, simm5/scalar, sew, log2lmul, vl |
832 | unsigned Log2SEW = Log2_32(Value: Node->getConstantOperandVal(Num: 6)); |
833 | SDValue SEWOp = |
834 | CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: Subtarget->getXLenVT()); |
835 | SmallVector<SDValue, 8> Operands = {Node->getOperand(Num: 2), Node->getOperand(Num: 3), |
836 | Node->getOperand(Num: 4), Node->getOperand(Num: 5), |
837 | Node->getOperand(Num: 8), SEWOp, |
838 | Node->getOperand(Num: 0)}; |
839 | |
840 | unsigned Opcode; |
841 | auto *LMulSDNode = cast<ConstantSDNode>(Val: Node->getOperand(Num: 7)); |
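  // Map the 3-bit LMUL operand onto the matching pseudo: values 0-3 select
  // LMUL 1/2/4/8 and values 5-7 select the fractional LMULs 1/8, 1/4 and 1/2,
  // mirroring the vlmul encoding used by vtype.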
842 | switch (LMulSDNode->getSExtValue()) { |
843 | case 5: |
844 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8 |
845 | : RISCV::PseudoVC_I_SE_MF8; |
846 | break; |
847 | case 6: |
848 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4 |
849 | : RISCV::PseudoVC_I_SE_MF4; |
850 | break; |
851 | case 7: |
852 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2 |
853 | : RISCV::PseudoVC_I_SE_MF2; |
854 | break; |
855 | case 0: |
856 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1 |
857 | : RISCV::PseudoVC_I_SE_M1; |
858 | break; |
859 | case 1: |
860 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2 |
861 | : RISCV::PseudoVC_I_SE_M2; |
862 | break; |
863 | case 2: |
864 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4 |
865 | : RISCV::PseudoVC_I_SE_M4; |
866 | break; |
867 | case 3: |
868 | Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8 |
869 | : RISCV::PseudoVC_I_SE_M8; |
870 | break; |
871 | } |
872 | |
873 | ReplaceNode(F: Node, T: CurDAG->getMachineNode( |
874 | Opcode, dl: DL, VT: Node->getSimpleValueType(ResNo: 0), Ops: Operands)); |
875 | } |
876 | |
877 | void RISCVDAGToDAGISel::Select(SDNode *Node) { |
878 | // If we have a custom node, we have already selected. |
879 | if (Node->isMachineOpcode()) { |
880 | LLVM_DEBUG(dbgs() << "== " ; Node->dump(CurDAG); dbgs() << "\n" ); |
881 | Node->setNodeId(-1); |
882 | return; |
883 | } |
884 | |
885 | // Instruction Selection not handled by the auto-generated tablegen selection |
886 | // should be handled here. |
887 | unsigned Opcode = Node->getOpcode(); |
888 | MVT XLenVT = Subtarget->getXLenVT(); |
889 | SDLoc DL(Node); |
890 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
891 | |
892 | bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs(); |
893 | |
894 | switch (Opcode) { |
895 | case ISD::Constant: { |
896 | assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT" ); |
897 | auto *ConstNode = cast<ConstantSDNode>(Val: Node); |
898 | if (ConstNode->isZero()) { |
899 | SDValue New = |
900 | CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: RISCV::X0, VT); |
901 | ReplaceNode(F: Node, T: New.getNode()); |
902 | return; |
903 | } |
904 | int64_t Imm = ConstNode->getSExtValue(); |
905 | // If only the lower 8 bits are used, try to convert this to a simm6 by |
906 | // sign-extending bit 7. This is neutral without the C extension, and |
907 | // allows C.LI to be used if C is present. |
908 | if (isUInt<8>(x: Imm) && isInt<6>(x: SignExtend64<8>(x: Imm)) && hasAllBUsers(Node)) |
909 | Imm = SignExtend64<8>(x: Imm); |
910 | // If the upper XLen-16 bits are not used, try to convert this to a simm12 |
911 | // by sign extending bit 15. |
912 | if (isUInt<16>(x: Imm) && isInt<12>(x: SignExtend64<16>(x: Imm)) && |
913 | hasAllHUsers(Node)) |
914 | Imm = SignExtend64<16>(x: Imm); |
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
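    // For example, 0xffffffff becomes -1 and can be materialized with a
    // single ADDI (or C.LI) when only the lower 32 bits are read.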
917 | if (!isInt<32>(x: Imm) && isUInt<32>(x: Imm) && hasAllWUsers(Node)) |
918 | Imm = SignExtend64<32>(x: Imm); |
919 | |
920 | ReplaceNode(F: Node, T: selectImm(CurDAG, DL, VT, Imm, Subtarget: *Subtarget).getNode()); |
921 | return; |
922 | } |
923 | case ISD::ConstantFP: { |
924 | const APFloat &APF = cast<ConstantFPSDNode>(Val: Node)->getValueAPF(); |
925 | auto [FPImm, NeedsFNeg] = |
926 | static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(Imm: APF, |
927 | VT); |
928 | if (FPImm >= 0) { |
929 | unsigned Opc; |
930 | unsigned FNegOpc; |
931 | switch (VT.SimpleTy) { |
932 | default: |
933 | llvm_unreachable("Unexpected size" ); |
934 | case MVT::f16: |
935 | Opc = RISCV::FLI_H; |
936 | FNegOpc = RISCV::FSGNJN_H; |
937 | break; |
938 | case MVT::f32: |
939 | Opc = RISCV::FLI_S; |
940 | FNegOpc = RISCV::FSGNJN_S; |
941 | break; |
942 | case MVT::f64: |
943 | Opc = RISCV::FLI_D; |
944 | FNegOpc = RISCV::FSGNJN_D; |
945 | break; |
946 | } |
947 | SDNode *Res = CurDAG->getMachineNode( |
948 | Opcode: Opc, dl: DL, VT, Op1: CurDAG->getTargetConstant(Val: FPImm, DL, VT: XLenVT)); |
949 | if (NeedsFNeg) |
950 | Res = CurDAG->getMachineNode(Opcode: FNegOpc, dl: DL, VT, Op1: SDValue(Res, 0), |
951 | Op2: SDValue(Res, 0)); |
952 | |
953 | ReplaceNode(F: Node, T: Res); |
954 | return; |
955 | } |
956 | |
957 | bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64; |
958 | SDValue Imm; |
959 | // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will |
960 | // create an integer immediate. |
961 | if (APF.isPosZero() || NegZeroF64) |
962 | Imm = CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT); |
963 | else |
964 | Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(), |
965 | Subtarget: *Subtarget); |
966 | |
967 | bool HasZdinx = Subtarget->hasStdExtZdinx(); |
968 | bool Is64Bit = Subtarget->is64Bit(); |
969 | unsigned Opc; |
970 | switch (VT.SimpleTy) { |
971 | default: |
972 | llvm_unreachable("Unexpected size" ); |
973 | case MVT::bf16: |
974 | assert(Subtarget->hasStdExtZfbfmin()); |
975 | Opc = RISCV::FMV_H_X; |
976 | break; |
977 | case MVT::f16: |
978 | Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; |
979 | break; |
980 | case MVT::f32: |
981 | Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X; |
982 | break; |
983 | case MVT::f64: |
      // For RV32, we can't move from a GPR; we need to convert instead. This
985 | // should only happen for +0.0 and -0.0. |
986 | assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant" ); |
987 | if (Is64Bit) |
988 | Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X; |
989 | else |
990 | Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W; |
991 | break; |
992 | } |
993 | |
994 | SDNode *Res; |
995 | if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) |
996 | Res = CurDAG->getMachineNode( |
997 | Opcode: Opc, dl: DL, VT, Op1: Imm, |
998 | Op2: CurDAG->getTargetConstant(Val: RISCVFPRndMode::RNE, DL, VT: XLenVT)); |
999 | else |
1000 | Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: Imm); |
1001 | |
1002 | // For f64 -0.0, we need to insert a fneg.d idiom. |
1003 | if (NegZeroF64) { |
1004 | Opc = RISCV::FSGNJN_D; |
1005 | if (HasZdinx) |
1006 | Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X; |
1007 | Res = |
1008 | CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT, Op1: SDValue(Res, 0), Op2: SDValue(Res, 0)); |
1009 | } |
1010 | |
1011 | ReplaceNode(F: Node, T: Res); |
1012 | return; |
1013 | } |
1014 | case RISCVISD::BuildPairF64: { |
1015 | if (!Subtarget->hasStdExtZdinx()) |
1016 | break; |
1017 | |
1018 | assert(!Subtarget->is64Bit() && "Unexpected subtarget" ); |
1019 | |
1020 | SDValue Ops[] = { |
1021 | CurDAG->getTargetConstant(Val: RISCV::GPRPairRegClassID, DL, VT: MVT::i32), |
1022 | Node->getOperand(Num: 0), |
1023 | CurDAG->getTargetConstant(Val: RISCV::sub_gpr_even, DL, VT: MVT::i32), |
1024 | Node->getOperand(Num: 1), |
1025 | CurDAG->getTargetConstant(Val: RISCV::sub_gpr_odd, DL, VT: MVT::i32)}; |
1026 | |
1027 | SDNode *N = |
1028 | CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::f64, Ops); |
1029 | ReplaceNode(F: Node, T: N); |
1030 | return; |
1031 | } |
1032 | case RISCVISD::SplitF64: { |
1033 | if (Subtarget->hasStdExtZdinx()) { |
1034 | assert(!Subtarget->is64Bit() && "Unexpected subtarget" ); |
1035 | |
1036 | if (!SDValue(Node, 0).use_empty()) { |
1037 | SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_even, DL, VT, |
1038 | Operand: Node->getOperand(Num: 0)); |
1039 | ReplaceUses(F: SDValue(Node, 0), T: Lo); |
1040 | } |
1041 | |
1042 | if (!SDValue(Node, 1).use_empty()) { |
1043 | SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: RISCV::sub_gpr_odd, DL, VT, |
1044 | Operand: Node->getOperand(Num: 0)); |
1045 | ReplaceUses(F: SDValue(Node, 1), T: Hi); |
1046 | } |
1047 | |
1048 | CurDAG->RemoveDeadNode(N: Node); |
1049 | return; |
1050 | } |
1051 | |
1052 | if (!Subtarget->hasStdExtZfa()) |
1053 | break; |
1054 | assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() && |
1055 | "Unexpected subtarget" ); |
1056 | |
1057 | // With Zfa, lower to fmv.x.w and fmvh.x.d. |
1058 | if (!SDValue(Node, 0).use_empty()) { |
1059 | SDNode *Lo = CurDAG->getMachineNode(Opcode: RISCV::FMV_X_W_FPR64, dl: DL, VT, |
1060 | Op1: Node->getOperand(Num: 0)); |
1061 | ReplaceUses(F: SDValue(Node, 0), T: SDValue(Lo, 0)); |
1062 | } |
1063 | if (!SDValue(Node, 1).use_empty()) { |
1064 | SDNode *Hi = CurDAG->getMachineNode(Opcode: RISCV::FMVH_X_D, dl: DL, VT, |
1065 | Op1: Node->getOperand(Num: 0)); |
1066 | ReplaceUses(F: SDValue(Node, 1), T: SDValue(Hi, 0)); |
1067 | } |
1068 | |
1069 | CurDAG->RemoveDeadNode(N: Node); |
1070 | return; |
1071 | } |
1072 | case ISD::SHL: { |
1073 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1074 | if (!N1C) |
1075 | break; |
1076 | SDValue N0 = Node->getOperand(Num: 0); |
1077 | if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || |
1078 | !isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) |
1079 | break; |
1080 | unsigned ShAmt = N1C->getZExtValue(); |
1081 | uint64_t Mask = N0.getConstantOperandVal(i: 1); |
1082 | |
1083 | // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has |
1084 | // 32 leading zeros and C3 trailing zeros. |
1085 | if (ShAmt <= 32 && isShiftedMask_64(Value: Mask)) { |
1086 | unsigned XLen = Subtarget->getXLen(); |
1087 | unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask); |
1088 | unsigned TrailingZeros = llvm::countr_zero(Val: Mask); |
1089 | if (TrailingZeros > 0 && LeadingZeros == 32) { |
1090 | SDNode *SRLIW = CurDAG->getMachineNode( |
1091 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0->getOperand(Num: 0), |
1092 | Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT)); |
1093 | SDNode *SLLI = CurDAG->getMachineNode( |
1094 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0), |
1095 | Op2: CurDAG->getTargetConstant(Val: TrailingZeros + ShAmt, DL, VT)); |
1096 | ReplaceNode(F: Node, T: SLLI); |
1097 | return; |
1098 | } |
1099 | } |
1100 | break; |
1101 | } |
1102 | case ISD::SRL: { |
1103 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1104 | if (!N1C) |
1105 | break; |
1106 | SDValue N0 = Node->getOperand(Num: 0); |
1107 | if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) |
1108 | break; |
1109 | unsigned ShAmt = N1C->getZExtValue(); |
1110 | uint64_t Mask = N0.getConstantOperandVal(i: 1); |
1111 | |
1112 | // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has |
1113 | // 32 leading zeros and C3 trailing zeros. |
1114 | if (isShiftedMask_64(Value: Mask) && N0.hasOneUse()) { |
1115 | unsigned XLen = Subtarget->getXLen(); |
1116 | unsigned LeadingZeros = XLen - llvm::bit_width(Value: Mask); |
1117 | unsigned TrailingZeros = llvm::countr_zero(Val: Mask); |
1118 | if (LeadingZeros == 32 && TrailingZeros > ShAmt) { |
1119 | SDNode *SRLIW = CurDAG->getMachineNode( |
1120 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0->getOperand(Num: 0), |
1121 | Op2: CurDAG->getTargetConstant(Val: TrailingZeros, DL, VT)); |
1122 | SDNode *SLLI = CurDAG->getMachineNode( |
1123 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0), |
1124 | Op2: CurDAG->getTargetConstant(Val: TrailingZeros - ShAmt, DL, VT)); |
1125 | ReplaceNode(F: Node, T: SLLI); |
1126 | return; |
1127 | } |
1128 | } |
1129 | |
1130 | // Optimize (srl (and X, C2), C) -> |
    //   (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1132 | // Where C2 is a mask with C3 trailing ones. |
1133 | // Taking into account that the C2 may have had lower bits unset by |
1134 | // SimplifyDemandedBits. This avoids materializing the C2 immediate. |
1135 | // This pattern occurs when type legalizing right shifts for types with |
1136 | // less than XLen bits. |
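    // For example, with XLen=64, (srl (and X, 0x3fff), 4) becomes
    // (srli (slli X, 50), 54), extracting bits [13:4].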
1137 | Mask |= maskTrailingOnes<uint64_t>(N: ShAmt); |
1138 | if (!isMask_64(Value: Mask)) |
1139 | break; |
1140 | unsigned TrailingOnes = llvm::countr_one(Value: Mask); |
1141 | if (ShAmt >= TrailingOnes) |
1142 | break; |
1143 | // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64. |
1144 | if (TrailingOnes == 32) { |
1145 | SDNode *SRLI = CurDAG->getMachineNode( |
1146 | Opcode: Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, dl: DL, VT, |
1147 | Op1: N0->getOperand(Num: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)); |
1148 | ReplaceNode(F: Node, T: SRLI); |
1149 | return; |
1150 | } |
1151 | |
1152 | // Only do the remaining transforms if the AND has one use. |
1153 | if (!N0.hasOneUse()) |
1154 | break; |
1155 | |
1156 | // If C2 is (1 << ShAmt) use bexti or th.tst if possible. |
1157 | if (HasBitTest && ShAmt + 1 == TrailingOnes) { |
1158 | SDNode *BEXTI = CurDAG->getMachineNode( |
1159 | Opcode: Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, dl: DL, VT, |
1160 | Op1: N0->getOperand(Num: 0), Op2: CurDAG->getTargetConstant(Val: ShAmt, DL, VT)); |
1161 | ReplaceNode(F: Node, T: BEXTI); |
1162 | return; |
1163 | } |
1164 | |
1165 | unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; |
1166 | SDNode *SLLI = |
1167 | CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0->getOperand(Num: 0), |
1168 | Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT)); |
1169 | SDNode *SRLI = CurDAG->getMachineNode( |
1170 | Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0), |
1171 | Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT)); |
1172 | ReplaceNode(F: Node, T: SRLI); |
1173 | return; |
1174 | } |
1175 | case ISD::SRA: { |
1176 | if (trySignedBitfieldExtract(Node)) |
1177 | return; |
1178 | |
1179 | // Optimize (sra (sext_inreg X, i16), C) -> |
    //   (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //   (srai (slli X, (XLen-8)), (XLen-8) + C)
1183 | // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. |
1184 | // This transform matches the code we get without Zbb. The shifts are more |
1185 | // compressible, and this can help expose CSE opportunities in the sdiv by |
1186 | // constant optimization. |
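    // For example, on RV64 (sra (sext_inreg X, i16), 3) becomes
    // (srai (slli X, 48), 51).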
1187 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1188 | if (!N1C) |
1189 | break; |
1190 | SDValue N0 = Node->getOperand(Num: 0); |
1191 | if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) |
1192 | break; |
1193 | unsigned ShAmt = N1C->getZExtValue(); |
1194 | unsigned ExtSize = |
1195 | cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits(); |
1196 | // ExtSize of 32 should use sraiw via tablegen pattern. |
1197 | if (ExtSize >= 32 || ShAmt >= ExtSize) |
1198 | break; |
1199 | unsigned LShAmt = Subtarget->getXLen() - ExtSize; |
1200 | SDNode *SLLI = |
1201 | CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0->getOperand(Num: 0), |
1202 | Op2: CurDAG->getTargetConstant(Val: LShAmt, DL, VT)); |
1203 | SDNode *SRAI = CurDAG->getMachineNode( |
1204 | Opcode: RISCV::SRAI, dl: DL, VT, Op1: SDValue(SLLI, 0), |
1205 | Op2: CurDAG->getTargetConstant(Val: LShAmt + ShAmt, DL, VT)); |
1206 | ReplaceNode(F: Node, T: SRAI); |
1207 | return; |
1208 | } |
1209 | case ISD::OR: |
1210 | case ISD::XOR: |
1211 | if (tryShrinkShlLogicImm(Node)) |
1212 | return; |
1213 | |
1214 | break; |
1215 | case ISD::AND: { |
1216 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1217 | if (!N1C) |
1218 | break; |
1219 | uint64_t C1 = N1C->getZExtValue(); |
1220 | const bool isC1Mask = isMask_64(Value: C1); |
1221 | const bool isC1ANDI = isInt<12>(x: C1); |
1222 | |
1223 | SDValue N0 = Node->getOperand(Num: 0); |
1224 | |
    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1226 | SDValue X, unsigned Msb, |
1227 | unsigned Lsb) { |
1228 | if (!Subtarget->hasVendorXTHeadBb()) |
1229 | return false; |
1230 | |
1231 | SDNode *TH_EXTU = CurDAG->getMachineNode( |
1232 | Opcode: RISCV::TH_EXTU, dl: DL, VT, Op1: X, Op2: CurDAG->getTargetConstant(Val: Msb, DL, VT), |
1233 | Op3: CurDAG->getTargetConstant(Val: Lsb, DL, VT)); |
1234 | ReplaceNode(F: Node, T: TH_EXTU); |
1235 | return true; |
1236 | }; |
1237 | |
1238 | bool LeftShift = N0.getOpcode() == ISD::SHL; |
1239 | if (LeftShift || N0.getOpcode() == ISD::SRL) { |
1240 | auto *C = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); |
1241 | if (!C) |
1242 | break; |
1243 | unsigned C2 = C->getZExtValue(); |
1244 | unsigned XLen = Subtarget->getXLen(); |
1245 | assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!" ); |
1246 | |
1247 | // Keep track of whether this is a c.andi. If we can't use c.andi, the |
1248 | // shift pair might offer more compression opportunities. |
1249 | // TODO: We could check for C extension here, but we don't have many lit |
1250 | // tests with the C extension enabled so not checking gets better |
1251 | // coverage. |
      // TODO: What if ANDI is faster than the shift pair?
1253 | bool IsCANDI = isInt<6>(x: N1C->getSExtValue()); |
1254 | |
1255 | // Clear irrelevant bits in the mask. |
1256 | if (LeftShift) |
1257 | C1 &= maskTrailingZeros<uint64_t>(N: C2); |
1258 | else |
1259 | C1 &= maskTrailingOnes<uint64_t>(N: XLen - C2); |
1260 | |
1261 | // Some transforms should only be done if the shift has a single use or |
      // the AND would become (srli (slli X, 32), 32).
1263 | bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); |
1264 | |
1265 | SDValue X = N0.getOperand(i: 0); |
1266 | |
      // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268 | // with c3 leading zeros. |
1269 | if (!LeftShift && isC1Mask) { |
1270 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1271 | if (C2 < Leading) { |
1272 | // If the number of leading zeros is C2+32 this can be SRLIW. |
1273 | if (C2 + 32 == Leading) { |
1274 | SDNode *SRLIW = CurDAG->getMachineNode( |
1275 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, Op2: CurDAG->getTargetConstant(Val: C2, DL, VT)); |
1276 | ReplaceNode(F: Node, T: SRLIW); |
1277 | return; |
1278 | } |
1279 | |
1280 | // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) |
1281 | // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. |
1282 | // |
          // This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
1284 | // legalized and goes through DAG combine. |
1285 | if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && |
1286 | X.getOpcode() == ISD::SIGN_EXTEND_INREG && |
1287 | cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32) { |
1288 | SDNode *SRAIW = |
1289 | CurDAG->getMachineNode(Opcode: RISCV::SRAIW, dl: DL, VT, Op1: X.getOperand(i: 0), |
1290 | Op2: CurDAG->getTargetConstant(Val: 31, DL, VT)); |
1291 | SDNode *SRLIW = CurDAG->getMachineNode( |
1292 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: SDValue(SRAIW, 0), |
1293 | Op2: CurDAG->getTargetConstant(Val: Leading - 32, DL, VT)); |
1294 | ReplaceNode(F: Node, T: SRLIW); |
1295 | return; |
1296 | } |
1297 | |
1298 | // Try to use an unsigned bitfield extract (e.g., th.extu) if |
1299 | // available. |
1300 | // Transform (and (srl x, C2), C1) |
1301 | // -> (<bfextract> x, msb, lsb) |
1302 | // |
1303 | // Make sure to keep this below the SRLIW cases, as we always want to |
1304 | // prefer the more common instruction. |
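          // For example, C1 = 0xff with C2 = 8 extracts bits [15:8] of x.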
1305 | const unsigned Msb = llvm::bit_width(Value: C1) + C2 - 1; |
1306 | const unsigned Lsb = C2; |
1307 | if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb)) |
1308 | return; |
1309 | |
1310 | // (srli (slli x, c3-c2), c3). |
1311 | // Skip if we could use (zext.w (sraiw X, C2)). |
1312 | bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && |
1313 | X.getOpcode() == ISD::SIGN_EXTEND_INREG && |
1314 | cast<VTSDNode>(Val: X.getOperand(i: 1))->getVT() == MVT::i32; |
1315 | // Also Skip if we can use bexti or th.tst. |
1316 | Skip |= HasBitTest && Leading == XLen - 1; |
1317 | if (OneUseOrZExtW && !Skip) { |
1318 | SDNode *SLLI = CurDAG->getMachineNode( |
1319 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: X, |
1320 | Op2: CurDAG->getTargetConstant(Val: Leading - C2, DL, VT)); |
1321 | SDNode *SRLI = CurDAG->getMachineNode( |
1322 | Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0), |
1323 | Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT)); |
1324 | ReplaceNode(F: Node, T: SRLI); |
1325 | return; |
1326 | } |
1327 | } |
1328 | } |
1329 | |
      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1331 | // shifted by c2 bits with c3 leading zeros. |
1332 | if (LeftShift && isShiftedMask_64(Value: C1)) { |
1333 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1334 | |
1335 | if (C2 + Leading < XLen && |
1336 | C1 == (maskTrailingOnes<uint64_t>(N: XLen - (C2 + Leading)) << C2)) { |
1337 | // Use slli.uw when possible. |
1338 | if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { |
1339 | SDNode *SLLI_UW = |
1340 | CurDAG->getMachineNode(Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: X, |
1341 | Op2: CurDAG->getTargetConstant(Val: C2, DL, VT)); |
1342 | ReplaceNode(F: Node, T: SLLI_UW); |
1343 | return; |
1344 | } |
1345 | |
        // (srli (slli x, c2+c3), c3)
1347 | if (OneUseOrZExtW && !IsCANDI) { |
1348 | SDNode *SLLI = CurDAG->getMachineNode( |
1349 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: X, |
1350 | Op2: CurDAG->getTargetConstant(Val: C2 + Leading, DL, VT)); |
1351 | SDNode *SRLI = CurDAG->getMachineNode( |
1352 | Opcode: RISCV::SRLI, dl: DL, VT, Op1: SDValue(SLLI, 0), |
1353 | Op2: CurDAG->getTargetConstant(Val: Leading, DL, VT)); |
1354 | ReplaceNode(F: Node, T: SRLI); |
1355 | return; |
1356 | } |
1357 | } |
1358 | } |
1359 | |
1360 | // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a |
1361 | // shifted mask with c2 leading zeros and c3 trailing zeros. |
1362 | if (!LeftShift && isShiftedMask_64(Value: C1)) { |
1363 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1364 | unsigned Trailing = llvm::countr_zero(Val: C1); |
1365 | if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && |
1366 | !IsCANDI) { |
1367 | unsigned SrliOpc = RISCV::SRLI; |
1368 | // If the input is zexti32 we should use SRLIW. |
1369 | if (X.getOpcode() == ISD::AND && |
1370 | isa<ConstantSDNode>(Val: X.getOperand(i: 1)) && |
1371 | X.getConstantOperandVal(i: 1) == UINT64_C(0xFFFFFFFF)) { |
1372 | SrliOpc = RISCV::SRLIW; |
1373 | X = X.getOperand(i: 0); |
1374 | } |
1375 | SDNode *SRLI = CurDAG->getMachineNode( |
1376 | Opcode: SrliOpc, dl: DL, VT, Op1: X, |
1377 | Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT)); |
1378 | SDNode *SLLI = CurDAG->getMachineNode( |
1379 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0), |
1380 | Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)); |
1381 | ReplaceNode(F: Node, T: SLLI); |
1382 | return; |
1383 | } |
1384 | // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. |
1385 | if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && |
1386 | OneUseOrZExtW && !IsCANDI) { |
1387 | SDNode *SRLIW = CurDAG->getMachineNode( |
1388 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, |
1389 | Op2: CurDAG->getTargetConstant(Val: C2 + Trailing, DL, VT)); |
1390 | SDNode *SLLI = CurDAG->getMachineNode( |
1391 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0), |
1392 | Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)); |
1393 | ReplaceNode(F: Node, T: SLLI); |
1394 | return; |
1395 | } |
1396 | } |
1397 | |
1398 | // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a |
1399 | // shifted mask with no leading zeros and c3 trailing zeros. |
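// For example (illustrative): on RV64 with c2 = 2 and
// c1 = 0xFFFFFFFFFFFFFF00 (no leading zeros, 8 trailing zeros),
// (and (shl x, 2), c1) becomes (slli (srli x, 6), 8).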
1400 | if (LeftShift && isShiftedMask_64(Value: C1)) { |
1401 | unsigned Leading = XLen - llvm::bit_width(Value: C1); |
1402 | unsigned Trailing = llvm::countr_zero(Val: C1); |
1403 | if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { |
1404 | SDNode *SRLI = CurDAG->getMachineNode( |
1405 | Opcode: RISCV::SRLI, dl: DL, VT, Op1: X, |
1406 | Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT)); |
1407 | SDNode *SLLI = CurDAG->getMachineNode( |
1408 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLI, 0), |
1409 | Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)); |
1410 | ReplaceNode(F: Node, T: SLLI); |
1411 | return; |
1412 | } |
1413 | // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. |
1414 | if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { |
1415 | SDNode *SRLIW = CurDAG->getMachineNode( |
1416 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: X, |
1417 | Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT)); |
1418 | SDNode *SLLI = CurDAG->getMachineNode( |
1419 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: SDValue(SRLIW, 0), |
1420 | Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)); |
1421 | ReplaceNode(F: Node, T: SLLI); |
1422 | return; |
1423 | } |
1424 | |
1425 | // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI. |
1426 | if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW && |
1427 | Subtarget->hasStdExtZba()) { |
1428 | SDNode *SRLI = CurDAG->getMachineNode( |
1429 | Opcode: RISCV::SRLI, dl: DL, VT, Op1: X, |
1430 | Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT)); |
1431 | SDNode *SLLI_UW = CurDAG->getMachineNode( |
1432 | Opcode: RISCV::SLLI_UW, dl: DL, VT, Op1: SDValue(SRLI, 0), |
1433 | Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)); |
1434 | ReplaceNode(F: Node, T: SLLI_UW); |
1435 | return; |
1436 | } |
1437 | } |
1438 | } |
1439 | |
1440 | // If C1 masks off the upper bits only (but can't be formed as an |
1441 | // ANDI), use an unsigned bitfield extract (e.g., th.extu), if |
1442 | // available. |
1443 | // Transform (and x, C1) |
1444 | // -> (<bfextract> x, msb, lsb) |
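// For example (illustrative): with C1 = 0xFFFFF (20 ones, not a simm12),
// Msb is 19 and the extract returns bits [19:0] of x, i.e. x & 0xFFFFF.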
1445 | if (isC1Mask && !isC1ANDI) { |
1446 | const unsigned Msb = llvm::bit_width(Value: C1) - 1; |
1447 | if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0)) |
1448 | return; |
1449 | } |
1450 | |
1451 | if (tryShrinkShlLogicImm(Node)) |
1452 | return; |
1453 | |
1454 | break; |
1455 | } |
1456 | case ISD::MUL: { |
1457 | // Special case for calculating (mul (and X, C2), C1) where the full product |
1458 | // fits in XLen bits. We can shift X left by the number of leading zeros in |
1459 | // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final |
1460 | // product has XLen trailing zeros, putting it in the output of MULHU. This |
1461 | // can avoid materializing a constant in a register for C2. |
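// For example (illustrative): on RV64 with C2 = 0x7FFFFF (so
// lzcnt(C2) = 41) and C1 = 100, the 128-bit product of (slli X, 41) and
// (100 << 23) is exactly ((X & 0x7FFFFF) * 100) << 64, so MULHU returns
// (X & 0x7FFFFF) * 100 without ever materializing 0x7FFFFF.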
1462 | |
1463 | // RHS should be a constant. |
1464 | auto *N1C = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
1465 | if (!N1C || !N1C->hasOneUse()) |
1466 | break; |
1467 | |
1468 | // LHS should be an AND with constant. |
1469 | SDValue N0 = Node->getOperand(Num: 0); |
1470 | if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) |
1471 | break; |
1472 | |
1473 | uint64_t C2 = N0.getConstantOperandVal(i: 1); |
1474 | |
1475 | // Constant should be a mask. |
1476 | if (!isMask_64(Value: C2)) |
1477 | break; |
1478 | |
1479 | // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has |
1480 | // multiple users or the constant is a simm12. This prevents inserting a |
// shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1482 | // make it more costly to materialize. Otherwise, using a SLLI might allow |
1483 | // it to be compressed. |
1484 | bool IsANDIOrZExt = |
1485 | isInt<12>(x: C2) || |
1486 | (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()); |
1487 | // With XTHeadBb, we can use TH.EXTU. |
1488 | IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb(); |
1489 | if (IsANDIOrZExt && (isInt<12>(x: N1C->getSExtValue()) || !N0.hasOneUse())) |
1490 | break; |
1491 | // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or |
1492 | // the constant is a simm32. |
1493 | bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba(); |
1494 | // With XTHeadBb, we can use TH.EXTU. |
1495 | IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb(); |
1496 | if (IsZExtW && (isInt<32>(x: N1C->getSExtValue()) || !N0.hasOneUse())) |
1497 | break; |
1498 | |
// We need to shift the AND input and C1 left by a total of XLen bits.
1500 | |
1501 | // How far left do we need to shift the AND input? |
1502 | unsigned XLen = Subtarget->getXLen(); |
1503 | unsigned LeadingZeros = XLen - llvm::bit_width(Value: C2); |
1504 | |
1505 | // The constant gets shifted by the remaining amount unless that would |
1506 | // shift bits out. |
1507 | uint64_t C1 = N1C->getZExtValue(); |
1508 | unsigned ConstantShift = XLen - LeadingZeros; |
1509 | if (ConstantShift > (XLen - llvm::bit_width(Value: C1))) |
1510 | break; |
1511 | |
1512 | uint64_t ShiftedC1 = C1 << ConstantShift; |
// If this is RV32, we need to sign extend the constant.
1514 | if (XLen == 32) |
1515 | ShiftedC1 = SignExtend64<32>(x: ShiftedC1); |
1516 | |
1517 | // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). |
1518 | SDNode *Imm = selectImm(CurDAG, DL, VT, Imm: ShiftedC1, Subtarget: *Subtarget).getNode(); |
1519 | SDNode *SLLI = |
1520 | CurDAG->getMachineNode(Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0), |
1521 | Op2: CurDAG->getTargetConstant(Val: LeadingZeros, DL, VT)); |
1522 | SDNode *MULHU = CurDAG->getMachineNode(Opcode: RISCV::MULHU, dl: DL, VT, |
1523 | Op1: SDValue(SLLI, 0), Op2: SDValue(Imm, 0)); |
1524 | ReplaceNode(F: Node, T: MULHU); |
1525 | return; |
1526 | } |
1527 | case ISD::LOAD: { |
1528 | if (tryIndexedLoad(Node)) |
1529 | return; |
1530 | |
1531 | if (Subtarget->hasVendorXCVmem()) { |
1532 | // We match post-incrementing load here |
1533 | LoadSDNode *Load = cast<LoadSDNode>(Val: Node); |
1534 | if (Load->getAddressingMode() != ISD::POST_INC) |
1535 | break; |
1536 | |
1537 | SDValue Chain = Node->getOperand(Num: 0); |
1538 | SDValue Base = Node->getOperand(Num: 1); |
1539 | SDValue Offset = Node->getOperand(Num: 2); |
1540 | |
1541 | bool Simm12 = false; |
1542 | bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD; |
1543 | |
1544 | if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Val&: Offset)) { |
1545 | int ConstantVal = ConstantOffset->getSExtValue(); |
1546 | Simm12 = isInt<12>(x: ConstantVal); |
1547 | if (Simm12) |
1548 | Offset = CurDAG->getTargetConstant(Val: ConstantVal, DL: SDLoc(Offset), |
1549 | VT: Offset.getValueType()); |
1550 | } |
1551 | |
1552 | unsigned Opcode = 0; |
1553 | switch (Load->getMemoryVT().getSimpleVT().SimpleTy) { |
1554 | case MVT::i8: |
1555 | if (Simm12 && SignExtend) |
1556 | Opcode = RISCV::CV_LB_ri_inc; |
1557 | else if (Simm12 && !SignExtend) |
1558 | Opcode = RISCV::CV_LBU_ri_inc; |
1559 | else if (!Simm12 && SignExtend) |
1560 | Opcode = RISCV::CV_LB_rr_inc; |
1561 | else |
1562 | Opcode = RISCV::CV_LBU_rr_inc; |
1563 | break; |
1564 | case MVT::i16: |
1565 | if (Simm12 && SignExtend) |
1566 | Opcode = RISCV::CV_LH_ri_inc; |
1567 | else if (Simm12 && !SignExtend) |
1568 | Opcode = RISCV::CV_LHU_ri_inc; |
1569 | else if (!Simm12 && SignExtend) |
1570 | Opcode = RISCV::CV_LH_rr_inc; |
1571 | else |
1572 | Opcode = RISCV::CV_LHU_rr_inc; |
1573 | break; |
1574 | case MVT::i32: |
1575 | if (Simm12) |
1576 | Opcode = RISCV::CV_LW_ri_inc; |
1577 | else |
1578 | Opcode = RISCV::CV_LW_rr_inc; |
1579 | break; |
1580 | default: |
1581 | break; |
1582 | } |
1583 | if (!Opcode) |
1584 | break; |
1585 | |
1586 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode, dl: DL, VT1: XLenVT, VT2: XLenVT, |
1587 | VT3: Chain.getSimpleValueType(), Op1: Base, |
1588 | Op2: Offset, Op3: Chain)); |
1589 | return; |
1590 | } |
1591 | break; |
1592 | } |
1593 | case ISD::INTRINSIC_WO_CHAIN: { |
1594 | unsigned IntNo = Node->getConstantOperandVal(Num: 0); |
1595 | switch (IntNo) { |
1596 | // By default we do not custom select any intrinsic. |
1597 | default: |
1598 | break; |
1599 | case Intrinsic::riscv_vmsgeu: |
1600 | case Intrinsic::riscv_vmsge: { |
1601 | SDValue Src1 = Node->getOperand(Num: 1); |
1602 | SDValue Src2 = Node->getOperand(Num: 2); |
1603 | bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; |
1604 | bool IsCmpUnsignedZero = false; |
1605 | // Only custom select scalar second operand. |
1606 | if (Src2.getValueType() != XLenVT) |
1607 | break; |
1608 | // Small constants are handled with patterns. |
1609 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) { |
1610 | int64_t CVal = C->getSExtValue(); |
1611 | if (CVal >= -15 && CVal <= 16) { |
1612 | if (!IsUnsigned || CVal != 0) |
1613 | break; |
1614 | IsCmpUnsignedZero = true; |
1615 | } |
1616 | } |
1617 | MVT Src1VT = Src1.getSimpleValueType(); |
1618 | unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; |
1619 | switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) { |
1620 | default: |
1621 | llvm_unreachable("Unexpected LMUL!" ); |
1622 | #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ |
1623 | case RISCVII::VLMUL::lmulenum: \ |
1624 | VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ |
1625 | : RISCV::PseudoVMSLT_VX_##suffix; \ |
1626 | VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ |
1627 | VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ |
1628 | break; |
1629 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) |
1630 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) |
1631 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) |
1632 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) |
1633 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) |
1634 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) |
1635 | CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) |
1636 | #undef CASE_VMSLT_VMNAND_VMSET_OPCODES |
1637 | } |
1638 | SDValue SEW = CurDAG->getTargetConstant( |
1639 | Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT); |
1640 | SDValue VL; |
1641 | selectVLOp(N: Node->getOperand(Num: 3), VL); |
1642 | |
1643 | // If vmsgeu with 0 immediate, expand it to vmset. |
1644 | if (IsCmpUnsignedZero) { |
1645 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMSetOpcode, dl: DL, VT, Op1: VL, Op2: SEW)); |
1646 | return; |
1647 | } |
1648 | |
1649 | // Expand to |
1650 | // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd |
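// This works because vmsge{u}(va, x) is !(va < x) and vmnand.mm of a
// register with itself is a bitwise NOT.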
1651 | SDValue Cmp = SDValue( |
1652 | CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}), |
1653 | 0); |
1654 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMNANDOpcode, dl: DL, VT, |
1655 | Ops: {Cmp, Cmp, VL, SEW})); |
1656 | return; |
1657 | } |
1658 | case Intrinsic::riscv_vmsgeu_mask: |
1659 | case Intrinsic::riscv_vmsge_mask: { |
1660 | SDValue Src1 = Node->getOperand(Num: 2); |
1661 | SDValue Src2 = Node->getOperand(Num: 3); |
1662 | bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; |
1663 | bool IsCmpUnsignedZero = false; |
1664 | // Only custom select scalar second operand. |
1665 | if (Src2.getValueType() != XLenVT) |
1666 | break; |
1667 | // Small constants are handled with patterns. |
1668 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Src2)) { |
1669 | int64_t CVal = C->getSExtValue(); |
1670 | if (CVal >= -15 && CVal <= 16) { |
1671 | if (!IsUnsigned || CVal != 0) |
1672 | break; |
1673 | IsCmpUnsignedZero = true; |
1674 | } |
1675 | } |
1676 | MVT Src1VT = Src1.getSimpleValueType(); |
1677 | unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, |
1678 | VMOROpcode; |
1679 | switch (RISCVTargetLowering::getLMUL(VT: Src1VT)) { |
1680 | default: |
1681 | llvm_unreachable("Unexpected LMUL!" ); |
1682 | #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ |
1683 | case RISCVII::VLMUL::lmulenum: \ |
1684 | VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ |
1685 | : RISCV::PseudoVMSLT_VX_##suffix; \ |
1686 | VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ |
1687 | : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ |
1688 | break; |
1689 | CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) |
1690 | CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) |
1691 | CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) |
1692 | CASE_VMSLT_OPCODES(LMUL_1, M1, B8) |
1693 | CASE_VMSLT_OPCODES(LMUL_2, M2, B16) |
1694 | CASE_VMSLT_OPCODES(LMUL_4, M4, B32) |
1695 | CASE_VMSLT_OPCODES(LMUL_8, M8, B64) |
1696 | #undef CASE_VMSLT_OPCODES |
1697 | } |
1698 | // Mask operations use the LMUL from the mask type. |
1699 | switch (RISCVTargetLowering::getLMUL(VT)) { |
1700 | default: |
1701 | llvm_unreachable("Unexpected LMUL!" ); |
1702 | #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ |
1703 | case RISCVII::VLMUL::lmulenum: \ |
1704 | VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ |
1705 | VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ |
1706 | VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ |
1707 | break; |
1708 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) |
1709 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) |
1710 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) |
1711 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) |
1712 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) |
1713 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) |
1714 | CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) |
1715 | #undef CASE_VMXOR_VMANDN_VMOR_OPCODES |
1716 | } |
1717 | SDValue SEW = CurDAG->getTargetConstant( |
1718 | Val: Log2_32(Value: Src1VT.getScalarSizeInBits()), DL, VT: XLenVT); |
1719 | SDValue MaskSEW = CurDAG->getTargetConstant(Val: 0, DL, VT: XLenVT); |
1720 | SDValue VL; |
1721 | selectVLOp(N: Node->getOperand(Num: 5), VL); |
1722 | SDValue MaskedOff = Node->getOperand(Num: 1); |
1723 | SDValue Mask = Node->getOperand(Num: 4); |
1724 | |
1725 | // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. |
1726 | if (IsCmpUnsignedZero) { |
1727 | // We don't need vmor if the MaskedOff and the Mask are the same |
1728 | // value. |
1729 | if (Mask == MaskedOff) { |
1730 | ReplaceUses(F: Node, T: Mask.getNode()); |
1731 | return; |
1732 | } |
1733 | ReplaceNode(F: Node, |
1734 | T: CurDAG->getMachineNode(Opcode: VMOROpcode, dl: DL, VT, |
1735 | Ops: {Mask, MaskedOff, VL, MaskSEW})); |
1736 | return; |
1737 | } |
1738 | |
// If the MaskedOff value and the Mask are the same value, use
1740 | // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt |
1741 | // This avoids needing to copy v0 to vd before starting the next sequence. |
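// Lane-wise this is still correct: inactive lanes (Mask = MaskedOff = 0)
// produce 0, and active lanes produce !(va < x), i.e. va >= x.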
1742 | if (Mask == MaskedOff) { |
1743 | SDValue Cmp = SDValue( |
1744 | CurDAG->getMachineNode(Opcode: VMSLTOpcode, dl: DL, VT, Ops: {Src1, Src2, VL, SEW}), |
1745 | 0); |
1746 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMANDNOpcode, dl: DL, VT, |
1747 | Ops: {Mask, Cmp, VL, MaskSEW})); |
1748 | return; |
1749 | } |
1750 | |
1751 | // Mask needs to be copied to V0. |
1752 | SDValue Chain = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL, |
1753 | Reg: RISCV::V0, N: Mask, Glue: SDValue()); |
1754 | SDValue Glue = Chain.getValue(R: 1); |
1755 | SDValue V0 = CurDAG->getRegister(Reg: RISCV::V0, VT); |
1756 | |
1757 | // Otherwise use |
1758 | // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 |
1759 | // The result is mask undisturbed. |
1760 | // We use the same instructions to emulate mask agnostic behavior, because |
1761 | // the agnostic result can be either undisturbed or all 1. |
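// Lane-wise: active lanes (mask bit 1) have their compare result flipped
// to va >= x by the xor, while inactive lanes keep the MaskedOff value.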
1762 | SDValue Cmp = SDValue( |
1763 | CurDAG->getMachineNode(Opcode: VMSLTMaskOpcode, dl: DL, VT, |
1764 | Ops: {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), |
1765 | 0); |
1766 | // vmxor.mm vd, vd, v0 is used to update active value. |
1767 | ReplaceNode(F: Node, T: CurDAG->getMachineNode(Opcode: VMXOROpcode, dl: DL, VT, |
1768 | Ops: {Cmp, Mask, VL, MaskSEW})); |
1769 | return; |
1770 | } |
1771 | case Intrinsic::riscv_vsetvli: |
1772 | case Intrinsic::riscv_vsetvlimax: |
1773 | return selectVSETVLI(Node); |
1774 | } |
1775 | break; |
1776 | } |
1777 | case ISD::INTRINSIC_W_CHAIN: { |
1778 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); |
1779 | switch (IntNo) { |
1780 | // By default we do not custom select any intrinsic. |
1781 | default: |
1782 | break; |
1783 | case Intrinsic::riscv_vlseg2: |
1784 | case Intrinsic::riscv_vlseg3: |
1785 | case Intrinsic::riscv_vlseg4: |
1786 | case Intrinsic::riscv_vlseg5: |
1787 | case Intrinsic::riscv_vlseg6: |
1788 | case Intrinsic::riscv_vlseg7: |
1789 | case Intrinsic::riscv_vlseg8: { |
1790 | selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); |
1791 | return; |
1792 | } |
1793 | case Intrinsic::riscv_vlseg2_mask: |
1794 | case Intrinsic::riscv_vlseg3_mask: |
1795 | case Intrinsic::riscv_vlseg4_mask: |
1796 | case Intrinsic::riscv_vlseg5_mask: |
1797 | case Intrinsic::riscv_vlseg6_mask: |
1798 | case Intrinsic::riscv_vlseg7_mask: |
1799 | case Intrinsic::riscv_vlseg8_mask: { |
1800 | selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); |
1801 | return; |
1802 | } |
1803 | case Intrinsic::riscv_vlsseg2: |
1804 | case Intrinsic::riscv_vlsseg3: |
1805 | case Intrinsic::riscv_vlsseg4: |
1806 | case Intrinsic::riscv_vlsseg5: |
1807 | case Intrinsic::riscv_vlsseg6: |
1808 | case Intrinsic::riscv_vlsseg7: |
1809 | case Intrinsic::riscv_vlsseg8: { |
1810 | selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); |
1811 | return; |
1812 | } |
1813 | case Intrinsic::riscv_vlsseg2_mask: |
1814 | case Intrinsic::riscv_vlsseg3_mask: |
1815 | case Intrinsic::riscv_vlsseg4_mask: |
1816 | case Intrinsic::riscv_vlsseg5_mask: |
1817 | case Intrinsic::riscv_vlsseg6_mask: |
1818 | case Intrinsic::riscv_vlsseg7_mask: |
1819 | case Intrinsic::riscv_vlsseg8_mask: { |
1820 | selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); |
1821 | return; |
1822 | } |
1823 | case Intrinsic::riscv_vloxseg2: |
1824 | case Intrinsic::riscv_vloxseg3: |
1825 | case Intrinsic::riscv_vloxseg4: |
1826 | case Intrinsic::riscv_vloxseg5: |
1827 | case Intrinsic::riscv_vloxseg6: |
1828 | case Intrinsic::riscv_vloxseg7: |
1829 | case Intrinsic::riscv_vloxseg8: |
1830 | selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); |
1831 | return; |
1832 | case Intrinsic::riscv_vluxseg2: |
1833 | case Intrinsic::riscv_vluxseg3: |
1834 | case Intrinsic::riscv_vluxseg4: |
1835 | case Intrinsic::riscv_vluxseg5: |
1836 | case Intrinsic::riscv_vluxseg6: |
1837 | case Intrinsic::riscv_vluxseg7: |
1838 | case Intrinsic::riscv_vluxseg8: |
1839 | selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); |
1840 | return; |
1841 | case Intrinsic::riscv_vloxseg2_mask: |
1842 | case Intrinsic::riscv_vloxseg3_mask: |
1843 | case Intrinsic::riscv_vloxseg4_mask: |
1844 | case Intrinsic::riscv_vloxseg5_mask: |
1845 | case Intrinsic::riscv_vloxseg6_mask: |
1846 | case Intrinsic::riscv_vloxseg7_mask: |
1847 | case Intrinsic::riscv_vloxseg8_mask: |
1848 | selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); |
1849 | return; |
1850 | case Intrinsic::riscv_vluxseg2_mask: |
1851 | case Intrinsic::riscv_vluxseg3_mask: |
1852 | case Intrinsic::riscv_vluxseg4_mask: |
1853 | case Intrinsic::riscv_vluxseg5_mask: |
1854 | case Intrinsic::riscv_vluxseg6_mask: |
1855 | case Intrinsic::riscv_vluxseg7_mask: |
1856 | case Intrinsic::riscv_vluxseg8_mask: |
1857 | selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); |
1858 | return; |
1859 | case Intrinsic::riscv_vlseg8ff: |
1860 | case Intrinsic::riscv_vlseg7ff: |
1861 | case Intrinsic::riscv_vlseg6ff: |
1862 | case Intrinsic::riscv_vlseg5ff: |
1863 | case Intrinsic::riscv_vlseg4ff: |
1864 | case Intrinsic::riscv_vlseg3ff: |
1865 | case Intrinsic::riscv_vlseg2ff: { |
1866 | selectVLSEGFF(Node, /*IsMasked*/ false); |
1867 | return; |
1868 | } |
1869 | case Intrinsic::riscv_vlseg8ff_mask: |
1870 | case Intrinsic::riscv_vlseg7ff_mask: |
1871 | case Intrinsic::riscv_vlseg6ff_mask: |
1872 | case Intrinsic::riscv_vlseg5ff_mask: |
1873 | case Intrinsic::riscv_vlseg4ff_mask: |
1874 | case Intrinsic::riscv_vlseg3ff_mask: |
1875 | case Intrinsic::riscv_vlseg2ff_mask: { |
1876 | selectVLSEGFF(Node, /*IsMasked*/ true); |
1877 | return; |
1878 | } |
1879 | case Intrinsic::riscv_vloxei: |
1880 | case Intrinsic::riscv_vloxei_mask: |
1881 | case Intrinsic::riscv_vluxei: |
1882 | case Intrinsic::riscv_vluxei_mask: { |
1883 | bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || |
1884 | IntNo == Intrinsic::riscv_vluxei_mask; |
1885 | bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || |
1886 | IntNo == Intrinsic::riscv_vloxei_mask; |
1887 | |
1888 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
1889 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
1890 | |
1891 | unsigned CurOp = 2; |
1892 | SmallVector<SDValue, 8> Operands; |
1893 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); |
1894 | |
1895 | MVT IndexVT; |
1896 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
1897 | /*IsStridedOrIndexed*/ true, Operands, |
1898 | /*IsLoad=*/true, IndexVT: &IndexVT); |
1899 | |
1900 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
1901 | "Element count mismatch" ); |
1902 | |
1903 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
1904 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
1905 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
1906 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
1907 | report_fatal_error(reason: "The V extension does not support EEW=64 for index " |
1908 | "values when XLEN=32" ); |
1909 | } |
1910 | const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( |
1911 | Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, LMUL: static_cast<unsigned>(LMUL), |
1912 | IndexLMUL: static_cast<unsigned>(IndexLMUL)); |
1913 | MachineSDNode *Load = |
1914 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
1915 | |
1916 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
1917 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
1918 | |
1919 | ReplaceNode(F: Node, T: Load); |
1920 | return; |
1921 | } |
1922 | case Intrinsic::riscv_vlm: |
1923 | case Intrinsic::riscv_vle: |
1924 | case Intrinsic::riscv_vle_mask: |
1925 | case Intrinsic::riscv_vlse: |
1926 | case Intrinsic::riscv_vlse_mask: { |
1927 | bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || |
1928 | IntNo == Intrinsic::riscv_vlse_mask; |
1929 | bool IsStrided = |
1930 | IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; |
1931 | |
1932 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
1933 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
1934 | |
// The riscv_vlm intrinsic is always tail agnostic and has no passthru
// operand at the IR level. The pseudos, however, have both a policy and
// a passthru operand. The passthru operand is needed to track the
// "tail undefined" state, and the policy is there just for
// consistency - it will always be "don't care" for the
// unmasked form.
1941 | bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; |
1942 | unsigned CurOp = 2; |
1943 | SmallVector<SDValue, 8> Operands; |
1944 | if (HasPassthruOperand) |
1945 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); |
1946 | else { |
1947 | // We eagerly lower to implicit_def (instead of undef), as we |
1948 | // otherwise fail to select nodes such as: nxv1i1 = undef |
1949 | SDNode *Passthru = |
1950 | CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT); |
1951 | Operands.push_back(Elt: SDValue(Passthru, 0)); |
1952 | } |
1953 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
1954 | Operands, /*IsLoad=*/true); |
1955 | |
1956 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
1957 | const RISCV::VLEPseudo *P = |
1958 | RISCV::getVLEPseudo(Masked: IsMasked, Strided: IsStrided, /*FF*/ false, Log2SEW, |
1959 | LMUL: static_cast<unsigned>(LMUL)); |
1960 | MachineSDNode *Load = |
1961 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
1962 | |
1963 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
1964 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
1965 | |
1966 | ReplaceNode(F: Node, T: Load); |
1967 | return; |
1968 | } |
1969 | case Intrinsic::riscv_vleff: |
1970 | case Intrinsic::riscv_vleff_mask: { |
1971 | bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; |
1972 | |
1973 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
1974 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
1975 | |
1976 | unsigned CurOp = 2; |
1977 | SmallVector<SDValue, 7> Operands; |
1978 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); |
1979 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
1980 | /*IsStridedOrIndexed*/ false, Operands, |
1981 | /*IsLoad=*/true); |
1982 | |
1983 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
1984 | const RISCV::VLEPseudo *P = |
1985 | RISCV::getVLEPseudo(Masked: IsMasked, /*Strided*/ false, /*FF*/ true, |
1986 | Log2SEW, LMUL: static_cast<unsigned>(LMUL)); |
1987 | MachineSDNode *Load = CurDAG->getMachineNode( |
1988 | Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
1989 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
1990 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {MemOp->getMemOperand()}); |
1991 | |
1992 | ReplaceNode(F: Node, T: Load); |
1993 | return; |
1994 | } |
1995 | } |
1996 | break; |
1997 | } |
1998 | case ISD::INTRINSIC_VOID: { |
1999 | unsigned IntNo = Node->getConstantOperandVal(Num: 1); |
2000 | switch (IntNo) { |
2001 | case Intrinsic::riscv_vsseg2: |
2002 | case Intrinsic::riscv_vsseg3: |
2003 | case Intrinsic::riscv_vsseg4: |
2004 | case Intrinsic::riscv_vsseg5: |
2005 | case Intrinsic::riscv_vsseg6: |
2006 | case Intrinsic::riscv_vsseg7: |
2007 | case Intrinsic::riscv_vsseg8: { |
2008 | selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); |
2009 | return; |
2010 | } |
2011 | case Intrinsic::riscv_vsseg2_mask: |
2012 | case Intrinsic::riscv_vsseg3_mask: |
2013 | case Intrinsic::riscv_vsseg4_mask: |
2014 | case Intrinsic::riscv_vsseg5_mask: |
2015 | case Intrinsic::riscv_vsseg6_mask: |
2016 | case Intrinsic::riscv_vsseg7_mask: |
2017 | case Intrinsic::riscv_vsseg8_mask: { |
2018 | selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); |
2019 | return; |
2020 | } |
2021 | case Intrinsic::riscv_vssseg2: |
2022 | case Intrinsic::riscv_vssseg3: |
2023 | case Intrinsic::riscv_vssseg4: |
2024 | case Intrinsic::riscv_vssseg5: |
2025 | case Intrinsic::riscv_vssseg6: |
2026 | case Intrinsic::riscv_vssseg7: |
2027 | case Intrinsic::riscv_vssseg8: { |
2028 | selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); |
2029 | return; |
2030 | } |
2031 | case Intrinsic::riscv_vssseg2_mask: |
2032 | case Intrinsic::riscv_vssseg3_mask: |
2033 | case Intrinsic::riscv_vssseg4_mask: |
2034 | case Intrinsic::riscv_vssseg5_mask: |
2035 | case Intrinsic::riscv_vssseg6_mask: |
2036 | case Intrinsic::riscv_vssseg7_mask: |
2037 | case Intrinsic::riscv_vssseg8_mask: { |
2038 | selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); |
2039 | return; |
2040 | } |
2041 | case Intrinsic::riscv_vsoxseg2: |
2042 | case Intrinsic::riscv_vsoxseg3: |
2043 | case Intrinsic::riscv_vsoxseg4: |
2044 | case Intrinsic::riscv_vsoxseg5: |
2045 | case Intrinsic::riscv_vsoxseg6: |
2046 | case Intrinsic::riscv_vsoxseg7: |
2047 | case Intrinsic::riscv_vsoxseg8: |
2048 | selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); |
2049 | return; |
2050 | case Intrinsic::riscv_vsuxseg2: |
2051 | case Intrinsic::riscv_vsuxseg3: |
2052 | case Intrinsic::riscv_vsuxseg4: |
2053 | case Intrinsic::riscv_vsuxseg5: |
2054 | case Intrinsic::riscv_vsuxseg6: |
2055 | case Intrinsic::riscv_vsuxseg7: |
2056 | case Intrinsic::riscv_vsuxseg8: |
2057 | selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); |
2058 | return; |
2059 | case Intrinsic::riscv_vsoxseg2_mask: |
2060 | case Intrinsic::riscv_vsoxseg3_mask: |
2061 | case Intrinsic::riscv_vsoxseg4_mask: |
2062 | case Intrinsic::riscv_vsoxseg5_mask: |
2063 | case Intrinsic::riscv_vsoxseg6_mask: |
2064 | case Intrinsic::riscv_vsoxseg7_mask: |
2065 | case Intrinsic::riscv_vsoxseg8_mask: |
2066 | selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); |
2067 | return; |
2068 | case Intrinsic::riscv_vsuxseg2_mask: |
2069 | case Intrinsic::riscv_vsuxseg3_mask: |
2070 | case Intrinsic::riscv_vsuxseg4_mask: |
2071 | case Intrinsic::riscv_vsuxseg5_mask: |
2072 | case Intrinsic::riscv_vsuxseg6_mask: |
2073 | case Intrinsic::riscv_vsuxseg7_mask: |
2074 | case Intrinsic::riscv_vsuxseg8_mask: |
2075 | selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); |
2076 | return; |
2077 | case Intrinsic::riscv_vsoxei: |
2078 | case Intrinsic::riscv_vsoxei_mask: |
2079 | case Intrinsic::riscv_vsuxei: |
2080 | case Intrinsic::riscv_vsuxei_mask: { |
2081 | bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || |
2082 | IntNo == Intrinsic::riscv_vsuxei_mask; |
2083 | bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || |
2084 | IntNo == Intrinsic::riscv_vsoxei_mask; |
2085 | |
2086 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
2087 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
2088 | |
2089 | unsigned CurOp = 2; |
2090 | SmallVector<SDValue, 8> Operands; |
2091 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value. |
2092 | |
2093 | MVT IndexVT; |
2094 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, |
2095 | /*IsStridedOrIndexed*/ true, Operands, |
2096 | /*IsLoad=*/false, IndexVT: &IndexVT); |
2097 | |
2098 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
2099 | "Element count mismatch" ); |
2100 | |
2101 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
2102 | RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(VT: IndexVT); |
2103 | unsigned IndexLog2EEW = Log2_32(Value: IndexVT.getScalarSizeInBits()); |
2104 | if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { |
2105 | report_fatal_error(reason: "The V extension does not support EEW=64 for index " |
2106 | "values when XLEN=32" ); |
2107 | } |
2108 | const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( |
2109 | Masked: IsMasked, Ordered: IsOrdered, Log2SEW: IndexLog2EEW, |
2110 | LMUL: static_cast<unsigned>(LMUL), IndexLMUL: static_cast<unsigned>(IndexLMUL)); |
2111 | MachineSDNode *Store = |
2112 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
2113 | |
2114 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
2115 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
2116 | |
2117 | ReplaceNode(F: Node, T: Store); |
2118 | return; |
2119 | } |
2120 | case Intrinsic::riscv_vsm: |
2121 | case Intrinsic::riscv_vse: |
2122 | case Intrinsic::riscv_vse_mask: |
2123 | case Intrinsic::riscv_vsse: |
2124 | case Intrinsic::riscv_vsse_mask: { |
2125 | bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || |
2126 | IntNo == Intrinsic::riscv_vsse_mask; |
2127 | bool IsStrided = |
2128 | IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; |
2129 | |
2130 | MVT VT = Node->getOperand(Num: 2)->getSimpleValueType(ResNo: 0); |
2131 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
2132 | |
2133 | unsigned CurOp = 2; |
2134 | SmallVector<SDValue, 8> Operands; |
2135 | Operands.push_back(Elt: Node->getOperand(Num: CurOp++)); // Store value. |
2136 | |
2137 | addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStridedOrIndexed: IsStrided, |
2138 | Operands); |
2139 | |
2140 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
2141 | const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( |
2142 | Masked: IsMasked, Strided: IsStrided, Log2SEW, LMUL: static_cast<unsigned>(LMUL)); |
2143 | MachineSDNode *Store = |
2144 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, VTs: Node->getVTList(), Ops: Operands); |
2145 | if (auto *MemOp = dyn_cast<MemSDNode>(Val: Node)) |
2146 | CurDAG->setNodeMemRefs(N: Store, NewMemRefs: {MemOp->getMemOperand()}); |
2147 | |
2148 | ReplaceNode(F: Node, T: Store); |
2149 | return; |
2150 | } |
2151 | case Intrinsic::riscv_sf_vc_x_se: |
2152 | case Intrinsic::riscv_sf_vc_i_se: |
2153 | selectSF_VC_X_SE(Node); |
2154 | return; |
2155 | } |
2156 | break; |
2157 | } |
2158 | case ISD::BITCAST: { |
2159 | MVT SrcVT = Node->getOperand(Num: 0).getSimpleValueType(); |
2160 | // Just drop bitcasts between vectors if both are fixed or both are |
2161 | // scalable. |
2162 | if ((VT.isScalableVector() && SrcVT.isScalableVector()) || |
2163 | (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { |
2164 | ReplaceUses(F: SDValue(Node, 0), T: Node->getOperand(Num: 0)); |
2165 | CurDAG->RemoveDeadNode(N: Node); |
2166 | return; |
2167 | } |
2168 | break; |
2169 | } |
2170 | case ISD::INSERT_SUBVECTOR: { |
2171 | SDValue V = Node->getOperand(Num: 0); |
2172 | SDValue SubV = Node->getOperand(Num: 1); |
2173 | SDLoc DL(SubV); |
2174 | auto Idx = Node->getConstantOperandVal(Num: 2); |
2175 | MVT SubVecVT = SubV.getSimpleValueType(); |
2176 | |
2177 | const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); |
2178 | MVT SubVecContainerVT = SubVecVT; |
2179 | // Establish the correct scalable-vector types for any fixed-length type. |
2180 | if (SubVecVT.isFixedLengthVector()) { |
2181 | SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT: SubVecVT); |
2182 | TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock); |
2183 | [[maybe_unused]] bool ExactlyVecRegSized = |
2184 | Subtarget->expandVScale(X: SubVecVT.getSizeInBits()) |
2185 | .isKnownMultipleOf(RHS: Subtarget->expandVScale(X: VecRegSize)); |
2186 | assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits()) |
2187 | .getKnownMinValue())); |
2188 | assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef())); |
2189 | } |
2190 | MVT ContainerVT = VT; |
2191 | if (VT.isFixedLengthVector()) |
2192 | ContainerVT = TLI.getContainerForFixedLengthVector(VT); |
2193 | |
2194 | const auto *TRI = Subtarget->getRegisterInfo(); |
2195 | unsigned SubRegIdx; |
2196 | std::tie(args&: SubRegIdx, args&: Idx) = |
2197 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
2198 | VecVT: ContainerVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI); |
2199 | |
2200 | // If the Idx hasn't been completely eliminated then this is a subvector |
2201 | // insert which doesn't naturally align to a vector register. These must |
2202 | // be handled using instructions to manipulate the vector registers. |
2203 | if (Idx != 0) |
2204 | break; |
2205 | |
2206 | RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(VT: SubVecContainerVT); |
2207 | [[maybe_unused]] bool IsSubVecPartReg = |
2208 | SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || |
2209 | SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || |
2210 | SubVecLMUL == RISCVII::VLMUL::LMUL_F8; |
2211 | assert((!IsSubVecPartReg || V.isUndef()) && |
2212 | "Expecting lowering to have created legal INSERT_SUBVECTORs when " |
2213 | "the subvector is smaller than a full-sized register" ); |
2214 | |
2215 | // If we haven't set a SubRegIdx, then we must be going between |
2216 | // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. |
2217 | if (SubRegIdx == RISCV::NoSubRegister) { |
2218 | unsigned InRegClassID = |
2219 | RISCVTargetLowering::getRegClassIDForVecVT(VT: ContainerVT); |
2220 | assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == |
2221 | InRegClassID && |
2222 | "Unexpected subvector extraction" ); |
2223 | SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT); |
2224 | SDNode *NewNode = CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, |
2225 | dl: DL, VT, Op1: SubV, Op2: RC); |
2226 | ReplaceNode(F: Node, T: NewNode); |
2227 | return; |
2228 | } |
2229 | |
2230 | SDValue Insert = CurDAG->getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V, Subreg: SubV); |
2231 | ReplaceNode(F: Node, T: Insert.getNode()); |
2232 | return; |
2233 | } |
2234 | case ISD::EXTRACT_SUBVECTOR: { |
2235 | SDValue V = Node->getOperand(Num: 0); |
2236 | auto Idx = Node->getConstantOperandVal(Num: 1); |
2237 | MVT InVT = V.getSimpleValueType(); |
2238 | SDLoc DL(V); |
2239 | |
2240 | const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); |
2241 | MVT SubVecContainerVT = VT; |
2242 | // Establish the correct scalable-vector types for any fixed-length type. |
2243 | if (VT.isFixedLengthVector()) { |
2244 | assert(Idx == 0); |
2245 | SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); |
2246 | } |
2247 | if (InVT.isFixedLengthVector()) |
2248 | InVT = TLI.getContainerForFixedLengthVector(VT: InVT); |
2249 | |
2250 | const auto *TRI = Subtarget->getRegisterInfo(); |
2251 | unsigned SubRegIdx; |
2252 | std::tie(args&: SubRegIdx, args&: Idx) = |
2253 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
2254 | VecVT: InVT, SubVecVT: SubVecContainerVT, InsertExtractIdx: Idx, TRI); |
2255 | |
2256 | // If the Idx hasn't been completely eliminated then this is a subvector |
2257 | // extract which doesn't naturally align to a vector register. These must |
2258 | // be handled using instructions to manipulate the vector registers. |
2259 | if (Idx != 0) |
2260 | break; |
2261 | |
2262 | // If we haven't set a SubRegIdx, then we must be going between |
2263 | // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. |
2264 | if (SubRegIdx == RISCV::NoSubRegister) { |
2265 | unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT: InVT); |
2266 | assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == |
2267 | InRegClassID && |
2268 | "Unexpected subvector extraction" ); |
2269 | SDValue RC = CurDAG->getTargetConstant(Val: InRegClassID, DL, VT: XLenVT); |
2270 | SDNode *NewNode = |
2271 | CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT, Op1: V, Op2: RC); |
2272 | ReplaceNode(F: Node, T: NewNode); |
2273 | return; |
2274 | } |
2275 | |
SDValue Extract = CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: V);
2277 | ReplaceNode(F: Node, T: Extract.getNode()); |
2278 | return; |
2279 | } |
2280 | case RISCVISD::VMV_S_X_VL: |
2281 | case RISCVISD::VFMV_S_F_VL: |
2282 | case RISCVISD::VMV_V_X_VL: |
2283 | case RISCVISD::VFMV_V_F_VL: { |
2284 | // Try to match splat of a scalar load to a strided load with stride of x0. |
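// A stride of x0 (zero) makes every element read the same scalar address,
// so the vector load itself performs the splat.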
2285 | bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || |
2286 | Node->getOpcode() == RISCVISD::VFMV_S_F_VL; |
2287 | if (!Node->getOperand(Num: 0).isUndef()) |
2288 | break; |
2289 | SDValue Src = Node->getOperand(Num: 1); |
2290 | auto *Ld = dyn_cast<LoadSDNode>(Val&: Src); |
// Can't fold an indexed (load update) node because its second
// output is used, so the load update node can't be removed.
2293 | if (!Ld || Ld->isIndexed()) |
2294 | break; |
2295 | EVT MemVT = Ld->getMemoryVT(); |
2296 | // The memory VT should be the same size as the element type. |
2297 | if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) |
2298 | break; |
2299 | if (!IsProfitableToFold(N: Src, U: Node, Root: Node) || |
2300 | !IsLegalToFold(N: Src, U: Node, Root: Node, OptLevel: TM.getOptLevel())) |
2301 | break; |
2302 | |
2303 | SDValue VL; |
2304 | if (IsScalarMove) { |
// We could handle other VL values if we updated the VSETVLI insertion
// pass to avoid introducing more VSETVLIs.
2307 | if (!isOneConstant(V: Node->getOperand(Num: 2))) |
2308 | break; |
2309 | selectVLOp(N: Node->getOperand(Num: 2), VL); |
2310 | } else |
2311 | selectVLOp(N: Node->getOperand(Num: 2), VL); |
2312 | |
2313 | unsigned Log2SEW = Log2_32(Value: VT.getScalarSizeInBits()); |
2314 | SDValue SEW = CurDAG->getTargetConstant(Val: Log2SEW, DL, VT: XLenVT); |
2315 | |
2316 | // If VL=1, then we don't need to do a strided load and can just do a |
2317 | // regular load. |
2318 | bool IsStrided = !isOneConstant(V: VL); |
2319 | |
// Only do a strided load if the subtarget has an optimized zero-stride
// vector load.
2321 | if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) |
2322 | break; |
2323 | |
2324 | SmallVector<SDValue> Operands = { |
2325 | SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT), 0), |
2326 | Ld->getBasePtr()}; |
2327 | if (IsStrided) |
2328 | Operands.push_back(Elt: CurDAG->getRegister(Reg: RISCV::X0, VT: XLenVT)); |
2329 | uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC; |
2330 | SDValue PolicyOp = CurDAG->getTargetConstant(Val: Policy, DL, VT: XLenVT); |
2331 | Operands.append(IL: {VL, SEW, PolicyOp, Ld->getChain()}); |
2332 | |
2333 | RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); |
2334 | const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( |
2335 | /*IsMasked*/ Masked: false, Strided: IsStrided, /*FF*/ false, |
2336 | Log2SEW, LMUL: static_cast<unsigned>(LMUL)); |
2337 | MachineSDNode *Load = |
2338 | CurDAG->getMachineNode(Opcode: P->Pseudo, dl: DL, ResultTys: {VT, MVT::Other}, Ops: Operands); |
2339 | // Update the chain. |
2340 | ReplaceUses(F: Src.getValue(R: 1), T: SDValue(Load, 1)); |
2341 | // Record the mem-refs |
2342 | CurDAG->setNodeMemRefs(N: Load, NewMemRefs: {Ld->getMemOperand()}); |
2343 | // Replace the splat with the vlse. |
2344 | ReplaceNode(F: Node, T: Load); |
2345 | return; |
2346 | } |
2347 | case ISD::PREFETCH: |
2348 | unsigned Locality = Node->getConstantOperandVal(Num: 3); |
2349 | if (Locality > 2) |
2350 | break; |
2351 | |
2352 | if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Val: Node)) { |
2353 | MachineMemOperand *MMO = LoadStoreMem->getMemOperand(); |
2354 | MMO->setFlags(MachineMemOperand::MONonTemporal); |
2355 | |
2356 | int NontemporalLevel = 0; |
2357 | switch (Locality) { |
2358 | case 0: |
2359 | NontemporalLevel = 3; // NTL.ALL |
2360 | break; |
2361 | case 1: |
2362 | NontemporalLevel = 1; // NTL.PALL |
2363 | break; |
2364 | case 2: |
2365 | NontemporalLevel = 0; // NTL.P1 |
2366 | break; |
2367 | default: |
2368 | llvm_unreachable("unexpected locality value." ); |
2369 | } |
2370 | |
2371 | if (NontemporalLevel & 0b1) |
2372 | MMO->setFlags(MONontemporalBit0); |
2373 | if (NontemporalLevel & 0b10) |
2374 | MMO->setFlags(MONontemporalBit1); |
2375 | } |
2376 | break; |
2377 | } |
2378 | |
2379 | // Select the default instruction. |
2380 | SelectCode(N: Node); |
2381 | } |
2382 | |
2383 | bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( |
2384 | const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, |
2385 | std::vector<SDValue> &OutOps) { |
2386 | // Always produce a register and immediate operand, as expected by |
2387 | // RISCVAsmPrinter::PrintAsmMemoryOperand. |
2388 | switch (ConstraintID) { |
2389 | case InlineAsm::ConstraintCode::o: |
2390 | case InlineAsm::ConstraintCode::m: { |
2391 | SDValue Op0, Op1; |
2392 | [[maybe_unused]] bool Found = SelectAddrRegImm(Addr: Op, Base&: Op0, Offset&: Op1); |
2393 | assert(Found && "SelectAddrRegImm should always succeed" ); |
2394 | OutOps.push_back(x: Op0); |
2395 | OutOps.push_back(x: Op1); |
2396 | return false; |
2397 | } |
2398 | case InlineAsm::ConstraintCode::A: |
2399 | OutOps.push_back(x: Op); |
2400 | OutOps.push_back( |
2401 | x: CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget->getXLenVT())); |
2402 | return false; |
2403 | default: |
2404 | report_fatal_error(reason: "Unexpected asm memory constraint " + |
2405 | InlineAsm::getMemConstraintName(C: ConstraintID)); |
2406 | } |
2407 | |
2408 | return true; |
2409 | } |
2410 | |
2411 | bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, |
2412 | SDValue &Offset) { |
2413 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Addr)) { |
2414 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT: Subtarget->getXLenVT()); |
2415 | Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(Addr), VT: Subtarget->getXLenVT()); |
2416 | return true; |
2417 | } |
2418 | |
2419 | return false; |
2420 | } |
2421 | |
2422 | // Select a frame index and an optional immediate offset from an ADD or OR. |
2423 | bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, |
2424 | SDValue &Offset) { |
2425 | if (SelectAddrFrameIndex(Addr, Base, Offset)) |
2426 | return true; |
2427 | |
2428 | if (!CurDAG->isBaseWithConstantOffset(Op: Addr)) |
2429 | return false; |
2430 | |
2431 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val: Addr.getOperand(i: 0))) { |
2432 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2433 | if (isInt<12>(x: CVal)) { |
2434 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), |
2435 | VT: Subtarget->getXLenVT()); |
2436 | Offset = CurDAG->getTargetConstant(Val: CVal, DL: SDLoc(Addr), |
2437 | VT: Subtarget->getXLenVT()); |
2438 | return true; |
2439 | } |
2440 | } |
2441 | |
2442 | return false; |
2443 | } |
2444 | |
2445 | // Fold constant addresses. |
2446 | static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, |
2447 | const MVT VT, const RISCVSubtarget *Subtarget, |
2448 | SDValue Addr, SDValue &Base, SDValue &Offset, |
2449 | bool IsPrefetch = false) { |
2450 | if (!isa<ConstantSDNode>(Val: Addr)) |
2451 | return false; |
2452 | |
2453 | int64_t CVal = cast<ConstantSDNode>(Val&: Addr)->getSExtValue(); |
2454 | |
2455 | // If the constant is a simm12, we can fold the whole constant and use X0 as |
2456 | // the base. If the constant can be materialized with LUI+simm12, use LUI as |
2457 | // the base. We can't use generateInstSeq because it favors LUI+ADDIW. |
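// For example (illustrative): for CVal = 0x12345678, Lo12 = 0x678 and
// Hi = 0x12345000, so the address is formed as (LUI 0x12345) plus a 0x678
// offset folded into the memory access.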
2458 | int64_t Lo12 = SignExtend64<12>(x: CVal); |
2459 | int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; |
2460 | if (!Subtarget->is64Bit() || isInt<32>(x: Hi)) { |
2461 | if (IsPrefetch && (Lo12 & 0b11111) != 0) |
2462 | return false; |
2463 | |
2464 | if (Hi) { |
2465 | int64_t Hi20 = (Hi >> 12) & 0xfffff; |
2466 | Base = SDValue( |
2467 | CurDAG->getMachineNode(Opcode: RISCV::LUI, dl: DL, VT, |
2468 | Op1: CurDAG->getTargetConstant(Val: Hi20, DL, VT)), |
2469 | 0); |
2470 | } else { |
2471 | Base = CurDAG->getRegister(Reg: RISCV::X0, VT); |
2472 | } |
2473 | Offset = CurDAG->getTargetConstant(Val: Lo12, DL, VT); |
2474 | return true; |
2475 | } |
2476 | |
2477 | // Ask how constant materialization would handle this constant. |
2478 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: CVal, STI: *Subtarget); |
2479 | |
2480 | // If the last instruction would be an ADDI, we can fold its immediate and |
2481 | // emit the rest of the sequence as the base. |
2482 | if (Seq.back().getOpcode() != RISCV::ADDI) |
2483 | return false; |
2484 | Lo12 = Seq.back().getImm(); |
2485 | if (IsPrefetch && (Lo12 & 0b11111) != 0) |
2486 | return false; |
2487 | |
2488 | // Drop the last instruction. |
2489 | Seq.pop_back(); |
2490 | assert(!Seq.empty() && "Expected more instructions in sequence" ); |
2491 | |
2492 | Base = selectImmSeq(CurDAG, DL, VT, Seq); |
2493 | Offset = CurDAG->getTargetConstant(Val: Lo12, DL, VT); |
2494 | return true; |
2495 | } |
2496 | |
2497 | // Is this ADD instruction only used as the base pointer of scalar loads and |
2498 | // stores? |
2499 | static bool isWorthFoldingAdd(SDValue Add) { |
2500 | for (auto *Use : Add->uses()) { |
2501 | if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && |
2502 | Use->getOpcode() != ISD::ATOMIC_LOAD && |
2503 | Use->getOpcode() != ISD::ATOMIC_STORE) |
2504 | return false; |
2505 | EVT VT = cast<MemSDNode>(Val: Use)->getMemoryVT(); |
2506 | if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && |
2507 | VT != MVT::f64) |
2508 | return false; |
2509 | // Don't allow stores of the value. It must be used as the address. |
2510 | if (Use->getOpcode() == ISD::STORE && |
2511 | cast<StoreSDNode>(Val: Use)->getValue() == Add) |
2512 | return false; |
2513 | if (Use->getOpcode() == ISD::ATOMIC_STORE && |
2514 | cast<AtomicSDNode>(Val: Use)->getVal() == Add) |
2515 | return false; |
2516 | } |
2517 | |
2518 | return true; |
2519 | } |
2520 | |
2521 | bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, |
2522 | unsigned MaxShiftAmount, |
2523 | SDValue &Base, SDValue &Index, |
2524 | SDValue &Scale) { |
2525 | EVT VT = Addr.getSimpleValueType(); |
2526 | auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, |
2527 | SDValue &Shift) { |
2528 | uint64_t ShiftAmt = 0; |
2529 | Index = N; |
2530 | |
2531 | if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) { |
2532 | // Only match shifts by a value in range [0, MaxShiftAmount]. |
2533 | if (N.getConstantOperandVal(i: 1) <= MaxShiftAmount) { |
2534 | Index = N.getOperand(i: 0); |
2535 | ShiftAmt = N.getConstantOperandVal(i: 1); |
2536 | } |
2537 | } |
2538 | |
2539 | Shift = CurDAG->getTargetConstant(Val: ShiftAmt, DL: SDLoc(N), VT); |
2540 | return ShiftAmt != 0; |
2541 | }; |
2542 | |
2543 | if (Addr.getOpcode() == ISD::ADD) { |
2544 | if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))) { |
2545 | SDValue AddrB = Addr.getOperand(i: 0); |
2546 | if (AddrB.getOpcode() == ISD::ADD && |
2547 | UnwrapShl(AddrB.getOperand(i: 0), Index, Scale) && |
2548 | !isa<ConstantSDNode>(Val: AddrB.getOperand(i: 1)) && |
2549 | isInt<12>(x: C1->getSExtValue())) { |
2550 | // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) |
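// This lets (shl A C2) be selected as the scaled index while B and C1 are
// folded into a single ADDI that forms the base.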
2551 | SDValue C1Val = |
2552 | CurDAG->getTargetConstant(Val: C1->getZExtValue(), DL: SDLoc(Addr), VT); |
2553 | Base = SDValue(CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: SDLoc(Addr), VT, |
2554 | Op1: AddrB.getOperand(i: 1), Op2: C1Val), |
2555 | 0); |
2556 | return true; |
2557 | } |
2558 | } else if (UnwrapShl(Addr.getOperand(i: 0), Index, Scale)) { |
2559 | Base = Addr.getOperand(i: 1); |
2560 | return true; |
2561 | } else { |
2562 | UnwrapShl(Addr.getOperand(i: 1), Index, Scale); |
2563 | Base = Addr.getOperand(i: 0); |
2564 | return true; |
2565 | } |
2566 | } else if (UnwrapShl(Addr, Index, Scale)) { |
2567 | EVT VT = Addr.getValueType(); |
2568 | Base = CurDAG->getRegister(Reg: RISCV::X0, VT); |
2569 | return true; |
2570 | } |
2571 | |
2572 | return false; |
2573 | } |
2574 | |
2575 | bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, |
2576 | SDValue &Offset, bool IsINX) { |
2577 | if (SelectAddrFrameIndex(Addr, Base, Offset)) |
2578 | return true; |
2579 | |
2580 | SDLoc DL(Addr); |
2581 | MVT VT = Addr.getSimpleValueType(); |
2582 | |
2583 | if (Addr.getOpcode() == RISCVISD::ADD_LO) { |
2584 | Base = Addr.getOperand(i: 0); |
2585 | Offset = Addr.getOperand(i: 1); |
2586 | return true; |
2587 | } |
2588 | |
2589 | int64_t RV32ZdinxRange = IsINX ? 4 : 0; |
2590 | if (CurDAG->isBaseWithConstantOffset(Op: Addr)) { |
2591 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2592 | if (isInt<12>(x: CVal) && isInt<12>(x: CVal + RV32ZdinxRange)) { |
2593 | Base = Addr.getOperand(i: 0); |
2594 | if (Base.getOpcode() == RISCVISD::ADD_LO) { |
2595 | SDValue LoOperand = Base.getOperand(i: 1); |
2596 | if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: LoOperand)) { |
2597 | // If the Lo in (ADD_LO hi, lo) is a global variable's address |
2598 | // (its low part, really), then we can rely on the alignment of that |
// variable to provide a margin of safety before the low part can overflow
// the 12 bits of the load/store offset. Check if CVal falls within
// that margin; if so, (low part + CVal) can't overflow.
2602 | const DataLayout &DL = CurDAG->getDataLayout(); |
2603 | Align Alignment = commonAlignment( |
2604 | A: GA->getGlobal()->getPointerAlignment(DL), Offset: GA->getOffset()); |
2605 | if (CVal == 0 || Alignment > CVal) { |
2606 | int64_t CombinedOffset = CVal + GA->getOffset(); |
2607 | Base = Base.getOperand(i: 0); |
2608 | Offset = CurDAG->getTargetGlobalAddress( |
2609 | GV: GA->getGlobal(), DL: SDLoc(LoOperand), VT: LoOperand.getValueType(), |
2610 | offset: CombinedOffset, TargetFlags: GA->getTargetFlags()); |
2611 | return true; |
2612 | } |
2613 | } |
2614 | } |
2615 | |
2616 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base)) |
2617 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT); |
2618 | Offset = CurDAG->getTargetConstant(Val: CVal, DL, VT); |
2619 | return true; |
2620 | } |
2621 | } |
2622 | |
2623 | // Handle ADD with large immediates. |
2624 | if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) { |
2625 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2626 | assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) && |
2627 | "simm12 not already handled?" ); |
2628 | |
2629 | // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use |
2630 | // an ADDI for part of the offset and fold the rest into the load/store. |
2631 | // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. |
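// For example (illustrative): for CVal = 3000 we emit (ADDI base, 2047)
// and fold the remaining 953 into the load/store offset.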
2632 | if (isInt<12>(x: CVal / 2) && isInt<12>(x: CVal - CVal / 2)) { |
2633 | int64_t Adj = CVal < 0 ? -2048 : 2047; |
2634 | Base = SDValue( |
2635 | CurDAG->getMachineNode(Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0), |
2636 | Op2: CurDAG->getTargetConstant(Val: Adj, DL, VT)), |
2637 | 0); |
2638 | Offset = CurDAG->getTargetConstant(Val: CVal - Adj, DL, VT); |
2639 | return true; |
2640 | } |
2641 | |
2642 | // For larger immediates, we might be able to save one instruction from |
2643 | // constant materialization by folding the Lo12 bits of the immediate into |
2644 | // the address. We should only do this if the ADD is only used by loads and |
2645 | // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled |
2646 | // separately with the full materialized immediate creating extra |
2647 | // instructions. |
2648 | if (isWorthFoldingAdd(Add: Addr) && |
2649 | selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base, |
2650 | Offset)) { |
2651 | // Insert an ADD instruction with the materialized Hi52 bits. |
2652 | Base = SDValue( |
2653 | CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base), |
2654 | 0); |
2655 | return true; |
2656 | } |
2657 | } |
2658 | |
2659 | if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset)) |
2660 | return true; |
2661 | |
2662 | Base = Addr; |
2663 | Offset = CurDAG->getTargetConstant(Val: 0, DL, VT); |
2664 | return true; |
2665 | } |
2666 | |
2667 | /// Similar to SelectAddrRegImm, except that the least significant 5 bits of |
/// Offset should be all zeros.
2669 | bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, |
2670 | SDValue &Offset) { |
2671 | if (SelectAddrFrameIndex(Addr, Base, Offset)) |
2672 | return true; |
2673 | |
2674 | SDLoc DL(Addr); |
2675 | MVT VT = Addr.getSimpleValueType(); |
2676 | |
2677 | if (CurDAG->isBaseWithConstantOffset(Op: Addr)) { |
2678 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
2679 | if (isInt<12>(x: CVal)) { |
2680 | Base = Addr.getOperand(i: 0); |
2681 | |
2682 | // Early-out if not a valid offset. |
2683 | if ((CVal & 0b11111) != 0) { |
2684 | Base = Addr; |
2685 | Offset = CurDAG->getTargetConstant(Val: 0, DL, VT); |
2686 | return true; |
2687 | } |
2688 | |
2689 | if (auto *FIN = dyn_cast<FrameIndexSDNode>(Val&: Base)) |
2690 | Base = CurDAG->getTargetFrameIndex(FI: FIN->getIndex(), VT); |
2691 | Offset = CurDAG->getTargetConstant(Val: CVal, DL, VT); |
2692 | return true; |
2693 | } |
2694 | } |
2695 | |
2696 | // Handle ADD with large immediates. |
2697 | if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) { |
2698 | int64_t CVal = cast<ConstantSDNode>(Val: Addr.getOperand(i: 1))->getSExtValue(); |
assert(!isInt<12>(CVal) && "simm12 not already handled?" );
2701 | |
// Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
// one instruction by folding an adjustment of -2048 or 2016 into the address.
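// Both candidates are multiples of 32, so the Offset that remains keeps its
// low 5 bits clear. For example, CVal == 4000 becomes (ADDI base, 1984) with
// an Offset of 2016.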
2704 | if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) { |
2705 | int64_t Adj = CVal < 0 ? -2048 : 2016; |
2706 | int64_t AdjustedOffset = CVal - Adj; |
2707 | Base = SDValue(CurDAG->getMachineNode( |
2708 | Opcode: RISCV::ADDI, dl: DL, VT, Op1: Addr.getOperand(i: 0), |
2709 | Op2: CurDAG->getTargetConstant(Val: AdjustedOffset, DL, VT)), |
2710 | 0); |
2711 | Offset = CurDAG->getTargetConstant(Val: Adj, DL, VT); |
2712 | return true; |
2713 | } |
2714 | |
2715 | if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr: Addr.getOperand(i: 1), Base, |
2716 | Offset, IsPrefetch: true)) { |
2717 | // Insert an ADD instruction with the materialized Hi52 bits. |
2718 | Base = SDValue( |
2719 | CurDAG->getMachineNode(Opcode: RISCV::ADD, dl: DL, VT, Op1: Addr.getOperand(i: 0), Op2: Base), |
2720 | 0); |
2721 | return true; |
2722 | } |
2723 | } |
2724 | |
2725 | if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, IsPrefetch: true)) |
2726 | return true; |
2727 | |
2728 | Base = Addr; |
2729 | Offset = CurDAG->getTargetConstant(Val: 0, DL, VT); |
2730 | return true; |
2731 | } |
2732 | |
2733 | bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base, |
2734 | SDValue &Offset) { |
2735 | if (Addr.getOpcode() != ISD::ADD) |
2736 | return false; |
2737 | |
2738 | if (isa<ConstantSDNode>(Val: Addr.getOperand(i: 1))) |
2739 | return false; |
2740 | |
2741 | Base = Addr.getOperand(i: 1); |
2742 | Offset = Addr.getOperand(i: 0); |
2743 | return true; |
2744 | } |
2745 | |
2746 | bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, |
2747 | SDValue &ShAmt) { |
2748 | ShAmt = N; |
2749 | |
2750 | // Peek through zext. |
2751 | if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) |
2752 | ShAmt = ShAmt.getOperand(i: 0); |
2753 | |
2754 | // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift |
2755 | // amount. If there is an AND on the shift amount, we can bypass it if it |
2756 | // doesn't affect any of those bits. |
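// For example, a 64-bit (srl X, (and Y, 63)) can drop the AND entirely,
// since the shift only reads bits [5:0] of the amount.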
2757 | if (ShAmt.getOpcode() == ISD::AND && |
2758 | isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) { |
2759 | const APInt &AndMask = ShAmt.getConstantOperandAPInt(i: 1); |
2760 | |
2761 | // Since the max shift amount is a power of 2 we can subtract 1 to make a |
2762 | // mask that covers the bits needed to represent all shift amounts. |
2763 | assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!" ); |
2764 | APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); |
2765 | |
2766 | if (ShMask.isSubsetOf(RHS: AndMask)) { |
2767 | ShAmt = ShAmt.getOperand(i: 0); |
2768 | } else { |
2769 | // SimplifyDemandedBits may have optimized the mask so try restoring any |
2770 | // bits that are known zero. |
2771 | KnownBits Known = CurDAG->computeKnownBits(Op: ShAmt.getOperand(i: 0)); |
2772 | if (!ShMask.isSubsetOf(RHS: AndMask | Known.Zero)) |
2773 | return true; |
2774 | ShAmt = ShAmt.getOperand(i: 0); |
2775 | } |
2776 | } |
2777 | |
2778 | if (ShAmt.getOpcode() == ISD::ADD && |
2779 | isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 1))) { |
2780 | uint64_t Imm = ShAmt.getConstantOperandVal(i: 1); |
2781 | // If we are shifting by X+N where N == 0 mod Size, then just shift by X |
2782 | // to avoid the ADD. |
2783 | if (Imm != 0 && Imm % ShiftWidth == 0) { |
2784 | ShAmt = ShAmt.getOperand(i: 0); |
2785 | return true; |
2786 | } |
2787 | } else if (ShAmt.getOpcode() == ISD::SUB && |
2788 | isa<ConstantSDNode>(Val: ShAmt.getOperand(i: 0))) { |
2789 | uint64_t Imm = ShAmt.getConstantOperandVal(i: 0); |
2790 | // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to |
2791 | // generate a NEG instead of a SUB of a constant. |
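// For example, a 64-bit (sll X, (sub 64, Y)) becomes (sll X, (neg Y)).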
2792 | if (Imm != 0 && Imm % ShiftWidth == 0) { |
2793 | SDLoc DL(ShAmt); |
2794 | EVT VT = ShAmt.getValueType(); |
2795 | SDValue Zero = CurDAG->getRegister(Reg: RISCV::X0, VT); |
2796 | unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; |
2797 | MachineSDNode *Neg = CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT, Op1: Zero, |
2798 | Op2: ShAmt.getOperand(i: 1)); |
2799 | ShAmt = SDValue(Neg, 0); |
2800 | return true; |
2801 | } |
2802 | // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X |
2803 | // to generate a NOT instead of a SUB of a constant. |
2804 | if (Imm % ShiftWidth == ShiftWidth - 1) { |
2805 | SDLoc DL(ShAmt); |
2806 | EVT VT = ShAmt.getValueType(); |
2807 | MachineSDNode *Not = |
2808 | CurDAG->getMachineNode(Opcode: RISCV::XORI, dl: DL, VT, Op1: ShAmt.getOperand(i: 1), |
2809 | Op2: CurDAG->getTargetConstant(Val: -1, DL, VT)); |
2810 | ShAmt = SDValue(Not, 0); |
2811 | return true; |
2812 | } |
2813 | } |
2814 | |
2815 | return true; |
2816 | } |
2817 | |
2818 | /// RISC-V doesn't have general instructions for integer setne/seteq, but we can |
2819 | /// check for equality with 0. This function emits instructions that convert the |
2820 | /// seteq/setne into something that can be compared with 0. |
2821 | /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g. |
2822 | /// ISD::SETNE). |
2823 | bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, |
2824 | SDValue &Val) { |
2825 | assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && |
2826 | "Unexpected condition code!" ); |
2827 | |
2828 | // We're looking for a setcc. |
2829 | if (N->getOpcode() != ISD::SETCC) |
2830 | return false; |
2831 | |
2832 | // Must be an equality comparison. |
2833 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
2834 | if (CCVal != ExpectedCCVal) |
2835 | return false; |
2836 | |
2837 | SDValue LHS = N->getOperand(Num: 0); |
2838 | SDValue RHS = N->getOperand(Num: 1); |
2839 | |
2840 | if (!LHS.getValueType().isScalarInteger()) |
2841 | return false; |
2842 | |
// If the RHS is 0, we don't need any extra instructions; return the LHS.
2844 | if (isNullConstant(V: RHS)) { |
2845 | Val = LHS; |
2846 | return true; |
2847 | } |
2848 | |
2849 | SDLoc DL(N); |
2850 | |
2851 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: RHS)) { |
2852 | int64_t CVal = C->getSExtValue(); |
2853 | // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and |
2854 | // non-zero otherwise. |
2855 | if (CVal == -2048) { |
2856 | Val = |
2857 | SDValue(CurDAG->getMachineNode( |
2858 | Opcode: RISCV::XORI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, |
2859 | Op2: CurDAG->getTargetConstant(Val: CVal, DL, VT: N->getValueType(ResNo: 0))), |
2860 | 0); |
2861 | return true; |
2862 | } |
2863 | // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the |
2864 | // LHS is equal to the RHS and non-zero otherwise. |
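// For example, (seteq X, 17) becomes (ADDI X, -17), which is zero exactly
// when X == 17.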
2865 | if (isInt<12>(x: CVal) || CVal == 2048) { |
2866 | Val = |
2867 | SDValue(CurDAG->getMachineNode( |
2868 | Opcode: RISCV::ADDI, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, |
2869 | Op2: CurDAG->getTargetConstant(Val: -CVal, DL, VT: N->getValueType(ResNo: 0))), |
2870 | 0); |
2871 | return true; |
2872 | } |
2873 | } |
2874 | |
// If nothing else, we can XOR the LHS and RHS to produce zero if they are
2876 | // equal and a non-zero value if they aren't. |
2877 | Val = SDValue( |
2878 | CurDAG->getMachineNode(Opcode: RISCV::XOR, dl: DL, VT: N->getValueType(ResNo: 0), Op1: LHS, Op2: RHS), 0); |
2879 | return true; |
2880 | } |
2881 | |
2882 | bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) { |
2883 | if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && |
2884 | cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT().getSizeInBits() == Bits) { |
2885 | Val = N.getOperand(i: 0); |
2886 | return true; |
2887 | } |
2888 | |
2889 | auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) { |
2890 | if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(Val: N.getOperand(i: 1))) |
2891 | return N; |
2892 | |
2893 | SDValue N0 = N.getOperand(i: 0); |
2894 | if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && |
2895 | N.getConstantOperandVal(i: 1) == ShiftAmt && |
2896 | N0.getConstantOperandVal(i: 1) == ShiftAmt) |
2897 | return N0.getOperand(i: 0); |
2898 | |
2899 | return N; |
2900 | }; |
2901 | |
2902 | MVT VT = N.getSimpleValueType(); |
2903 | if (CurDAG->ComputeNumSignBits(Op: N) > (VT.getSizeInBits() - Bits)) { |
2904 | Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits); |
2905 | return true; |
2906 | } |
2907 | |
2908 | return false; |
2909 | } |
2910 | |
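// Return true and set \p Val to a value whose low \p Bits bits equal those of
// N: either peel off an explicit AND with a mask of Bits trailing ones (e.g.
// (and X, 0xff) with Bits == 8 yields X), or use N unchanged if its bits above
// Bits-1 are already known to be zero.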
2911 | bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) { |
2912 | if (N.getOpcode() == ISD::AND) { |
2913 | auto *C = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1)); |
2914 | if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(N: Bits)) { |
2915 | Val = N.getOperand(i: 0); |
2916 | return true; |
2917 | } |
2918 | } |
2919 | MVT VT = N.getSimpleValueType(); |
2920 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: Bits); |
2921 | if (CurDAG->MaskedValueIsZero(Op: N, Mask)) { |
2922 | Val = N; |
2923 | return true; |
2924 | } |
2925 | |
2926 | return false; |
2927 | } |
2928 | |
2929 | /// Look for various patterns that can be done with a SHL that can be folded |
2930 | /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which |
2931 | /// SHXADD we are trying to match. |
2932 | bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, |
2933 | SDValue &Val) { |
2934 | if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1))) { |
2935 | SDValue N0 = N.getOperand(i: 0); |
2936 | |
2937 | bool LeftShift = N0.getOpcode() == ISD::SHL; |
2938 | if ((LeftShift || N0.getOpcode() == ISD::SRL) && |
2939 | isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) { |
2940 | uint64_t Mask = N.getConstantOperandVal(i: 1); |
2941 | unsigned C2 = N0.getConstantOperandVal(i: 1); |
2942 | |
2943 | unsigned XLen = Subtarget->getXLen(); |
2944 | if (LeftShift) |
2945 | Mask &= maskTrailingZeros<uint64_t>(N: C2); |
2946 | else |
2947 | Mask &= maskTrailingOnes<uint64_t>(N: XLen - C2); |
2948 | |
// Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
// leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
// followed by a SHXADD with c3 for the X amount.
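// For example, with ShAmt == 3, (and (shl y, 1), 0xFFFFFFFFFFFFFFF8) has
// c2 == 1 and c3 == 3, so we emit (srli y, 2) and let SH3ADD supply the
// remaining shift by 3.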
2952 | if (isShiftedMask_64(Value: Mask)) { |
2953 | unsigned Leading = XLen - llvm::bit_width(Value: Mask); |
2954 | unsigned Trailing = llvm::countr_zero(Val: Mask); |
2955 | if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) { |
2956 | SDLoc DL(N); |
2957 | EVT VT = N.getValueType(); |
2958 | Val = SDValue(CurDAG->getMachineNode( |
2959 | Opcode: RISCV::SRLI, dl: DL, VT, Op1: N0.getOperand(i: 0), |
2960 | Op2: CurDAG->getTargetConstant(Val: Trailing - C2, DL, VT)), |
2961 | 0); |
2962 | return true; |
2963 | } |
// Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
// leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
// followed by a SHXADD using c3 for the X amount.
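// For example, with ShAmt == 3, (and (srl y, 8), 0x00FFFFFFFFFFFFF8) has
// c2 == 8 and c3 == 3, so we emit (srli y, 11) and let SH3ADD supply the
// shift by 3.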
2967 | if (!LeftShift && Leading == C2 && Trailing == ShAmt) { |
2968 | SDLoc DL(N); |
2969 | EVT VT = N.getValueType(); |
2970 | Val = SDValue( |
2971 | CurDAG->getMachineNode( |
2972 | Opcode: RISCV::SRLI, dl: DL, VT, Op1: N0.getOperand(i: 0), |
2973 | Op2: CurDAG->getTargetConstant(Val: Leading + Trailing, DL, VT)), |
2974 | 0); |
2975 | return true; |
2976 | } |
2977 | } |
2978 | } |
2979 | } |
2980 | |
2981 | bool LeftShift = N.getOpcode() == ISD::SHL; |
2982 | if ((LeftShift || N.getOpcode() == ISD::SRL) && |
2983 | isa<ConstantSDNode>(Val: N.getOperand(i: 1))) { |
2984 | SDValue N0 = N.getOperand(i: 0); |
2985 | if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && |
2986 | isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) { |
2987 | uint64_t Mask = N0.getConstantOperandVal(i: 1); |
2988 | if (isShiftedMask_64(Value: Mask)) { |
2989 | unsigned C1 = N.getConstantOperandVal(i: 1); |
2990 | unsigned XLen = Subtarget->getXLen(); |
2991 | unsigned Leading = XLen - llvm::bit_width(Value: Mask); |
2992 | unsigned Trailing = llvm::countr_zero(Val: Mask); |
2993 | // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and |
2994 | // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. |
2995 | if (LeftShift && Leading == 32 && Trailing > 0 && |
2996 | (Trailing + C1) == ShAmt) { |
2997 | SDLoc DL(N); |
2998 | EVT VT = N.getValueType(); |
2999 | Val = SDValue(CurDAG->getMachineNode( |
3000 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0), |
3001 | Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)), |
3002 | 0); |
3003 | return true; |
3004 | } |
3005 | // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and |
3006 | // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. |
3007 | if (!LeftShift && Leading == 32 && Trailing > C1 && |
3008 | (Trailing - C1) == ShAmt) { |
3009 | SDLoc DL(N); |
3010 | EVT VT = N.getValueType(); |
3011 | Val = SDValue(CurDAG->getMachineNode( |
3012 | Opcode: RISCV::SRLIW, dl: DL, VT, Op1: N0.getOperand(i: 0), |
3013 | Op2: CurDAG->getTargetConstant(Val: Trailing, DL, VT)), |
3014 | 0); |
3015 | return true; |
3016 | } |
3017 | } |
3018 | } |
3019 | } |
3020 | |
3021 | return false; |
3022 | } |
3023 | |
3024 | /// Look for various patterns that can be done with a SHL that can be folded |
3025 | /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which |
3026 | /// SHXADD_UW we are trying to match. |
3027 | bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, |
3028 | SDValue &Val) { |
3029 | if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: N.getOperand(i: 1)) && |
3030 | N.hasOneUse()) { |
3031 | SDValue N0 = N.getOperand(i: 0); |
3032 | if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && |
3033 | N0.hasOneUse()) { |
3034 | uint64_t Mask = N.getConstantOperandVal(i: 1); |
3035 | unsigned C2 = N0.getConstantOperandVal(i: 1); |
3036 | |
3037 | Mask &= maskTrailingZeros<uint64_t>(N: C2); |
3038 | |
3039 | // Look for (and (shl y, c2), c1) where c1 is a shifted mask with |
3040 | // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by |
3041 | // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. |
3042 | if (isShiftedMask_64(Value: Mask)) { |
3043 | unsigned Leading = llvm::countl_zero(Val: Mask); |
3044 | unsigned Trailing = llvm::countr_zero(Val: Mask); |
3045 | if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { |
3046 | SDLoc DL(N); |
3047 | EVT VT = N.getValueType(); |
3048 | Val = SDValue(CurDAG->getMachineNode( |
3049 | Opcode: RISCV::SLLI, dl: DL, VT, Op1: N0.getOperand(i: 0), |
3050 | Op2: CurDAG->getTargetConstant(Val: C2 - ShAmt, DL, VT)), |
3051 | 0); |
3052 | return true; |
3053 | } |
3054 | } |
3055 | } |
3056 | } |
3057 | |
3058 | return false; |
3059 | } |
3060 | |
3061 | static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, |
3062 | unsigned Bits, |
3063 | const TargetInstrInfo *TII) { |
3064 | unsigned MCOpcode = RISCV::getRVVMCOpcode(RVVPseudoOpcode: User->getMachineOpcode()); |
3065 | |
3066 | if (!MCOpcode) |
3067 | return false; |
3068 | |
3069 | const MCInstrDesc &MCID = TII->get(Opcode: User->getMachineOpcode()); |
3070 | const uint64_t TSFlags = MCID.TSFlags; |
3071 | if (!RISCVII::hasSEWOp(TSFlags)) |
3072 | return false; |
3073 | assert(RISCVII::hasVLOp(TSFlags)); |
3074 | |
3075 | bool HasGlueOp = User->getGluedNode() != nullptr; |
3076 | unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1; |
3077 | bool HasChainOp = User->getOperand(Num: ChainOpIdx).getValueType() == MVT::Other; |
3078 | bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); |
3079 | unsigned VLIdx = |
3080 | User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; |
3081 | const unsigned Log2SEW = User->getConstantOperandVal(Num: VLIdx + 1); |
3082 | |
3083 | if (UserOpNo == VLIdx) |
3084 | return false; |
3085 | |
3086 | auto NumDemandedBits = |
3087 | RISCV::getVectorLowDemandedScalarBits(Opcode: MCOpcode, Log2SEW); |
3088 | return NumDemandedBits && Bits >= *NumDemandedBits; |
3089 | } |
3090 | |
3091 | // Return true if all users of this SDNode* only consume the lower \p Bits. |
3092 | // This can be used to form W instructions for add/sub/mul/shl even when the |
3093 | // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if |
3094 | // SimplifyDemandedBits has made it so some users see a sext_inreg and some |
3095 | // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave |
3096 | // the add/sub/mul/shl to become non-W instructions. By checking the users we |
3097 | // may be able to use a W instruction and CSE with the other instruction if |
3098 | // this has happened. We could try to detect that the CSE opportunity exists |
3099 | // before doing this, but that would be more complicated. |
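// For example, if every use of a node is the data operand of an SW or an
// operand of an ADDW, only the node's low 32 bits are observed, so an ADD
// feeding those uses can be selected as ADDW.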
3100 | bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, |
3101 | const unsigned Depth) const { |
3102 | assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || |
3103 | Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || |
3104 | Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || |
3105 | Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || |
3106 | Node->getOpcode() == ISD::SIGN_EXTEND_INREG || |
3107 | isa<ConstantSDNode>(Node) || Depth != 0) && |
3108 | "Unexpected opcode" ); |
3109 | |
3110 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
3111 | return false; |
3112 | |
3113 | // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked |
3114 | // the VT. Ensure the type is scalar to avoid wasting time on vectors. |
3115 | if (Depth == 0 && !Node->getValueType(ResNo: 0).isScalarInteger()) |
3116 | return false; |
3117 | |
3118 | for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { |
3119 | SDNode *User = *UI; |
3120 | // Users of this node should have already been instruction selected |
3121 | if (!User->isMachineOpcode()) |
3122 | return false; |
3123 | |
3124 | // TODO: Add more opcodes? |
3125 | switch (User->getMachineOpcode()) { |
3126 | default: |
3127 | if (vectorPseudoHasAllNBitUsers(User, UserOpNo: UI.getOperandNo(), Bits, TII)) |
3128 | break; |
3129 | return false; |
3130 | case RISCV::ADDW: |
3131 | case RISCV::ADDIW: |
3132 | case RISCV::SUBW: |
3133 | case RISCV::MULW: |
3134 | case RISCV::SLLW: |
3135 | case RISCV::SLLIW: |
3136 | case RISCV::SRAW: |
3137 | case RISCV::SRAIW: |
3138 | case RISCV::SRLW: |
3139 | case RISCV::SRLIW: |
3140 | case RISCV::DIVW: |
3141 | case RISCV::DIVUW: |
3142 | case RISCV::REMW: |
3143 | case RISCV::REMUW: |
3144 | case RISCV::ROLW: |
3145 | case RISCV::RORW: |
3146 | case RISCV::RORIW: |
3147 | case RISCV::CLZW: |
3148 | case RISCV::CTZW: |
3149 | case RISCV::CPOPW: |
3150 | case RISCV::SLLI_UW: |
3151 | case RISCV::FMV_W_X: |
3152 | case RISCV::FCVT_H_W: |
3153 | case RISCV::FCVT_H_WU: |
3154 | case RISCV::FCVT_S_W: |
3155 | case RISCV::FCVT_S_WU: |
3156 | case RISCV::FCVT_D_W: |
3157 | case RISCV::FCVT_D_WU: |
3158 | case RISCV::TH_REVW: |
3159 | case RISCV::TH_SRRIW: |
3160 | if (Bits < 32) |
3161 | return false; |
3162 | break; |
3163 | case RISCV::SLL: |
3164 | case RISCV::SRA: |
3165 | case RISCV::SRL: |
3166 | case RISCV::ROL: |
3167 | case RISCV::ROR: |
3168 | case RISCV::BSET: |
3169 | case RISCV::BCLR: |
3170 | case RISCV::BINV: |
// Shift amount operands only use log2(XLen) bits.
3172 | if (UI.getOperandNo() != 1 || Bits < Log2_32(Value: Subtarget->getXLen())) |
3173 | return false; |
3174 | break; |
3175 | case RISCV::SLLI: |
3176 | // SLLI only uses the lower (XLen - ShAmt) bits. |
3177 | if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(Num: 1)) |
3178 | return false; |
3179 | break; |
3180 | case RISCV::ANDI: |
3181 | if (Bits >= (unsigned)llvm::bit_width(Value: User->getConstantOperandVal(Num: 1))) |
3182 | break; |
3183 | goto RecCheck; |
3184 | case RISCV::ORI: { |
3185 | uint64_t Imm = cast<ConstantSDNode>(Val: User->getOperand(Num: 1))->getSExtValue(); |
3186 | if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm)) |
3187 | break; |
3188 | [[fallthrough]]; |
3189 | } |
3190 | case RISCV::AND: |
3191 | case RISCV::OR: |
3192 | case RISCV::XOR: |
3193 | case RISCV::XORI: |
3194 | case RISCV::ANDN: |
3195 | case RISCV::ORN: |
3196 | case RISCV::XNOR: |
3197 | case RISCV::SH1ADD: |
3198 | case RISCV::SH2ADD: |
3199 | case RISCV::SH3ADD: |
3200 | RecCheck: |
3201 | if (hasAllNBitUsers(Node: User, Bits, Depth: Depth + 1)) |
3202 | break; |
3203 | return false; |
3204 | case RISCV::SRLI: { |
3205 | unsigned ShAmt = User->getConstantOperandVal(Num: 1); |
3206 | // If we are shifting right by less than Bits, and users don't demand any |
3207 | // bits that were shifted into [Bits-1:0], then we can consider this as an |
3208 | // N-Bit user. |
3209 | if (Bits > ShAmt && hasAllNBitUsers(Node: User, Bits: Bits - ShAmt, Depth: Depth + 1)) |
3210 | break; |
3211 | return false; |
3212 | } |
3213 | case RISCV::SEXT_B: |
3214 | case RISCV::PACKH: |
3215 | if (Bits < 8) |
3216 | return false; |
3217 | break; |
3218 | case RISCV::SEXT_H: |
3219 | case RISCV::FMV_H_X: |
3220 | case RISCV::ZEXT_H_RV32: |
3221 | case RISCV::ZEXT_H_RV64: |
3222 | case RISCV::PACKW: |
3223 | if (Bits < 16) |
3224 | return false; |
3225 | break; |
3226 | case RISCV::PACK: |
3227 | if (Bits < (Subtarget->getXLen() / 2)) |
3228 | return false; |
3229 | break; |
3230 | case RISCV::ADD_UW: |
3231 | case RISCV::SH1ADD_UW: |
3232 | case RISCV::SH2ADD_UW: |
3233 | case RISCV::SH3ADD_UW: |
3234 | // The first operand to add.uw/shXadd.uw is implicitly zero extended from |
3235 | // 32 bits. |
3236 | if (UI.getOperandNo() != 0 || Bits < 32) |
3237 | return false; |
3238 | break; |
3239 | case RISCV::SB: |
3240 | if (UI.getOperandNo() != 0 || Bits < 8) |
3241 | return false; |
3242 | break; |
3243 | case RISCV::SH: |
3244 | if (UI.getOperandNo() != 0 || Bits < 16) |
3245 | return false; |
3246 | break; |
3247 | case RISCV::SW: |
3248 | if (UI.getOperandNo() != 0 || Bits < 32) |
3249 | return false; |
3250 | break; |
3251 | } |
3252 | } |
3253 | |
3254 | return true; |
3255 | } |
3256 | |
3257 | // Select a constant that can be represented as (sign_extend(imm5) << imm2). |
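// For example, 96 == 12 << 3 is encoded with Simm5 == 12 and Shl2 == 3, while
// 97 has no such encoding and is rejected.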
3258 | bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5, |
3259 | SDValue &Shl2) { |
3260 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) { |
3261 | int64_t Offset = C->getSExtValue(); |
3262 | int64_t Shift; |
3263 | for (Shift = 0; Shift < 4; Shift++) |
3264 | if (isInt<5>(x: Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) |
3265 | break; |
3266 | |
3267 | // Constant cannot be encoded. |
3268 | if (Shift == 4) |
3269 | return false; |
3270 | |
3271 | EVT Ty = N->getValueType(ResNo: 0); |
3272 | Simm5 = CurDAG->getTargetConstant(Val: Offset >> Shift, DL: SDLoc(N), VT: Ty); |
3273 | Shl2 = CurDAG->getTargetConstant(Val: Shift, DL: SDLoc(N), VT: Ty); |
3274 | return true; |
3275 | } |
3276 | |
3277 | return false; |
3278 | } |
3279 | |
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI and VSETVLI later.
3282 | bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { |
3283 | auto *C = dyn_cast<ConstantSDNode>(Val&: N); |
3284 | if (C && isUInt<5>(x: C->getZExtValue())) { |
3285 | VL = CurDAG->getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(N), |
3286 | VT: N->getValueType(ResNo: 0)); |
3287 | } else if (C && C->isAllOnes()) { |
3288 | // Treat all ones as VLMax. |
3289 | VL = CurDAG->getTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N), |
3290 | VT: N->getValueType(ResNo: 0)); |
3291 | } else if (isa<RegisterSDNode>(Val: N) && |
3292 | cast<RegisterSDNode>(Val&: N)->getReg() == RISCV::X0) { |
3293 | // All our VL operands use an operand that allows GPRNoX0 or an immediate |
3294 | // as the register class. Convert X0 to a special immediate to pass the |
3295 | // MachineVerifier. This is recognized specially by the vsetvli insertion |
3296 | // pass. |
3297 | VL = CurDAG->getTargetConstant(Val: RISCV::VLMaxSentinel, DL: SDLoc(N), |
3298 | VT: N->getValueType(ResNo: 0)); |
3299 | } else { |
3300 | VL = N; |
3301 | } |
3302 | |
3303 | return true; |
3304 | } |
3305 | |
3306 | static SDValue findVSplat(SDValue N) { |
3307 | if (N.getOpcode() == ISD::INSERT_SUBVECTOR) { |
3308 | if (!N.getOperand(i: 0).isUndef()) |
3309 | return SDValue(); |
3310 | N = N.getOperand(i: 1); |
3311 | } |
3312 | SDValue Splat = N; |
3313 | if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL && |
3314 | Splat.getOpcode() != RISCVISD::VMV_S_X_VL) || |
3315 | !Splat.getOperand(i: 0).isUndef()) |
3316 | return SDValue(); |
3317 | assert(Splat.getNumOperands() == 3 && "Unexpected number of operands" ); |
3318 | return Splat; |
3319 | } |
3320 | |
3321 | bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { |
3322 | SDValue Splat = findVSplat(N); |
3323 | if (!Splat) |
3324 | return false; |
3325 | |
3326 | SplatVal = Splat.getOperand(i: 1); |
3327 | return true; |
3328 | } |
3329 | |
3330 | static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, |
3331 | SelectionDAG &DAG, |
3332 | const RISCVSubtarget &Subtarget, |
3333 | std::function<bool(int64_t)> ValidateImm) { |
3334 | SDValue Splat = findVSplat(N); |
3335 | if (!Splat || !isa<ConstantSDNode>(Val: Splat.getOperand(i: 1))) |
3336 | return false; |
3337 | |
3338 | const unsigned SplatEltSize = Splat.getScalarValueSizeInBits(); |
3339 | assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() && |
3340 | "Unexpected splat operand type" ); |
3341 | |
3342 | // The semantics of RISCVISD::VMV_V_X_VL is that when the operand |
3343 | // type is wider than the resulting vector element type: an implicit |
3344 | // truncation first takes place. Therefore, perform a manual |
3345 | // truncation/sign-extension in order to ignore any truncated bits and catch |
3346 | // any zero-extended immediate. |
3347 | // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first |
3348 | // sign-extending to (XLenVT -1). |
3349 | APInt SplatConst = Splat.getConstantOperandAPInt(i: 1).sextOrTrunc(width: SplatEltSize); |
3350 | |
3351 | int64_t SplatImm = SplatConst.getSExtValue(); |
3352 | |
3353 | if (!ValidateImm(SplatImm)) |
3354 | return false; |
3355 | |
3356 | SplatVal = DAG.getTargetConstant(Val: SplatImm, DL: SDLoc(N), VT: Subtarget.getXLenVT()); |
3357 | return true; |
3358 | } |
3359 | |
3360 | bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { |
3361 | return selectVSplatImmHelper(N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, |
3362 | ValidateImm: [](int64_t Imm) { return isInt<5>(x: Imm); }); |
3363 | } |
3364 | |
3365 | bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { |
3366 | return selectVSplatImmHelper( |
3367 | N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, |
3368 | ValidateImm: [](int64_t Imm) { return (isInt<5>(x: Imm) && Imm != -16) || Imm == 16; }); |
3369 | } |
3370 | |
3371 | bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, |
3372 | SDValue &SplatVal) { |
3373 | return selectVSplatImmHelper( |
3374 | N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, ValidateImm: [](int64_t Imm) { |
3375 | return Imm != 0 && ((isInt<5>(x: Imm) && Imm != -16) || Imm == 16); |
3376 | }); |
3377 | } |
3378 | |
3379 | bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, |
3380 | SDValue &SplatVal) { |
3381 | return selectVSplatImmHelper( |
3382 | N, SplatVal, DAG&: *CurDAG, Subtarget: *Subtarget, |
3383 | ValidateImm: [Bits](int64_t Imm) { return isUIntN(N: Bits, x: Imm); }); |
3384 | } |
3385 | |
3386 | bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) { |
3387 | auto IsExtOrTrunc = [](SDValue N) { |
3388 | switch (N->getOpcode()) { |
3389 | case ISD::SIGN_EXTEND: |
3390 | case ISD::ZERO_EXTEND: |
3391 | // There's no passthru on these _VL nodes so any VL/mask is ok, since any |
3392 | // inactive elements will be undef. |
3393 | case RISCVISD::TRUNCATE_VECTOR_VL: |
3394 | case RISCVISD::VSEXT_VL: |
3395 | case RISCVISD::VZEXT_VL: |
3396 | return true; |
3397 | default: |
3398 | return false; |
3399 | } |
3400 | }; |
3401 | |
3402 | // We can have multiple nested nodes, so unravel them all if needed. |
3403 | while (IsExtOrTrunc(N)) { |
3404 | if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8) |
3405 | return false; |
3406 | N = N->getOperand(Num: 0); |
3407 | } |
3408 | |
3409 | return selectVSplat(N, SplatVal); |
3410 | } |
3411 | |
3412 | bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) { |
3413 | ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val: N.getNode()); |
3414 | if (!CFP) |
3415 | return false; |
3416 | const APFloat &APF = CFP->getValueAPF(); |
3417 | // td can handle +0.0 already. |
3418 | if (APF.isPosZero()) |
3419 | return false; |
3420 | |
3421 | MVT VT = CFP->getSimpleValueType(ResNo: 0); |
3422 | |
3423 | // Even if this FPImm requires an additional FNEG (i.e. the second element of |
3424 | // the returned pair is true) we still prefer FLI + FNEG over immediate |
3425 | // materialization as the latter might generate a longer instruction sequence. |
3426 | if (static_cast<const RISCVTargetLowering *>(TLI) |
3427 | ->getLegalZfaFPImm(Imm: APF, VT) |
3428 | .first >= 0) |
3429 | return false; |
3430 | |
3431 | MVT XLenVT = Subtarget->getXLenVT(); |
3432 | if (VT == MVT::f64 && !Subtarget->is64Bit()) { |
3433 | assert(APF.isNegZero() && "Unexpected constant." ); |
3434 | return false; |
3435 | } |
3436 | SDLoc DL(N); |
3437 | Imm = selectImm(CurDAG, DL, VT: XLenVT, Imm: APF.bitcastToAPInt().getSExtValue(), |
3438 | Subtarget: *Subtarget); |
3439 | return true; |
3440 | } |
3441 | |
3442 | bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, |
3443 | SDValue &Imm) { |
3444 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: N)) { |
3445 | int64_t ImmVal = SignExtend64(X: C->getSExtValue(), B: Width); |
3446 | |
3447 | if (!isInt<5>(x: ImmVal)) |
3448 | return false; |
3449 | |
3450 | Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: Subtarget->getXLenVT()); |
3451 | return true; |
3452 | } |
3453 | |
3454 | return false; |
3455 | } |
3456 | |
3457 | // Try to remove sext.w if the input is a W instruction or can be made into |
3458 | // a W instruction cheaply. |
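// For example, (ADDIW (ADD a, b), 0) is replaced by (ADDW a, b), and
// (ADDIW (ADDW a, b), 0) is dropped in favor of the existing ADDW result.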
3459 | bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { |
3460 | // Look for the sext.w pattern, addiw rd, rs1, 0. |
3461 | if (N->getMachineOpcode() != RISCV::ADDIW || |
3462 | !isNullConstant(V: N->getOperand(Num: 1))) |
3463 | return false; |
3464 | |
3465 | SDValue N0 = N->getOperand(Num: 0); |
3466 | if (!N0.isMachineOpcode()) |
3467 | return false; |
3468 | |
3469 | switch (N0.getMachineOpcode()) { |
3470 | default: |
3471 | break; |
3472 | case RISCV::ADD: |
3473 | case RISCV::ADDI: |
3474 | case RISCV::SUB: |
3475 | case RISCV::MUL: |
3476 | case RISCV::SLLI: { |
3477 | // Convert sext.w+add/sub/mul to their W instructions. This will create |
3478 | // a new independent instruction. This improves latency. |
3479 | unsigned Opc; |
3480 | switch (N0.getMachineOpcode()) { |
3481 | default: |
3482 | llvm_unreachable("Unexpected opcode!" ); |
3483 | case RISCV::ADD: Opc = RISCV::ADDW; break; |
3484 | case RISCV::ADDI: Opc = RISCV::ADDIW; break; |
3485 | case RISCV::SUB: Opc = RISCV::SUBW; break; |
3486 | case RISCV::MUL: Opc = RISCV::MULW; break; |
3487 | case RISCV::SLLI: Opc = RISCV::SLLIW; break; |
3488 | } |
3489 | |
3490 | SDValue N00 = N0.getOperand(i: 0); |
3491 | SDValue N01 = N0.getOperand(i: 1); |
3492 | |
3493 | // Shift amount needs to be uimm5. |
3494 | if (N0.getMachineOpcode() == RISCV::SLLI && |
3495 | !isUInt<5>(x: cast<ConstantSDNode>(Val&: N01)->getSExtValue())) |
3496 | break; |
3497 | |
3498 | SDNode *Result = |
3499 | CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3500 | Op1: N00, Op2: N01); |
3501 | ReplaceUses(F: N, T: Result); |
3502 | return true; |
3503 | } |
3504 | case RISCV::ADDW: |
3505 | case RISCV::ADDIW: |
3506 | case RISCV::SUBW: |
3507 | case RISCV::MULW: |
3508 | case RISCV::SLLIW: |
3509 | case RISCV::PACKW: |
3510 | case RISCV::TH_MULAW: |
3511 | case RISCV::TH_MULAH: |
3512 | case RISCV::TH_MULSW: |
3513 | case RISCV::TH_MULSH: |
3514 | if (N0.getValueType() == MVT::i32) |
3515 | break; |
3516 | |
3517 | // Result is already sign extended just remove the sext.w. |
3518 | // NOTE: We only handle the nodes that are selected with hasAllWUsers. |
3519 | ReplaceUses(F: N, T: N0.getNode()); |
3520 | return true; |
3521 | } |
3522 | |
3523 | return false; |
3524 | } |
3525 | |
3526 | // After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg |
3527 | // that's glued to the pseudo. This tries to look up the value that was copied |
3528 | // to V0. |
3529 | static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) { |
3530 | // Check that we're using V0 as a mask register. |
3531 | if (!isa<RegisterSDNode>(Val: MaskOp) || |
3532 | cast<RegisterSDNode>(Val&: MaskOp)->getReg() != RISCV::V0) |
3533 | return SDValue(); |
3534 | |
3535 | // The glued user defines V0. |
3536 | const auto *Glued = GlueOp.getNode(); |
3537 | |
3538 | if (!Glued || Glued->getOpcode() != ISD::CopyToReg) |
3539 | return SDValue(); |
3540 | |
3541 | // Check that we're defining V0 as a mask register. |
3542 | if (!isa<RegisterSDNode>(Val: Glued->getOperand(Num: 1)) || |
3543 | cast<RegisterSDNode>(Val: Glued->getOperand(Num: 1))->getReg() != RISCV::V0) |
3544 | return SDValue(); |
3545 | |
3546 | SDValue MaskSetter = Glued->getOperand(Num: 2); |
3547 | |
3548 | // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came |
3549 | // from an extract_subvector or insert_subvector. |
3550 | if (MaskSetter->isMachineOpcode() && |
3551 | MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS) |
3552 | MaskSetter = MaskSetter->getOperand(Num: 0); |
3553 | |
3554 | return MaskSetter; |
3555 | } |
3556 | |
3557 | static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) { |
3558 | // Check the instruction defining V0; it needs to be a VMSET pseudo. |
3559 | SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp); |
3560 | if (!MaskSetter) |
3561 | return false; |
3562 | |
3563 | const auto IsVMSet = [](unsigned Opc) { |
3564 | return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || |
3565 | Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || |
3566 | Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || |
3567 | Opc == RISCV::PseudoVMSET_M_B8; |
3568 | }; |
3569 | |
3570 | // TODO: Check that the VMSET is the expected bitwidth? The pseudo has |
3571 | // undefined behaviour if it's the wrong bitwidth, so we could choose to |
3572 | // assume that it's all-ones? Same applies to its VL. |
3573 | return MaskSetter->isMachineOpcode() && |
3574 | IsVMSet(MaskSetter.getMachineOpcode()); |
3575 | } |
3576 | |
3577 | // Return true if we can make sure mask of N is all-ones mask. |
3578 | static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) { |
3579 | return usesAllOnesMask(MaskOp: N->getOperand(Num: MaskOpIdx), |
3580 | GlueOp: N->getOperand(Num: N->getNumOperands() - 1)); |
3581 | } |
3582 | |
3583 | static bool isImplicitDef(SDValue V) { |
3584 | if (!V.isMachineOpcode()) |
3585 | return false; |
3586 | if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) { |
3587 | for (unsigned I = 1; I < V.getNumOperands(); I += 2) |
3588 | if (!isImplicitDef(V: V.getOperand(i: I))) |
3589 | return false; |
3590 | return true; |
3591 | } |
3592 | return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; |
3593 | } |
3594 | |
3595 | // Optimize masked RVV pseudo instructions with a known all-ones mask to their |
3596 | // corresponding "unmasked" pseudo versions. The mask we're interested in will |
3597 | // take the form of a V0 physical register operand, with a glued |
3598 | // register-setting instruction. |
3599 | bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { |
3600 | const RISCV::RISCVMaskedPseudoInfo *I = |
3601 | RISCV::getMaskedPseudoInfo(MaskedPseudo: N->getMachineOpcode()); |
3602 | if (!I) |
3603 | return false; |
3604 | |
3605 | unsigned MaskOpIdx = I->MaskOpIdx; |
3606 | if (!usesAllOnesMask(N, MaskOpIdx)) |
3607 | return false; |
3608 | |
3609 | // There are two classes of pseudos in the table - compares and |
3610 | // everything else. See the comment on RISCVMaskedPseudo for details. |
3611 | const unsigned Opc = I->UnmaskedPseudo; |
3612 | const MCInstrDesc &MCID = TII->get(Opcode: Opc); |
3613 | const bool UseTUPseudo = RISCVII::hasVecPolicyOp(TSFlags: MCID.TSFlags); |
3614 | #ifndef NDEBUG |
3615 | const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode()); |
3616 | assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) == |
3617 | RISCVII::hasVecPolicyOp(MCID.TSFlags) && |
3618 | "Masked and unmasked pseudos are inconsistent" ); |
3619 | const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID); |
3620 | assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure" ); |
3621 | #endif |
3622 | |
3623 | SmallVector<SDValue, 8> Ops; |
3624 | // Skip the merge operand at index 0 if !UseTUPseudo. |
3625 | for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) { |
3626 | // Skip the mask, and the Glue. |
3627 | SDValue Op = N->getOperand(Num: I); |
3628 | if (I == MaskOpIdx || Op.getValueType() == MVT::Glue) |
3629 | continue; |
3630 | Ops.push_back(Elt: Op); |
3631 | } |
3632 | |
3633 | // Transitively apply any node glued to our new node. |
3634 | const auto *Glued = N->getGluedNode(); |
3635 | if (auto *TGlued = Glued->getGluedNode()) |
3636 | Ops.push_back(Elt: SDValue(TGlued, TGlued->getNumValues() - 1)); |
3637 | |
3638 | MachineSDNode *Result = |
3639 | CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops); |
3640 | |
3641 | if (!N->memoperands_empty()) |
3642 | CurDAG->setNodeMemRefs(N: Result, NewMemRefs: N->memoperands()); |
3643 | |
3644 | Result->setFlags(N->getFlags()); |
3645 | ReplaceUses(F: N, T: Result); |
3646 | |
3647 | return true; |
3648 | } |
3649 | |
3650 | static bool IsVMerge(SDNode *N) { |
3651 | return RISCV::getRVVMCOpcode(RVVPseudoOpcode: N->getMachineOpcode()) == RISCV::VMERGE_VVM; |
3652 | } |
3653 | |
3654 | static bool IsVMv(SDNode *N) { |
3655 | return RISCV::getRVVMCOpcode(RVVPseudoOpcode: N->getMachineOpcode()) == RISCV::VMV_V_V; |
3656 | } |
3657 | |
3658 | static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) { |
3659 | switch (LMUL) { |
3660 | case RISCVII::LMUL_F8: |
3661 | return RISCV::PseudoVMSET_M_B1; |
3662 | case RISCVII::LMUL_F4: |
3663 | return RISCV::PseudoVMSET_M_B2; |
3664 | case RISCVII::LMUL_F2: |
3665 | return RISCV::PseudoVMSET_M_B4; |
3666 | case RISCVII::LMUL_1: |
3667 | return RISCV::PseudoVMSET_M_B8; |
3668 | case RISCVII::LMUL_2: |
3669 | return RISCV::PseudoVMSET_M_B16; |
3670 | case RISCVII::LMUL_4: |
3671 | return RISCV::PseudoVMSET_M_B32; |
3672 | case RISCVII::LMUL_8: |
3673 | return RISCV::PseudoVMSET_M_B64; |
3674 | case RISCVII::LMUL_RESERVED: |
3675 | llvm_unreachable("Unexpected LMUL" ); |
3676 | } |
3677 | llvm_unreachable("Unknown VLMUL enum" ); |
3678 | } |
3679 | |
3680 | // Try to fold away VMERGE_VVM instructions into their true operands: |
3681 | // |
3682 | // %true = PseudoVADD_VV ... |
3683 | // %x = PseudoVMERGE_VVM %false, %false, %true, %mask |
3684 | // -> |
3685 | // %x = PseudoVADD_VV_MASK %false, ..., %mask |
3686 | // |
3687 | // We can only fold if vmerge's merge operand, vmerge's false operand and |
3688 | // %true's merge operand (if it has one) are the same. This is because we have |
3689 | // to consolidate them into one merge operand in the result. |
3690 | // |
3691 | // If %true is masked, then we can use its mask instead of vmerge's if vmerge's |
3692 | // mask is all ones. |
3693 | // |
3694 | // We can also fold a VMV_V_V into its true operand, since it is equivalent to a |
3695 | // VMERGE_VVM with an all ones mask. |
3696 | // |
3697 | // The resulting VL is the minimum of the two VLs. |
3698 | // |
3699 | // The resulting policy is the effective policy the vmerge would have had, |
// i.e. whether or not its merge operand was implicit-def.
3701 | bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { |
3702 | SDValue Merge, False, True, VL, Mask, Glue; |
3703 | // A vmv.v.v is equivalent to a vmerge with an all-ones mask. |
3704 | if (IsVMv(N)) { |
3705 | Merge = N->getOperand(Num: 0); |
3706 | False = N->getOperand(Num: 0); |
3707 | True = N->getOperand(Num: 1); |
3708 | VL = N->getOperand(Num: 2); |
3709 | // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones |
3710 | // mask later below. |
3711 | } else { |
3712 | assert(IsVMerge(N)); |
3713 | Merge = N->getOperand(Num: 0); |
3714 | False = N->getOperand(Num: 1); |
3715 | True = N->getOperand(Num: 2); |
3716 | Mask = N->getOperand(Num: 3); |
3717 | VL = N->getOperand(Num: 4); |
3718 | // We always have a glue node for the mask at v0. |
3719 | Glue = N->getOperand(Num: N->getNumOperands() - 1); |
3720 | } |
3721 | assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0); |
3722 | assert(!Glue || Glue.getValueType() == MVT::Glue); |
3723 | |
3724 | // If the EEW of True is different from vmerge's SEW, then we can't fold. |
3725 | if (True.getSimpleValueType() != N->getSimpleValueType(ResNo: 0)) |
3726 | return false; |
3727 | |
3728 | // We require that either merge and false are the same, or that merge |
3729 | // is undefined. |
3730 | if (Merge != False && !isImplicitDef(V: Merge)) |
3731 | return false; |
3732 | |
3733 | assert(True.getResNo() == 0 && |
3734 | "Expect True is the first output of an instruction." ); |
3735 | |
// N must be the only user of True.
3737 | if (!True.hasOneUse()) |
3738 | return false; |
3739 | |
3740 | if (!True.isMachineOpcode()) |
3741 | return false; |
3742 | |
3743 | unsigned TrueOpc = True.getMachineOpcode(); |
3744 | const MCInstrDesc &TrueMCID = TII->get(Opcode: TrueOpc); |
3745 | uint64_t TrueTSFlags = TrueMCID.TSFlags; |
3746 | bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(Desc: TrueMCID); |
3747 | |
3748 | bool IsMasked = false; |
3749 | const RISCV::RISCVMaskedPseudoInfo *Info = |
3750 | RISCV::lookupMaskedIntrinsicByUnmasked(UnmaskedPseudo: TrueOpc); |
3751 | if (!Info && HasTiedDest) { |
3752 | Info = RISCV::getMaskedPseudoInfo(MaskedPseudo: TrueOpc); |
3753 | IsMasked = true; |
3754 | } |
3755 | assert(!(IsMasked && !HasTiedDest) && "Expected tied dest" ); |
3756 | |
3757 | if (!Info) |
3758 | return false; |
3759 | |
3760 | // If True has a merge operand then it needs to be the same as vmerge's False, |
3761 | // since False will be used for the result's merge operand. |
3762 | if (HasTiedDest && !isImplicitDef(V: True->getOperand(Num: 0))) { |
3763 | SDValue MergeOpTrue = True->getOperand(Num: 0); |
3764 | if (False != MergeOpTrue) |
3765 | return false; |
3766 | } |
3767 | |
3768 | // If True is masked then the vmerge must have either the same mask or an all |
3769 | // 1s mask, since we're going to keep the mask from True. |
3770 | if (IsMasked && Mask) { |
3771 | // FIXME: Support mask agnostic True instruction which would have an |
3772 | // undef merge operand. |
3773 | SDValue TrueMask = |
3774 | getMaskSetter(MaskOp: True->getOperand(Num: Info->MaskOpIdx), |
3775 | GlueOp: True->getOperand(Num: True->getNumOperands() - 1)); |
3776 | assert(TrueMask); |
3777 | if (!usesAllOnesMask(MaskOp: Mask, GlueOp: Glue) && getMaskSetter(MaskOp: Mask, GlueOp: Glue) != TrueMask) |
3778 | return false; |
3779 | } |
3780 | |
// Skip if True has side effects.
3782 | if (TII->get(Opcode: TrueOpc).hasUnmodeledSideEffects()) |
3783 | return false; |
3784 | |
3785 | // The last operand of a masked instruction may be glued. |
3786 | bool HasGlueOp = True->getGluedNode() != nullptr; |
3787 | |
3788 | // The chain operand may exist either before the glued operands or in the last |
3789 | // position. |
3790 | unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1; |
3791 | bool HasChainOp = |
3792 | True.getOperand(i: TrueChainOpIdx).getValueType() == MVT::Other; |
3793 | |
3794 | if (HasChainOp) { |
3795 | // Avoid creating cycles in the DAG. We must ensure that none of the other |
// operands depend on True through its Chain.
3797 | SmallVector<const SDNode *, 4> LoopWorklist; |
3798 | SmallPtrSet<const SDNode *, 16> Visited; |
3799 | LoopWorklist.push_back(Elt: False.getNode()); |
3800 | if (Mask) |
3801 | LoopWorklist.push_back(Elt: Mask.getNode()); |
3802 | LoopWorklist.push_back(Elt: VL.getNode()); |
3803 | if (Glue) |
3804 | LoopWorklist.push_back(Elt: Glue.getNode()); |
3805 | if (SDNode::hasPredecessorHelper(N: True.getNode(), Visited, Worklist&: LoopWorklist)) |
3806 | return false; |
3807 | } |
3808 | |
3809 | // The vector policy operand may be present for masked intrinsics |
3810 | bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags: TrueTSFlags); |
3811 | unsigned TrueVLIndex = |
3812 | True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; |
3813 | SDValue TrueVL = True.getOperand(i: TrueVLIndex); |
3814 | SDValue SEW = True.getOperand(i: TrueVLIndex + 1); |
3815 | |
3816 | auto GetMinVL = [](SDValue LHS, SDValue RHS) { |
3817 | if (LHS == RHS) |
3818 | return LHS; |
3819 | if (isAllOnesConstant(V: LHS)) |
3820 | return RHS; |
3821 | if (isAllOnesConstant(V: RHS)) |
3822 | return LHS; |
3823 | auto *CLHS = dyn_cast<ConstantSDNode>(Val&: LHS); |
3824 | auto *CRHS = dyn_cast<ConstantSDNode>(Val&: RHS); |
3825 | if (!CLHS || !CRHS) |
3826 | return SDValue(); |
3827 | return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS; |
3828 | }; |
3829 | |
3830 | // Because N and True must have the same merge operand (or True's operand is |
3831 | // implicit_def), the "effective" body is the minimum of their VLs. |
3832 | SDValue OrigVL = VL; |
3833 | VL = GetMinVL(TrueVL, VL); |
3834 | if (!VL) |
3835 | return false; |
3836 | |
3837 | // Some operations produce different elementwise results depending on the |
3838 | // active elements, like viota.m or vredsum. This transformation is illegal |
3839 | // for these if we change the active elements (i.e. mask or VL). |
3840 | if (Info->ActiveElementsAffectResult) { |
3841 | if (Mask && !usesAllOnesMask(MaskOp: Mask, GlueOp: Glue)) |
3842 | return false; |
3843 | if (TrueVL != VL) |
3844 | return false; |
3845 | } |
3846 | |
3847 | // If we end up changing the VL or mask of True, then we need to make sure it |
3848 | // doesn't raise any observable fp exceptions, since changing the active |
3849 | // elements will affect how fflags is set. |
3850 | if (TrueVL != VL || !IsMasked) |
3851 | if (mayRaiseFPException(Node: True.getNode()) && |
3852 | !True->getFlags().hasNoFPExcept()) |
3853 | return false; |
3854 | |
3855 | SDLoc DL(N); |
3856 | |
3857 | // From the preconditions we checked above, we know the mask and thus glue |
3858 | // for the result node will be taken from True. |
3859 | if (IsMasked) { |
3860 | Mask = True->getOperand(Num: Info->MaskOpIdx); |
3861 | Glue = True->getOperand(Num: True->getNumOperands() - 1); |
3862 | assert(Glue.getValueType() == MVT::Glue); |
3863 | } |
// Otherwise, if the vmerge is actually a vmv.v.v (which carries no mask),
// create an all-ones mask to use.
3866 | else if (IsVMv(N)) { |
3867 | unsigned TSFlags = TII->get(Opcode: N->getMachineOpcode()).TSFlags; |
3868 | unsigned VMSetOpc = GetVMSetForLMul(LMUL: RISCVII::getLMul(TSFlags)); |
3869 | ElementCount EC = N->getValueType(ResNo: 0).getVectorElementCount(); |
3870 | MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, EC); |
3871 | |
3872 | SDValue AllOnesMask = |
3873 | SDValue(CurDAG->getMachineNode(Opcode: VMSetOpc, dl: DL, VT: MaskVT, Op1: VL, Op2: SEW), 0); |
3874 | SDValue MaskCopy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL, |
3875 | Reg: RISCV::V0, N: AllOnesMask, Glue: SDValue()); |
3876 | Mask = CurDAG->getRegister(Reg: RISCV::V0, VT: MaskVT); |
3877 | Glue = MaskCopy.getValue(R: 1); |
3878 | } |
3879 | |
3880 | unsigned MaskedOpc = Info->MaskedPseudo; |
3881 | #ifndef NDEBUG |
3882 | const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc); |
3883 | assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) && |
3884 | "Expected instructions with mask have policy operand." ); |
3885 | assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(), |
3886 | MCOI::TIED_TO) == 0 && |
3887 | "Expected instructions with mask have a tied dest." ); |
3888 | #endif |
3889 | |
3890 | // Use a tumu policy, relaxing it to tail agnostic provided that the merge |
3891 | // operand is undefined. |
3892 | // |
3893 | // However, if the VL became smaller than what the vmerge had originally, then |
3894 | // elements past VL that were previously in the vmerge's body will have moved |
3895 | // to the tail. In that case we always need to use tail undisturbed to |
3896 | // preserve them. |
3897 | bool MergeVLShrunk = VL != OrigVL; |
3898 | uint64_t Policy = (isImplicitDef(V: Merge) && !MergeVLShrunk) |
3899 | ? RISCVII::TAIL_AGNOSTIC |
3900 | : /*TUMU*/ 0; |
3901 | SDValue PolicyOp = |
3902 | CurDAG->getTargetConstant(Val: Policy, DL, VT: Subtarget->getXLenVT()); |
3903 | |
3905 | SmallVector<SDValue, 8> Ops; |
3906 | Ops.push_back(Elt: False); |
3907 | |
3908 | const bool HasRoundingMode = RISCVII::hasRoundModeOp(TSFlags: TrueTSFlags); |
3909 | const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode; |
3910 | assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx); |
3911 | Ops.append(in_start: True->op_begin() + HasTiedDest, in_end: True->op_begin() + NormalOpsEnd); |
3912 | |
3913 | Ops.push_back(Elt: Mask); |
3914 | |
// An unmasked "VOp" with a rounding mode operand has an interface like
// (..., rm, vl) or (..., rm, vl, policy); its masked version is
// (..., vm, rm, vl, policy).
3918 | // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td |
3919 | if (HasRoundingMode) |
3920 | Ops.push_back(Elt: True->getOperand(Num: TrueVLIndex - 1)); |
3921 | |
3922 | Ops.append(IL: {VL, SEW, PolicyOp}); |
3923 | |
3924 | // Result node should have chain operand of True. |
3925 | if (HasChainOp) |
3926 | Ops.push_back(Elt: True.getOperand(i: TrueChainOpIdx)); |
3927 | |
3928 | // Add the glue for the CopyToReg of mask->v0. |
3929 | Ops.push_back(Elt: Glue); |
3930 | |
3931 | MachineSDNode *Result = |
3932 | CurDAG->getMachineNode(Opcode: MaskedOpc, dl: DL, VTs: True->getVTList(), Ops); |
3933 | Result->setFlags(True->getFlags()); |
3934 | |
3935 | if (!cast<MachineSDNode>(Val&: True)->memoperands_empty()) |
3936 | CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val&: True)->memoperands()); |
3937 | |
3938 | // Replace vmerge.vvm node by Result. |
3939 | ReplaceUses(F: SDValue(N, 0), T: SDValue(Result, 0)); |
3940 | |
// Replace the other values of True, e.g. its chain and VL.
3942 | for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx) |
3943 | ReplaceUses(F: True.getValue(R: Idx), T: SDValue(Result, Idx)); |
3944 | |
3945 | return true; |
3946 | } |
3947 | |
3948 | bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { |
3949 | bool MadeChange = false; |
3950 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
3951 | |
3952 | while (Position != CurDAG->allnodes_begin()) { |
3953 | SDNode *N = &*--Position; |
3954 | if (N->use_empty() || !N->isMachineOpcode()) |
3955 | continue; |
3956 | |
3957 | if (IsVMerge(N) || IsVMv(N)) |
3958 | MadeChange |= performCombineVMergeAndVOps(N); |
3959 | } |
3960 | return MadeChange; |
3961 | } |
3962 | |
/// If our passthru is an implicit_def, use noreg instead. This sidesteps
/// issues with MachineCSE not being able to CSE expressions with
3965 | /// IMPLICIT_DEF operands while preserving the semantic intent. See |
3966 | /// pr64282 for context. Note that this transform is the last one |
3967 | /// performed at ISEL DAG to DAG. |
3968 | bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() { |
3969 | bool MadeChange = false; |
3970 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
3971 | |
3972 | while (Position != CurDAG->allnodes_begin()) { |
3973 | SDNode *N = &*--Position; |
3974 | if (N->use_empty() || !N->isMachineOpcode()) |
3975 | continue; |
3976 | |
3977 | const unsigned Opc = N->getMachineOpcode(); |
3978 | if (!RISCVVPseudosTable::getPseudoInfo(Pseudo: Opc) || |
3979 | !RISCVII::isFirstDefTiedToFirstUse(Desc: TII->get(Opcode: Opc)) || |
3980 | !isImplicitDef(V: N->getOperand(Num: 0))) |
3981 | continue; |
3982 | |
3983 | SmallVector<SDValue> Ops; |
3984 | Ops.push_back(Elt: CurDAG->getRegister(Reg: RISCV::NoRegister, VT: N->getValueType(ResNo: 0))); |
3985 | for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { |
3986 | SDValue Op = N->getOperand(Num: I); |
3987 | Ops.push_back(Elt: Op); |
3988 | } |
3989 | |
3990 | MachineSDNode *Result = |
3991 | CurDAG->getMachineNode(Opcode: Opc, dl: SDLoc(N), VTs: N->getVTList(), Ops); |
3992 | Result->setFlags(N->getFlags()); |
3993 | CurDAG->setNodeMemRefs(N: Result, NewMemRefs: cast<MachineSDNode>(Val: N)->memoperands()); |
3994 | ReplaceUses(F: N, T: Result); |
3995 | MadeChange = true; |
3996 | } |
3997 | return MadeChange; |
3998 | } |
3999 | |
4001 | // This pass converts a legalized DAG into a RISCV-specific DAG, ready |
4002 | // for instruction scheduling. |
4003 | FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, |
4004 | CodeGenOptLevel OptLevel) { |
4005 | return new RISCVDAGToDAGISelLegacy(TM, OptLevel); |
4006 | } |
4007 | |
4008 | char RISCVDAGToDAGISelLegacy::ID = 0; |
4009 | |
4010 | RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM, |
4011 | CodeGenOptLevel OptLevel) |
4012 | : SelectionDAGISelLegacy( |
4013 | ID, std::make_unique<RISCVDAGToDAGISel>(args&: TM, args&: OptLevel)) {} |
4014 | |
4015 | INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) |
4016 | |