//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

static cl::opt<bool>
    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }

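  // Scalable RVV value types, grouped by element type. These tables drive the
  // register class setup and the per-type operation actions configured below.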
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16Minimal())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

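  // X2 is the RISC-V stack pointer (sp).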
  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

  if (!Subtarget.hasVendorXCValu())
    setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  if (!Subtarget.hasVendorXCValu())
    setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SETCC, MVT::i32, Promote);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::VAARG, MVT::i32, Promote);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    if (!RV64LegalI32) {
      setOperationAction(ISD::LOAD, MVT::i32, Custom);
      setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                         MVT::i32, Custom);
      setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                         MVT::i32, Custom);
      if (!Subtarget.hasStdExtZbb())
        setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::SSUBO, MVT::i32, Custom);
      if (Subtarget.hasStdExtZbb()) {
        setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);
        setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Custom);
      }
    }
    setOperationAction(ISD::SADDO, MVT::i32, Custom);
  }
  if (!Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::MUL, MVT::i32, Promote);
  } else if (Subtarget.is64Bit()) {
    setOperationAction(ISD::MUL, MVT::i128, Custom);
    if (!RV64LegalI32)
      setOperationAction(ISD::MUL, MVT::i32, Custom);
    else
      setOperationAction(ISD::SMULO, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
                       XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                         Promote);
  } else if (Subtarget.is64Bit()) {
    if (!RV64LegalI32)
      setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                         {MVT::i8, MVT::i16, MVT::i32}, Custom);
  }

  if (RV64LegalI32 && Subtarget.is64Bit()) {
    setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);
    setOperationAction(
        {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,
        Expand);
  }

  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (!RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
  } else if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
  } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
  }

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
                         ? Legal
                         : Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BSWAP, MVT::i32,
                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                        Subtarget.hasVendorXTHeadBb())
                           ? Promote
                           : Expand);

  if (Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
  } else {
    // Zbkb can use rev8+brev8 to implement bitreverse.
    setOperationAction(ISD::BITREVERSE, XLenVT,
                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,
                         Promote);

    if (Subtarget.is64Bit()) {
      if (RV64LegalI32)
        setOperationAction(ISD::CTTZ, MVT::i32, Legal);
      else
        setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
    }
  } else if (!Subtarget.hasVendorXCVbitmanip()) {
    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
  }

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
      Subtarget.hasVendorXCVbitmanip()) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.
    if (Subtarget.is64Bit()) {
      if (RV64LegalI32) {
        setOperationAction(ISD::CTLZ, MVT::i32,
                           Subtarget.hasStdExtZbb() ? Legal : Promote);
        if (!Subtarget.hasStdExtZbb())
          setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
      } else
        setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  }

  if (!RV64LegalI32 && Subtarget.is64Bit() &&
      !Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, MVT::i32, Custom);

  // We can use PseudoCCSUB to implement ABS.
  if (Subtarget.hasShortForwardBranchOpt())
    setOperationAction(ISD::ABS, XLenVT, Legal);

  if (!Subtarget.hasVendorXTHeadCondMov()) {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::SELECT, MVT::i32, Promote);
  }

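  // Shared tables of scalar FP node kinds, reused below when configuring the
  // f16, f32 and f64 actions.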
  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
      ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
      ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
      ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
      ISD::FREM};

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN};

  if (Subtarget.hasStdExtZfhminOrZhinxmin())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

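  // Scalar ops that are promoted to f32 when only the minimal half-precision
  // extensions (Zfhmin/Zhinxmin for f16, Zfbfmin for bf16) are available.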
  static const unsigned ZfhminZfbfminPromoteOps[] = {
      ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FADD,
      ISD::FSUB,         ISD::FMUL,          ISD::FMA,
      ISD::FDIV,         ISD::FSQRT,         ISD::FABS,
      ISD::FNEG,         ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
      ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
      ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
      ISD::FROUNDEVEN,   ISD::SELECT};

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
    setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
    setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
    setOperationAction(ISD::FREM, MVT::bf16, Promote);
    // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
    // DAGCombiner::visitFP_ROUND probably needs improvements first.
    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
  }

  if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::SELECT, MVT::f16, Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
    } else {
      setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
                          ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
                         MVT::f16, Legal);
      // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
      // DAGCombiner::visitFP_ROUND probably needs improvements first.
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
                        ISD::FLOG10},
                       MVT::f16, Promote);

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
                        ISD::STRICT_FTRUNC},
                       MVT::f16, Promote);

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);

    setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
    } else {
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
    }
  }

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (!Subtarget.is64Bit())
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
    } else {
      if (Subtarget.is64Bit())
        setOperationAction(FPRndMode, MVT::f64, Custom);

      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                       MVT::i32, Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                        ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                       XLenVT, Legal);

    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
                         MVT::i32, Legal);

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);
  setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtZicbop()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
  }

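  // Atomics. With both Zabha and Zacas, sub-word compare-and-swap is supported
  // directly; otherwise cmpxchg narrower than 32 bits is widened.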
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
      setMinCmpXchgSizeInBits(8);
    else
      setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (getTargetMachine().getTargetTriple().isOSLinux()) {
    // Custom lowering of llvm.clear_cache.
    setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);
  }

  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);
    if (RV64LegalI32 && Subtarget.is64Bit())
      setOperationAction(ISD::VSCALE, MVT::i32, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                         MVT::i32, Custom);
    else
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                         MVT::i64, Custom);

    setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                       MVT::Other, Custom);

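    // VP (vector-predicated) and VECREDUCE opcode sets that the loops below
    // mark as custom-lowered on RVV vector types.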
    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
        ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
        ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
        ISD::VP_XOR,         ISD::VP_SRA,         ISD::VP_SRL,
        ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
        ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
        ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
        ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
        ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
        ISD::EXPERIMENTAL_VP_SPLAT};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
        ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
        ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
        ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
        ISD::VP_LLRINT,      ISD::EXPERIMENTAL_VP_REVERSE,
        ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
        ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                         MVT::i64, Custom);

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
                         MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(
          {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
          Expand);

      setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
                         Custom);

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

      setOperationAction(
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          Custom);

      setOperationAction(
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
          Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
    }

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                         Legal);

      setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
                         VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);
      setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,
                          ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,
                          ISD::SSUBSAT, ISD::USUBSAT},
                         VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      // Splice
      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      if (Subtarget.hasStdExtZvkb()) {
        setOperationAction(ISD::BSWAP, VT, Legal);
        setOperationAction(ISD::VP_BSWAP, VT, Custom);
      } else {
        setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
      }

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction(ISD::BITREVERSE, VT, Legal);
        setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Custom);
      } else {
        setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Expand);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
        // is in the range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
                             VT, Custom);
        }
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // TODO: support more ops.
    static const unsigned ZvfhminPromoteOps[] = {
        ISD::FMINNUM,     ISD::FMAXNUM,      ISD::FADD,        ISD::FSUB,
        ISD::FMUL,        ISD::FMA,          ISD::FDIV,        ISD::FSQRT,
        ISD::FABS,        ISD::FNEG,         ISD::FCOPYSIGN,   ISD::FCEIL,
        ISD::FFLOOR,      ISD::FROUND,       ISD::FROUNDEVEN,  ISD::FRINT,
        ISD::FNEARBYINT,  ISD::IS_FPCLASS,   ISD::SETCC,       ISD::FMAXIMUM,
        ISD::FMINIMUM,    ISD::STRICT_FADD,  ISD::STRICT_FSUB, ISD::STRICT_FMUL,
        ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};

    // TODO: support more vp ops.
    static const unsigned ZvfhminPromoteVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,         ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,         ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD,  ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,  ISD::VP_SQRT,
        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,      ISD::VP_FCEIL,
        ISD::VP_FFLOOR,      ISD::VP_FROUND,       ISD::VP_FROUNDEVEN,
        ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
        ISD::VP_FNEARBYINT,  ISD::VP_SETCC,        ISD::VP_FMINIMUM,
        ISD::VP_FMAXIMUM,    ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FEXP10, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

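    // Zvfh provides full f16 vector arithmetic; with only Zvfhmin, most f16
    // vector ops are promoted to f32 (or custom split for nxv32f16) below.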
    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                           Custom);
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                           Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
                            ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                           VT, Custom);
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR},
                           VT, Custom);
        if (Subtarget.hasStdExtZfhmin())
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
        // load/store
        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        // Custom split nxv32f16 since nxv32f32 is not legal.
        if (VT == MVT::nxv32f16) {
          setOperationAction(ZvfhminPromoteOps, VT, Custom);
          setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
          continue;
        }
        // Add more promote ops.
        MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
        setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
      }
    }

    // TODO: Could we merge some code with zvfhmin?
    if (Subtarget.hasVInstructionsBF16()) {
      for (MVT VT : BF16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
        setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                           Custom);
        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                            ISD::EXTRACT_SUBVECTOR},
                           VT, Custom);
        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
        if (Subtarget.hasStdExtZfbfmin())
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
        setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                           Custom);
        setOperationAction(ISD::SELECT_CC, VT, Expand);
        // TODO: Promote to fp32.
      }
    }

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      }
    }

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      }
    }

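    // Fixed-length vectors are lowered by mapping them onto scalable RVV
    // containers; start from "everything expands" and enable ops per type.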
1135 | if (Subtarget.useRVVForFixedLengthVectors()) { |
1136 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { |
1137 | if (!useRVVForFixedLengthVectorVT(VT)) |
1138 | continue; |
1139 | |
1140 | // By default everything must be expanded. |
1141 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
1142 | setOperationAction(Op, VT, Action: Expand); |
1143 | for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { |
1144 | setTruncStoreAction(ValVT: VT, MemVT: OtherVT, Action: Expand); |
1145 | setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: VT, |
1146 | MemVT: OtherVT, Action: Expand); |
1147 | } |
1148 | |
1149 | // Custom lower fixed vector undefs to scalable vector undefs to avoid |
1150 | // expansion to a build_vector of 0s. |
1151 | setOperationAction(Op: ISD::UNDEF, VT, Action: Custom); |
1152 | |
1153 | // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
1154 | setOperationAction(Ops: {ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, |
1155 | Action: Custom); |
1156 | |
1157 | setOperationAction(Ops: {ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT, |
1158 | Action: Custom); |
1159 | |
1160 | setOperationAction(Ops: {ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, |
1161 | VT, Action: Custom); |
1162 | |
1163 | setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom); |
1164 | |
1165 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Custom); |
1166 | |
1167 | setOperationAction(Op: ISD::SETCC, VT, Action: Custom); |
1168 | |
1169 | setOperationAction(Op: ISD::SELECT, VT, Action: Custom); |
1170 | |
1171 | setOperationAction(Op: ISD::TRUNCATE, VT, Action: Custom); |
1172 | |
1173 | setOperationAction(Op: ISD::BITCAST, VT, Action: Custom); |
1174 | |
1175 | setOperationAction( |
1176 | Ops: {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, |
1177 | Action: Custom); |
1178 | |
1179 | setOperationAction( |
1180 | Ops: {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, |
1181 | Action: Custom); |
1182 | |
1183 | setOperationAction( |
1184 | Ops: { |
1185 | ISD::SINT_TO_FP, |
1186 | ISD::UINT_TO_FP, |
1187 | ISD::FP_TO_SINT, |
1188 | ISD::FP_TO_UINT, |
1189 | ISD::STRICT_SINT_TO_FP, |
1190 | ISD::STRICT_UINT_TO_FP, |
1191 | ISD::STRICT_FP_TO_SINT, |
1192 | ISD::STRICT_FP_TO_UINT, |
1193 | }, |
1194 | VT, Action: Custom); |
1195 | setOperationAction(Ops: {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, |
1196 | Action: Custom); |
1197 | |
1198 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
1199 | |
        // Operations below differ between mask vectors and other vectors.
1201 | if (VT.getVectorElementType() == MVT::i1) { |
1202 | setOperationAction(Ops: {ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND, |
1203 | ISD::OR, ISD::XOR}, |
1204 | VT, Action: Custom); |
1205 | |
1206 | setOperationAction(Ops: {ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, |
1207 | ISD::VP_SETCC, ISD::VP_TRUNCATE}, |
1208 | VT, Action: Custom); |
1209 | |
1210 | setOperationAction(Op: ISD::EXPERIMENTAL_VP_SPLICE, VT, Action: Custom); |
1211 | setOperationAction(Op: ISD::EXPERIMENTAL_VP_REVERSE, VT, Action: Custom); |
1212 | continue; |
1213 | } |
1214 | |
1215 | // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to |
1216 | // it before type legalization for i64 vectors on RV32. It will then be |
1217 | // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle. |
1218 | // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs |
1219 | // improvements first. |
1220 | if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { |
1221 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT, Action: Legal); |
1222 | setOperationAction(Op: ISD::SPLAT_VECTOR_PARTS, VT, Action: Custom); |
1223 | } |
1224 | |
1225 | setOperationAction( |
1226 | Ops: {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Action: Custom); |
1227 | |
1228 | setOperationAction(Ops: {ISD::VP_LOAD, ISD::VP_STORE, |
1229 | ISD::EXPERIMENTAL_VP_STRIDED_LOAD, |
1230 | ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, |
1231 | ISD::VP_SCATTER}, |
1232 | VT, Action: Custom); |
1233 | |
1234 | setOperationAction(Ops: {ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR, |
1235 | ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV, |
1236 | ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL}, |
1237 | VT, Action: Custom); |
1238 | |
1239 | setOperationAction( |
1240 | Ops: {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Action: Custom); |
1241 | |
1242 | setOperationAction(Ops: {ISD::ABDS, ISD::ABDU}, VT, Action: Custom); |
1243 | |
1244 | // vXi64 MULHS/MULHU requires the V extension instead of Zve64*. |
1245 | if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) |
1246 | setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Custom); |
1247 | |
1248 | setOperationAction(Ops: {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, |
1249 | ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT, |
1250 | ISD::SSUBSAT, ISD::USUBSAT}, |
1251 | VT, Action: Custom); |
1252 | |
1253 | setOperationAction(Op: ISD::VSELECT, VT, Action: Custom); |
1254 | |
1255 | setOperationAction( |
1256 | Ops: {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Action: Custom); |
1257 | |
1258 | // Custom-lower reduction operations to set up the corresponding custom |
1259 | // nodes' operands. |
1260 | setOperationAction(Ops: {ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX, |
1261 | ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX, |
1262 | ISD::VECREDUCE_UMIN}, |
1263 | VT, Action: Custom); |
1264 | |
1265 | setOperationAction(Ops: IntegerVPOps, VT, Action: Custom); |
1266 | |
1267 | if (Subtarget.hasStdExtZvkb()) |
1268 | setOperationAction(Ops: {ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Action: Custom); |
1269 | |
1270 | if (Subtarget.hasStdExtZvbb()) { |
1271 | setOperationAction(Ops: {ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, |
1272 | ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, |
1273 | VT, Action: Custom); |
1274 | } else { |
          // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the elements of VT
          // are in the range of f32.
1277 | EVT FloatVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount()); |
1278 | if (isTypeLegal(VT: FloatVT)) |
1279 | setOperationAction( |
1280 | Ops: {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, |
1281 | Action: Custom); |
1282 | } |
1283 | } |
1284 | |
1285 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { |
1286 | // There are no extending loads or truncating stores. |
1287 | for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) { |
1288 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
1289 | setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand); |
1290 | } |
1291 | |
1292 | if (!useRVVForFixedLengthVectorVT(VT)) |
1293 | continue; |
1294 | |
1295 | // By default everything must be expanded. |
1296 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
1297 | setOperationAction(Op, VT, Action: Expand); |
1298 | |
1299 | // Custom lower fixed vector undefs to scalable vector undefs to avoid |
1300 | // expansion to a build_vector of 0s. |
1301 | setOperationAction(Op: ISD::UNDEF, VT, Action: Custom); |
1302 | |
1303 | setOperationAction(Ops: {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, |
1304 | ISD::EXTRACT_SUBVECTOR}, |
1305 | VT, Action: Custom); |
1306 | |
1307 | // FIXME: mload, mstore, mgather, mscatter, vp_load/store, |
1308 | // vp_stride_load/store, vp_gather/scatter can be hoisted to here. |
1309 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Custom); |
1310 | |
1311 | setOperationAction(Ops: {ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Action: Custom); |
1312 | setOperationAction(Ops: {ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, |
1313 | Action: Custom); |
1314 | |
1315 | if (VT.getVectorElementType() == MVT::f16 && |
1316 | !Subtarget.hasVInstructionsF16()) { |
1317 | setOperationAction(Ops: {ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Action: Custom); |
1318 | setOperationAction( |
1319 | Ops: {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, |
1320 | Action: Custom); |
1321 | setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP, |
1322 | ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, |
1323 | VT, Action: Custom); |
1324 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
1325 | if (Subtarget.hasStdExtZfhmin()) { |
1326 | // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR. |
1327 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT, Action: Custom); |
1328 | } else { |
1329 | // We need to custom legalize f16 build vectors if Zfhmin isn't |
1330 | // available. |
1331 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom); |
1332 | } |
1333 | MVT F32VecVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount()); |
1334 | // Don't promote f16 vector operations to f32 if f32 vector type is |
1335 | // not legal. |
1336 | // TODO: could split the f16 vector into two vectors and do promotion. |
1337 | if (!isTypeLegal(VT: F32VecVT)) |
1338 | continue; |
1339 | setOperationPromotedToType(Ops: ZvfhminPromoteOps, OrigVT: VT, DestVT: F32VecVT); |
1340 | setOperationPromotedToType(Ops: ZvfhminPromoteVPOps, OrigVT: VT, DestVT: F32VecVT); |
1341 | continue; |
1342 | } |
1343 | |
1344 | if (VT.getVectorElementType() == MVT::bf16) { |
1345 | setOperationAction(Ops: {ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Action: Custom); |
1346 | // FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR. |
1347 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT, Action: Custom); |
1348 | setOperationAction( |
1349 | Ops: {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, |
1350 | Action: Custom); |
1351 | // TODO: Promote to fp32. |
1352 | continue; |
1353 | } |
1354 | |
1355 | setOperationAction(Ops: {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, |
1356 | ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, |
1357 | VT, Action: Custom); |
1358 | |
1359 | setOperationAction( |
1360 | Ops: {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Action: Custom); |
1361 | |
1362 | setOperationAction(Ops: {ISD::VP_LOAD, ISD::VP_STORE, |
1363 | ISD::EXPERIMENTAL_VP_STRIDED_LOAD, |
1364 | ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, |
1365 | ISD::VP_SCATTER}, |
1366 | VT, Action: Custom); |
1367 | |
1368 | setOperationAction(Ops: {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, |
1369 | ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, |
1370 | ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, |
1371 | ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, |
1372 | VT, Action: Custom); |
1373 | |
1374 | setOperationAction(Ops: {ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, |
1375 | ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, |
1376 | VT, Action: Custom); |
1377 | |
1378 | setCondCodeAction(CCs: VFPCCToExpand, VT, Action: Expand); |
1379 | |
1380 | setOperationAction(Op: ISD::SETCC, VT, Action: Custom); |
1381 | setOperationAction(Ops: {ISD::VSELECT, ISD::SELECT}, VT, Action: Custom); |
1382 | |
1383 | setOperationAction(Op: ISD::BITCAST, VT, Action: Custom); |
1384 | |
1385 | setOperationAction(Ops: FloatingPointVecReduceOps, VT, Action: Custom); |
1386 | |
1387 | setOperationAction(Ops: FloatingPointVPOps, VT, Action: Custom); |
1388 | |
1389 | setOperationAction( |
1390 | Ops: {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, |
1391 | ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, |
1392 | ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, |
1393 | ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, |
1394 | ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, |
1395 | VT, Action: Custom); |
1396 | } |
1397 | |
1398 | // Custom-legalize bitcasts from fixed-length vectors to scalar types. |
1399 | setOperationAction(Ops: ISD::BITCAST, VTs: {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, |
1400 | Action: Custom); |
1401 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) |
1402 | setOperationAction(Op: ISD::BITCAST, VT: MVT::f16, Action: Custom); |
1403 | if (Subtarget.hasStdExtFOrZfinx()) |
1404 | setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Custom); |
1405 | if (Subtarget.hasStdExtDOrZdinx()) |
1406 | setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Custom); |
1407 | } |
1408 | } |
1409 | |
1410 | if (Subtarget.hasStdExtA()) { |
1411 | setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT: XLenVT, Action: Expand); |
1412 | if (RV64LegalI32 && Subtarget.is64Bit()) |
1413 | setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT: MVT::i32, Action: Expand); |
1414 | } |
1415 | |
1416 | if (Subtarget.hasForcedAtomics()) { |
1417 | // Force __sync libcalls to be emitted for atomic rmw/cas operations. |
1418 | setOperationAction( |
1419 | Ops: {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, |
1420 | ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, |
1421 | ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, |
1422 | ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, |
1423 | VT: XLenVT, Action: LibCall); |
1424 | } |
1425 | |
1426 | if (Subtarget.hasVendorXTHeadMemIdx()) { |
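    // XTHeadMemIdx provides scalar loads and stores with pre- and
    // post-increment addressing, so mark those indexed modes legal for the
    // scalar integer types below.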
1427 | for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) { |
1428 | setIndexedLoadAction(IdxModes: im, VT: MVT::i8, Action: Legal); |
1429 | setIndexedStoreAction(IdxModes: im, VT: MVT::i8, Action: Legal); |
1430 | setIndexedLoadAction(IdxModes: im, VT: MVT::i16, Action: Legal); |
1431 | setIndexedStoreAction(IdxModes: im, VT: MVT::i16, Action: Legal); |
1432 | setIndexedLoadAction(IdxModes: im, VT: MVT::i32, Action: Legal); |
1433 | setIndexedStoreAction(IdxModes: im, VT: MVT::i32, Action: Legal); |
1434 | |
1435 | if (Subtarget.is64Bit()) { |
1436 | setIndexedLoadAction(IdxModes: im, VT: MVT::i64, Action: Legal); |
1437 | setIndexedStoreAction(IdxModes: im, VT: MVT::i64, Action: Legal); |
1438 | } |
1439 | } |
1440 | } |
1441 | |
1442 | if (Subtarget.hasVendorXCVmem()) { |
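    // XCVmem provides post-increment addressing for 8/16/32-bit loads and
    // stores.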
1443 | setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: MVT::i8, Action: Legal); |
1444 | setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: MVT::i16, Action: Legal); |
1445 | setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: MVT::i32, Action: Legal); |
1446 | |
1447 | setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: MVT::i8, Action: Legal); |
1448 | setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: MVT::i16, Action: Legal); |
1449 | setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: MVT::i32, Action: Legal); |
1450 | } |
1451 | |
1452 | if (Subtarget.hasVendorXCValu()) { |
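    // XCValu provides scalar abs, signed/unsigned min/max, and 8/16-bit
    // sign-extension instructions, so the corresponding operations are legal.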
1453 | setOperationAction(Op: ISD::ABS, VT: XLenVT, Action: Legal); |
1454 | setOperationAction(Op: ISD::SMIN, VT: XLenVT, Action: Legal); |
1455 | setOperationAction(Op: ISD::UMIN, VT: XLenVT, Action: Legal); |
1456 | setOperationAction(Op: ISD::SMAX, VT: XLenVT, Action: Legal); |
1457 | setOperationAction(Op: ISD::UMAX, VT: XLenVT, Action: Legal); |
1458 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i8, Action: Legal); |
1459 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i16, Action: Legal); |
1460 | } |
1461 | |
1462 | // Function alignments. |
1463 | const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4); |
1464 | setMinFunctionAlignment(FunctionAlignment); |
1465 | // Set preferred alignments. |
1466 | setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); |
1467 | setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); |
1468 | |
1469 | setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, |
1470 | ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, |
1471 | ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); |
1472 | if (Subtarget.is64Bit()) |
1473 | setTargetDAGCombine(ISD::SRA); |
1474 | |
1475 | if (Subtarget.hasStdExtFOrZfinx()) |
1476 | setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM}); |
1477 | |
1478 | if (Subtarget.hasStdExtZbb()) |
1479 | setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}); |
1480 | |
1481 | if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) || |
1482 | Subtarget.hasStdExtV()) |
1483 | setTargetDAGCombine(ISD::TRUNCATE); |
1484 | |
1485 | if (Subtarget.hasStdExtZbkb()) |
1486 | setTargetDAGCombine(ISD::BITREVERSE); |
1487 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) |
1488 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
1489 | if (Subtarget.hasStdExtFOrZfinx()) |
1490 | setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, |
1491 | ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}); |
1492 | if (Subtarget.hasVInstructions()) |
1493 | setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, |
1494 | ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, |
1495 | ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, |
1496 | ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, |
1497 | ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL, |
1498 | ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, |
1499 | ISD::INSERT_VECTOR_ELT, ISD::ABS}); |
1500 | if (Subtarget.hasVendorXTHeadMemPair()) |
1501 | setTargetDAGCombine({ISD::LOAD, ISD::STORE}); |
1502 | if (Subtarget.useRVVForFixedLengthVectors()) |
1503 | setTargetDAGCombine(ISD::BITCAST); |
1504 | |
1505 | setLibcallName(Call: RTLIB::FPEXT_F16_F32, Name: "__extendhfsf2" ); |
1506 | setLibcallName(Call: RTLIB::FPROUND_F32_F16, Name: "__truncsfhf2" ); |
1507 | |
1508 | // Disable strict node mutation. |
1509 | IsStrictFPEnabled = true; |
1510 | |
1511 | // Let the subtarget decide if a predictable select is more expensive than the |
1512 | // corresponding branch. This information is used in CGP/SelectOpt to decide |
1513 | // when to convert selects into branches. |
1514 | PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive(); |
1515 | } |
1516 | |
1517 | EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, |
1518 | LLVMContext &Context, |
1519 | EVT VT) const { |
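  // Scalar comparisons produce an XLen-sized result. Vector comparisons that
  // RVV will handle produce an i1 mask vector with the same element count;
  // other vectors fall back to an integer vector of matching width.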
1520 | if (!VT.isVector()) |
1521 | return getPointerTy(DL); |
1522 | if (Subtarget.hasVInstructions() && |
1523 | (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) |
1524 | return EVT::getVectorVT(Context, VT: MVT::i1, EC: VT.getVectorElementCount()); |
1525 | return VT.changeVectorElementTypeToInteger(); |
1526 | } |
1527 | |
1528 | MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { |
1529 | return Subtarget.getXLenVT(); |
1530 | } |
1531 | |
1532 | // Return false if we can lower get_vector_length to a vsetvli intrinsic. |
1533 | bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, |
1534 | unsigned VF, |
1535 | bool IsScalable) const { |
1536 | if (!Subtarget.hasVInstructions()) |
1537 | return true; |
1538 | |
1539 | if (!IsScalable) |
1540 | return true; |
1541 | |
1542 | if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT()) |
1543 | return true; |
1544 | |
  // Don't allow VF=1 if those types aren't legal.
1546 | if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen()) |
1547 | return true; |
1548 | |
1549 | // VLEN=32 support is incomplete. |
1550 | if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) |
1551 | return true; |
1552 | |
1553 | // The maximum VF is for the smallest element width with LMUL=8. |
1554 | // VF must be a power of 2. |
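  // With RVVBitsPerBlock = 64 this gives MaxVF = (64 / 8) * 8 = 64.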
1555 | unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8; |
1556 | return VF > MaxVF || !isPowerOf2_32(Value: VF); |
1557 | } |
1558 | |
1559 | bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const { |
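  // Only llvm.experimental.cttz.elts on a legal i1 vector can be lowered
  // natively with RVV; everything else is expanded.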
1560 | return !Subtarget.hasVInstructions() || |
1561 | VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT); |
1562 | } |
1563 | |
1564 | bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
1565 | const CallInst &I, |
1566 | MachineFunction &MF, |
1567 | unsigned Intrinsic) const { |
1568 | auto &DL = I.getDataLayout(); |
1569 | |
1570 | auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore, |
1571 | bool IsUnitStrided, bool UsePtrVal = false) { |
1572 | Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; |
1573 | // We can't use ptrVal if the intrinsic can access memory before the |
1574 | // pointer. This means we can't use it for strided or indexed intrinsics. |
1575 | if (UsePtrVal) |
1576 | Info.ptrVal = I.getArgOperand(i: PtrOp); |
1577 | else |
1578 | Info.fallbackAddressSpace = |
1579 | I.getArgOperand(i: PtrOp)->getType()->getPointerAddressSpace(); |
1580 | Type *MemTy; |
1581 | if (IsStore) { |
1582 | // Store value is the first operand. |
1583 | MemTy = I.getArgOperand(i: 0)->getType(); |
1584 | } else { |
      // Use the return type. If it's a segment load, the return type is a
      // struct.
1586 | MemTy = I.getType(); |
1587 | if (MemTy->isStructTy()) |
1588 | MemTy = MemTy->getStructElementType(N: 0); |
1589 | } |
1590 | if (!IsUnitStrided) |
1591 | MemTy = MemTy->getScalarType(); |
1592 | |
1593 | Info.memVT = getValueType(DL, Ty: MemTy); |
1594 | Info.align = Align(DL.getTypeSizeInBits(Ty: MemTy->getScalarType()) / 8); |
1595 | Info.size = MemoryLocation::UnknownSize; |
1596 | Info.flags |= |
1597 | IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; |
1598 | return true; |
1599 | }; |
1600 | |
1601 | if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
1602 | Info.flags |= MachineMemOperand::MONonTemporal; |
1603 | |
1604 | Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I); |
1605 | switch (Intrinsic) { |
1606 | default: |
1607 | return false; |
1608 | case Intrinsic::riscv_masked_atomicrmw_xchg_i32: |
1609 | case Intrinsic::riscv_masked_atomicrmw_add_i32: |
1610 | case Intrinsic::riscv_masked_atomicrmw_sub_i32: |
1611 | case Intrinsic::riscv_masked_atomicrmw_nand_i32: |
1612 | case Intrinsic::riscv_masked_atomicrmw_max_i32: |
1613 | case Intrinsic::riscv_masked_atomicrmw_min_i32: |
1614 | case Intrinsic::riscv_masked_atomicrmw_umax_i32: |
1615 | case Intrinsic::riscv_masked_atomicrmw_umin_i32: |
1616 | case Intrinsic::riscv_masked_cmpxchg_i32: |
1617 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
1618 | Info.memVT = MVT::i32; |
1619 | Info.ptrVal = I.getArgOperand(i: 0); |
1620 | Info.offset = 0; |
1621 | Info.align = Align(4); |
1622 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
1623 | MachineMemOperand::MOVolatile; |
1624 | return true; |
1625 | case Intrinsic::riscv_masked_strided_load: |
1626 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false, |
1627 | /*IsUnitStrided*/ false); |
1628 | case Intrinsic::riscv_masked_strided_store: |
1629 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true, |
1630 | /*IsUnitStrided*/ false); |
1631 | case Intrinsic::riscv_seg2_load: |
1632 | case Intrinsic::riscv_seg3_load: |
1633 | case Intrinsic::riscv_seg4_load: |
1634 | case Intrinsic::riscv_seg5_load: |
1635 | case Intrinsic::riscv_seg6_load: |
1636 | case Intrinsic::riscv_seg7_load: |
1637 | case Intrinsic::riscv_seg8_load: |
1638 | return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, |
1639 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1640 | case Intrinsic::riscv_seg2_store: |
1641 | case Intrinsic::riscv_seg3_store: |
1642 | case Intrinsic::riscv_seg4_store: |
1643 | case Intrinsic::riscv_seg5_store: |
1644 | case Intrinsic::riscv_seg6_store: |
1645 | case Intrinsic::riscv_seg7_store: |
1646 | case Intrinsic::riscv_seg8_store: |
1647 | // Operands are (vec, ..., vec, ptr, vl) |
1648 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1649 | /*IsStore*/ true, |
1650 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1651 | case Intrinsic::riscv_vle: |
1652 | case Intrinsic::riscv_vle_mask: |
1653 | case Intrinsic::riscv_vleff: |
1654 | case Intrinsic::riscv_vleff_mask: |
1655 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1656 | /*IsStore*/ false, |
1657 | /*IsUnitStrided*/ true, |
1658 | /*UsePtrVal*/ true); |
1659 | case Intrinsic::riscv_vse: |
1660 | case Intrinsic::riscv_vse_mask: |
1661 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1662 | /*IsStore*/ true, |
1663 | /*IsUnitStrided*/ true, |
1664 | /*UsePtrVal*/ true); |
1665 | case Intrinsic::riscv_vlse: |
1666 | case Intrinsic::riscv_vlse_mask: |
1667 | case Intrinsic::riscv_vloxei: |
1668 | case Intrinsic::riscv_vloxei_mask: |
1669 | case Intrinsic::riscv_vluxei: |
1670 | case Intrinsic::riscv_vluxei_mask: |
1671 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1672 | /*IsStore*/ false, |
1673 | /*IsUnitStrided*/ false); |
1674 | case Intrinsic::riscv_vsse: |
1675 | case Intrinsic::riscv_vsse_mask: |
1676 | case Intrinsic::riscv_vsoxei: |
1677 | case Intrinsic::riscv_vsoxei_mask: |
1678 | case Intrinsic::riscv_vsuxei: |
1679 | case Intrinsic::riscv_vsuxei_mask: |
1680 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1681 | /*IsStore*/ true, |
1682 | /*IsUnitStrided*/ false); |
1683 | case Intrinsic::riscv_vlseg2: |
1684 | case Intrinsic::riscv_vlseg3: |
1685 | case Intrinsic::riscv_vlseg4: |
1686 | case Intrinsic::riscv_vlseg5: |
1687 | case Intrinsic::riscv_vlseg6: |
1688 | case Intrinsic::riscv_vlseg7: |
1689 | case Intrinsic::riscv_vlseg8: |
1690 | case Intrinsic::riscv_vlseg2ff: |
1691 | case Intrinsic::riscv_vlseg3ff: |
1692 | case Intrinsic::riscv_vlseg4ff: |
1693 | case Intrinsic::riscv_vlseg5ff: |
1694 | case Intrinsic::riscv_vlseg6ff: |
1695 | case Intrinsic::riscv_vlseg7ff: |
1696 | case Intrinsic::riscv_vlseg8ff: |
1697 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1698 | /*IsStore*/ false, |
1699 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1700 | case Intrinsic::riscv_vlseg2_mask: |
1701 | case Intrinsic::riscv_vlseg3_mask: |
1702 | case Intrinsic::riscv_vlseg4_mask: |
1703 | case Intrinsic::riscv_vlseg5_mask: |
1704 | case Intrinsic::riscv_vlseg6_mask: |
1705 | case Intrinsic::riscv_vlseg7_mask: |
1706 | case Intrinsic::riscv_vlseg8_mask: |
1707 | case Intrinsic::riscv_vlseg2ff_mask: |
1708 | case Intrinsic::riscv_vlseg3ff_mask: |
1709 | case Intrinsic::riscv_vlseg4ff_mask: |
1710 | case Intrinsic::riscv_vlseg5ff_mask: |
1711 | case Intrinsic::riscv_vlseg6ff_mask: |
1712 | case Intrinsic::riscv_vlseg7ff_mask: |
1713 | case Intrinsic::riscv_vlseg8ff_mask: |
1714 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, |
1715 | /*IsStore*/ false, |
1716 | /*IsUnitStrided*/ false, /*UsePtrVal*/ true); |
1717 | case Intrinsic::riscv_vlsseg2: |
1718 | case Intrinsic::riscv_vlsseg3: |
1719 | case Intrinsic::riscv_vlsseg4: |
1720 | case Intrinsic::riscv_vlsseg5: |
1721 | case Intrinsic::riscv_vlsseg6: |
1722 | case Intrinsic::riscv_vlsseg7: |
1723 | case Intrinsic::riscv_vlsseg8: |
1724 | case Intrinsic::riscv_vloxseg2: |
1725 | case Intrinsic::riscv_vloxseg3: |
1726 | case Intrinsic::riscv_vloxseg4: |
1727 | case Intrinsic::riscv_vloxseg5: |
1728 | case Intrinsic::riscv_vloxseg6: |
1729 | case Intrinsic::riscv_vloxseg7: |
1730 | case Intrinsic::riscv_vloxseg8: |
1731 | case Intrinsic::riscv_vluxseg2: |
1732 | case Intrinsic::riscv_vluxseg3: |
1733 | case Intrinsic::riscv_vluxseg4: |
1734 | case Intrinsic::riscv_vluxseg5: |
1735 | case Intrinsic::riscv_vluxseg6: |
1736 | case Intrinsic::riscv_vluxseg7: |
1737 | case Intrinsic::riscv_vluxseg8: |
1738 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1739 | /*IsStore*/ false, |
1740 | /*IsUnitStrided*/ false); |
1741 | case Intrinsic::riscv_vlsseg2_mask: |
1742 | case Intrinsic::riscv_vlsseg3_mask: |
1743 | case Intrinsic::riscv_vlsseg4_mask: |
1744 | case Intrinsic::riscv_vlsseg5_mask: |
1745 | case Intrinsic::riscv_vlsseg6_mask: |
1746 | case Intrinsic::riscv_vlsseg7_mask: |
1747 | case Intrinsic::riscv_vlsseg8_mask: |
1748 | case Intrinsic::riscv_vloxseg2_mask: |
1749 | case Intrinsic::riscv_vloxseg3_mask: |
1750 | case Intrinsic::riscv_vloxseg4_mask: |
1751 | case Intrinsic::riscv_vloxseg5_mask: |
1752 | case Intrinsic::riscv_vloxseg6_mask: |
1753 | case Intrinsic::riscv_vloxseg7_mask: |
1754 | case Intrinsic::riscv_vloxseg8_mask: |
1755 | case Intrinsic::riscv_vluxseg2_mask: |
1756 | case Intrinsic::riscv_vluxseg3_mask: |
1757 | case Intrinsic::riscv_vluxseg4_mask: |
1758 | case Intrinsic::riscv_vluxseg5_mask: |
1759 | case Intrinsic::riscv_vluxseg6_mask: |
1760 | case Intrinsic::riscv_vluxseg7_mask: |
1761 | case Intrinsic::riscv_vluxseg8_mask: |
1762 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5, |
1763 | /*IsStore*/ false, |
1764 | /*IsUnitStrided*/ false); |
1765 | case Intrinsic::riscv_vsseg2: |
1766 | case Intrinsic::riscv_vsseg3: |
1767 | case Intrinsic::riscv_vsseg4: |
1768 | case Intrinsic::riscv_vsseg5: |
1769 | case Intrinsic::riscv_vsseg6: |
1770 | case Intrinsic::riscv_vsseg7: |
1771 | case Intrinsic::riscv_vsseg8: |
1772 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1773 | /*IsStore*/ true, |
1774 | /*IsUnitStrided*/ false); |
1775 | case Intrinsic::riscv_vsseg2_mask: |
1776 | case Intrinsic::riscv_vsseg3_mask: |
1777 | case Intrinsic::riscv_vsseg4_mask: |
1778 | case Intrinsic::riscv_vsseg5_mask: |
1779 | case Intrinsic::riscv_vsseg6_mask: |
1780 | case Intrinsic::riscv_vsseg7_mask: |
1781 | case Intrinsic::riscv_vsseg8_mask: |
1782 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1783 | /*IsStore*/ true, |
1784 | /*IsUnitStrided*/ false); |
1785 | case Intrinsic::riscv_vssseg2: |
1786 | case Intrinsic::riscv_vssseg3: |
1787 | case Intrinsic::riscv_vssseg4: |
1788 | case Intrinsic::riscv_vssseg5: |
1789 | case Intrinsic::riscv_vssseg6: |
1790 | case Intrinsic::riscv_vssseg7: |
1791 | case Intrinsic::riscv_vssseg8: |
1792 | case Intrinsic::riscv_vsoxseg2: |
1793 | case Intrinsic::riscv_vsoxseg3: |
1794 | case Intrinsic::riscv_vsoxseg4: |
1795 | case Intrinsic::riscv_vsoxseg5: |
1796 | case Intrinsic::riscv_vsoxseg6: |
1797 | case Intrinsic::riscv_vsoxseg7: |
1798 | case Intrinsic::riscv_vsoxseg8: |
1799 | case Intrinsic::riscv_vsuxseg2: |
1800 | case Intrinsic::riscv_vsuxseg3: |
1801 | case Intrinsic::riscv_vsuxseg4: |
1802 | case Intrinsic::riscv_vsuxseg5: |
1803 | case Intrinsic::riscv_vsuxseg6: |
1804 | case Intrinsic::riscv_vsuxseg7: |
1805 | case Intrinsic::riscv_vsuxseg8: |
1806 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1807 | /*IsStore*/ true, |
1808 | /*IsUnitStrided*/ false); |
1809 | case Intrinsic::riscv_vssseg2_mask: |
1810 | case Intrinsic::riscv_vssseg3_mask: |
1811 | case Intrinsic::riscv_vssseg4_mask: |
1812 | case Intrinsic::riscv_vssseg5_mask: |
1813 | case Intrinsic::riscv_vssseg6_mask: |
1814 | case Intrinsic::riscv_vssseg7_mask: |
1815 | case Intrinsic::riscv_vssseg8_mask: |
1816 | case Intrinsic::riscv_vsoxseg2_mask: |
1817 | case Intrinsic::riscv_vsoxseg3_mask: |
1818 | case Intrinsic::riscv_vsoxseg4_mask: |
1819 | case Intrinsic::riscv_vsoxseg5_mask: |
1820 | case Intrinsic::riscv_vsoxseg6_mask: |
1821 | case Intrinsic::riscv_vsoxseg7_mask: |
1822 | case Intrinsic::riscv_vsoxseg8_mask: |
1823 | case Intrinsic::riscv_vsuxseg2_mask: |
1824 | case Intrinsic::riscv_vsuxseg3_mask: |
1825 | case Intrinsic::riscv_vsuxseg4_mask: |
1826 | case Intrinsic::riscv_vsuxseg5_mask: |
1827 | case Intrinsic::riscv_vsuxseg6_mask: |
1828 | case Intrinsic::riscv_vsuxseg7_mask: |
1829 | case Intrinsic::riscv_vsuxseg8_mask: |
1830 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, |
1831 | /*IsStore*/ true, |
1832 | /*IsUnitStrided*/ false); |
1833 | } |
1834 | } |
1835 | |
1836 | bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
1837 | const AddrMode &AM, Type *Ty, |
1838 | unsigned AS, |
1839 | Instruction *I) const { |
1840 | // No global is ever allowed as a base. |
1841 | if (AM.BaseGV) |
1842 | return false; |
1843 | |
1844 | // RVV instructions only support register addressing. |
1845 | if (Subtarget.hasVInstructions() && isa<VectorType>(Val: Ty)) |
1846 | return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs; |
1847 | |
1848 | // Require a 12-bit signed offset. |
1849 | if (!isInt<12>(x: AM.BaseOffs)) |
1850 | return false; |
1851 | |
1852 | switch (AM.Scale) { |
1853 | case 0: // "r+i" or just "i", depending on HasBaseReg. |
1854 | break; |
1855 | case 1: |
1856 | if (!AM.HasBaseReg) // allow "r+i". |
1857 | break; |
1858 | return false; // disallow "r+r" or "r+r+i". |
1859 | default: |
1860 | return false; |
1861 | } |
1862 | |
1863 | return true; |
1864 | } |
1865 | |
1866 | bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
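  // Immediates for compares (and adds, below) must fit the 12-bit signed
  // immediate field used by instructions such as SLTI and ADDI.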
1867 | return isInt<12>(x: Imm); |
1868 | } |
1869 | |
1870 | bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
1871 | return isInt<12>(x: Imm); |
1872 | } |
1873 | |
1874 | // On RV32, 64-bit integers are split into their high and low parts and held |
1875 | // in two different registers, so the trunc is free since the low register can |
1876 | // just be used. |
1877 | // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of |
1878 | // isTruncateFree? |
1879 | bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { |
1880 | if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) |
1881 | return false; |
1882 | unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); |
1883 | unsigned DestBits = DstTy->getPrimitiveSizeInBits(); |
1884 | return (SrcBits == 64 && DestBits == 32); |
1885 | } |
1886 | |
1887 | bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { |
1888 | // We consider i64->i32 free on RV64 since we have good selection of W |
1889 | // instructions that make promoting operations back to i64 free in many cases. |
1890 | if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || |
1891 | !DstVT.isInteger()) |
1892 | return false; |
1893 | unsigned SrcBits = SrcVT.getSizeInBits(); |
1894 | unsigned DestBits = DstVT.getSizeInBits(); |
1895 | return (SrcBits == 64 && DestBits == 32); |
1896 | } |
1897 | |
1898 | bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const { |
1899 | EVT SrcVT = Val.getValueType(); |
  // A truncate of a vector shift is free since it folds into vnsrl/vnsra.
1901 | if (Subtarget.hasStdExtV() && |
1902 | (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) && |
1903 | SrcVT.isVector() && VT2.isVector()) { |
1904 | unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits(); |
1905 | unsigned DestBits = VT2.getVectorElementType().getSizeInBits(); |
1906 | if (SrcBits == DestBits * 2) { |
1907 | return true; |
1908 | } |
1909 | } |
1910 | return TargetLowering::isTruncateFree(Val, VT2); |
1911 | } |
1912 | |
1913 | bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
1914 | // Zexts are free if they can be combined with a load. |
1915 | // Don't advertise i32->i64 zextload as being free for RV64. It interacts |
1916 | // poorly with type legalization of compares preferring sext. |
1917 | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
1918 | EVT MemVT = LD->getMemoryVT(); |
1919 | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
1920 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
1921 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
1922 | return true; |
1923 | } |
1924 | |
1925 | return TargetLowering::isZExtFree(Val, VT2); |
1926 | } |
1927 | |
1928 | bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { |
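  // On RV64, 32-bit values are kept sign-extended in registers (LW and the
  // W-form instructions sign-extend), so sign-extending i32 to i64 is
  // effectively free.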
1929 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
1930 | } |
1931 | |
1932 | bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
1933 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32); |
1934 | } |
1935 | |
1936 | bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { |
1937 | return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip(); |
1938 | } |
1939 | |
1940 | bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { |
1941 | return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || |
1942 | Subtarget.hasVendorXCVbitmanip(); |
1943 | } |
1944 | |
1945 | bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( |
1946 | const Instruction &AndI) const { |
1947 | // We expect to be able to match a bit extraction instruction if the Zbs |
1948 | // extension is supported and the mask is a power of two. However, we |
1949 | // conservatively return false if the mask would fit in an ANDI instruction, |
1950 | // on the basis that it's possible the sinking+duplication of the AND in |
1951 | // CodeGenPrepare triggered by this hook wouldn't decrease the instruction |
1952 | // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). |
1953 | if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) |
1954 | return false; |
1955 | ConstantInt *Mask = dyn_cast<ConstantInt>(Val: AndI.getOperand(i: 1)); |
1956 | if (!Mask) |
1957 | return false; |
1958 | return !Mask->getValue().isSignedIntN(N: 12) && Mask->getValue().isPowerOf2(); |
1959 | } |
1960 | |
1961 | bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { |
1962 | EVT VT = Y.getValueType(); |
1963 | |
1964 | // FIXME: Support vectors once we have tests. |
1965 | if (VT.isVector()) |
1966 | return false; |
1967 | |
1968 | return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && |
1969 | (!isa<ConstantSDNode>(Val: Y) || cast<ConstantSDNode>(Val&: Y)->isOpaque()); |
1970 | } |
1971 | |
1972 | bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { |
1973 | // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test. |
1974 | if (Subtarget.hasStdExtZbs()) |
1975 | return X.getValueType().isScalarInteger(); |
1976 | auto *C = dyn_cast<ConstantSDNode>(Val&: Y); |
1977 | // XTheadBs provides th.tst (similar to bexti), if Y is a constant |
1978 | if (Subtarget.hasVendorXTHeadBs()) |
1979 | return C != nullptr; |
1980 | // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position. |
1981 | return C && C->getAPIntValue().ule(RHS: 10); |
1982 | } |
1983 | |
1984 | bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, |
1985 | EVT VT) const { |
1986 | // Only enable for rvv. |
1987 | if (!VT.isVector() || !Subtarget.hasVInstructions()) |
1988 | return false; |
1989 | |
1990 | if (VT.isFixedLengthVector() && !isTypeLegal(VT)) |
1991 | return false; |
1992 | |
1993 | return true; |
1994 | } |
1995 | |
1996 | bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
1997 | Type *Ty) const { |
1998 | assert(Ty->isIntegerTy()); |
1999 | |
2000 | unsigned BitSize = Ty->getIntegerBitWidth(); |
2001 | if (BitSize > Subtarget.getXLen()) |
2002 | return false; |
2003 | |
2004 | // Fast path, assume 32-bit immediates are cheap. |
2005 | int64_t Val = Imm.getSExtValue(); |
2006 | if (isInt<32>(x: Val)) |
2007 | return true; |
2008 | |
  // A constant pool entry may be more aligned than the load we're trying to
2010 | // replace. If we don't support unaligned scalar mem, prefer the constant |
2011 | // pool. |
2012 | // TODO: Can the caller pass down the alignment? |
2013 | if (!Subtarget.enableUnalignedScalarMem()) |
2014 | return true; |
2015 | |
2016 | // Prefer to keep the load if it would require many instructions. |
2017 | // This uses the same threshold we use for constant pools but doesn't |
2018 | // check useConstantPoolForLargeInts. |
2019 | // TODO: Should we keep the load only when we're definitely going to emit a |
2020 | // constant pool? |
2021 | |
2022 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI: Subtarget); |
2023 | return Seq.size() <= Subtarget.getMaxBuildIntsCost(); |
2024 | } |
2025 | |
2026 | bool RISCVTargetLowering:: |
2027 | shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
2028 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
2029 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
2030 | SelectionDAG &DAG) const { |
2031 | // One interesting pattern that we'd want to form is 'bit extract': |
2032 | // ((1 >> Y) & 1) ==/!= 0 |
2033 | // But we also need to be careful not to try to reverse that fold. |
2034 | |
2035 | // Is this '((1 >> Y) & 1)'? |
2036 | if (XC && OldShiftOpcode == ISD::SRL && XC->isOne()) |
2037 | return false; // Keep the 'bit extract' pattern. |
2038 | |
2039 | // Will this be '((1 >> Y) & 1)' after the transform? |
2040 | if (NewShiftOpcode == ISD::SRL && CC->isOne()) |
2041 | return true; // Do form the 'bit extract' pattern. |
2042 | |
2043 | // If 'X' is a constant, and we transform, then we will immediately |
  // try to undo the fold, thus causing an endless combine loop.
2045 | // So only do the transform if X is not a constant. This matches the default |
2046 | // implementation of this function. |
2047 | return !XC; |
2048 | } |
2049 | |
2050 | bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { |
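  // Return true if operand 'Operand' of an instruction with the given IR
  // opcode can be folded as a splatted scalar, i.e. the operation has a
  // .vx/.vf form. For shifts, divisions, remainders and selects only the
  // second operand can be a scalar.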
2051 | switch (Opcode) { |
2052 | case Instruction::Add: |
2053 | case Instruction::Sub: |
2054 | case Instruction::Mul: |
2055 | case Instruction::And: |
2056 | case Instruction::Or: |
2057 | case Instruction::Xor: |
2058 | case Instruction::FAdd: |
2059 | case Instruction::FSub: |
2060 | case Instruction::FMul: |
2061 | case Instruction::FDiv: |
2062 | case Instruction::ICmp: |
2063 | case Instruction::FCmp: |
2064 | return true; |
2065 | case Instruction::Shl: |
2066 | case Instruction::LShr: |
2067 | case Instruction::AShr: |
2068 | case Instruction::UDiv: |
2069 | case Instruction::SDiv: |
2070 | case Instruction::URem: |
2071 | case Instruction::SRem: |
2072 | case Instruction::Select: |
2073 | return Operand == 1; |
2074 | default: |
2075 | return false; |
2076 | } |
2077 | } |
2078 | |
2079 | |
2080 | bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { |
2081 | if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) |
2082 | return false; |
2083 | |
2084 | if (canSplatOperand(Opcode: I->getOpcode(), Operand)) |
2085 | return true; |
2086 | |
2087 | auto *II = dyn_cast<IntrinsicInst>(Val: I); |
2088 | if (!II) |
2089 | return false; |
2090 | |
2091 | switch (II->getIntrinsicID()) { |
2092 | case Intrinsic::fma: |
2093 | case Intrinsic::vp_fma: |
2094 | return Operand == 0 || Operand == 1; |
2095 | case Intrinsic::vp_shl: |
2096 | case Intrinsic::vp_lshr: |
2097 | case Intrinsic::vp_ashr: |
2098 | case Intrinsic::vp_udiv: |
2099 | case Intrinsic::vp_sdiv: |
2100 | case Intrinsic::vp_urem: |
2101 | case Intrinsic::vp_srem: |
2102 | case Intrinsic::ssub_sat: |
2103 | case Intrinsic::vp_ssub_sat: |
2104 | case Intrinsic::usub_sat: |
2105 | case Intrinsic::vp_usub_sat: |
2106 | return Operand == 1; |
2107 | // These intrinsics are commutative. |
2108 | case Intrinsic::vp_add: |
2109 | case Intrinsic::vp_mul: |
2110 | case Intrinsic::vp_and: |
2111 | case Intrinsic::vp_or: |
2112 | case Intrinsic::vp_xor: |
2113 | case Intrinsic::vp_fadd: |
2114 | case Intrinsic::vp_fmul: |
2115 | case Intrinsic::vp_icmp: |
2116 | case Intrinsic::vp_fcmp: |
2117 | case Intrinsic::smin: |
2118 | case Intrinsic::vp_smin: |
2119 | case Intrinsic::umin: |
2120 | case Intrinsic::vp_umin: |
2121 | case Intrinsic::smax: |
2122 | case Intrinsic::vp_smax: |
2123 | case Intrinsic::umax: |
2124 | case Intrinsic::vp_umax: |
2125 | case Intrinsic::sadd_sat: |
2126 | case Intrinsic::vp_sadd_sat: |
2127 | case Intrinsic::uadd_sat: |
2128 | case Intrinsic::vp_uadd_sat: |
2129 | // These intrinsics have 'vr' versions. |
2130 | case Intrinsic::vp_sub: |
2131 | case Intrinsic::vp_fsub: |
2132 | case Intrinsic::vp_fdiv: |
2133 | return Operand == 0 || Operand == 1; |
2134 | default: |
2135 | return false; |
2136 | } |
2137 | } |
2138 | |
2139 | /// Check if sinking \p I's operands to I's basic block is profitable, because |
2140 | /// the operands can be folded into a target instruction, e.g. |
2141 | /// splats of scalars can fold into vector instructions. |
2142 | bool RISCVTargetLowering::shouldSinkOperands( |
2143 | Instruction *I, SmallVectorImpl<Use *> &Ops) const { |
2144 | using namespace llvm::PatternMatch; |
2145 | |
2146 | if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) |
2147 | return false; |
2148 | |
  // Don't sink splat operands if the target prefers not to. Some targets
  // require S2V transfer buffers, and we can run out of them copying the same
  // value repeatedly.
2152 | // FIXME: It could still be worth doing if it would improve vector register |
2153 | // pressure and prevent a vector spill. |
2154 | if (!Subtarget.sinkSplatOperands()) |
2155 | return false; |
2156 | |
2157 | for (auto OpIdx : enumerate(First: I->operands())) { |
2158 | if (!canSplatOperand(I, Operand: OpIdx.index())) |
2159 | continue; |
2160 | |
2161 | Instruction *Op = dyn_cast<Instruction>(Val: OpIdx.value().get()); |
2162 | // Make sure we are not already sinking this operand |
2163 | if (!Op || any_of(Range&: Ops, P: [&](Use *U) { return U->get() == Op; })) |
2164 | continue; |
2165 | |
2166 | // We are looking for a splat that can be sunk. |
2167 | if (!match(V: Op, P: m_Shuffle(v1: m_InsertElt(Val: m_Undef(), Elt: m_Value(), Idx: m_ZeroInt()), |
2168 | v2: m_Undef(), mask: m_ZeroMask()))) |
2169 | continue; |
2170 | |
2171 | // Don't sink i1 splats. |
2172 | if (cast<VectorType>(Val: Op->getType())->getElementType()->isIntegerTy(Bitwidth: 1)) |
2173 | continue; |
2174 | |
    // All uses of the shuffle should be sunk to avoid duplicating it across
    // GPR and vector registers.
2177 | for (Use &U : Op->uses()) { |
2178 | Instruction *Insn = cast<Instruction>(Val: U.getUser()); |
2179 | if (!canSplatOperand(I: Insn, Operand: U.getOperandNo())) |
2180 | return false; |
2181 | } |
2182 | |
2183 | Ops.push_back(Elt: &Op->getOperandUse(i: 0)); |
2184 | Ops.push_back(Elt: &OpIdx.value()); |
2185 | } |
2186 | return true; |
2187 | } |
2188 | |
2189 | bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { |
2190 | unsigned Opc = VecOp.getOpcode(); |
2191 | |
2192 | // Assume target opcodes can't be scalarized. |
2193 | // TODO - do we have any exceptions? |
2194 | if (Opc >= ISD::BUILTIN_OP_END) |
2195 | return false; |
2196 | |
2197 | // If the vector op is not supported, try to convert to scalar. |
2198 | EVT VecVT = VecOp.getValueType(); |
2199 | if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT)) |
2200 | return true; |
2201 | |
2202 | // If the vector op is supported, but the scalar op is not, the transform may |
2203 | // not be worthwhile. |
  // Permit a vector binary operation to be converted to a scalar binary
  // operation that is custom lowered with an illegal type.
2206 | EVT ScalarVT = VecVT.getScalarType(); |
2207 | return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT) || |
2208 | isOperationCustom(Op: Opc, VT: ScalarVT); |
2209 | } |
2210 | |
2211 | bool RISCVTargetLowering::isOffsetFoldingLegal( |
2212 | const GlobalAddressSDNode *GA) const { |
2213 | // In order to maximise the opportunity for common subexpression elimination, |
2214 | // keep a separate ADD node for the global address offset instead of folding |
2215 | // it in the global address node. Later peephole optimisations may choose to |
2216 | // fold it back in when profitable. |
2217 | return false; |
2218 | } |
2219 | |
// Return one of the following:
// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
// positive counterpart, which will be materialized from the first returned
// element. The second returned element indicates that an FNEG should follow.
2226 | // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm. |
2227 | std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, |
2228 | EVT VT) const { |
2229 | if (!Subtarget.hasStdExtZfa()) |
2230 | return std::make_pair(x: -1, y: false); |
2231 | |
2232 | bool IsSupportedVT = false; |
2233 | if (VT == MVT::f16) { |
2234 | IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh(); |
2235 | } else if (VT == MVT::f32) { |
2236 | IsSupportedVT = true; |
2237 | } else if (VT == MVT::f64) { |
2238 | assert(Subtarget.hasStdExtD() && "Expect D extension" ); |
2239 | IsSupportedVT = true; |
2240 | } |
2241 | |
2242 | if (!IsSupportedVT) |
2243 | return std::make_pair(x: -1, y: false); |
2244 | |
2245 | int Index = RISCVLoadFPImm::getLoadFPImm(FPImm: Imm); |
2246 | if (Index < 0 && Imm.isNegative()) |
2247 | // Try the combination of its positive counterpart + FNEG. |
2248 | return std::make_pair(x: RISCVLoadFPImm::getLoadFPImm(FPImm: -Imm), y: true); |
2249 | else |
2250 | return std::make_pair(x&: Index, y: false); |
2251 | } |
2252 | |
2253 | bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
2254 | bool ForCodeSize) const { |
2255 | bool IsLegalVT = false; |
2256 | if (VT == MVT::f16) |
2257 | IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin(); |
2258 | else if (VT == MVT::f32) |
2259 | IsLegalVT = Subtarget.hasStdExtFOrZfinx(); |
2260 | else if (VT == MVT::f64) |
2261 | IsLegalVT = Subtarget.hasStdExtDOrZdinx(); |
2262 | else if (VT == MVT::bf16) |
2263 | IsLegalVT = Subtarget.hasStdExtZfbfmin(); |
2264 | |
2265 | if (!IsLegalVT) |
2266 | return false; |
2267 | |
2268 | if (getLegalZfaFPImm(Imm, VT).first >= 0) |
2269 | return true; |
2270 | |
  // Cannot create a 64-bit floating-point immediate value for RV32.
2272 | if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { |
2273 | // td can handle +0.0 or -0.0 already. |
2274 | // -0.0 can be created by fmv + fneg. |
2275 | return Imm.isZero(); |
2276 | } |
2277 | |
2278 | // Special case: fmv + fneg |
2279 | if (Imm.isNegZero()) |
2280 | return true; |
2281 | |
2282 | // Building an integer and then converting requires a fmv at the end of |
2283 | // the integer sequence. |
2284 | const int Cost = |
2285 | 1 + RISCVMatInt::getIntMatCost(Val: Imm.bitcastToAPInt(), Size: Subtarget.getXLen(), |
2286 | STI: Subtarget); |
2287 | return Cost <= FPImmCost; |
2288 | } |
2289 | |
2290 | // TODO: This is very conservative. |
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                  unsigned Index) const {
2293 | if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT)) |
2294 | return false; |
2295 | |
2296 | // Only support extracting a fixed from a fixed vector for now. |
2297 | if (ResVT.isScalableVector() || SrcVT.isScalableVector()) |
2298 | return false; |
2299 | |
2300 | EVT EltVT = ResVT.getVectorElementType(); |
2301 | assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node" ); |
2302 | |
2303 | // The smallest type we can slide is i8. |
2304 | // TODO: We can extract index 0 from a mask vector without a slide. |
2305 | if (EltVT == MVT::i1) |
2306 | return false; |
2307 | |
2308 | unsigned ResElts = ResVT.getVectorNumElements(); |
2309 | unsigned SrcElts = SrcVT.getVectorNumElements(); |
2310 | |
2311 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2312 | unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits(); |
2313 | |
2314 | // If we're extracting only data from the first VLEN bits of the source |
2315 | // then we can always do this with an m1 vslidedown.vx. Restricting the |
2316 | // Index ensures we can use a vslidedown.vi. |
2317 | // TODO: We can generalize this when the exact VLEN is known. |
2318 | if (Index + ResElts <= MinVLMAX && Index < 31) |
2319 | return true; |
2320 | |
  // Conservatively only handle extracting half of a vector.
2322 | // TODO: For sizes which aren't multiples of VLEN sizes, this may not be |
2323 | // a cheap extract. However, this case is important in practice for |
  // shuffled extracts of longer vectors. How should we resolve this?
2325 | if ((ResElts * 2) != SrcElts) |
2326 | return false; |
2327 | |
2328 | // Slide can support arbitrary index, but we only treat vslidedown.vi as |
2329 | // cheap. |
2330 | if (Index >= 32) |
2331 | return false; |
2332 | |
2333 | // TODO: We can do arbitrary slidedowns, but for now only support extracting |
2334 | // the upper half of a vector until we have more test coverage. |
2335 | return Index == 0 || Index == ResElts; |
2336 | } |
2337 | |
2338 | MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
2339 | CallingConv::ID CC, |
2340 | EVT VT) const { |
2341 | // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. |
2342 | // We might still end up using a GPR but that will be decided based on ABI. |
2343 | if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && |
2344 | !Subtarget.hasStdExtZfhminOrZhinxmin()) |
2345 | return MVT::f32; |
2346 | |
2347 | MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
2348 | |
2349 | if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32) |
2350 | return MVT::i64; |
2351 | |
2352 | return PartVT; |
2353 | } |
2354 | |
2355 | unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
2356 | CallingConv::ID CC, |
2357 | EVT VT) const { |
2358 | // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. |
2359 | // We might still end up using a GPR but that will be decided based on ABI. |
2360 | if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && |
2361 | !Subtarget.hasStdExtZfhminOrZhinxmin()) |
2362 | return 1; |
2363 | |
2364 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
2365 | } |
2366 | |
2367 | unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv( |
2368 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
2369 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
2370 | unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv( |
2371 | Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); |
2372 | |
2373 | if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32) |
2374 | IntermediateVT = MVT::i64; |
2375 | |
2376 | if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32) |
2377 | RegisterVT = MVT::i64; |
2378 | |
2379 | return NumRegs; |
2380 | } |
2381 | |
2382 | // Changes the condition code and swaps operands if necessary, so the SetCC |
2383 | // operation matches one of the comparisons supported directly by branches |
2384 | // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare |
2385 | // with 1/-1. |
2386 | static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, |
2387 | ISD::CondCode &CC, SelectionDAG &DAG) { |
2388 | // If this is a single bit test that can't be handled by ANDI, shift the |
2389 | // bit to be tested to the MSB and perform a signed compare with 0. |
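  // For example, on RV64 (X & 0x4000) == 0 is not representable with ANDI, so
  // it becomes setge (shl X, 49), 0: bit 14 is moved into the sign bit.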
2390 | if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) && |
2391 | LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && |
2392 | isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) { |
2393 | uint64_t Mask = LHS.getConstantOperandVal(i: 1); |
2394 | if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) { |
2395 | unsigned ShAmt = 0; |
2396 | if (isPowerOf2_64(Value: Mask)) { |
2397 | CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; |
2398 | ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask); |
2399 | } else { |
2400 | ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask); |
2401 | } |
2402 | |
2403 | LHS = LHS.getOperand(i: 0); |
2404 | if (ShAmt != 0) |
2405 | LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS, |
2406 | N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType())); |
2407 | return; |
2408 | } |
2409 | } |
2410 | |
2411 | if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) { |
2412 | int64_t C = RHSC->getSExtValue(); |
2413 | switch (CC) { |
2414 | default: break; |
2415 | case ISD::SETGT: |
2416 | // Convert X > -1 to X >= 0. |
2417 | if (C == -1) { |
2418 | RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType()); |
2419 | CC = ISD::SETGE; |
2420 | return; |
2421 | } |
2422 | break; |
2423 | case ISD::SETLT: |
2424 | // Convert X < 1 to 0 >= X. |
2425 | if (C == 1) { |
2426 | RHS = LHS; |
2427 | LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType()); |
2428 | CC = ISD::SETGE; |
2429 | return; |
2430 | } |
2431 | break; |
2432 | } |
2433 | } |
2434 | |
2435 | switch (CC) { |
2436 | default: |
2437 | break; |
2438 | case ISD::SETGT: |
2439 | case ISD::SETLE: |
2440 | case ISD::SETUGT: |
2441 | case ISD::SETULE: |
2442 | CC = ISD::getSetCCSwappedOperands(Operation: CC); |
2443 | std::swap(a&: LHS, b&: RHS); |
2444 | break; |
2445 | } |
2446 | } |
2447 | |
2448 | RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { |
2449 | assert(VT.isScalableVector() && "Expecting a scalable vector type" ); |
2450 | unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); |
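  // Treat each i1 element as a byte so mask vectors map to the same LMUL as
  // the corresponding i8 vector (e.g. nxv8i1 and nxv8i8 both give LMUL_1).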
2451 | if (VT.getVectorElementType() == MVT::i1) |
2452 | KnownSize *= 8; |
2453 | |
2454 | switch (KnownSize) { |
2455 | default: |
2456 | llvm_unreachable("Invalid LMUL." ); |
2457 | case 8: |
2458 | return RISCVII::VLMUL::LMUL_F8; |
2459 | case 16: |
2460 | return RISCVII::VLMUL::LMUL_F4; |
2461 | case 32: |
2462 | return RISCVII::VLMUL::LMUL_F2; |
2463 | case 64: |
2464 | return RISCVII::VLMUL::LMUL_1; |
2465 | case 128: |
2466 | return RISCVII::VLMUL::LMUL_2; |
2467 | case 256: |
2468 | return RISCVII::VLMUL::LMUL_4; |
2469 | case 512: |
2470 | return RISCVII::VLMUL::LMUL_8; |
2471 | } |
2472 | } |
2473 | |
2474 | unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { |
2475 | switch (LMul) { |
2476 | default: |
2477 | llvm_unreachable("Invalid LMUL." ); |
2478 | case RISCVII::VLMUL::LMUL_F8: |
2479 | case RISCVII::VLMUL::LMUL_F4: |
2480 | case RISCVII::VLMUL::LMUL_F2: |
2481 | case RISCVII::VLMUL::LMUL_1: |
2482 | return RISCV::VRRegClassID; |
2483 | case RISCVII::VLMUL::LMUL_2: |
2484 | return RISCV::VRM2RegClassID; |
2485 | case RISCVII::VLMUL::LMUL_4: |
2486 | return RISCV::VRM4RegClassID; |
2487 | case RISCVII::VLMUL::LMUL_8: |
2488 | return RISCV::VRM8RegClassID; |
2489 | } |
2490 | } |
2491 | |
2492 | unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { |
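  // Return the subregister index that selects the Index'th VT-sized register
  // within a larger vector register group (e.g. getSubregIndexByMVT(nxv2i32,
  // 1) is sub_vrm1_1, the second LMUL=1 register of a group).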
2493 | RISCVII::VLMUL LMUL = getLMUL(VT); |
2494 | if (LMUL == RISCVII::VLMUL::LMUL_F8 || |
2495 | LMUL == RISCVII::VLMUL::LMUL_F4 || |
2496 | LMUL == RISCVII::VLMUL::LMUL_F2 || |
2497 | LMUL == RISCVII::VLMUL::LMUL_1) { |
2498 | static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, |
2499 | "Unexpected subreg numbering" ); |
2500 | return RISCV::sub_vrm1_0 + Index; |
2501 | } |
2502 | if (LMUL == RISCVII::VLMUL::LMUL_2) { |
2503 | static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, |
2504 | "Unexpected subreg numbering" ); |
2505 | return RISCV::sub_vrm2_0 + Index; |
2506 | } |
2507 | if (LMUL == RISCVII::VLMUL::LMUL_4) { |
2508 | static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, |
2509 | "Unexpected subreg numbering" ); |
2510 | return RISCV::sub_vrm4_0 + Index; |
2511 | } |
2512 | llvm_unreachable("Invalid vector type." ); |
2513 | } |
2514 | |
2515 | unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { |
2516 | if (VT.getVectorElementType() == MVT::i1) |
2517 | return RISCV::VRRegClassID; |
2518 | return getRegClassIDForLMUL(LMul: getLMUL(VT)); |
2519 | } |
2520 | |
2521 | // Attempt to decompose a subvector insert/extract between VecVT and |
2522 | // SubVecVT via subregister indices. Returns the subregister index that |
2523 | // can perform the subvector insert/extract with the given element index, as |
2524 | // well as the index corresponding to any leftover subvectors that must be |
2525 | // further inserted/extracted within the register class for SubVecVT. |
2526 | std::pair<unsigned, unsigned> |
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2529 | const RISCVRegisterInfo *TRI) { |
2530 | static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && |
2531 | RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && |
2532 | RISCV::VRM2RegClassID > RISCV::VRRegClassID), |
2533 | "Register classes not ordered" ); |
2534 | unsigned VecRegClassID = getRegClassIDForVecVT(VT: VecVT); |
2535 | unsigned SubRegClassID = getRegClassIDForVecVT(VT: SubVecVT); |
2536 | // Try to compose a subregister index that takes us from the incoming |
// LMUL>1 register class down to the outgoing one. At each step we halve
// the LMUL:
2539 | // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 |
2540 | // Note that this is not guaranteed to find a subregister index, such as |
2541 | // when we are extracting from one VR type to another. |
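// Illustrative trace (added for clarity, under the numbering asserted in
// getSubregIndexByMVT): for the nxv16i32@12 -> nxv2i32 example above, the
// loop below takes three steps:
//   VRM4: halve to nxv8i32, 12 >= 8 -> sub_vrm4_1, remaining index 4
//   VRM2: halve to nxv4i32,  4 >= 4 -> ..._then_sub_vrm2_1, remaining index 0
//   VR:   halve to nxv2i32,  0 <  2 -> ..._then_sub_vrm1_0, remaining index 0
// yielding the composed subregister index and a leftover index of 0.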
2542 | unsigned SubRegIdx = RISCV::NoSubRegister; |
2543 | for (const unsigned RCID : |
2544 | {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) |
2545 | if (VecRegClassID > RCID && SubRegClassID <= RCID) { |
2546 | VecVT = VecVT.getHalfNumVectorElementsVT(); |
2547 | bool IsHi = |
2548 | InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); |
2549 | SubRegIdx = TRI->composeSubRegIndices(a: SubRegIdx, |
2550 | b: getSubregIndexByMVT(VT: VecVT, Index: IsHi)); |
2551 | if (IsHi) |
2552 | InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); |
2553 | } |
2554 | return {SubRegIdx, InsertExtractIdx}; |
2555 | } |
2556 | |
2557 | // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar |
2558 | // stores for those types. |
2559 | bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { |
2560 | return !Subtarget.useRVVForFixedLengthVectors() || |
2561 | (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); |
2562 | } |
2563 | |
2564 | bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { |
2565 | if (!ScalarTy.isSimple()) |
2566 | return false; |
2567 | switch (ScalarTy.getSimpleVT().SimpleTy) { |
2568 | case MVT::iPTR: |
2569 | return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; |
2570 | case MVT::i8: |
2571 | case MVT::i16: |
2572 | case MVT::i32: |
2573 | return true; |
2574 | case MVT::i64: |
2575 | return Subtarget.hasVInstructionsI64(); |
2576 | case MVT::f16: |
2577 | return Subtarget.hasVInstructionsF16(); |
2578 | case MVT::f32: |
2579 | return Subtarget.hasVInstructionsF32(); |
2580 | case MVT::f64: |
2581 | return Subtarget.hasVInstructionsF64(); |
2582 | default: |
2583 | return false; |
2584 | } |
2585 | } |
2586 | |
2587 | |
2588 | unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { |
2589 | return NumRepeatedDivisors; |
2590 | } |
2591 | |
2592 | static SDValue getVLOperand(SDValue Op) { |
2593 | assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2594 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && |
2595 | "Unexpected opcode" ); |
2596 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
2597 | unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0); |
2598 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
2599 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID: IntNo); |
2600 | if (!II) |
2601 | return SDValue(); |
2602 | return Op.getOperand(i: II->VLOperand + 1 + HasChain); |
2603 | } |
2604 | |
2605 | static bool useRVVForFixedLengthVectorVT(MVT VT, |
2606 | const RISCVSubtarget &Subtarget) { |
2607 | assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!" ); |
2608 | if (!Subtarget.useRVVForFixedLengthVectors()) |
2609 | return false; |
2610 | |
2611 | // We only support a set of vector types with a consistent maximum fixed size |
2612 | // across all supported vector element types to avoid legalization issues. |
2613 | // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest |
2614 | // fixed-length vector type we support is 1024 bytes. |
2615 | if (VT.getFixedSizeInBits() > 1024 * 8) |
2616 | return false; |
2617 | |
2618 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2619 | |
2620 | MVT EltVT = VT.getVectorElementType(); |
2621 | |
2622 | // Don't use RVV for vectors we cannot scalarize if required. |
2623 | switch (EltVT.SimpleTy) { |
2624 | // i1 is supported but has different rules. |
2625 | default: |
2626 | return false; |
2627 | case MVT::i1: |
2628 | // Masks can only use a single register. |
2629 | if (VT.getVectorNumElements() > MinVLen) |
2630 | return false; |
2631 | MinVLen /= 8; |
2632 | break; |
2633 | case MVT::i8: |
2634 | case MVT::i16: |
2635 | case MVT::i32: |
2636 | break; |
2637 | case MVT::i64: |
2638 | if (!Subtarget.hasVInstructionsI64()) |
2639 | return false; |
2640 | break; |
2641 | case MVT::f16: |
2642 | if (!Subtarget.hasVInstructionsF16Minimal()) |
2643 | return false; |
2644 | break; |
2645 | case MVT::bf16: |
2646 | if (!Subtarget.hasVInstructionsBF16()) |
2647 | return false; |
2648 | break; |
2649 | case MVT::f32: |
2650 | if (!Subtarget.hasVInstructionsF32()) |
2651 | return false; |
2652 | break; |
2653 | case MVT::f64: |
2654 | if (!Subtarget.hasVInstructionsF64()) |
2655 | return false; |
2656 | break; |
2657 | } |
2658 | |
2659 | // Reject elements larger than ELEN. |
2660 | if (EltVT.getSizeInBits() > Subtarget.getELen()) |
2661 | return false; |
2662 | |
2663 | unsigned LMul = divideCeil(Numerator: VT.getSizeInBits(), Denominator: MinVLen); |
2664 | // Don't use RVV for types that don't fit. |
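// Worked example (illustrative, not from the original source): with Zvl128b
// (MinVLen == 128) and the default LMUL limit of 8, v8i32 gives
// LMul = divideCeil(256, 128) == 2 and is accepted, whereas v256i32 would
// need LMul == 64 and is rejected below.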
2665 | if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) |
2666 | return false; |
2667 | |
2668 | // TODO: Perhaps an artificial restriction, but worth having whilst getting |
2669 | // the base fixed length RVV support in place. |
2670 | if (!VT.isPow2VectorType()) |
2671 | return false; |
2672 | |
2673 | return true; |
2674 | } |
2675 | |
2676 | bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { |
2677 | return ::useRVVForFixedLengthVectorVT(VT, Subtarget); |
2678 | } |
2679 | |
// Return the scalable container type to use for a fixed-length vector type:
// the narrowest scalable vector with VT's element type that can hold all of
// VT's elements at the minimum VLEN.
2681 | static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, |
2682 | const RISCVSubtarget &Subtarget) { |
2683 | // This may be called before legal types are setup. |
2684 | assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || |
2685 | useRVVForFixedLengthVectorVT(VT, Subtarget)) && |
2686 | "Expected legal fixed length vector!" ); |
2687 | |
2688 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2689 | unsigned MaxELen = Subtarget.getELen(); |
2690 | |
2691 | MVT EltVT = VT.getVectorElementType(); |
2692 | switch (EltVT.SimpleTy) { |
2693 | default: |
2694 | llvm_unreachable("unexpected element type for RVV container" ); |
2695 | case MVT::i1: |
2696 | case MVT::i8: |
2697 | case MVT::i16: |
2698 | case MVT::i32: |
2699 | case MVT::i64: |
2700 | case MVT::bf16: |
2701 | case MVT::f16: |
2702 | case MVT::f32: |
2703 | case MVT::f64: { |
2704 | // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for |
2705 | // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within |
2706 | // each fractional LMUL we support SEW between 8 and LMUL*ELEN. |
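// Worked example (illustrative, assuming RVVBitsPerBlock == 64): with
// Zvl128b (MinVLen == 128) and ELEN == 64, v4i32 gives
// NumElts = (4 * 64) / 128 = 2, clamped to at least 64 / 64 = 1, so the
// container is nxv2i32, which fills exactly one vector register at
// VLEN == 128.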
2707 | unsigned NumElts = |
2708 | (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; |
2709 | NumElts = std::max(a: NumElts, b: RISCV::RVVBitsPerBlock / MaxELen); |
2710 | assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts" ); |
2711 | return MVT::getScalableVectorVT(VT: EltVT, NumElements: NumElts); |
2712 | } |
2713 | } |
2714 | } |
2715 | |
2716 | static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, |
2717 | const RISCVSubtarget &Subtarget) { |
2718 | return getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT, |
2719 | Subtarget); |
2720 | } |
2721 | |
2722 | MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { |
2723 | return ::getContainerForFixedLengthVector(TLI: *this, VT, Subtarget: getSubtarget()); |
2724 | } |
2725 | |
2726 | // Grow V to consume an entire RVV register. |
2727 | static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
2728 | const RISCVSubtarget &Subtarget) { |
2729 | assert(VT.isScalableVector() && |
2730 | "Expected to convert into a scalable vector!" ); |
2731 | assert(V.getValueType().isFixedLengthVector() && |
2732 | "Expected a fixed length vector operand!" ); |
2733 | SDLoc DL(V); |
2734 | SDValue Zero = DAG.getVectorIdxConstant(Val: 0, DL); |
2735 | return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: V, N3: Zero); |
2736 | } |
2737 | |
2738 | // Shrink V so it's just big enough to maintain a VT's worth of data. |
2739 | static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
2740 | const RISCVSubtarget &Subtarget) { |
2741 | assert(VT.isFixedLengthVector() && |
2742 | "Expected to convert into a fixed length vector!" ); |
2743 | assert(V.getValueType().isScalableVector() && |
2744 | "Expected a scalable vector operand!" ); |
2745 | SDLoc DL(V); |
2746 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()); |
2747 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: V, N2: Zero); |
2748 | } |
2749 | |
/// Return the mask type suitable for masking the provided
2751 | /// vector type. This is simply an i1 element type vector of the same |
2752 | /// (possibly scalable) length. |
2753 | static MVT getMaskTypeFor(MVT VecVT) { |
2754 | assert(VecVT.isVector()); |
2755 | ElementCount EC = VecVT.getVectorElementCount(); |
2756 | return MVT::getVectorVT(VT: MVT::i1, EC); |
2757 | } |
2758 | |
2759 | /// Creates an all ones mask suitable for masking a vector of type VecTy with |
/// vector length VL.
2761 | static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, |
2762 | SelectionDAG &DAG) { |
2763 | MVT MaskVT = getMaskTypeFor(VecVT); |
2764 | return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: MaskVT, Operand: VL); |
2765 | } |
2766 | |
2767 | static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, |
2768 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
2769 | // If we know the exact VLEN, and our VL is exactly equal to VLMAX, |
2770 | // canonicalize the representation. InsertVSETVLI will pick the immediate |
2771 | // encoding later if profitable. |
2772 | const auto [MinVLMAX, MaxVLMAX] = |
2773 | RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); |
2774 | if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX) |
2775 | return DAG.getRegister(Reg: RISCV::X0, VT: Subtarget.getXLenVT()); |
2776 | |
2777 | return DAG.getConstant(Val: NumElts, DL, VT: Subtarget.getXLenVT()); |
2778 | } |
2779 | |
2780 | static std::pair<SDValue, SDValue> |
2781 | getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, |
2782 | const RISCVSubtarget &Subtarget) { |
2783 | assert(VecVT.isScalableVector() && "Expecting a scalable vector" ); |
2784 | SDValue VL = DAG.getRegister(Reg: RISCV::X0, VT: Subtarget.getXLenVT()); |
2785 | SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); |
2786 | return {Mask, VL}; |
2787 | } |
2788 | |
2789 | static std::pair<SDValue, SDValue> |
2790 | getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, |
2791 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
2792 | assert(ContainerVT.isScalableVector() && "Expecting scalable container type" ); |
2793 | SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget); |
2794 | SDValue Mask = getAllOnesMask(VecVT: ContainerVT, VL, DL, DAG); |
2795 | return {Mask, VL}; |
2796 | } |
2797 | |
2798 | // Gets the two common "VL" operands: an all-ones mask and the vector length. |
// VecVT is a vector type, either fixed-length or scalable. If VecVT is
// fixed-length, ContainerVT is the scalable container type it is lowered
// into; otherwise ContainerVT should be the same as VecVT.
2802 | static std::pair<SDValue, SDValue> |
2803 | getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, |
2804 | const RISCVSubtarget &Subtarget) { |
2805 | if (VecVT.isFixedLengthVector()) |
2806 | return getDefaultVLOps(NumElts: VecVT.getVectorNumElements(), ContainerVT, DL, DAG, |
2807 | Subtarget); |
2808 | assert(ContainerVT.isScalableVector() && "Expecting scalable container type" ); |
2809 | return getDefaultScalableVLOps(VecVT: ContainerVT, DL, DAG, Subtarget); |
2810 | } |
2811 | |
2812 | SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, |
2813 | SelectionDAG &DAG) const { |
2814 | assert(VecVT.isScalableVector() && "Expected scalable vector" ); |
2815 | return DAG.getElementCount(DL, VT: Subtarget.getXLenVT(), |
2816 | EC: VecVT.getVectorElementCount()); |
2817 | } |
2818 | |
2819 | std::pair<unsigned, unsigned> |
2820 | RISCVTargetLowering::computeVLMAXBounds(MVT VecVT, |
2821 | const RISCVSubtarget &Subtarget) { |
2822 | assert(VecVT.isScalableVector() && "Expected scalable vector" ); |
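// Worked example (illustrative, not from the original source): for nxv2i32
// (LMUL == 1, SEW == 32) with the real VLEN known to lie in [128, 512],
// VLMAX == LMUL * VLEN / SEW, so this returns {4, 16}.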
2823 | |
2824 | unsigned EltSize = VecVT.getScalarSizeInBits(); |
2825 | unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); |
2826 | |
2827 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
2828 | unsigned MaxVLMAX = |
2829 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize); |
2830 | |
2831 | unsigned VectorBitsMin = Subtarget.getRealMinVLen(); |
2832 | unsigned MinVLMAX = |
2833 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMin, EltSize, MinSize); |
2834 | |
2835 | return std::make_pair(x&: MinVLMAX, y&: MaxVLMAX); |
2836 | } |
2837 | |
2838 | // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few |
2839 | // of either is (currently) supported. This can get us into an infinite loop |
2840 | // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR |
2841 | // as a ..., etc. |
2842 | // Until either (or both) of these can reliably lower any node, reporting that |
2843 | // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks |
2844 | // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, |
2845 | // which is not desirable. |
2846 | bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( |
2847 | EVT VT, unsigned DefinedValues) const { |
2848 | return false; |
2849 | } |
2850 | |
2851 | InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { |
// TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
// implementation-defined.
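// Illustrative costs under that assumption: with DLEN == VLEN
// (DLenFactor == 1), LMUL_1 and fractional LMULs cost 1 and LMUL_4 costs 4;
// with DLEN == VLEN / 2 (DLenFactor == 2), LMUL_1 costs 2 and LMUL_4 costs 8.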
2854 | if (!VT.isVector()) |
2855 | return InstructionCost::getInvalid(); |
2856 | unsigned DLenFactor = Subtarget.getDLenFactor(); |
2857 | unsigned Cost; |
2858 | if (VT.isScalableVector()) { |
2859 | unsigned LMul; |
2860 | bool Fractional; |
2861 | std::tie(args&: LMul, args&: Fractional) = |
2862 | RISCVVType::decodeVLMUL(VLMUL: RISCVTargetLowering::getLMUL(VT)); |
2863 | if (Fractional) |
2864 | Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; |
2865 | else |
2866 | Cost = (LMul * DLenFactor); |
2867 | } else { |
2868 | Cost = divideCeil(Numerator: VT.getSizeInBits(), Denominator: Subtarget.getRealMinVLen() / DLenFactor); |
2869 | } |
2870 | return Cost; |
2871 | } |
2872 | |
2873 | |
2874 | /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv |
/// is generally quadratic in the number of vregs implied by LMUL. Note that
/// the operands (index and possibly mask) are handled separately.
2877 | InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { |
2878 | return getLMULCost(VT) * getLMULCost(VT); |
2879 | } |
2880 | |
2881 | /// Return the cost of a vrgather.vi (or vx) instruction for the type VT. |
2882 | /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL, |
2883 | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2884 | InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { |
2885 | return getLMULCost(VT); |
2886 | } |
2887 | |
2888 | /// Return the cost of a vslidedown.vx or vslideup.vx instruction |
2889 | /// for the type VT. (This does not cover the vslide1up or vslide1down |
2890 | /// variants.) Slides may be linear in the number of vregs implied by LMUL, |
2891 | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2892 | InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const { |
2893 | return getLMULCost(VT); |
2894 | } |
2895 | |
2896 | /// Return the cost of a vslidedown.vi or vslideup.vi instruction |
2897 | /// for the type VT. (This does not cover the vslide1up or vslide1down |
2898 | /// variants.) Slides may be linear in the number of vregs implied by LMUL, |
2899 | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2900 | InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const { |
2901 | return getLMULCost(VT); |
2902 | } |
2903 | |
2904 | static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, |
2905 | const RISCVSubtarget &Subtarget) { |
2906 | // RISC-V FP-to-int conversions saturate to the destination register size, but |
2907 | // don't produce 0 for nan. We can use a conversion instruction and fix the |
2908 | // nan case with a compare and a select. |
2909 | SDValue Src = Op.getOperand(i: 0); |
2910 | |
2911 | MVT DstVT = Op.getSimpleValueType(); |
2912 | EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT(); |
2913 | |
2914 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; |
2915 | |
2916 | if (!DstVT.isVector()) { |
// For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2918 | // the result. |
2919 | if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) || |
2920 | Src.getValueType() == MVT::bf16) { |
2921 | Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: SDLoc(Op), VT: MVT::f32, Operand: Src); |
2922 | } |
2923 | |
2924 | unsigned Opc; |
2925 | if (SatVT == DstVT) |
2926 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
2927 | else if (DstVT == MVT::i64 && SatVT == MVT::i32) |
2928 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
2929 | else |
2930 | return SDValue(); |
2931 | // FIXME: Support other SatVTs by clamping before or after the conversion. |
2932 | |
2933 | SDLoc DL(Op); |
2934 | SDValue FpToInt = DAG.getNode( |
2935 | Opcode: Opc, DL, VT: DstVT, N1: Src, |
2936 | N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT())); |
2937 | |
2938 | if (Opc == RISCVISD::FCVT_WU_RV64) |
2939 | FpToInt = DAG.getZeroExtendInReg(Op: FpToInt, DL, VT: MVT::i32); |
2940 | |
2941 | SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT); |
2942 | return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt, |
2943 | Cond: ISD::CondCode::SETUO); |
2944 | } |
2945 | |
2946 | // Vectors. |
2947 | |
2948 | MVT DstEltVT = DstVT.getVectorElementType(); |
2949 | MVT SrcVT = Src.getSimpleValueType(); |
2950 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
2951 | unsigned SrcEltSize = SrcEltVT.getSizeInBits(); |
2952 | unsigned DstEltSize = DstEltVT.getSizeInBits(); |
2953 | |
2954 | // Only handle saturating to the destination type. |
2955 | if (SatVT != DstEltVT) |
2956 | return SDValue(); |
2957 | |
// FIXME: Don't support narrowing by more than one step for now.
2959 | if (SrcEltSize > (2 * DstEltSize)) |
2960 | return SDValue(); |
2961 | |
2962 | MVT DstContainerVT = DstVT; |
2963 | MVT SrcContainerVT = SrcVT; |
2964 | if (DstVT.isFixedLengthVector()) { |
2965 | DstContainerVT = getContainerForFixedLengthVector(DAG, VT: DstVT, Subtarget); |
2966 | SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget); |
2967 | assert(DstContainerVT.getVectorElementCount() == |
2968 | SrcContainerVT.getVectorElementCount() && |
2969 | "Expected same element count" ); |
2970 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
2971 | } |
2972 | |
2973 | SDLoc DL(Op); |
2974 | |
2975 | auto [Mask, VL] = getDefaultVLOps(VecVT: DstVT, ContainerVT: DstContainerVT, DL, DAG, Subtarget); |
2976 | |
2977 | SDValue IsNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
2978 | Ops: {Src, Src, DAG.getCondCode(Cond: ISD::SETNE), |
2979 | DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL}); |
2980 | |
2981 | // Need to widen by more than 1 step, promote the FP type, then do a widening |
2982 | // convert. |
2983 | if (DstEltSize > (2 * SrcEltSize)) { |
2984 | assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!" ); |
2985 | MVT InterVT = SrcContainerVT.changeVectorElementType(EltVT: MVT::f32); |
2986 | Src = DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL); |
2987 | } |
2988 | |
2989 | unsigned RVVOpc = |
2990 | IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; |
2991 | SDValue Res = DAG.getNode(Opcode: RVVOpc, DL, VT: DstContainerVT, N1: Src, N2: Mask, N3: VL); |
2992 | |
2993 | SDValue SplatZero = DAG.getNode( |
2994 | Opcode: RISCVISD::VMV_V_X_VL, DL, VT: DstContainerVT, N1: DAG.getUNDEF(VT: DstContainerVT), |
2995 | N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL); |
2996 | Res = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: DstContainerVT, N1: IsNan, N2: SplatZero, |
2997 | N3: Res, N4: DAG.getUNDEF(VT: DstContainerVT), N5: VL); |
2998 | |
2999 | if (DstVT.isFixedLengthVector()) |
3000 | Res = convertFromScalableVector(VT: DstVT, V: Res, DAG, Subtarget); |
3001 | |
3002 | return Res; |
3003 | } |
3004 | |
3005 | static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { |
3006 | switch (Opc) { |
3007 | case ISD::FROUNDEVEN: |
3008 | case ISD::STRICT_FROUNDEVEN: |
3009 | case ISD::VP_FROUNDEVEN: |
3010 | return RISCVFPRndMode::RNE; |
3011 | case ISD::FTRUNC: |
3012 | case ISD::STRICT_FTRUNC: |
3013 | case ISD::VP_FROUNDTOZERO: |
3014 | return RISCVFPRndMode::RTZ; |
3015 | case ISD::FFLOOR: |
3016 | case ISD::STRICT_FFLOOR: |
3017 | case ISD::VP_FFLOOR: |
3018 | return RISCVFPRndMode::RDN; |
3019 | case ISD::FCEIL: |
3020 | case ISD::STRICT_FCEIL: |
3021 | case ISD::VP_FCEIL: |
3022 | return RISCVFPRndMode::RUP; |
3023 | case ISD::FROUND: |
3024 | case ISD::STRICT_FROUND: |
3025 | case ISD::VP_FROUND: |
3026 | return RISCVFPRndMode::RMM; |
3027 | case ISD::FRINT: |
3028 | return RISCVFPRndMode::DYN; |
3029 | } |
3030 | |
3031 | return RISCVFPRndMode::Invalid; |
3032 | } |
3033 | |
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
// the integer domain and back, taking care to avoid converting values that are
// nan or already correct.
3038 | static SDValue |
3039 | lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
3040 | const RISCVSubtarget &Subtarget) { |
3041 | MVT VT = Op.getSimpleValueType(); |
3042 | assert(VT.isVector() && "Unexpected type" ); |
3043 | |
3044 | SDLoc DL(Op); |
3045 | |
3046 | SDValue Src = Op.getOperand(i: 0); |
3047 | |
3048 | MVT ContainerVT = VT; |
3049 | if (VT.isFixedLengthVector()) { |
3050 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3051 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
3052 | } |
3053 | |
3054 | SDValue Mask, VL; |
3055 | if (Op->isVPOpcode()) { |
3056 | Mask = Op.getOperand(i: 1); |
3057 | if (VT.isFixedLengthVector()) |
3058 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
3059 | Subtarget); |
3060 | VL = Op.getOperand(i: 2); |
3061 | } else { |
3062 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3063 | } |
3064 | |
3065 | // Freeze the source since we are increasing the number of uses. |
3066 | Src = DAG.getFreeze(V: Src); |
3067 | |
3068 | // We do the conversion on the absolute value and fix the sign at the end. |
3069 | SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
3070 | |
3071 | // Determine the largest integer that can be represented exactly. This and |
3072 | // values larger than it don't have any fractional bits so don't need to |
3073 | // be converted. |
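// For example (illustrative): f32 has a precision of 24 bits, so the
// threshold is 2^23 = 8388608.0; for f64 it is 2^52. Any value with
// magnitude >= 2^(precision - 1) is already an integer.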
3074 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT); |
3075 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
3076 | APFloat MaxVal = APFloat(FltSem); |
3077 | MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1), |
3078 | /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven); |
3079 | SDValue MaxValNode = |
3080 | DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType()); |
3081 | SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT, |
3082 | N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL); |
3083 | |
3084 | // If abs(Src) was larger than MaxVal or nan, keep it. |
3085 | MVT SetccVT = MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount()); |
3086 | Mask = |
3087 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: SetccVT, |
3088 | Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT), |
3089 | Mask, Mask, VL}); |
3090 | |
3091 | // Truncate to integer and convert back to FP. |
3092 | MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); |
3093 | MVT XLenVT = Subtarget.getXLenVT(); |
3094 | SDValue Truncated; |
3095 | |
3096 | switch (Op.getOpcode()) { |
3097 | default: |
3098 | llvm_unreachable("Unexpected opcode" ); |
3099 | case ISD::FCEIL: |
3100 | case ISD::VP_FCEIL: |
3101 | case ISD::FFLOOR: |
3102 | case ISD::VP_FFLOOR: |
3103 | case ISD::FROUND: |
3104 | case ISD::FROUNDEVEN: |
3105 | case ISD::VP_FROUND: |
3106 | case ISD::VP_FROUNDEVEN: |
3107 | case ISD::VP_FROUNDTOZERO: { |
3108 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode()); |
3109 | assert(FRM != RISCVFPRndMode::Invalid); |
3110 | Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask, |
3111 | N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL); |
3112 | break; |
3113 | } |
3114 | case ISD::FTRUNC: |
3115 | Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RTZ_X_F_VL, DL, VT: IntVT, N1: Src, |
3116 | N2: Mask, N3: VL); |
3117 | break; |
3118 | case ISD::FRINT: |
3119 | case ISD::VP_FRINT: |
3120 | Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL); |
3121 | break; |
3122 | case ISD::FNEARBYINT: |
3123 | case ISD::VP_FNEARBYINT: |
3124 | Truncated = DAG.getNode(Opcode: RISCVISD::VFROUND_NOEXCEPT_VL, DL, VT: ContainerVT, N1: Src, |
3125 | N2: Mask, N3: VL); |
3126 | break; |
3127 | } |
3128 | |
3129 | // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. |
3130 | if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL) |
3131 | Truncated = DAG.getNode(Opcode: RISCVISD::SINT_TO_FP_VL, DL, VT: ContainerVT, N1: Truncated, |
3132 | N2: Mask, N3: VL); |
3133 | |
3134 | // Restore the original sign so that -0.0 is preserved. |
3135 | Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated, |
3136 | N2: Src, N3: Src, N4: Mask, N5: VL); |
3137 | |
3138 | if (!VT.isFixedLengthVector()) |
3139 | return Truncated; |
3140 | |
3141 | return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget); |
3142 | } |
3143 | |
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
// qNaNs and converting the new source to integer and back to FP.
3147 | static SDValue |
3148 | lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
3149 | const RISCVSubtarget &Subtarget) { |
3150 | SDLoc DL(Op); |
3151 | MVT VT = Op.getSimpleValueType(); |
3152 | SDValue Chain = Op.getOperand(i: 0); |
3153 | SDValue Src = Op.getOperand(i: 1); |
3154 | |
3155 | MVT ContainerVT = VT; |
3156 | if (VT.isFixedLengthVector()) { |
3157 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3158 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
3159 | } |
3160 | |
3161 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3162 | |
3163 | // Freeze the source since we are increasing the number of uses. |
3164 | Src = DAG.getFreeze(V: Src); |
3165 | |
// Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3167 | MVT MaskVT = Mask.getSimpleValueType(); |
3168 | SDValue Unorder = DAG.getNode(Opcode: RISCVISD::STRICT_FSETCC_VL, DL, |
3169 | VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other), |
3170 | Ops: {Chain, Src, Src, DAG.getCondCode(Cond: ISD::SETUNE), |
3171 | DAG.getUNDEF(VT: MaskVT), Mask, VL}); |
3172 | Chain = Unorder.getValue(R: 1); |
3173 | Src = DAG.getNode(Opcode: RISCVISD::STRICT_FADD_VL, DL, |
3174 | VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), |
3175 | Ops: {Chain, Src, Src, Src, Unorder, VL}); |
3176 | Chain = Src.getValue(R: 1); |
3177 | |
3178 | // We do the conversion on the absolute value and fix the sign at the end. |
3179 | SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
3180 | |
3181 | // Determine the largest integer that can be represented exactly. This and |
3182 | // values larger than it don't have any fractional bits so don't need to |
3183 | // be converted. |
3184 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT: ContainerVT); |
3185 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
3186 | APFloat MaxVal = APFloat(FltSem); |
3187 | MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1), |
3188 | /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven); |
3189 | SDValue MaxValNode = |
3190 | DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType()); |
3191 | SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT, |
3192 | N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL); |
3193 | |
3194 | // If abs(Src) was larger than MaxVal or nan, keep it. |
3195 | Mask = DAG.getNode( |
3196 | Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT, |
3197 | Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT), Mask, Mask, VL}); |
3198 | |
3199 | // Truncate to integer and convert back to FP. |
3200 | MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); |
3201 | MVT XLenVT = Subtarget.getXLenVT(); |
3202 | SDValue Truncated; |
3203 | |
3204 | switch (Op.getOpcode()) { |
3205 | default: |
3206 | llvm_unreachable("Unexpected opcode" ); |
3207 | case ISD::STRICT_FCEIL: |
3208 | case ISD::STRICT_FFLOOR: |
3209 | case ISD::STRICT_FROUND: |
3210 | case ISD::STRICT_FROUNDEVEN: { |
3211 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode()); |
3212 | assert(FRM != RISCVFPRndMode::Invalid); |
3213 | Truncated = DAG.getNode( |
3214 | Opcode: RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, VTList: DAG.getVTList(VT1: IntVT, VT2: MVT::Other), |
3215 | Ops: {Chain, Src, Mask, DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), VL}); |
3216 | break; |
3217 | } |
3218 | case ISD::STRICT_FTRUNC: |
3219 | Truncated = |
3220 | DAG.getNode(Opcode: RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, |
3221 | VTList: DAG.getVTList(VT1: IntVT, VT2: MVT::Other), N1: Chain, N2: Src, N3: Mask, N4: VL); |
3222 | break; |
3223 | case ISD::STRICT_FNEARBYINT: |
3224 | Truncated = DAG.getNode(Opcode: RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, |
3225 | VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), N1: Chain, N2: Src, |
3226 | N3: Mask, N4: VL); |
3227 | break; |
3228 | } |
3229 | Chain = Truncated.getValue(R: 1); |
3230 | |
3231 | // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. |
3232 | if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { |
3233 | Truncated = DAG.getNode(Opcode: RISCVISD::STRICT_SINT_TO_FP_VL, DL, |
3234 | VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), N1: Chain, |
3235 | N2: Truncated, N3: Mask, N4: VL); |
3236 | Chain = Truncated.getValue(R: 1); |
3237 | } |
3238 | |
3239 | // Restore the original sign so that -0.0 is preserved. |
3240 | Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated, |
3241 | N2: Src, N3: Src, N4: Mask, N5: VL); |
3242 | |
3243 | if (VT.isFixedLengthVector()) |
3244 | Truncated = convertFromScalableVector(VT, V: Truncated, DAG, Subtarget); |
3245 | return DAG.getMergeValues(Ops: {Truncated, Chain}, dl: DL); |
3246 | } |
3247 | |
3248 | static SDValue |
3249 | lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
3250 | const RISCVSubtarget &Subtarget) { |
3251 | MVT VT = Op.getSimpleValueType(); |
3252 | if (VT.isVector()) |
3253 | return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
3254 | |
3255 | if (DAG.shouldOptForSize()) |
3256 | return SDValue(); |
3257 | |
3258 | SDLoc DL(Op); |
3259 | SDValue Src = Op.getOperand(i: 0); |
3260 | |
3261 | // Create an integer the size of the mantissa with the MSB set. This and all |
3262 | // values larger than it don't have any fractional bits so don't need to be |
3263 | // converted. |
3264 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); |
3265 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
3266 | APFloat MaxVal = APFloat(FltSem); |
3267 | MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1), |
3268 | /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven); |
3269 | SDValue MaxValNode = DAG.getConstantFP(Val: MaxVal, DL, VT); |
3270 | |
3271 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode()); |
3272 | return DAG.getNode(Opcode: RISCVISD::FROUND, DL, VT, N1: Src, N2: MaxValNode, |
3273 | N3: DAG.getTargetConstant(Val: FRM, DL, VT: Subtarget.getXLenVT())); |
3274 | } |
3275 | |
3276 | // Expand vector LRINT and LLRINT by converting to the integer domain. |
3277 | static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, |
3278 | const RISCVSubtarget &Subtarget) { |
3279 | MVT VT = Op.getSimpleValueType(); |
3280 | assert(VT.isVector() && "Unexpected type" ); |
3281 | |
3282 | SDLoc DL(Op); |
3283 | SDValue Src = Op.getOperand(i: 0); |
3284 | MVT ContainerVT = VT; |
3285 | |
3286 | if (VT.isFixedLengthVector()) { |
3287 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3288 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
3289 | } |
3290 | |
3291 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3292 | SDValue Truncated = |
3293 | DAG.getNode(Opcode: RISCVISD::VFCVT_X_F_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
3294 | |
3295 | if (!VT.isFixedLengthVector()) |
3296 | return Truncated; |
3297 | |
3298 | return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget); |
3299 | } |
3300 | |
3301 | static SDValue |
3302 | getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, |
3303 | const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, |
3304 | SDValue Offset, SDValue Mask, SDValue VL, |
3305 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { |
3306 | if (Merge.isUndef()) |
3307 | Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
3308 | SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT()); |
3309 | SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; |
3310 | return DAG.getNode(Opcode: RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); |
3311 | } |
3312 | |
3313 | static SDValue |
3314 | getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, |
3315 | EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, |
3316 | SDValue VL, |
3317 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { |
3318 | if (Merge.isUndef()) |
3319 | Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
3320 | SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT()); |
3321 | SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; |
3322 | return DAG.getNode(Opcode: RISCVISD::VSLIDEUP_VL, DL, VT, Ops); |
3323 | } |
3324 | |
3325 | static MVT getLMUL1VT(MVT VT) { |
3326 | assert(VT.getVectorElementType().getSizeInBits() <= 64 && |
3327 | "Unexpected vector MVT" ); |
3328 | return MVT::getScalableVectorVT( |
3329 | VT: VT.getVectorElementType(), |
3330 | NumElements: RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); |
3331 | } |
3332 | |
3333 | struct VIDSequence { |
3334 | int64_t StepNumerator; |
3335 | unsigned StepDenominator; |
3336 | int64_t Addend; |
3337 | }; |
3338 | |
3339 | static std::optional<uint64_t> getExactInteger(const APFloat &APF, |
3340 | uint32_t BitWidth) { |
3341 | // We will use a SINT_TO_FP to materialize this constant so we should use a |
3342 | // signed APSInt here. |
3343 | APSInt ValInt(BitWidth, /*IsUnsigned*/ false); |
3344 | // We use an arbitrary rounding mode here. If a floating-point is an exact |
3345 | // integer (e.g., 1.0), the rounding mode does not affect the output value. If |
3346 | // the rounding mode changes the output value, then it is not an exact |
3347 | // integer. |
3348 | RoundingMode ArbitraryRM = RoundingMode::TowardZero; |
3349 | bool IsExact; |
3350 | // If it is out of signed integer range, it will return an invalid operation. |
3351 | // If it is not an exact integer, IsExact is false. |
3352 | if ((APF.convertToInteger(Result&: ValInt, RM: ArbitraryRM, IsExact: &IsExact) == |
3353 | APFloatBase::opInvalidOp) || |
3354 | !IsExact) |
3355 | return std::nullopt; |
3356 | return ValInt.extractBitsAsZExtValue(numBits: BitWidth, bitPosition: 0); |
3357 | } |
3358 | |
3359 | // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] |
3360 | // to the (non-zero) step S and start value X. This can be then lowered as the |
3361 | // RVV sequence (VID * S) + X, for example. |
3362 | // The step S is represented as an integer numerator divided by a positive |
3363 | // denominator. Note that the implementation currently only identifies |
3364 | // sequences in which either the numerator is +/- 1 or the denominator is 1. It |
3365 | // cannot detect 2/3, for example. |
3366 | // Note that this method will also match potentially unappealing index |
3367 | // sequences, like <i32 0, i32 50939494>, however it is left to the caller to |
3368 | // determine whether this is worth generating code for. |
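// For example (illustrative): <0, 2, 4, 6> matches with StepNumerator = 2,
// StepDenominator = 1 and Addend = 0, while <1, 1, 2, 2> matches with
// StepNumerator = 1, StepDenominator = 2 and Addend = 1
// (VID = <0,1,2,3>, so VID / 2 + 1 = <1,1,2,2>).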
3369 | static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op, |
3370 | unsigned EltSizeInBits) { |
3371 | assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR" ); |
3372 | if (!cast<BuildVectorSDNode>(Val&: Op)->isConstant()) |
3373 | return std::nullopt; |
3374 | bool IsInteger = Op.getValueType().isInteger(); |
3375 | |
3376 | std::optional<unsigned> SeqStepDenom; |
3377 | std::optional<int64_t> SeqStepNum, SeqAddend; |
3378 | std::optional<std::pair<uint64_t, unsigned>> PrevElt; |
3379 | assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits()); |
3380 | |
3381 | // First extract the ops into a list of constant integer values. This may not |
3382 | // be possible for floats if they're not all representable as integers. |
3383 | SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands()); |
3384 | const unsigned OpSize = Op.getScalarValueSizeInBits(); |
3385 | for (auto [Idx, Elt] : enumerate(First: Op->op_values())) { |
3386 | if (Elt.isUndef()) { |
3387 | Elts[Idx] = std::nullopt; |
3388 | continue; |
3389 | } |
3390 | if (IsInteger) { |
3391 | Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(N: OpSize); |
3392 | } else { |
3393 | auto ExactInteger = |
3394 | getExactInteger(APF: cast<ConstantFPSDNode>(Val: Elt)->getValueAPF(), BitWidth: OpSize); |
3395 | if (!ExactInteger) |
3396 | return std::nullopt; |
3397 | Elts[Idx] = *ExactInteger; |
3398 | } |
3399 | } |
3400 | |
3401 | for (auto [Idx, Elt] : enumerate(First&: Elts)) { |
3402 | // Assume undef elements match the sequence; we just have to be careful |
3403 | // when interpolating across them. |
3404 | if (!Elt) |
3405 | continue; |
3406 | |
3407 | if (PrevElt) { |
3408 | // Calculate the step since the last non-undef element, and ensure |
3409 | // it's consistent across the entire sequence. |
3410 | unsigned IdxDiff = Idx - PrevElt->second; |
3411 | int64_t ValDiff = SignExtend64(X: *Elt - PrevElt->first, B: EltSizeInBits); |
3412 | |
// A zero value difference means that we're somewhere in the middle
3414 | // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a |
3415 | // step change before evaluating the sequence. |
3416 | if (ValDiff == 0) |
3417 | continue; |
3418 | |
3419 | int64_t Remainder = ValDiff % IdxDiff; |
3420 | // Normalize the step if it's greater than 1. |
3421 | if (Remainder != ValDiff) { |
3422 | // The difference must cleanly divide the element span. |
3423 | if (Remainder != 0) |
3424 | return std::nullopt; |
3425 | ValDiff /= IdxDiff; |
3426 | IdxDiff = 1; |
3427 | } |
3428 | |
3429 | if (!SeqStepNum) |
3430 | SeqStepNum = ValDiff; |
3431 | else if (ValDiff != SeqStepNum) |
3432 | return std::nullopt; |
3433 | |
3434 | if (!SeqStepDenom) |
3435 | SeqStepDenom = IdxDiff; |
3436 | else if (IdxDiff != *SeqStepDenom) |
3437 | return std::nullopt; |
3438 | } |
3439 | |
3440 | // Record this non-undef element for later. |
3441 | if (!PrevElt || PrevElt->first != *Elt) |
3442 | PrevElt = std::make_pair(x&: *Elt, y&: Idx); |
3443 | } |
3444 | |
3445 | // We need to have logged a step for this to count as a legal index sequence. |
3446 | if (!SeqStepNum || !SeqStepDenom) |
3447 | return std::nullopt; |
3448 | |
3449 | // Loop back through the sequence and validate elements we might have skipped |
3450 | // while waiting for a valid step. While doing this, log any sequence addend. |
3451 | for (auto [Idx, Elt] : enumerate(First&: Elts)) { |
3452 | if (!Elt) |
3453 | continue; |
3454 | uint64_t ExpectedVal = |
3455 | (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; |
3456 | int64_t Addend = SignExtend64(X: *Elt - ExpectedVal, B: EltSizeInBits); |
3457 | if (!SeqAddend) |
3458 | SeqAddend = Addend; |
3459 | else if (Addend != SeqAddend) |
3460 | return std::nullopt; |
3461 | } |
3462 | |
3463 | assert(SeqAddend && "Must have an addend if we have a step" ); |
3464 | |
3465 | return VIDSequence{.StepNumerator: *SeqStepNum, .StepDenominator: *SeqStepDenom, .Addend: *SeqAddend}; |
3466 | } |
3467 | |
3468 | // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT |
3469 | // and lower it as a VRGATHER_VX_VL from the source vector. |
3470 | static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, |
3471 | SelectionDAG &DAG, |
3472 | const RISCVSubtarget &Subtarget) { |
3473 | if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
3474 | return SDValue(); |
3475 | SDValue Vec = SplatVal.getOperand(i: 0); |
3476 | // Only perform this optimization on vectors of the same size for simplicity. |
3477 | // Don't perform this optimization for i1 vectors. |
3478 | // FIXME: Support i1 vectors, maybe by promoting to i8? |
3479 | if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) |
3480 | return SDValue(); |
3481 | SDValue Idx = SplatVal.getOperand(i: 1); |
3482 | // The index must be a legal type. |
3483 | if (Idx.getValueType() != Subtarget.getXLenVT()) |
3484 | return SDValue(); |
3485 | |
3486 | MVT ContainerVT = VT; |
3487 | if (VT.isFixedLengthVector()) { |
3488 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3489 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
3490 | } |
3491 | |
3492 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3493 | |
3494 | SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT, N1: Vec, |
3495 | N2: Idx, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
3496 | |
3497 | if (!VT.isFixedLengthVector()) |
3498 | return Gather; |
3499 | |
3500 | return convertFromScalableVector(VT, V: Gather, DAG, Subtarget); |
3501 | } |
3502 | |
3503 | |
3504 | /// Try and optimize BUILD_VECTORs with "dominant values" - these are values |
3505 | /// which constitute a large proportion of the elements. In such cases we can |
3506 | /// splat a vector with the dominant element and make up the shortfall with |
/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3508 | /// Note that this includes vectors of 2 elements by association. The |
3509 | /// upper-most element is the "dominant" one, allowing us to use a splat to |
3510 | /// "insert" the upper element, and an insert of the lower element at position |
3511 | /// 0, which improves codegen. |
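/// For example (illustrative): <1.0, 1.0, 1.0, 2.0> can be lowered as a splat
/// of 1.0 followed by a single insert (or v(f)slide1down) of 2.0 into the
/// last lane.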
3512 | static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, |
3513 | const RISCVSubtarget &Subtarget) { |
3514 | MVT VT = Op.getSimpleValueType(); |
3515 | assert(VT.isFixedLengthVector() && "Unexpected vector!" ); |
3516 | |
3517 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3518 | |
3519 | SDLoc DL(Op); |
3520 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3521 | |
3522 | MVT XLenVT = Subtarget.getXLenVT(); |
3523 | unsigned NumElts = Op.getNumOperands(); |
3524 | |
3525 | SDValue DominantValue; |
3526 | unsigned MostCommonCount = 0; |
3527 | DenseMap<SDValue, unsigned> ValueCounts; |
3528 | unsigned NumUndefElts = |
3529 | count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); }); |
3530 | |
3531 | // Track the number of scalar loads we know we'd be inserting, estimated as |
3532 | // any non-zero floating-point constant. Other kinds of element are either |
3533 | // already in registers or are materialized on demand. The threshold at which |
// a vector load is more desirable than several scalar materialization and
3535 | // vector-insertion instructions is not known. |
3536 | unsigned NumScalarLoads = 0; |
3537 | |
3538 | for (SDValue V : Op->op_values()) { |
3539 | if (V.isUndef()) |
3540 | continue; |
3541 | |
3542 | ValueCounts.insert(KV: std::make_pair(x&: V, y: 0)); |
3543 | unsigned &Count = ValueCounts[V]; |
3544 | if (0 == Count) |
3545 | if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val&: V)) |
3546 | NumScalarLoads += !CFP->isExactlyValue(V: +0.0); |
3547 | |
3548 | // Is this value dominant? In case of a tie, prefer the highest element as |
3549 | // it's cheaper to insert near the beginning of a vector than it is at the |
3550 | // end. |
3551 | if (++Count >= MostCommonCount) { |
3552 | DominantValue = V; |
3553 | MostCommonCount = Count; |
3554 | } |
3555 | } |
3556 | |
3557 | assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR" ); |
3558 | unsigned NumDefElts = NumElts - NumUndefElts; |
3559 | unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; |
3560 | |
3561 | // Don't perform this optimization when optimizing for size, since |
3562 | // materializing elements and inserting them tends to cause code bloat. |
3563 | if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && |
3564 | (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) && |
3565 | ((MostCommonCount > DominantValueCountThreshold) || |
3566 | (ValueCounts.size() <= Log2_32(Value: NumDefElts)))) { |
3567 | // Start by splatting the most common element. |
3568 | SDValue Vec = DAG.getSplatBuildVector(VT, DL, Op: DominantValue); |
3569 | |
3570 | DenseSet<SDValue> Processed{DominantValue}; |
3571 | |
3572 | // We can handle an insert into the last element (of a splat) via |
3573 | // v(f)slide1down. This is slightly better than the vslideup insert |
3574 | // lowering as it avoids the need for a vector group temporary. It |
3575 | // is also better than using vmerge.vx as it avoids the need to |
3576 | // materialize the mask in a vector register. |
3577 | if (SDValue LastOp = Op->getOperand(Num: Op->getNumOperands() - 1); |
3578 | !LastOp.isUndef() && ValueCounts[LastOp] == 1 && |
3579 | LastOp != DominantValue) { |
3580 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
3581 | auto OpCode = |
3582 | VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; |
3583 | if (!VT.isFloatingPoint()) |
3584 | LastOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: LastOp); |
3585 | Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec, |
3586 | N3: LastOp, N4: Mask, N5: VL); |
3587 | Vec = convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
3588 | Processed.insert(V: LastOp); |
3589 | } |
3590 | |
3591 | MVT SelMaskTy = VT.changeVectorElementType(EltVT: MVT::i1); |
3592 | for (const auto &OpIdx : enumerate(First: Op->ops())) { |
3593 | const SDValue &V = OpIdx.value(); |
3594 | if (V.isUndef() || !Processed.insert(V).second) |
3595 | continue; |
3596 | if (ValueCounts[V] == 1) { |
3597 | Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: Vec, N2: V, |
3598 | N3: DAG.getVectorIdxConstant(Val: OpIdx.index(), DL)); |
3599 | } else { |
3600 | // Blend in all instances of this value using a VSELECT, using a |
3601 | // mask where each bit signals whether that element is the one |
3602 | // we're after. |
3603 | SmallVector<SDValue> Ops; |
3604 | transform(Range: Op->op_values(), d_first: std::back_inserter(x&: Ops), F: [&](SDValue V1) { |
3605 | return DAG.getConstant(Val: V == V1, DL, VT: XLenVT); |
3606 | }); |
3607 | Vec = DAG.getNode(Opcode: ISD::VSELECT, DL, VT, |
3608 | N1: DAG.getBuildVector(VT: SelMaskTy, DL, Ops), |
3609 | N2: DAG.getSplatBuildVector(VT, DL, Op: V), N3: Vec); |
3610 | } |
3611 | } |
3612 | |
3613 | return Vec; |
3614 | } |
3615 | |
3616 | return SDValue(); |
3617 | } |
3618 | |
3619 | static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, |
3620 | const RISCVSubtarget &Subtarget) { |
3621 | MVT VT = Op.getSimpleValueType(); |
3622 | assert(VT.isFixedLengthVector() && "Unexpected vector!" ); |
3623 | |
3624 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3625 | |
3626 | SDLoc DL(Op); |
3627 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
3628 | |
3629 | MVT XLenVT = Subtarget.getXLenVT(); |
3630 | unsigned NumElts = Op.getNumOperands(); |
3631 | |
3632 | if (VT.getVectorElementType() == MVT::i1) { |
3633 | if (ISD::isBuildVectorAllZeros(N: Op.getNode())) { |
3634 | SDValue VMClr = DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT: ContainerVT, Operand: VL); |
3635 | return convertFromScalableVector(VT, V: VMClr, DAG, Subtarget); |
3636 | } |
3637 | |
3638 | if (ISD::isBuildVectorAllOnes(N: Op.getNode())) { |
3639 | SDValue VMSet = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL); |
3640 | return convertFromScalableVector(VT, V: VMSet, DAG, Subtarget); |
3641 | } |
3642 | |
3643 | // Lower constant mask BUILD_VECTORs via an integer vector type, in |
3644 | // scalar integer chunks whose bit-width depends on the number of mask |
3645 | // bits and XLEN. |
3646 | // First, determine the most appropriate scalar integer type to use. This |
3647 | // is at most XLenVT, but may be shrunk to a smaller vector element type |
3648 | // according to the size of the final vector - use i8 chunks rather than |
3649 | // XLenVT if we're producing a v8i1. This results in more consistent |
3650 | // codegen across RV32 and RV64. |
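// For example (illustrative): the v8i1 constant <1,0,1,1,0,0,0,1> packs into
// the i8 value 0b10001101 (element 0 in bit 0), which is materialized as a
// v1i8 vector and bitcast back to v8i1.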
3651 | unsigned NumViaIntegerBits = std::clamp(val: NumElts, lo: 8u, hi: Subtarget.getXLen()); |
3652 | NumViaIntegerBits = std::min(a: NumViaIntegerBits, b: Subtarget.getELen()); |
3653 | // If we have to use more than one INSERT_VECTOR_ELT then this |
// optimization is likely to increase code size; avoid performing it in
3655 | // such a case. We can use a load from a constant pool in this case. |
3656 | if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) |
3657 | return SDValue(); |
3658 | // Now we can create our integer vector type. Note that it may be larger |
3659 | // than the resulting mask type: v4i1 would use v1i8 as its integer type. |
3660 | unsigned IntegerViaVecElts = divideCeil(Numerator: NumElts, Denominator: NumViaIntegerBits); |
3661 | MVT IntegerViaVecVT = |
3662 | MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NumViaIntegerBits), |
3663 | NumElements: IntegerViaVecElts); |
3664 | |
3665 | uint64_t Bits = 0; |
3666 | unsigned BitPos = 0, IntegerEltIdx = 0; |
3667 | SmallVector<SDValue, 8> Elts(IntegerViaVecElts); |
3668 | |
3669 | for (unsigned I = 0; I < NumElts;) { |
3670 | SDValue V = Op.getOperand(i: I); |
3671 | bool BitValue = !V.isUndef() && V->getAsZExtVal(); |
3672 | Bits |= ((uint64_t)BitValue << BitPos); |
3673 | ++BitPos; |
3674 | ++I; |
3675 | |
3676 | // Once we accumulate enough bits to fill our scalar type or process the |
3677 | // last element, insert into our vector and clear our accumulated data. |
3678 | if (I % NumViaIntegerBits == 0 || I == NumElts) { |
3679 | if (NumViaIntegerBits <= 32) |
3680 | Bits = SignExtend64<32>(x: Bits); |
3681 | SDValue Elt = DAG.getConstant(Val: Bits, DL, VT: XLenVT); |
3682 | Elts[IntegerEltIdx] = Elt; |
3683 | Bits = 0; |
3684 | BitPos = 0; |
3685 | IntegerEltIdx++; |
3686 | } |
3687 | } |
3688 | |
3689 | SDValue Vec = DAG.getBuildVector(VT: IntegerViaVecVT, DL, Ops: Elts); |
3690 | |
3691 | if (NumElts < NumViaIntegerBits) { |
3692 | // If we're producing a smaller vector than our minimum legal integer |
3693 | // type, bitcast to the equivalent (known-legal) mask type, and extract |
3694 | // our final mask. |
3695 | assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type" ); |
3696 | Vec = DAG.getBitcast(VT: MVT::v8i1, V: Vec); |
3697 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Vec, |
3698 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
3699 | } else { |
3700 | // Else we must have produced an integer type with the same size as the |
3701 | // mask type; bitcast for the final result. |
3702 | assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); |
3703 | Vec = DAG.getBitcast(VT, V: Vec); |
3704 | } |
3705 | |
3706 | return Vec; |
3707 | } |
3708 | |
3709 | if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) { |
3710 | unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
3711 | : RISCVISD::VMV_V_X_VL; |
3712 | if (!VT.isFloatingPoint()) |
3713 | Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat); |
3714 | Splat = |
3715 | DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL); |
3716 | return convertFromScalableVector(VT, V: Splat, DAG, Subtarget); |
3717 | } |
3718 | |
3719 | // Try and match index sequences, which we can lower to the vid instruction |
3720 | // with optional modifications. An all-undef vector is matched by |
3721 | // getSplatValue, above. |
3722 | if (auto SimpleVID = isSimpleVIDSequence(Op, EltSizeInBits: Op.getScalarValueSizeInBits())) { |
3723 | int64_t StepNumerator = SimpleVID->StepNumerator; |
3724 | unsigned StepDenominator = SimpleVID->StepDenominator; |
3725 | int64_t Addend = SimpleVID->Addend; |
3726 | |
3727 | assert(StepNumerator != 0 && "Invalid step" ); |
3728 | bool Negate = false; |
3729 | int64_t SplatStepVal = StepNumerator; |
3730 | unsigned StepOpcode = ISD::MUL; |
3731 | // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it |
3732 | // anyway as the shift of 63 won't fit in uimm5. |
3733 | if (StepNumerator != 1 && StepNumerator != INT64_MIN && |
3734 | isPowerOf2_64(Value: std::abs(i: StepNumerator))) { |
3735 | Negate = StepNumerator < 0; |
3736 | StepOpcode = ISD::SHL; |
3737 | SplatStepVal = Log2_64(Value: std::abs(i: StepNumerator)); |
3738 | } |
3739 | |
// Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3741 | // threshold since it's the immediate value many RVV instructions accept. |
3742 | // There is no vmul.vi instruction so ensure multiply constant can fit in |
3743 | // a single addi instruction. |
3744 | if (((StepOpcode == ISD::MUL && isInt<12>(x: SplatStepVal)) || |
3745 | (StepOpcode == ISD::SHL && isUInt<5>(x: SplatStepVal))) && |
3746 | isPowerOf2_32(Value: StepDenominator) && |
3747 | (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(x: Addend)) { |
3748 | MVT VIDVT = |
3749 | VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; |
3750 | MVT VIDContainerVT = |
3751 | getContainerForFixedLengthVector(DAG, VT: VIDVT, Subtarget); |
3752 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: VIDContainerVT, N1: Mask, N2: VL); |
3753 | // Convert right out of the scalable type so we can use standard ISD |
3754 | // nodes for the rest of the computation. If we used scalable types with |
3755 | // these, we'd lose the fixed-length vector info and generate worse |
3756 | // vsetvli code. |
3757 | VID = convertFromScalableVector(VT: VIDVT, V: VID, DAG, Subtarget); |
3758 | if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || |
3759 | (StepOpcode == ISD::SHL && SplatStepVal != 0)) { |
3760 | SDValue SplatStep = DAG.getConstant(Val: SplatStepVal, DL, VT: VIDVT); |
3761 | VID = DAG.getNode(Opcode: StepOpcode, DL, VT: VIDVT, N1: VID, N2: SplatStep); |
3762 | } |
3763 | if (StepDenominator != 1) { |
3764 | SDValue SplatStep = |
3765 | DAG.getConstant(Val: Log2_64(Value: StepDenominator), DL, VT: VIDVT); |
3766 | VID = DAG.getNode(Opcode: ISD::SRL, DL, VT: VIDVT, N1: VID, N2: SplatStep); |
3767 | } |
3768 | if (Addend != 0 || Negate) { |
3769 | SDValue SplatAddend = DAG.getConstant(Val: Addend, DL, VT: VIDVT); |
3770 | VID = DAG.getNode(Opcode: Negate ? ISD::SUB : ISD::ADD, DL, VT: VIDVT, N1: SplatAddend, |
3771 | N2: VID); |
3772 | } |
3773 | if (VT.isFloatingPoint()) { |
3774 | // TODO: Use vfwcvt to reduce register pressure. |
3775 | VID = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: VID); |
3776 | } |
3777 | return VID; |
3778 | } |
3779 | } |
3780 | |
3781 | // For very small build_vectors, use a single scalar insert of a constant. |
3782 | // TODO: Base this on constant rematerialization cost, not size. |
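// For example, v4i8 <1, 2, 3, 4> amalgamates to the i32 constant 0x04030201,
// which is inserted as a single scalar element and then bitcast back to v4i8.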
3783 | const unsigned EltBitSize = VT.getScalarSizeInBits(); |
3784 | if (VT.getSizeInBits() <= 32 && |
3785 | ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) { |
3786 | MVT ViaIntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits()); |
3787 | assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) && |
3788 | "Unexpected sequence type" ); |
3789 | // If we can use the original VL with the modified element type, this |
3790 | // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this |
3791 | // be moved into InsertVSETVLI? |
3792 | unsigned ViaVecLen = |
3793 | (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1; |
3794 | MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen); |
3795 | |
3796 | uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize); |
3797 | uint64_t SplatValue = 0; |
3798 | // Construct the amalgamated value at this larger vector type. |
3799 | for (const auto &OpIdx : enumerate(First: Op->op_values())) { |
3800 | const auto &SeqV = OpIdx.value(); |
3801 | if (!SeqV.isUndef()) |
3802 | SplatValue |= |
3803 | ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize)); |
3804 | } |
3805 | |
3806 | // On RV64, sign-extend from 32 to 64 bits where possible in order to |
3807 | // achieve better constant materialization. |
3808 | if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
3809 | SplatValue = SignExtend64<32>(x: SplatValue); |
3810 | |
3811 | SDValue Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ViaVecVT, |
3812 | N1: DAG.getUNDEF(VT: ViaVecVT), |
3813 | N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT), |
3814 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
3815 | if (ViaVecLen != 1) |
3816 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, |
3817 | VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: 1), N1: Vec, |
3818 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
3819 | return DAG.getBitcast(VT, V: Vec); |
3820 | } |
3821 | |
3822 | |
3823 | // Attempt to detect "hidden" splats, which only reveal themselves as splats |
3824 | // when re-interpreted as a vector with a larger element type. For example, |
3825 | // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 |
3826 | // could instead be splat as |
3827 | // v2i32 = build_vector i32 0x00010000, i32 0x00010000 |
3828 | // TODO: This optimization could also work on non-constant splats, but it |
3829 | // would require bit-manipulation instructions to construct the splat value. |
3830 | SmallVector<SDValue> Sequence; |
3831 | const auto *BV = cast<BuildVectorSDNode>(Val&: Op); |
3832 | if (VT.isInteger() && EltBitSize < Subtarget.getELen() && |
3833 | ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) && |
3834 | BV->getRepeatedSequence(Sequence) && |
3835 | (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { |
3836 | unsigned SeqLen = Sequence.size(); |
3837 | MVT ViaIntVT = MVT::getIntegerVT(BitWidth: EltBitSize * SeqLen); |
3838 | assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || |
3839 | ViaIntVT == MVT::i64) && |
3840 | "Unexpected sequence type" ); |
3841 | |
3842 | // If we can use the original VL with the modified element type, this |
3843 | // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this |
3844 | // be moved into InsertVSETVLI? |
3845 | const unsigned RequiredVL = NumElts / SeqLen; |
3846 | const unsigned ViaVecLen = |
3847 | (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ? |
3848 | NumElts : RequiredVL; |
3849 | MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen); |
3850 | |
3851 | unsigned EltIdx = 0; |
3852 | uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize); |
3853 | uint64_t SplatValue = 0; |
3854 | // Construct the amalgamated value which can be splatted as this larger |
3855 | // vector type. |
3856 | for (const auto &SeqV : Sequence) { |
3857 | if (!SeqV.isUndef()) |
3858 | SplatValue |= |
3859 | ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize)); |
3860 | EltIdx++; |
3861 | } |
3862 | |
3863 | // On RV64, sign-extend from 32 to 64 bits where possible in order to |
3864 | // achieve better constant materialization. |
3865 | if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
3866 | SplatValue = SignExtend64<32>(x: SplatValue); |
3867 | |
3868 | // Since we can't introduce illegal i64 types at this stage, we can only |
3869 | // perform an i64 splat on RV32 if it is its own sign-extended value. That |
3870 | // way we can use RVV instructions to splat. |
3871 | assert((ViaIntVT.bitsLE(XLenVT) || |
3872 | (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && |
3873 | "Unexpected bitcast sequence" ); |
3874 | if (ViaIntVT.bitsLE(VT: XLenVT) || isInt<32>(x: SplatValue)) { |
3875 | SDValue ViaVL = |
3876 | DAG.getConstant(Val: ViaVecVT.getVectorNumElements(), DL, VT: XLenVT); |
3877 | MVT ViaContainerVT = |
3878 | getContainerForFixedLengthVector(DAG, VT: ViaVecVT, Subtarget); |
3879 | SDValue Splat = |
3880 | DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ViaContainerVT, |
3881 | N1: DAG.getUNDEF(VT: ViaContainerVT), |
3882 | N2: DAG.getConstant(Val: SplatValue, DL, VT: XLenVT), N3: ViaVL); |
3883 | Splat = convertFromScalableVector(VT: ViaVecVT, V: Splat, DAG, Subtarget); |
3884 | if (ViaVecLen != RequiredVL) |
3885 | Splat = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, |
3886 | VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: RequiredVL), N1: Splat, |
3887 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
3888 | return DAG.getBitcast(VT, V: Splat); |
3889 | } |
3890 | } |
3891 | |
3892 | // If the number of signbits allows, see if we can lower as a <N x i8>. |
3893 | // Our main goal here is to reduce LMUL (and thus work) required to |
3894 | // build the constant, but we will also narrow if the resulting |
3895 | // narrow vector is known to materialize cheaply. |
3896 | // TODO: We really should be costing the smaller vector. There are |
3897 | // profitable cases this misses. |
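// For example, each element of v4i32 <127, -3, 0, 5> is a sign-extended i8,
// so the constant can be built as v4i8 <127, -3, 0, 5> followed by a vsext.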
3898 | if (EltBitSize > 8 && VT.isInteger() && |
3899 | (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) { |
3900 | unsigned SignBits = DAG.ComputeNumSignBits(Op); |
3901 | if (EltBitSize - SignBits < 8) { |
3902 | SDValue Source = DAG.getBuildVector(VT: VT.changeVectorElementType(EltVT: MVT::i8), |
3903 | DL, Ops: Op->ops()); |
3904 | Source = convertToScalableVector(VT: ContainerVT.changeVectorElementType(EltVT: MVT::i8), |
3905 | V: Source, DAG, Subtarget); |
3906 | SDValue Res = DAG.getNode(Opcode: RISCVISD::VSEXT_VL, DL, VT: ContainerVT, N1: Source, N2: Mask, N3: VL); |
3907 | return convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
3908 | } |
3909 | } |
3910 | |
3911 | if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) |
3912 | return Res; |
3913 | |
3914 | // For constant vectors, use generic constant pool lowering. Otherwise, |
3915 | // we'd have to materialize constants in GPRs just to move them into the |
3916 | // vector. |
3917 | return SDValue(); |
3918 | } |
3919 | |
3920 | static unsigned getPACKOpcode(unsigned DestBW, |
3921 | const RISCVSubtarget &Subtarget) { |
3922 | switch (DestBW) { |
3923 | default: |
3924 | llvm_unreachable("Unsupported pack size" ); |
3925 | case 16: |
3926 | return RISCV::PACKH; |
3927 | case 32: |
3928 | return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK; |
3929 | case 64: |
3930 | assert(Subtarget.is64Bit()); |
3931 | return RISCV::PACK; |
3932 | } |
3933 | } |
3934 | |
3935 | /// Double the element size of the build vector to reduce the number |
3936 | /// of vslide1down in the build vector chain. In the worst case, this |
3937 | /// trades three scalar operations for 1 vector operation. Scalar |
3938 | /// operations are generally lower latency, and for out-of-order cores |
3939 | /// we also benefit from additional parallelism. |
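/// For example, a v8i8 build_vector of scalars a..h becomes a v4i16
/// build_vector of packed pairs ((b << 8) | a), ((d << 8) | c), etc., halving
/// the length of the vslide1down chain.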
3940 | static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, |
3941 | const RISCVSubtarget &Subtarget) { |
3942 | SDLoc DL(Op); |
3943 | MVT VT = Op.getSimpleValueType(); |
3944 | assert(VT.isFixedLengthVector() && "Unexpected vector!" ); |
3945 | MVT ElemVT = VT.getVectorElementType(); |
3946 | if (!ElemVT.isInteger()) |
3947 | return SDValue(); |
3948 | |
3949 | // TODO: Relax these architectural restrictions, possibly with costing |
3950 | // of the actual instructions required. |
3951 | if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba()) |
3952 | return SDValue(); |
3953 | |
3954 | unsigned NumElts = VT.getVectorNumElements(); |
3955 | unsigned ElemSizeInBits = ElemVT.getSizeInBits(); |
3956 | if (ElemSizeInBits >= std::min(a: Subtarget.getELen(), b: Subtarget.getXLen()) || |
3957 | NumElts % 2 != 0) |
3958 | return SDValue(); |
3959 | |
3960 | // Produce [B,A] packed into a type twice as wide. Note that all |
3961 | // scalars are XLenVT, possibly masked (see below). |
3962 | MVT XLenVT = Subtarget.getXLenVT(); |
3963 | SDValue Mask = DAG.getConstant( |
3964 | Val: APInt::getLowBitsSet(numBits: XLenVT.getSizeInBits(), loBitsSet: ElemSizeInBits), DL, VT: XLenVT); |
3965 | auto pack = [&](SDValue A, SDValue B) { |
3966 | // Bias the scheduling of the inserted operations to near the |
3967 | // definition of the element - this tends to reduce register |
3968 | // pressure overall. |
3969 | SDLoc ElemDL(B); |
3970 | if (Subtarget.hasStdExtZbkb()) |
3971 | // Note that we're relying on the high bits of the result being |
3972 | // don't care. For PACKW, the result is *sign* extended. |
3973 | return SDValue( |
3974 | DAG.getMachineNode(Opcode: getPACKOpcode(DestBW: ElemSizeInBits * 2, Subtarget), |
3975 | dl: ElemDL, VT: XLenVT, Op1: A, Op2: B), |
3976 | 0); |
3977 | |
3978 | A = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(A), VT: XLenVT, N1: A, N2: Mask); |
3979 | B = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(B), VT: XLenVT, N1: B, N2: Mask); |
3980 | SDValue ShtAmt = DAG.getConstant(Val: ElemSizeInBits, DL: ElemDL, VT: XLenVT); |
3981 | SDNodeFlags Flags; |
3982 | Flags.setDisjoint(true); |
3983 | return DAG.getNode(Opcode: ISD::OR, DL: ElemDL, VT: XLenVT, N1: A, |
3984 | N2: DAG.getNode(Opcode: ISD::SHL, DL: ElemDL, VT: XLenVT, N1: B, N2: ShtAmt), Flags); |
3985 | }; |
3986 | |
3987 | SmallVector<SDValue> NewOperands; |
3988 | NewOperands.reserve(N: NumElts / 2); |
3989 | for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2) |
3990 | NewOperands.push_back(Elt: pack(Op.getOperand(i), Op.getOperand(i: i + 1))); |
3991 | assert(NumElts == NewOperands.size() * 2); |
3992 | MVT WideVT = MVT::getIntegerVT(BitWidth: ElemSizeInBits * 2); |
3993 | MVT WideVecVT = MVT::getVectorVT(VT: WideVT, NumElements: NumElts / 2); |
3994 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, |
3995 | Operand: DAG.getBuildVector(VT: WideVecVT, DL, Ops: NewOperands)); |
3996 | } |
3997 | |
3998 | // Convert a vXf16 build_vector to a vXi16 build_vector with bitcasts. |
3999 | static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) { |
4000 | MVT VT = Op.getSimpleValueType(); |
4001 | MVT IVT = VT.changeVectorElementType(EltVT: MVT::i16); |
4002 | SmallVector<SDValue, 16> NewOps(Op.getNumOperands()); |
4003 | for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) |
4004 | NewOps[I] = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: I)); |
4005 | SDValue Res = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: SDLoc(Op), VT: IVT, Ops: NewOps); |
4006 | return DAG.getBitcast(VT, V: Res); |
4007 | } |
4008 | |
4009 | static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, |
4010 | const RISCVSubtarget &Subtarget) { |
4011 | MVT VT = Op.getSimpleValueType(); |
4012 | assert(VT.isFixedLengthVector() && "Unexpected vector!" ); |
4013 | |
4014 | // If we don't have scalar f16, we need to bitcast to an i16 vector. |
4015 | if (VT.getVectorElementType() == MVT::f16 && |
4016 | !Subtarget.hasStdExtZfhmin()) |
4017 | return lowerBUILD_VECTORvXf16(Op, DAG); |
4018 | |
4019 | if (ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) || |
4020 | ISD::isBuildVectorOfConstantFPSDNodes(N: Op.getNode())) |
4021 | return lowerBuildVectorOfConstants(Op, DAG, Subtarget); |
4022 | |
4023 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4024 | |
4025 | SDLoc DL(Op); |
4026 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
4027 | |
4028 | MVT XLenVT = Subtarget.getXLenVT(); |
4029 | |
4030 | if (VT.getVectorElementType() == MVT::i1) { |
4031 | // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask |
4032 | // vector type, we have a legal equivalently-sized i8 type, so we can use |
4033 | // that. |
4034 | MVT WideVecVT = VT.changeVectorElementType(EltVT: MVT::i8); |
4035 | SDValue VecZero = DAG.getConstant(Val: 0, DL, VT: WideVecVT); |
4036 | |
4037 | SDValue WideVec; |
4038 | if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) { |
4039 | // For a splat, perform a scalar truncate before creating the wider |
4040 | // vector. |
4041 | Splat = DAG.getNode(Opcode: ISD::AND, DL, VT: Splat.getValueType(), N1: Splat, |
4042 | N2: DAG.getConstant(Val: 1, DL, VT: Splat.getValueType())); |
4043 | WideVec = DAG.getSplatBuildVector(VT: WideVecVT, DL, Op: Splat); |
4044 | } else { |
4045 | SmallVector<SDValue, 8> Ops(Op->op_values()); |
4046 | WideVec = DAG.getBuildVector(VT: WideVecVT, DL, Ops); |
4047 | SDValue VecOne = DAG.getConstant(Val: 1, DL, VT: WideVecVT); |
4048 | WideVec = DAG.getNode(Opcode: ISD::AND, DL, VT: WideVecVT, N1: WideVec, N2: VecOne); |
4049 | } |
4050 | |
4051 | return DAG.getSetCC(DL, VT, LHS: WideVec, RHS: VecZero, Cond: ISD::SETNE); |
4052 | } |
4053 | |
4054 | if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) { |
4055 | if (auto Gather = matchSplatAsGather(SplatVal: Splat, VT, DL, DAG, Subtarget)) |
4056 | return Gather; |
4057 | unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
4058 | : RISCVISD::VMV_V_X_VL; |
4059 | if (!VT.isFloatingPoint()) |
4060 | Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat); |
4061 | Splat = |
4062 | DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL); |
4063 | return convertFromScalableVector(VT, V: Splat, DAG, Subtarget); |
4064 | } |
4065 | |
4066 | if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) |
4067 | return Res; |
4068 | |
4069 | // If we're compiling for an exact VLEN value, we can split our work per |
4070 | // register in the register group. |
4071 | if (const auto VLen = Subtarget.getRealVLen(); |
4072 | VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) { |
4073 | MVT ElemVT = VT.getVectorElementType(); |
4074 | unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); |
4075 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4076 | MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg); |
4077 | MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget); |
4078 | assert(M1VT == getLMUL1VT(M1VT)); |
4079 | |
4080 | // The following semantically builds up a fixed length concat_vector |
4081 | // of the component build_vectors. We eagerly lower to scalable and |
4082 | // insert_subvector here to avoid DAG combining it back to a large |
4083 | // build_vector. |
4084 | SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end()); |
4085 | unsigned NumOpElts = M1VT.getVectorMinNumElements(); |
4086 | SDValue Vec = DAG.getUNDEF(VT: ContainerVT); |
4087 | for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) { |
4088 | auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(N: i, M: ElemsPerVReg); |
4089 | SDValue SubBV = |
4090 | DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL, VT: OneRegVT, Ops: OneVRegOfOps); |
4091 | SubBV = convertToScalableVector(VT: M1VT, V: SubBV, DAG, Subtarget); |
4092 | unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts; |
4093 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubBV, |
4094 | N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL)); |
4095 | } |
4096 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
4097 | } |
4098 | |
4099 | // If we're about to resort to vslide1down (or stack usage), pack our |
4100 | // elements into the widest scalar type we can. This will force a VL/VTYPE |
4101 | // toggle, but reduces the critical path, the number of vslide1down ops |
4102 | // required, and possibly enables scalar folds of the values. |
4103 | if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget)) |
4104 | return Res; |
4105 | |
4106 | // For m1 vectors, if we have non-undef values in both halves of our vector, |
4107 | // split the vector into low and high halves, build them separately, then |
4108 | // use a vselect to combine them. For long vectors, this cuts the critical |
4109 | // path of the vslide1down sequence in half, and gives us an opportunity |
4110 | // to special case each half independently. Note that we don't change the |
4111 | // length of the sub-vectors here, so if both fallback to the generic |
4112 | // vslide1down path, we should be able to fold the vselect into the final |
4113 | // vslidedown (for the undef tail) for the first half w/ masking. |
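// For example, v8i32 <a, b, c, d, e, f, g, h> becomes
// vselect <1,1,1,1,0,0,0,0>, <a, b, c, d, u, u, u, u>, <u, u, u, u, e, f, g, h>
// with u denoting undef.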
4114 | unsigned NumElts = VT.getVectorNumElements(); |
4115 | unsigned NumUndefElts = |
4116 | count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); }); |
4117 | unsigned NumDefElts = NumElts - NumUndefElts; |
4118 | if (NumDefElts >= 8 && NumDefElts > NumElts / 2 && |
4119 | ContainerVT.bitsLE(VT: getLMUL1VT(VT: ContainerVT))) { |
4120 | SmallVector<SDValue> SubVecAOps, SubVecBOps; |
4121 | SmallVector<SDValue> MaskVals; |
4122 | SDValue UndefElem = DAG.getUNDEF(VT: Op->getOperand(Num: 0)->getValueType(ResNo: 0)); |
4123 | SubVecAOps.reserve(N: NumElts); |
4124 | SubVecBOps.reserve(N: NumElts); |
4125 | for (unsigned i = 0; i < NumElts; i++) { |
4126 | SDValue Elem = Op->getOperand(Num: i); |
4127 | if (i < NumElts / 2) { |
4128 | SubVecAOps.push_back(Elt: Elem); |
4129 | SubVecBOps.push_back(Elt: UndefElem); |
4130 | } else { |
4131 | SubVecAOps.push_back(Elt: UndefElem); |
4132 | SubVecBOps.push_back(Elt: Elem); |
4133 | } |
4134 | bool SelectMaskVal = (i < NumElts / 2); |
4135 | MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT)); |
4136 | } |
4137 | assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts && |
4138 | MaskVals.size() == NumElts); |
4139 | |
4140 | SDValue SubVecA = DAG.getBuildVector(VT, DL, Ops: SubVecAOps); |
4141 | SDValue SubVecB = DAG.getBuildVector(VT, DL, Ops: SubVecBOps); |
4142 | MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts); |
4143 | SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals); |
4144 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: SubVecA, N3: SubVecB); |
4145 | } |
4146 | |
4147 | // Cap the cost at a value linear to the number of elements in the vector. |
4148 | // The default lowering is to use the stack. The vector store + scalar loads |
4149 | // is linear in VL. However, at high LMULs vslide1down and vslidedown end up |
4150 | // being (at least) linear in LMUL. As a result, using the vslidedown |
4151 | // lowering for every element ends up being VL*LMUL. |
4152 | // TODO: Should we be directly costing the stack alternative? Doing so might |
4153 | // give us a more accurate upper bound. |
4154 | InstructionCost LinearBudget = VT.getVectorNumElements() * 2; |
4155 | |
4156 | // TODO: unify with TTI getSlideCost. |
4157 | InstructionCost PerSlideCost = 1; |
4158 | switch (RISCVTargetLowering::getLMUL(VT: ContainerVT)) { |
4159 | default: break; |
4160 | case RISCVII::VLMUL::LMUL_2: |
4161 | PerSlideCost = 2; |
4162 | break; |
4163 | case RISCVII::VLMUL::LMUL_4: |
4164 | PerSlideCost = 4; |
4165 | break; |
4166 | case RISCVII::VLMUL::LMUL_8: |
4167 | PerSlideCost = 8; |
4168 | break; |
4169 | } |
4170 | |
4171 | // TODO: Should we be using the build instseq then cost + evaluate scheme |
4172 | // we use for integer constants here? |
4173 | unsigned UndefCount = 0; |
4174 | for (const SDValue &V : Op->ops()) { |
4175 | if (V.isUndef()) { |
4176 | UndefCount++; |
4177 | continue; |
4178 | } |
4179 | if (UndefCount) { |
4180 | LinearBudget -= PerSlideCost; |
4181 | UndefCount = 0; |
4182 | } |
4183 | LinearBudget -= PerSlideCost; |
4184 | } |
4185 | if (UndefCount) { |
4186 | LinearBudget -= PerSlideCost; |
4187 | } |
4188 | |
4189 | if (LinearBudget < 0) |
4190 | return SDValue(); |
4191 | |
4192 | assert((!VT.isFloatingPoint() || |
4193 | VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) && |
4194 | "Illegal type which will result in reserved encoding" ); |
4195 | |
4196 | const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
4197 | |
4198 | SDValue Vec; |
4199 | UndefCount = 0; |
4200 | for (SDValue V : Op->ops()) { |
4201 | if (V.isUndef()) { |
4202 | UndefCount++; |
4203 | continue; |
4204 | } |
4205 | |
4206 | // Start our sequence with a TA splat in the hopes that hardware is able to |
4207 | // recognize there's no dependency on the prior value of our temporary |
4208 | // register. |
4209 | if (!Vec) { |
4210 | Vec = DAG.getSplatVector(VT, DL, Op: V); |
4211 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
4212 | UndefCount = 0; |
4213 | continue; |
4214 | } |
4215 | |
4216 | if (UndefCount) { |
4217 | const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT()); |
4218 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
4219 | Op: Vec, Offset, Mask, VL, Policy); |
4220 | UndefCount = 0; |
4221 | } |
4222 | auto OpCode = |
4223 | VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; |
4224 | if (!VT.isFloatingPoint()) |
4225 | V = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: V); |
4226 | Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec, |
4227 | N3: V, N4: Mask, N5: VL); |
4228 | } |
4229 | if (UndefCount) { |
4230 | const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT()); |
4231 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
4232 | Op: Vec, Offset, Mask, VL, Policy); |
4233 | } |
4234 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
4235 | } |
4236 | |
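// Lower a splat of the i64 value formed by the (Hi, Lo) pair of i32 scalars on
// RV32. Where Hi is the sign extension of Lo (or is undef), a plain vmv.v.x of
// Lo suffices. Where Lo == Hi, the splat can sometimes be performed at EEW=32
// with a doubled VL. Otherwise fall back to SPLAT_VECTOR_SPLIT_I64_VL, which
// uses a stack store and a strided load.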
4237 | static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, |
4238 | SDValue Lo, SDValue Hi, SDValue VL, |
4239 | SelectionDAG &DAG) { |
4240 | if (!Passthru) |
4241 | Passthru = DAG.getUNDEF(VT); |
4242 | if (isa<ConstantSDNode>(Val: Lo) && isa<ConstantSDNode>(Val: Hi)) { |
4243 | int32_t LoC = cast<ConstantSDNode>(Val&: Lo)->getSExtValue(); |
4244 | int32_t HiC = cast<ConstantSDNode>(Val&: Hi)->getSExtValue(); |
4245 | // If Hi constant is all the same sign bit as Lo, lower this as a custom |
4246 | // node in order to try and match RVV vector/scalar instructions. |
4247 | if ((LoC >> 31) == HiC) |
4248 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL); |
4249 | |
4250 | // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, |
4251 | // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use |
4252 | // vlmax vsetvli or vsetivli to change the VL. |
4253 | // FIXME: Support larger constants? |
4254 | // FIXME: Support non-constant VLs by saturating? |
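// For example, splatting the i64 value 0x0000000500000005 (Lo == Hi == 5)
// with VL=2 is equivalent to splatting the i32 value 5 with VL=4 at EEW=32.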
4255 | if (LoC == HiC) { |
4256 | SDValue NewVL; |
4257 | if (isAllOnesConstant(V: VL) || |
4258 | (isa<RegisterSDNode>(Val: VL) && |
4259 | cast<RegisterSDNode>(Val&: VL)->getReg() == RISCV::X0)) |
4260 | NewVL = DAG.getRegister(Reg: RISCV::X0, VT: MVT::i32); |
4261 | else if (isa<ConstantSDNode>(Val: VL) && isUInt<4>(x: VL->getAsZExtVal())) |
4262 | NewVL = DAG.getNode(Opcode: ISD::ADD, DL, VT: VL.getValueType(), N1: VL, N2: VL); |
4263 | |
4264 | if (NewVL) { |
4265 | MVT InterVT = |
4266 | MVT::getVectorVT(VT: MVT::i32, EC: VT.getVectorElementCount() * 2); |
4267 | auto InterVec = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterVT, |
4268 | N1: DAG.getUNDEF(VT: InterVT), N2: Lo, N3: NewVL); |
4269 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: InterVec); |
4270 | } |
4271 | } |
4272 | } |
4273 | |
4274 | // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. |
4275 | if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(i: 0) == Lo && |
4276 | isa<ConstantSDNode>(Val: Hi.getOperand(i: 1)) && |
4277 | Hi.getConstantOperandVal(i: 1) == 31) |
4278 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL); |
4279 | |
4280 | // If the hi bits of the splat are undefined, then it's fine to just splat Lo |
4281 | // even if it might be sign extended. |
4282 | if (Hi.isUndef()) |
4283 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL); |
4284 | |
4285 | // Fall back to a stack store and stride x0 vector load. |
4286 | return DAG.getNode(Opcode: RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, N1: Passthru, N2: Lo, |
4287 | N3: Hi, N4: VL); |
4288 | } |
4289 | |
4290 | // Called by type legalization to handle splat of i64 on RV32. |
4291 | // FIXME: We can optimize this when the type has sign or zero bits in one |
4292 | // of the halves. |
4293 | static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, |
4294 | SDValue Scalar, SDValue VL, |
4295 | SelectionDAG &DAG) { |
4296 | assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!" ); |
4297 | SDValue Lo, Hi; |
4298 | std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Scalar, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
4299 | return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); |
4300 | } |
4301 | |
4302 | // This function lowers a splat of a scalar operand Splat with the vector |
4303 | // length VL. It ensures the final sequence is type legal, which is useful when |
4304 | // lowering a splat after type legalization. |
4305 | static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, |
4306 | MVT VT, const SDLoc &DL, SelectionDAG &DAG, |
4307 | const RISCVSubtarget &Subtarget) { |
4308 | bool HasPassthru = Passthru && !Passthru.isUndef(); |
4309 | if (!HasPassthru && !Passthru) |
4310 | Passthru = DAG.getUNDEF(VT); |
4311 | if (VT.isFloatingPoint()) |
4312 | return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
4313 | |
4314 | MVT XLenVT = Subtarget.getXLenVT(); |
4315 | |
4316 | // Simplest case is that the operand needs to be promoted to XLenVT. |
4317 | if (Scalar.getValueType().bitsLE(VT: XLenVT)) { |
4318 | // If the operand is a constant, sign extend to increase our chances |
4319 | // of being able to use a .vi instruction. ANY_EXTEND would become a |
4320 | // zero extend and the simm5 check in isel would fail. |
4321 | // FIXME: Should we ignore the upper bits in isel instead? |
4322 | unsigned ExtOpc = |
4323 | isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
4324 | Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar); |
4325 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
4326 | } |
4327 | |
4328 | assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && |
4329 | "Unexpected scalar for splat lowering!" ); |
4330 | |
4331 | if (isOneConstant(V: VL) && isNullConstant(V: Scalar)) |
4332 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru, |
4333 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: VL); |
4334 | |
4335 | // Otherwise use the more complicated splatting algorithm. |
4336 | return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); |
4337 | } |
4338 | |
4339 | // This function lowers an insert of a scalar operand Scalar into lane |
4340 | // 0 of the vector regardless of the value of VL. The contents of the |
4341 | // remaining lanes of the result vector are unspecified. VL is assumed |
4342 | // to be non-zero. |
4343 | static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, |
4344 | const SDLoc &DL, SelectionDAG &DAG, |
4345 | const RISCVSubtarget &Subtarget) { |
4346 | assert(VT.isScalableVector() && "Expect VT is scalable vector type." ); |
4347 | |
4348 | const MVT XLenVT = Subtarget.getXLenVT(); |
4349 | SDValue Passthru = DAG.getUNDEF(VT); |
4350 | |
4351 | if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
4352 | isNullConstant(V: Scalar.getOperand(i: 1))) { |
4353 | SDValue ExtractedVal = Scalar.getOperand(i: 0); |
4354 | // The element types must be the same. |
4355 | if (ExtractedVal.getValueType().getVectorElementType() == |
4356 | VT.getVectorElementType()) { |
4357 | MVT ExtractedVT = ExtractedVal.getSimpleValueType(); |
4358 | MVT ExtractedContainerVT = ExtractedVT; |
4359 | if (ExtractedContainerVT.isFixedLengthVector()) { |
4360 | ExtractedContainerVT = getContainerForFixedLengthVector( |
4361 | DAG, VT: ExtractedContainerVT, Subtarget); |
4362 | ExtractedVal = convertToScalableVector(VT: ExtractedContainerVT, |
4363 | V: ExtractedVal, DAG, Subtarget); |
4364 | } |
4365 | if (ExtractedContainerVT.bitsLE(VT)) |
4366 | return DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru, |
4367 | N2: ExtractedVal, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
4368 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: ExtractedVal, |
4369 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
4370 | } |
4371 | } |
4372 | |
4373 | |
4374 | if (VT.isFloatingPoint()) |
4375 | return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, |
4376 | N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL); |
4377 | |
4378 | // Avoid the tricky legalization cases by falling back to using the |
4379 | // splat code which already handles it gracefully. |
4380 | if (!Scalar.getValueType().bitsLE(VT: XLenVT)) |
4381 | return lowerScalarSplat(Passthru: DAG.getUNDEF(VT), Scalar, |
4382 | VL: DAG.getConstant(Val: 1, DL, VT: XLenVT), |
4383 | VT, DL, DAG, Subtarget); |
4384 | |
4385 | // If the operand is a constant, sign extend to increase our chances |
4386 | // of being able to use a .vi instruction. ANY_EXTEND would become a |
4387 | // zero extend and the simm5 check in isel would fail. |
4388 | // FIXME: Should we ignore the upper bits in isel instead? |
4389 | unsigned ExtOpc = |
4390 | isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
4391 | Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar); |
4392 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, |
4393 | N1: DAG.getUNDEF(VT), N2: Scalar, N3: VL); |
4394 | } |
4395 | |
4396 | // Does this shuffle extract either the even or the odd elements of a vector? |
4397 | // That is, specifically, either (a) or (b) below. |
4398 | // t34: v8i8 = extract_subvector t11, Constant:i64<0> |
4399 | // t33: v8i8 = extract_subvector t11, Constant:i64<8> |
4400 | // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 |
4401 | // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 |
4402 | // Returns {Src Vector, Even Elements} on success |
4403 | static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, |
4404 | SDValue V2, ArrayRef<int> Mask, |
4405 | const RISCVSubtarget &Subtarget) { |
4406 | // Need to be able to widen the vector. |
4407 | if (VT.getScalarSizeInBits() >= Subtarget.getELen()) |
4408 | return false; |
4409 | |
4410 | // Both inputs must be extracts. |
4411 | if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
4412 | V2.getOpcode() != ISD::EXTRACT_SUBVECTOR) |
4413 | return false; |
4414 | |
4415 | // Extracting from the same source. |
4416 | SDValue Src = V1.getOperand(i: 0); |
4417 | if (Src != V2.getOperand(i: 0)) |
4418 | return false; |
4419 | |
4420 | // Src needs to have twice the number of elements. |
4421 | if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) |
4422 | return false; |
4423 | |
4424 | // The extracts must extract the two halves of the source. |
4425 | if (V1.getConstantOperandVal(i: 1) != 0 || |
4426 | V2.getConstantOperandVal(i: 1) != Mask.size()) |
4427 | return false; |
4428 | |
4429 | // First index must be the first even or odd element from V1. |
4430 | if (Mask[0] != 0 && Mask[0] != 1) |
4431 | return false; |
4432 | |
4433 | // The others must increase by 2 each time. |
4434 | // TODO: Support undef elements? |
4435 | for (unsigned i = 1; i != Mask.size(); ++i) |
4436 | if (Mask[i] != Mask[i - 1] + 2) |
4437 | return false; |
4438 | |
4439 | return true; |
4440 | } |
4441 | |
4442 | /// Is this shuffle interleaving contiguous elements from one vector into the |
4443 | /// even elements and contiguous elements from another vector into the odd |
4444 | /// elements? \p EvenSrc will contain the element that should be in the first |
4445 | /// even element. \p OddSrc will contain the element that should be in the |
4446 | /// first odd element. These can be the first element in a source or the |
4447 | /// element halfway through the source. |
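/// For example, with two v8i8 sources the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// yields EvenSrc = 0 and OddSrc = 8 (the start of the second source).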
4448 | static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc, |
4449 | int &OddSrc, const RISCVSubtarget &Subtarget) { |
4450 | // We need to be able to widen elements to the next larger integer type. |
4451 | if (VT.getScalarSizeInBits() >= Subtarget.getELen()) |
4452 | return false; |
4453 | |
4454 | int Size = Mask.size(); |
4455 | int NumElts = VT.getVectorNumElements(); |
4456 | assert(Size == (int)NumElts && "Unexpected mask size" ); |
4457 | |
4458 | SmallVector<unsigned, 2> StartIndexes; |
4459 | if (!ShuffleVectorInst::isInterleaveMask(Mask, Factor: 2, NumInputElts: Size * 2, StartIndexes)) |
4460 | return false; |
4461 | |
4462 | EvenSrc = StartIndexes[0]; |
4463 | OddSrc = StartIndexes[1]; |
4464 | |
4465 | // One source should be low half of first vector. |
4466 | if (EvenSrc != 0 && OddSrc != 0) |
4467 | return false; |
4468 | |
4469 | // Subvectors will be extracted either at the start of the two input |
4470 | // vectors, or at the start and middle of the first vector if it's a unary |
4471 | // interleave. |
4472 | // In both cases, HalfNumElts will be extracted. |
4473 | // We need to ensure that the extract indices are 0 or HalfNumElts otherwise |
4474 | // we'll create an illegal extract_subvector. |
4475 | // FIXME: We could support other values using a slidedown first. |
4476 | int HalfNumElts = NumElts / 2; |
4477 | return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0); |
4478 | } |
4479 | |
4480 | /// Match shuffles that concatenate two vectors, rotate the concatenation, |
4481 | /// and then extract the original number of elements from the rotated result. |
4482 | /// This is equivalent to vector.splice or X86's PALIGNR instruction. The |
4483 | /// returned rotation amount is for a rotate right, where elements move from |
4484 | /// higher elements to lower elements. \p LoSrc indicates the first source |
4485 | /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector |
4486 | /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be |
4487 | /// 0 or 1 if a rotation is found. |
4488 | /// |
4489 | /// NOTE: We talk about rotate to the right which matches how bit shift and |
4490 | /// rotate instructions are described where LSBs are on the right, but LLVM IR |
4491 | /// and the table below write vectors with the lowest elements on the left. |
4492 | static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) { |
4493 | int Size = Mask.size(); |
4494 | |
4495 | // We need to detect various ways of spelling a rotation: |
4496 | // [11, 12, 13, 14, 15, 0, 1, 2] |
4497 | // [-1, 12, 13, 14, -1, -1, 1, -1] |
4498 | // [-1, -1, -1, -1, -1, -1, 1, 2] |
4499 | // [ 3, 4, 5, 6, 7, 8, 9, 10] |
4500 | // [-1, 4, 5, 6, -1, -1, 9, -1] |
4501 | // [-1, 4, 5, 6, -1, -1, -1, -1] |
4502 | int Rotation = 0; |
4503 | LoSrc = -1; |
4504 | HiSrc = -1; |
4505 | for (int i = 0; i != Size; ++i) { |
4506 | int M = Mask[i]; |
4507 | if (M < 0) |
4508 | continue; |
4509 | |
4510 | // Determine where a rotate vector would have started. |
4511 | int StartIdx = i - (M % Size); |
4512 | // The identity rotation isn't interesting, stop. |
4513 | if (StartIdx == 0) |
4514 | return -1; |
4515 | |
4516 | // If we found the tail of a vector the rotation must be the missing |
4517 | // front. If we found the head of a vector, it must be how much of the |
4518 | // head. |
4519 | int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; |
4520 | |
4521 | if (Rotation == 0) |
4522 | Rotation = CandidateRotation; |
4523 | else if (Rotation != CandidateRotation) |
4524 | // The rotations don't match, so we can't match this mask. |
4525 | return -1; |
4526 | |
4527 | // Compute which value this mask is pointing at. |
4528 | int MaskSrc = M < Size ? 0 : 1; |
4529 | |
4530 | // Compute which of the two target values this index should be assigned to. |
4531 | // This reflects whether the high elements are remaining or the low elements |
4532 | // are remaining. |
4533 | int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc; |
4534 | |
4535 | // Either set up this value if we've not encountered it before, or check |
4536 | // that it remains consistent. |
4537 | if (TargetSrc < 0) |
4538 | TargetSrc = MaskSrc; |
4539 | else if (TargetSrc != MaskSrc) |
4540 | // This may be a rotation, but it pulls from the inputs in some |
4541 | // unsupported interleaving. |
4542 | return -1; |
4543 | } |
4544 | |
4545 | // Check that we successfully analyzed the mask, and normalize the results. |
4546 | assert(Rotation != 0 && "Failed to locate a viable rotation!" ); |
4547 | assert((LoSrc >= 0 || HiSrc >= 0) && |
4548 | "Failed to find a rotated input vector!" ); |
4549 | |
4550 | return Rotation; |
4551 | } |
4552 | |
4553 | // Lower a deinterleave shuffle to vnsrl. |
4554 | // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) |
4555 | // -> [p, q, r, s] (EvenElts == false) |
4556 | // VT is the type of the vector to return, <[vscale x ]n x ty> |
4557 | // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> |
4558 | static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, |
4559 | bool EvenElts, |
4560 | const RISCVSubtarget &Subtarget, |
4561 | SelectionDAG &DAG) { |
4562 | // The result is a vector of type <m x n x ty> |
4563 | MVT ContainerVT = VT; |
4564 | // Convert fixed vectors to scalable if needed |
4565 | if (ContainerVT.isFixedLengthVector()) { |
4566 | assert(Src.getSimpleValueType().isFixedLengthVector()); |
4567 | ContainerVT = getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget); |
4568 | |
4569 | // The source is a vector of type <m x n*2 x ty> |
4570 | MVT SrcContainerVT = |
4571 | MVT::getVectorVT(VT: ContainerVT.getVectorElementType(), |
4572 | EC: ContainerVT.getVectorElementCount() * 2); |
4573 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
4574 | } |
4575 | |
4576 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
4577 | |
4578 | // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2> |
4579 | // This also converts FP to int. |
4580 | unsigned EltBits = ContainerVT.getScalarSizeInBits(); |
4581 | MVT WideSrcContainerVT = MVT::getVectorVT( |
4582 | VT: MVT::getIntegerVT(BitWidth: EltBits * 2), EC: ContainerVT.getVectorElementCount()); |
4583 | Src = DAG.getBitcast(VT: WideSrcContainerVT, V: Src); |
4584 | |
4585 | // The integer version of the container type. |
4586 | MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger(); |
4587 | |
4588 | // If we want even elements, then the shift amount is 0. Otherwise, shift by |
4589 | // the original element size. |
4590 | unsigned Shift = EvenElts ? 0 : EltBits; |
4591 | SDValue SplatShift = DAG.getNode( |
4592 | Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), |
4593 | N2: DAG.getConstant(Val: Shift, DL, VT: Subtarget.getXLenVT()), N3: VL); |
4594 | SDValue Res = |
4595 | DAG.getNode(Opcode: RISCVISD::VNSRL_VL, DL, VT: IntContainerVT, N1: Src, N2: SplatShift, |
4596 | N3: DAG.getUNDEF(VT: IntContainerVT), N4: TrueMask, N5: VL); |
4597 | // Cast back to FP if needed. |
4598 | Res = DAG.getBitcast(VT: ContainerVT, V: Res); |
4599 | |
4600 | if (VT.isFixedLengthVector()) |
4601 | Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
4602 | return Res; |
4603 | } |
4604 | |
4605 | // Lower the following shuffle to vslidedown. |
4606 | // a) |
4607 | // t49: v8i8 = extract_subvector t13, Constant:i64<0> |
4608 | // t109: v8i8 = extract_subvector t13, Constant:i64<8> |
4609 | // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 |
4610 | // b) |
4611 | // t69: v16i16 = extract_subvector t68, Constant:i64<0> |
4612 | // t23: v8i16 = extract_subvector t69, Constant:i64<0> |
4613 | // t29: v4i16 = extract_subvector t23, Constant:i64<4> |
4614 | // t26: v8i16 = extract_subvector t69, Constant:i64<8> |
4615 | // t30: v4i16 = extract_subvector t26, Constant:i64<0> |
4616 | // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 |
4617 | static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, |
4618 | SDValue V1, SDValue V2, |
4619 | ArrayRef<int> Mask, |
4620 | const RISCVSubtarget &Subtarget, |
4621 | SelectionDAG &DAG) { |
4622 | auto findNonEXTRACT_SUBVECTORParent = |
4623 | [](SDValue Parent) -> std::pair<SDValue, uint64_t> { |
4624 | uint64_t Offset = 0; |
4625 | while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
4626 | // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from |
4627 | // a scalable vector. But we don't want to match the case. |
4628 | Parent.getOperand(i: 0).getSimpleValueType().isFixedLengthVector()) { |
4629 | Offset += Parent.getConstantOperandVal(i: 1); |
4630 | Parent = Parent.getOperand(i: 0); |
4631 | } |
4632 | return std::make_pair(x&: Parent, y&: Offset); |
4633 | }; |
4634 | |
4635 | auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1); |
4636 | auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2); |
4637 | |
4638 | // Extracting from the same source. |
4639 | SDValue Src = V1Src; |
4640 | if (Src != V2Src) |
4641 | return SDValue(); |
4642 | |
4643 | // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs. |
4644 | SmallVector<int, 16> NewMask(Mask); |
4645 | for (size_t i = 0; i != NewMask.size(); ++i) { |
4646 | if (NewMask[i] == -1) |
4647 | continue; |
4648 | |
4649 | if (static_cast<size_t>(NewMask[i]) < NewMask.size()) { |
4650 | NewMask[i] = NewMask[i] + V1IndexOffset; |
4651 | } else { |
4652 | // Minus NewMask.size() is needed. Otherwise, the b case would be |
4653 | // <5,6,7,12> instead of <5,6,7,8>. |
4654 | NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset; |
4655 | } |
4656 | } |
4657 | |
4658 | // First index must be known and non-zero. It will be used as the slidedown |
4659 | // amount. |
4660 | if (NewMask[0] <= 0) |
4661 | return SDValue(); |
4662 | |
4663 | // NewMask is also continuous. |
4664 | for (unsigned i = 1; i != NewMask.size(); ++i) |
4665 | if (NewMask[i - 1] + 1 != NewMask[i]) |
4666 | return SDValue(); |
4667 | |
4668 | MVT XLenVT = Subtarget.getXLenVT(); |
4669 | MVT SrcVT = Src.getSimpleValueType(); |
4670 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget); |
4671 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
4672 | SDValue Slidedown = |
4673 | getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
4674 | Op: convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget), |
4675 | Offset: DAG.getConstant(Val: NewMask[0], DL, VT: XLenVT), Mask: TrueMask, VL); |
4676 | return DAG.getNode( |
4677 | Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, |
4678 | N1: convertFromScalableVector(VT: SrcVT, V: Slidedown, DAG, Subtarget), |
4679 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
4680 | } |
4681 | |
4682 | // Because vslideup leaves the destination elements at the start intact, we can |
4683 | // use it to perform shuffles that insert subvectors: |
4684 | // |
4685 | // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11> |
4686 | // -> |
4687 | // vsetvli zero, 8, e8, mf2, ta, ma |
4688 | // vslideup.vi v8, v9, 4 |
4689 | // |
4690 | // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7> |
4691 | // -> |
4692 | // vsetvli zero, 5, e8, mf2, tu, ma |
4693 | // vslideup.vi v8, v9, 2 |
4694 | static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, |
4695 | SDValue V1, SDValue V2, |
4696 | ArrayRef<int> Mask, |
4697 | const RISCVSubtarget &Subtarget, |
4698 | SelectionDAG &DAG) { |
4699 | unsigned NumElts = VT.getVectorNumElements(); |
4700 | int NumSubElts, Index; |
4701 | if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumSrcElts: NumElts, NumSubElts, |
4702 | Index)) |
4703 | return SDValue(); |
4704 | |
4705 | bool OpsSwapped = Mask[Index] < (int)NumElts; |
4706 | SDValue InPlace = OpsSwapped ? V2 : V1; |
4707 | SDValue ToInsert = OpsSwapped ? V1 : V2; |
4708 | |
4709 | MVT XLenVT = Subtarget.getXLenVT(); |
4710 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4711 | auto TrueMask = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).first; |
4712 | // We slide up by the index that the subvector is being inserted at, and set |
4713 | // VL to the index + the number of elements being inserted. |
4714 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC; |
4715 | // If we're adding a suffix to the in place vector, i.e. inserting right |
4716 | // up to the very end of it, then we don't actually care about the tail. |
4717 | if (NumSubElts + Index >= (int)NumElts) |
4718 | Policy |= RISCVII::TAIL_AGNOSTIC; |
4719 | |
4720 | InPlace = convertToScalableVector(VT: ContainerVT, V: InPlace, DAG, Subtarget); |
4721 | ToInsert = convertToScalableVector(VT: ContainerVT, V: ToInsert, DAG, Subtarget); |
4722 | SDValue VL = DAG.getConstant(Val: NumSubElts + Index, DL, VT: XLenVT); |
4723 | |
4724 | SDValue Res; |
4725 | // If we're inserting into the lowest elements, use a tail undisturbed |
4726 | // vmv.v.v. |
4727 | if (Index == 0) |
4728 | Res = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: InPlace, N2: ToInsert, |
4729 | N3: VL); |
4730 | else |
4731 | Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: InPlace, Op: ToInsert, |
4732 | Offset: DAG.getConstant(Val: Index, DL, VT: XLenVT), Mask: TrueMask, VL, Policy); |
4733 | return convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
4734 | } |
4735 | |
4736 | /// Match v(f)slide1up/down idioms. These operations involve sliding |
4737 | /// N-1 elements to make room for an inserted scalar at one end. |
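/// For example, with V1 a splat build_vector of a scalar x, the v4i32 shuffle
/// mask <5, 6, 7, 0> selects <V2[1], V2[2], V2[3], x> and can be lowered as a
/// vslide1down.vx of V2 by x.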
4738 | static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, |
4739 | SDValue V1, SDValue V2, |
4740 | ArrayRef<int> Mask, |
4741 | const RISCVSubtarget &Subtarget, |
4742 | SelectionDAG &DAG) { |
4743 | bool OpsSwapped = false; |
4744 | if (!isa<BuildVectorSDNode>(Val: V1)) { |
4745 | if (!isa<BuildVectorSDNode>(Val: V2)) |
4746 | return SDValue(); |
4747 | std::swap(a&: V1, b&: V2); |
4748 | OpsSwapped = true; |
4749 | } |
4750 | SDValue Splat = cast<BuildVectorSDNode>(Val&: V1)->getSplatValue(); |
4751 | if (!Splat) |
4752 | return SDValue(); |
4753 | |
4754 | // Return true if the mask could describe a slide of Mask.size() - 1 |
4755 | // elements from concat_vector(V1, V2)[Base:] to [Offset:]. |
4756 | auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) { |
4757 | const unsigned S = (Offset > 0) ? 0 : -Offset; |
4758 | const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0); |
4759 | for (unsigned i = S; i != E; ++i) |
4760 | if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset) |
4761 | return false; |
4762 | return true; |
4763 | }; |
4764 | |
4765 | const unsigned NumElts = VT.getVectorNumElements(); |
4766 | bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1); |
4767 | if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1)) |
4768 | return SDValue(); |
4769 | |
4770 | const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0]; |
4771 | // The inserted lane must come from the splat; an undef scalar is legal but not profitable. |
4772 | if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped) |
4773 | return SDValue(); |
4774 | |
4775 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4776 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
4777 | auto OpCode = IsVSlidedown ? |
4778 | (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : |
4779 | (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); |
4780 | if (!VT.isFloatingPoint()) |
4781 | Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Splat); |
4782 | auto Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, |
4783 | N1: DAG.getUNDEF(VT: ContainerVT), |
4784 | N2: convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget), |
4785 | N3: Splat, N4: TrueMask, N5: VL); |
4786 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
4787 | } |
4788 | |
4789 | // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx |
4790 | // to create an interleaved vector of <[vscale x] n*2 x ty>. |
4791 | // This requires that the size of ty is less than the subtarget's maximum ELEN. |
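// For example, interleaving EvenV = <a, b> with OddV = <p, q> produces
// <a, p, b, q>: each widened element holds (odd << SEW) + even, which reads
// back as <even, odd> once bitcast to the narrower element type.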
4792 | static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, |
4793 | const SDLoc &DL, SelectionDAG &DAG, |
4794 | const RISCVSubtarget &Subtarget) { |
4795 | MVT VecVT = EvenV.getSimpleValueType(); |
4796 | MVT VecContainerVT = VecVT; // <vscale x n x ty> |
4797 | // Convert fixed vectors to scalable if needed |
4798 | if (VecContainerVT.isFixedLengthVector()) { |
4799 | VecContainerVT = getContainerForFixedLengthVector(DAG, VT: VecVT, Subtarget); |
4800 | EvenV = convertToScalableVector(VT: VecContainerVT, V: EvenV, DAG, Subtarget); |
4801 | OddV = convertToScalableVector(VT: VecContainerVT, V: OddV, DAG, Subtarget); |
4802 | } |
4803 | |
4804 | assert(VecVT.getScalarSizeInBits() < Subtarget.getELen()); |
4805 | |
4806 | // We're working with a vector of the same size as the resulting |
4807 | // interleaved vector, but with half the number of elements and |
4808 | // twice the SEW (Hence the restriction on not using the maximum |
4809 | // ELEN) |
4810 | MVT WideVT = |
4811 | MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VecVT.getScalarSizeInBits() * 2), |
4812 | EC: VecVT.getVectorElementCount()); |
4813 | MVT WideContainerVT = WideVT; // <vscale x n x ty*2> |
4814 | if (WideContainerVT.isFixedLengthVector()) |
4815 | WideContainerVT = getContainerForFixedLengthVector(DAG, VT: WideVT, Subtarget); |
4816 | |
4817 | // Bitcast the input vectors to integers in case they are FP |
4818 | VecContainerVT = VecContainerVT.changeTypeToInteger(); |
4819 | EvenV = DAG.getBitcast(VT: VecContainerVT, V: EvenV); |
4820 | OddV = DAG.getBitcast(VT: VecContainerVT, V: OddV); |
4821 | |
4822 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT: VecContainerVT, DL, DAG, Subtarget); |
4823 | SDValue Passthru = DAG.getUNDEF(VT: WideContainerVT); |
4824 | |
4825 | SDValue Interleaved; |
4826 | if (OddV.isUndef()) { |
4827 | // If OddV is undef, this is a zero extend. |
4828 | // FIXME: Not only does this optimize the code, it fixes some correctness |
4829 | // issues because MIR does not have freeze. |
4830 | Interleaved = |
4831 | DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: EvenV, N2: Mask, N3: VL); |
4832 | } else if (Subtarget.hasStdExtZvbb()) { |
4833 | // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. |
4834 | SDValue OffsetVec = |
4835 | DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: VecContainerVT); |
4836 | Interleaved = DAG.getNode(Opcode: RISCVISD::VWSLL_VL, DL, VT: WideContainerVT, N1: OddV, |
4837 | N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL); |
4838 | if (!EvenV.isUndef()) |
4839 | Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_W_VL, DL, VT: WideContainerVT, |
4840 | N1: Interleaved, N2: EvenV, N3: Passthru, N4: Mask, N5: VL); |
4841 | } else if (EvenV.isUndef()) { |
4842 | Interleaved = |
4843 | DAG.getNode(Opcode: RISCVISD::VZEXT_VL, DL, VT: WideContainerVT, N1: OddV, N2: Mask, N3: VL); |
4844 | |
4845 | SDValue OffsetVec = |
4846 | DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: WideContainerVT); |
4847 | Interleaved = DAG.getNode(Opcode: RISCVISD::SHL_VL, DL, VT: WideContainerVT, |
4848 | N1: Interleaved, N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL); |
4849 | } else { |
4850 | // FIXME: We should freeze the odd vector here. We already handled the case |
4851 | // of provably undef/poison above. |
4852 | |
4853 | // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with |
4854 | // vwaddu.vv |
4855 | Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_VL, DL, VT: WideContainerVT, N1: EvenV, |
4856 | N2: OddV, N3: Passthru, N4: Mask, N5: VL); |
4857 | |
4858 | // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones. |
4859 | SDValue AllOnesVec = DAG.getSplatVector( |
4860 | VT: VecContainerVT, DL, Op: DAG.getAllOnesConstant(DL, VT: Subtarget.getXLenVT())); |
4861 | SDValue OddsMul = DAG.getNode(Opcode: RISCVISD::VWMULU_VL, DL, VT: WideContainerVT, |
4862 | N1: OddV, N2: AllOnesVec, N3: Passthru, N4: Mask, N5: VL); |
4863 | |
4864 | // Add the two together so we get |
4865 | // (OddV * 0xff...ff) + (OddV + EvenV) |
4866 | // = (OddV * 0x100...00) + EvenV |
4867 | // = (OddV << VecVT.getScalarSizeInBits()) + EvenV |
4868 | // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx |
4869 | Interleaved = DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: WideContainerVT, |
4870 | N1: Interleaved, N2: OddsMul, N3: Passthru, N4: Mask, N5: VL); |
4871 | } |
4872 | |
4873 | // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty> |
4874 | MVT ResultContainerVT = MVT::getVectorVT( |
4875 | VT: VecVT.getVectorElementType(), // Make sure to use original type |
4876 | EC: VecContainerVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
4877 | Interleaved = DAG.getBitcast(VT: ResultContainerVT, V: Interleaved); |
4878 | |
4879 | // Convert back to a fixed vector if needed |
4880 | MVT ResultVT = |
4881 | MVT::getVectorVT(VT: VecVT.getVectorElementType(), |
4882 | EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
4883 | if (ResultVT.isFixedLengthVector()) |
4884 | Interleaved = |
4885 | convertFromScalableVector(VT: ResultVT, V: Interleaved, DAG, Subtarget); |
4886 | |
4887 | return Interleaved; |
4888 | } |
4889 | |
4890 | // If we have a vector of bits that we want to reverse, we can use a vbrev on a |
4891 | // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. |
4892 | static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, |
4893 | SelectionDAG &DAG, |
4894 | const RISCVSubtarget &Subtarget) { |
4895 | SDLoc DL(SVN); |
4896 | MVT VT = SVN->getSimpleValueType(ResNo: 0); |
4897 | SDValue V = SVN->getOperand(Num: 0); |
4898 | unsigned NumElts = VT.getVectorNumElements(); |
4899 | |
4900 | assert(VT.getVectorElementType() == MVT::i1); |
4901 | |
4902 | if (!ShuffleVectorInst::isReverseMask(Mask: SVN->getMask(), |
4903 | NumSrcElts: SVN->getMask().size()) || |
4904 | !SVN->getOperand(Num: 1).isUndef()) |
4905 | return SDValue(); |
4906 | |
4907 | unsigned ViaEltSize = std::max(a: (uint64_t)8, b: PowerOf2Ceil(A: NumElts)); |
4908 | EVT ViaVT = EVT::getVectorVT( |
4909 | Context&: *DAG.getContext(), VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ViaEltSize), NumElements: 1); |
4910 | EVT ViaBitVT = |
4911 | EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1, NumElements: ViaVT.getScalarSizeInBits()); |
4912 | |
  // If we don't have Zvbb or the larger element type is > ELEN, the operation
  // will be illegal.
4915 | if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(Op: ISD::BITREVERSE, |
4916 | VT: ViaVT) || |
4917 | !Subtarget.getTargetLowering()->isTypeLegal(VT: ViaBitVT)) |
4918 | return SDValue(); |
4919 | |
4920 | // If the bit vector doesn't fit exactly into the larger element type, we need |
4921 | // to insert it into the larger vector and then shift up the reversed bits |
4922 | // afterwards to get rid of the gap introduced. |
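  // For example, reversing a v4i1 via an i8 bitreverse: the four bits sit in
  // the low nibble of a v8i1, the bitreverse leaves them (reversed) in the
  // high nibble, and a logical shift right by 8 - 4 = 4 moves them back down.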
4923 | if (ViaEltSize > NumElts) |
4924 | V = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ViaBitVT, N1: DAG.getUNDEF(VT: ViaBitVT), |
4925 | N2: V, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
4926 | |
4927 | SDValue Res = |
4928 | DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ViaVT, Operand: DAG.getBitcast(VT: ViaVT, V)); |
4929 | |
4930 | // Shift up the reversed bits if the vector didn't exactly fit into the larger |
4931 | // element type. |
4932 | if (ViaEltSize > NumElts) |
4933 | Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: ViaVT, N1: Res, |
4934 | N2: DAG.getConstant(Val: ViaEltSize - NumElts, DL, VT: ViaVT)); |
4935 | |
4936 | Res = DAG.getBitcast(VT: ViaBitVT, V: Res); |
4937 | |
4938 | if (ViaEltSize > NumElts) |
4939 | Res = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Res, |
4940 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
4941 | return Res; |
4942 | } |
4943 | |
4944 | static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, |
4945 | SelectionDAG &DAG, |
4946 | const RISCVSubtarget &Subtarget, |
4947 | MVT &RotateVT, unsigned &RotateAmt) { |
4948 | SDLoc DL(SVN); |
4949 | |
4950 | EVT VT = SVN->getValueType(ResNo: 0); |
4951 | unsigned NumElts = VT.getVectorNumElements(); |
4952 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
4953 | unsigned NumSubElts; |
4954 | if (!ShuffleVectorInst::isBitRotateMask(Mask: SVN->getMask(), EltSizeInBits, MinSubElts: 2, |
4955 | MaxSubElts: NumElts, NumSubElts, RotateAmt)) |
4956 | return false; |
4957 | RotateVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSizeInBits * NumSubElts), |
4958 | NumElements: NumElts / NumSubElts); |
4959 | |
4960 | // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. |
4961 | return Subtarget.getTargetLowering()->isTypeLegal(VT: RotateVT); |
4962 | } |
4963 | |
4964 | // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can |
4965 | // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this |
4966 | // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. |
4967 | static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, |
4968 | SelectionDAG &DAG, |
4969 | const RISCVSubtarget &Subtarget) { |
4970 | SDLoc DL(SVN); |
4971 | |
4972 | EVT VT = SVN->getValueType(ResNo: 0); |
4973 | unsigned RotateAmt; |
4974 | MVT RotateVT; |
4975 | if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt)) |
4976 | return SDValue(); |
4977 | |
4978 | SDValue Op = DAG.getBitcast(VT: RotateVT, V: SVN->getOperand(Num: 0)); |
4979 | |
4980 | SDValue Rotate; |
4981 | // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, |
4982 | // so canonicalize to vrev8. |
4983 | if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8) |
4984 | Rotate = DAG.getNode(Opcode: ISD::BSWAP, DL, VT: RotateVT, Operand: Op); |
4985 | else |
4986 | Rotate = DAG.getNode(Opcode: ISD::ROTL, DL, VT: RotateVT, N1: Op, |
4987 | N2: DAG.getConstant(Val: RotateAmt, DL, VT: RotateVT)); |
4988 | |
4989 | return DAG.getBitcast(VT, V: Rotate); |
4990 | } |
4991 | |
// If compiling with an exactly known VLEN, see if we can split a
// shuffle on m2 or larger into a small number of m1 sized shuffles
// which write each destination register exactly once.
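// For example, assuming VLEN=128, a v8i64 shuffle (LMUL=4) whose mask maps
// each two-element destination chunk onto a single source register can be
// emitted as four independent v2i64 (m1) shuffles.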
4995 | static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, |
4996 | SelectionDAG &DAG, |
4997 | const RISCVSubtarget &Subtarget) { |
4998 | SDLoc DL(SVN); |
4999 | MVT VT = SVN->getSimpleValueType(ResNo: 0); |
5000 | SDValue V1 = SVN->getOperand(Num: 0); |
5001 | SDValue V2 = SVN->getOperand(Num: 1); |
5002 | ArrayRef<int> Mask = SVN->getMask(); |
5003 | unsigned NumElts = VT.getVectorNumElements(); |
5004 | |
  // If we don't know the exact data layout, there's not much we can do. If
  // this is already m1 or smaller, there's no point in splitting further.
5007 | const auto VLen = Subtarget.getRealVLen(); |
5008 | if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen) |
5009 | return SDValue(); |
5010 | |
5011 | // Avoid picking up bitrotate patterns which we have a linear-in-lmul |
5012 | // expansion for. |
5013 | unsigned RotateAmt; |
5014 | MVT RotateVT; |
5015 | if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt)) |
5016 | return SDValue(); |
5017 | |
5018 | MVT ElemVT = VT.getVectorElementType(); |
5019 | unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); |
5020 | unsigned VRegsPerSrc = NumElts / ElemsPerVReg; |
5021 | |
5022 | SmallVector<std::pair<int, SmallVector<int>>> |
5023 | OutMasks(VRegsPerSrc, {-1, {}}); |
5024 | |
5025 | // Check if our mask can be done as a 1-to-1 mapping from source |
5026 | // to destination registers in the group without needing to |
5027 | // write each destination more than once. |
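  // For example, with ElemsPerVReg=2 and the mask <2,3,0,1>, OutMasks ends up
  // as {{1, {0,1}}, {0, {0,1}}}: destination register 0 is a copy of source
  // register 1, and destination register 1 is a copy of source register 0.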
5028 | for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) { |
5029 | int DstVecIdx = DstIdx / ElemsPerVReg; |
5030 | int DstSubIdx = DstIdx % ElemsPerVReg; |
5031 | int SrcIdx = Mask[DstIdx]; |
5032 | if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts) |
5033 | continue; |
5034 | int SrcVecIdx = SrcIdx / ElemsPerVReg; |
5035 | int SrcSubIdx = SrcIdx % ElemsPerVReg; |
5036 | if (OutMasks[DstVecIdx].first == -1) |
5037 | OutMasks[DstVecIdx].first = SrcVecIdx; |
5038 | if (OutMasks[DstVecIdx].first != SrcVecIdx) |
5039 | // Note: This case could easily be handled by keeping track of a chain |
5040 | // of source values and generating two element shuffles below. This is |
5041 | // less an implementation question, and more a profitability one. |
5042 | return SDValue(); |
5043 | |
5044 | OutMasks[DstVecIdx].second.resize(N: ElemsPerVReg, NV: -1); |
5045 | OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx; |
5046 | } |
5047 | |
5048 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
5049 | MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg); |
5050 | MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget); |
5051 | assert(M1VT == getLMUL1VT(M1VT)); |
5052 | unsigned NumOpElts = M1VT.getVectorMinNumElements(); |
5053 | SDValue Vec = DAG.getUNDEF(VT: ContainerVT); |
5054 | // The following semantically builds up a fixed length concat_vector |
5055 | // of the component shuffle_vectors. We eagerly lower to scalable here |
5056 | // to avoid DAG combining it back to a large shuffle_vector again. |
5057 | V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget); |
5058 | V2 = convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget); |
5059 | for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) { |
5060 | auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx]; |
5061 | if (SrcVecIdx == -1) |
5062 | continue; |
    unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5064 | SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1; |
5065 | SDValue SubVec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: SrcVec, |
5066 | N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL)); |
5067 | SubVec = convertFromScalableVector(VT: OneRegVT, V: SubVec, DAG, Subtarget); |
5068 | SubVec = DAG.getVectorShuffle(VT: OneRegVT, dl: DL, N1: SubVec, N2: SubVec, Mask: SrcSubMask); |
5069 | SubVec = convertToScalableVector(VT: M1VT, V: SubVec, DAG, Subtarget); |
5070 | unsigned InsertIdx = DstVecIdx * NumOpElts; |
5071 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, N2: SubVec, |
5072 | N3: DAG.getVectorIdxConstant(Val: InsertIdx, DL)); |
5073 | } |
5074 | return convertFromScalableVector(VT, V: Vec, DAG, Subtarget); |
5075 | } |
5076 | |
5077 | static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, |
5078 | const RISCVSubtarget &Subtarget) { |
5079 | SDValue V1 = Op.getOperand(i: 0); |
5080 | SDValue V2 = Op.getOperand(i: 1); |
5081 | SDLoc DL(Op); |
5082 | MVT XLenVT = Subtarget.getXLenVT(); |
5083 | MVT VT = Op.getSimpleValueType(); |
5084 | unsigned NumElts = VT.getVectorNumElements(); |
5085 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op.getNode()); |
5086 | |
5087 | if (VT.getVectorElementType() == MVT::i1) { |
5088 | // Lower to a vror.vi of a larger element type if possible before we promote |
5089 | // i1s to i8s. |
5090 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
5091 | return V; |
5092 | if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget)) |
5093 | return V; |
5094 | |
5095 | // Promote i1 shuffle to i8 shuffle. |
5096 | MVT WidenVT = MVT::getVectorVT(VT: MVT::i8, EC: VT.getVectorElementCount()); |
5097 | V1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V1); |
5098 | V2 = V2.isUndef() ? DAG.getUNDEF(VT: WidenVT) |
5099 | : DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V2); |
5100 | SDValue Shuffled = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: V1, N2: V2, Mask: SVN->getMask()); |
5101 | return DAG.getSetCC(DL, VT, LHS: Shuffled, RHS: DAG.getConstant(Val: 0, DL, VT: WidenVT), |
5102 | Cond: ISD::SETNE); |
5103 | } |
5104 | |
5105 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
5106 | |
5107 | auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
5108 | |
5109 | if (SVN->isSplat()) { |
5110 | const int Lane = SVN->getSplatIndex(); |
5111 | if (Lane >= 0) { |
5112 | MVT SVT = VT.getVectorElementType(); |
5113 | |
      // Turn a splatted vector load into a strided load with an X0 stride.
5115 | SDValue V = V1; |
5116 | // Peek through CONCAT_VECTORS as VectorCombine can concat a vector |
5117 | // with undef. |
5118 | // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? |
5119 | int Offset = Lane; |
5120 | if (V.getOpcode() == ISD::CONCAT_VECTORS) { |
5121 | int OpElements = |
5122 | V.getOperand(i: 0).getSimpleValueType().getVectorNumElements(); |
5123 | V = V.getOperand(i: Offset / OpElements); |
5124 | Offset %= OpElements; |
5125 | } |
5126 | |
5127 | // We need to ensure the load isn't atomic or volatile. |
5128 | if (ISD::isNormalLoad(N: V.getNode()) && cast<LoadSDNode>(Val&: V)->isSimple()) { |
5129 | auto *Ld = cast<LoadSDNode>(Val&: V); |
5130 | Offset *= SVT.getStoreSize(); |
5131 | SDValue NewAddr = DAG.getMemBasePlusOffset( |
5132 | Base: Ld->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: Offset), DL); |
5133 | |
5134 | // If this is SEW=64 on RV32, use a strided load with a stride of x0. |
5135 | if (SVT.isInteger() && SVT.bitsGT(VT: XLenVT)) { |
5136 | SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other}); |
5137 | SDValue IntID = |
5138 | DAG.getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: XLenVT); |
5139 | SDValue Ops[] = {Ld->getChain(), |
5140 | IntID, |
5141 | DAG.getUNDEF(VT: ContainerVT), |
5142 | NewAddr, |
5143 | DAG.getRegister(Reg: RISCV::X0, VT: XLenVT), |
5144 | VL}; |
5145 | SDValue NewLoad = DAG.getMemIntrinsicNode( |
5146 | Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT: SVT, |
5147 | MMO: DAG.getMachineFunction().getMachineMemOperand( |
5148 | MMO: Ld->getMemOperand(), Offset, Size: SVT.getStoreSize())); |
5149 | DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: NewLoad); |
5150 | return convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget); |
5151 | } |
5152 | |
5153 | MVT SplatVT = ContainerVT; |
5154 | |
5155 | // If we don't have Zfh, we need to use an integer scalar load. |
5156 | if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) { |
5157 | SVT = MVT::i16; |
5158 | SplatVT = ContainerVT.changeVectorElementType(EltVT: SVT); |
5159 | } |
5160 | |
5161 | // Otherwise use a scalar load and splat. This will give the best |
5162 | // opportunity to fold a splat into the operation. ISel can turn it into |
5163 | // the x0 strided load if we aren't able to fold away the select. |
5164 | if (SVT.isFloatingPoint()) |
5165 | V = DAG.getLoad(VT: SVT, dl: DL, Chain: Ld->getChain(), Ptr: NewAddr, |
5166 | PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset), |
5167 | Alignment: Ld->getOriginalAlign(), |
5168 | MMOFlags: Ld->getMemOperand()->getFlags()); |
5169 | else |
5170 | V = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: DL, VT: XLenVT, Chain: Ld->getChain(), Ptr: NewAddr, |
5171 | PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset), MemVT: SVT, |
5172 | Alignment: Ld->getOriginalAlign(), |
5173 | MMOFlags: Ld->getMemOperand()->getFlags()); |
5174 | DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: V); |
5175 | |
5176 | unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
5177 | : RISCVISD::VMV_V_X_VL; |
5178 | SDValue Splat = |
5179 | DAG.getNode(Opcode: Opc, DL, VT: SplatVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: V, N3: VL); |
5180 | Splat = DAG.getBitcast(VT: ContainerVT, V: Splat); |
5181 | return convertFromScalableVector(VT, V: Splat, DAG, Subtarget); |
5182 | } |
5183 | |
5184 | V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget); |
5185 | assert(Lane < (int)NumElts && "Unexpected lane!" ); |
5186 | SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT, |
5187 | N1: V1, N2: DAG.getConstant(Val: Lane, DL, VT: XLenVT), |
5188 | N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL); |
5189 | return convertFromScalableVector(VT, V: Gather, DAG, Subtarget); |
5190 | } |
5191 | } |
5192 | |
5193 | // For exact VLEN m2 or greater, try to split to m1 operations if we |
5194 | // can split cleanly. |
5195 | if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget)) |
5196 | return V; |
5197 | |
5198 | ArrayRef<int> Mask = SVN->getMask(); |
5199 | |
5200 | if (SDValue V = |
5201 | lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
5202 | return V; |
5203 | |
5204 | if (SDValue V = |
5205 | lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
5206 | return V; |
5207 | |
5208 | // A bitrotate will be one instruction on Zvkb, so try to lower to it first if |
5209 | // available. |
5210 | if (Subtarget.hasStdExtZvkb()) |
5211 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
5212 | return V; |
5213 | |
5214 | // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may |
5215 | // be undef which can be handled with a single SLIDEDOWN/UP. |
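  // For example, the mask <2,3,4,5,6,7,0,1> is a rotation by 2: sliding HiV
  // down by 2 fills elements 0..5 and sliding LoV up by NumElts - 2 = 6 fills
  // elements 6 and 7.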
5216 | int LoSrc, HiSrc; |
5217 | int Rotation = isElementRotate(LoSrc, HiSrc, Mask); |
5218 | if (Rotation > 0) { |
5219 | SDValue LoV, HiV; |
5220 | if (LoSrc >= 0) { |
5221 | LoV = LoSrc == 0 ? V1 : V2; |
5222 | LoV = convertToScalableVector(VT: ContainerVT, V: LoV, DAG, Subtarget); |
5223 | } |
5224 | if (HiSrc >= 0) { |
5225 | HiV = HiSrc == 0 ? V1 : V2; |
5226 | HiV = convertToScalableVector(VT: ContainerVT, V: HiV, DAG, Subtarget); |
5227 | } |
5228 | |
5229 | // We found a rotation. We need to slide HiV down by Rotation. Then we need |
5230 | // to slide LoV up by (NumElts - Rotation). |
5231 | unsigned InvRotate = NumElts - Rotation; |
5232 | |
5233 | SDValue Res = DAG.getUNDEF(VT: ContainerVT); |
5234 | if (HiV) { |
      // Even though we could use a smaller VL, don't, so as to avoid a
      // vsetivli toggle.
5237 | Res = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: HiV, |
5238 | Offset: DAG.getConstant(Val: Rotation, DL, VT: XLenVT), Mask: TrueMask, VL); |
5239 | } |
5240 | if (LoV) |
5241 | Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Res, Op: LoV, |
5242 | Offset: DAG.getConstant(Val: InvRotate, DL, VT: XLenVT), Mask: TrueMask, VL, |
5243 | Policy: RISCVII::TAIL_AGNOSTIC); |
5244 | |
5245 | return convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
5246 | } |
5247 | |
5248 | // If this is a deinterleave and we can widen the vector, then we can use |
5249 | // vnsrl to deinterleave. |
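  // For example, the even elements of a v8i8 can be extracted by reinterpreting
  // the source as v4i16 and narrowing with a vnsrl by 0; the odd elements use a
  // shift amount of 8 (the element width) instead.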
5250 | if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { |
5251 | return getDeinterleaveViaVNSRL(DL, VT, Src: V1.getOperand(i: 0), EvenElts: Mask[0] == 0, |
5252 | Subtarget, DAG); |
5253 | } |
5254 | |
5255 | if (SDValue V = |
5256 | lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
5257 | return V; |
5258 | |
5259 | // Detect an interleave shuffle and lower to |
5260 | // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) |
5261 | int EvenSrc, OddSrc; |
5262 | if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) { |
5263 | // Extract the halves of the vectors. |
5264 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
5265 | |
5266 | int Size = Mask.size(); |
5267 | SDValue EvenV, OddV; |
5268 | assert(EvenSrc >= 0 && "Undef source?" ); |
5269 | EvenV = (EvenSrc / Size) == 0 ? V1 : V2; |
5270 | EvenV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: EvenV, |
5271 | N2: DAG.getVectorIdxConstant(Val: EvenSrc % Size, DL)); |
5272 | |
5273 | assert(OddSrc >= 0 && "Undef source?" ); |
5274 | OddV = (OddSrc / Size) == 0 ? V1 : V2; |
5275 | OddV = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: HalfVT, N1: OddV, |
5276 | N2: DAG.getVectorIdxConstant(Val: OddSrc % Size, DL)); |
5277 | |
5278 | return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); |
5279 | } |
5280 | |
5281 | |
5282 | // Handle any remaining single source shuffles |
5283 | assert(!V1.isUndef() && "Unexpected shuffle canonicalization" ); |
5284 | if (V2.isUndef()) { |
5285 | // We might be able to express the shuffle as a bitrotate. But even if we |
5286 | // don't have Zvkb and have to expand, the expanded sequence of approx. 2 |
5287 | // shifts and a vor will have a higher throughput than a vrgather. |
5288 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
5289 | return V; |
5290 | |
5291 | if (VT.getScalarSizeInBits() == 8 && |
5292 | any_of(Range&: Mask, P: [&](const auto &Idx) { return Idx > 255; })) { |
5293 | // On such a vector we're unable to use i8 as the index type. |
5294 | // FIXME: We could promote the index to i16 and use vrgatherei16, but that |
5295 | // may involve vector splitting if we're already at LMUL=8, or our |
5296 | // user-supplied maximum fixed-length LMUL. |
5297 | return SDValue(); |
5298 | } |
5299 | |
5300 | // Base case for the two operand recursion below - handle the worst case |
5301 | // single source shuffle. |
5302 | unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; |
5303 | MVT IndexVT = VT.changeTypeToInteger(); |
5304 | // Since we can't introduce illegal index types at this stage, use i16 and |
5305 | // vrgatherei16 if the corresponding index type for plain vrgather is greater |
5306 | // than XLenVT. |
5307 | if (IndexVT.getScalarType().bitsGT(VT: XLenVT)) { |
5308 | GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; |
5309 | IndexVT = IndexVT.changeVectorElementType(EltVT: MVT::i16); |
5310 | } |
5311 | |
5312 | // If the mask allows, we can do all the index computation in 16 bits. This |
5313 | // requires less work and less register pressure at high LMUL, and creates |
5314 | // smaller constants which may be cheaper to materialize. |
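    // For example, assuming VLEN=128 on RV64, a single-source v16i64 shuffle
    // would need an LMUL=8 i64 index vector for vrgather.vv, whereas i16
    // indices and vrgatherei16 need only an LMUL=2 index vector.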
5315 | if (IndexVT.getScalarType().bitsGT(VT: MVT::i16) && isUInt<16>(x: NumElts - 1) && |
5316 | (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) { |
5317 | GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; |
5318 | IndexVT = IndexVT.changeVectorElementType(EltVT: MVT::i16); |
5319 | } |
5320 | |
5321 | MVT IndexContainerVT = |
5322 | ContainerVT.changeVectorElementType(EltVT: IndexVT.getScalarType()); |
5323 | |
5324 | V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget); |
5325 | SmallVector<SDValue> GatherIndicesLHS; |
5326 | for (int MaskIndex : Mask) { |
5327 | bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0; |
5328 | GatherIndicesLHS.push_back(Elt: IsLHSIndex |
5329 | ? DAG.getConstant(Val: MaskIndex, DL, VT: XLenVT) |
5330 | : DAG.getUNDEF(VT: XLenVT)); |
5331 | } |
5332 | SDValue LHSIndices = DAG.getBuildVector(VT: IndexVT, DL, Ops: GatherIndicesLHS); |
5333 | LHSIndices = convertToScalableVector(VT: IndexContainerVT, V: LHSIndices, DAG, |
5334 | Subtarget); |
5335 | SDValue Gather = DAG.getNode(Opcode: GatherVVOpc, DL, VT: ContainerVT, N1: V1, N2: LHSIndices, |
5336 | N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL); |
5337 | return convertFromScalableVector(VT, V: Gather, DAG, Subtarget); |
5338 | } |
5339 | |
5340 | // As a backup, shuffles can be lowered via a vrgather instruction, possibly |
5341 | // merged with a second vrgather. |
5342 | SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS; |
5343 | |
5344 | // Now construct the mask that will be used by the blended vrgather operation. |
5345 | // Construct the appropriate indices into each vector. |
5346 | for (int MaskIndex : Mask) { |
5347 | bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; |
5348 | ShuffleMaskLHS.push_back(Elt: IsLHSOrUndefIndex && MaskIndex >= 0 |
5349 | ? MaskIndex : -1); |
5350 | ShuffleMaskRHS.push_back(Elt: IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts)); |
5351 | } |
5352 | |
5353 | // Try to pick a profitable operand order. |
5354 | bool SwapOps = DAG.isSplatValue(V: V2) && !DAG.isSplatValue(V: V1); |
5355 | SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(Mask: ShuffleMaskRHS, NumSrcElts: NumElts); |
5356 | |
5357 | // Recursively invoke lowering for each operand if we had two |
5358 | // independent single source shuffles, and then combine the result via a |
5359 | // vselect. Note that the vselect will likely be folded back into the |
5360 | // second permute (vrgather, or other) by the post-isel combine. |
5361 | V1 = DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskLHS); |
5362 | V2 = DAG.getVectorShuffle(VT, dl: DL, N1: V2, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskRHS); |
5363 | |
5364 | SmallVector<SDValue> MaskVals; |
5365 | for (int MaskIndex : Mask) { |
5366 | bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps; |
5367 | MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT)); |
5368 | } |
5369 | |
5370 | assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle" ); |
5371 | MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts); |
5372 | SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals); |
5373 | |
5374 | if (SwapOps) |
5375 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V1, N3: V2); |
5376 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V2, N3: V1); |
5377 | } |
5378 | |
5379 | bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { |
5380 | // Support splats for any type. These should type legalize well. |
5381 | if (ShuffleVectorSDNode::isSplatMask(Mask: M.data(), VT)) |
5382 | return true; |
5383 | |
5384 | // Only support legal VTs for other shuffles for now. |
5385 | if (!isTypeLegal(VT)) |
5386 | return false; |
5387 | |
5388 | MVT SVT = VT.getSimpleVT(); |
5389 | |
5390 | // Not for i1 vectors. |
5391 | if (SVT.getScalarType() == MVT::i1) |
5392 | return false; |
5393 | |
5394 | int Dummy1, Dummy2; |
5395 | return (isElementRotate(LoSrc&: Dummy1, HiSrc&: Dummy2, Mask: M) > 0) || |
5396 | isInterleaveShuffle(Mask: M, VT: SVT, EvenSrc&: Dummy1, OddSrc&: Dummy2, Subtarget); |
5397 | } |
5398 | |
5399 | // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting |
5400 | // the exponent. |
5401 | SDValue |
5402 | RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, |
5403 | SelectionDAG &DAG) const { |
5404 | MVT VT = Op.getSimpleValueType(); |
5405 | unsigned EltSize = VT.getScalarSizeInBits(); |
5406 | SDValue Src = Op.getOperand(i: 0); |
5407 | SDLoc DL(Op); |
5408 | MVT ContainerVT = VT; |
5409 | |
5410 | SDValue Mask, VL; |
5411 | if (Op->isVPOpcode()) { |
5412 | Mask = Op.getOperand(i: 1); |
5413 | if (VT.isFixedLengthVector()) |
5414 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
5415 | Subtarget); |
5416 | VL = Op.getOperand(i: 2); |
5417 | } |
5418 | |
  // We choose an FP type that can represent the value exactly if possible.
  // Otherwise, we use a round-towards-zero conversion so the exponent of the
  // result is still correct.
  // TODO: Use f16 for i8 when possible?
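  // For example, for the i32 input 0xFFFFFFFF a round-to-nearest conversion to
  // f32 would round up to 2^32 (exponent 32); rounding towards zero keeps the
  // exponent at the correct floor(log2) value of 31.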
5422 | MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; |
5423 | if (!isTypeLegal(VT: MVT::getVectorVT(VT: FloatEltVT, EC: VT.getVectorElementCount()))) |
5424 | FloatEltVT = MVT::f32; |
5425 | MVT FloatVT = MVT::getVectorVT(VT: FloatEltVT, EC: VT.getVectorElementCount()); |
5426 | |
5427 | // Legal types should have been checked in the RISCVTargetLowering |
5428 | // constructor. |
5429 | // TODO: Splitting may make sense in some cases. |
5430 | assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && |
5431 | "Expected legal float type!" ); |
5432 | |
5433 | // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. |
5434 | // The trailing zero count is equal to log2 of this single bit value. |
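  // For example, for Src = 0b01100, Src & -Src = 0b00100, and the exponent of
  // that single-bit value gives log2(0b00100) = 2, the trailing zero count.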
5435 | if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { |
5436 | SDValue Neg = DAG.getNegative(Val: Src, DL, VT); |
5437 | Src = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: Neg); |
5438 | } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) { |
5439 | SDValue Neg = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), |
5440 | N2: Src, N3: Mask, N4: VL); |
5441 | Src = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Src, N2: Neg, N3: Mask, N4: VL); |
5442 | } |
5443 | |
5444 | // We have a legal FP type, convert to it. |
5445 | SDValue FloatVal; |
5446 | if (FloatVT.bitsGT(VT)) { |
5447 | if (Op->isVPOpcode()) |
5448 | FloatVal = DAG.getNode(Opcode: ISD::VP_UINT_TO_FP, DL, VT: FloatVT, N1: Src, N2: Mask, N3: VL); |
5449 | else |
5450 | FloatVal = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVT, Operand: Src); |
5451 | } else { |
5452 | // Use RTZ to avoid rounding influencing exponent of FloatVal. |
5453 | if (VT.isFixedLengthVector()) { |
5454 | ContainerVT = getContainerForFixedLengthVector(VT); |
5455 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
5456 | } |
5457 | if (!Op->isVPOpcode()) |
5458 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
5459 | SDValue RTZRM = |
5460 | DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT()); |
5461 | MVT ContainerFloatVT = |
5462 | MVT::getVectorVT(VT: FloatEltVT, EC: ContainerVT.getVectorElementCount()); |
5463 | FloatVal = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_F_XU_VL, DL, VT: ContainerFloatVT, |
5464 | N1: Src, N2: Mask, N3: RTZRM, N4: VL); |
5465 | if (VT.isFixedLengthVector()) |
5466 | FloatVal = convertFromScalableVector(VT: FloatVT, V: FloatVal, DAG, Subtarget); |
5467 | } |
5468 | // Bitcast to integer and shift the exponent to the LSB. |
5469 | EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); |
5470 | SDValue Bitcast = DAG.getBitcast(VT: IntVT, V: FloatVal); |
5471 | unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; |
5472 | |
5473 | SDValue Exp; |
5474 | // Restore back to original type. Truncation after SRL is to generate vnsrl. |
5475 | if (Op->isVPOpcode()) { |
5476 | Exp = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT: IntVT, N1: Bitcast, |
5477 | N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT), N3: Mask, N4: VL); |
5478 | Exp = DAG.getVPZExtOrTrunc(DL, VT, Op: Exp, Mask, EVL: VL); |
5479 | } else { |
5480 | Exp = DAG.getNode(Opcode: ISD::SRL, DL, VT: IntVT, N1: Bitcast, |
5481 | N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT)); |
5482 | if (IntVT.bitsLT(VT)) |
5483 | Exp = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Exp); |
5484 | else if (IntVT.bitsGT(VT)) |
5485 | Exp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Exp); |
5486 | } |
5487 | |
5488 | // The exponent contains log2 of the value in biased form. |
5489 | unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; |
5490 | // For trailing zeros, we just need to subtract the bias. |
5491 | if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) |
5492 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Exp, |
5493 | N2: DAG.getConstant(Val: ExponentBias, DL, VT)); |
5494 | if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) |
5495 | return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Exp, |
5496 | N2: DAG.getConstant(Val: ExponentBias, DL, VT), N3: Mask, N4: VL); |
5497 | |
5498 | // For leading zeros, we need to remove the bias and convert from log2 to |
5499 | // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). |
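  // For example, with f64 used for i32 elements, an input of 1 converts to 1.0
  // (biased exponent 1023), so Res = (1023 + 31) - 1023 = 31 leading zeros.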
5500 | unsigned Adjust = ExponentBias + (EltSize - 1); |
5501 | SDValue Res; |
5502 | if (Op->isVPOpcode()) |
5503 | Res = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp, |
5504 | N3: Mask, N4: VL); |
5505 | else |
5506 | Res = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp); |
5507 | |
  // For a zero input the above result equals Adjust, which is greater than
  // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5510 | if (Op.getOpcode() == ISD::CTLZ) |
5511 | Res = DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: Res, N2: DAG.getConstant(Val: EltSize, DL, VT)); |
5512 | else if (Op.getOpcode() == ISD::VP_CTLZ) |
5513 | Res = DAG.getNode(Opcode: ISD::VP_UMIN, DL, VT, N1: Res, |
5514 | N2: DAG.getConstant(Val: EltSize, DL, VT), N3: Mask, N4: VL); |
5515 | return Res; |
5516 | } |
5517 | |
5518 | SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op, |
5519 | SelectionDAG &DAG) const { |
5520 | SDLoc DL(Op); |
5521 | MVT XLenVT = Subtarget.getXLenVT(); |
5522 | SDValue Source = Op->getOperand(Num: 0); |
5523 | MVT SrcVT = Source.getSimpleValueType(); |
5524 | SDValue Mask = Op->getOperand(Num: 1); |
5525 | SDValue EVL = Op->getOperand(Num: 2); |
5526 | |
5527 | if (SrcVT.isFixedLengthVector()) { |
5528 | MVT ContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
5529 | Source = convertToScalableVector(VT: ContainerVT, V: Source, DAG, Subtarget); |
5530 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
5531 | Subtarget); |
5532 | SrcVT = ContainerVT; |
5533 | } |
5534 | |
5535 | // Convert to boolean vector. |
5536 | if (SrcVT.getScalarType() != MVT::i1) { |
5537 | SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: SrcVT); |
5538 | SrcVT = MVT::getVectorVT(VT: MVT::i1, EC: SrcVT.getVectorElementCount()); |
5539 | Source = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: SrcVT, |
5540 | Ops: {Source, AllZero, DAG.getCondCode(Cond: ISD::SETNE), |
5541 | DAG.getUNDEF(VT: SrcVT), Mask, EVL}); |
5542 | } |
5543 | |
5544 | SDValue Res = DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Source, N2: Mask, N3: EVL); |
5545 | if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF) |
    // In this case, we can interpret poison as -1, so there is nothing further
    // to do.
5547 | return Res; |
5548 | |
5549 | // Convert -1 to VL. |
5550 | SDValue SetCC = |
5551 | DAG.getSetCC(DL, VT: XLenVT, LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETLT); |
5552 | Res = DAG.getSelect(DL, VT: XLenVT, Cond: SetCC, LHS: EVL, RHS: Res); |
5553 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: Res); |
5554 | } |
5555 | |
5556 | // While RVV has alignment restrictions, we should always be able to load as a |
5557 | // legal equivalently-sized byte-typed vector instead. This method is |
// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5559 | // the load is already correctly-aligned, it returns SDValue(). |
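// For example, an underaligned load of <vscale x 4 x i32> is re-expressed as a
// load of <vscale x 16 x i8> followed by a bitcast back to the original type.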
5560 | SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, |
5561 | SelectionDAG &DAG) const { |
5562 | auto *Load = cast<LoadSDNode>(Val&: Op); |
5563 | assert(Load && Load->getMemoryVT().isVector() && "Expected vector load" ); |
5564 | |
5565 | if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
5566 | VT: Load->getMemoryVT(), |
5567 | MMO: *Load->getMemOperand())) |
5568 | return SDValue(); |
5569 | |
5570 | SDLoc DL(Op); |
5571 | MVT VT = Op.getSimpleValueType(); |
5572 | unsigned EltSizeBits = VT.getScalarSizeInBits(); |
5573 | assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
5574 | "Unexpected unaligned RVV load type" ); |
5575 | MVT NewVT = |
5576 | MVT::getVectorVT(VT: MVT::i8, EC: VT.getVectorElementCount() * (EltSizeBits / 8)); |
5577 | assert(NewVT.isValid() && |
5578 | "Expecting equally-sized RVV vector types to be legal" ); |
5579 | SDValue L = DAG.getLoad(VT: NewVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(), |
5580 | PtrInfo: Load->getPointerInfo(), Alignment: Load->getOriginalAlign(), |
5581 | MMOFlags: Load->getMemOperand()->getFlags()); |
5582 | return DAG.getMergeValues(Ops: {DAG.getBitcast(VT, V: L), L.getValue(R: 1)}, dl: DL); |
5583 | } |
5584 | |
5585 | // While RVV has alignment restrictions, we should always be able to store as a |
5586 | // legal equivalently-sized byte-typed vector instead. This method is |
// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5588 | // returns SDValue() if the store is already correctly aligned. |
5589 | SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, |
5590 | SelectionDAG &DAG) const { |
5591 | auto *Store = cast<StoreSDNode>(Val&: Op); |
5592 | assert(Store && Store->getValue().getValueType().isVector() && |
5593 | "Expected vector store" ); |
5594 | |
5595 | if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
5596 | VT: Store->getMemoryVT(), |
5597 | MMO: *Store->getMemOperand())) |
5598 | return SDValue(); |
5599 | |
5600 | SDLoc DL(Op); |
5601 | SDValue StoredVal = Store->getValue(); |
5602 | MVT VT = StoredVal.getSimpleValueType(); |
5603 | unsigned EltSizeBits = VT.getScalarSizeInBits(); |
5604 | assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
5605 | "Unexpected unaligned RVV store type" ); |
5606 | MVT NewVT = |
5607 | MVT::getVectorVT(VT: MVT::i8, EC: VT.getVectorElementCount() * (EltSizeBits / 8)); |
5608 | assert(NewVT.isValid() && |
5609 | "Expecting equally-sized RVV vector types to be legal" ); |
5610 | StoredVal = DAG.getBitcast(VT: NewVT, V: StoredVal); |
5611 | return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: StoredVal, Ptr: Store->getBasePtr(), |
5612 | PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(), |
5613 | MMOFlags: Store->getMemOperand()->getFlags()); |
5614 | } |
5615 | |
5616 | static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, |
5617 | const RISCVSubtarget &Subtarget) { |
5618 | assert(Op.getValueType() == MVT::i64 && "Unexpected VT" ); |
5619 | |
5620 | int64_t Imm = cast<ConstantSDNode>(Val&: Op)->getSExtValue(); |
5621 | |
5622 | // All simm32 constants should be handled by isel. |
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so checking first
  // improves compile time.
5626 | if (isInt<32>(x: Imm)) |
5627 | return Op; |
5628 | |
5629 | // We only need to cost the immediate, if constant pool lowering is enabled. |
5630 | if (!Subtarget.useConstantPoolForLargeInts()) |
5631 | return Op; |
5632 | |
5633 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget); |
5634 | if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) |
5635 | return Op; |
5636 | |
5637 | // Optimizations below are disabled for opt size. If we're optimizing for |
5638 | // size, use a constant pool. |
5639 | if (DAG.shouldOptForSize()) |
5640 | return SDValue(); |
5641 | |
  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool.
5644 | // It will require an extra temporary register though. |
5645 | // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where |
5646 | // low and high 32 bits are the same and bit 31 and 63 are set. |
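  // For example, 0x1234567812345678 can be built by materializing
  // X = 0x12345678 and computing (ADD (SLLI X, 32), X), avoiding the constant
  // pool.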
5647 | unsigned ShiftAmt, AddOpc; |
5648 | RISCVMatInt::InstSeq SeqLo = |
5649 | RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc); |
5650 | if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) |
5651 | return Op; |
5652 | |
5653 | return SDValue(); |
5654 | } |
5655 | |
5656 | static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, |
5657 | const RISCVSubtarget &Subtarget) { |
5658 | SDLoc dl(Op); |
5659 | AtomicOrdering FenceOrdering = |
5660 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1)); |
5661 | SyncScope::ID FenceSSID = |
5662 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
5663 | |
5664 | if (Subtarget.hasStdExtZtso()) { |
5665 | // The only fence that needs an instruction is a sequentially-consistent |
5666 | // cross-thread fence. |
5667 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
5668 | FenceSSID == SyncScope::System) |
5669 | return Op; |
5670 | |
5671 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
5672 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL: dl, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
5673 | } |
5674 | |
5675 | // singlethread fences only synchronize with signal handlers on the same |
5676 | // thread and thus only need to preserve instruction order, not actually |
5677 | // enforce memory ordering. |
5678 | if (FenceSSID == SyncScope::SingleThread) |
5679 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
5680 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL: dl, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
5681 | |
5682 | return Op; |
5683 | } |
5684 | |
5685 | static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) { |
5686 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5687 | "Unexpected custom legalisation" ); |
5688 | |
5689 | // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN. |
5690 | bool IsAdd = Op.getOpcode() == ISD::SADDSAT; |
5691 | SDLoc DL(Op); |
5692 | SDValue LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 0)); |
5693 | SDValue RHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
5694 | SDValue Result = |
5695 | DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL, VT: MVT::i64, N1: LHS, N2: RHS); |
5696 | |
5697 | APInt MinVal = APInt::getSignedMinValue(numBits: 32).sext(width: 64); |
5698 | APInt MaxVal = APInt::getSignedMaxValue(numBits: 32).sext(width: 64); |
5699 | SDValue SatMin = DAG.getConstant(Val: MinVal, DL, VT: MVT::i64); |
5700 | SDValue SatMax = DAG.getConstant(Val: MaxVal, DL, VT: MVT::i64); |
5701 | Result = DAG.getNode(Opcode: ISD::SMIN, DL, VT: MVT::i64, N1: Result, N2: SatMax); |
5702 | Result = DAG.getNode(Opcode: ISD::SMAX, DL, VT: MVT::i64, N1: Result, N2: SatMin); |
5703 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result); |
5704 | } |
5705 | |
5706 | static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) { |
5707 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5708 | "Unexpected custom legalisation" ); |
5709 | |
5710 | // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using |
5711 | // sign extend allows overflow of the lower 32 bits to be detected on |
5712 | // the promoted size. |
5713 | SDLoc DL(Op); |
5714 | SDValue LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 0)); |
5715 | SDValue RHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
5716 | SDValue WideOp = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: MVT::i64, N1: LHS, N2: RHS); |
5717 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: WideOp); |
5718 | } |
5719 | |
5720 | // Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw. |
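// For example, with i32 operands 0x7fffffff and 1 the widened i64 sum is
// 0x80000000; sign-extending its low 32 bits gives 0xffffffff80000000, and the
// mismatch with the sum signals the overflow.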
5721 | static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) { |
5722 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5723 | "Unexpected custom legalisation" ); |
5724 | if (isa<ConstantSDNode>(Val: Op.getOperand(i: 1))) |
5725 | return SDValue(); |
5726 | |
5727 | bool IsAdd = Op.getOpcode() == ISD::SADDO; |
5728 | SDLoc DL(Op); |
5729 | SDValue LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 0)); |
5730 | SDValue RHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
5731 | SDValue WideOp = |
5732 | DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL, VT: MVT::i64, N1: LHS, N2: RHS); |
5733 | SDValue Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: WideOp); |
5734 | SDValue SExt = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: WideOp, |
5735 | N2: DAG.getValueType(MVT::i32)); |
5736 | SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: WideOp, RHS: SExt, |
5737 | Cond: ISD::SETNE); |
5738 | return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL); |
5739 | } |
5740 | |
5741 | // Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw. |
5742 | static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) { |
5743 | assert(Op.getValueType() == MVT::i32 && RV64LegalI32 && |
5744 | "Unexpected custom legalisation" ); |
5745 | SDLoc DL(Op); |
5746 | SDValue LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 0)); |
5747 | SDValue RHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
5748 | SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::i64, N1: LHS, N2: RHS); |
5749 | SDValue Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Mul); |
5750 | SDValue SExt = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: Mul, |
5751 | N2: DAG.getValueType(MVT::i32)); |
5752 | SDValue Ovf = DAG.getSetCC(DL, VT: Op.getValue(R: 1).getValueType(), LHS: Mul, RHS: SExt, |
5753 | Cond: ISD::SETNE); |
5754 | return DAG.getMergeValues(Ops: {Res, Ovf}, dl: DL); |
5755 | } |
5756 | |
5757 | SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, |
5758 | SelectionDAG &DAG) const { |
5759 | SDLoc DL(Op); |
5760 | MVT VT = Op.getSimpleValueType(); |
5761 | MVT XLenVT = Subtarget.getXLenVT(); |
5762 | unsigned Check = Op.getConstantOperandVal(i: 1); |
5763 | unsigned TDCMask = 0; |
5764 | if (Check & fcSNan) |
5765 | TDCMask |= RISCV::FPMASK_Signaling_NaN; |
5766 | if (Check & fcQNan) |
5767 | TDCMask |= RISCV::FPMASK_Quiet_NaN; |
5768 | if (Check & fcPosInf) |
5769 | TDCMask |= RISCV::FPMASK_Positive_Infinity; |
5770 | if (Check & fcNegInf) |
5771 | TDCMask |= RISCV::FPMASK_Negative_Infinity; |
5772 | if (Check & fcPosNormal) |
5773 | TDCMask |= RISCV::FPMASK_Positive_Normal; |
5774 | if (Check & fcNegNormal) |
5775 | TDCMask |= RISCV::FPMASK_Negative_Normal; |
5776 | if (Check & fcPosSubnormal) |
5777 | TDCMask |= RISCV::FPMASK_Positive_Subnormal; |
5778 | if (Check & fcNegSubnormal) |
5779 | TDCMask |= RISCV::FPMASK_Negative_Subnormal; |
5780 | if (Check & fcPosZero) |
5781 | TDCMask |= RISCV::FPMASK_Positive_Zero; |
5782 | if (Check & fcNegZero) |
5783 | TDCMask |= RISCV::FPMASK_Negative_Zero; |
5784 | |
5785 | bool IsOneBitMask = isPowerOf2_32(Value: TDCMask); |
5786 | |
5787 | SDValue TDCMaskV = DAG.getConstant(Val: TDCMask, DL, VT: XLenVT); |
5788 | |
5789 | if (VT.isVector()) { |
5790 | SDValue Op0 = Op.getOperand(i: 0); |
5791 | MVT VT0 = Op.getOperand(i: 0).getSimpleValueType(); |
5792 | |
5793 | if (VT.isScalableVector()) { |
5794 | MVT DstVT = VT0.changeVectorElementTypeToInteger(); |
5795 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT0, DL, DAG, Subtarget); |
5796 | if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { |
5797 | Mask = Op.getOperand(i: 2); |
5798 | VL = Op.getOperand(i: 3); |
5799 | } |
5800 | SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: DstVT, N1: Op0, N2: Mask, |
5801 | N3: VL, Flags: Op->getFlags()); |
5802 | if (IsOneBitMask) |
5803 | return DAG.getSetCC(DL, VT, LHS: FPCLASS, |
5804 | RHS: DAG.getConstant(Val: TDCMask, DL, VT: DstVT), |
5805 | Cond: ISD::CondCode::SETEQ); |
5806 | SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: DstVT, N1: FPCLASS, |
5807 | N2: DAG.getConstant(Val: TDCMask, DL, VT: DstVT)); |
5808 | return DAG.getSetCC(DL, VT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: DstVT), |
5809 | Cond: ISD::SETNE); |
5810 | } |
5811 | |
5812 | MVT ContainerVT0 = getContainerForFixedLengthVector(VT: VT0); |
5813 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
5814 | MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); |
5815 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT0, ContainerVT: ContainerVT0, DL, DAG, Subtarget); |
5816 | if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { |
5817 | Mask = Op.getOperand(i: 2); |
5818 | MVT MaskContainerVT = |
5819 | getContainerForFixedLengthVector(VT: Mask.getSimpleValueType()); |
5820 | Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget); |
5821 | VL = Op.getOperand(i: 3); |
5822 | } |
5823 | Op0 = convertToScalableVector(VT: ContainerVT0, V: Op0, DAG, Subtarget); |
5824 | |
5825 | SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: ContainerDstVT, N1: Op0, |
5826 | N2: Mask, N3: VL, Flags: Op->getFlags()); |
5827 | |
5828 | TDCMaskV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT, |
5829 | N1: DAG.getUNDEF(VT: ContainerDstVT), N2: TDCMaskV, N3: VL); |
5830 | if (IsOneBitMask) { |
5831 | SDValue VMSEQ = |
5832 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
5833 | Ops: {FPCLASS, TDCMaskV, DAG.getCondCode(Cond: ISD::SETEQ), |
5834 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5835 | return convertFromScalableVector(VT, V: VMSEQ, DAG, Subtarget); |
5836 | } |
5837 | SDValue AND = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerDstVT, N1: FPCLASS, |
5838 | N2: TDCMaskV, N3: DAG.getUNDEF(VT: ContainerDstVT), N4: Mask, N5: VL); |
5839 | |
5840 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
5841 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT, |
5842 | N1: DAG.getUNDEF(VT: ContainerDstVT), N2: SplatZero, N3: VL); |
5843 | |
5844 | SDValue VMSNE = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
5845 | Ops: {AND, SplatZero, DAG.getCondCode(Cond: ISD::SETNE), |
5846 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5847 | return convertFromScalableVector(VT, V: VMSNE, DAG, Subtarget); |
5848 | } |
5849 | |
5850 | SDValue FCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS, DL, VT: XLenVT, Operand: Op.getOperand(i: 0)); |
5851 | SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: FCLASS, N2: TDCMaskV); |
5852 | SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), |
5853 | Cond: ISD::CondCode::SETNE); |
5854 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res); |
5855 | } |
5856 | |
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
// operations propagate NaNs.
5859 | static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, |
5860 | const RISCVSubtarget &Subtarget) { |
5861 | SDLoc DL(Op); |
5862 | MVT VT = Op.getSimpleValueType(); |
5863 | |
5864 | SDValue X = Op.getOperand(i: 0); |
5865 | SDValue Y = Op.getOperand(i: 1); |
5866 | |
5867 | if (!VT.isVector()) { |
5868 | MVT XLenVT = Subtarget.getXLenVT(); |
5869 | |
    // If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
    // ensures that when one input is a NaN, the other will also be a NaN,
    // allowing the NaN to propagate. If both inputs are NaN, this will swap
    // the inputs, which is harmless.
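    // For example, if X is a NaN and Y is 1.0, NewY becomes X (a NaN) while
    // NewX remains X, so the min/max sees two NaN operands and the NaN
    // propagates to the result.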
5874 | |
5875 | SDValue NewY = Y; |
5876 | if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: X)) { |
5877 | SDValue XIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: X, RHS: X, Cond: ISD::SETOEQ); |
5878 | NewY = DAG.getSelect(DL, VT, Cond: XIsNonNan, LHS: Y, RHS: X); |
5879 | } |
5880 | |
5881 | SDValue NewX = X; |
5882 | if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: Y)) { |
5883 | SDValue YIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: Y, RHS: Y, Cond: ISD::SETOEQ); |
5884 | NewX = DAG.getSelect(DL, VT, Cond: YIsNonNan, LHS: X, RHS: Y); |
5885 | } |
5886 | |
5887 | unsigned Opc = |
5888 | Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; |
5889 | return DAG.getNode(Opcode: Opc, DL, VT, N1: NewX, N2: NewY); |
5890 | } |
5891 | |
  // Check for NaNs before converting the fixed-length vectors to scalable.
5893 | bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: X); |
5894 | bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: Y); |
5895 | |
5896 | MVT ContainerVT = VT; |
5897 | if (VT.isFixedLengthVector()) { |
5898 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
5899 | X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget); |
5900 | Y = convertToScalableVector(VT: ContainerVT, V: Y, DAG, Subtarget); |
5901 | } |
5902 | |
5903 | SDValue Mask, VL; |
5904 | if (Op->isVPOpcode()) { |
5905 | Mask = Op.getOperand(i: 2); |
5906 | if (VT.isFixedLengthVector()) |
5907 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
5908 | Subtarget); |
5909 | VL = Op.getOperand(i: 3); |
5910 | } else { |
5911 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
5912 | } |
5913 | |
5914 | SDValue NewY = Y; |
5915 | if (!XIsNeverNan) { |
5916 | SDValue XIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
5917 | Ops: {X, X, DAG.getCondCode(Cond: ISD::SETOEQ), |
5918 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5919 | NewY = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: XIsNonNan, N2: Y, N3: X, |
5920 | N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
5921 | } |
5922 | |
5923 | SDValue NewX = X; |
5924 | if (!YIsNeverNan) { |
5925 | SDValue YIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
5926 | Ops: {Y, Y, DAG.getCondCode(Cond: ISD::SETOEQ), |
5927 | DAG.getUNDEF(VT: ContainerVT), Mask, VL}); |
5928 | NewX = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: YIsNonNan, N2: X, N3: Y, |
5929 | N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
5930 | } |
5931 | |
5932 | unsigned Opc = |
5933 | Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM |
5934 | ? RISCVISD::VFMAX_VL |
5935 | : RISCVISD::VFMIN_VL; |
5936 | SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: NewX, N2: NewY, |
5937 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
5938 | if (VT.isFixedLengthVector()) |
5939 | Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
5940 | return Res; |
5941 | } |
5942 | |
/// Get the RISC-V target-specific VL op for a given SDNode.
5944 | static unsigned getRISCVVLOp(SDValue Op) { |
5945 | #define OP_CASE(NODE) \ |
5946 | case ISD::NODE: \ |
5947 | return RISCVISD::NODE##_VL; |
5948 | #define VP_CASE(NODE) \ |
5949 | case ISD::VP_##NODE: \ |
5950 | return RISCVISD::NODE##_VL; |
5951 | // clang-format off |
5952 | switch (Op.getOpcode()) { |
5953 | default: |
5954 | llvm_unreachable("don't have RISC-V specified VL op for this SDNode" ); |
5955 | OP_CASE(ADD) |
5956 | OP_CASE(SUB) |
5957 | OP_CASE(MUL) |
5958 | OP_CASE(MULHS) |
5959 | OP_CASE(MULHU) |
5960 | OP_CASE(SDIV) |
5961 | OP_CASE(SREM) |
5962 | OP_CASE(UDIV) |
5963 | OP_CASE(UREM) |
5964 | OP_CASE(SHL) |
5965 | OP_CASE(SRA) |
5966 | OP_CASE(SRL) |
5967 | OP_CASE(ROTL) |
5968 | OP_CASE(ROTR) |
5969 | OP_CASE(BSWAP) |
5970 | OP_CASE(CTTZ) |
5971 | OP_CASE(CTLZ) |
5972 | OP_CASE(CTPOP) |
5973 | OP_CASE(BITREVERSE) |
5974 | OP_CASE(SADDSAT) |
5975 | OP_CASE(UADDSAT) |
5976 | OP_CASE(SSUBSAT) |
5977 | OP_CASE(USUBSAT) |
5978 | OP_CASE(AVGFLOORS) |
5979 | OP_CASE(AVGFLOORU) |
5980 | OP_CASE(AVGCEILS) |
5981 | OP_CASE(AVGCEILU) |
5982 | OP_CASE(FADD) |
5983 | OP_CASE(FSUB) |
5984 | OP_CASE(FMUL) |
5985 | OP_CASE(FDIV) |
5986 | OP_CASE(FNEG) |
5987 | OP_CASE(FABS) |
5988 | OP_CASE(FSQRT) |
5989 | OP_CASE(SMIN) |
5990 | OP_CASE(SMAX) |
5991 | OP_CASE(UMIN) |
5992 | OP_CASE(UMAX) |
5993 | OP_CASE(STRICT_FADD) |
5994 | OP_CASE(STRICT_FSUB) |
5995 | OP_CASE(STRICT_FMUL) |
5996 | OP_CASE(STRICT_FDIV) |
5997 | OP_CASE(STRICT_FSQRT) |
5998 | VP_CASE(ADD) // VP_ADD |
5999 | VP_CASE(SUB) // VP_SUB |
6000 | VP_CASE(MUL) // VP_MUL |
6001 | VP_CASE(SDIV) // VP_SDIV |
6002 | VP_CASE(SREM) // VP_SREM |
6003 | VP_CASE(UDIV) // VP_UDIV |
6004 | VP_CASE(UREM) // VP_UREM |
6005 | VP_CASE(SHL) // VP_SHL |
6006 | VP_CASE(FADD) // VP_FADD |
6007 | VP_CASE(FSUB) // VP_FSUB |
6008 | VP_CASE(FMUL) // VP_FMUL |
6009 | VP_CASE(FDIV) // VP_FDIV |
6010 | VP_CASE(FNEG) // VP_FNEG |
6011 | VP_CASE(FABS) // VP_FABS |
6012 | VP_CASE(SMIN) // VP_SMIN |
6013 | VP_CASE(SMAX) // VP_SMAX |
6014 | VP_CASE(UMIN) // VP_UMIN |
6015 | VP_CASE(UMAX) // VP_UMAX |
6016 | VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN |
6017 | VP_CASE(SETCC) // VP_SETCC |
6018 | VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP |
6019 | VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP |
6020 | VP_CASE(BITREVERSE) // VP_BITREVERSE |
6021 | VP_CASE(SADDSAT) // VP_SADDSAT |
6022 | VP_CASE(UADDSAT) // VP_UADDSAT |
6023 | VP_CASE(SSUBSAT) // VP_SSUBSAT |
6024 | VP_CASE(USUBSAT) // VP_USUBSAT |
6025 | VP_CASE(BSWAP) // VP_BSWAP |
6026 | VP_CASE(CTLZ) // VP_CTLZ |
6027 | VP_CASE(CTTZ) // VP_CTTZ |
6028 | VP_CASE(CTPOP) // VP_CTPOP |
6029 | case ISD::CTLZ_ZERO_UNDEF: |
6030 | case ISD::VP_CTLZ_ZERO_UNDEF: |
6031 | return RISCVISD::CTLZ_VL; |
6032 | case ISD::CTTZ_ZERO_UNDEF: |
6033 | case ISD::VP_CTTZ_ZERO_UNDEF: |
6034 | return RISCVISD::CTTZ_VL; |
6035 | case ISD::FMA: |
6036 | case ISD::VP_FMA: |
6037 | return RISCVISD::VFMADD_VL; |
6038 | case ISD::STRICT_FMA: |
6039 | return RISCVISD::STRICT_VFMADD_VL; |
6040 | case ISD::AND: |
6041 | case ISD::VP_AND: |
6042 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
6043 | return RISCVISD::VMAND_VL; |
6044 | return RISCVISD::AND_VL; |
6045 | case ISD::OR: |
6046 | case ISD::VP_OR: |
6047 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
6048 | return RISCVISD::VMOR_VL; |
6049 | return RISCVISD::OR_VL; |
6050 | case ISD::XOR: |
6051 | case ISD::VP_XOR: |
6052 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
6053 | return RISCVISD::VMXOR_VL; |
6054 | return RISCVISD::XOR_VL; |
6055 | case ISD::VP_SELECT: |
6056 | case ISD::VP_MERGE: |
6057 | return RISCVISD::VMERGE_VL; |
6058 | case ISD::VP_SRA: |
6059 | return RISCVISD::SRA_VL; |
6060 | case ISD::VP_SRL: |
6061 | return RISCVISD::SRL_VL; |
6062 | case ISD::VP_SQRT: |
6063 | return RISCVISD::FSQRT_VL; |
6064 | case ISD::VP_SIGN_EXTEND: |
6065 | return RISCVISD::VSEXT_VL; |
6066 | case ISD::VP_ZERO_EXTEND: |
6067 | return RISCVISD::VZEXT_VL; |
6068 | case ISD::VP_FP_TO_SINT: |
6069 | return RISCVISD::VFCVT_RTZ_X_F_VL; |
6070 | case ISD::VP_FP_TO_UINT: |
6071 | return RISCVISD::VFCVT_RTZ_XU_F_VL; |
6072 | case ISD::FMINNUM: |
6073 | case ISD::VP_FMINNUM: |
6074 | return RISCVISD::VFMIN_VL; |
6075 | case ISD::FMAXNUM: |
6076 | case ISD::VP_FMAXNUM: |
6077 | return RISCVISD::VFMAX_VL; |
6078 | case ISD::LRINT: |
6079 | case ISD::VP_LRINT: |
6080 | case ISD::LLRINT: |
6081 | case ISD::VP_LLRINT: |
6082 | return RISCVISD::VFCVT_X_F_VL; |
6083 | } |
6084 | // clang-format on |
6085 | #undef OP_CASE |
6086 | #undef VP_CASE |
6087 | } |
6088 | |
/// Return true if a RISC-V target-specific op has a merge operand.
6090 | static bool hasMergeOp(unsigned Opcode) { |
6091 | assert(Opcode > RISCVISD::FIRST_NUMBER && |
6092 | Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && |
6093 | "not a RISC-V target specific op" ); |
6094 | static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == |
6095 | 130 && |
6096 | RISCVISD::LAST_RISCV_STRICTFP_OPCODE - |
6097 | ISD::FIRST_TARGET_STRICTFP_OPCODE == |
6098 | 21 && |
6099 | "adding target specific op should update this function" ); |
6100 | if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) |
6101 | return true; |
6102 | if (Opcode == RISCVISD::FCOPYSIGN_VL) |
6103 | return true; |
6104 | if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) |
6105 | return true; |
6106 | if (Opcode == RISCVISD::SETCC_VL) |
6107 | return true; |
6108 | if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) |
6109 | return true; |
6110 | if (Opcode == RISCVISD::VMERGE_VL) |
6111 | return true; |
6112 | return false; |
6113 | } |
6114 | |
/// Return true if a RISC-V target-specific op has a mask operand.
6116 | static bool hasMaskOp(unsigned Opcode) { |
6117 | assert(Opcode > RISCVISD::FIRST_NUMBER && |
6118 | Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && |
6119 | "not a RISC-V target specific op" ); |
6120 | static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == |
6121 | 130 && |
6122 | RISCVISD::LAST_RISCV_STRICTFP_OPCODE - |
6123 | ISD::FIRST_TARGET_STRICTFP_OPCODE == |
6124 | 21 && |
6125 | "adding target specific op should update this function" ); |
6126 | if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) |
6127 | return true; |
6128 | if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) |
6129 | return true; |
6130 | if (Opcode >= RISCVISD::STRICT_FADD_VL && |
6131 | Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) |
6132 | return true; |
6133 | return false; |
6134 | } |
6135 | |
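/// Split a vector-typed operation into two operations on the low and high
/// halves of the result type: each vector operand is split with SplitVector,
/// scalar operands are reused by both halves, and the two results are joined
/// with CONCAT_VECTORS.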
6136 | static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { |
6137 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType()); |
6138 | SDLoc DL(Op); |
6139 | |
6140 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
6141 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
6142 | |
6143 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
6144 | if (!Op.getOperand(i: j).getValueType().isVector()) { |
6145 | LoOperands[j] = Op.getOperand(i: j); |
6146 | HiOperands[j] = Op.getOperand(i: j); |
6147 | continue; |
6148 | } |
6149 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
6150 | DAG.SplitVector(N: Op.getOperand(i: j), DL); |
6151 | } |
6152 | |
6153 | SDValue LoRes = |
6154 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags()); |
6155 | SDValue HiRes = |
6156 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags()); |
6157 | |
6158 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes); |
6159 | } |
6160 | |
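/// Split a VP operation like SplitVectorOp, additionally splitting the
/// explicit vector length (EVL) operand across the low and high halves.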
6161 | static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) { |
6162 | assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op" ); |
6163 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType()); |
6164 | SDLoc DL(Op); |
6165 | |
6166 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
6167 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
6168 | |
6169 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
6170 | if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == j) { |
6171 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
6172 | DAG.SplitEVL(N: Op.getOperand(i: j), VecVT: Op.getValueType(), DL); |
6173 | continue; |
6174 | } |
6175 | if (!Op.getOperand(i: j).getValueType().isVector()) { |
6176 | LoOperands[j] = Op.getOperand(i: j); |
6177 | HiOperands[j] = Op.getOperand(i: j); |
6178 | continue; |
6179 | } |
6180 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
6181 | DAG.SplitVector(N: Op.getOperand(i: j), DL); |
6182 | } |
6183 | |
6184 | SDValue LoRes = |
6185 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags()); |
6186 | SDValue HiRes = |
6187 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags()); |
6188 | |
6189 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes); |
6190 | } |
6191 | |
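/// Split a VP reduction: reduce the low half first, then feed its result in
/// as the start value for the reduction of the high half.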
6192 | static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) { |
6193 | SDLoc DL(Op); |
6194 | |
6195 | auto [Lo, Hi] = DAG.SplitVector(N: Op.getOperand(i: 1), DL); |
6196 | auto [MaskLo, MaskHi] = DAG.SplitVector(N: Op.getOperand(i: 2), DL); |
6197 | auto [EVLLo, EVLHi] = |
6198 | DAG.SplitEVL(N: Op.getOperand(i: 3), VecVT: Op.getOperand(i: 1).getValueType(), DL); |
6199 | |
6200 | SDValue ResLo = |
6201 | DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
6202 | Ops: {Op.getOperand(i: 0), Lo, MaskLo, EVLLo}, Flags: Op->getFlags()); |
6203 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
6204 | Ops: {ResLo, Hi, MaskHi, EVLHi}, Flags: Op->getFlags()); |
6205 | } |
6206 | |
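/// Split a strict FP vector operation into low and high halves, threading the
/// chain produced by the low half into the high half, and merge the
/// concatenated result with the final chain.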
6207 | static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) { |
6208 | |
6209 | assert(Op->isStrictFPOpcode()); |
6210 | |
6211 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op->getValueType(ResNo: 0)); |
6212 | |
6213 | SDVTList LoVTs = DAG.getVTList(VT1: LoVT, VT2: Op->getValueType(ResNo: 1)); |
6214 | SDVTList HiVTs = DAG.getVTList(VT1: HiVT, VT2: Op->getValueType(ResNo: 1)); |
6215 | |
6216 | SDLoc DL(Op); |
6217 | |
6218 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
6219 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
6220 | |
6221 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
6222 | if (!Op.getOperand(i: j).getValueType().isVector()) { |
6223 | LoOperands[j] = Op.getOperand(i: j); |
6224 | HiOperands[j] = Op.getOperand(i: j); |
6225 | continue; |
6226 | } |
6227 | std::tie(args&: LoOperands[j], args&: HiOperands[j]) = |
6228 | DAG.SplitVector(N: Op.getOperand(i: j), DL); |
6229 | } |
6230 | |
6231 | SDValue LoRes = |
6232 | DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: LoVTs, Ops: LoOperands, Flags: Op->getFlags()); |
6233 | HiOperands[0] = LoRes.getValue(R: 1); |
6234 | SDValue HiRes = |
6235 | DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: HiVTs, Ops: HiOperands, Flags: Op->getFlags()); |
6236 | |
6237 | SDValue V = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op->getValueType(ResNo: 0), |
6238 | N1: LoRes.getValue(R: 0), N2: HiRes.getValue(R: 0)); |
6239 | return DAG.getMergeValues(Ops: {V, HiRes.getValue(R: 1)}, dl: DL); |
6240 | } |
6241 | |
6242 | SDValue RISCVTargetLowering::LowerOperation(SDValue Op, |
6243 | SelectionDAG &DAG) const { |
6244 | switch (Op.getOpcode()) { |
6245 | default: |
6246 | report_fatal_error(reason: "unimplemented operand" ); |
6247 | case ISD::ATOMIC_FENCE: |
6248 | return LowerATOMIC_FENCE(Op, DAG, Subtarget); |
6249 | case ISD::GlobalAddress: |
6250 | return lowerGlobalAddress(Op, DAG); |
6251 | case ISD::BlockAddress: |
6252 | return lowerBlockAddress(Op, DAG); |
6253 | case ISD::ConstantPool: |
6254 | return lowerConstantPool(Op, DAG); |
6255 | case ISD::JumpTable: |
6256 | return lowerJumpTable(Op, DAG); |
6257 | case ISD::GlobalTLSAddress: |
6258 | return lowerGlobalTLSAddress(Op, DAG); |
6259 | case ISD::Constant: |
6260 | return lowerConstant(Op, DAG, Subtarget); |
6261 | case ISD::SELECT: |
6262 | return lowerSELECT(Op, DAG); |
6263 | case ISD::BRCOND: |
6264 | return lowerBRCOND(Op, DAG); |
6265 | case ISD::VASTART: |
6266 | return lowerVASTART(Op, DAG); |
6267 | case ISD::FRAMEADDR: |
6268 | return lowerFRAMEADDR(Op, DAG); |
6269 | case ISD::RETURNADDR: |
6270 | return lowerRETURNADDR(Op, DAG); |
6271 | case ISD::SADDO: |
6272 | case ISD::SSUBO: |
6273 | return lowerSADDO_SSUBO(Op, DAG); |
6274 | case ISD::SMULO: |
6275 | return lowerSMULO(Op, DAG); |
6276 | case ISD::SHL_PARTS: |
6277 | return lowerShiftLeftParts(Op, DAG); |
6278 | case ISD::SRA_PARTS: |
6279 | return lowerShiftRightParts(Op, DAG, IsSRA: true); |
6280 | case ISD::SRL_PARTS: |
6281 | return lowerShiftRightParts(Op, DAG, IsSRA: false); |
6282 | case ISD::ROTL: |
6283 | case ISD::ROTR: |
6284 | if (Op.getValueType().isFixedLengthVector()) { |
6285 | assert(Subtarget.hasStdExtZvkb()); |
6286 | return lowerToScalableOp(Op, DAG); |
6287 | } |
6288 | assert(Subtarget.hasVendorXTHeadBb() && |
6289 | !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && |
6290 | "Unexpected custom legalization" ); |
6291 | // XTHeadBb only supports rotate by constant. |
6292 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1))) |
6293 | return SDValue(); |
6294 | return Op; |
6295 | case ISD::BITCAST: { |
6296 | SDLoc DL(Op); |
6297 | EVT VT = Op.getValueType(); |
6298 | SDValue Op0 = Op.getOperand(i: 0); |
6299 | EVT Op0VT = Op0.getValueType(); |
6300 | MVT XLenVT = Subtarget.getXLenVT(); |
6301 | if (VT == MVT::f16 && Op0VT == MVT::i16 && |
6302 | Subtarget.hasStdExtZfhminOrZhinxmin()) { |
6303 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0); |
6304 | SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: MVT::f16, Operand: NewOp0); |
6305 | return FPConv; |
6306 | } |
6307 | if (VT == MVT::bf16 && Op0VT == MVT::i16 && |
6308 | Subtarget.hasStdExtZfbfmin()) { |
6309 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0); |
6310 | SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: MVT::bf16, Operand: NewOp0); |
6311 | return FPConv; |
6312 | } |
6313 | if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && |
6314 | Subtarget.hasStdExtFOrZfinx()) { |
6315 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
6316 | SDValue FPConv = |
6317 | DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, Operand: NewOp0); |
6318 | return FPConv; |
6319 | } |
6320 | if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) { |
6321 | SDValue Lo, Hi; |
6322 | std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
6323 | SDValue RetReg = |
6324 | DAG.getNode(Opcode: RISCVISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
6325 | return RetReg; |
6326 | } |
6327 | |
6328 | // Consider other scalar<->scalar casts as legal if the types are legal. |
6329 | // Otherwise expand them. |
6330 | if (!VT.isVector() && !Op0VT.isVector()) { |
6331 | if (isTypeLegal(VT) && isTypeLegal(VT: Op0VT)) |
6332 | return Op; |
6333 | return SDValue(); |
6334 | } |
6335 | |
6336 | assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && |
6337 | "Unexpected types" ); |
6338 | |
6339 | if (VT.isFixedLengthVector()) { |
6340 | // We can handle fixed length vector bitcasts with a simple replacement |
6341 | // in isel. |
6342 | if (Op0VT.isFixedLengthVector()) |
6343 | return Op; |
// When bitcasting from scalar to fixed-length vector, insert the scalar
// into a one-element vector whose element type matches the scalar, then
// bitcast that vector to the result type.
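// For example, (v4i8 (bitcast i32 X)) becomes
// (v4i8 (bitcast (v1i32 (insert_vector_elt undef, X, 0)))).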
6347 | if (!Op0VT.isVector()) { |
6348 | EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: Op0VT, NumElements: 1); |
6349 | if (!isTypeLegal(VT: BVT)) |
6350 | return SDValue(); |
6351 | return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: BVT, |
6352 | N1: DAG.getUNDEF(VT: BVT), N2: Op0, |
6353 | N3: DAG.getVectorIdxConstant(Val: 0, DL))); |
6354 | } |
6355 | return SDValue(); |
6356 | } |
6357 | // Custom-legalize bitcasts from fixed-length vector types to scalar types |
6358 | // thus: bitcast the vector to a one-element vector type whose element type |
6359 | // is the same as the result type, and extract the first element. |
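// For example, (i32 (bitcast v4i8 X)) becomes
// (extract_vector_elt (v1i32 (bitcast X)), 0).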
6360 | if (!VT.isVector() && Op0VT.isFixedLengthVector()) { |
6361 | EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1); |
6362 | if (!isTypeLegal(VT: BVT)) |
6363 | return SDValue(); |
6364 | SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0); |
6365 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec, |
6366 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
6367 | } |
6368 | return SDValue(); |
6369 | } |
6370 | case ISD::INTRINSIC_WO_CHAIN: |
6371 | return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
6372 | case ISD::INTRINSIC_W_CHAIN: |
6373 | return LowerINTRINSIC_W_CHAIN(Op, DAG); |
6374 | case ISD::INTRINSIC_VOID: |
6375 | return LowerINTRINSIC_VOID(Op, DAG); |
6376 | case ISD::IS_FPCLASS: |
6377 | return LowerIS_FPCLASS(Op, DAG); |
6378 | case ISD::BITREVERSE: { |
6379 | MVT VT = Op.getSimpleValueType(); |
6380 | if (VT.isFixedLengthVector()) { |
6381 | assert(Subtarget.hasStdExtZvbb()); |
6382 | return lowerToScalableOp(Op, DAG); |
6383 | } |
6384 | SDLoc DL(Op); |
6385 | assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization" ); |
6386 | assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode" ); |
6387 | // Expand bitreverse to a bswap(rev8) followed by brev8. |
6388 | SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: Op.getOperand(i: 0)); |
6389 | return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: BSwap); |
6390 | } |
6391 | case ISD::TRUNCATE: |
6392 | // Only custom-lower vector truncates |
6393 | if (!Op.getSimpleValueType().isVector()) |
6394 | return Op; |
6395 | return lowerVectorTruncLike(Op, DAG); |
6396 | case ISD::ANY_EXTEND: |
6397 | case ISD::ZERO_EXTEND: |
6398 | if (Op.getOperand(i: 0).getValueType().isVector() && |
6399 | Op.getOperand(i: 0).getValueType().getVectorElementType() == MVT::i1) |
6400 | return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: 1); |
6401 | return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VZEXT_VL); |
6402 | case ISD::SIGN_EXTEND: |
6403 | if (Op.getOperand(i: 0).getValueType().isVector() && |
6404 | Op.getOperand(i: 0).getValueType().getVectorElementType() == MVT::i1) |
6405 | return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: -1); |
6406 | return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VSEXT_VL); |
6407 | case ISD::SPLAT_VECTOR_PARTS: |
6408 | return lowerSPLAT_VECTOR_PARTS(Op, DAG); |
6409 | case ISD::INSERT_VECTOR_ELT: |
6410 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
6411 | case ISD::EXTRACT_VECTOR_ELT: |
6412 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
6413 | case ISD::SCALAR_TO_VECTOR: { |
6414 | MVT VT = Op.getSimpleValueType(); |
6415 | SDLoc DL(Op); |
6416 | SDValue Scalar = Op.getOperand(i: 0); |
6417 | if (VT.getVectorElementType() == MVT::i1) { |
6418 | MVT WideVT = VT.changeVectorElementType(EltVT: MVT::i8); |
6419 | SDValue V = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: WideVT, Operand: Scalar); |
6420 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: V); |
6421 | } |
6422 | MVT ContainerVT = VT; |
6423 | if (VT.isFixedLengthVector()) |
6424 | ContainerVT = getContainerForFixedLengthVector(VT); |
6425 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
6426 | Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Scalar); |
6427 | SDValue V = DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: ContainerVT, |
6428 | N1: DAG.getUNDEF(VT: ContainerVT), N2: Scalar, N3: VL); |
6429 | if (VT.isFixedLengthVector()) |
6430 | V = convertFromScalableVector(VT, V, DAG, Subtarget); |
6431 | return V; |
6432 | } |
6433 | case ISD::VSCALE: { |
6434 | MVT XLenVT = Subtarget.getXLenVT(); |
6435 | MVT VT = Op.getSimpleValueType(); |
6436 | SDLoc DL(Op); |
6437 | SDValue Res = DAG.getNode(Opcode: RISCVISD::READ_VLENB, DL, VT: XLenVT); |
// We define our scalable vector types for lmul=1 to use a 64-bit known
// minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
// vscale as VLENB / 8.
6441 | static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!" ); |
6442 | if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) |
6443 | report_fatal_error(reason: "Support for VLEN==32 is incomplete." ); |
6444 | // We assume VLENB is a multiple of 8. We manually choose the best shift |
6445 | // here because SimplifyDemandedBits isn't always able to simplify it. |
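// For example, a multiplier of 2 becomes (srl VLENB, 2), a multiplier of 16
// becomes (shl VLENB, 1), and a multiplier of 24 is scaled down to
// (mul VLENB, 3).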
6446 | uint64_t Val = Op.getConstantOperandVal(i: 0); |
6447 | if (isPowerOf2_64(Value: Val)) { |
6448 | uint64_t Log2 = Log2_64(Value: Val); |
6449 | if (Log2 < 3) |
6450 | Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res, |
6451 | N2: DAG.getConstant(Val: 3 - Log2, DL, VT)); |
6452 | else if (Log2 > 3) |
6453 | Res = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: Res, |
6454 | N2: DAG.getConstant(Val: Log2 - 3, DL, VT: XLenVT)); |
6455 | } else if ((Val % 8) == 0) { |
6456 | // If the multiplier is a multiple of 8, scale it down to avoid needing |
6457 | // to shift the VLENB value. |
6458 | Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: Res, |
6459 | N2: DAG.getConstant(Val: Val / 8, DL, VT: XLenVT)); |
6460 | } else { |
6461 | SDValue VScale = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res, |
6462 | N2: DAG.getConstant(Val: 3, DL, VT: XLenVT)); |
6463 | Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: VScale, |
6464 | N2: DAG.getConstant(Val, DL, VT: XLenVT)); |
6465 | } |
6466 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res); |
6467 | } |
6468 | case ISD::FPOWI: { |
6469 | // Custom promote f16 powi with illegal i32 integer type on RV64. Once |
6470 | // promoted this will be legalized into a libcall by LegalizeIntegerTypes. |
6471 | if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && |
6472 | Op.getOperand(i: 1).getValueType() == MVT::i32) { |
6473 | SDLoc DL(Op); |
6474 | SDValue Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0)); |
6475 | SDValue Powi = |
6476 | DAG.getNode(Opcode: ISD::FPOWI, DL, VT: MVT::f32, N1: Op0, N2: Op.getOperand(i: 1)); |
6477 | return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: MVT::f16, N1: Powi, |
6478 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
6479 | } |
6480 | return SDValue(); |
6481 | } |
6482 | case ISD::FMAXIMUM: |
6483 | case ISD::FMINIMUM: |
6484 | if (Op.getValueType() == MVT::nxv32f16 && |
6485 | (Subtarget.hasVInstructionsF16Minimal() && |
6486 | !Subtarget.hasVInstructionsF16())) |
6487 | return SplitVectorOp(Op, DAG); |
6488 | return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); |
6489 | case ISD::FP_EXTEND: { |
6490 | SDLoc DL(Op); |
6491 | EVT VT = Op.getValueType(); |
6492 | SDValue Op0 = Op.getOperand(i: 0); |
6493 | EVT Op0VT = Op0.getValueType(); |
6494 | if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) |
6495 | return DAG.getNode(Opcode: RISCVISD::FP_EXTEND_BF16, DL, VT: MVT::f32, Operand: Op0); |
6496 | if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) { |
6497 | SDValue FloatVal = |
6498 | DAG.getNode(Opcode: RISCVISD::FP_EXTEND_BF16, DL, VT: MVT::f32, Operand: Op0); |
6499 | return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f64, Operand: FloatVal); |
6500 | } |
6501 | |
6502 | if (!Op.getValueType().isVector()) |
6503 | return Op; |
6504 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
6505 | } |
6506 | case ISD::FP_ROUND: { |
6507 | SDLoc DL(Op); |
6508 | EVT VT = Op.getValueType(); |
6509 | SDValue Op0 = Op.getOperand(i: 0); |
6510 | EVT Op0VT = Op0.getValueType(); |
6511 | if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin()) |
6512 | return DAG.getNode(Opcode: RISCVISD::FP_ROUND_BF16, DL, VT: MVT::bf16, Operand: Op0); |
6513 | if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() && |
6514 | Subtarget.hasStdExtDOrZdinx()) { |
6515 | SDValue FloatVal = |
6516 | DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: MVT::f32, N1: Op0, |
6517 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
6518 | return DAG.getNode(Opcode: RISCVISD::FP_ROUND_BF16, DL, VT: MVT::bf16, Operand: FloatVal); |
6519 | } |
6520 | |
6521 | if (!Op.getValueType().isVector()) |
6522 | return Op; |
6523 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
6524 | } |
6525 | case ISD::STRICT_FP_ROUND: |
6526 | case ISD::STRICT_FP_EXTEND: |
6527 | return lowerStrictFPExtendOrRoundLike(Op, DAG); |
6528 | case ISD::SINT_TO_FP: |
6529 | case ISD::UINT_TO_FP: |
6530 | if (Op.getValueType().isVector() && |
6531 | Op.getValueType().getScalarType() == MVT::f16 && |
6532 | (Subtarget.hasVInstructionsF16Minimal() && |
6533 | !Subtarget.hasVInstructionsF16())) { |
6534 | if (Op.getValueType() == MVT::nxv32f16) |
6535 | return SplitVectorOp(Op, DAG); |
6536 | // int -> f32 |
6537 | SDLoc DL(Op); |
6538 | MVT NVT = |
6539 | MVT::getVectorVT(VT: MVT::f32, EC: Op.getValueType().getVectorElementCount()); |
6540 | SDValue NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops()); |
6541 | // f32 -> f16 |
6542 | return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC, |
6543 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
6544 | } |
6545 | [[fallthrough]]; |
6546 | case ISD::FP_TO_SINT: |
6547 | case ISD::FP_TO_UINT: |
6548 | if (SDValue Op1 = Op.getOperand(i: 0); |
6549 | Op1.getValueType().isVector() && |
6550 | Op1.getValueType().getScalarType() == MVT::f16 && |
6551 | (Subtarget.hasVInstructionsF16Minimal() && |
6552 | !Subtarget.hasVInstructionsF16())) { |
6553 | if (Op1.getValueType() == MVT::nxv32f16) |
6554 | return SplitVectorOp(Op, DAG); |
6555 | // f16 -> f32 |
6556 | SDLoc DL(Op); |
6557 | MVT NVT = MVT::getVectorVT(VT: MVT::f32, |
6558 | EC: Op1.getValueType().getVectorElementCount()); |
6559 | SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1); |
6560 | // f32 -> int |
6561 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), Operand: WidenVec); |
6562 | } |
6563 | [[fallthrough]]; |
6564 | case ISD::STRICT_FP_TO_SINT: |
6565 | case ISD::STRICT_FP_TO_UINT: |
6566 | case ISD::STRICT_SINT_TO_FP: |
6567 | case ISD::STRICT_UINT_TO_FP: { |
6568 | // RVV can only do fp<->int conversions to types half/double the size as |
6569 | // the source. We custom-lower any conversions that do two hops into |
6570 | // sequences. |
6571 | MVT VT = Op.getSimpleValueType(); |
6572 | if (!VT.isVector()) |
6573 | return Op; |
6574 | SDLoc DL(Op); |
6575 | bool IsStrict = Op->isStrictFPOpcode(); |
6576 | SDValue Src = Op.getOperand(i: 0 + IsStrict); |
6577 | MVT EltVT = VT.getVectorElementType(); |
6578 | MVT SrcVT = Src.getSimpleValueType(); |
6579 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
6580 | unsigned EltSize = EltVT.getSizeInBits(); |
6581 | unsigned SrcEltSize = SrcEltVT.getSizeInBits(); |
6582 | assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && |
6583 | "Unexpected vector element types" ); |
6584 | |
6585 | bool IsInt2FP = SrcEltVT.isInteger(); |
6586 | // Widening conversions |
6587 | if (EltSize > (2 * SrcEltSize)) { |
6588 | if (IsInt2FP) { |
6589 | // Do a regular integer sign/zero extension then convert to float. |
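// For example, (nxv2f64 (sint_to_fp nxv2i8 X)) becomes a sign_extend to
// nxv2i32 followed by a sint_to_fp from nxv2i32 to nxv2f64.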
6590 | MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSize / 2), |
6591 | EC: VT.getVectorElementCount()); |
6592 | unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP || |
6593 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP) |
6594 | ? ISD::ZERO_EXTEND |
6595 | : ISD::SIGN_EXTEND; |
6596 | SDValue Ext = DAG.getNode(Opcode: ExtOpcode, DL, VT: IVecVT, Operand: Src); |
6597 | if (IsStrict) |
6598 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(), |
6599 | N1: Op.getOperand(i: 0), N2: Ext); |
6600 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: Ext); |
6601 | } |
6602 | // FP2Int |
6603 | assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering" ); |
6604 | // Do one doubling fp_extend then complete the operation by converting |
6605 | // to int. |
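// For example, (nxv2i64 (fp_to_sint nxv2f16 X)) becomes an fp_extend to
// nxv2f32 followed by an fp_to_sint from nxv2f32 to nxv2i64.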
6606 | MVT InterimFVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount()); |
6607 | if (IsStrict) { |
6608 | auto [FExt, Chain] = |
6609 | DAG.getStrictFPExtendOrRound(Op: Src, Chain: Op.getOperand(i: 0), DL, VT: InterimFVT); |
6610 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(), N1: Chain, N2: FExt); |
6611 | } |
6612 | SDValue FExt = DAG.getFPExtendOrRound(Op: Src, DL, VT: InterimFVT); |
6613 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: FExt); |
6614 | } |
6615 | |
6616 | // Narrowing conversions |
6617 | if (SrcEltSize > (2 * EltSize)) { |
6618 | if (IsInt2FP) { |
6619 | // One narrowing int_to_fp, then an fp_round. |
6620 | assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering" ); |
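// For example, (nxv2f16 (sint_to_fp nxv2i64 X)) becomes a sint_to_fp to
// nxv2f32 followed by an fp_round to nxv2f16.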
6621 | MVT InterimFVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount()); |
6622 | if (IsStrict) { |
6623 | SDValue Int2FP = DAG.getNode(Opcode: Op.getOpcode(), DL, |
6624 | VTList: DAG.getVTList(VT1: InterimFVT, VT2: MVT::Other), |
6625 | N1: Op.getOperand(i: 0), N2: Src); |
6626 | SDValue Chain = Int2FP.getValue(R: 1); |
6627 | return DAG.getStrictFPExtendOrRound(Op: Int2FP, Chain, DL, VT).first; |
6628 | } |
6629 | SDValue Int2FP = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: InterimFVT, Operand: Src); |
6630 | return DAG.getFPExtendOrRound(Op: Int2FP, DL, VT); |
6631 | } |
6632 | // FP2Int |
6633 | // One narrowing fp_to_int, then truncate the integer. If the float isn't |
6634 | // representable by the integer, the result is poison. |
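// For example, (nxv2i8 (fp_to_sint nxv2f32 X)) becomes an fp_to_sint to
// nxv2i16 followed by a truncate to nxv2i8.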
6635 | MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2), |
6636 | EC: VT.getVectorElementCount()); |
6637 | if (IsStrict) { |
6638 | SDValue FP2Int = |
6639 | DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: DAG.getVTList(VT1: IVecVT, VT2: MVT::Other), |
6640 | N1: Op.getOperand(i: 0), N2: Src); |
6641 | SDValue Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int); |
6642 | return DAG.getMergeValues(Ops: {Res, FP2Int.getValue(R: 1)}, dl: DL); |
6643 | } |
6644 | SDValue FP2Int = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: IVecVT, Operand: Src); |
6645 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int); |
6646 | } |
6647 | |
// Scalable vectors can exit here. Patterns will handle equally-sized
// conversions as well as halving/doubling ones.
6650 | if (!VT.isFixedLengthVector()) |
6651 | return Op; |
6652 | |
6653 | // For fixed-length vectors we lower to a custom "VL" node. |
6654 | unsigned RVVOpc = 0; |
6655 | switch (Op.getOpcode()) { |
6656 | default: |
6657 | llvm_unreachable("Impossible opcode" ); |
6658 | case ISD::FP_TO_SINT: |
6659 | RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; |
6660 | break; |
6661 | case ISD::FP_TO_UINT: |
6662 | RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; |
6663 | break; |
6664 | case ISD::SINT_TO_FP: |
6665 | RVVOpc = RISCVISD::SINT_TO_FP_VL; |
6666 | break; |
6667 | case ISD::UINT_TO_FP: |
6668 | RVVOpc = RISCVISD::UINT_TO_FP_VL; |
6669 | break; |
6670 | case ISD::STRICT_FP_TO_SINT: |
6671 | RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL; |
6672 | break; |
6673 | case ISD::STRICT_FP_TO_UINT: |
6674 | RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL; |
6675 | break; |
6676 | case ISD::STRICT_SINT_TO_FP: |
6677 | RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL; |
6678 | break; |
6679 | case ISD::STRICT_UINT_TO_FP: |
6680 | RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL; |
6681 | break; |
6682 | } |
6683 | |
6684 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
6685 | MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
assert(ContainerVT.getVectorElementCount() ==
           SrcContainerVT.getVectorElementCount() &&
       "Expected same element count");
6688 | |
6689 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
6690 | |
6691 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
6692 | if (IsStrict) { |
6693 | Src = DAG.getNode(Opcode: RVVOpc, DL, VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), |
6694 | N1: Op.getOperand(i: 0), N2: Src, N3: Mask, N4: VL); |
6695 | SDValue SubVec = convertFromScalableVector(VT, V: Src, DAG, Subtarget); |
6696 | return DAG.getMergeValues(Ops: {SubVec, Src.getValue(R: 1)}, dl: DL); |
6697 | } |
6698 | Src = DAG.getNode(Opcode: RVVOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
6699 | return convertFromScalableVector(VT, V: Src, DAG, Subtarget); |
6700 | } |
6701 | case ISD::FP_TO_SINT_SAT: |
6702 | case ISD::FP_TO_UINT_SAT: |
6703 | return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); |
6704 | case ISD::FP_TO_BF16: { |
6705 | // Custom lower to ensure the libcall return is passed in an FPR on hard |
6706 | // float ABIs. |
6707 | assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization" ); |
6708 | SDLoc DL(Op); |
6709 | MakeLibCallOptions CallOptions; |
6710 | RTLIB::Libcall LC = |
6711 | RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16); |
6712 | SDValue Res = |
6713 | makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first; |
6714 | if (Subtarget.is64Bit() && !RV64LegalI32) |
6715 | return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Res); |
6716 | return DAG.getBitcast(VT: MVT::i32, V: Res); |
6717 | } |
6718 | case ISD::BF16_TO_FP: { |
6719 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization" ); |
6720 | MVT VT = Op.getSimpleValueType(); |
6721 | SDLoc DL(Op); |
6722 | Op = DAG.getNode( |
6723 | Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0), |
6724 | N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL)); |
6725 | SDValue Res = Subtarget.is64Bit() |
6726 | ? DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, Operand: Op) |
6727 | : DAG.getBitcast(VT: MVT::f32, V: Op); |
6728 | // fp_extend if the target VT is bigger than f32. |
6729 | if (VT != MVT::f32) |
6730 | return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res); |
6731 | return Res; |
6732 | } |
6733 | case ISD::FP_TO_FP16: { |
6734 | // Custom lower to ensure the libcall return is passed in an FPR on hard |
6735 | // float ABIs. |
6736 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation" ); |
6737 | SDLoc DL(Op); |
6738 | MakeLibCallOptions CallOptions; |
6739 | RTLIB::Libcall LC = |
6740 | RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::f16); |
6741 | SDValue Res = |
6742 | makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first; |
6743 | if (Subtarget.is64Bit() && !RV64LegalI32) |
6744 | return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Res); |
6745 | return DAG.getBitcast(VT: MVT::i32, V: Res); |
6746 | } |
6747 | case ISD::FP16_TO_FP: { |
6748 | // Custom lower to ensure the libcall argument is passed in an FPR on hard |
6749 | // float ABIs. |
6750 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation" ); |
6751 | SDLoc DL(Op); |
6752 | MakeLibCallOptions CallOptions; |
6753 | SDValue Arg = Subtarget.is64Bit() |
6754 | ? DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, |
6755 | Operand: Op.getOperand(i: 0)) |
6756 | : DAG.getBitcast(VT: MVT::f32, V: Op.getOperand(i: 0)); |
6757 | SDValue Res = |
6758 | makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg, CallOptions, dl: DL) |
6759 | .first; |
6760 | return Res; |
6761 | } |
6762 | case ISD::FTRUNC: |
6763 | case ISD::FCEIL: |
6764 | case ISD::FFLOOR: |
6765 | case ISD::FNEARBYINT: |
6766 | case ISD::FRINT: |
6767 | case ISD::FROUND: |
6768 | case ISD::FROUNDEVEN: |
6769 | return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
6770 | case ISD::LRINT: |
6771 | case ISD::LLRINT: |
6772 | return lowerVectorXRINT(Op, DAG, Subtarget); |
6773 | case ISD::VECREDUCE_ADD: |
6774 | case ISD::VECREDUCE_UMAX: |
6775 | case ISD::VECREDUCE_SMAX: |
6776 | case ISD::VECREDUCE_UMIN: |
6777 | case ISD::VECREDUCE_SMIN: |
6778 | return lowerVECREDUCE(Op, DAG); |
6779 | case ISD::VECREDUCE_AND: |
6780 | case ISD::VECREDUCE_OR: |
6781 | case ISD::VECREDUCE_XOR: |
6782 | if (Op.getOperand(i: 0).getValueType().getVectorElementType() == MVT::i1) |
6783 | return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); |
6784 | return lowerVECREDUCE(Op, DAG); |
6785 | case ISD::VECREDUCE_FADD: |
6786 | case ISD::VECREDUCE_SEQ_FADD: |
6787 | case ISD::VECREDUCE_FMIN: |
6788 | case ISD::VECREDUCE_FMAX: |
6789 | case ISD::VECREDUCE_FMAXIMUM: |
6790 | case ISD::VECREDUCE_FMINIMUM: |
6791 | return lowerFPVECREDUCE(Op, DAG); |
6792 | case ISD::VP_REDUCE_ADD: |
6793 | case ISD::VP_REDUCE_UMAX: |
6794 | case ISD::VP_REDUCE_SMAX: |
6795 | case ISD::VP_REDUCE_UMIN: |
6796 | case ISD::VP_REDUCE_SMIN: |
6797 | case ISD::VP_REDUCE_FADD: |
6798 | case ISD::VP_REDUCE_SEQ_FADD: |
6799 | case ISD::VP_REDUCE_FMIN: |
6800 | case ISD::VP_REDUCE_FMAX: |
6801 | case ISD::VP_REDUCE_FMINIMUM: |
6802 | case ISD::VP_REDUCE_FMAXIMUM: |
6803 | if (Op.getOperand(i: 1).getValueType() == MVT::nxv32f16 && |
6804 | (Subtarget.hasVInstructionsF16Minimal() && |
6805 | !Subtarget.hasVInstructionsF16())) |
6806 | return SplitVectorReductionOp(Op, DAG); |
6807 | return lowerVPREDUCE(Op, DAG); |
6808 | case ISD::VP_REDUCE_AND: |
6809 | case ISD::VP_REDUCE_OR: |
6810 | case ISD::VP_REDUCE_XOR: |
6811 | if (Op.getOperand(i: 1).getValueType().getVectorElementType() == MVT::i1) |
6812 | return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); |
6813 | return lowerVPREDUCE(Op, DAG); |
6814 | case ISD::VP_CTTZ_ELTS: |
6815 | case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: |
6816 | return lowerVPCttzElements(Op, DAG); |
6817 | case ISD::UNDEF: { |
6818 | MVT ContainerVT = getContainerForFixedLengthVector(VT: Op.getSimpleValueType()); |
6819 | return convertFromScalableVector(VT: Op.getSimpleValueType(), |
6820 | V: DAG.getUNDEF(VT: ContainerVT), DAG, Subtarget); |
6821 | } |
6822 | case ISD::INSERT_SUBVECTOR: |
6823 | return lowerINSERT_SUBVECTOR(Op, DAG); |
6824 | case ISD::EXTRACT_SUBVECTOR: |
6825 | return lowerEXTRACT_SUBVECTOR(Op, DAG); |
6826 | case ISD::VECTOR_DEINTERLEAVE: |
6827 | return lowerVECTOR_DEINTERLEAVE(Op, DAG); |
6828 | case ISD::VECTOR_INTERLEAVE: |
6829 | return lowerVECTOR_INTERLEAVE(Op, DAG); |
6830 | case ISD::STEP_VECTOR: |
6831 | return lowerSTEP_VECTOR(Op, DAG); |
6832 | case ISD::VECTOR_REVERSE: |
6833 | return lowerVECTOR_REVERSE(Op, DAG); |
6834 | case ISD::VECTOR_SPLICE: |
6835 | return lowerVECTOR_SPLICE(Op, DAG); |
6836 | case ISD::BUILD_VECTOR: |
6837 | return lowerBUILD_VECTOR(Op, DAG, Subtarget); |
6838 | case ISD::SPLAT_VECTOR: |
6839 | if ((Op.getValueType().getScalarType() == MVT::f16 && |
6840 | (Subtarget.hasVInstructionsF16Minimal() && |
6841 | Subtarget.hasStdExtZfhminOrZhinxmin() && |
6842 | !Subtarget.hasVInstructionsF16())) || |
6843 | (Op.getValueType().getScalarType() == MVT::bf16 && |
6844 | (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) { |
6845 | if (Op.getValueType() == MVT::nxv32f16 || |
6846 | Op.getValueType() == MVT::nxv32bf16) |
6847 | return SplitVectorOp(Op, DAG); |
6848 | SDLoc DL(Op); |
6849 | SDValue NewScalar = |
6850 | DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0)); |
6851 | SDValue NewSplat = DAG.getNode( |
6852 | Opcode: ISD::SPLAT_VECTOR, DL, |
6853 | VT: MVT::getVectorVT(VT: MVT::f32, EC: Op.getValueType().getVectorElementCount()), |
6854 | Operand: NewScalar); |
6855 | return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NewSplat, |
6856 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
6857 | } |
6858 | if (Op.getValueType().getVectorElementType() == MVT::i1) |
6859 | return lowerVectorMaskSplat(Op, DAG); |
6860 | return SDValue(); |
6861 | case ISD::VECTOR_SHUFFLE: |
6862 | return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); |
6863 | case ISD::CONCAT_VECTORS: { |
6864 | // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is |
6865 | // better than going through the stack, as the default expansion does. |
6866 | SDLoc DL(Op); |
6867 | MVT VT = Op.getSimpleValueType(); |
6868 | MVT ContainerVT = VT; |
6869 | if (VT.isFixedLengthVector()) |
6870 | ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget); |
6871 | |
6872 | // Recursively split concat_vectors with more than 2 operands: |
6873 | // |
6874 | // concat_vector op1, op2, op3, op4 |
6875 | // -> |
6876 | // concat_vector (concat_vector op1, op2), (concat_vector op3, op4) |
6877 | // |
6878 | // This reduces the length of the chain of vslideups and allows us to |
6879 | // perform the vslideups at a smaller LMUL, limited to MF2. |
6880 | if (Op.getNumOperands() > 2 && |
6881 | ContainerVT.bitsGE(VT: getLMUL1VT(VT: ContainerVT))) { |
6882 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
6883 | assert(isPowerOf2_32(Op.getNumOperands())); |
6884 | size_t HalfNumOps = Op.getNumOperands() / 2; |
6885 | SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT, |
6886 | Ops: Op->ops().take_front(N: HalfNumOps)); |
6887 | SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT, |
6888 | Ops: Op->ops().drop_front(N: HalfNumOps)); |
6889 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi); |
6890 | } |
6891 | |
6892 | unsigned NumOpElts = |
6893 | Op.getOperand(i: 0).getSimpleValueType().getVectorMinNumElements(); |
6894 | SDValue Vec = DAG.getUNDEF(VT); |
6895 | for (const auto &OpIdx : enumerate(First: Op->ops())) { |
6896 | SDValue SubVec = OpIdx.value(); |
6897 | // Don't insert undef subvectors. |
6898 | if (SubVec.isUndef()) |
6899 | continue; |
6900 | Vec = |
6901 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Vec, N2: SubVec, |
6902 | N3: DAG.getVectorIdxConstant(Val: OpIdx.index() * NumOpElts, DL)); |
6903 | } |
6904 | return Vec; |
6905 | } |
6906 | case ISD::LOAD: |
6907 | if (auto V = expandUnalignedRVVLoad(Op, DAG)) |
6908 | return V; |
6909 | if (Op.getValueType().isFixedLengthVector()) |
6910 | return lowerFixedLengthVectorLoadToRVV(Op, DAG); |
6911 | return Op; |
6912 | case ISD::STORE: |
6913 | if (auto V = expandUnalignedRVVStore(Op, DAG)) |
6914 | return V; |
6915 | if (Op.getOperand(i: 1).getValueType().isFixedLengthVector()) |
6916 | return lowerFixedLengthVectorStoreToRVV(Op, DAG); |
6917 | return Op; |
6918 | case ISD::MLOAD: |
6919 | case ISD::VP_LOAD: |
6920 | return lowerMaskedLoad(Op, DAG); |
6921 | case ISD::MSTORE: |
6922 | case ISD::VP_STORE: |
6923 | return lowerMaskedStore(Op, DAG); |
6924 | case ISD::SELECT_CC: { |
6925 | // This occurs because we custom legalize SETGT and SETUGT for setcc. That |
6926 | // causes LegalizeDAG to think we need to custom legalize select_cc. Expand |
6927 | // into separate SETCC+SELECT just like LegalizeDAG. |
6928 | SDValue Tmp1 = Op.getOperand(i: 0); |
6929 | SDValue Tmp2 = Op.getOperand(i: 1); |
6930 | SDValue True = Op.getOperand(i: 2); |
6931 | SDValue False = Op.getOperand(i: 3); |
6932 | EVT VT = Op.getValueType(); |
6933 | SDValue CC = Op.getOperand(i: 4); |
6934 | EVT CmpVT = Tmp1.getValueType(); |
6935 | EVT CCVT = |
6936 | getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: CmpVT); |
6937 | SDLoc DL(Op); |
6938 | SDValue Cond = |
6939 | DAG.getNode(Opcode: ISD::SETCC, DL, VT: CCVT, N1: Tmp1, N2: Tmp2, N3: CC, Flags: Op->getFlags()); |
6940 | return DAG.getSelect(DL, VT, Cond, LHS: True, RHS: False); |
6941 | } |
6942 | case ISD::SETCC: { |
6943 | MVT OpVT = Op.getOperand(i: 0).getSimpleValueType(); |
6944 | if (OpVT.isScalarInteger()) { |
6945 | MVT VT = Op.getSimpleValueType(); |
6946 | SDValue LHS = Op.getOperand(i: 0); |
6947 | SDValue RHS = Op.getOperand(i: 1); |
6948 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get(); |
6949 | assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && |
6950 | "Unexpected CondCode" ); |
6951 | |
6952 | SDLoc DL(Op); |
6953 | |
6954 | // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can |
6955 | // convert this to the equivalent of (set(u)ge X, C+1) by using |
6956 | // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant |
6957 | // in a register. |
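// For example, (setugt X, 5) becomes (xori (sltiu X, 6), 1).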
6958 | if (isa<ConstantSDNode>(Val: RHS)) { |
6959 | int64_t Imm = cast<ConstantSDNode>(Val&: RHS)->getSExtValue(); |
6960 | if (Imm != 0 && isInt<12>(x: (uint64_t)Imm + 1)) { |
6961 | // If this is an unsigned compare and the constant is -1, incrementing |
6962 | // the constant would change behavior. The result should be false. |
6963 | if (CCVal == ISD::SETUGT && Imm == -1) |
6964 | return DAG.getConstant(Val: 0, DL, VT); |
6965 | // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. |
6966 | CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal); |
6967 | SDValue SetCC = DAG.getSetCC( |
6968 | DL, VT, LHS, RHS: DAG.getConstant(Val: Imm + 1, DL, VT: OpVT), Cond: CCVal); |
6969 | return DAG.getLogicalNOT(DL, Val: SetCC, VT); |
6970 | } |
6971 | } |
6972 | |
// Not a constant we can handle; swap the operands and condition code to
// SETLT/SETULT.
6975 | CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal); |
6976 | return DAG.getSetCC(DL, VT, LHS: RHS, RHS: LHS, Cond: CCVal); |
6977 | } |
6978 | |
6979 | if (Op.getOperand(i: 0).getSimpleValueType() == MVT::nxv32f16 && |
6980 | (Subtarget.hasVInstructionsF16Minimal() && |
6981 | !Subtarget.hasVInstructionsF16())) |
6982 | return SplitVectorOp(Op, DAG); |
6983 | |
6984 | return lowerFixedLengthVectorSetccToRVV(Op, DAG); |
6985 | } |
6986 | case ISD::ADD: |
6987 | case ISD::SUB: |
6988 | case ISD::MUL: |
6989 | case ISD::MULHS: |
6990 | case ISD::MULHU: |
6991 | case ISD::AND: |
6992 | case ISD::OR: |
6993 | case ISD::XOR: |
6994 | case ISD::SDIV: |
6995 | case ISD::SREM: |
6996 | case ISD::UDIV: |
6997 | case ISD::UREM: |
6998 | case ISD::BSWAP: |
6999 | case ISD::CTPOP: |
7000 | return lowerToScalableOp(Op, DAG); |
7001 | case ISD::SHL: |
7002 | case ISD::SRA: |
7003 | case ISD::SRL: |
7004 | if (Op.getSimpleValueType().isFixedLengthVector()) |
7005 | return lowerToScalableOp(Op, DAG); |
7006 | // This can be called for an i32 shift amount that needs to be promoted. |
7007 | assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && |
7008 | "Unexpected custom legalisation" ); |
7009 | return SDValue(); |
7010 | case ISD::FADD: |
7011 | case ISD::FSUB: |
7012 | case ISD::FMUL: |
7013 | case ISD::FDIV: |
7014 | case ISD::FNEG: |
7015 | case ISD::FABS: |
7016 | case ISD::FSQRT: |
7017 | case ISD::FMA: |
7018 | case ISD::FMINNUM: |
7019 | case ISD::FMAXNUM: |
7020 | if (Op.getValueType() == MVT::nxv32f16 && |
7021 | (Subtarget.hasVInstructionsF16Minimal() && |
7022 | !Subtarget.hasVInstructionsF16())) |
7023 | return SplitVectorOp(Op, DAG); |
7024 | [[fallthrough]]; |
7025 | case ISD::AVGFLOORS: |
7026 | case ISD::AVGFLOORU: |
7027 | case ISD::AVGCEILS: |
7028 | case ISD::AVGCEILU: |
7029 | case ISD::SMIN: |
7030 | case ISD::SMAX: |
7031 | case ISD::UMIN: |
7032 | case ISD::UMAX: |
7033 | return lowerToScalableOp(Op, DAG); |
7034 | case ISD::UADDSAT: |
7035 | case ISD::USUBSAT: |
7036 | if (!Op.getValueType().isVector()) |
7037 | return lowerUADDSAT_USUBSAT(Op, DAG); |
7038 | return lowerToScalableOp(Op, DAG); |
7039 | case ISD::SADDSAT: |
7040 | case ISD::SSUBSAT: |
7041 | if (!Op.getValueType().isVector()) |
7042 | return lowerSADDSAT_SSUBSAT(Op, DAG); |
7043 | return lowerToScalableOp(Op, DAG); |
7044 | case ISD::ABDS: |
7045 | case ISD::ABDU: { |
7046 | SDLoc dl(Op); |
7047 | EVT VT = Op->getValueType(ResNo: 0); |
7048 | SDValue LHS = DAG.getFreeze(V: Op->getOperand(Num: 0)); |
7049 | SDValue RHS = DAG.getFreeze(V: Op->getOperand(Num: 1)); |
7050 | bool IsSigned = Op->getOpcode() == ISD::ABDS; |
7051 | |
7052 | // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs)) |
7053 | // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs)) |
7054 | unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX; |
7055 | unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN; |
7056 | SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS); |
7057 | SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS); |
7058 | return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min); |
7059 | } |
7060 | case ISD::ABS: |
7061 | case ISD::VP_ABS: |
7062 | return lowerABS(Op, DAG); |
7063 | case ISD::CTLZ: |
7064 | case ISD::CTLZ_ZERO_UNDEF: |
7065 | case ISD::CTTZ: |
7066 | case ISD::CTTZ_ZERO_UNDEF: |
7067 | if (Subtarget.hasStdExtZvbb()) |
7068 | return lowerToScalableOp(Op, DAG); |
7069 | assert(Op.getOpcode() != ISD::CTTZ); |
7070 | return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); |
7071 | case ISD::VSELECT: |
7072 | return lowerFixedLengthVectorSelectToRVV(Op, DAG); |
7073 | case ISD::FCOPYSIGN: |
7074 | if (Op.getValueType() == MVT::nxv32f16 && |
7075 | (Subtarget.hasVInstructionsF16Minimal() && |
7076 | !Subtarget.hasVInstructionsF16())) |
7077 | return SplitVectorOp(Op, DAG); |
7078 | return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); |
7079 | case ISD::STRICT_FADD: |
7080 | case ISD::STRICT_FSUB: |
7081 | case ISD::STRICT_FMUL: |
7082 | case ISD::STRICT_FDIV: |
7083 | case ISD::STRICT_FSQRT: |
7084 | case ISD::STRICT_FMA: |
7085 | if (Op.getValueType() == MVT::nxv32f16 && |
7086 | (Subtarget.hasVInstructionsF16Minimal() && |
7087 | !Subtarget.hasVInstructionsF16())) |
7088 | return SplitStrictFPVectorOp(Op, DAG); |
7089 | return lowerToScalableOp(Op, DAG); |
7090 | case ISD::STRICT_FSETCC: |
7091 | case ISD::STRICT_FSETCCS: |
7092 | return lowerVectorStrictFSetcc(Op, DAG); |
7093 | case ISD::STRICT_FCEIL: |
7094 | case ISD::STRICT_FRINT: |
7095 | case ISD::STRICT_FFLOOR: |
7096 | case ISD::STRICT_FTRUNC: |
7097 | case ISD::STRICT_FNEARBYINT: |
7098 | case ISD::STRICT_FROUND: |
7099 | case ISD::STRICT_FROUNDEVEN: |
7100 | return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
7101 | case ISD::MGATHER: |
7102 | case ISD::VP_GATHER: |
7103 | return lowerMaskedGather(Op, DAG); |
7104 | case ISD::MSCATTER: |
7105 | case ISD::VP_SCATTER: |
7106 | return lowerMaskedScatter(Op, DAG); |
7107 | case ISD::GET_ROUNDING: |
7108 | return lowerGET_ROUNDING(Op, DAG); |
7109 | case ISD::SET_ROUNDING: |
7110 | return lowerSET_ROUNDING(Op, DAG); |
7111 | case ISD::EH_DWARF_CFA: |
7112 | return lowerEH_DWARF_CFA(Op, DAG); |
7113 | case ISD::VP_SELECT: |
7114 | case ISD::VP_MERGE: |
7115 | case ISD::VP_ADD: |
7116 | case ISD::VP_SUB: |
7117 | case ISD::VP_MUL: |
7118 | case ISD::VP_SDIV: |
7119 | case ISD::VP_UDIV: |
7120 | case ISD::VP_SREM: |
7121 | case ISD::VP_UREM: |
7122 | case ISD::VP_UADDSAT: |
7123 | case ISD::VP_USUBSAT: |
7124 | case ISD::VP_SADDSAT: |
7125 | case ISD::VP_SSUBSAT: |
7126 | case ISD::VP_LRINT: |
7127 | case ISD::VP_LLRINT: |
7128 | return lowerVPOp(Op, DAG); |
7129 | case ISD::VP_AND: |
7130 | case ISD::VP_OR: |
7131 | case ISD::VP_XOR: |
7132 | return lowerLogicVPOp(Op, DAG); |
7133 | case ISD::VP_FADD: |
7134 | case ISD::VP_FSUB: |
7135 | case ISD::VP_FMUL: |
7136 | case ISD::VP_FDIV: |
7137 | case ISD::VP_FNEG: |
7138 | case ISD::VP_FABS: |
7139 | case ISD::VP_SQRT: |
7140 | case ISD::VP_FMA: |
7141 | case ISD::VP_FMINNUM: |
7142 | case ISD::VP_FMAXNUM: |
7143 | case ISD::VP_FCOPYSIGN: |
7144 | if (Op.getValueType() == MVT::nxv32f16 && |
7145 | (Subtarget.hasVInstructionsF16Minimal() && |
7146 | !Subtarget.hasVInstructionsF16())) |
7147 | return SplitVPOp(Op, DAG); |
7148 | [[fallthrough]]; |
7149 | case ISD::VP_SRA: |
7150 | case ISD::VP_SRL: |
7151 | case ISD::VP_SHL: |
7152 | return lowerVPOp(Op, DAG); |
7153 | case ISD::VP_IS_FPCLASS: |
7154 | return LowerIS_FPCLASS(Op, DAG); |
7155 | case ISD::VP_SIGN_EXTEND: |
7156 | case ISD::VP_ZERO_EXTEND: |
7157 | if (Op.getOperand(i: 0).getSimpleValueType().getVectorElementType() == MVT::i1) |
7158 | return lowerVPExtMaskOp(Op, DAG); |
7159 | return lowerVPOp(Op, DAG); |
7160 | case ISD::VP_TRUNCATE: |
7161 | return lowerVectorTruncLike(Op, DAG); |
7162 | case ISD::VP_FP_EXTEND: |
7163 | case ISD::VP_FP_ROUND: |
7164 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
7165 | case ISD::VP_SINT_TO_FP: |
7166 | case ISD::VP_UINT_TO_FP: |
7167 | if (Op.getValueType().isVector() && |
7168 | Op.getValueType().getScalarType() == MVT::f16 && |
7169 | (Subtarget.hasVInstructionsF16Minimal() && |
7170 | !Subtarget.hasVInstructionsF16())) { |
7171 | if (Op.getValueType() == MVT::nxv32f16) |
7172 | return SplitVPOp(Op, DAG); |
7173 | // int -> f32 |
7174 | SDLoc DL(Op); |
7175 | MVT NVT = |
7176 | MVT::getVectorVT(VT: MVT::f32, EC: Op.getValueType().getVectorElementCount()); |
7177 | auto NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops()); |
7178 | // f32 -> f16 |
7179 | return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC, |
7180 | N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)); |
7181 | } |
7182 | [[fallthrough]]; |
7183 | case ISD::VP_FP_TO_SINT: |
7184 | case ISD::VP_FP_TO_UINT: |
7185 | if (SDValue Op1 = Op.getOperand(i: 0); |
7186 | Op1.getValueType().isVector() && |
7187 | Op1.getValueType().getScalarType() == MVT::f16 && |
7188 | (Subtarget.hasVInstructionsF16Minimal() && |
7189 | !Subtarget.hasVInstructionsF16())) { |
7190 | if (Op1.getValueType() == MVT::nxv32f16) |
7191 | return SplitVPOp(Op, DAG); |
7192 | // f16 -> f32 |
7193 | SDLoc DL(Op); |
7194 | MVT NVT = MVT::getVectorVT(VT: MVT::f32, |
7195 | EC: Op1.getValueType().getVectorElementCount()); |
7196 | SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1); |
7197 | // f32 -> int |
7198 | return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
7199 | Ops: {WidenVec, Op.getOperand(i: 1), Op.getOperand(i: 2)}); |
7200 | } |
7201 | return lowerVPFPIntConvOp(Op, DAG); |
7202 | case ISD::VP_SETCC: |
7203 | if (Op.getOperand(i: 0).getSimpleValueType() == MVT::nxv32f16 && |
7204 | (Subtarget.hasVInstructionsF16Minimal() && |
7205 | !Subtarget.hasVInstructionsF16())) |
7206 | return SplitVPOp(Op, DAG); |
7207 | if (Op.getOperand(i: 0).getSimpleValueType().getVectorElementType() == MVT::i1) |
7208 | return lowerVPSetCCMaskOp(Op, DAG); |
7209 | [[fallthrough]]; |
7210 | case ISD::VP_SMIN: |
7211 | case ISD::VP_SMAX: |
7212 | case ISD::VP_UMIN: |
7213 | case ISD::VP_UMAX: |
7214 | case ISD::VP_BITREVERSE: |
7215 | case ISD::VP_BSWAP: |
7216 | return lowerVPOp(Op, DAG); |
7217 | case ISD::VP_CTLZ: |
7218 | case ISD::VP_CTLZ_ZERO_UNDEF: |
7219 | if (Subtarget.hasStdExtZvbb()) |
7220 | return lowerVPOp(Op, DAG); |
7221 | return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); |
7222 | case ISD::VP_CTTZ: |
7223 | case ISD::VP_CTTZ_ZERO_UNDEF: |
7224 | if (Subtarget.hasStdExtZvbb()) |
7225 | return lowerVPOp(Op, DAG); |
7226 | return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); |
7227 | case ISD::VP_CTPOP: |
7228 | return lowerVPOp(Op, DAG); |
7229 | case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: |
7230 | return lowerVPStridedLoad(Op, DAG); |
7231 | case ISD::EXPERIMENTAL_VP_STRIDED_STORE: |
7232 | return lowerVPStridedStore(Op, DAG); |
7233 | case ISD::VP_FCEIL: |
7234 | case ISD::VP_FFLOOR: |
7235 | case ISD::VP_FRINT: |
7236 | case ISD::VP_FNEARBYINT: |
7237 | case ISD::VP_FROUND: |
7238 | case ISD::VP_FROUNDEVEN: |
7239 | case ISD::VP_FROUNDTOZERO: |
7240 | if (Op.getValueType() == MVT::nxv32f16 && |
7241 | (Subtarget.hasVInstructionsF16Minimal() && |
7242 | !Subtarget.hasVInstructionsF16())) |
7243 | return SplitVPOp(Op, DAG); |
7244 | return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
7245 | case ISD::VP_FMAXIMUM: |
7246 | case ISD::VP_FMINIMUM: |
7247 | if (Op.getValueType() == MVT::nxv32f16 && |
7248 | (Subtarget.hasVInstructionsF16Minimal() && |
7249 | !Subtarget.hasVInstructionsF16())) |
7250 | return SplitVPOp(Op, DAG); |
7251 | return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); |
7252 | case ISD::EXPERIMENTAL_VP_SPLICE: |
7253 | return lowerVPSpliceExperimental(Op, DAG); |
7254 | case ISD::EXPERIMENTAL_VP_REVERSE: |
7255 | return lowerVPReverseExperimental(Op, DAG); |
7256 | case ISD::EXPERIMENTAL_VP_SPLAT: |
7257 | return lowerVPSplatExperimental(Op, DAG); |
7258 | case ISD::CLEAR_CACHE: { |
7259 | assert(getTargetMachine().getTargetTriple().isOSLinux() && |
7260 | "llvm.clear_cache only needs custom lower on Linux targets" ); |
7261 | SDLoc DL(Op); |
7262 | SDValue Flags = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()); |
7263 | return emitFlushICache(DAG, InChain: Op.getOperand(i: 0), Start: Op.getOperand(i: 1), |
7264 | End: Op.getOperand(i: 2), Flags, DL); |
7265 | } |
7266 | } |
7267 | } |
7268 | |
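/// Emit the RISCV_FLUSH_ICACHE libcall for the address range from Start to
/// End with the given Flags, returning only the output chain.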
7269 | SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain, |
7270 | SDValue Start, SDValue End, |
7271 | SDValue Flags, SDLoc DL) const { |
7272 | MakeLibCallOptions CallOptions; |
7273 | std::pair<SDValue, SDValue> CallResult = |
7274 | makeLibCall(DAG, LC: RTLIB::RISCV_FLUSH_ICACHE, RetVT: MVT::isVoid, |
7275 | Ops: {Start, End, Flags}, CallOptions, dl: DL, Chain: InChain); |
7276 | |
7277 | // This function returns void so only the out chain matters. |
7278 | return CallResult.second; |
7279 | } |
7280 | |
7281 | static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, |
7282 | SelectionDAG &DAG, unsigned Flags) { |
7283 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
7284 | } |
7285 | |
7286 | static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty, |
7287 | SelectionDAG &DAG, unsigned Flags) { |
7288 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
7289 | TargetFlags: Flags); |
7290 | } |
7291 | |
7292 | static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, |
7293 | SelectionDAG &DAG, unsigned Flags) { |
7294 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
7295 | Offset: N->getOffset(), TargetFlags: Flags); |
7296 | } |
7297 | |
7298 | static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty, |
7299 | SelectionDAG &DAG, unsigned Flags) { |
7300 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
7301 | } |
7302 | |
7303 | template <class NodeTy> |
7304 | SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
7305 | bool IsLocal, bool IsExternWeak) const { |
7306 | SDLoc DL(N); |
7307 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7308 | |
// When HWASAN is used and tagging of global variables is enabled, they
// should be accessed via the GOT, since the tagged address of a global is
// incompatible with existing code models. This also applies to non-PIC
// mode.
7313 | if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { |
7314 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
7315 | if (IsLocal && !Subtarget.allowTaggedGlobals()) |
7316 | // Use PC-relative addressing to access the symbol. This generates the |
7317 | // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) |
7318 | // %pcrel_lo(auipc)). |
7319 | return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr); |
7320 | |
7321 | // Use PC-relative addressing to access the GOT for this symbol, then load |
7322 | // the address from the GOT. This generates the pattern (PseudoLGA sym), |
7323 | // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). |
7324 | SDValue Load = |
7325 | SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLGA, dl: DL, VT: Ty, Op1: Addr), 0); |
7326 | MachineFunction &MF = DAG.getMachineFunction(); |
7327 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
7328 | PtrInfo: MachinePointerInfo::getGOT(MF), |
7329 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
7330 | MachineMemOperand::MOInvariant, |
7331 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
7332 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
7333 | return Load; |
7334 | } |
7335 | |
7336 | switch (getTargetMachine().getCodeModel()) { |
7337 | default: |
7338 | report_fatal_error(reason: "Unsupported code model for lowering" ); |
7339 | case CodeModel::Small: { |
7340 | // Generate a sequence for accessing addresses within the first 2 GiB of |
7341 | // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). |
7342 | SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); |
7343 | SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); |
7344 | SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi); |
7345 | return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNHi, N2: AddrLo); |
7346 | } |
7347 | case CodeModel::Medium: { |
7348 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
7349 | if (IsExternWeak) { |
7350 | // An extern weak symbol may be undefined, i.e. have value 0, which may |
7351 | // not be within 2GiB of PC, so use GOT-indirect addressing to access the |
7352 | // symbol. This generates the pattern (PseudoLGA sym), which expands to |
7353 | // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). |
7354 | SDValue Load = |
7355 | SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLGA, dl: DL, VT: Ty, Op1: Addr), 0); |
7356 | MachineFunction &MF = DAG.getMachineFunction(); |
7357 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
7358 | PtrInfo: MachinePointerInfo::getGOT(MF), |
7359 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
7360 | MachineMemOperand::MOInvariant, |
7361 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
7362 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
7363 | return Load; |
7364 | } |
7365 | |
7366 | // Generate a sequence for accessing addresses within any 2GiB range within |
7367 | // the address space. This generates the pattern (PseudoLLA sym), which |
7368 | // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). |
7369 | return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr); |
7370 | } |
7371 | } |
7372 | } |
7373 | |
7374 | SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, |
7375 | SelectionDAG &DAG) const { |
7376 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
7377 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
7378 | const GlobalValue *GV = N->getGlobal(); |
7379 | return getAddr(N, DAG, IsLocal: GV->isDSOLocal(), IsExternWeak: GV->hasExternalWeakLinkage()); |
7380 | } |
7381 | |
7382 | SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, |
7383 | SelectionDAG &DAG) const { |
7384 | BlockAddressSDNode *N = cast<BlockAddressSDNode>(Val&: Op); |
7385 | |
7386 | return getAddr(N, DAG); |
7387 | } |
7388 | |
7389 | SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, |
7390 | SelectionDAG &DAG) const { |
7391 | ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Val&: Op); |
7392 | |
7393 | return getAddr(N, DAG); |
7394 | } |
7395 | |
7396 | SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, |
7397 | SelectionDAG &DAG) const { |
7398 | JumpTableSDNode *N = cast<JumpTableSDNode>(Val&: Op); |
7399 | |
7400 | return getAddr(N, DAG); |
7401 | } |
7402 | |
7403 | SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
7404 | SelectionDAG &DAG, |
7405 | bool UseGOT) const { |
7406 | SDLoc DL(N); |
7407 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7408 | const GlobalValue *GV = N->getGlobal(); |
7409 | MVT XLenVT = Subtarget.getXLenVT(); |
7410 | |
7411 | if (UseGOT) { |
7412 | // Use PC-relative addressing to access the GOT for this TLS symbol, then |
7413 | // load the address from the GOT and add the thread pointer. This generates |
7414 | // the pattern (PseudoLA_TLS_IE sym), which expands to |
7415 | // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). |
7416 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
7417 | SDValue Load = |
7418 | SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLA_TLS_IE, dl: DL, VT: Ty, Op1: Addr), 0); |
7419 | MachineFunction &MF = DAG.getMachineFunction(); |
7420 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
7421 | PtrInfo: MachinePointerInfo::getGOT(MF), |
7422 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
7423 | MachineMemOperand::MOInvariant, |
7424 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
7425 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
7426 | |
7427 | // Add the thread pointer. |
7428 | SDValue TPReg = DAG.getRegister(Reg: RISCV::X4, VT: XLenVT); |
7429 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Load, N2: TPReg); |
7430 | } |
7431 | |
7432 | // Generate a sequence for accessing the address relative to the thread |
7433 | // pointer, with the appropriate adjustment for the thread pointer offset. |
7434 | // This generates the pattern |
7435 | // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) |
7436 | SDValue AddrHi = |
7437 | DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_HI); |
7438 | SDValue AddrAdd = |
7439 | DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_ADD); |
7440 | SDValue AddrLo = |
7441 | DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_LO); |
7442 | |
7443 | SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi); |
7444 | SDValue TPReg = DAG.getRegister(Reg: RISCV::X4, VT: XLenVT); |
7445 | SDValue MNAdd = |
7446 | DAG.getNode(Opcode: RISCVISD::ADD_TPREL, DL, VT: Ty, N1: MNHi, N2: TPReg, N3: AddrAdd); |
7447 | return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNAdd, N2: AddrLo); |
7448 | } |
7449 | |
7450 | SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
7451 | SelectionDAG &DAG) const { |
7452 | SDLoc DL(N); |
7453 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7454 | IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits()); |
7455 | const GlobalValue *GV = N->getGlobal(); |
7456 | |
7457 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
7458 | // This generates the pattern (PseudoLA_TLS_GD sym), which expands to |
7459 | // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). |
7460 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
7461 | SDValue Load = |
7462 | SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLA_TLS_GD, dl: DL, VT: Ty, Op1: Addr), 0); |
7463 | |
7464 | // Prepare argument list to generate call. |
7465 | ArgListTy Args; |
7466 | ArgListEntry Entry; |
7467 | Entry.Node = Load; |
7468 | Entry.Ty = CallTy; |
7469 | Args.push_back(x: Entry); |
7470 | |
7471 | // Setup call to __tls_get_addr. |
7472 | TargetLowering::CallLoweringInfo CLI(DAG); |
7473 | CLI.setDebugLoc(DL) |
7474 | .setChain(DAG.getEntryNode()) |
7475 | .setLibCallee(CC: CallingConv::C, ResultType: CallTy, |
7476 | Target: DAG.getExternalSymbol(Sym: "__tls_get_addr" , VT: Ty), |
7477 | ArgsList: std::move(Args)); |
7478 | |
7479 | return LowerCallTo(CLI).first; |
7480 | } |
7481 | |
7482 | SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, |
7483 | SelectionDAG &DAG) const { |
7484 | SDLoc DL(N); |
7485 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
7486 | const GlobalValue *GV = N->getGlobal(); |
7487 | |
7488 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
7489 | // This generates the pattern (PseudoLA_TLSDESC sym), which expands to |
7490 | // |
7491 | // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol) |
7492 | // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label) |
7493 | // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label) |
7494 | // jalr t0, tY // R_RISCV_TLSDESC_CALL(label) |
7495 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
7496 | return SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLA_TLSDESC, dl: DL, VT: Ty, Op1: Addr), 0); |
7497 | } |
7498 | |
7499 | SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
7500 | SelectionDAG &DAG) const { |
7501 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
7502 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
7503 | |
7504 | if (DAG.getTarget().useEmulatedTLS()) |
7505 | return LowerToTLSEmulatedModel(GA: N, DAG); |
7506 | |
7507 | TLSModel::Model Model = getTargetMachine().getTLSModel(GV: N->getGlobal()); |
7508 | |
7509 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
7510 | CallingConv::GHC) |
7511 | report_fatal_error(reason: "In GHC calling convention TLS is not supported" ); |
7512 | |
7513 | SDValue Addr; |
7514 | switch (Model) { |
7515 | case TLSModel::LocalExec: |
7516 | Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); |
7517 | break; |
7518 | case TLSModel::InitialExec: |
7519 | Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); |
7520 | break; |
7521 | case TLSModel::LocalDynamic: |
7522 | case TLSModel::GeneralDynamic: |
7523 | Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG) |
7524 | : getDynamicTLSAddr(N, DAG); |
7525 | break; |
7526 | } |
7527 | |
7528 | return Addr; |
7529 | } |
7530 | |
7531 | // Return true if Val is equal to (setcc LHS, RHS, CC). |
7532 | // Return false if Val is the inverse of (setcc LHS, RHS, CC). |
7533 | // Otherwise, return std::nullopt. |
7534 | static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, |
7535 | ISD::CondCode CC, SDValue Val) { |
7536 | assert(Val->getOpcode() == ISD::SETCC); |
7537 | SDValue LHS2 = Val.getOperand(i: 0); |
7538 | SDValue RHS2 = Val.getOperand(i: 1); |
7539 | ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get(); |
7540 | |
7541 | if (LHS == LHS2 && RHS == RHS2) { |
7542 | if (CC == CC2) |
7543 | return true; |
7544 | if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType())) |
7545 | return false; |
7546 | } else if (LHS == RHS2 && RHS == LHS2) { |
7547 | CC2 = ISD::getSetCCSwappedOperands(Operation: CC2); |
7548 | if (CC == CC2) |
7549 | return true; |
7550 | if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType())) |
7551 | return false; |
7552 | } |
7553 | |
7554 | return std::nullopt; |
7555 | } |
7556 | |
7557 | static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, |
7558 | const RISCVSubtarget &Subtarget) { |
7559 | SDValue CondV = N->getOperand(Num: 0); |
7560 | SDValue TrueV = N->getOperand(Num: 1); |
7561 | SDValue FalseV = N->getOperand(Num: 2); |
7562 | MVT VT = N->getSimpleValueType(ResNo: 0); |
7563 | SDLoc DL(N); |
7564 | |
7565 | if (!Subtarget.hasConditionalMoveFusion()) { |
7566 | // (select c, -1, y) -> -c | y |
7567 | if (isAllOnesConstant(V: TrueV)) { |
7568 | SDValue Neg = DAG.getNegative(Val: CondV, DL, VT); |
7569 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV)); |
7570 | } |
7571 | // (select c, y, -1) -> (c-1) | y |
7572 | if (isAllOnesConstant(V: FalseV)) { |
7573 | SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, |
7574 | N2: DAG.getAllOnesConstant(DL, VT)); |
7575 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV)); |
7576 | } |
7577 | |
7578 | // (select c, 0, y) -> (c-1) & y |
7579 | if (isNullConstant(V: TrueV)) { |
7580 | SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, |
7581 | N2: DAG.getAllOnesConstant(DL, VT)); |
7582 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV)); |
7583 | } |
7584 | // (select c, y, 0) -> -c & y |
7585 | if (isNullConstant(V: FalseV)) { |
7586 | SDValue Neg = DAG.getNegative(Val: CondV, DL, VT); |
7587 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV)); |
7588 | } |
7589 | } |
7590 | |
7591 | // select c, ~x, x --> xor -c, x |
7592 | if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) { |
7593 | const APInt &TrueVal = TrueV->getAsAPIntVal(); |
7594 | const APInt &FalseVal = FalseV->getAsAPIntVal(); |
7595 | if (~TrueVal == FalseVal) { |
7596 | SDValue Neg = DAG.getNegative(Val: CondV, DL, VT); |
7597 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV); |
7598 | } |
7599 | } |
7600 | |
7601 | // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops |
7602 | // when both truev and falsev are also setcc. |
7603 | if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC && |
7604 | FalseV.getOpcode() == ISD::SETCC) { |
7605 | SDValue LHS = CondV.getOperand(i: 0); |
7606 | SDValue RHS = CondV.getOperand(i: 1); |
7607 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get(); |
7608 | |
7609 | // (select x, x, y) -> x | y |
7610 | // (select !x, x, y) -> x & y |
7611 | if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) { |
7612 | return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV, |
7613 | N2: DAG.getFreeze(V: FalseV)); |
7614 | } |
7615 | // (select x, y, x) -> x & y |
7616 | // (select !x, y, x) -> x | y |
7617 | if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) { |
7618 | return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT, |
7619 | N1: DAG.getFreeze(V: TrueV), N2: FalseV); |
7620 | } |
7621 | } |
7622 | |
7623 | return SDValue(); |
7624 | } |
7625 | |
7626 | // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants |
7627 | // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. |
// For now we only consider the transformation profitable if `binOp(c0, c1)`
// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
7630 | // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize |
7631 | // than `c0`? |
7632 | static SDValue |
7633 | foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, |
7634 | const RISCVSubtarget &Subtarget) { |
7635 | if (Subtarget.hasShortForwardBranchOpt()) |
7636 | return SDValue(); |
7637 | |
7638 | unsigned SelOpNo = 0; |
7639 | SDValue Sel = BO->getOperand(Num: 0); |
7640 | if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { |
7641 | SelOpNo = 1; |
7642 | Sel = BO->getOperand(Num: 1); |
7643 | } |
7644 | |
7645 | if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) |
7646 | return SDValue(); |
7647 | |
7648 | unsigned ConstSelOpNo = 1; |
7649 | unsigned OtherSelOpNo = 2; |
7650 | if (!dyn_cast<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) { |
7651 | ConstSelOpNo = 2; |
7652 | OtherSelOpNo = 1; |
7653 | } |
7654 | SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo); |
7655 | ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp); |
7656 | if (!ConstSelOpNode || ConstSelOpNode->isOpaque()) |
7657 | return SDValue(); |
7658 | |
7659 | SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1); |
7660 | ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp); |
7661 | if (!ConstBinOpNode || ConstBinOpNode->isOpaque()) |
7662 | return SDValue(); |
7663 | |
7664 | SDLoc DL(Sel); |
7665 | EVT VT = BO->getValueType(ResNo: 0); |
7666 | |
7667 | SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp}; |
7668 | if (SelOpNo == 1) |
7669 | std::swap(a&: NewConstOps[0], b&: NewConstOps[1]); |
7670 | |
7671 | SDValue NewConstOp = |
7672 | DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps); |
7673 | if (!NewConstOp) |
7674 | return SDValue(); |
7675 | |
7676 | const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); |
7677 | if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) |
7678 | return SDValue(); |
7679 | |
7680 | SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo); |
7681 | SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp}; |
7682 | if (SelOpNo == 1) |
7683 | std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]); |
7684 | SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps); |
7685 | |
7686 | SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp; |
7687 | SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp; |
7688 | return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF); |
7689 | } |
7690 | |
7691 | SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
7692 | SDValue CondV = Op.getOperand(i: 0); |
7693 | SDValue TrueV = Op.getOperand(i: 1); |
7694 | SDValue FalseV = Op.getOperand(i: 2); |
7695 | SDLoc DL(Op); |
7696 | MVT VT = Op.getSimpleValueType(); |
7697 | MVT XLenVT = Subtarget.getXLenVT(); |
7698 | |
7699 | // Lower vector SELECTs to VSELECTs by splatting the condition. |
7700 | if (VT.isVector()) { |
7701 | MVT SplatCondVT = VT.changeVectorElementType(EltVT: MVT::i1); |
7702 | SDValue CondSplat = DAG.getSplat(VT: SplatCondVT, DL, Op: CondV); |
7703 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: CondSplat, N2: TrueV, N3: FalseV); |
7704 | } |
7705 | |
7706 | // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ |
7707 | // nodes to implement the SELECT. Performing the lowering here allows for |
7708 | // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless |
7709 | // sequence or RISCVISD::SELECT_CC node (branch-based select). |
7710 | if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && |
7711 | VT.isScalarInteger()) { |
7712 | // (select c, t, 0) -> (czero_eqz t, c) |
7713 | if (isNullConstant(V: FalseV)) |
7714 | return DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV); |
7715 | // (select c, 0, f) -> (czero_nez f, c) |
7716 | if (isNullConstant(V: TrueV)) |
7717 | return DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV); |
7718 | |
7719 | // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c)) |
7720 | if (TrueV.getOpcode() == ISD::AND && |
7721 | (TrueV.getOperand(i: 0) == FalseV || TrueV.getOperand(i: 1) == FalseV)) |
7722 | return DAG.getNode( |
7723 | Opcode: ISD::OR, DL, VT, N1: TrueV, |
7724 | N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV)); |
7725 | // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x)) |
7726 | if (FalseV.getOpcode() == ISD::AND && |
7727 | (FalseV.getOperand(i: 0) == TrueV || FalseV.getOperand(i: 1) == TrueV)) |
7728 | return DAG.getNode( |
7729 | Opcode: ISD::OR, DL, VT, N1: FalseV, |
7730 | N2: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV)); |
7731 | |
7732 | // Try some other optimizations before falling back to generic lowering. |
7733 | if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget)) |
7734 | return V; |
7735 | |
7736 | // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1) |
7737 | // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2) |
7738 | if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) { |
7739 | const APInt &TrueVal = TrueV->getAsAPIntVal(); |
7740 | const APInt &FalseVal = FalseV->getAsAPIntVal(); |
7741 | const int TrueValCost = RISCVMatInt::getIntMatCost( |
7742 | Val: TrueVal, Size: Subtarget.getXLen(), STI: Subtarget, /*CompressionCost=*/true); |
7743 | const int FalseValCost = RISCVMatInt::getIntMatCost( |
7744 | Val: FalseVal, Size: Subtarget.getXLen(), STI: Subtarget, /*CompressionCost=*/true); |
7745 | bool IsCZERO_NEZ = TrueValCost <= FalseValCost; |
7746 | SDValue LHSVal = DAG.getConstant( |
7747 | Val: IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT); |
7748 | SDValue RHSVal = |
7749 | DAG.getConstant(Val: IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT); |
7750 | SDValue CMOV = |
7751 | DAG.getNode(Opcode: IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ, |
7752 | DL, VT, N1: LHSVal, N2: CondV); |
7753 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CMOV, N2: RHSVal); |
7754 | } |
7755 | |
7756 | // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) |
7757 | // Unless we have the short forward branch optimization. |
7758 | if (!Subtarget.hasConditionalMoveFusion()) |
7759 | return DAG.getNode( |
7760 | Opcode: ISD::OR, DL, VT, |
7761 | N1: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV), |
7762 | N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV)); |
7763 | } |
7764 | |
7765 | if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget)) |
7766 | return V; |
7767 | |
7768 | if (Op.hasOneUse()) { |
7769 | unsigned UseOpc = Op->use_begin()->getOpcode(); |
7770 | if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) { |
7771 | SDNode *BinOp = *Op->use_begin(); |
7772 | if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: *Op->use_begin(), |
7773 | DAG, Subtarget)) { |
7774 | DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel); |
// Opcode check is necessary because foldBinOpIntoSelectIfProfitable
// may return a constant node and cause a crash in lowerSELECT.
7777 | if (NewSel.getOpcode() == ISD::SELECT) |
7778 | return lowerSELECT(Op: NewSel, DAG); |
7779 | return NewSel; |
7780 | } |
7781 | } |
7782 | } |
7783 | |
7784 | // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc)) |
7785 | // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1))) |
7786 | const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(Val&: TrueV); |
7787 | const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(Val&: FalseV); |
7788 | if (FPTV && FPFV) { |
7789 | if (FPTV->isExactlyValue(V: 1.0) && FPFV->isExactlyValue(V: 0.0)) |
7790 | return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: CondV); |
7791 | if (FPTV->isExactlyValue(V: 0.0) && FPFV->isExactlyValue(V: 1.0)) { |
7792 | SDValue XOR = DAG.getNode(Opcode: ISD::XOR, DL, VT: XLenVT, N1: CondV, |
7793 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
7794 | return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: XOR); |
7795 | } |
7796 | } |
7797 | |
7798 | // If the condition is not an integer SETCC which operates on XLenVT, we need |
7799 | // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.: |
7800 | // (select condv, truev, falsev) |
7801 | // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) |
7802 | if (CondV.getOpcode() != ISD::SETCC || |
7803 | CondV.getOperand(i: 0).getSimpleValueType() != XLenVT) { |
7804 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
7805 | SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE); |
7806 | |
7807 | SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; |
7808 | |
7809 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops); |
7810 | } |
7811 | |
7812 | // If the CondV is the output of a SETCC node which operates on XLenVT inputs, |
7813 | // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take |
7814 | // advantage of the integer compare+branch instructions. i.e.: |
7815 | // (select (setcc lhs, rhs, cc), truev, falsev) |
7816 | // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) |
7817 | SDValue LHS = CondV.getOperand(i: 0); |
7818 | SDValue RHS = CondV.getOperand(i: 1); |
7819 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get(); |
7820 | |
// Special case for a select of 2 constants that have a difference of 1.
// Normally this is done by DAGCombine, but if the select is introduced by
// type legalization or op legalization, we miss it. Restricting to the SETLT
// case for now because that is what signed saturating add/sub need.
7825 | // FIXME: We don't need the condition to be SETLT or even a SETCC, |
7826 | // but we would probably want to swap the true/false values if the condition |
7827 | // is SETGE/SETLE to avoid an XORI. |
7828 | if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) && |
7829 | CCVal == ISD::SETLT) { |
7830 | const APInt &TrueVal = TrueV->getAsAPIntVal(); |
7831 | const APInt &FalseVal = FalseV->getAsAPIntVal(); |
7832 | if (TrueVal - 1 == FalseVal) |
7833 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV); |
7834 | if (TrueVal + 1 == FalseVal) |
7835 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV); |
7836 | } |
7837 | |
7838 | translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG); |
7839 | // 1 < x ? x : 1 -> 0 < x ? x : 1 |
7840 | if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) && |
7841 | RHS == TrueV && LHS == FalseV) { |
7842 | LHS = DAG.getConstant(Val: 0, DL, VT); |
7843 | // 0 <u x is the same as x != 0. |
7844 | if (CCVal == ISD::SETULT) { |
7845 | std::swap(a&: LHS, b&: RHS); |
7846 | CCVal = ISD::SETNE; |
7847 | } |
7848 | } |
7849 | |
7850 | // x <s -1 ? x : -1 -> x <s 0 ? x : -1 |
7851 | if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV && |
7852 | RHS == FalseV) { |
7853 | RHS = DAG.getConstant(Val: 0, DL, VT); |
7854 | } |
7855 | |
7856 | SDValue TargetCC = DAG.getCondCode(Cond: CCVal); |
7857 | |
7858 | if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) { |
7859 | // (select (setcc lhs, rhs, CC), constant, falsev) |
7860 | // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) |
7861 | std::swap(a&: TrueV, b&: FalseV); |
7862 | TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType())); |
7863 | } |
7864 | |
7865 | SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; |
7866 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops); |
7867 | } |
7868 | |
7869 | SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { |
7870 | SDValue CondV = Op.getOperand(i: 1); |
7871 | SDLoc DL(Op); |
7872 | MVT XLenVT = Subtarget.getXLenVT(); |
7873 | |
7874 | if (CondV.getOpcode() == ISD::SETCC && |
7875 | CondV.getOperand(i: 0).getValueType() == XLenVT) { |
7876 | SDValue LHS = CondV.getOperand(i: 0); |
7877 | SDValue RHS = CondV.getOperand(i: 1); |
7878 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get(); |
7879 | |
7880 | translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG); |
7881 | |
7882 | SDValue TargetCC = DAG.getCondCode(Cond: CCVal); |
7883 | return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0), |
7884 | N2: LHS, N3: RHS, N4: TargetCC, N5: Op.getOperand(i: 2)); |
7885 | } |
7886 | |
7887 | return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0), |
7888 | N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: XLenVT), |
7889 | N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2)); |
7890 | } |
7891 | |
7892 | SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { |
7893 | MachineFunction &MF = DAG.getMachineFunction(); |
7894 | RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); |
7895 | |
7896 | SDLoc DL(Op); |
7897 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
7898 | VT: getPointerTy(DL: MF.getDataLayout())); |
7899 | |
7900 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
7901 | // memory location argument. |
7902 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
7903 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
7904 | PtrInfo: MachinePointerInfo(SV)); |
7905 | } |
7906 | |
7907 | SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, |
7908 | SelectionDAG &DAG) const { |
7909 | const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
7910 | MachineFunction &MF = DAG.getMachineFunction(); |
7911 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7912 | MFI.setFrameAddressIsTaken(true); |
7913 | Register FrameReg = RI.getFrameRegister(MF); |
7914 | int XLenInBytes = Subtarget.getXLen() / 8; |
7915 | |
7916 | EVT VT = Op.getValueType(); |
7917 | SDLoc DL(Op); |
7918 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
7919 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
7920 | while (Depth--) { |
7921 | int Offset = -(XLenInBytes * 2); |
7922 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
7923 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
7924 | FrameAddr = |
7925 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
7926 | } |
7927 | return FrameAddr; |
7928 | } |
7929 | |
7930 | SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, |
7931 | SelectionDAG &DAG) const { |
7932 | const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
7933 | MachineFunction &MF = DAG.getMachineFunction(); |
7934 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7935 | MFI.setReturnAddressIsTaken(true); |
7936 | MVT XLenVT = Subtarget.getXLenVT(); |
7937 | int XLenInBytes = Subtarget.getXLen() / 8; |
7938 | |
7939 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
7940 | return SDValue(); |
7941 | |
7942 | EVT VT = Op.getValueType(); |
7943 | SDLoc DL(Op); |
7944 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
7945 | if (Depth) { |
7946 | int Off = -XLenInBytes; |
7947 | SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); |
7948 | SDValue Offset = DAG.getConstant(Val: Off, DL, VT); |
7949 | return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), |
7950 | Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset), |
7951 | PtrInfo: MachinePointerInfo()); |
7952 | } |
7953 | |
7954 | // Return the value of the return address register, marking it an implicit |
7955 | // live-in. |
7956 | Register Reg = MF.addLiveIn(PReg: RI.getRARegister(), RC: getRegClassFor(VT: XLenVT)); |
7957 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg, VT: XLenVT); |
7958 | } |
7959 | |
7960 | SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, |
7961 | SelectionDAG &DAG) const { |
7962 | SDLoc DL(Op); |
7963 | SDValue Lo = Op.getOperand(i: 0); |
7964 | SDValue Hi = Op.getOperand(i: 1); |
7965 | SDValue Shamt = Op.getOperand(i: 2); |
7966 | EVT VT = Lo.getValueType(); |
7967 | |
7968 | // if Shamt-XLEN < 0: // Shamt < XLEN |
7969 | // Lo = Lo << Shamt |
7970 | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) |
7971 | // else: |
7972 | // Lo = 0 |
7973 | // Hi = Lo << (Shamt-XLEN) |
7974 | |
7975 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
7976 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
7977 | SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT); |
7978 | SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT); |
7979 | SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen); |
7980 | SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt); |
7981 | |
7982 | SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt); |
7983 | SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One); |
7984 | SDValue ShiftRightLo = |
7985 | DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: XLenMinus1Shamt); |
7986 | SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt); |
7987 | SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo); |
7988 | SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusXLen); |
7989 | |
7990 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT); |
7991 | |
7992 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero); |
7993 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
7994 | |
7995 | SDValue Parts[2] = {Lo, Hi}; |
7996 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
7997 | } |
7998 | |
7999 | SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, |
8000 | bool IsSRA) const { |
8001 | SDLoc DL(Op); |
8002 | SDValue Lo = Op.getOperand(i: 0); |
8003 | SDValue Hi = Op.getOperand(i: 1); |
8004 | SDValue Shamt = Op.getOperand(i: 2); |
8005 | EVT VT = Lo.getValueType(); |
8006 | |
8007 | // SRA expansion: |
8008 | // if Shamt-XLEN < 0: // Shamt < XLEN |
8009 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) |
8010 | // Hi = Hi >>s Shamt |
8011 | // else: |
8012 | // Lo = Hi >>s (Shamt-XLEN); |
8013 | // Hi = Hi >>s (XLEN-1) |
8014 | // |
8015 | // SRL expansion: |
8016 | // if Shamt-XLEN < 0: // Shamt < XLEN |
8017 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) |
8018 | // Hi = Hi >>u Shamt |
8019 | // else: |
8020 | // Lo = Hi >>u (Shamt-XLEN); |
8021 | // Hi = 0; |
8022 | |
8023 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
8024 | |
8025 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
8026 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
8027 | SDValue MinusXLen = DAG.getConstant(Val: -(int)Subtarget.getXLen(), DL, VT); |
8028 | SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT); |
8029 | SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen); |
8030 | SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt); |
8031 | |
8032 | SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt); |
8033 | SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One); |
8034 | SDValue ShiftLeftHi = |
8035 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: XLenMinus1Shamt); |
8036 | SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi); |
8037 | SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt); |
8038 | SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusXLen); |
8039 | SDValue HiFalse = |
8040 | IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: XLenMinus1) : Zero; |
8041 | |
8042 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT); |
8043 | |
8044 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse); |
8045 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
8046 | |
8047 | SDValue Parts[2] = {Lo, Hi}; |
8048 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
8049 | } |
8050 | |
8051 | // Lower splats of i1 types to SETCC. For each mask vector type, we have a |
8052 | // legal equivalently-sized i8 type, so we can use that as a go-between. |
8053 | SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, |
8054 | SelectionDAG &DAG) const { |
8055 | SDLoc DL(Op); |
8056 | MVT VT = Op.getSimpleValueType(); |
8057 | SDValue SplatVal = Op.getOperand(i: 0); |
8058 | // All-zeros or all-ones splats are handled specially. |
8059 | if (ISD::isConstantSplatVectorAllOnes(N: Op.getNode())) { |
8060 | SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second; |
8061 | return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT, Operand: VL); |
8062 | } |
8063 | if (ISD::isConstantSplatVectorAllZeros(N: Op.getNode())) { |
8064 | SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second; |
8065 | return DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT, Operand: VL); |
8066 | } |
8067 | MVT InterVT = VT.changeVectorElementType(EltVT: MVT::i8); |
8068 | SplatVal = DAG.getNode(Opcode: ISD::AND, DL, VT: SplatVal.getValueType(), N1: SplatVal, |
8069 | N2: DAG.getConstant(Val: 1, DL, VT: SplatVal.getValueType())); |
8070 | SDValue LHS = DAG.getSplatVector(VT: InterVT, DL, Op: SplatVal); |
8071 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: InterVT); |
8072 | return DAG.getSetCC(DL, VT, LHS, RHS: Zero, Cond: ISD::SETNE); |
8073 | } |
8074 | |
8075 | // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is |
8076 | // illegal (currently only vXi64 RV32). |
8077 | // FIXME: We could also catch non-constant sign-extended i32 values and lower |
8078 | // them to VMV_V_X_VL. |
8079 | SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, |
8080 | SelectionDAG &DAG) const { |
8081 | SDLoc DL(Op); |
8082 | MVT VecVT = Op.getSimpleValueType(); |
8083 | assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && |
8084 | "Unexpected SPLAT_VECTOR_PARTS lowering" ); |
8085 | |
8086 | assert(Op.getNumOperands() == 2 && "Unexpected number of operands!" ); |
8087 | SDValue Lo = Op.getOperand(i: 0); |
8088 | SDValue Hi = Op.getOperand(i: 1); |
8089 | |
8090 | MVT ContainerVT = VecVT; |
8091 | if (VecVT.isFixedLengthVector()) |
8092 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8093 | |
8094 | auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; |
8095 | |
8096 | SDValue Res = |
8097 | splatPartsI64WithVL(DL, VT: ContainerVT, Passthru: SDValue(), Lo, Hi, VL, DAG); |
8098 | |
8099 | if (VecVT.isFixedLengthVector()) |
8100 | Res = convertFromScalableVector(VT: VecVT, V: Res, DAG, Subtarget); |
8101 | |
8102 | return Res; |
8103 | } |
8104 | |
8105 | // Custom-lower extensions from mask vectors by using a vselect either with 1 |
8106 | // for zero/any-extension or -1 for sign-extension: |
8107 | // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) |
8108 | // Note that any-extension is lowered identically to zero-extension. |
8109 | SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, |
8110 | int64_t ExtTrueVal) const { |
8111 | SDLoc DL(Op); |
8112 | MVT VecVT = Op.getSimpleValueType(); |
8113 | SDValue Src = Op.getOperand(i: 0); |
8114 | // Only custom-lower extensions from mask types |
8115 | assert(Src.getValueType().isVector() && |
8116 | Src.getValueType().getVectorElementType() == MVT::i1); |
8117 | |
8118 | if (VecVT.isScalableVector()) { |
8119 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: VecVT); |
8120 | SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: VecVT); |
8121 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecVT, N1: Src, N2: SplatTrueVal, N3: SplatZero); |
8122 | } |
8123 | |
8124 | MVT ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8125 | MVT I1ContainerVT = |
8126 | MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount()); |
8127 | |
8128 | SDValue CC = convertToScalableVector(VT: I1ContainerVT, V: Src, DAG, Subtarget); |
8129 | |
8130 | SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; |
8131 | |
8132 | MVT XLenVT = Subtarget.getXLenVT(); |
8133 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
8134 | SDValue SplatTrueVal = DAG.getConstant(Val: ExtTrueVal, DL, VT: XLenVT); |
8135 | |
8136 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
8137 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL); |
8138 | SplatTrueVal = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
8139 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatTrueVal, N3: VL); |
8140 | SDValue Select = |
8141 | DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: SplatTrueVal, |
8142 | N3: SplatZero, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
8143 | |
8144 | return convertFromScalableVector(VT: VecVT, V: Select, DAG, Subtarget); |
8145 | } |
8146 | |
8147 | SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( |
8148 | SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { |
8149 | MVT ExtVT = Op.getSimpleValueType(); |
8150 | // Only custom-lower extensions from fixed-length vector types. |
8151 | if (!ExtVT.isFixedLengthVector()) |
8152 | return Op; |
8153 | MVT VT = Op.getOperand(i: 0).getSimpleValueType(); |
8154 | // Grab the canonical container type for the extended type. Infer the smaller |
8155 | // type from that to ensure the same number of vector elements, as we know |
8156 | // the LMUL will be sufficient to hold the smaller type. |
8157 | MVT ContainerExtVT = getContainerForFixedLengthVector(VT: ExtVT); |
8158 | // Get the extended container type manually to ensure the same number of |
8159 | // vector elements between source and dest. |
8160 | MVT ContainerVT = MVT::getVectorVT(VT: VT.getVectorElementType(), |
8161 | EC: ContainerExtVT.getVectorElementCount()); |
8162 | |
8163 | SDValue Op1 = |
8164 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget); |
8165 | |
8166 | SDLoc DL(Op); |
8167 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
8168 | |
8169 | SDValue Ext = DAG.getNode(Opcode: ExtendOpc, DL, VT: ContainerExtVT, N1: Op1, N2: Mask, N3: VL); |
8170 | |
8171 | return convertFromScalableVector(VT: ExtVT, V: Ext, DAG, Subtarget); |
8172 | } |
8173 | |
8174 | // Custom-lower truncations from vectors to mask vectors by using a mask and a |
8175 | // setcc operation: |
8176 | // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) |
8177 | SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, |
8178 | SelectionDAG &DAG) const { |
8179 | bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; |
8180 | SDLoc DL(Op); |
8181 | EVT MaskVT = Op.getValueType(); |
8182 | // Only expect to custom-lower truncations to mask types |
8183 | assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && |
8184 | "Unexpected type for vector mask lowering" ); |
8185 | SDValue Src = Op.getOperand(i: 0); |
8186 | MVT VecVT = Src.getSimpleValueType(); |
8187 | SDValue Mask, VL; |
8188 | if (IsVPTrunc) { |
8189 | Mask = Op.getOperand(i: 1); |
8190 | VL = Op.getOperand(i: 2); |
8191 | } |
8192 | // If this is a fixed vector, we need to convert it to a scalable vector. |
8193 | MVT ContainerVT = VecVT; |
8194 | |
8195 | if (VecVT.isFixedLengthVector()) { |
8196 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8197 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
8198 | if (IsVPTrunc) { |
8199 | MVT MaskContainerVT = |
8200 | getContainerForFixedLengthVector(VT: Mask.getSimpleValueType()); |
8201 | Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget); |
8202 | } |
8203 | } |
8204 | |
8205 | if (!IsVPTrunc) { |
8206 | std::tie(args&: Mask, args&: VL) = |
8207 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
8208 | } |
8209 | |
8210 | SDValue SplatOne = DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT()); |
8211 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()); |
8212 | |
8213 | SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
8214 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatOne, N3: VL); |
8215 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
8216 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL); |
8217 | |
8218 | MVT MaskContainerVT = ContainerVT.changeVectorElementType(EltVT: MVT::i1); |
8219 | SDValue Trunc = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerVT, N1: Src, N2: SplatOne, |
8220 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
8221 | Trunc = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskContainerVT, |
8222 | Ops: {Trunc, SplatZero, DAG.getCondCode(Cond: ISD::SETNE), |
8223 | DAG.getUNDEF(VT: MaskContainerVT), Mask, VL}); |
8224 | if (MaskVT.isFixedLengthVector()) |
8225 | Trunc = convertFromScalableVector(VT: MaskVT, V: Trunc, DAG, Subtarget); |
8226 | return Trunc; |
8227 | } |
8228 | |
8229 | SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, |
8230 | SelectionDAG &DAG) const { |
8231 | bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; |
8232 | SDLoc DL(Op); |
8233 | |
8234 | MVT VT = Op.getSimpleValueType(); |
8235 | // Only custom-lower vector truncates |
8236 | assert(VT.isVector() && "Unexpected type for vector truncate lowering" ); |
8237 | |
8238 | // Truncates to mask types are handled differently |
8239 | if (VT.getVectorElementType() == MVT::i1) |
8240 | return lowerVectorMaskTruncLike(Op, DAG); |
8241 | |
8242 | // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary |
8243 | // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which |
8244 | // truncate by one power of two at a time. |
8245 | MVT DstEltVT = VT.getVectorElementType(); |
8246 | |
8247 | SDValue Src = Op.getOperand(i: 0); |
8248 | MVT SrcVT = Src.getSimpleValueType(); |
8249 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
8250 | |
8251 | assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && |
8252 | isPowerOf2_64(SrcEltVT.getSizeInBits()) && |
8253 | "Unexpected vector truncate lowering" ); |
8254 | |
8255 | MVT ContainerVT = SrcVT; |
8256 | SDValue Mask, VL; |
8257 | if (IsVPTrunc) { |
8258 | Mask = Op.getOperand(i: 1); |
8259 | VL = Op.getOperand(i: 2); |
8260 | } |
8261 | if (SrcVT.isFixedLengthVector()) { |
8262 | ContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
8263 | Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget); |
8264 | if (IsVPTrunc) { |
8265 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
8266 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
8267 | } |
8268 | } |
8269 | |
8270 | SDValue Result = Src; |
8271 | if (!IsVPTrunc) { |
8272 | std::tie(args&: Mask, args&: VL) = |
8273 | getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
8274 | } |
8275 | |
8276 | LLVMContext &Context = *DAG.getContext(); |
8277 | const ElementCount Count = ContainerVT.getVectorElementCount(); |
8278 | do { |
8279 | SrcEltVT = MVT::getIntegerVT(BitWidth: SrcEltVT.getSizeInBits() / 2); |
8280 | EVT ResultVT = EVT::getVectorVT(Context, VT: SrcEltVT, EC: Count); |
8281 | Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: ResultVT, N1: Result, |
8282 | N2: Mask, N3: VL); |
8283 | } while (SrcEltVT != DstEltVT); |
8284 | |
8285 | if (SrcVT.isFixedLengthVector()) |
8286 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
8287 | |
8288 | return Result; |
8289 | } |
8290 | |
8291 | SDValue |
8292 | RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op, |
8293 | SelectionDAG &DAG) const { |
8294 | SDLoc DL(Op); |
8295 | SDValue Chain = Op.getOperand(i: 0); |
8296 | SDValue Src = Op.getOperand(i: 1); |
8297 | MVT VT = Op.getSimpleValueType(); |
8298 | MVT SrcVT = Src.getSimpleValueType(); |
8299 | MVT ContainerVT = VT; |
8300 | if (VT.isFixedLengthVector()) { |
8301 | MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
8302 | ContainerVT = |
8303 | SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType()); |
8304 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
8305 | } |
8306 | |
8307 | auto [Mask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
8308 | |
// RVV can only widen/truncate fp to types double/half the size of the source.
8310 | if ((VT.getVectorElementType() == MVT::f64 && |
8311 | (SrcVT.getVectorElementType() == MVT::f16 || |
8312 | SrcVT.getVectorElementType() == MVT::bf16)) || |
8313 | ((VT.getVectorElementType() == MVT::f16 || |
8314 | VT.getVectorElementType() == MVT::bf16) && |
8315 | SrcVT.getVectorElementType() == MVT::f64)) { |
8316 | // For double rounding, the intermediate rounding should be round-to-odd. |
8317 | unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND |
8318 | ? RISCVISD::STRICT_FP_EXTEND_VL |
8319 | : RISCVISD::STRICT_VFNCVT_ROD_VL; |
8320 | MVT InterVT = ContainerVT.changeVectorElementType(EltVT: MVT::f32); |
8321 | Src = DAG.getNode(Opcode: InterConvOpc, DL, VTList: DAG.getVTList(VT1: InterVT, VT2: MVT::Other), |
8322 | N1: Chain, N2: Src, N3: Mask, N4: VL); |
8323 | Chain = Src.getValue(R: 1); |
8324 | } |
8325 | |
8326 | unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND |
8327 | ? RISCVISD::STRICT_FP_EXTEND_VL |
8328 | : RISCVISD::STRICT_FP_ROUND_VL; |
8329 | SDValue Res = DAG.getNode(Opcode: ConvOpc, DL, VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), |
8330 | N1: Chain, N2: Src, N3: Mask, N4: VL); |
8331 | if (VT.isFixedLengthVector()) { |
// StrictFP operations have two result values. Their lowered result should
// have the same result count.
8334 | SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
8335 | Res = DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL); |
8336 | } |
8337 | return Res; |
8338 | } |
8339 | |
8340 | SDValue |
8341 | RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, |
8342 | SelectionDAG &DAG) const { |
8343 | bool IsVP = |
8344 | Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; |
8345 | bool IsExtend = |
8346 | Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; |
// RVV can only truncate fp to types half the size of the source. We
// custom-lower f64->f16 rounds via RVV's round-to-odd float
// conversion instruction.
8350 | SDLoc DL(Op); |
8351 | MVT VT = Op.getSimpleValueType(); |
8352 | |
8353 | assert(VT.isVector() && "Unexpected type for vector truncate lowering" ); |
8354 | |
8355 | SDValue Src = Op.getOperand(i: 0); |
8356 | MVT SrcVT = Src.getSimpleValueType(); |
8357 | |
8358 | bool IsDirectExtend = |
8359 | IsExtend && (VT.getVectorElementType() != MVT::f64 || |
8360 | (SrcVT.getVectorElementType() != MVT::f16 && |
8361 | SrcVT.getVectorElementType() != MVT::bf16)); |
8362 | bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 && |
8363 | VT.getVectorElementType() != MVT::bf16) || |
8364 | SrcVT.getVectorElementType() != MVT::f64); |
8365 | |
8366 | bool IsDirectConv = IsDirectExtend || IsDirectTrunc; |
8367 | |
8368 | // Prepare any fixed-length vector operands. |
8369 | MVT ContainerVT = VT; |
8370 | SDValue Mask, VL; |
8371 | if (IsVP) { |
8372 | Mask = Op.getOperand(i: 1); |
8373 | VL = Op.getOperand(i: 2); |
8374 | } |
8375 | if (VT.isFixedLengthVector()) { |
8376 | MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT); |
8377 | ContainerVT = |
8378 | SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType()); |
8379 | Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget); |
8380 | if (IsVP) { |
8381 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
8382 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
8383 | } |
8384 | } |
8385 | |
8386 | if (!IsVP) |
8387 | std::tie(args&: Mask, args&: VL) = |
8388 | getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget); |
8389 | |
8390 | unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; |
8391 | |
8392 | if (IsDirectConv) { |
8393 | Src = DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL); |
8394 | if (VT.isFixedLengthVector()) |
8395 | Src = convertFromScalableVector(VT, V: Src, DAG, Subtarget); |
8396 | return Src; |
8397 | } |
8398 | |
8399 | unsigned InterConvOpc = |
8400 | IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; |
8401 | |
8402 | MVT InterVT = ContainerVT.changeVectorElementType(EltVT: MVT::f32); |
8403 | SDValue IntermediateConv = |
8404 | DAG.getNode(Opcode: InterConvOpc, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL); |
8405 | SDValue Result = |
8406 | DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: IntermediateConv, N2: Mask, N3: VL); |
8407 | if (VT.isFixedLengthVector()) |
8408 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
8409 | return Result; |
8410 | } |
8411 | |
8412 | // Given a scalable vector type and an index into it, returns the type for the |
8413 | // smallest subvector that the index fits in. This can be used to reduce LMUL |
8414 | // for operations like vslidedown. |
8415 | // |
8416 | // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32. |
8417 | static std::optional<MVT> |
8418 | getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, |
8419 | const RISCVSubtarget &Subtarget) { |
8420 | assert(VecVT.isScalableVector()); |
8421 | const unsigned EltSize = VecVT.getScalarSizeInBits(); |
8422 | const unsigned VectorBitsMin = Subtarget.getRealMinVLen(); |
8423 | const unsigned MinVLMAX = VectorBitsMin / EltSize; |
8424 | MVT SmallerVT; |
8425 | if (MaxIdx < MinVLMAX) |
8426 | SmallerVT = getLMUL1VT(VT: VecVT); |
8427 | else if (MaxIdx < MinVLMAX * 2) |
8428 | SmallerVT = getLMUL1VT(VT: VecVT).getDoubleNumVectorElementsVT(); |
8429 | else if (MaxIdx < MinVLMAX * 4) |
8430 | SmallerVT = getLMUL1VT(VT: VecVT) |
8431 | .getDoubleNumVectorElementsVT() |
8432 | .getDoubleNumVectorElementsVT(); |
8433 | if (!SmallerVT.isValid() || !VecVT.bitsGT(VT: SmallerVT)) |
8434 | return std::nullopt; |
8435 | return SmallerVT; |
8436 | } |
8437 | |
8438 | // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the |
8439 | // first position of a vector, and that vector is slid up to the insert index. |
8440 | // By limiting the active vector length to index+1 and merging with the |
8441 | // original vector (with an undisturbed tail policy for elements >= VL), we |
8442 | // achieve the desired result of leaving all elements untouched except the one |
8443 | // at VL-1, which is replaced with the desired value. |
8444 | SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
8445 | SelectionDAG &DAG) const { |
8446 | SDLoc DL(Op); |
8447 | MVT VecVT = Op.getSimpleValueType(); |
8448 | SDValue Vec = Op.getOperand(i: 0); |
8449 | SDValue Val = Op.getOperand(i: 1); |
8450 | SDValue Idx = Op.getOperand(i: 2); |
8451 | |
8452 | if (VecVT.getVectorElementType() == MVT::i1) { |
8453 | // FIXME: For now we just promote to an i8 vector and insert into that, |
8454 | // but this is probably not optimal. |
8455 | MVT WideVT = MVT::getVectorVT(VT: MVT::i8, EC: VecVT.getVectorElementCount()); |
8456 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec); |
8457 | Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: WideVT, N1: Vec, N2: Val, N3: Idx); |
8458 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Vec); |
8459 | } |
8460 | |
8461 | MVT ContainerVT = VecVT; |
8462 | // If the operand is a fixed-length vector, convert to a scalable one. |
8463 | if (VecVT.isFixedLengthVector()) { |
8464 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8465 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
8466 | } |
8467 | |
8468 | // If we know the index we're going to insert at, we can shrink Vec so that |
8469 | // we're performing the scalar inserts and slideup on a smaller LMUL. |
8470 | MVT OrigContainerVT = ContainerVT; |
8471 | SDValue OrigVec = Vec; |
8472 | SDValue AlignedIdx; |
8473 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx)) { |
8474 | const unsigned OrigIdx = IdxC->getZExtValue(); |
8475 | // Do we know an upper bound on LMUL? |
8476 | if (auto ShrunkVT = getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: OrigIdx, |
8477 | DL, DAG, Subtarget)) { |
8478 | ContainerVT = *ShrunkVT; |
8479 | AlignedIdx = DAG.getVectorIdxConstant(Val: 0, DL); |
8480 | } |
8481 | |
8482 | // If we're compiling for an exact VLEN value, we can always perform |
8483 | // the insert in m1 as we can determine the register corresponding to |
8484 | // the index in the register group. |
8485 | const MVT M1VT = getLMUL1VT(VT: ContainerVT); |
8486 | if (auto VLEN = Subtarget.getRealVLen(); |
8487 | VLEN && ContainerVT.bitsGT(VT: M1VT)) { |
8488 | EVT ElemVT = VecVT.getVectorElementType(); |
8489 | unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits(); |
8490 | unsigned RemIdx = OrigIdx % ElemsPerVReg; |
8491 | unsigned SubRegIdx = OrigIdx / ElemsPerVReg; |
unsigned ExtractIdx =
8493 | SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); |
8494 | AlignedIdx = DAG.getVectorIdxConstant(Val: ExtractIdx, DL); |
8495 | Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL); |
8496 | ContainerVT = M1VT; |
8497 | } |
8498 | |
8499 | if (AlignedIdx) |
8500 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, |
8501 | N2: AlignedIdx); |
8502 | } |
8503 | |
8504 | MVT XLenVT = Subtarget.getXLenVT(); |
8505 | |
8506 | bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; |
8507 | // Even i64-element vectors on RV32 can be lowered without scalar |
8508 | // legalization if the most-significant 32 bits of the value are not affected |
8509 | // by the sign-extension of the lower 32 bits. |
8510 | // TODO: We could also catch sign extensions of a 32-bit value. |
8511 | if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { |
8512 | const auto *CVal = cast<ConstantSDNode>(Val); |
8513 | if (isInt<32>(x: CVal->getSExtValue())) { |
8514 | IsLegalInsert = true; |
8515 | Val = DAG.getConstant(Val: CVal->getSExtValue(), DL, VT: MVT::i32); |
8516 | } |
8517 | } |
8518 | |
8519 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
8520 | |
8521 | SDValue ValInVec; |
8522 | |
8523 | if (IsLegalInsert) { |
8524 | unsigned Opc = |
8525 | VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; |
8526 | if (isNullConstant(V: Idx)) { |
8527 | if (!VecVT.isFloatingPoint()) |
8528 | Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Val); |
8529 | Vec = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: Vec, N2: Val, N3: VL); |
8530 | |
8531 | if (AlignedIdx) |
8532 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec, |
8533 | N2: Vec, N3: AlignedIdx); |
8534 | if (!VecVT.isFixedLengthVector()) |
8535 | return Vec; |
8536 | return convertFromScalableVector(VT: VecVT, V: Vec, DAG, Subtarget); |
8537 | } |
8538 | ValInVec = lowerScalarInsert(Scalar: Val, VL, VT: ContainerVT, DL, DAG, Subtarget); |
8539 | } else { |
8540 | // On RV32, i64-element vectors must be specially handled to place the |
8541 | // value at element 0, by using two vslide1down instructions in sequence on |
8542 | // the i32 split lo/hi value. Use an equivalently-sized i32 vector for |
8543 | // this. |
8544 | SDValue ValLo, ValHi; |
8545 | std::tie(args&: ValLo, args&: ValHi) = DAG.SplitScalar(N: Val, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
8546 | MVT I32ContainerVT = |
8547 | MVT::getVectorVT(VT: MVT::i32, EC: ContainerVT.getVectorElementCount() * 2); |
8548 | SDValue I32Mask = |
8549 | getDefaultScalableVLOps(VecVT: I32ContainerVT, DL, DAG, Subtarget).first; |
8550 | // Limit the active VL to two. |
8551 | SDValue InsertI64VL = DAG.getConstant(Val: 2, DL, VT: XLenVT); |
8552 | // If the Idx is 0 we can insert directly into the vector. |
8553 | if (isNullConstant(V: Idx)) { |
8554 | // First slide in the lo value, then the hi value above it. We use slide1down
8555 | // to avoid the register group overlap constraint of vslide1up. |
8556 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8557 | N1: Vec, N2: Vec, N3: ValLo, N4: I32Mask, N5: InsertI64VL); |
8558 | // If the source vector is undef don't pass along the tail elements from |
8559 | // the previous slide1down. |
8560 | SDValue Tail = Vec.isUndef() ? Vec : ValInVec; |
8561 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8562 | N1: Tail, N2: ValInVec, N3: ValHi, N4: I32Mask, N5: InsertI64VL); |
8563 | // Bitcast back to the right container type. |
8564 | ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec); |
8565 | |
8566 | if (AlignedIdx) |
8567 | ValInVec = |
8568 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec, |
8569 | N2: ValInVec, N3: AlignedIdx); |
8570 | if (!VecVT.isFixedLengthVector()) |
8571 | return ValInVec; |
8572 | return convertFromScalableVector(VT: VecVT, V: ValInVec, DAG, Subtarget); |
8573 | } |
8574 | |
8575 | // First slide in the lo value, then the hi value above it. We use slide1down
8576 | // to avoid the register group overlap constraint of vslide1up. |
8577 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8578 | N1: DAG.getUNDEF(VT: I32ContainerVT), |
8579 | N2: DAG.getUNDEF(VT: I32ContainerVT), N3: ValLo, |
8580 | N4: I32Mask, N5: InsertI64VL); |
8581 | ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT, |
8582 | N1: DAG.getUNDEF(VT: I32ContainerVT), N2: ValInVec, N3: ValHi, |
8583 | N4: I32Mask, N5: InsertI64VL); |
8584 | // Bitcast back to the right container type. |
8585 | ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec); |
8586 | } |
8587 | |
8588 | // Now that the value is in a vector, slide it into position. |
8589 | SDValue InsertVL = |
8590 | DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: Idx, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
8591 | |
8592 | // Use tail agnostic policy if Idx is the last index of Vec. |
8593 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
8594 | if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Val: Idx) && |
8595 | Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements()) |
8596 | Policy = RISCVII::TAIL_AGNOSTIC; |
8597 | SDValue Slideup = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: ValInVec, |
8598 | Offset: Idx, Mask, VL: InsertVL, Policy); |
8599 | |
8600 | if (AlignedIdx) |
8601 | Slideup = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: OrigContainerVT, N1: OrigVec, |
8602 | N2: Slideup, N3: AlignedIdx); |
8603 | if (!VecVT.isFixedLengthVector()) |
8604 | return Slideup; |
8605 | return convertFromScalableVector(VT: VecVT, V: Slideup, DAG, Subtarget); |
8606 | } |
8607 | |
8608 | // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then |
8609 | // extract the first element: (extractelt (slidedown vec, idx), 0). For integer |
8610 | // types this is done using VMV_X_S to allow us to glean information about the |
8611 | // sign bits of the result. |
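// A sketch of the typical integer sequence this produces (modulo vsetvli
// placement and register allocation):
//   vslidedown.vx v8, v8, a0
//   vmv.x.s a0, v8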
8612 | SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8613 | SelectionDAG &DAG) const { |
8614 | SDLoc DL(Op); |
8615 | SDValue Idx = Op.getOperand(i: 1); |
8616 | SDValue Vec = Op.getOperand(i: 0); |
8617 | EVT EltVT = Op.getValueType(); |
8618 | MVT VecVT = Vec.getSimpleValueType(); |
8619 | MVT XLenVT = Subtarget.getXLenVT(); |
8620 | |
8621 | if (VecVT.getVectorElementType() == MVT::i1) { |
8622 | // Use vfirst.m to extract the first bit. |
8623 | if (isNullConstant(V: Idx)) { |
8624 | MVT ContainerVT = VecVT; |
8625 | if (VecVT.isFixedLengthVector()) { |
8626 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8627 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
8628 | } |
8629 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
8630 | SDValue Vfirst = |
8631 | DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
8632 | SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: Vfirst, |
8633 | RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ); |
8634 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res); |
8635 | } |
8636 | if (VecVT.isFixedLengthVector()) { |
8637 | unsigned NumElts = VecVT.getVectorNumElements(); |
8638 | if (NumElts >= 8) { |
8639 | MVT WideEltVT; |
8640 | unsigned WidenVecLen; |
8641 | SDValue ExtractElementIdx;
8642 | SDValue ExtractBitIdx;
8643 | unsigned MaxEEW = Subtarget.getELen(); |
8644 | MVT LargestEltVT = MVT::getIntegerVT( |
8645 | BitWidth: std::min(a: MaxEEW, b: unsigned(XLenVT.getSizeInBits()))); |
8646 | if (NumElts <= LargestEltVT.getSizeInBits()) { |
8647 | assert(isPowerOf2_32(NumElts) && |
8648 | "the number of elements should be power of 2" ); |
8649 | WideEltVT = MVT::getIntegerVT(BitWidth: NumElts); |
8650 | WidenVecLen = 1; |
8651 | ExtractElementIdx = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
8652 | ExtractBitIdx = Idx; |
8653 | } else { |
8654 | WideEltVT = LargestEltVT; |
8655 | WidenVecLen = NumElts / WideEltVT.getSizeInBits(); |
8656 | // extract element index = index / element width |
8657 | ExtractElementIdx = DAG.getNode( |
8658 | Opcode: ISD::SRL, DL, VT: XLenVT, N1: Idx, |
8659 | N2: DAG.getConstant(Val: Log2_64(Value: WideEltVT.getSizeInBits()), DL, VT: XLenVT)); |
8660 | // mask bit index = index % element width |
8661 | ExtractBitIdx = DAG.getNode( |
8662 | Opcode: ISD::AND, DL, VT: XLenVT, N1: Idx, |
8663 | N2: DAG.getConstant(Val: WideEltVT.getSizeInBits() - 1, DL, VT: XLenVT)); |
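// For example (illustrative, assuming XLen = ELEN = 64): extracting bit 70
// of a v128i1 mask uses WideEltVT = i64 and WidenVecLen = 2, so we read
// wide element 70 >> 6 = 1 and then bit 70 & 63 = 6 within it.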
8664 | } |
8665 | MVT WideVT = MVT::getVectorVT(VT: WideEltVT, NumElements: WidenVecLen); |
8666 | Vec = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Vec); |
8667 | SDValue ExtractElt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: XLenVT,
8668 | N1: Vec, N2: ExtractElementIdx); |
8669 | // Extract the bit from GPR. |
8670 | SDValue ShiftRight = |
8671 | DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: ExtractElt, N2: ExtractBitIdx); |
8672 | SDValue Res = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: ShiftRight, |
8673 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
8674 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res); |
8675 | } |
8676 | } |
8677 | // Otherwise, promote to an i8 vector and extract from that. |
8678 | MVT WideVT = MVT::getVectorVT(VT: MVT::i8, EC: VecVT.getVectorElementCount()); |
8679 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec); |
8680 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec, N2: Idx); |
8681 | } |
8682 | |
8683 | // If this is a fixed vector, we need to convert it to a scalable vector. |
8684 | MVT ContainerVT = VecVT; |
8685 | if (VecVT.isFixedLengthVector()) { |
8686 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
8687 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
8688 | } |
8689 | |
8690 | // If we're compiling for an exact VLEN value and we have a known |
8691 | // constant index, we can always perform the extract in m1 (or |
8692 | // smaller) as we can determine the register corresponding to |
8693 | // the index in the register group. |
8694 | const auto VLen = Subtarget.getRealVLen(); |
8695 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx); |
8696 | IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) { |
8697 | MVT M1VT = getLMUL1VT(VT: ContainerVT); |
8698 | unsigned OrigIdx = IdxC->getZExtValue(); |
8699 | EVT ElemVT = VecVT.getVectorElementType(); |
8700 | unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits(); |
8701 | unsigned RemIdx = OrigIdx % ElemsPerVReg; |
8702 | unsigned SubRegIdx = OrigIdx / ElemsPerVReg; |
8703 | unsigned ExtractIdx =
8704 | SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); |
8705 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec, |
8706 | N2: DAG.getVectorIdxConstant(Val: ExtractIdx, DL)); |
8707 | Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL); |
8708 | ContainerVT = M1VT; |
8709 | } |
8710 | |
8711 | // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which |
8712 | // contains our index. |
8713 | std::optional<uint64_t> MaxIdx; |
8714 | if (VecVT.isFixedLengthVector()) |
8715 | MaxIdx = VecVT.getVectorNumElements() - 1; |
8716 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx)) |
8717 | MaxIdx = IdxC->getZExtValue(); |
8718 | if (MaxIdx) { |
8719 | if (auto SmallerVT = |
8720 | getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: *MaxIdx, DL, DAG, Subtarget)) { |
8721 | ContainerVT = *SmallerVT; |
8722 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, |
8723 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT)); |
8724 | } |
8725 | } |
8726 | |
8727 | // If after narrowing, the required slide is still greater than LMUL2, |
8728 | // fallback to generic expansion and go through the stack. This is done |
8729 | // for a subtle reason: extracting *all* elements out of a vector is |
8730 | // widely expected to be linear in vector size, but because vslidedown |
8731 | // is linear in LMUL, performing N extracts using vslidedown becomes |
8732 | // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack |
8733 | // seems to have the same problem (the store is linear in LMUL), but the |
8734 | // generic expansion *memoizes* the store, and thus for many extracts of |
8735 | // the same vector we end up with one store and a bunch of loads. |
8736 | // TODO: We don't have the same code for insert_vector_elt because we |
8737 | // have BUILD_VECTOR and handle the degenerate case there. Should we |
8738 | // consider adding an inverse BUILD_VECTOR node? |
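// As a concrete illustration: extracting all N elements of an LMUL=8 vector
// one by one would issue N slidedowns that each sweep 8 registers, whereas
// the stack expansion stores the vector once and then issues N scalar loads.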
8739 | MVT LMUL2VT = getLMUL1VT(VT: ContainerVT).getDoubleNumVectorElementsVT(); |
8740 | if (ContainerVT.bitsGT(VT: LMUL2VT) && VecVT.isFixedLengthVector()) |
8741 | return SDValue(); |
8742 | |
8743 | // If the index is 0, the vector is already in the right position. |
8744 | if (!isNullConstant(V: Idx)) { |
8745 | // Use a VL of 1 to avoid processing more elements than we need. |
8746 | auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget); |
8747 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, |
8748 | Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL); |
8749 | } |
8750 | |
8751 | if (!EltVT.isInteger()) { |
8752 | // Floating-point extracts are handled in TableGen. |
8753 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec, |
8754 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
8755 | } |
8756 | |
8757 | SDValue Elt0 = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec); |
8758 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Elt0); |
8759 | } |
8760 | |
8761 | // Some RVV intrinsics may claim that they want an integer operand to be |
8762 | // promoted or expanded. |
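// For example (illustrative): on RV64 an i8 scalar operand is promoted to
// the i64 XLenVT, while on RV32 an i64 scalar operand of an SEW=64
// operation cannot live in one GPR and is rebuilt below from its two i32
// halves (either via the slide1up/slide1down special case or a splat).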
8763 | static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, |
8764 | const RISCVSubtarget &Subtarget) { |
8765 | assert((Op.getOpcode() == ISD::INTRINSIC_VOID || |
8766 | Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
8767 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && |
8768 | "Unexpected opcode" ); |
8769 | |
8770 | if (!Subtarget.hasVInstructions()) |
8771 | return SDValue(); |
8772 | |
8773 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || |
8774 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
8775 | unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0); |
8776 | |
8777 | SDLoc DL(Op); |
8778 | |
8779 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
8780 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID: IntNo); |
8781 | if (!II || !II->hasScalarOperand()) |
8782 | return SDValue(); |
8783 | |
8784 | unsigned SplatOp = II->ScalarOperand + 1 + HasChain; |
8785 | assert(SplatOp < Op.getNumOperands()); |
8786 | |
8787 | SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); |
8788 | SDValue &ScalarOp = Operands[SplatOp]; |
8789 | MVT OpVT = ScalarOp.getSimpleValueType(); |
8790 | MVT XLenVT = Subtarget.getXLenVT(); |
8791 | |
8792 | // If this isn't a scalar, or its type is XLenVT we're done. |
8793 | if (!OpVT.isScalarInteger() || OpVT == XLenVT) |
8794 | return SDValue(); |
8795 | |
8796 | // Simplest case is that the operand needs to be promoted to XLenVT. |
8797 | if (OpVT.bitsLT(VT: XLenVT)) { |
8798 | // If the operand is a constant, sign extend to increase our chances |
8799 | // of being able to use a .vi instruction. ANY_EXTEND would become a |
8800 | // zero extend and the simm5 check in isel would fail.
8801 | // FIXME: Should we ignore the upper bits in isel instead? |
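// For example (illustrative): an i8 constant -5 sign-extends to -5 in
// XLenVT and can still match a .vi form (simm5 covers [-16, 15]), whereas
// ANY_EXTEND becoming a zero extend would produce 251 and fail that check.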
8802 | unsigned ExtOpc = |
8803 | isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
8804 | ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp); |
8805 | return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands); |
8806 | } |
8807 | |
8808 | // Use the previous operand to get the vXi64 VT. The result might be a mask |
8809 | // VT for compares. Using the previous operand assumes that the previous |
8810 | // operand will never have a smaller element size than a scalar operand and |
8811 | // that a widening operation never uses SEW=64. |
8812 | // NOTE: If this fails the below assert, we can probably just find the |
8813 | // element count from any operand or result and use it to construct the VT. |
8814 | assert(II->ScalarOperand > 0 && "Unexpected splat operand!" ); |
8815 | MVT VT = Op.getOperand(i: SplatOp - 1).getSimpleValueType(); |
8816 | |
8817 | // The more complex case is when the scalar is larger than XLenVT. |
8818 | assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && |
8819 | VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!" ); |
8820 | |
8821 | // If this is a sign-extended 32-bit value, we can truncate it and rely on the |
8822 | // instruction to sign-extend since SEW>XLEN. |
8823 | if (DAG.ComputeNumSignBits(Op: ScalarOp) > 32) { |
8824 | ScalarOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: ScalarOp); |
8825 | return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands); |
8826 | } |
8827 | |
8828 | switch (IntNo) { |
8829 | case Intrinsic::riscv_vslide1up: |
8830 | case Intrinsic::riscv_vslide1down: |
8831 | case Intrinsic::riscv_vslide1up_mask: |
8832 | case Intrinsic::riscv_vslide1down_mask: { |
8833 | // We need to special case these when the scalar is larger than XLen. |
8834 | unsigned NumOps = Op.getNumOperands(); |
8835 | bool IsMasked = NumOps == 7; |
8836 | |
8837 | // Convert the vector source to the equivalent nxvXi32 vector. |
8838 | MVT I32VT = MVT::getVectorVT(VT: MVT::i32, EC: VT.getVectorElementCount() * 2); |
8839 | SDValue Vec = DAG.getBitcast(VT: I32VT, V: Operands[2]); |
8840 | SDValue ScalarLo, ScalarHi; |
8841 | std::tie(args&: ScalarLo, args&: ScalarHi) = |
8842 | DAG.SplitScalar(N: ScalarOp, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
8843 | |
8844 | // Double the VL since we halved SEW. |
8845 | SDValue AVL = getVLOperand(Op); |
8846 | SDValue I32VL; |
8847 | |
8848 | // Optimize for constant AVL |
8849 | if (isa<ConstantSDNode>(Val: AVL)) { |
8850 | const auto [MinVLMAX, MaxVLMAX] = |
8851 | RISCVTargetLowering::computeVLMAXBounds(VecVT: VT, Subtarget); |
8852 | |
8853 | uint64_t AVLInt = AVL->getAsZExtVal(); |
8854 | if (AVLInt <= MinVLMAX) { |
8855 | I32VL = DAG.getConstant(Val: 2 * AVLInt, DL, VT: XLenVT); |
8856 | } else if (AVLInt >= 2 * MaxVLMAX) { |
8857 | // Just set vl to VLMAX in this situation |
8858 | RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT: I32VT); |
8859 | SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT); |
8860 | unsigned Sew = RISCVVType::encodeSEW(SEW: I32VT.getScalarSizeInBits()); |
8861 | SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT); |
8862 | SDValue SETVLMAX = DAG.getTargetConstant( |
8863 | Val: Intrinsic::riscv_vsetvlimax, DL, VT: MVT::i32); |
8864 | I32VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVLMAX, N2: SEW, |
8865 | N3: LMUL); |
8866 | } else { |
8867 | // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8868 | // depends on the hardware implementation, so let the code below
8869 | // handle it.
8870 | } |
8871 | } |
8872 | if (!I32VL) { |
8873 | RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); |
8874 | SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT); |
8875 | unsigned Sew = RISCVVType::encodeSEW(SEW: VT.getScalarSizeInBits()); |
8876 | SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT); |
8877 | SDValue SETVL = |
8878 | DAG.getTargetConstant(Val: Intrinsic::riscv_vsetvli, DL, VT: MVT::i32); |
8879 | // Use a vsetvli instruction to get the actually used length, which is
8880 | // related to the hardware implementation.
8881 | SDValue VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVL, N2: AVL, |
8882 | N3: SEW, N4: LMUL); |
8883 | I32VL = |
8884 | DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: VL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
8885 | } |
8886 | |
8887 | SDValue I32Mask = getAllOnesMask(VecVT: I32VT, VL: I32VL, DL, DAG); |
8888 | |
8889 | // Shift the two scalar parts in using SEW=32 slide1up/slide1down |
8890 | // instructions. |
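// Roughly, for the vslide1up case (register names are illustrative and the
// doubled VL computed above is implied):
//   vslide1up.vx vTmp, vSrc, a1   ; hi 32 bits first
//   vslide1up.vx vDst, vTmp, a0   ; then the lo 32 bits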
8891 | SDValue Passthru; |
8892 | if (IsMasked) |
8893 | Passthru = DAG.getUNDEF(VT: I32VT); |
8894 | else |
8895 | Passthru = DAG.getBitcast(VT: I32VT, V: Operands[1]); |
8896 | |
8897 | if (IntNo == Intrinsic::riscv_vslide1up || |
8898 | IntNo == Intrinsic::riscv_vslide1up_mask) { |
8899 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8900 | N3: ScalarHi, N4: I32Mask, N5: I32VL); |
8901 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8902 | N3: ScalarLo, N4: I32Mask, N5: I32VL); |
8903 | } else { |
8904 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8905 | N3: ScalarLo, N4: I32Mask, N5: I32VL); |
8906 | Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec, |
8907 | N3: ScalarHi, N4: I32Mask, N5: I32VL); |
8908 | } |
8909 | |
8910 | // Convert back to nxvXi64. |
8911 | Vec = DAG.getBitcast(VT, V: Vec); |
8912 | |
8913 | if (!IsMasked) |
8914 | return Vec; |
8915 | // Apply mask after the operation. |
8916 | SDValue Mask = Operands[NumOps - 3]; |
8917 | SDValue MaskedOff = Operands[1]; |
8918 | // Assume Policy operand is the last operand. |
8919 | uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal(); |
8920 | // We don't need to select maskedoff if it's undef. |
8921 | if (MaskedOff.isUndef()) |
8922 | return Vec; |
8923 | // TAMU |
8924 | if (Policy == RISCVII::TAIL_AGNOSTIC) |
8925 | return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff, |
8926 | N4: DAG.getUNDEF(VT), N5: AVL); |
8927 | // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. |
8928 | // It's fine because vmerge does not care about the mask policy.
8929 | return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff, |
8930 | N4: MaskedOff, N5: AVL); |
8931 | } |
8932 | } |
8933 | |
8934 | // We need to convert the scalar to a splat vector. |
8935 | SDValue VL = getVLOperand(Op); |
8936 | assert(VL.getValueType() == XLenVT); |
8937 | ScalarOp = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar: ScalarOp, VL, DAG); |
8938 | return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands); |
8939 | } |
8940 | |
8941 | // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support |
8942 | // scalable vector llvm.get.vector.length for now. |
8943 | // |
8944 | // We need to convert from a scalable VF to a vsetvli with VLMax equal to |
8945 | // (vscale * VF). The vscale and VF are independent of element width. We use |
8946 | // SEW=8 for the vsetvli because it is the only element width that supports all |
8947 | // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8948 | // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8949 | // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different |
8950 | // SEW and LMUL are better for the surrounding vector instructions. |
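// For example (illustrative, with RVVBitsPerBlock = 64): a request for
// vscale x 4 elements maps to a vsetvli with SEW=8 and LMUL=1/2, since
// e8/m1 gives 8 elements per vscale and we only need half of that.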
8951 | static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, |
8952 | const RISCVSubtarget &Subtarget) { |
8953 | MVT XLenVT = Subtarget.getXLenVT(); |
8954 | |
8955 | // The smallest LMUL is only valid for the smallest element width. |
8956 | const unsigned ElementWidth = 8; |
8957 | |
8958 | // Determine the VF that corresponds to LMUL 1 for ElementWidth. |
8959 | unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth; |
8960 | // We don't support VF==1 with ELEN==32. |
8961 | [[maybe_unused]] unsigned MinVF = |
8962 | RISCV::RVVBitsPerBlock / Subtarget.getELen(); |
8963 | |
8964 | [[maybe_unused]] unsigned VF = N->getConstantOperandVal(Num: 2); |
8965 | assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) && |
8966 | "Unexpected VF" ); |
8967 | |
8968 | bool Fractional = VF < LMul1VF; |
8969 | unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF; |
8970 | unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMUL: LMulVal, Fractional); |
8971 | unsigned VSEW = RISCVVType::encodeSEW(SEW: ElementWidth); |
8972 | |
8973 | SDLoc DL(N); |
8974 | |
8975 | SDValue LMul = DAG.getTargetConstant(Val: VLMUL, DL, VT: XLenVT); |
8976 | SDValue Sew = DAG.getTargetConstant(Val: VSEW, DL, VT: XLenVT); |
8977 | |
8978 | SDValue AVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 1)); |
8979 | |
8980 | SDValue ID = DAG.getTargetConstant(Val: Intrinsic::riscv_vsetvli, DL, VT: XLenVT); |
8981 | SDValue Res = |
8982 | DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: ID, N2: AVL, N3: Sew, N4: LMul); |
8983 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res); |
8984 | } |
8985 | |
8986 | static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, |
8987 | const RISCVSubtarget &Subtarget) { |
8988 | SDValue Op0 = N->getOperand(Num: 1); |
8989 | MVT OpVT = Op0.getSimpleValueType(); |
8990 | MVT ContainerVT = OpVT; |
8991 | if (OpVT.isFixedLengthVector()) { |
8992 | ContainerVT = getContainerForFixedLengthVector(DAG, VT: OpVT, Subtarget); |
8993 | Op0 = convertToScalableVector(VT: ContainerVT, V: Op0, DAG, Subtarget); |
8994 | } |
8995 | MVT XLenVT = Subtarget.getXLenVT(); |
8996 | SDLoc DL(N); |
8997 | auto [Mask, VL] = getDefaultVLOps(VecVT: OpVT, ContainerVT, DL, DAG, Subtarget); |
8998 | SDValue Res = DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Op0, N2: Mask, N3: VL); |
8999 | if (isOneConstant(V: N->getOperand(Num: 2))) |
9000 | return Res; |
9001 | |
9002 | // Convert -1 to VL. |
9003 | SDValue Setcc = |
9004 | DAG.getSetCC(DL, VT: XLenVT, LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETLT); |
9005 | VL = DAG.getElementCount(DL, VT: XLenVT, EC: OpVT.getVectorElementCount()); |
9006 | return DAG.getSelect(DL, VT: XLenVT, Cond: Setcc, LHS: VL, RHS: Res); |
9007 | } |
9008 | |
9009 | static inline void promoteVCIXScalar(const SDValue &Op, |
9010 | SmallVectorImpl<SDValue> &Operands, |
9011 | SelectionDAG &DAG) { |
9012 | const RISCVSubtarget &Subtarget = |
9013 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
9014 | |
9015 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || |
9016 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
9017 | unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0); |
9018 | SDLoc DL(Op); |
9019 | |
9020 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
9021 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID: IntNo); |
9022 | if (!II || !II->hasScalarOperand()) |
9023 | return; |
9024 | |
9025 | unsigned SplatOp = II->ScalarOperand + 1; |
9026 | assert(SplatOp < Op.getNumOperands()); |
9027 | |
9028 | SDValue &ScalarOp = Operands[SplatOp]; |
9029 | MVT OpVT = ScalarOp.getSimpleValueType(); |
9030 | MVT XLenVT = Subtarget.getXLenVT(); |
9031 | |
9032 | // The code below is partially copied from lowerVectorIntrinsicScalars. |
9033 | // If this isn't a scalar, or its type is XLenVT we're done. |
9034 | if (!OpVT.isScalarInteger() || OpVT == XLenVT) |
9035 | return; |
9036 | |
9037 | // Manually promote the scalar operand.
9038 | if (OpVT.bitsLT(VT: XLenVT)) { |
9039 | unsigned ExtOpc = |
9040 | isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
9041 | ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp); |
9042 | } |
9043 | |
9044 | return; |
9045 | } |
9046 | |
9047 | static void processVCIXOperands(SDValue &OrigOp, |
9048 | SmallVectorImpl<SDValue> &Operands, |
9049 | SelectionDAG &DAG) { |
9050 | promoteVCIXScalar(Op: OrigOp, Operands, DAG); |
9051 | const RISCVSubtarget &Subtarget = |
9052 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
9053 | for (SDValue &V : Operands) { |
9054 | EVT ValType = V.getValueType(); |
9055 | if (ValType.isVector() && ValType.isFloatingPoint()) { |
9056 | MVT InterimIVT = |
9057 | MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ValType.getScalarSizeInBits()), |
9058 | EC: ValType.getVectorElementCount()); |
9059 | V = DAG.getBitcast(VT: InterimIVT, V); |
9060 | } |
9061 | if (ValType.isFixedLengthVector()) { |
9062 | MVT OpContainerVT = getContainerForFixedLengthVector( |
9063 | DAG, VT: V.getSimpleValueType(), Subtarget); |
9064 | V = convertToScalableVector(VT: OpContainerVT, V, DAG, Subtarget); |
9065 | } |
9066 | } |
9067 | } |
9068 | |
9069 | // LMUL * VLEN should be greater than or equal to EGS * SEW |
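// For example (illustrative, assuming a minimum VLEN of 128): nxv4i32 has a
// known-minimum size of 128 bits, i.e. LMUL=2, so LMUL * VLEN = 256, which
// satisfies EGS * SEW = 4 * 32 = 128 for the EGS=4 crypto ops.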
9070 | static inline bool isValidEGW(int EGS, EVT VT, |
9071 | const RISCVSubtarget &Subtarget) { |
9072 | return (Subtarget.getRealMinVLen() * |
9073 | VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >= |
9074 | EGS * VT.getScalarSizeInBits(); |
9075 | } |
9076 | |
9077 | SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
9078 | SelectionDAG &DAG) const { |
9079 | unsigned IntNo = Op.getConstantOperandVal(i: 0); |
9080 | SDLoc DL(Op); |
9081 | MVT XLenVT = Subtarget.getXLenVT(); |
9082 | |
9083 | switch (IntNo) { |
9084 | default: |
9085 | break; // Don't custom lower most intrinsics. |
9086 | case Intrinsic::thread_pointer: { |
9087 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
9088 | return DAG.getRegister(Reg: RISCV::X4, VT: PtrVT); |
9089 | } |
9090 | case Intrinsic::riscv_orc_b: |
9091 | case Intrinsic::riscv_brev8: |
9092 | case Intrinsic::riscv_sha256sig0: |
9093 | case Intrinsic::riscv_sha256sig1: |
9094 | case Intrinsic::riscv_sha256sum0: |
9095 | case Intrinsic::riscv_sha256sum1: |
9096 | case Intrinsic::riscv_sm3p0: |
9097 | case Intrinsic::riscv_sm3p1: { |
9098 | unsigned Opc; |
9099 | switch (IntNo) { |
9100 | case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; |
9101 | case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; |
9102 | case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; |
9103 | case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; |
9104 | case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; |
9105 | case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; |
9106 | case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; |
9107 | case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; |
9108 | } |
9109 | |
9110 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
9111 | SDValue NewOp = |
9112 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
9113 | SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: NewOp); |
9114 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res); |
9115 | } |
9116 | |
9117 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1)); |
9118 | } |
9119 | case Intrinsic::riscv_sm4ks: |
9120 | case Intrinsic::riscv_sm4ed: { |
9121 | unsigned Opc = |
9122 | IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; |
9123 | |
9124 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
9125 | SDValue NewOp0 = |
9126 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
9127 | SDValue NewOp1 = |
9128 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2)); |
9129 | SDValue Res = |
9130 | DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1, N3: Op.getOperand(i: 3)); |
9131 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res); |
9132 | } |
9133 | |
9134 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), |
9135 | N3: Op.getOperand(i: 3)); |
9136 | } |
9137 | case Intrinsic::riscv_zip: |
9138 | case Intrinsic::riscv_unzip: { |
9139 | unsigned Opc = |
9140 | IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; |
9141 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1)); |
9142 | } |
9143 | case Intrinsic::riscv_mopr: { |
9144 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
9145 | SDValue NewOp = |
9146 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
9147 | SDValue Res = DAG.getNode( |
9148 | Opcode: RISCVISD::MOPR, DL, VT: MVT::i64, N1: NewOp, |
9149 | N2: DAG.getTargetConstant(Val: Op.getConstantOperandVal(i: 2), DL, VT: MVT::i64)); |
9150 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res); |
9151 | } |
9152 | return DAG.getNode(Opcode: RISCVISD::MOPR, DL, VT: XLenVT, N1: Op.getOperand(i: 1), |
9153 | N2: Op.getOperand(i: 2)); |
9154 | } |
9155 | |
9156 | case Intrinsic::riscv_moprr: { |
9157 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
9158 | SDValue NewOp0 = |
9159 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
9160 | SDValue NewOp1 = |
9161 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2)); |
9162 | SDValue Res = DAG.getNode( |
9163 | Opcode: RISCVISD::MOPRR, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1, |
9164 | N3: DAG.getTargetConstant(Val: Op.getConstantOperandVal(i: 3), DL, VT: MVT::i64)); |
9165 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res); |
9166 | } |
9167 | return DAG.getNode(Opcode: RISCVISD::MOPRR, DL, VT: XLenVT, N1: Op.getOperand(i: 1), |
9168 | N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
9169 | } |
9170 | case Intrinsic::riscv_clmul: |
9171 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
9172 | SDValue NewOp0 = |
9173 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
9174 | SDValue NewOp1 = |
9175 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2)); |
9176 | SDValue Res = DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
9177 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res); |
9178 | } |
9179 | return DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: XLenVT, N1: Op.getOperand(i: 1), |
9180 | N2: Op.getOperand(i: 2)); |
9181 | case Intrinsic::riscv_clmulh: |
9182 | case Intrinsic::riscv_clmulr: { |
9183 | unsigned Opc = |
9184 | IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR; |
9185 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
9186 | SDValue NewOp0 = |
9187 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 1)); |
9188 | SDValue NewOp1 = |
9189 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2)); |
9190 | NewOp0 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp0, |
9191 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
9192 | NewOp1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp1, |
9193 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
9194 | SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
9195 | Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Res, |
9196 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
9197 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res); |
9198 | } |
9199 | |
9200 | return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
9201 | } |
9202 | case Intrinsic::experimental_get_vector_length: |
9203 | return lowerGetVectorLength(N: Op.getNode(), DAG, Subtarget); |
9204 | case Intrinsic::experimental_cttz_elts: |
9205 | return lowerCttzElts(N: Op.getNode(), DAG, Subtarget); |
9206 | case Intrinsic::riscv_vmv_x_s: { |
9207 | SDValue Res = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Op.getOperand(i: 1)); |
9208 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: Res); |
9209 | } |
9210 | case Intrinsic::riscv_vfmv_f_s: |
9211 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Op.getValueType(), |
9212 | N1: Op.getOperand(i: 1), N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9213 | case Intrinsic::riscv_vmv_v_x: |
9214 | return lowerScalarSplat(Passthru: Op.getOperand(i: 1), Scalar: Op.getOperand(i: 2), |
9215 | VL: Op.getOperand(i: 3), VT: Op.getSimpleValueType(), DL, DAG, |
9216 | Subtarget); |
9217 | case Intrinsic::riscv_vfmv_v_f: |
9218 | return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: Op.getValueType(), |
9219 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
9220 | case Intrinsic::riscv_vmv_s_x: { |
9221 | SDValue Scalar = Op.getOperand(i: 2); |
9222 | |
9223 | if (Scalar.getValueType().bitsLE(VT: XLenVT)) { |
9224 | Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Scalar); |
9225 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: Op.getValueType(), |
9226 | N1: Op.getOperand(i: 1), N2: Scalar, N3: Op.getOperand(i: 3)); |
9227 | } |
9228 | |
9229 | assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!" ); |
9230 | |
9231 | // This is an i64 value that lives in two scalar registers. We have to |
9232 | // insert this in a convoluted way. First we build vXi64 splat containing |
9233 | // the two values that we assemble using some bit math. Next we'll use |
9234 | // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask |
9235 | // to merge element 0 from our splat into the source vector. |
9236 | // FIXME: This is probably not the best way to do this, but it is |
9237 | // consistent with INSERT_VECTOR_ELT lowering so it is a good starting |
9238 | // point. |
9239 | // sw lo, (a0) |
9240 | // sw hi, 4(a0) |
9241 | // vlse vX, (a0) |
9242 | // |
9243 | // vid.v vVid |
9244 | // vmseq.vx mMask, vVid, 0 |
9245 | // vmerge.vvm vDest, vSrc, vVal, mMask |
9246 | MVT VT = Op.getSimpleValueType(); |
9247 | SDValue Vec = Op.getOperand(i: 1); |
9248 | SDValue VL = getVLOperand(Op); |
9249 | |
9250 | SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar, VL, DAG); |
9251 | if (Op.getOperand(i: 1).isUndef()) |
9252 | return SplattedVal; |
9253 | SDValue SplattedIdx = |
9254 | DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT), |
9255 | N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32), N3: VL); |
9256 | |
9257 | MVT MaskVT = getMaskTypeFor(VecVT: VT); |
9258 | SDValue Mask = getAllOnesMask(VecVT: VT, VL, DL, DAG); |
9259 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL); |
9260 | SDValue SelectCond = |
9261 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT, |
9262 | Ops: {VID, SplattedIdx, DAG.getCondCode(Cond: ISD::SETEQ), |
9263 | DAG.getUNDEF(VT: MaskVT), Mask, VL}); |
9264 | return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: SelectCond, N2: SplattedVal, |
9265 | N3: Vec, N4: DAG.getUNDEF(VT), N5: VL); |
9266 | } |
9267 | case Intrinsic::riscv_vfmv_s_f: |
9268 | return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT: Op.getSimpleValueType(), |
9269 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
9270 | // EGS * EEW >= 128 bits |
9271 | case Intrinsic::riscv_vaesdf_vv: |
9272 | case Intrinsic::riscv_vaesdf_vs: |
9273 | case Intrinsic::riscv_vaesdm_vv: |
9274 | case Intrinsic::riscv_vaesdm_vs: |
9275 | case Intrinsic::riscv_vaesef_vv: |
9276 | case Intrinsic::riscv_vaesef_vs: |
9277 | case Intrinsic::riscv_vaesem_vv: |
9278 | case Intrinsic::riscv_vaesem_vs: |
9279 | case Intrinsic::riscv_vaeskf1: |
9280 | case Intrinsic::riscv_vaeskf2: |
9281 | case Intrinsic::riscv_vaesz_vs: |
9282 | case Intrinsic::riscv_vsm4k: |
9283 | case Intrinsic::riscv_vsm4r_vv: |
9284 | case Intrinsic::riscv_vsm4r_vs: { |
9285 | if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) || |
9286 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) || |
9287 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget)) |
9288 | report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW." ); |
9289 | return Op; |
9290 | } |
9291 | // EGS * EEW >= 256 bits |
9292 | case Intrinsic::riscv_vsm3c: |
9293 | case Intrinsic::riscv_vsm3me: { |
9294 | if (!isValidEGW(EGS: 8, VT: Op.getSimpleValueType(), Subtarget) || |
9295 | !isValidEGW(EGS: 8, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget)) |
9296 | report_fatal_error(reason: "EGW should be greater than or equal to 8 * SEW." ); |
9297 | return Op; |
9298 | } |
9299 | // zvknha(SEW=32)/zvknhb(SEW=[32|64]) |
9300 | case Intrinsic::riscv_vsha2ch: |
9301 | case Intrinsic::riscv_vsha2cl: |
9302 | case Intrinsic::riscv_vsha2ms: { |
9303 | if (Op->getSimpleValueType(ResNo: 0).getScalarSizeInBits() == 64 && |
9304 | !Subtarget.hasStdExtZvknhb()) |
9305 | report_fatal_error(reason: "SEW=64 needs Zvknhb to be enabled." ); |
9306 | if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) || |
9307 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) || |
9308 | !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget)) |
9309 | report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW." ); |
9310 | return Op; |
9311 | } |
9312 | case Intrinsic::riscv_sf_vc_v_x: |
9313 | case Intrinsic::riscv_sf_vc_v_i: |
9314 | case Intrinsic::riscv_sf_vc_v_xv: |
9315 | case Intrinsic::riscv_sf_vc_v_iv: |
9316 | case Intrinsic::riscv_sf_vc_v_vv: |
9317 | case Intrinsic::riscv_sf_vc_v_fv: |
9318 | case Intrinsic::riscv_sf_vc_v_xvv: |
9319 | case Intrinsic::riscv_sf_vc_v_ivv: |
9320 | case Intrinsic::riscv_sf_vc_v_vvv: |
9321 | case Intrinsic::riscv_sf_vc_v_fvv: |
9322 | case Intrinsic::riscv_sf_vc_v_xvw: |
9323 | case Intrinsic::riscv_sf_vc_v_ivw: |
9324 | case Intrinsic::riscv_sf_vc_v_vvw: |
9325 | case Intrinsic::riscv_sf_vc_v_fvw: { |
9326 | MVT VT = Op.getSimpleValueType(); |
9327 | |
9328 | SmallVector<SDValue> Operands{Op->op_values()}; |
9329 | processVCIXOperands(OrigOp&: Op, Operands, DAG); |
9330 | |
9331 | MVT RetVT = VT; |
9332 | if (VT.isFixedLengthVector()) |
9333 | RetVT = getContainerForFixedLengthVector(VT); |
9334 | else if (VT.isFloatingPoint()) |
9335 | RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()), |
9336 | EC: VT.getVectorElementCount()); |
9337 | |
9338 | SDValue NewNode = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: RetVT, Ops: Operands); |
9339 | |
9340 | if (VT.isFixedLengthVector()) |
9341 | NewNode = convertFromScalableVector(VT, V: NewNode, DAG, Subtarget); |
9342 | else if (VT.isFloatingPoint()) |
9343 | NewNode = DAG.getBitcast(VT, V: NewNode); |
9344 | |
9345 | if (Op == NewNode) |
9346 | break; |
9347 | |
9348 | return NewNode; |
9349 | } |
9350 | } |
9351 | |
9352 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
9353 | } |
9354 | |
9355 | static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, |
9356 | unsigned Type) { |
9357 | SDLoc DL(Op); |
9358 | SmallVector<SDValue> Operands{Op->op_values()}; |
9359 | Operands.erase(CI: Operands.begin() + 1); |
9360 | |
9361 | const RISCVSubtarget &Subtarget = |
9362 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
9363 | MVT VT = Op.getSimpleValueType(); |
9364 | MVT RetVT = VT; |
9365 | MVT FloatVT = VT; |
9366 | |
9367 | if (VT.isFloatingPoint()) { |
9368 | RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()), |
9369 | EC: VT.getVectorElementCount()); |
9370 | FloatVT = RetVT; |
9371 | } |
9372 | if (VT.isFixedLengthVector()) |
9373 | RetVT = getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT: RetVT, |
9374 | Subtarget); |
9375 | |
9376 | processVCIXOperands(OrigOp&: Op, Operands, DAG); |
9377 | |
9378 | SDVTList VTs = DAG.getVTList(VTs: {RetVT, MVT::Other}); |
9379 | SDValue NewNode = DAG.getNode(Opcode: Type, DL, VTList: VTs, Ops: Operands); |
9380 | SDValue Chain = NewNode.getValue(R: 1); |
9381 | |
9382 | if (VT.isFixedLengthVector()) |
9383 | NewNode = convertFromScalableVector(VT: FloatVT, V: NewNode, DAG, Subtarget); |
9384 | if (VT.isFloatingPoint()) |
9385 | NewNode = DAG.getBitcast(VT, V: NewNode); |
9386 | |
9387 | NewNode = DAG.getMergeValues(Ops: {NewNode, Chain}, dl: DL); |
9388 | |
9389 | return NewNode; |
9390 | } |
9391 | |
9392 | static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, |
9393 | unsigned Type) { |
9394 | SmallVector<SDValue> Operands{Op->op_values()}; |
9395 | Operands.erase(CI: Operands.begin() + 1); |
9396 | processVCIXOperands(OrigOp&: Op, Operands, DAG); |
9397 | |
9398 | return DAG.getNode(Opcode: Type, DL: SDLoc(Op), VT: Op.getValueType(), Ops: Operands); |
9399 | } |
9400 | |
9401 | SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, |
9402 | SelectionDAG &DAG) const { |
9403 | unsigned IntNo = Op.getConstantOperandVal(i: 1); |
9404 | switch (IntNo) { |
9405 | default: |
9406 | break; |
9407 | case Intrinsic::riscv_masked_strided_load: { |
9408 | SDLoc DL(Op); |
9409 | MVT XLenVT = Subtarget.getXLenVT(); |
9410 | |
9411 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
9412 | // the selection of the masked intrinsics doesn't do this for us. |
9413 | SDValue Mask = Op.getOperand(i: 5); |
9414 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
9415 | |
9416 | MVT VT = Op->getSimpleValueType(ResNo: 0); |
9417 | MVT ContainerVT = VT; |
9418 | if (VT.isFixedLengthVector()) |
9419 | ContainerVT = getContainerForFixedLengthVector(VT); |
9420 | |
9421 | SDValue PassThru = Op.getOperand(i: 2); |
9422 | if (!IsUnmasked) { |
9423 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
9424 | if (VT.isFixedLengthVector()) { |
9425 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
9426 | PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget); |
9427 | } |
9428 | } |
9429 | |
9430 | auto *Load = cast<MemIntrinsicSDNode>(Val&: Op); |
9431 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
9432 | SDValue Ptr = Op.getOperand(i: 3); |
9433 | SDValue Stride = Op.getOperand(i: 4); |
9434 | SDValue Result, Chain; |
9435 | |
9436 | // TODO: We restrict this to unmasked loads currently in consideration of |
9437 | // the complexity of handling all-false masks.
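// For example (illustrative): an unmasked zero-stride load of i32 elements
// becomes a single scalar load splatted into the vector, rather than a
// vlse32 with a zero stride operand.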
9438 | MVT ScalarVT = ContainerVT.getVectorElementType(); |
9439 | if (IsUnmasked && isNullConstant(V: Stride) && ContainerVT.isInteger()) { |
9440 | SDValue ScalarLoad = |
9441 | DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: DL, VT: XLenVT, Chain: Load->getChain(), Ptr, |
9442 | MemVT: ScalarVT, MMO: Load->getMemOperand()); |
9443 | Chain = ScalarLoad.getValue(R: 1); |
9444 | Result = lowerScalarSplat(Passthru: SDValue(), Scalar: ScalarLoad, VL, VT: ContainerVT, DL, DAG, |
9445 | Subtarget); |
9446 | } else if (IsUnmasked && isNullConstant(V: Stride) && isTypeLegal(VT: ScalarVT)) { |
9447 | SDValue ScalarLoad = DAG.getLoad(VT: ScalarVT, dl: DL, Chain: Load->getChain(), Ptr, |
9448 | MMO: Load->getMemOperand()); |
9449 | Chain = ScalarLoad.getValue(R: 1); |
9450 | Result = DAG.getSplat(VT: ContainerVT, DL, Op: ScalarLoad); |
9451 | } else { |
9452 | SDValue IntID = DAG.getTargetConstant( |
9453 | Val: IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, |
9454 | VT: XLenVT); |
9455 | |
9456 | SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; |
9457 | if (IsUnmasked) |
9458 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
9459 | else |
9460 | Ops.push_back(Elt: PassThru); |
9461 | Ops.push_back(Elt: Ptr); |
9462 | Ops.push_back(Elt: Stride); |
9463 | if (!IsUnmasked) |
9464 | Ops.push_back(Elt: Mask); |
9465 | Ops.push_back(Elt: VL); |
9466 | if (!IsUnmasked) { |
9467 | SDValue Policy = |
9468 | DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT); |
9469 | Ops.push_back(Elt: Policy); |
9470 | } |
9471 | |
9472 | SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other}); |
9473 | Result = |
9474 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
9475 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand()); |
9476 | Chain = Result.getValue(R: 1); |
9477 | } |
9478 | if (VT.isFixedLengthVector()) |
9479 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
9480 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
9481 | } |
9482 | case Intrinsic::riscv_seg2_load: |
9483 | case Intrinsic::riscv_seg3_load: |
9484 | case Intrinsic::riscv_seg4_load: |
9485 | case Intrinsic::riscv_seg5_load: |
9486 | case Intrinsic::riscv_seg6_load: |
9487 | case Intrinsic::riscv_seg7_load: |
9488 | case Intrinsic::riscv_seg8_load: { |
9489 | SDLoc DL(Op); |
9490 | static const Intrinsic::ID VlsegInts[7] = { |
9491 | Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, |
9492 | Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, |
9493 | Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, |
9494 | Intrinsic::riscv_vlseg8}; |
9495 | unsigned NF = Op->getNumValues() - 1; |
9496 | assert(NF >= 2 && NF <= 8 && "Unexpected seg number" ); |
9497 | MVT XLenVT = Subtarget.getXLenVT(); |
9498 | MVT VT = Op->getSimpleValueType(ResNo: 0); |
9499 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
9500 | |
9501 | SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, |
9502 | Subtarget); |
9503 | SDValue IntID = DAG.getTargetConstant(Val: VlsegInts[NF - 2], DL, VT: XLenVT); |
9504 | auto *Load = cast<MemIntrinsicSDNode>(Val&: Op); |
9505 | SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); |
9506 | ContainerVTs.push_back(Elt: MVT::Other); |
9507 | SDVTList VTs = DAG.getVTList(VTs: ContainerVTs); |
9508 | SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; |
9509 | Ops.insert(I: Ops.end(), NumToInsert: NF, Elt: DAG.getUNDEF(VT: ContainerVT)); |
9510 | Ops.push_back(Elt: Op.getOperand(i: 2)); |
9511 | Ops.push_back(Elt: VL); |
9512 | SDValue Result = |
9513 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
9514 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand()); |
9515 | SmallVector<SDValue, 9> Results; |
9516 | for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) |
9517 | Results.push_back(Elt: convertFromScalableVector(VT, V: Result.getValue(R: RetIdx), |
9518 | DAG, Subtarget)); |
9519 | Results.push_back(Elt: Result.getValue(R: NF)); |
9520 | return DAG.getMergeValues(Ops: Results, dl: DL); |
9521 | } |
9522 | case Intrinsic::riscv_sf_vc_v_x_se: |
9523 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_X_SE); |
9524 | case Intrinsic::riscv_sf_vc_v_i_se: |
9525 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_I_SE); |
9526 | case Intrinsic::riscv_sf_vc_v_xv_se: |
9527 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XV_SE); |
9528 | case Intrinsic::riscv_sf_vc_v_iv_se: |
9529 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IV_SE); |
9530 | case Intrinsic::riscv_sf_vc_v_vv_se: |
9531 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VV_SE); |
9532 | case Intrinsic::riscv_sf_vc_v_fv_se: |
9533 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FV_SE); |
9534 | case Intrinsic::riscv_sf_vc_v_xvv_se: |
9535 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVV_SE); |
9536 | case Intrinsic::riscv_sf_vc_v_ivv_se: |
9537 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVV_SE); |
9538 | case Intrinsic::riscv_sf_vc_v_vvv_se: |
9539 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVV_SE); |
9540 | case Intrinsic::riscv_sf_vc_v_fvv_se: |
9541 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVV_SE); |
9542 | case Intrinsic::riscv_sf_vc_v_xvw_se: |
9543 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVW_SE); |
9544 | case Intrinsic::riscv_sf_vc_v_ivw_se: |
9545 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVW_SE); |
9546 | case Intrinsic::riscv_sf_vc_v_vvw_se: |
9547 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVW_SE); |
9548 | case Intrinsic::riscv_sf_vc_v_fvw_se: |
9549 | return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVW_SE); |
9550 | } |
9551 | |
9552 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
9553 | } |
9554 | |
9555 | SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, |
9556 | SelectionDAG &DAG) const { |
9557 | unsigned IntNo = Op.getConstantOperandVal(i: 1); |
9558 | switch (IntNo) { |
9559 | default: |
9560 | break; |
9561 | case Intrinsic::riscv_masked_strided_store: { |
9562 | SDLoc DL(Op); |
9563 | MVT XLenVT = Subtarget.getXLenVT(); |
9564 | |
9565 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
9566 | // the selection of the masked intrinsics doesn't do this for us. |
9567 | SDValue Mask = Op.getOperand(i: 5); |
9568 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
9569 | |
9570 | SDValue Val = Op.getOperand(i: 2); |
9571 | MVT VT = Val.getSimpleValueType(); |
9572 | MVT ContainerVT = VT; |
9573 | if (VT.isFixedLengthVector()) { |
9574 | ContainerVT = getContainerForFixedLengthVector(VT); |
9575 | Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget); |
9576 | } |
9577 | if (!IsUnmasked) { |
9578 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
9579 | if (VT.isFixedLengthVector()) |
9580 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
9581 | } |
9582 | |
9583 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
9584 | |
9585 | SDValue IntID = DAG.getTargetConstant( |
9586 | Val: IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, |
9587 | VT: XLenVT); |
9588 | |
9589 | auto *Store = cast<MemIntrinsicSDNode>(Val&: Op); |
9590 | SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; |
9591 | Ops.push_back(Elt: Val); |
9592 | Ops.push_back(Elt: Op.getOperand(i: 3)); // Ptr |
9593 | Ops.push_back(Elt: Op.getOperand(i: 4)); // Stride |
9594 | if (!IsUnmasked) |
9595 | Ops.push_back(Elt: Mask); |
9596 | Ops.push_back(Elt: VL); |
9597 | |
9598 | return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: Store->getVTList(), |
9599 | Ops, MemVT: Store->getMemoryVT(), |
9600 | MMO: Store->getMemOperand()); |
9601 | } |
9602 | case Intrinsic::riscv_seg2_store: |
9603 | case Intrinsic::riscv_seg3_store: |
9604 | case Intrinsic::riscv_seg4_store: |
9605 | case Intrinsic::riscv_seg5_store: |
9606 | case Intrinsic::riscv_seg6_store: |
9607 | case Intrinsic::riscv_seg7_store: |
9608 | case Intrinsic::riscv_seg8_store: { |
9609 | SDLoc DL(Op); |
9610 | static const Intrinsic::ID VssegInts[] = { |
9611 | Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, |
9612 | Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, |
9613 | Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, |
9614 | Intrinsic::riscv_vsseg8}; |
9615 | // Operands are (chain, int_id, vec*, ptr, vl) |
9616 | unsigned NF = Op->getNumOperands() - 4; |
9617 | assert(NF >= 2 && NF <= 8 && "Unexpected seg number" ); |
9618 | MVT XLenVT = Subtarget.getXLenVT(); |
9619 | MVT VT = Op->getOperand(Num: 2).getSimpleValueType(); |
9620 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
9621 | |
9622 | SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, |
9623 | Subtarget); |
9624 | SDValue IntID = DAG.getTargetConstant(Val: VssegInts[NF - 2], DL, VT: XLenVT); |
9625 | SDValue Ptr = Op->getOperand(Num: NF + 2); |
9626 | |
9627 | auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Val&: Op); |
9628 | SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID}; |
9629 | for (unsigned i = 0; i < NF; i++) |
9630 | Ops.push_back(Elt: convertToScalableVector( |
9631 | VT: ContainerVT, V: FixedIntrinsic->getOperand(Num: 2 + i), DAG, Subtarget)); |
9632 | Ops.append(IL: {Ptr, VL}); |
9633 | |
9634 | return DAG.getMemIntrinsicNode( |
9635 | Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops, |
9636 | MemVT: FixedIntrinsic->getMemoryVT(), MMO: FixedIntrinsic->getMemOperand()); |
9637 | } |
9638 | case Intrinsic::riscv_sf_vc_xv_se: |
9639 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XV_SE); |
9640 | case Intrinsic::riscv_sf_vc_iv_se: |
9641 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IV_SE); |
9642 | case Intrinsic::riscv_sf_vc_vv_se: |
9643 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VV_SE); |
9644 | case Intrinsic::riscv_sf_vc_fv_se: |
9645 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FV_SE); |
9646 | case Intrinsic::riscv_sf_vc_xvv_se: |
9647 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVV_SE); |
9648 | case Intrinsic::riscv_sf_vc_ivv_se: |
9649 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVV_SE); |
9650 | case Intrinsic::riscv_sf_vc_vvv_se: |
9651 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVV_SE); |
9652 | case Intrinsic::riscv_sf_vc_fvv_se: |
9653 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVV_SE); |
9654 | case Intrinsic::riscv_sf_vc_xvw_se: |
9655 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVW_SE); |
9656 | case Intrinsic::riscv_sf_vc_ivw_se: |
9657 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVW_SE); |
9658 | case Intrinsic::riscv_sf_vc_vvw_se: |
9659 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVW_SE); |
9660 | case Intrinsic::riscv_sf_vc_fvw_se: |
9661 | return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVW_SE); |
9662 | } |
9663 | |
9664 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
9665 | } |
9666 | |
9667 | static unsigned getRVVReductionOp(unsigned ISDOpcode) { |
9668 | switch (ISDOpcode) { |
9669 | default: |
9670 | llvm_unreachable("Unhandled reduction" ); |
9671 | case ISD::VP_REDUCE_ADD: |
9672 | case ISD::VECREDUCE_ADD: |
9673 | return RISCVISD::VECREDUCE_ADD_VL; |
9674 | case ISD::VP_REDUCE_UMAX: |
9675 | case ISD::VECREDUCE_UMAX: |
9676 | return RISCVISD::VECREDUCE_UMAX_VL; |
9677 | case ISD::VP_REDUCE_SMAX: |
9678 | case ISD::VECREDUCE_SMAX: |
9679 | return RISCVISD::VECREDUCE_SMAX_VL; |
9680 | case ISD::VP_REDUCE_UMIN: |
9681 | case ISD::VECREDUCE_UMIN: |
9682 | return RISCVISD::VECREDUCE_UMIN_VL; |
9683 | case ISD::VP_REDUCE_SMIN: |
9684 | case ISD::VECREDUCE_SMIN: |
9685 | return RISCVISD::VECREDUCE_SMIN_VL; |
9686 | case ISD::VP_REDUCE_AND: |
9687 | case ISD::VECREDUCE_AND: |
9688 | return RISCVISD::VECREDUCE_AND_VL; |
9689 | case ISD::VP_REDUCE_OR: |
9690 | case ISD::VECREDUCE_OR: |
9691 | return RISCVISD::VECREDUCE_OR_VL; |
9692 | case ISD::VP_REDUCE_XOR: |
9693 | case ISD::VECREDUCE_XOR: |
9694 | return RISCVISD::VECREDUCE_XOR_VL; |
9695 | case ISD::VP_REDUCE_FADD: |
9696 | return RISCVISD::VECREDUCE_FADD_VL; |
9697 | case ISD::VP_REDUCE_SEQ_FADD: |
9698 | return RISCVISD::VECREDUCE_SEQ_FADD_VL; |
9699 | case ISD::VP_REDUCE_FMAX: |
9700 | case ISD::VP_REDUCE_FMAXIMUM: |
9701 | return RISCVISD::VECREDUCE_FMAX_VL; |
9702 | case ISD::VP_REDUCE_FMIN: |
9703 | case ISD::VP_REDUCE_FMINIMUM: |
9704 | return RISCVISD::VECREDUCE_FMIN_VL; |
9705 | } |
9706 | |
9707 | } |
9708 | |
9709 | SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, |
9710 | SelectionDAG &DAG, |
9711 | bool IsVP) const { |
9712 | SDLoc DL(Op); |
9713 | SDValue Vec = Op.getOperand(i: IsVP ? 1 : 0); |
9714 | MVT VecVT = Vec.getSimpleValueType(); |
9715 | assert((Op.getOpcode() == ISD::VECREDUCE_AND || |
9716 | Op.getOpcode() == ISD::VECREDUCE_OR || |
9717 | Op.getOpcode() == ISD::VECREDUCE_XOR || |
9718 | Op.getOpcode() == ISD::VP_REDUCE_AND || |
9719 | Op.getOpcode() == ISD::VP_REDUCE_OR || |
9720 | Op.getOpcode() == ISD::VP_REDUCE_XOR) && |
9721 | "Unexpected reduction lowering" ); |
9722 | |
9723 | MVT XLenVT = Subtarget.getXLenVT(); |
9724 | |
9725 | MVT ContainerVT = VecVT; |
9726 | if (VecVT.isFixedLengthVector()) { |
9727 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9728 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9729 | } |
9730 | |
9731 | SDValue Mask, VL; |
9732 | if (IsVP) { |
9733 | Mask = Op.getOperand(i: 2); |
9734 | VL = Op.getOperand(i: 3); |
9735 | } else { |
9736 | std::tie(args&: Mask, args&: VL) = |
9737 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9738 | } |
9739 | |
9740 | unsigned BaseOpc; |
9741 | ISD::CondCode CC; |
9742 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
9743 | |
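  // Each i1 reduction can be answered with a single vcpop.m: AND of a mask is
  // true iff its complement has no set bits, OR is true iff at least one bit
  // is set, and XOR is true iff the population count is odd.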
9744 | switch (Op.getOpcode()) { |
9745 | default: |
9746 | llvm_unreachable("Unhandled reduction" ); |
9747 | case ISD::VECREDUCE_AND: |
9748 | case ISD::VP_REDUCE_AND: { |
9749 | // vcpop ~x == 0 |
9750 | SDValue TrueMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL); |
9751 | Vec = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Vec, N2: TrueMask, N3: VL); |
9752 | Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
9753 | CC = ISD::SETEQ; |
9754 | BaseOpc = ISD::AND; |
9755 | break; |
9756 | } |
9757 | case ISD::VECREDUCE_OR: |
9758 | case ISD::VP_REDUCE_OR: |
9759 | // vcpop x != 0 |
9760 | Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
9761 | CC = ISD::SETNE; |
9762 | BaseOpc = ISD::OR; |
9763 | break; |
9764 | case ISD::VECREDUCE_XOR: |
9765 | case ISD::VP_REDUCE_XOR: { |
9766 | // ((vcpop x) & 1) != 0 |
9767 | SDValue One = DAG.getConstant(Val: 1, DL, VT: XLenVT); |
9768 | Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL); |
9769 | Vec = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Vec, N2: One); |
9770 | CC = ISD::SETNE; |
9771 | BaseOpc = ISD::XOR; |
9772 | break; |
9773 | } |
9774 | } |
9775 | |
9776 | SDValue SetCC = DAG.getSetCC(DL, VT: XLenVT, LHS: Vec, RHS: Zero, Cond: CC); |
9777 | SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: SetCC); |
9778 | |
9779 | if (!IsVP) |
9780 | return SetCC; |
9781 | |
9782 | // Now include the start value in the operation. |
9783 | // Note that we must return the start value when no elements are operated |
9784 | // upon. The vcpop instructions we've emitted in each case above will return |
9785 | // 0 for an inactive vector, and so we've already received the neutral value: |
9786 | // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we |
9787 | // can simply include the start value. |
9788 | return DAG.getNode(Opcode: BaseOpc, DL, VT: Op.getValueType(), N1: SetCC, N2: Op.getOperand(i: 0)); |
9789 | } |
9790 | |
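/// Return true if the AVL is known to select at least one element: either the
/// X0 sentinel register (which encodes VLMAX) or a constant of one or more.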
9791 | static bool isNonZeroAVL(SDValue AVL) { |
9792 | auto *RegisterAVL = dyn_cast<RegisterSDNode>(Val&: AVL); |
9793 | auto *ImmAVL = dyn_cast<ConstantSDNode>(Val&: AVL); |
9794 | return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || |
9795 | (ImmAVL && ImmAVL->getZExtValue() >= 1); |
9796 | } |
9797 | |
9798 | /// Helper to lower a reduction sequence of the form: |
9799 | /// scalar = reduce_op vec, scalar_start |
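/// The sequence is lowered by inserting scalar_start into element 0 of an
/// LMUL<=1 vector, performing the RVV reduction (which leaves its scalar
/// result in element 0 of an LMUL1 register), and extracting element 0 of the
/// result.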
9800 | static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, |
9801 | SDValue StartValue, SDValue Vec, SDValue Mask, |
9802 | SDValue VL, const SDLoc &DL, SelectionDAG &DAG, |
9803 | const RISCVSubtarget &Subtarget) { |
9804 | const MVT VecVT = Vec.getSimpleValueType(); |
9805 | const MVT M1VT = getLMUL1VT(VT: VecVT); |
9806 | const MVT XLenVT = Subtarget.getXLenVT(); |
9807 | const bool NonZeroAVL = isNonZeroAVL(AVL: VL); |
9808 | |
9809 | // The reduction needs an LMUL1 input; do the splat at either LMUL1 |
9810 | // or the original VT if fractional. |
9811 | auto InnerVT = VecVT.bitsLE(VT: M1VT) ? VecVT : M1VT; |
9812 | // We reuse the VL of the reduction to reduce vsetvli toggles if we can |
9813 | // prove it is non-zero. For the AVL=0 case, we need the scalar to |
9814 | // be the result of the reduction operation. |
9815 | auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(Val: 1, DL, VT: XLenVT); |
9816 | SDValue InitialValue = lowerScalarInsert(Scalar: StartValue, VL: InnerVL, VT: InnerVT, DL, |
9817 | DAG, Subtarget); |
9818 | if (M1VT != InnerVT) |
9819 | InitialValue = |
9820 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: M1VT, N1: DAG.getUNDEF(VT: M1VT), |
9821 | N2: InitialValue, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
9822 | SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(VT: M1VT) : InitialValue; |
9823 | SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT); |
9824 | SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy}; |
9825 | SDValue Reduction = DAG.getNode(Opcode: RVVOpcode, DL, VT: M1VT, Ops); |
9826 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Reduction, |
9827 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9828 | } |
9829 | |
9830 | SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, |
9831 | SelectionDAG &DAG) const { |
9832 | SDLoc DL(Op); |
9833 | SDValue Vec = Op.getOperand(i: 0); |
9834 | EVT VecEVT = Vec.getValueType(); |
9835 | |
9836 | unsigned BaseOpc = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode()); |
9837 | |
9838 | // Due to ordering in legalize types we may have a vector type that needs to |
9839 | // be split. Do that manually so we can get down to a legal type. |
9840 | while (getTypeAction(Context&: *DAG.getContext(), VT: VecEVT) == |
9841 | TargetLowering::TypeSplitVector) { |
9842 | auto [Lo, Hi] = DAG.SplitVector(N: Vec, DL); |
9843 | VecEVT = Lo.getValueType(); |
9844 | Vec = DAG.getNode(Opcode: BaseOpc, DL, VT: VecEVT, N1: Lo, N2: Hi); |
9845 | } |
9846 | |
9847 | // TODO: The type may need to be widened rather than split. Or widened before |
9848 | // it can be split. |
9849 | if (!isTypeLegal(VT: VecEVT)) |
9850 | return SDValue(); |
9851 | |
9852 | MVT VecVT = VecEVT.getSimpleVT(); |
9853 | MVT VecEltVT = VecVT.getVectorElementType(); |
9854 | unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode()); |
9855 | |
9856 | MVT ContainerVT = VecVT; |
9857 | if (VecVT.isFixedLengthVector()) { |
9858 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9859 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9860 | } |
9861 | |
9862 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9863 | |
9864 | SDValue StartV = DAG.getNeutralElement(Opcode: BaseOpc, DL, VT: VecEltVT, Flags: SDNodeFlags()); |
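  // These operations are all idempotent (x op x == x), so seeding the
  // reduction with the vector's first element gives the same result as using
  // the neutral element while avoiding materializing that constant.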
9865 | switch (BaseOpc) { |
9866 | case ISD::AND: |
9867 | case ISD::OR: |
9868 | case ISD::UMAX: |
9869 | case ISD::UMIN: |
9870 | case ISD::SMAX: |
9871 | case ISD::SMIN: |
9872 | StartV = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: VecEltVT, N1: Vec, |
9873 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9874 | } |
9875 | return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: StartV, Vec, |
9876 | Mask, VL, DL, DAG, Subtarget); |
9877 | } |
9878 | |
9879 | // Given a reduction op, this function returns the matching reduction opcode, |
9880 | // the vector SDValue and the scalar SDValue required to lower this to a |
9881 | // RISCVISD node. |
9882 | static std::tuple<unsigned, SDValue, SDValue> |
9883 | getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, |
9884 | const RISCVSubtarget &Subtarget) { |
9885 | SDLoc DL(Op); |
9886 | auto Flags = Op->getFlags(); |
9887 | unsigned Opcode = Op.getOpcode(); |
9888 | switch (Opcode) { |
9889 | default: |
9890 | llvm_unreachable("Unhandled reduction" ); |
9891 | case ISD::VECREDUCE_FADD: { |
9892 | // Use positive zero if we can. It is cheaper to materialize. |
9893 | SDValue Zero = |
9894 | DAG.getConstantFP(Val: Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT: EltVT); |
9895 | return std::make_tuple(args: RISCVISD::VECREDUCE_FADD_VL, args: Op.getOperand(i: 0), args&: Zero); |
9896 | } |
9897 | case ISD::VECREDUCE_SEQ_FADD: |
9898 | return std::make_tuple(args: RISCVISD::VECREDUCE_SEQ_FADD_VL, args: Op.getOperand(i: 1), |
9899 | args: Op.getOperand(i: 0)); |
9900 | case ISD::VECREDUCE_FMINIMUM: |
9901 | case ISD::VECREDUCE_FMAXIMUM: |
9902 | case ISD::VECREDUCE_FMIN: |
9903 | case ISD::VECREDUCE_FMAX: { |
9904 | SDValue Front = |
9905 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Op.getOperand(i: 0), |
9906 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
9907 | unsigned RVVOpc = |
9908 | (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM) |
9909 | ? RISCVISD::VECREDUCE_FMIN_VL |
9910 | : RISCVISD::VECREDUCE_FMAX_VL; |
9911 | return std::make_tuple(args&: RVVOpc, args: Op.getOperand(i: 0), args&: Front); |
9912 | } |
9913 | } |
9914 | } |
9915 | |
9916 | SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, |
9917 | SelectionDAG &DAG) const { |
9918 | SDLoc DL(Op); |
9919 | MVT VecEltVT = Op.getSimpleValueType(); |
9920 | |
9921 | unsigned RVVOpcode; |
9922 | SDValue VectorVal, ScalarVal; |
9923 | std::tie(args&: RVVOpcode, args&: VectorVal, args&: ScalarVal) = |
9924 | getRVVFPReductionOpAndOperands(Op, DAG, EltVT: VecEltVT, Subtarget); |
9925 | MVT VecVT = VectorVal.getSimpleValueType(); |
9926 | |
9927 | MVT ContainerVT = VecVT; |
9928 | if (VecVT.isFixedLengthVector()) { |
9929 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9930 | VectorVal = convertToScalableVector(VT: ContainerVT, V: VectorVal, DAG, Subtarget); |
9931 | } |
9932 | |
9933 | MVT ResVT = Op.getSimpleValueType(); |
9934 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9935 | SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, StartValue: ScalarVal, Vec: VectorVal, Mask, |
9936 | VL, DL, DAG, Subtarget); |
9937 | if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM && |
9938 | Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM) |
9939 | return Res; |
9940 | |
9941 | if (Op->getFlags().hasNoNaNs()) |
9942 | return Res; |
9943 | |
9944 |   // Force the output to NaN if any element is NaN. |
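  // A lane compares not-equal with itself exactly when it is NaN, so the
  // vcpop of the setcc mask below counts the NaN elements.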
9945 | SDValue IsNan = |
9946 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(), |
9947 | Ops: {VectorVal, VectorVal, DAG.getCondCode(Cond: ISD::SETNE), |
9948 | DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL}); |
9949 | MVT XLenVT = Subtarget.getXLenVT(); |
9950 | SDValue CPop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: IsNan, N2: Mask, N3: VL); |
9951 | SDValue NoNaNs = DAG.getSetCC(DL, VT: XLenVT, LHS: CPop, |
9952 | RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ); |
9953 | return DAG.getSelect( |
9954 | DL, VT: ResVT, Cond: NoNaNs, LHS: Res, |
9955 | RHS: DAG.getConstantFP(Val: APFloat::getNaN(Sem: DAG.EVTToAPFloatSemantics(VT: ResVT)), DL, |
9956 | VT: ResVT)); |
9957 | } |
9958 | |
9959 | SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, |
9960 | SelectionDAG &DAG) const { |
9961 | SDLoc DL(Op); |
9962 | unsigned Opc = Op.getOpcode(); |
9963 | SDValue Start = Op.getOperand(i: 0); |
9964 | SDValue Vec = Op.getOperand(i: 1); |
9965 | EVT VecEVT = Vec.getValueType(); |
9966 | MVT XLenVT = Subtarget.getXLenVT(); |
9967 | |
9968 | // TODO: The type may need to be widened rather than split. Or widened before |
9969 | // it can be split. |
9970 | if (!isTypeLegal(VT: VecEVT)) |
9971 | return SDValue(); |
9972 | |
9973 | MVT VecVT = VecEVT.getSimpleVT(); |
9974 | unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Opc); |
9975 | |
9976 | if (VecVT.isFixedLengthVector()) { |
9977 | auto ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
9978 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
9979 | } |
9980 | |
9981 | SDValue VL = Op.getOperand(i: 3); |
9982 | SDValue Mask = Op.getOperand(i: 2); |
9983 | SDValue Res = |
9984 | lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: Op.getOperand(i: 0), |
9985 | Vec, Mask, VL, DL, DAG, Subtarget); |
9986 | if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) || |
9987 | Op->getFlags().hasNoNaNs()) |
9988 | return Res; |
9989 | |
9990 | // Propagate NaNs. |
9991 | MVT PredVT = getMaskTypeFor(VecVT: Vec.getSimpleValueType()); |
9992 | // Check if any of the elements in Vec is NaN. |
9993 | SDValue IsNaN = DAG.getNode( |
9994 | Opcode: RISCVISD::SETCC_VL, DL, VT: PredVT, |
9995 | Ops: {Vec, Vec, DAG.getCondCode(Cond: ISD::SETNE), DAG.getUNDEF(VT: PredVT), Mask, VL}); |
9996 | SDValue VCPop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: IsNaN, N2: Mask, N3: VL); |
9997 | // Check if the start value is NaN. |
9998 | SDValue StartIsNaN = DAG.getSetCC(DL, VT: XLenVT, LHS: Start, RHS: Start, Cond: ISD::SETUO); |
9999 | VCPop = DAG.getNode(Opcode: ISD::OR, DL, VT: XLenVT, N1: VCPop, N2: StartIsNaN); |
10000 | SDValue NoNaNs = DAG.getSetCC(DL, VT: XLenVT, LHS: VCPop, |
10001 | RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ); |
10002 | MVT ResVT = Res.getSimpleValueType(); |
10003 | return DAG.getSelect( |
10004 | DL, VT: ResVT, Cond: NoNaNs, LHS: Res, |
10005 | RHS: DAG.getConstantFP(Val: APFloat::getNaN(Sem: DAG.EVTToAPFloatSemantics(VT: ResVT)), DL, |
10006 | VT: ResVT)); |
10007 | } |
10008 | |
10009 | SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, |
10010 | SelectionDAG &DAG) const { |
10011 | SDValue Vec = Op.getOperand(i: 0); |
10012 | SDValue SubVec = Op.getOperand(i: 1); |
10013 | MVT VecVT = Vec.getSimpleValueType(); |
10014 | MVT SubVecVT = SubVec.getSimpleValueType(); |
10015 | |
10016 | SDLoc DL(Op); |
10017 | MVT XLenVT = Subtarget.getXLenVT(); |
10018 | unsigned OrigIdx = Op.getConstantOperandVal(i: 2); |
10019 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
10020 | |
10021 | // We don't have the ability to slide mask vectors up indexed by their i1 |
10022 | // elements; the smallest we can do is i8. Often we are able to bitcast to |
10023 | // equivalent i8 vectors. Note that when inserting a fixed-length vector |
10024 | // into a scalable one, we might not necessarily have enough scalable |
10025 | // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. |
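  // For example, inserting nxv8i1 at index 8 of nxv32i1 is re-expressed below
  // as inserting nxv1i8 at index 1 of nxv4i8.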
10026 | if (SubVecVT.getVectorElementType() == MVT::i1 && |
10027 | (OrigIdx != 0 || !Vec.isUndef())) { |
10028 | if (VecVT.getVectorMinNumElements() >= 8 && |
10029 | SubVecVT.getVectorMinNumElements() >= 8) { |
10030 | assert(OrigIdx % 8 == 0 && "Invalid index" ); |
10031 | assert(VecVT.getVectorMinNumElements() % 8 == 0 && |
10032 | SubVecVT.getVectorMinNumElements() % 8 == 0 && |
10033 | "Unexpected mask vector lowering" ); |
10034 | OrigIdx /= 8; |
10035 | SubVecVT = |
10036 | MVT::getVectorVT(VT: MVT::i8, NumElements: SubVecVT.getVectorMinNumElements() / 8, |
10037 | IsScalable: SubVecVT.isScalableVector()); |
10038 | VecVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VecVT.getVectorMinNumElements() / 8, |
10039 | IsScalable: VecVT.isScalableVector()); |
10040 | Vec = DAG.getBitcast(VT: VecVT, V: Vec); |
10041 | SubVec = DAG.getBitcast(VT: SubVecVT, V: SubVec); |
10042 | } else { |
10043 | // We can't slide this mask vector up indexed by its i1 elements. |
10044 | // This poses a problem when we wish to insert a scalable vector which |
10045 | // can't be re-expressed as a larger type. Just choose the slow path and |
10046 | // extend to a larger type, then truncate back down. |
10047 | MVT ExtVecVT = VecVT.changeVectorElementType(EltVT: MVT::i8); |
10048 | MVT ExtSubVecVT = SubVecVT.changeVectorElementType(EltVT: MVT::i8); |
10049 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec); |
10050 | SubVec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtSubVecVT, Operand: SubVec); |
10051 | Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ExtVecVT, N1: Vec, N2: SubVec, |
10052 | N3: Op.getOperand(i: 2)); |
10053 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtVecVT); |
10054 | return DAG.getSetCC(DL, VT: VecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE); |
10055 | } |
10056 | } |
10057 | |
10058 |   // If the subvector is a fixed-length type and we don't know VLEN |
10059 | // exactly, we cannot use subregister manipulation to simplify the codegen; we |
10060 | // don't know which register of a LMUL group contains the specific subvector |
10061 | // as we only know the minimum register size. Therefore we must slide the |
10062 | // vector group up the full amount. |
10063 | const auto VLen = Subtarget.getRealVLen(); |
10064 | if (SubVecVT.isFixedLengthVector() && !VLen) { |
10065 | if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) |
10066 | return Op; |
10067 | MVT ContainerVT = VecVT; |
10068 | if (VecVT.isFixedLengthVector()) { |
10069 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
10070 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
10071 | } |
10072 | |
10073 | if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { |
10074 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, |
10075 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec, |
10076 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
10077 | SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget); |
10078 | return DAG.getBitcast(VT: Op.getValueType(), V: SubVec); |
10079 | } |
10080 | |
10081 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVT, |
10082 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SubVec, |
10083 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
10084 | SDValue Mask = |
10085 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; |
10086 | // Set the vector length to only the number of elements we care about. Note |
10087 | // that for slideup this includes the offset. |
10088 | unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); |
10089 | SDValue VL = getVLOp(NumElts: EndIndex, ContainerVT, DL, DAG, Subtarget); |
10090 | |
10091 | // Use tail agnostic policy if we're inserting over Vec's tail. |
10092 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
10093 | if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) |
10094 | Policy = RISCVII::TAIL_AGNOSTIC; |
10095 | |
10096 | // If we're inserting into the lowest elements, use a tail undisturbed |
10097 | // vmv.v.v. |
10098 | if (OrigIdx == 0) { |
10099 | SubVec = |
10100 | DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: Vec, N2: SubVec, N3: VL); |
10101 | } else { |
10102 | SDValue SlideupAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT); |
10103 | SubVec = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: Vec, Op: SubVec, |
10104 | Offset: SlideupAmt, Mask, VL, Policy); |
10105 | } |
10106 | |
10107 | if (VecVT.isFixedLengthVector()) |
10108 | SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget); |
10109 | return DAG.getBitcast(VT: Op.getValueType(), V: SubVec); |
10110 | } |
10111 | |
10112 | MVT ContainerVecVT = VecVT; |
10113 | if (VecVT.isFixedLengthVector()) { |
10114 | ContainerVecVT = getContainerForFixedLengthVector(VT: VecVT); |
10115 | Vec = convertToScalableVector(VT: ContainerVecVT, V: Vec, DAG, Subtarget); |
10116 | } |
10117 | |
10118 | MVT ContainerSubVecVT = SubVecVT; |
10119 | if (SubVecVT.isFixedLengthVector()) { |
10120 | ContainerSubVecVT = getContainerForFixedLengthVector(VT: SubVecVT); |
10121 | SubVec = convertToScalableVector(VT: ContainerSubVecVT, V: SubVec, DAG, Subtarget); |
10122 | } |
10123 | |
10124 | unsigned SubRegIdx; |
10125 | ElementCount RemIdx; |
10126 | // insert_subvector scales the index by vscale if the subvector is scalable, |
10127 | // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if |
10128 | // we have a fixed length subvector, we need to adjust the index by 1/vscale. |
10129 | if (SubVecVT.isFixedLengthVector()) { |
10130 | assert(VLen); |
10131 | unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; |
10132 | auto Decompose = |
10133 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
10134 | VecVT: ContainerVecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx / Vscale, TRI); |
10135 | SubRegIdx = Decompose.first; |
10136 | RemIdx = ElementCount::getFixed(MinVal: (Decompose.second * Vscale) + |
10137 | (OrigIdx % Vscale)); |
10138 | } else { |
10139 | auto Decompose = |
10140 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
10141 | VecVT: ContainerVecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx, TRI); |
10142 | SubRegIdx = Decompose.first; |
10143 | RemIdx = ElementCount::getScalable(MinVal: Decompose.second); |
10144 | } |
10145 | |
10146 | TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock); |
10147 | assert(isPowerOf2_64( |
10148 | Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue())); |
10149 | bool ExactlyVecRegSized = |
10150 | Subtarget.expandVScale(X: SubVecVT.getSizeInBits()) |
10151 | .isKnownMultipleOf(RHS: Subtarget.expandVScale(X: VecRegSize)); |
10152 | |
10153 | // 1. If the Idx has been completely eliminated and this subvector's size is |
10154 | // a vector register or a multiple thereof, or the surrounding elements are |
10155 | // undef, then this is a subvector insert which naturally aligns to a vector |
10156 | // register. These can easily be handled using subregister manipulation. |
10157 | // 2. If the subvector isn't an exact multiple of a valid register group size, |
10158 | // then the insertion must preserve the undisturbed elements of the register. |
10159 | // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 |
10160 | // vector type (which resolves to a subregister copy), performing a VSLIDEUP |
10161 | // to place the subvector within the vector register, and an INSERT_SUBVECTOR |
10162 | // of that LMUL=1 type back into the larger vector (resolving to another |
10163 | // subregister operation). See below for how our VSLIDEUP works. We go via a |
10164 | // LMUL=1 type to avoid allocating a large register group to hold our |
10165 | // subvector. |
10166 | if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) { |
10167 | if (SubVecVT.isFixedLengthVector()) { |
10168 | // We may get NoSubRegister if inserting at index 0 and the subvec |
10169 | // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0 |
10170 | if (SubRegIdx == RISCV::NoSubRegister) { |
10171 | assert(OrigIdx == 0); |
10172 | return Op; |
10173 | } |
10174 | |
10175 | SDValue Insert = |
10176 | DAG.getTargetInsertSubreg(SRIdx: SubRegIdx, DL, VT: ContainerVecVT, Operand: Vec, Subreg: SubVec); |
10177 | if (VecVT.isFixedLengthVector()) |
10178 | Insert = convertFromScalableVector(VT: VecVT, V: Insert, DAG, Subtarget); |
10179 | return Insert; |
10180 | } |
10181 | return Op; |
10182 | } |
10183 | |
10184 |   // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements |
10185 |   // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy |
10186 | // (in our case undisturbed). This means we can set up a subvector insertion |
10187 | // where OFFSET is the insertion offset, and the VL is the OFFSET plus the |
10188 | // size of the subvector. |
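  // For example, inserting a 2-element subvector at offset 3 uses OFFSET=3 and
  // VL=5: elements 0..2 keep their previous values, elements 3..4 receive the
  // subvector, and elements from 5 up to VLMAX follow the tail policy.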
10189 | MVT InterSubVT = ContainerVecVT; |
10190 |   SDValue AlignedExtract = Vec; |
10191 | unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue(); |
10192 | if (SubVecVT.isFixedLengthVector()) |
10193 | AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock; |
10194 | if (ContainerVecVT.bitsGT(VT: getLMUL1VT(VT: ContainerVecVT))) { |
10195 | InterSubVT = getLMUL1VT(VT: ContainerVecVT); |
10196 | // Extract a subvector equal to the nearest full vector register type. This |
10197 | // should resolve to a EXTRACT_SUBREG instruction. |
10198 | AlignedExtract = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: InterSubVT, N1: Vec, |
10199 | N2: DAG.getVectorIdxConstant(Val: AlignedIdx, DL)); |
10200 | } |
10201 | |
10202 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: InterSubVT, |
10203 | N1: DAG.getUNDEF(VT: InterSubVT), N2: SubVec, |
10204 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
10205 | |
10206 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT: ContainerVecVT, DL, DAG, Subtarget); |
10207 | |
10208 | ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount(); |
10209 | VL = DAG.getElementCount(DL, VT: XLenVT, EC: SubVecVT.getVectorElementCount()); |
10210 | |
10211 | // Use tail agnostic policy if we're inserting over InterSubVT's tail. |
10212 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
10213 | if (Subtarget.expandVScale(X: EndIndex) == |
10214 | Subtarget.expandVScale(X: InterSubVT.getVectorElementCount())) |
10215 | Policy = RISCVII::TAIL_AGNOSTIC; |
10216 | |
10217 | // If we're inserting into the lowest elements, use a tail undisturbed |
10218 | // vmv.v.v. |
10219 | if (RemIdx.isZero()) { |
10220 | SubVec = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: InterSubVT, N1: AlignedExtract, |
10221 | N2: SubVec, N3: VL); |
10222 | } else { |
10223 | SDValue SlideupAmt = DAG.getElementCount(DL, VT: XLenVT, EC: RemIdx); |
10224 | |
10225 | // Construct the vector length corresponding to RemIdx + length(SubVecVT). |
10226 | VL = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: SlideupAmt, N2: VL); |
10227 | |
10228 | SubVec = getVSlideup(DAG, Subtarget, DL, VT: InterSubVT, Merge: AlignedExtract, Op: SubVec, |
10229 | Offset: SlideupAmt, Mask, VL, Policy); |
10230 | } |
10231 | |
10232 | // If required, insert this subvector back into the correct vector register. |
10233 | // This should resolve to an INSERT_SUBREG instruction. |
10234 | if (ContainerVecVT.bitsGT(VT: InterSubVT)) |
10235 | SubVec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ContainerVecVT, N1: Vec, N2: SubVec, |
10236 | N3: DAG.getVectorIdxConstant(Val: AlignedIdx, DL)); |
10237 | |
10238 | if (VecVT.isFixedLengthVector()) |
10239 | SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget); |
10240 | |
10241 | // We might have bitcast from a mask type: cast back to the original type if |
10242 | // required. |
10243 | return DAG.getBitcast(VT: Op.getSimpleValueType(), V: SubVec); |
10244 | } |
10245 | |
10246 | SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, |
10247 |                                                      SelectionDAG &DAG) const { |
10248 | SDValue Vec = Op.getOperand(i: 0); |
10249 | MVT SubVecVT = Op.getSimpleValueType(); |
10250 | MVT VecVT = Vec.getSimpleValueType(); |
10251 | |
10252 | SDLoc DL(Op); |
10253 | MVT XLenVT = Subtarget.getXLenVT(); |
10254 | unsigned OrigIdx = Op.getConstantOperandVal(i: 1); |
10255 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
10256 | |
10257 | // We don't have the ability to slide mask vectors down indexed by their i1 |
10258 | // elements; the smallest we can do is i8. Often we are able to bitcast to |
10259 | // equivalent i8 vectors. Note that when extracting a fixed-length vector |
10260 | // from a scalable one, we might not necessarily have enough scalable |
10261 | // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. |
10262 | if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { |
10263 | if (VecVT.getVectorMinNumElements() >= 8 && |
10264 | SubVecVT.getVectorMinNumElements() >= 8) { |
10265 | assert(OrigIdx % 8 == 0 && "Invalid index" ); |
10266 | assert(VecVT.getVectorMinNumElements() % 8 == 0 && |
10267 | SubVecVT.getVectorMinNumElements() % 8 == 0 && |
10268 | "Unexpected mask vector lowering" ); |
10269 | OrigIdx /= 8; |
10270 | SubVecVT = |
10271 | MVT::getVectorVT(VT: MVT::i8, NumElements: SubVecVT.getVectorMinNumElements() / 8, |
10272 | IsScalable: SubVecVT.isScalableVector()); |
10273 | VecVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VecVT.getVectorMinNumElements() / 8, |
10274 | IsScalable: VecVT.isScalableVector()); |
10275 | Vec = DAG.getBitcast(VT: VecVT, V: Vec); |
10276 | } else { |
10277 | // We can't slide this mask vector down, indexed by its i1 elements. |
10278 | // This poses a problem when we wish to extract a scalable vector which |
10279 | // can't be re-expressed as a larger type. Just choose the slow path and |
10280 | // extend to a larger type, then truncate back down. |
10281 | // TODO: We could probably improve this when extracting certain fixed |
10282 | // from fixed, where we can extract as i8 and shift the correct element |
10283 | // right to reach the desired subvector? |
10284 | MVT ExtVecVT = VecVT.changeVectorElementType(EltVT: MVT::i8); |
10285 | MVT ExtSubVecVT = SubVecVT.changeVectorElementType(EltVT: MVT::i8); |
10286 | Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec); |
10287 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ExtSubVecVT, N1: Vec, |
10288 | N2: Op.getOperand(i: 1)); |
10289 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtSubVecVT); |
10290 | return DAG.getSetCC(DL, VT: SubVecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE); |
10291 | } |
10292 | } |
10293 | |
10294 | // With an index of 0 this is a cast-like subvector, which can be performed |
10295 | // with subregister operations. |
10296 | if (OrigIdx == 0) |
10297 | return Op; |
10298 | |
10299 | const auto VLen = Subtarget.getRealVLen(); |
10300 | |
10301 |   // If the subvector is a fixed-length type and we don't know VLEN |
10302 | // exactly, we cannot use subregister manipulation to simplify the codegen; we |
10303 | // don't know which register of a LMUL group contains the specific subvector |
10304 | // as we only know the minimum register size. Therefore we must slide the |
10305 | // vector group down the full amount. |
10306 | if (SubVecVT.isFixedLengthVector() && !VLen) { |
10307 | MVT ContainerVT = VecVT; |
10308 | if (VecVT.isFixedLengthVector()) { |
10309 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
10310 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
10311 | } |
10312 | |
10313 | // Shrink down Vec so we're performing the slidedown on a smaller LMUL. |
10314 | unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1; |
10315 | if (auto ShrunkVT = |
10316 | getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: LastIdx, DL, DAG, Subtarget)) { |
10317 | ContainerVT = *ShrunkVT; |
10318 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ContainerVT, N1: Vec, |
10319 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10320 | } |
10321 | |
10322 | SDValue Mask = |
10323 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; |
10324 | // Set the vector length to only the number of elements we care about. This |
10325 | // avoids sliding down elements we're going to discard straight away. |
10326 | SDValue VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG, |
10327 | Subtarget); |
10328 | SDValue SlidedownAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT); |
10329 | SDValue Slidedown = |
10330 | getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, |
10331 | Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: SlidedownAmt, Mask, VL); |
10332 | // Now we can use a cast-like subvector extract to get the result. |
10333 | Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown, |
10334 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10335 | return DAG.getBitcast(VT: Op.getValueType(), V: Slidedown); |
10336 | } |
10337 | |
10338 | if (VecVT.isFixedLengthVector()) { |
10339 | VecVT = getContainerForFixedLengthVector(VT: VecVT); |
10340 | Vec = convertToScalableVector(VT: VecVT, V: Vec, DAG, Subtarget); |
10341 | } |
10342 | |
10343 | MVT ContainerSubVecVT = SubVecVT; |
10344 | if (SubVecVT.isFixedLengthVector()) |
10345 | ContainerSubVecVT = getContainerForFixedLengthVector(VT: SubVecVT); |
10346 | |
10347 | unsigned SubRegIdx; |
10348 | ElementCount RemIdx; |
10349 | // extract_subvector scales the index by vscale if the subvector is scalable, |
10350 | // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if |
10351 | // we have a fixed length subvector, we need to adjust the index by 1/vscale. |
10352 | if (SubVecVT.isFixedLengthVector()) { |
10353 | assert(VLen); |
10354 | unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; |
10355 | auto Decompose = |
10356 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
10357 | VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx / Vscale, TRI); |
10358 | SubRegIdx = Decompose.first; |
10359 | RemIdx = ElementCount::getFixed(MinVal: (Decompose.second * Vscale) + |
10360 | (OrigIdx % Vscale)); |
10361 | } else { |
10362 | auto Decompose = |
10363 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
10364 | VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx, TRI); |
10365 | SubRegIdx = Decompose.first; |
10366 | RemIdx = ElementCount::getScalable(MinVal: Decompose.second); |
10367 | } |
10368 | |
10369 | // If the Idx has been completely eliminated then this is a subvector extract |
10370 | // which naturally aligns to a vector register. These can easily be handled |
10371 | // using subregister manipulation. |
10372 | if (RemIdx.isZero()) { |
10373 | if (SubVecVT.isFixedLengthVector()) { |
10374 | Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: ContainerSubVecVT, Operand: Vec); |
10375 | return convertFromScalableVector(VT: SubVecVT, V: Vec, DAG, Subtarget); |
10376 | } |
10377 | return Op; |
10378 | } |
10379 | |
10380 | // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT |
10381 | // was > M1 then the index would need to be a multiple of VLMAX, and so would |
10382 | // divide exactly. |
10383 | assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second || |
10384 | getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1); |
10385 | |
10386 | // If the vector type is an LMUL-group type, extract a subvector equal to the |
10387 | // nearest full vector register type. |
10388 | MVT InterSubVT = VecVT; |
10389 | if (VecVT.bitsGT(VT: getLMUL1VT(VT: VecVT))) { |
10390 | // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and |
10391 | // we should have successfully decomposed the extract into a subregister. |
10392 | assert(SubRegIdx != RISCV::NoSubRegister); |
10393 | InterSubVT = getLMUL1VT(VT: VecVT); |
10394 | Vec = DAG.getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT: InterSubVT, Operand: Vec); |
10395 | } |
10396 | |
10397 | // Slide this vector register down by the desired number of elements in order |
10398 | // to place the desired subvector starting at element 0. |
10399 | SDValue SlidedownAmt = DAG.getElementCount(DL, VT: XLenVT, EC: RemIdx); |
10400 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: InterSubVT, DL, DAG, Subtarget); |
10401 | if (SubVecVT.isFixedLengthVector()) |
10402 | VL = getVLOp(NumElts: SubVecVT.getVectorNumElements(), ContainerVT: InterSubVT, DL, DAG, |
10403 | Subtarget); |
10404 | SDValue Slidedown = |
10405 | getVSlidedown(DAG, Subtarget, DL, VT: InterSubVT, Merge: DAG.getUNDEF(VT: InterSubVT), |
10406 | Op: Vec, Offset: SlidedownAmt, Mask, VL); |
10407 | |
10408 | // Now the vector is in the right position, extract our final subvector. This |
10409 | // should resolve to a COPY. |
10410 | Slidedown = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SubVecVT, N1: Slidedown, |
10411 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10412 | |
10413 | // We might have bitcast from a mask type: cast back to the original type if |
10414 | // required. |
10415 | return DAG.getBitcast(VT: Op.getSimpleValueType(), V: Slidedown); |
10416 | } |
10417 | |
10418 | // Widen a vector's operands to i8, then truncate its results back to the |
10419 | // original type, typically i1. All operand and result types must be the same. |
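// For example, a VECTOR_DEINTERLEAVE of two nxv4i1 operands is performed on
// nxv4i8 and each i8 result is compared against zero to recover the i1 values.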
10420 | static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, |
10421 | SelectionDAG &DAG) { |
10422 | MVT VT = N.getSimpleValueType(); |
10423 | MVT WideVT = VT.changeVectorElementType(EltVT: MVT::i8); |
10424 | SmallVector<SDValue, 4> WideOps; |
10425 | for (SDValue Op : N->ops()) { |
10426 | assert(Op.getSimpleValueType() == VT && |
10427 | "Operands and result must be same type" ); |
10428 | WideOps.push_back(Elt: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Op)); |
10429 | } |
10430 | |
10431 | unsigned NumVals = N->getNumValues(); |
10432 | |
10433 | SDVTList VTs = DAG.getVTList(VTs: SmallVector<EVT, 4>( |
10434 | NumVals, N.getValueType().changeVectorElementType(EltVT: MVT::i8))); |
10435 | SDValue WideN = DAG.getNode(Opcode: N.getOpcode(), DL, VTList: VTs, Ops: WideOps); |
10436 | SmallVector<SDValue, 4> TruncVals; |
10437 | for (unsigned I = 0; I < NumVals; I++) { |
10438 | TruncVals.push_back( |
10439 | Elt: DAG.getSetCC(DL, VT: N->getSimpleValueType(ResNo: I), LHS: WideN.getValue(R: I), |
10440 | RHS: DAG.getConstant(Val: 0, DL, VT: WideVT), Cond: ISD::SETNE)); |
10441 | } |
10442 | |
10443 | if (TruncVals.size() > 1) |
10444 | return DAG.getMergeValues(Ops: TruncVals, dl: DL); |
10445 | return TruncVals.front(); |
10446 | } |
10447 | |
10448 | SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, |
10449 | SelectionDAG &DAG) const { |
10450 | SDLoc DL(Op); |
10451 | MVT VecVT = Op.getSimpleValueType(); |
10452 | |
10453 | assert(VecVT.isScalableVector() && |
10454 |          "vector_deinterleave on non-scalable vector!"); |
10455 | |
10456 | // 1 bit element vectors need to be widened to e8 |
10457 | if (VecVT.getVectorElementType() == MVT::i1) |
10458 | return widenVectorOpsToi8(N: Op, DL, DAG); |
10459 | |
10460 | // If the VT is LMUL=8, we need to split and reassemble. |
10461 | if (VecVT.getSizeInBits().getKnownMinValue() == |
10462 | (8 * RISCV::RVVBitsPerBlock)) { |
10463 | auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0); |
10464 | auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1); |
10465 | EVT SplitVT = Op0Lo.getValueType(); |
10466 | |
10467 | SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL, |
10468 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op0Hi); |
10469 | SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL, |
10470 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op1Lo, N2: Op1Hi); |
10471 | |
10472 | SDValue Even = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, |
10473 | N1: ResLo.getValue(R: 0), N2: ResHi.getValue(R: 0)); |
10474 | SDValue Odd = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, N1: ResLo.getValue(R: 1), |
10475 | N2: ResHi.getValue(R: 1)); |
10476 | return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL); |
10477 | } |
10478 | |
10479 | // Concatenate the two vectors as one vector to deinterleave |
10480 | MVT ConcatVT = |
10481 | MVT::getVectorVT(VT: VecVT.getVectorElementType(), |
10482 | EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
10483 | SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT, |
10484 | N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1)); |
10485 | |
10486 |   // We want to operate on all lanes, so get the mask and VL for it |
10487 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: ConcatVT, DL, DAG, Subtarget); |
10488 | SDValue Passthru = DAG.getUNDEF(VT: ConcatVT); |
10489 | |
10490 | // We can deinterleave through vnsrl.wi if the element type is smaller than |
10491 | // ELEN |
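  // Viewing each pair of adjacent SEW-bit elements as a single 2*SEW-bit
  // element, a narrowing shift right by 0 extracts the even elements and a
  // narrowing shift right by SEW extracts the odd elements.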
10492 | if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { |
10493 | SDValue Even = |
10494 | getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: true, Subtarget, DAG); |
10495 | SDValue Odd = |
10496 | getDeinterleaveViaVNSRL(DL, VT: VecVT, Src: Concat, EvenElts: false, Subtarget, DAG); |
10497 | return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL); |
10498 | } |
10499 | |
10500 | // For the indices, use the same SEW to avoid an extra vsetvli |
10501 | MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); |
10502 | // Create a vector of even indices {0, 2, 4, ...} |
10503 | SDValue EvenIdx = |
10504 | DAG.getStepVector(DL, ResVT: IdxVT, StepVal: APInt(IdxVT.getScalarSizeInBits(), 2)); |
10505 | // Create a vector of odd indices {1, 3, 5, ... } |
10506 | SDValue OddIdx = |
10507 | DAG.getNode(Opcode: ISD::ADD, DL, VT: IdxVT, N1: EvenIdx, N2: DAG.getConstant(Val: 1, DL, VT: IdxVT)); |
10508 | |
10509 | // Gather the even and odd elements into two separate vectors |
10510 | SDValue EvenWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT, |
10511 | N1: Concat, N2: EvenIdx, N3: Passthru, N4: Mask, N5: VL); |
10512 | SDValue OddWide = DAG.getNode(Opcode: RISCVISD::VRGATHER_VV_VL, DL, VT: ConcatVT, |
10513 | N1: Concat, N2: OddIdx, N3: Passthru, N4: Mask, N5: VL); |
10514 | |
10515 | // Extract the result half of the gather for even and odd |
10516 | SDValue Even = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: EvenWide, |
10517 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10518 | SDValue Odd = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: OddWide, |
10519 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10520 | |
10521 | return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL); |
10522 | } |
10523 | |
10524 | SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, |
10525 | SelectionDAG &DAG) const { |
10526 | SDLoc DL(Op); |
10527 | MVT VecVT = Op.getSimpleValueType(); |
10528 | |
10529 | assert(VecVT.isScalableVector() && |
10530 | "vector_interleave on non-scalable vector!" ); |
10531 | |
10532 | // i1 vectors need to be widened to i8 |
10533 | if (VecVT.getVectorElementType() == MVT::i1) |
10534 | return widenVectorOpsToi8(N: Op, DL, DAG); |
10535 | |
10536 | MVT XLenVT = Subtarget.getXLenVT(); |
10537 | SDValue VL = DAG.getRegister(Reg: RISCV::X0, VT: XLenVT); |
10538 | |
10539 | // If the VT is LMUL=8, we need to split and reassemble. |
10540 | if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) { |
10541 | auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0); |
10542 | auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 1); |
10543 | EVT SplitVT = Op0Lo.getValueType(); |
10544 | |
10545 | SDValue ResLo = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL, |
10546 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Lo, N2: Op1Lo); |
10547 | SDValue ResHi = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL, |
10548 | VTList: DAG.getVTList(VT1: SplitVT, VT2: SplitVT), N1: Op0Hi, N2: Op1Hi); |
10549 | |
10550 | SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, |
10551 | N1: ResLo.getValue(R: 0), N2: ResLo.getValue(R: 1)); |
10552 | SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, |
10553 | N1: ResHi.getValue(R: 0), N2: ResHi.getValue(R: 1)); |
10554 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL); |
10555 | } |
10556 | |
10557 | SDValue Interleaved; |
10558 | |
10559 | // If the element type is smaller than ELEN, then we can interleave with |
10560 | // vwaddu.vv and vwmaccu.vx |
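  // Computing zext(Even) + (2^SEW) * zext(Odd) in 2*SEW-bit lanes places the
  // even element in the low half and the odd element in the high half of each
  // lane, which is the interleaved order when the result is re-read at SEW.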
10561 | if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { |
10562 | Interleaved = getWideningInterleave(EvenV: Op.getOperand(i: 0), OddV: Op.getOperand(i: 1), DL, |
10563 | DAG, Subtarget); |
10564 | } else { |
10565 | // Otherwise, fallback to using vrgathere16.vv |
10566 | MVT ConcatVT = |
10567 | MVT::getVectorVT(VT: VecVT.getVectorElementType(), |
10568 | EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2)); |
10569 | SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT, |
10570 | N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1)); |
10571 | |
10572 | MVT IdxVT = ConcatVT.changeVectorElementType(EltVT: MVT::i16); |
10573 | |
10574 | // 0 1 2 3 4 5 6 7 ... |
10575 | SDValue StepVec = DAG.getStepVector(DL, ResVT: IdxVT); |
10576 | |
10577 | // 1 1 1 1 1 1 1 1 ... |
10578 | SDValue Ones = DAG.getSplatVector(VT: IdxVT, DL, Op: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
10579 | |
10580 | // 1 0 1 0 1 0 1 0 ... |
10581 | SDValue OddMask = DAG.getNode(Opcode: ISD::AND, DL, VT: IdxVT, N1: StepVec, N2: Ones); |
10582 | OddMask = DAG.getSetCC( |
10583 | DL, VT: IdxVT.changeVectorElementType(EltVT: MVT::i1), LHS: OddMask, |
10584 | RHS: DAG.getSplatVector(VT: IdxVT, DL, Op: DAG.getConstant(Val: 0, DL, VT: XLenVT)), |
10585 | Cond: ISD::CondCode::SETNE); |
10586 | |
10587 | SDValue VLMax = DAG.getSplatVector(VT: IdxVT, DL, Op: computeVLMax(VecVT, DL, DAG)); |
10588 | |
10589 | // Build up the index vector for interleaving the concatenated vector |
10590 | // 0 0 1 1 2 2 3 3 ... |
10591 | SDValue Idx = DAG.getNode(Opcode: ISD::SRL, DL, VT: IdxVT, N1: StepVec, N2: Ones); |
10592 | // 0 n 1 n+1 2 n+2 3 n+3 ... |
10593 | Idx = |
10594 | DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: IdxVT, N1: Idx, N2: VLMax, N3: Idx, N4: OddMask, N5: VL); |
10595 | |
10596 | // Then perform the interleave |
10597 | // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ... |
10598 | SDValue TrueMask = getAllOnesMask(VecVT: IdxVT, VL, DL, DAG); |
10599 | Interleaved = DAG.getNode(Opcode: RISCVISD::VRGATHEREI16_VV_VL, DL, VT: ConcatVT, |
10600 | N1: Concat, N2: Idx, N3: DAG.getUNDEF(VT: ConcatVT), N4: TrueMask, N5: VL); |
10601 | } |
10602 | |
10603 | // Extract the two halves from the interleaved result |
10604 | SDValue Lo = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved, |
10605 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
10606 | SDValue Hi = DAG.getNode( |
10607 | Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: VecVT, N1: Interleaved, |
10608 | N2: DAG.getVectorIdxConstant(Val: VecVT.getVectorMinNumElements(), DL)); |
10609 | |
10610 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL); |
10611 | } |
10612 | |
10613 | // Lower step_vector to the vid instruction. Any non-identity step value must |
10614 | // be accounted for by manual expansion. |
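// For example, a step of 4 becomes vid.v followed by a shift left by 2, while
// a step of 3 becomes vid.v followed by a multiply by a splat of 3.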
10615 | SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op, |
10616 | SelectionDAG &DAG) const { |
10617 | SDLoc DL(Op); |
10618 | MVT VT = Op.getSimpleValueType(); |
10619 | assert(VT.isScalableVector() && "Expected scalable vector" ); |
10620 | MVT XLenVT = Subtarget.getXLenVT(); |
10621 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget); |
10622 | SDValue StepVec = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL); |
10623 | uint64_t StepValImm = Op.getConstantOperandVal(i: 0); |
10624 | if (StepValImm != 1) { |
10625 | if (isPowerOf2_64(Value: StepValImm)) { |
10626 | SDValue StepVal = |
10627 | DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT), |
10628 | N2: DAG.getConstant(Val: Log2_64(Value: StepValImm), DL, VT: XLenVT), N3: VL); |
10629 | StepVec = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: StepVec, N2: StepVal); |
10630 | } else { |
10631 | SDValue StepVal = lowerScalarSplat( |
10632 | Passthru: SDValue(), Scalar: DAG.getConstant(Val: StepValImm, DL, VT: VT.getVectorElementType()), |
10633 | VL, VT, DL, DAG, Subtarget); |
10634 | StepVec = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: StepVec, N2: StepVal); |
10635 | } |
10636 | } |
10637 | return StepVec; |
10638 | } |
10639 | |
10640 | // Implement vector_reverse using vrgather.vv with indices determined by |
10641 | // subtracting the id of each element from (VLMAX-1). This will convert |
10642 | // the indices like so: |
10643 | // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). |
10644 | // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. |
10645 | SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, |
10646 | SelectionDAG &DAG) const { |
10647 | SDLoc DL(Op); |
10648 | MVT VecVT = Op.getSimpleValueType(); |
10649 | if (VecVT.getVectorElementType() == MVT::i1) { |
10650 | MVT WidenVT = MVT::getVectorVT(VT: MVT::i8, EC: VecVT.getVectorElementCount()); |
10651 | SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: Op.getOperand(i: 0)); |
10652 | SDValue Op2 = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: WidenVT, Operand: Op1); |
10653 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Op2); |
10654 | } |
10655 | unsigned EltSize = VecVT.getScalarSizeInBits(); |
10656 | unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); |
10657 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
10658 | unsigned MaxVLMAX = |
10659 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize); |
10660 | |
10661 | unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; |
10662 | MVT IntVT = VecVT.changeVectorElementTypeToInteger(); |
10663 | |
10664 | // If this is SEW=8 and VLMAX is potentially more than 256, we need |
10665 | // to use vrgatherei16.vv. |
10666 | // TODO: It's also possible to use vrgatherei16.vv for other types to |
10667 | // decrease register width for the index calculation. |
10668 | if (MaxVLMAX > 256 && EltSize == 8) { |
10669 |     // If this is LMUL=8, we have to split before we can use vrgatherei16.vv. |
10670 | // Reverse each half, then reassemble them in reverse order. |
10671 | // NOTE: It's also possible that after splitting that VLMAX no longer |
10672 | // requires vrgatherei16.vv. |
10673 | if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { |
10674 | auto [Lo, Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0); |
10675 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: VecVT); |
10676 | Lo = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo); |
10677 | Hi = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi); |
10678 | // Reassemble the low and high pieces reversed. |
10679 | // FIXME: This is a CONCAT_VECTORS. |
10680 | SDValue Res = |
10681 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT), N2: Hi, |
10682 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
10683 | return DAG.getNode( |
10684 | Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecVT, N1: Res, N2: Lo, |
10685 | N3: DAG.getVectorIdxConstant(Val: LoVT.getVectorMinNumElements(), DL)); |
10686 | } |
10687 | |
10688 | // Just promote the int type to i16 which will double the LMUL. |
10689 | IntVT = MVT::getVectorVT(VT: MVT::i16, EC: VecVT.getVectorElementCount()); |
10690 | GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; |
10691 | } |
10692 | |
10693 | MVT XLenVT = Subtarget.getXLenVT(); |
10694 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); |
10695 | |
10696 | // Calculate VLMAX-1 for the desired SEW. |
10697 | SDValue VLMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, |
10698 | N1: computeVLMax(VecVT, DL, DAG), |
10699 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
10700 | |
10701 | // Splat VLMAX-1 taking care to handle SEW==64 on RV32. |
10702 | bool IsRV32E64 = |
10703 | !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; |
10704 | SDValue SplatVL; |
10705 | if (!IsRV32E64) |
10706 | SplatVL = DAG.getSplatVector(VT: IntVT, DL, Op: VLMinus1); |
10707 | else |
10708 | SplatVL = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT, N1: DAG.getUNDEF(VT: IntVT), |
10709 | N2: VLMinus1, N3: DAG.getRegister(Reg: RISCV::X0, VT: XLenVT)); |
10710 | |
10711 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IntVT, N1: Mask, N2: VL); |
10712 | SDValue Indices = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IntVT, N1: SplatVL, N2: VID, |
10713 | N3: DAG.getUNDEF(VT: IntVT), N4: Mask, N5: VL); |
10714 | |
10715 | return DAG.getNode(Opcode: GatherOpc, DL, VT: VecVT, N1: Op.getOperand(i: 0), N2: Indices, |
10716 | N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL); |
10717 | } |
10718 | |
10719 | SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, |
10720 | SelectionDAG &DAG) const { |
10721 | SDLoc DL(Op); |
10722 | SDValue V1 = Op.getOperand(i: 0); |
10723 | SDValue V2 = Op.getOperand(i: 1); |
10724 | MVT XLenVT = Subtarget.getXLenVT(); |
10725 | MVT VecVT = Op.getSimpleValueType(); |
10726 | |
10727 | SDValue VLMax = computeVLMax(VecVT, DL, DAG); |
10728 | |
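  // The splice is lowered as a vslidedown of V1 by DownOffset elements
  // followed by a vslideup of V2 by UpOffset elements, where
  // DownOffset + UpOffset == VLMAX: the slidedown supplies the trailing
  // elements of V1 and the slideup appends the leading elements of V2.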
10729 | int64_t ImmValue = cast<ConstantSDNode>(Val: Op.getOperand(i: 2))->getSExtValue(); |
10730 | SDValue DownOffset, UpOffset; |
10731 | if (ImmValue >= 0) { |
10732 | // The operand is a TargetConstant, we need to rebuild it as a regular |
10733 | // constant. |
10734 | DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT); |
10735 | UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: DownOffset); |
10736 | } else { |
10737 | // The operand is a TargetConstant, we need to rebuild it as a regular |
10738 | // constant rather than negating the original operand. |
10739 | UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT); |
10740 | DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: UpOffset); |
10741 | } |
10742 | |
10743 | SDValue TrueMask = getAllOnesMask(VecVT, VL: VLMax, DL, DAG); |
10744 | |
10745 | SDValue SlideDown = |
10746 | getVSlidedown(DAG, Subtarget, DL, VT: VecVT, Merge: DAG.getUNDEF(VT: VecVT), Op: V1, |
10747 | Offset: DownOffset, Mask: TrueMask, VL: UpOffset); |
10748 | return getVSlideup(DAG, Subtarget, DL, VT: VecVT, Merge: SlideDown, Op: V2, Offset: UpOffset, |
10749 | Mask: TrueMask, VL: DAG.getRegister(Reg: RISCV::X0, VT: XLenVT), |
10750 | Policy: RISCVII::TAIL_AGNOSTIC); |
10751 | } |
10752 | |
10753 | SDValue |
10754 | RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, |
10755 | SelectionDAG &DAG) const { |
10756 | SDLoc DL(Op); |
10757 | auto *Load = cast<LoadSDNode>(Val&: Op); |
10758 | |
10759 | assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
10760 | Load->getMemoryVT(), |
10761 | *Load->getMemOperand()) && |
10762 | "Expecting a correctly-aligned load" ); |
10763 | |
10764 | MVT VT = Op.getSimpleValueType(); |
10765 | MVT XLenVT = Subtarget.getXLenVT(); |
10766 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10767 | |
10768 | // If we know the exact VLEN and our fixed length vector completely fills |
10769 | // the container, use a whole register load instead. |
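  // For example, with a VLEN known to be exactly 128, a v4i32 load fills its
  // nxv2i32 container (VLMAX == 4), so an ordinary load of the container type
  // can be used instead of the VL-limited vle below.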
10770 | const auto [MinVLMAX, MaxVLMAX] = |
10771 | RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget); |
10772 | if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && |
10773 | getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) { |
10774 | MachineMemOperand *MMO = Load->getMemOperand(); |
10775 | SDValue NewLoad = |
10776 | DAG.getLoad(VT: ContainerVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(), |
10777 | PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(), MMOFlags: MMO->getFlags(), |
10778 | AAInfo: MMO->getAAInfo(), Ranges: MMO->getRanges()); |
10779 | SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget); |
10780 | return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL); |
10781 | } |
10782 | |
10783 | SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget); |
10784 | |
10785 | bool IsMaskOp = VT.getVectorElementType() == MVT::i1; |
10786 | SDValue IntID = DAG.getTargetConstant( |
10787 | Val: IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, VT: XLenVT); |
10788 | SmallVector<SDValue, 4> Ops{Load->getChain(), IntID}; |
10789 | if (!IsMaskOp) |
10790 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
10791 | Ops.push_back(Elt: Load->getBasePtr()); |
10792 | Ops.push_back(Elt: VL); |
10793 | SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other}); |
10794 | SDValue NewLoad = |
10795 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
10796 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand()); |
10797 | |
10798 | SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget); |
10799 | return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL); |
10800 | } |
10801 | |
10802 | SDValue |
10803 | RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, |
10804 | SelectionDAG &DAG) const { |
10805 | SDLoc DL(Op); |
10806 | auto *Store = cast<StoreSDNode>(Val&: Op); |
10807 | |
10808 | assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
10809 | Store->getMemoryVT(), |
10810 | *Store->getMemOperand()) && |
10811 | "Expecting a correctly-aligned store" ); |
10812 | |
10813 | SDValue StoreVal = Store->getValue(); |
10814 | MVT VT = StoreVal.getSimpleValueType(); |
10815 | MVT XLenVT = Subtarget.getXLenVT(); |
10816 | |
10817 |   // If the size is less than a byte, we need to pad with zeros to make a byte. |
10818 | if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { |
10819 | VT = MVT::v8i1; |
10820 | StoreVal = |
10821 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), |
10822 | N2: StoreVal, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
10823 | } |
10824 | |
10825 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10826 | |
10827 | SDValue NewValue = |
10828 | convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget); |
10829 |  |
10831 | // If we know the exact VLEN and our fixed length vector completely fills |
10832 | // the container, use a whole register store instead. |
10833 | const auto [MinVLMAX, MaxVLMAX] = |
10834 | RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget); |
10835 | if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && |
10836 | getLMUL1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) { |
10837 | MachineMemOperand *MMO = Store->getMemOperand(); |
10838 | return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: NewValue, Ptr: Store->getBasePtr(), |
10839 | PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(), |
10840 | MMOFlags: MMO->getFlags(), AAInfo: MMO->getAAInfo()); |
10841 | } |
10842 | |
10843 | SDValue VL = getVLOp(NumElts: VT.getVectorNumElements(), ContainerVT, DL, DAG, |
10844 | Subtarget); |
10845 | |
10846 | bool IsMaskOp = VT.getVectorElementType() == MVT::i1; |
10847 | SDValue IntID = DAG.getTargetConstant( |
10848 | Val: IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, VT: XLenVT); |
10849 | return DAG.getMemIntrinsicNode( |
10850 | Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), |
10851 | Ops: {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, |
10852 | MemVT: Store->getMemoryVT(), MMO: Store->getMemOperand()); |
10853 | } |
10854 | |
10855 | SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, |
10856 | SelectionDAG &DAG) const { |
10857 | SDLoc DL(Op); |
10858 | MVT VT = Op.getSimpleValueType(); |
10859 | |
10860 | const auto *MemSD = cast<MemSDNode>(Val&: Op); |
10861 | EVT MemVT = MemSD->getMemoryVT(); |
10862 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
10863 | SDValue Chain = MemSD->getChain(); |
10864 | SDValue BasePtr = MemSD->getBasePtr(); |
10865 | |
10866 | SDValue Mask, PassThru, VL; |
10867 | if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Val&: Op)) { |
10868 | Mask = VPLoad->getMask(); |
10869 | PassThru = DAG.getUNDEF(VT); |
10870 | VL = VPLoad->getVectorLength(); |
10871 | } else { |
10872 | const auto *MLoad = cast<MaskedLoadSDNode>(Val&: Op); |
10873 | Mask = MLoad->getMask(); |
10874 | PassThru = MLoad->getPassThru(); |
10875 | } |
10876 | |
10877 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
10878 | |
10879 | MVT XLenVT = Subtarget.getXLenVT(); |
10880 | |
10881 | MVT ContainerVT = VT; |
10882 | if (VT.isFixedLengthVector()) { |
10883 | ContainerVT = getContainerForFixedLengthVector(VT); |
10884 | PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget); |
10885 | if (!IsUnmasked) { |
10886 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
10887 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
10888 | } |
10889 | } |
10890 | |
10891 | if (!VL) |
10892 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
10893 | |
10894 | unsigned IntID = |
10895 | IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; |
10896 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
10897 | if (IsUnmasked) |
10898 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
10899 | else |
10900 | Ops.push_back(Elt: PassThru); |
10901 | Ops.push_back(Elt: BasePtr); |
10902 | if (!IsUnmasked) |
10903 | Ops.push_back(Elt: Mask); |
10904 | Ops.push_back(Elt: VL); |
10905 | if (!IsUnmasked) |
10906 | Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT)); |
10907 | |
10908 | SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other}); |
10909 | |
10910 | SDValue Result = |
10911 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO); |
10912 | Chain = Result.getValue(R: 1); |
10913 | |
10914 | if (VT.isFixedLengthVector()) |
10915 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
10916 | |
10917 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
10918 | } |
10919 | |
10920 | SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, |
10921 | SelectionDAG &DAG) const { |
10922 | SDLoc DL(Op); |
10923 | |
10924 | const auto *MemSD = cast<MemSDNode>(Val&: Op); |
10925 | EVT MemVT = MemSD->getMemoryVT(); |
10926 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
10927 | SDValue Chain = MemSD->getChain(); |
10928 | SDValue BasePtr = MemSD->getBasePtr(); |
10929 | SDValue Val, Mask, VL; |
10930 | |
10931 | bool IsCompressingStore = false; |
10932 | if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Val&: Op)) { |
10933 | Val = VPStore->getValue(); |
10934 | Mask = VPStore->getMask(); |
10935 | VL = VPStore->getVectorLength(); |
10936 | } else { |
10937 | const auto *MStore = cast<MaskedStoreSDNode>(Val&: Op); |
10938 | Val = MStore->getValue(); |
10939 | Mask = MStore->getMask(); |
10940 | IsCompressingStore = MStore->isCompressingStore(); |
10941 | } |
10942 | |
10943 | bool IsUnmasked = |
10944 | ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()) || IsCompressingStore; |
10945 | |
10946 | MVT VT = Val.getSimpleValueType(); |
10947 | MVT XLenVT = Subtarget.getXLenVT(); |
10948 | |
10949 | MVT ContainerVT = VT; |
10950 | if (VT.isFixedLengthVector()) { |
10951 | ContainerVT = getContainerForFixedLengthVector(VT); |
10952 | |
10953 | Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget); |
10954 | if (!IsUnmasked || IsCompressingStore) { |
10955 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
10956 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
10957 | } |
10958 | } |
10959 | |
10960 | if (!VL) |
10961 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
10962 | |
10963 | if (IsCompressingStore) { |
10964 | Val = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: ContainerVT, |
10965 | N1: DAG.getConstant(Val: Intrinsic::riscv_vcompress, DL, VT: XLenVT), |
10966 | N2: DAG.getUNDEF(VT: ContainerVT), N3: Val, N4: Mask, N5: VL); |
10967 | VL = |
10968 | DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Mask, |
10969 | N2: getAllOnesMask(VecVT: Mask.getSimpleValueType(), VL, DL, DAG), N3: VL); |
10970 | } |
10971 | |
10972 | unsigned IntID = |
10973 | IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; |
10974 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
10975 | Ops.push_back(Elt: Val); |
10976 | Ops.push_back(Elt: BasePtr); |
10977 | if (!IsUnmasked) |
10978 | Ops.push_back(Elt: Mask); |
10979 | Ops.push_back(Elt: VL); |
10980 | |
10981 | return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, |
10982 | VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT, MMO); |
10983 | } |
10984 | |
10985 | SDValue |
10986 | RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, |
10987 | SelectionDAG &DAG) const { |
10988 | MVT InVT = Op.getOperand(i: 0).getSimpleValueType(); |
10989 | MVT ContainerVT = getContainerForFixedLengthVector(VT: InVT); |
10990 | |
10991 | MVT VT = Op.getSimpleValueType(); |
10992 | |
10993 | SDValue Op1 = |
10994 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget); |
10995 | SDValue Op2 = |
10996 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget); |
10997 | |
10998 | SDLoc DL(Op); |
10999 | auto [Mask, VL] = getDefaultVLOps(NumElts: VT.getVectorNumElements(), ContainerVT, DL, |
11000 | DAG, Subtarget); |
11001 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11002 | |
11003 | SDValue Cmp = |
11004 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT, |
11005 | Ops: {Op1, Op2, Op.getOperand(i: 2), DAG.getUNDEF(VT: MaskVT), Mask, VL}); |
11006 | |
11007 | return convertFromScalableVector(VT, V: Cmp, DAG, Subtarget); |
11008 | } |
11009 | |
11010 | SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op, |
11011 | SelectionDAG &DAG) const { |
11012 | unsigned Opc = Op.getOpcode(); |
11013 | SDLoc DL(Op); |
11014 | SDValue Chain = Op.getOperand(i: 0); |
11015 | SDValue Op1 = Op.getOperand(i: 1); |
11016 | SDValue Op2 = Op.getOperand(i: 2); |
11017 | SDValue CC = Op.getOperand(i: 3); |
11018 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get(); |
11019 | MVT VT = Op.getSimpleValueType(); |
11020 | MVT InVT = Op1.getSimpleValueType(); |
11021 | |
11022 |   // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE |
11023 |   // condition codes. |
11024 | if (Opc == ISD::STRICT_FSETCCS) { |
11025 |     // Expand strict_fsetccs(x, y, oeq) to |
11026 |     // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole)) |
11027 | SDVTList VTList = Op->getVTList(); |
11028 | if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) { |
11029 | SDValue OLECCVal = DAG.getCondCode(Cond: ISD::SETOLE); |
11030 | SDValue Tmp1 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1, |
11031 | N3: Op2, N4: OLECCVal); |
11032 | SDValue Tmp2 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op2, |
11033 | N3: Op1, N4: OLECCVal); |
11034 | SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, |
11035 | N1: Tmp1.getValue(R: 1), N2: Tmp2.getValue(R: 1)); |
11036 | // Tmp1 and Tmp2 might be the same node. |
11037 | if (Tmp1 != Tmp2) |
11038 | Tmp1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp1, N2: Tmp2); |
11039 | return DAG.getMergeValues(Ops: {Tmp1, OutChain}, dl: DL); |
11040 | } |
11041 | |
11042 | // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq)) |
11043 | if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) { |
11044 | SDValue OEQCCVal = DAG.getCondCode(Cond: ISD::SETOEQ); |
11045 | SDValue OEQ = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1, |
11046 | N3: Op2, N4: OEQCCVal); |
11047 | SDValue Res = DAG.getNOT(DL, Val: OEQ, VT); |
11048 | return DAG.getMergeValues(Ops: {Res, OEQ.getValue(R: 1)}, dl: DL); |
11049 | } |
11050 | } |
11051 | |
11052 | MVT ContainerInVT = InVT; |
11053 | if (InVT.isFixedLengthVector()) { |
11054 | ContainerInVT = getContainerForFixedLengthVector(VT: InVT); |
11055 | Op1 = convertToScalableVector(VT: ContainerInVT, V: Op1, DAG, Subtarget); |
11056 | Op2 = convertToScalableVector(VT: ContainerInVT, V: Op2, DAG, Subtarget); |
11057 | } |
11058 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerInVT); |
11059 | |
11060 | auto [Mask, VL] = getDefaultVLOps(VecVT: InVT, ContainerVT: ContainerInVT, DL, DAG, Subtarget); |
11061 | |
11062 | SDValue Res; |
11063 | if (Opc == ISD::STRICT_FSETCC && |
11064 | (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE || |
11065 | CCVal == ISD::SETOLE)) { |
11066 |     // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN inputs. Generate a |
11067 |     // mask that is only active when both input elements are ordered. |
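    // For example, for a STRICT_FSETCC with olt where one lane of Op1 is qNaN:
    // the ordered check (Op1 == Op1) is false for that lane, so the combined
    // mask switches the lane off, vmflt never consumes the qNaN, and the
    // masked-off result lane stays 0, matching olt's result for unordered
    // inputs.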
11068 | SDValue True = getAllOnesMask(VecVT: ContainerInVT, VL, DL, DAG); |
11069 | SDValue OrderMask1 = DAG.getNode( |
11070 | Opcode: RISCVISD::STRICT_FSETCC_VL, DL, VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other), |
11071 | Ops: {Chain, Op1, Op1, DAG.getCondCode(Cond: ISD::SETOEQ), DAG.getUNDEF(VT: MaskVT), |
11072 | True, VL}); |
11073 | SDValue OrderMask2 = DAG.getNode( |
11074 | Opcode: RISCVISD::STRICT_FSETCC_VL, DL, VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other), |
11075 | Ops: {Chain, Op2, Op2, DAG.getCondCode(Cond: ISD::SETOEQ), DAG.getUNDEF(VT: MaskVT), |
11076 | True, VL}); |
11077 | Mask = |
11078 | DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: MaskVT, N1: OrderMask1, N2: OrderMask2, N3: VL); |
11079 | // Use Mask as the merge operand to let the result be 0 if either of the |
11080 | // inputs is unordered. |
11081 | Res = DAG.getNode(Opcode: RISCVISD::STRICT_FSETCCS_VL, DL, |
11082 | VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other), |
11083 | Ops: {Chain, Op1, Op2, CC, Mask, Mask, VL}); |
11084 | } else { |
11085 | unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL |
11086 | : RISCVISD::STRICT_FSETCCS_VL; |
11087 | Res = DAG.getNode(Opcode: RVVOpc, DL, VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other), |
11088 | Ops: {Chain, Op1, Op2, CC, DAG.getUNDEF(VT: MaskVT), Mask, VL}); |
11089 | } |
11090 | |
11091 | if (VT.isFixedLengthVector()) { |
11092 | SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget); |
11093 | return DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL); |
11094 | } |
11095 | return Res; |
11096 | } |
11097 | |
11098 | // Lower vector ABS to smax(X, sub(0, X)). |
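// A quick worked example: abs(-5) = smax(-5, 0 - (-5)) = smax(-5, 5) = 5. For
// INT_MIN the subtraction wraps back to INT_MIN, so the result is INT_MIN,
// which matches the documented behavior of ISD::ABS on INT_MIN.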
11099 | SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { |
11100 | SDLoc DL(Op); |
11101 | MVT VT = Op.getSimpleValueType(); |
11102 | SDValue X = Op.getOperand(i: 0); |
11103 | |
11104 | assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && |
11105 | "Unexpected type for ISD::ABS" ); |
11106 | |
11107 | MVT ContainerVT = VT; |
11108 | if (VT.isFixedLengthVector()) { |
11109 | ContainerVT = getContainerForFixedLengthVector(VT); |
11110 | X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget); |
11111 | } |
11112 | |
11113 | SDValue Mask, VL; |
11114 | if (Op->getOpcode() == ISD::VP_ABS) { |
11115 | Mask = Op->getOperand(Num: 1); |
11116 | if (VT.isFixedLengthVector()) |
11117 | Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG, |
11118 | Subtarget); |
11119 | VL = Op->getOperand(Num: 2); |
11120 | } else |
11121 | std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
11122 | |
11123 | SDValue SplatZero = DAG.getNode( |
11124 | Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), |
11125 | N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL); |
11126 | SDValue NegX = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: ContainerVT, N1: SplatZero, N2: X, |
11127 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
11128 | SDValue Max = DAG.getNode(Opcode: RISCVISD::SMAX_VL, DL, VT: ContainerVT, N1: X, N2: NegX, |
11129 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
11130 | |
11131 | if (VT.isFixedLengthVector()) |
11132 | Max = convertFromScalableVector(VT, V: Max, DAG, Subtarget); |
11133 | return Max; |
11134 | } |
11135 | |
11136 | SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( |
11137 | SDValue Op, SelectionDAG &DAG) const { |
11138 | SDLoc DL(Op); |
11139 | MVT VT = Op.getSimpleValueType(); |
11140 | SDValue Mag = Op.getOperand(i: 0); |
11141 | SDValue Sign = Op.getOperand(i: 1); |
11142 | assert(Mag.getValueType() == Sign.getValueType() && |
11143 | "Can only handle COPYSIGN with matching types." ); |
11144 | |
11145 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
11146 | Mag = convertToScalableVector(VT: ContainerVT, V: Mag, DAG, Subtarget); |
11147 | Sign = convertToScalableVector(VT: ContainerVT, V: Sign, DAG, Subtarget); |
11148 | |
11149 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
11150 | |
11151 | SDValue CopySign = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Mag, |
11152 | N2: Sign, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
11153 | |
11154 | return convertFromScalableVector(VT, V: CopySign, DAG, Subtarget); |
11155 | } |
11156 | |
11157 | SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( |
11158 | SDValue Op, SelectionDAG &DAG) const { |
11159 | MVT VT = Op.getSimpleValueType(); |
11160 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
11161 | |
11162 | MVT I1ContainerVT = |
11163 | MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount()); |
11164 | |
11165 | SDValue CC = |
11166 | convertToScalableVector(VT: I1ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget); |
11167 | SDValue Op1 = |
11168 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget); |
11169 | SDValue Op2 = |
11170 | convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 2), DAG, Subtarget); |
11171 | |
11172 | SDLoc DL(Op); |
11173 | SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
11174 | |
11175 | SDValue Select = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: Op1, |
11176 | N3: Op2, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
11177 | |
11178 | return convertFromScalableVector(VT, V: Select, DAG, Subtarget); |
11179 | } |
11180 | |
11181 | SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, |
11182 | SelectionDAG &DAG) const { |
11183 | unsigned NewOpc = getRISCVVLOp(Op); |
11184 | bool HasMergeOp = hasMergeOp(Opcode: NewOpc); |
11185 | bool HasMask = hasMaskOp(Opcode: NewOpc); |
11186 | |
11187 | MVT VT = Op.getSimpleValueType(); |
11188 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
11189 | |
11190 | // Create list of operands by converting existing ones to scalable types. |
11191 | SmallVector<SDValue, 6> Ops; |
11192 | for (const SDValue &V : Op->op_values()) { |
11193 | assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!" ); |
11194 | |
11195 | // Pass through non-vector operands. |
11196 | if (!V.getValueType().isVector()) { |
11197 | Ops.push_back(Elt: V); |
11198 | continue; |
11199 | } |
11200 | |
11201 | // "cast" fixed length vector to a scalable vector. |
11202 | assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && |
11203 | "Only fixed length vectors are supported!" ); |
11204 | Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget)); |
11205 | } |
11206 | |
11207 | SDLoc DL(Op); |
11208 | auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget); |
11209 | if (HasMergeOp) |
11210 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
11211 | if (HasMask) |
11212 | Ops.push_back(Elt: Mask); |
11213 | Ops.push_back(Elt: VL); |
11214 | |
11215 | // StrictFP operations have two result values. Their lowered result should |
11216 |   // have the same result count. |
11217 | if (Op->isStrictFPOpcode()) { |
11218 | SDValue ScalableRes = |
11219 | DAG.getNode(Opcode: NewOpc, DL, VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), Ops, |
11220 | Flags: Op->getFlags()); |
11221 | SDValue SubVec = convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget); |
11222 | return DAG.getMergeValues(Ops: {SubVec, ScalableRes.getValue(R: 1)}, dl: DL); |
11223 | } |
11224 | |
11225 | SDValue ScalableRes = |
11226 | DAG.getNode(Opcode: NewOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags()); |
11227 | return convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget); |
11228 | } |
11229 | |
11230 | // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: |
11231 | // * Operands of each node are assumed to be in the same order. |
11232 | // * The EVL operand is promoted from i32 to i64 on RV64. |
11233 | // * Fixed-length vectors are converted to their scalable-vector container |
11234 | // types. |
11235 | SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { |
11236 | unsigned RISCVISDOpc = getRISCVVLOp(Op); |
11237 | bool HasMergeOp = hasMergeOp(Opcode: RISCVISDOpc); |
11238 | |
11239 | SDLoc DL(Op); |
11240 | MVT VT = Op.getSimpleValueType(); |
11241 | SmallVector<SDValue, 4> Ops; |
11242 | |
11243 | MVT ContainerVT = VT; |
11244 | if (VT.isFixedLengthVector()) |
11245 | ContainerVT = getContainerForFixedLengthVector(VT); |
11246 | |
11247 | for (const auto &OpIdx : enumerate(First: Op->ops())) { |
11248 | SDValue V = OpIdx.value(); |
11249 | assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!" ); |
11250 |     // Add a dummy merge value before the mask, or, if there isn't a mask, |
11251 |     // before the EVL operand. |
11252 | if (HasMergeOp) { |
11253 | auto MaskIdx = ISD::getVPMaskIdx(Opcode: Op.getOpcode()); |
11254 | if (MaskIdx) { |
11255 | if (*MaskIdx == OpIdx.index()) |
11256 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
11257 | } else if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == |
11258 | OpIdx.index()) { |
11259 | if (Op.getOpcode() == ISD::VP_MERGE) { |
11260 | // For VP_MERGE, copy the false operand instead of an undef value. |
11261 | Ops.push_back(Elt: Ops.back()); |
11262 | } else { |
11263 | assert(Op.getOpcode() == ISD::VP_SELECT); |
11264 | // For VP_SELECT, add an undef value. |
11265 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
11266 | } |
11267 | } |
11268 | } |
11269 | // Pass through operands which aren't fixed-length vectors. |
11270 | if (!V.getValueType().isFixedLengthVector()) { |
11271 | Ops.push_back(Elt: V); |
11272 | continue; |
11273 | } |
11274 | // "cast" fixed length vector to a scalable vector. |
11275 | MVT OpVT = V.getSimpleValueType(); |
11276 | MVT ContainerVT = getContainerForFixedLengthVector(VT: OpVT); |
11277 | assert(useRVVForFixedLengthVectorVT(OpVT) && |
11278 | "Only fixed length vectors are supported!" ); |
11279 | Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget)); |
11280 | } |
11281 | |
11282 | if (!VT.isFixedLengthVector()) |
11283 | return DAG.getNode(Opcode: RISCVISDOpc, DL, VT, Ops, Flags: Op->getFlags()); |
11284 | |
11285 | SDValue VPOp = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags()); |
11286 | |
11287 | return convertFromScalableVector(VT, V: VPOp, DAG, Subtarget); |
11288 | } |
11289 | |
11290 | SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, |
11291 | SelectionDAG &DAG) const { |
11292 | SDLoc DL(Op); |
11293 | MVT VT = Op.getSimpleValueType(); |
11294 | |
11295 | SDValue Src = Op.getOperand(i: 0); |
11296 | // NOTE: Mask is dropped. |
11297 | SDValue VL = Op.getOperand(i: 2); |
11298 | |
11299 | MVT ContainerVT = VT; |
11300 | if (VT.isFixedLengthVector()) { |
11301 | ContainerVT = getContainerForFixedLengthVector(VT); |
11302 | MVT SrcVT = MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount()); |
11303 | Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget); |
11304 | } |
11305 | |
11306 | MVT XLenVT = Subtarget.getXLenVT(); |
11307 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
11308 | SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11309 | N1: DAG.getUNDEF(VT: ContainerVT), N2: Zero, N3: VL); |
11310 | |
11311 | SDValue SplatValue = DAG.getConstant( |
11312 | Val: Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, VT: XLenVT); |
11313 | SDValue Splat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11314 | N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatValue, N3: VL); |
11315 | |
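  // The merge below selects, per lane, between the 1/-1 splat and the zero
  // splat under the i1 source: e.g. for vp.zext, a set lane becomes 1 and a
  // clear lane becomes 0, which is exactly the zero-extension of the mask bit.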
11316 | SDValue Result = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Src, N2: Splat, |
11317 | N3: ZeroSplat, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL); |
11318 | if (!VT.isFixedLengthVector()) |
11319 | return Result; |
11320 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11321 | } |
11322 | |
11323 | SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, |
11324 | SelectionDAG &DAG) const { |
11325 | SDLoc DL(Op); |
11326 | MVT VT = Op.getSimpleValueType(); |
11327 | |
11328 | SDValue Op1 = Op.getOperand(i: 0); |
11329 | SDValue Op2 = Op.getOperand(i: 1); |
11330 | ISD::CondCode Condition = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get(); |
11331 | // NOTE: Mask is dropped. |
11332 | SDValue VL = Op.getOperand(i: 4); |
11333 | |
11334 | MVT ContainerVT = VT; |
11335 | if (VT.isFixedLengthVector()) { |
11336 | ContainerVT = getContainerForFixedLengthVector(VT); |
11337 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11338 | Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget); |
11339 | } |
11340 | |
11341 | SDValue Result; |
11342 | SDValue AllOneMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL); |
11343 | |
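  // The cases below rewrite i1 comparisons as mask logic. As a sanity check of
  // one identity (reading a set bit as true): X >s Y on i1 operands holds only
  // for X = 0, Y = 1 (signed values 0 and -1), which is exactly ~X & Y.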
11344 | switch (Condition) { |
11345 | default: |
11346 | break; |
11347 | // X != Y --> (X^Y) |
11348 | case ISD::SETNE: |
11349 | Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL); |
11350 | break; |
11351 | // X == Y --> ~(X^Y) |
11352 | case ISD::SETEQ: { |
11353 | SDValue Temp = |
11354 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL); |
11355 | Result = |
11356 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: AllOneMask, N3: VL); |
11357 | break; |
11358 | } |
11359 | // X >s Y --> X == 0 & Y == 1 --> ~X & Y |
11360 | // X <u Y --> X == 0 & Y == 1 --> ~X & Y |
11361 | case ISD::SETGT: |
11362 | case ISD::SETULT: { |
11363 | SDValue Temp = |
11364 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL); |
11365 | Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL); |
11366 | break; |
11367 | } |
11368 | // X <s Y --> X == 1 & Y == 0 --> ~Y & X |
11369 | // X >u Y --> X == 1 & Y == 0 --> ~Y & X |
11370 | case ISD::SETLT: |
11371 | case ISD::SETUGT: { |
11372 | SDValue Temp = |
11373 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL); |
11374 | Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Op1, N2: Temp, N3: VL); |
11375 | break; |
11376 | } |
11377 | // X >=s Y --> X == 0 | Y == 1 --> ~X | Y |
11378 | // X <=u Y --> X == 0 | Y == 1 --> ~X | Y |
11379 | case ISD::SETGE: |
11380 | case ISD::SETULE: { |
11381 | SDValue Temp = |
11382 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL); |
11383 | Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL); |
11384 | break; |
11385 | } |
11386 | // X <=s Y --> X == 1 | Y == 0 --> ~Y | X |
11387 | // X >=u Y --> X == 1 | Y == 0 --> ~Y | X |
11388 | case ISD::SETLE: |
11389 | case ISD::SETUGE: { |
11390 | SDValue Temp = |
11391 | DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL); |
11392 | Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op1, N3: VL); |
11393 | break; |
11394 | } |
11395 | } |
11396 | |
11397 | if (!VT.isFixedLengthVector()) |
11398 | return Result; |
11399 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11400 | } |
11401 | |
11402 | // Lower Floating-Point/Integer Type-Convert VP SDNodes |
11403 | SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, |
11404 | SelectionDAG &DAG) const { |
11405 | SDLoc DL(Op); |
11406 | |
11407 | SDValue Src = Op.getOperand(i: 0); |
11408 | SDValue Mask = Op.getOperand(i: 1); |
11409 | SDValue VL = Op.getOperand(i: 2); |
11410 | unsigned RISCVISDOpc = getRISCVVLOp(Op); |
11411 | |
11412 | MVT DstVT = Op.getSimpleValueType(); |
11413 | MVT SrcVT = Src.getSimpleValueType(); |
11414 | if (DstVT.isFixedLengthVector()) { |
11415 | DstVT = getContainerForFixedLengthVector(VT: DstVT); |
11416 | SrcVT = getContainerForFixedLengthVector(VT: SrcVT); |
11417 | Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget); |
11418 | MVT MaskVT = getMaskTypeFor(VecVT: DstVT); |
11419 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11420 | } |
11421 | |
11422 | unsigned DstEltSize = DstVT.getScalarSizeInBits(); |
11423 | unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); |
11424 | |
11425 | SDValue Result; |
11426 | if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. |
11427 | if (SrcVT.isInteger()) { |
11428 | assert(DstVT.isFloatingPoint() && "Wrong input/output vector types" ); |
11429 | |
11430 | unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL |
11431 | ? RISCVISD::VSEXT_VL |
11432 | : RISCVISD::VZEXT_VL; |
11433 | |
11434 | // Do we need to do any pre-widening before converting? |
11435 | if (SrcEltSize == 1) { |
11436 | MVT IntVT = DstVT.changeVectorElementTypeToInteger(); |
11437 | MVT XLenVT = Subtarget.getXLenVT(); |
11438 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
11439 | SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT, |
11440 | N1: DAG.getUNDEF(VT: IntVT), N2: Zero, N3: VL); |
11441 | SDValue One = DAG.getConstant( |
11442 | Val: RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, VT: XLenVT); |
11443 | SDValue OneSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT, |
11444 | N1: DAG.getUNDEF(VT: IntVT), N2: One, N3: VL); |
11445 | Src = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IntVT, N1: Src, N2: OneSplat, |
11446 | N3: ZeroSplat, N4: DAG.getUNDEF(VT: IntVT), N5: VL); |
11447 | } else if (DstEltSize > (2 * SrcEltSize)) { |
11448 | // Widen before converting. |
11449 | MVT IntVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: DstEltSize / 2), |
11450 | EC: DstVT.getVectorElementCount()); |
11451 | Src = DAG.getNode(Opcode: RISCVISDExtOpc, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL); |
11452 | } |
11453 | |
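      // For example (a sketch): an i8 -> f64 conversion first sign- or
      // zero-extends the i8 source to i32 in the step above, and the single
      // conversion below then widens i32 -> f64; i1 sources are instead
      // expanded to 0 / +-1 before being converted.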
11454 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL); |
11455 | } else { |
11456 | assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && |
11457 | "Wrong input/output vector types" ); |
11458 | |
11459 | // Convert f16 to f32 then convert f32 to i64. |
11460 | if (DstEltSize > (2 * SrcEltSize)) { |
11461 | assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!" ); |
11462 | MVT InterimFVT = |
11463 | MVT::getVectorVT(VT: MVT::f32, EC: DstVT.getVectorElementCount()); |
11464 | Src = |
11465 | DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL); |
11466 | } |
11467 | |
11468 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL); |
11469 | } |
11470 | } else { // Narrowing + Conversion |
11471 | if (SrcVT.isInteger()) { |
11472 | assert(DstVT.isFloatingPoint() && "Wrong input/output vector types" ); |
11473 |       // First do a narrowing conversion to an FP type half the size, then |
11474 |       // round to a smaller FP type if needed. |
11475 | |
11476 | MVT InterimFVT = DstVT; |
11477 | if (SrcEltSize > (2 * DstEltSize)) { |
11478 | assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!" ); |
11479 | assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!" ); |
11480 | InterimFVT = MVT::getVectorVT(VT: MVT::f32, EC: DstVT.getVectorElementCount()); |
11481 | } |
11482 | |
11483 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL); |
11484 | |
11485 | if (InterimFVT != DstVT) { |
11486 | Src = Result; |
11487 | Result = DAG.getNode(Opcode: RISCVISD::FP_ROUND_VL, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL); |
11488 | } |
11489 | } else { |
11490 | assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && |
11491 | "Wrong input/output vector types" ); |
11492 | // First do a narrowing conversion to an integer half the size, then |
11493 | // truncate if needed. |
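      // Two illustrative paths (a sketch): f32 -> i1 first converts to i32 and
      // then compares against zero to form the mask; f64 -> i8 converts to i32
      // with a narrowing fcvt and then truncates i32 -> i16 -> i8 in the loop
      // below.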
11494 | |
11495 | if (DstEltSize == 1) { |
11496 |         // First convert to an integer of the same size, then convert to a mask |
11497 |         // using a setcc. |
11498 | assert(SrcEltSize >= 16 && "Unexpected FP type!" ); |
11499 | MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize), |
11500 | EC: DstVT.getVectorElementCount()); |
11501 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL); |
11502 | |
11503 | // Compare the integer result to 0. The integer should be 0 or 1/-1, |
11504 | // otherwise the conversion was undefined. |
11505 | MVT XLenVT = Subtarget.getXLenVT(); |
11506 | SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT); |
11507 | SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterimIVT, |
11508 | N1: DAG.getUNDEF(VT: InterimIVT), N2: SplatZero, N3: VL); |
11509 | Result = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: DstVT, |
11510 | Ops: {Result, SplatZero, DAG.getCondCode(Cond: ISD::SETNE), |
11511 | DAG.getUNDEF(VT: DstVT), Mask, VL}); |
11512 | } else { |
11513 | MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2), |
11514 | EC: DstVT.getVectorElementCount()); |
11515 | |
11516 | Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL); |
11517 | |
11518 | while (InterimIVT != DstVT) { |
11519 | SrcEltSize /= 2; |
11520 | Src = Result; |
11521 | InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2), |
11522 | EC: DstVT.getVectorElementCount()); |
11523 | Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: InterimIVT, |
11524 | N1: Src, N2: Mask, N3: VL); |
11525 | } |
11526 | } |
11527 | } |
11528 | } |
11529 | |
11530 | MVT VT = Op.getSimpleValueType(); |
11531 | if (!VT.isFixedLengthVector()) |
11532 | return Result; |
11533 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11534 | } |
11535 | |
11536 | SDValue |
11537 | RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, |
11538 | SelectionDAG &DAG) const { |
11539 | SDLoc DL(Op); |
11540 | |
11541 | SDValue Op1 = Op.getOperand(i: 0); |
11542 | SDValue Op2 = Op.getOperand(i: 1); |
11543 | SDValue Offset = Op.getOperand(i: 2); |
11544 | SDValue Mask = Op.getOperand(i: 3); |
11545 | SDValue EVL1 = Op.getOperand(i: 4); |
11546 | SDValue EVL2 = Op.getOperand(i: 5); |
11547 | |
11548 | const MVT XLenVT = Subtarget.getXLenVT(); |
11549 | MVT VT = Op.getSimpleValueType(); |
11550 | MVT ContainerVT = VT; |
11551 | if (VT.isFixedLengthVector()) { |
11552 | ContainerVT = getContainerForFixedLengthVector(VT); |
11553 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11554 | Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget); |
11555 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11556 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11557 | } |
11558 | |
11559 | // EVL1 may need to be extended to XLenVT with RV64LegalI32. |
11560 | EVL1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: EVL1); |
11561 | |
11562 | bool IsMaskVector = VT.getVectorElementType() == MVT::i1; |
11563 | if (IsMaskVector) { |
11564 | ContainerVT = ContainerVT.changeVectorElementType(EltVT: MVT::i8); |
11565 | |
11566 | // Expand input operands |
11567 | SDValue SplatOneOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11568 | N1: DAG.getUNDEF(VT: ContainerVT), |
11569 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL1); |
11570 | SDValue SplatZeroOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11571 | N1: DAG.getUNDEF(VT: ContainerVT), |
11572 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL1); |
11573 | Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op1, N2: SplatOneOp1, |
11574 | N3: SplatZeroOp1, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL1); |
11575 | |
11576 | SDValue SplatOneOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11577 | N1: DAG.getUNDEF(VT: ContainerVT), |
11578 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL2); |
11579 | SDValue SplatZeroOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
11580 | N1: DAG.getUNDEF(VT: ContainerVT), |
11581 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL2); |
11582 | Op2 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op2, N2: SplatOneOp2, |
11583 | N3: SplatZeroOp2, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL2); |
11584 | } |
11585 | |
11586 | int64_t ImmValue = cast<ConstantSDNode>(Val&: Offset)->getSExtValue(); |
11587 | SDValue DownOffset, UpOffset; |
11588 | if (ImmValue >= 0) { |
11589 |     // The operand is a TargetConstant; we need to rebuild it as a regular |
11590 | // constant. |
11591 | DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT); |
11592 | UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: DownOffset); |
11593 | } else { |
11594 |     // The operand is a TargetConstant; we need to rebuild it as a regular |
11595 | // constant rather than negating the original operand. |
11596 | UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT); |
11597 | DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: UpOffset); |
11598 | } |
11599 | |
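  // Worked example (a sketch): a splice with Offset = 2 and EVL1 = 8 slides
  // Op1 down by DownOffset = 2, keeping UpOffset = 8 - 2 = 6 of its elements,
  // and then slides Op2 up by 6 so that its elements follow them.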
11600 | SDValue SlideDown = |
11601 | getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Merge: DAG.getUNDEF(VT: ContainerVT), |
11602 | Op: Op1, Offset: DownOffset, Mask, VL: UpOffset); |
11603 | SDValue Result = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Merge: SlideDown, Op: Op2, |
11604 | Offset: UpOffset, Mask, VL: EVL2, Policy: RISCVII::TAIL_AGNOSTIC); |
11605 | |
11606 | if (IsMaskVector) { |
11607 |     // Truncate Result back to a mask vector (Result has the same EVL as Op2). |
11608 | Result = DAG.getNode( |
11609 | Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT.changeVectorElementType(EltVT: MVT::i1), |
11610 | Ops: {Result, DAG.getConstant(Val: 0, DL, VT: ContainerVT), |
11611 | DAG.getCondCode(Cond: ISD::SETNE), DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), |
11612 | Mask, EVL2}); |
11613 | } |
11614 | |
11615 | if (!VT.isFixedLengthVector()) |
11616 | return Result; |
11617 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11618 | } |
11619 | |
11620 | SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op, |
11621 | SelectionDAG &DAG) const { |
11622 | SDLoc DL(Op); |
11623 | SDValue Val = Op.getOperand(i: 0); |
11624 | SDValue Mask = Op.getOperand(i: 1); |
11625 | SDValue VL = Op.getOperand(i: 2); |
11626 | MVT VT = Op.getSimpleValueType(); |
11627 | |
11628 | MVT ContainerVT = VT; |
11629 | if (VT.isFixedLengthVector()) { |
11630 | ContainerVT = getContainerForFixedLengthVector(VT); |
11631 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11632 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11633 | } |
11634 | |
11635 | SDValue Result = |
11636 | lowerScalarSplat(Passthru: SDValue(), Scalar: Val, VL, VT: ContainerVT, DL, DAG, Subtarget); |
11637 | |
11638 | if (!VT.isFixedLengthVector()) |
11639 | return Result; |
11640 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11641 | } |
11642 | |
11643 | SDValue |
11644 | RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, |
11645 | SelectionDAG &DAG) const { |
11646 | SDLoc DL(Op); |
11647 | MVT VT = Op.getSimpleValueType(); |
11648 | MVT XLenVT = Subtarget.getXLenVT(); |
11649 | |
11650 | SDValue Op1 = Op.getOperand(i: 0); |
11651 | SDValue Mask = Op.getOperand(i: 1); |
11652 | SDValue EVL = Op.getOperand(i: 2); |
11653 | |
11654 | MVT ContainerVT = VT; |
11655 | if (VT.isFixedLengthVector()) { |
11656 | ContainerVT = getContainerForFixedLengthVector(VT); |
11657 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11658 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11659 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11660 | } |
11661 | |
11662 | MVT GatherVT = ContainerVT; |
11663 | MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); |
11664 | // Check if we are working with mask vectors |
11665 | bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; |
11666 | if (IsMaskVector) { |
11667 | GatherVT = IndicesVT = ContainerVT.changeVectorElementType(EltVT: MVT::i8); |
11668 | |
11669 | // Expand input operand |
11670 | SDValue SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT, |
11671 | N1: DAG.getUNDEF(VT: IndicesVT), |
11672 | N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL); |
11673 | SDValue SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT, |
11674 | N1: DAG.getUNDEF(VT: IndicesVT), |
11675 | N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL); |
11676 | Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IndicesVT, N1: Op1, N2: SplatOne, |
11677 | N3: SplatZero, N4: DAG.getUNDEF(VT: IndicesVT), N5: EVL); |
11678 | } |
11679 | |
11680 | unsigned EltSize = GatherVT.getScalarSizeInBits(); |
11681 | unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue(); |
11682 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
11683 | unsigned MaxVLMAX = |
11684 | RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize); |
11685 | |
11686 | unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; |
11687 | // If this is SEW=8 and VLMAX is unknown or more than 256, we need |
11688 | // to use vrgatherei16.vv. |
11689 | // TODO: It's also possible to use vrgatherei16.vv for other types to |
11690 | // decrease register width for the index calculation. |
11691 | // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. |
11692 | if (MaxVLMAX > 256 && EltSize == 8) { |
11693 | // If this is LMUL=8, we have to split before using vrgatherei16.vv. |
11694 | // Split the vector in half and reverse each half using a full register |
11695 | // reverse. |
11696 | // Swap the halves and concatenate them. |
11697 | // Slide the concatenated result by (VLMax - VL). |
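    // To see why the final slide amount is VLMAX - EVL: after both halves are
    // fully reversed and swapped, original element i ends up at position
    // VLMAX - 1 - i, so element EVL - 1 sits at position VLMAX - EVL; sliding
    // the whole vector down by that amount moves it to position 0.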
11698 | if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { |
11699 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: GatherVT); |
11700 | auto [Lo, Hi] = DAG.SplitVector(N: Op1, DL); |
11701 | |
11702 | SDValue LoRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo); |
11703 | SDValue HiRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi); |
11704 | |
11705 | // Reassemble the low and high pieces reversed. |
11706 | // NOTE: this Result is unmasked (because we do not need masks for |
11707 | // shuffles). If in the future this has to change, we can use a SELECT_VL |
11708 |       // between Result and UNDEF using the mask originally passed to VP_REVERSE. |
11709 | SDValue Result = |
11710 | DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: GatherVT, N1: HiRev, N2: LoRev); |
11711 | |
11712 | // Slide off any elements from past EVL that were reversed into the low |
11713 | // elements. |
11714 | unsigned MinElts = GatherVT.getVectorMinNumElements(); |
11715 | SDValue VLMax = |
11716 | DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), MinElts)); |
11717 | SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: EVL); |
11718 | |
11719 | Result = getVSlidedown(DAG, Subtarget, DL, VT: GatherVT, |
11720 | Merge: DAG.getUNDEF(VT: GatherVT), Op: Result, Offset: Diff, Mask, VL: EVL); |
11721 | |
11722 | if (IsMaskVector) { |
11723 | // Truncate Result back to a mask vector |
11724 | Result = |
11725 | DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
11726 | Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT), |
11727 | DAG.getCondCode(Cond: ISD::SETNE), |
11728 | DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL}); |
11729 | } |
11730 | |
11731 | if (!VT.isFixedLengthVector()) |
11732 | return Result; |
11733 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11734 | } |
11735 | |
11736 | // Just promote the int type to i16 which will double the LMUL. |
11737 | IndicesVT = MVT::getVectorVT(VT: MVT::i16, EC: IndicesVT.getVectorElementCount()); |
11738 | GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; |
11739 | } |
11740 | |
11741 | SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IndicesVT, N1: Mask, N2: EVL); |
11742 | SDValue VecLen = |
11743 | DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT)); |
11744 | SDValue VecLenSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT, |
11745 | N1: DAG.getUNDEF(VT: IndicesVT), N2: VecLen, N3: EVL); |
11746 | SDValue VRSUB = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IndicesVT, N1: VecLenSplat, N2: VID, |
11747 | N3: DAG.getUNDEF(VT: IndicesVT), N4: Mask, N5: EVL); |
11748 | SDValue Result = DAG.getNode(Opcode: GatherOpc, DL, VT: GatherVT, N1: Op1, N2: VRSUB, |
11749 | N3: DAG.getUNDEF(VT: GatherVT), N4: Mask, N5: EVL); |
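  // For example, with EVL = 4 the vid sequence is (0, 1, 2, 3) and the vrsub
  // against EVL - 1 = 3 yields indices (3, 2, 1, 0), so the gather reads the
  // first EVL elements of Op1 in reverse order.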
11750 | |
11751 | if (IsMaskVector) { |
11752 | // Truncate Result back to a mask vector |
11753 | Result = DAG.getNode( |
11754 | Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT, |
11755 | Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT), DAG.getCondCode(Cond: ISD::SETNE), |
11756 | DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL}); |
11757 | } |
11758 | |
11759 | if (!VT.isFixedLengthVector()) |
11760 | return Result; |
11761 | return convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11762 | } |
11763 | |
11764 | SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, |
11765 | SelectionDAG &DAG) const { |
11766 | MVT VT = Op.getSimpleValueType(); |
11767 | if (VT.getVectorElementType() != MVT::i1) |
11768 | return lowerVPOp(Op, DAG); |
11769 | |
11770 |   // It is safe to drop the mask parameter, as masked-off elements are undef. |
11771 | SDValue Op1 = Op->getOperand(Num: 0); |
11772 | SDValue Op2 = Op->getOperand(Num: 1); |
11773 | SDValue VL = Op->getOperand(Num: 3); |
11774 | |
11775 | MVT ContainerVT = VT; |
11776 | const bool IsFixed = VT.isFixedLengthVector(); |
11777 | if (IsFixed) { |
11778 | ContainerVT = getContainerForFixedLengthVector(VT); |
11779 | Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget); |
11780 | Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget); |
11781 | } |
11782 | |
11783 | SDLoc DL(Op); |
11784 | SDValue Val = DAG.getNode(Opcode: getRISCVVLOp(Op), DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL); |
11785 | if (!IsFixed) |
11786 | return Val; |
11787 | return convertFromScalableVector(VT, V: Val, DAG, Subtarget); |
11788 | } |
11789 | |
11790 | SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, |
11791 | SelectionDAG &DAG) const { |
11792 | SDLoc DL(Op); |
11793 | MVT XLenVT = Subtarget.getXLenVT(); |
11794 | MVT VT = Op.getSimpleValueType(); |
11795 | MVT ContainerVT = VT; |
11796 | if (VT.isFixedLengthVector()) |
11797 | ContainerVT = getContainerForFixedLengthVector(VT); |
11798 | |
11799 | SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other}); |
11800 | |
11801 | auto *VPNode = cast<VPStridedLoadSDNode>(Val&: Op); |
11802 | // Check if the mask is known to be all ones |
11803 | SDValue Mask = VPNode->getMask(); |
11804 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
11805 | |
11806 | SDValue IntID = DAG.getTargetConstant(Val: IsUnmasked ? Intrinsic::riscv_vlse |
11807 | : Intrinsic::riscv_vlse_mask, |
11808 | DL, VT: XLenVT); |
11809 | SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, |
11810 | DAG.getUNDEF(VT: ContainerVT), VPNode->getBasePtr(), |
11811 | VPNode->getStride()}; |
11812 | if (!IsUnmasked) { |
11813 | if (VT.isFixedLengthVector()) { |
11814 | MVT MaskVT = ContainerVT.changeVectorElementType(EltVT: MVT::i1); |
11815 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11816 | } |
11817 | Ops.push_back(Elt: Mask); |
11818 | } |
11819 | Ops.push_back(Elt: VPNode->getVectorLength()); |
11820 | if (!IsUnmasked) { |
11821 | SDValue Policy = DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT); |
11822 | Ops.push_back(Elt: Policy); |
11823 | } |
11824 | |
11825 | SDValue Result = |
11826 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, |
11827 | MemVT: VPNode->getMemoryVT(), MMO: VPNode->getMemOperand()); |
11828 | SDValue Chain = Result.getValue(R: 1); |
11829 | |
11830 | if (VT.isFixedLengthVector()) |
11831 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11832 | |
11833 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
11834 | } |
11835 | |
11836 | SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, |
11837 | SelectionDAG &DAG) const { |
11838 | SDLoc DL(Op); |
11839 | MVT XLenVT = Subtarget.getXLenVT(); |
11840 | |
11841 | auto *VPNode = cast<VPStridedStoreSDNode>(Val&: Op); |
11842 | SDValue StoreVal = VPNode->getValue(); |
11843 | MVT VT = StoreVal.getSimpleValueType(); |
11844 | MVT ContainerVT = VT; |
11845 | if (VT.isFixedLengthVector()) { |
11846 | ContainerVT = getContainerForFixedLengthVector(VT); |
11847 | StoreVal = convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget); |
11848 | } |
11849 | |
11850 | // Check if the mask is known to be all ones |
11851 | SDValue Mask = VPNode->getMask(); |
11852 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
11853 | |
11854 | SDValue IntID = DAG.getTargetConstant(Val: IsUnmasked ? Intrinsic::riscv_vsse |
11855 | : Intrinsic::riscv_vsse_mask, |
11856 | DL, VT: XLenVT); |
11857 | SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, |
11858 | VPNode->getBasePtr(), VPNode->getStride()}; |
11859 | if (!IsUnmasked) { |
11860 | if (VT.isFixedLengthVector()) { |
11861 | MVT MaskVT = ContainerVT.changeVectorElementType(EltVT: MVT::i1); |
11862 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11863 | } |
11864 | Ops.push_back(Elt: Mask); |
11865 | } |
11866 | Ops.push_back(Elt: VPNode->getVectorLength()); |
11867 | |
11868 | return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: VPNode->getVTList(), |
11869 | Ops, MemVT: VPNode->getMemoryVT(), |
11870 | MMO: VPNode->getMemOperand()); |
11871 | } |
11872 | |
11873 | // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be |
11874 | // matched to an RVV indexed load. The RVV indexed load instructions only |
11875 | // support the "unsigned unscaled" addressing mode; indices are implicitly |
11876 | // zero-extended or truncated to XLEN and are treated as byte offsets. Any |
11877 | // signed or scaled indexing is extended to the XLEN value type and scaled |
11878 | // accordingly. |
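// For example (a sketch): on RV32, a gather whose index vector has i64
// elements is lowered below by first truncating the indices to XLEN (i32);
// this is safe because, as noted above, the indices are treated as byte
// offsets truncated to XLEN anyway.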
11879 | SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, |
11880 | SelectionDAG &DAG) const { |
11881 | SDLoc DL(Op); |
11882 | MVT VT = Op.getSimpleValueType(); |
11883 | |
11884 | const auto *MemSD = cast<MemSDNode>(Val: Op.getNode()); |
11885 | EVT MemVT = MemSD->getMemoryVT(); |
11886 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
11887 | SDValue Chain = MemSD->getChain(); |
11888 | SDValue BasePtr = MemSD->getBasePtr(); |
11889 | |
11890 | [[maybe_unused]] ISD::LoadExtType LoadExtType; |
11891 | SDValue Index, Mask, PassThru, VL; |
11892 | |
11893 | if (auto *VPGN = dyn_cast<VPGatherSDNode>(Val: Op.getNode())) { |
11894 | Index = VPGN->getIndex(); |
11895 | Mask = VPGN->getMask(); |
11896 | PassThru = DAG.getUNDEF(VT); |
11897 | VL = VPGN->getVectorLength(); |
11898 | // VP doesn't support extending loads. |
11899 | LoadExtType = ISD::NON_EXTLOAD; |
11900 | } else { |
11901 | // Else it must be a MGATHER. |
11902 | auto *MGN = cast<MaskedGatherSDNode>(Val: Op.getNode()); |
11903 | Index = MGN->getIndex(); |
11904 | Mask = MGN->getMask(); |
11905 | PassThru = MGN->getPassThru(); |
11906 | LoadExtType = MGN->getExtensionType(); |
11907 | } |
11908 | |
11909 | MVT IndexVT = Index.getSimpleValueType(); |
11910 | MVT XLenVT = Subtarget.getXLenVT(); |
11911 | |
11912 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
11913 | "Unexpected VTs!" ); |
11914 | assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type" ); |
11915 |   // Targets have to explicitly opt in to extending vector loads. |
11916 | assert(LoadExtType == ISD::NON_EXTLOAD && |
11917 | "Unexpected extending MGATHER/VP_GATHER" ); |
11918 | |
11919 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
11920 | // the selection of the masked intrinsics doesn't do this for us. |
11921 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
11922 | |
11923 | MVT ContainerVT = VT; |
11924 | if (VT.isFixedLengthVector()) { |
11925 | ContainerVT = getContainerForFixedLengthVector(VT); |
11926 | IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(), |
11927 | EC: ContainerVT.getVectorElementCount()); |
11928 | |
11929 | Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget); |
11930 | |
11931 | if (!IsUnmasked) { |
11932 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
11933 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
11934 | PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget); |
11935 | } |
11936 | } |
11937 | |
11938 | if (!VL) |
11939 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
11940 | |
11941 | if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(VT: XLenVT)) { |
11942 | IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT); |
11943 | Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index); |
11944 | } |
11945 | |
11946 | unsigned IntID = |
11947 | IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; |
11948 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
11949 | if (IsUnmasked) |
11950 | Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT)); |
11951 | else |
11952 | Ops.push_back(Elt: PassThru); |
11953 | Ops.push_back(Elt: BasePtr); |
11954 | Ops.push_back(Elt: Index); |
11955 | if (!IsUnmasked) |
11956 | Ops.push_back(Elt: Mask); |
11957 | Ops.push_back(Elt: VL); |
11958 | if (!IsUnmasked) |
11959 | Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVII::TAIL_AGNOSTIC, DL, VT: XLenVT)); |
11960 | |
11961 | SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other}); |
11962 | SDValue Result = |
11963 | DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO); |
11964 | Chain = Result.getValue(R: 1); |
11965 | |
11966 | if (VT.isFixedLengthVector()) |
11967 | Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget); |
11968 | |
11969 | return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL); |
11970 | } |
11971 | |
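  // For example, a compressing store with mask (1, 0, 1, 1): vcompress packs
  // the three active elements to the front of Val, vcpop returns 3, and that
  // becomes the VL of the unmasked unit-stride store emitted below.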
11972 | // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be |
11973 | // matched to an RVV indexed store. The RVV indexed store instructions only |
11974 | // support the "unsigned unscaled" addressing mode; indices are implicitly |
11975 | // zero-extended or truncated to XLEN and are treated as byte offsets. Any |
11976 | // signed or scaled indexing is extended to the XLEN value type and scaled |
11977 | // accordingly. |
11978 | SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, |
11979 | SelectionDAG &DAG) const { |
11980 | SDLoc DL(Op); |
11981 | const auto *MemSD = cast<MemSDNode>(Val: Op.getNode()); |
11982 | EVT MemVT = MemSD->getMemoryVT(); |
11983 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
11984 | SDValue Chain = MemSD->getChain(); |
11985 | SDValue BasePtr = MemSD->getBasePtr(); |
11986 | |
11987 | [[maybe_unused]] bool IsTruncatingStore = false; |
11988 | SDValue Index, Mask, Val, VL; |
11989 | |
11990 | if (auto *VPSN = dyn_cast<VPScatterSDNode>(Val: Op.getNode())) { |
11991 | Index = VPSN->getIndex(); |
11992 | Mask = VPSN->getMask(); |
11993 | Val = VPSN->getValue(); |
11994 | VL = VPSN->getVectorLength(); |
11995 | // VP doesn't support truncating stores. |
11996 | IsTruncatingStore = false; |
11997 | } else { |
11998 | // Else it must be a MSCATTER. |
11999 | auto *MSN = cast<MaskedScatterSDNode>(Val: Op.getNode()); |
12000 | Index = MSN->getIndex(); |
12001 | Mask = MSN->getMask(); |
12002 | Val = MSN->getValue(); |
12003 | IsTruncatingStore = MSN->isTruncatingStore(); |
12004 | } |
12005 | |
12006 | MVT VT = Val.getSimpleValueType(); |
12007 | MVT IndexVT = Index.getSimpleValueType(); |
12008 | MVT XLenVT = Subtarget.getXLenVT(); |
12009 | |
12010 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
12011 | "Unexpected VTs!" ); |
12012 | assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type" ); |
12013 |   // Targets have to explicitly opt in to extending vector loads and |
12014 | // truncating vector stores. |
12015 | assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER" ); |
12016 | |
12017 | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
12018 | // the selection of the masked intrinsics doesn't do this for us. |
12019 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()); |
12020 | |
12021 | MVT ContainerVT = VT; |
12022 | if (VT.isFixedLengthVector()) { |
12023 | ContainerVT = getContainerForFixedLengthVector(VT); |
12024 | IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(), |
12025 | EC: ContainerVT.getVectorElementCount()); |
12026 | |
12027 | Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget); |
12028 | Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget); |
12029 | |
12030 | if (!IsUnmasked) { |
12031 | MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT); |
12032 | Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget); |
12033 | } |
12034 | } |
12035 | |
12036 | if (!VL) |
12037 | VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second; |
12038 | |
12039 | if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(VT: XLenVT)) { |
12040 | IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT); |
12041 | Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index); |
12042 | } |
12043 | |
12044 | unsigned IntID = |
12045 | IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; |
12046 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)}; |
12047 | Ops.push_back(Elt: Val); |
12048 | Ops.push_back(Elt: BasePtr); |
12049 | Ops.push_back(Elt: Index); |
12050 | if (!IsUnmasked) |
12051 | Ops.push_back(Elt: Mask); |
12052 | Ops.push_back(Elt: VL); |
12053 | |
12054 | return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, |
12055 | VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT, MMO); |
12056 | } |
12057 | |
12058 | SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, |
12059 | SelectionDAG &DAG) const { |
12060 | const MVT XLenVT = Subtarget.getXLenVT(); |
12061 | SDLoc DL(Op); |
12062 | SDValue Chain = Op->getOperand(Num: 0); |
12063 | SDValue SysRegNo = DAG.getTargetConstant( |
12064 | Val: RISCVSysReg::lookupSysRegByName(Name: "FRM" )->Encoding, DL, VT: XLenVT); |
12065 | SDVTList VTs = DAG.getVTList(VT1: XLenVT, VT2: MVT::Other); |
12066 | SDValue RM = DAG.getNode(Opcode: RISCVISD::READ_CSR, DL, VTList: VTs, N1: Chain, N2: SysRegNo); |
12067 | |
12068 | // Encoding used for rounding mode in RISC-V differs from that used in |
// FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
// a table, which consists of a sequence of 4-bit fields, each holding the
// corresponding FLT_ROUNDS mode.
12072 | static const int Table = |
12073 | (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | |
12074 | (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | |
12075 | (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | |
12076 | (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | |
12077 | (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); |
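// For example, reading FRM = 1 (RTZ) produces a shift amount of 4, and
// (Table >> 4) & 7 == int(RoundingMode::TowardZero), i.e. the FLT_ROUNDS
// value 0 for round-toward-zero.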
12078 | |
12079 | SDValue Shift = |
12080 | DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RM, N2: DAG.getConstant(Val: 2, DL, VT: XLenVT)); |
12081 | SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, |
12082 | N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift); |
12083 | SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted, |
12084 | N2: DAG.getConstant(Val: 7, DL, VT: XLenVT)); |
12085 | |
12086 | return DAG.getMergeValues(Ops: {Masked, Chain}, dl: DL); |
12087 | } |
12088 | |
12089 | SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, |
12090 | SelectionDAG &DAG) const { |
12091 | const MVT XLenVT = Subtarget.getXLenVT(); |
12092 | SDLoc DL(Op); |
12093 | SDValue Chain = Op->getOperand(Num: 0); |
12094 | SDValue RMValue = Op->getOperand(Num: 1); |
12095 | SDValue SysRegNo = DAG.getTargetConstant( |
12096 | Val: RISCVSysReg::lookupSysRegByName(Name: "FRM" )->Encoding, DL, VT: XLenVT); |
12097 | |
12098 | // Encoding used for rounding mode in RISC-V differs from that used in |
// FLT_ROUNDS. To convert it, the C rounding mode is used as an index into a
// table, which consists of a sequence of 4-bit fields, each holding the
// corresponding RISC-V rounding mode.
12102 | static const unsigned Table = |
12103 | (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) | |
12104 | (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) | |
12105 | (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) | |
12106 | (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | |
12107 | (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); |
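// For example, an FLT_ROUNDS argument of 1 (round to nearest) produces a
// shift amount of 4, and (Table >> 4) & 7 == RISCVFPRndMode::RNE, which is
// then written to the FRM register.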
12108 | |
12109 | RMValue = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: RMValue); |
12110 | |
12111 | SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RMValue, |
12112 | N2: DAG.getConstant(Val: 2, DL, VT: XLenVT)); |
12113 | SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, |
12114 | N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift); |
12115 | RMValue = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted, |
12116 | N2: DAG.getConstant(Val: 0x7, DL, VT: XLenVT)); |
12117 | return DAG.getNode(Opcode: RISCVISD::WRITE_CSR, DL, VT: MVT::Other, N1: Chain, N2: SysRegNo, |
12118 | N3: RMValue); |
12119 | } |
12120 | |
12121 | SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
12122 | SelectionDAG &DAG) const { |
12123 | MachineFunction &MF = DAG.getMachineFunction(); |
12124 | |
12125 | bool isRISCV64 = Subtarget.is64Bit(); |
12126 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
12127 | |
12128 | int FI = MF.getFrameInfo().CreateFixedObject(Size: isRISCV64 ? 8 : 4, SPOffset: 0, IsImmutable: false); |
12129 | return DAG.getFrameIndex(FI, VT: PtrVT); |
12130 | } |
12131 | |
12132 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
12133 | // form of the given Opcode. |
12134 | static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { |
12135 | switch (Opcode) { |
12136 | default: |
12137 | llvm_unreachable("Unexpected opcode" ); |
12138 | case ISD::SHL: |
12139 | return RISCVISD::SLLW; |
12140 | case ISD::SRA: |
12141 | return RISCVISD::SRAW; |
12142 | case ISD::SRL: |
12143 | return RISCVISD::SRLW; |
12144 | case ISD::SDIV: |
12145 | return RISCVISD::DIVW; |
12146 | case ISD::UDIV: |
12147 | return RISCVISD::DIVUW; |
12148 | case ISD::UREM: |
12149 | return RISCVISD::REMUW; |
12150 | case ISD::ROTL: |
12151 | return RISCVISD::ROLW; |
12152 | case ISD::ROTR: |
12153 | return RISCVISD::RORW; |
12154 | } |
12155 | } |
12156 | |
12157 | // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG |
12158 | // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would |
12159 | // otherwise be promoted to i64, making it difficult to select the |
// SLLW/DIVUW/.../*W nodes later on, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
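// For example, on RV64 an i32 SRL is rewritten as
//   (trunc i32 (SRLW (any_extend i64 X), (any_extend i64 Y)))
// so that instruction selection can still pick srlw.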
12162 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, |
12163 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
12164 | SDLoc DL(N); |
12165 | RISCVISD::NodeType WOpcode = getRISCVWOpcode(Opcode: N->getOpcode()); |
12166 | SDValue NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12167 | SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12168 | SDValue NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
12169 | // ReplaceNodeResults requires we maintain the same type for the return value. |
12170 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
12171 | } |
12172 | |
// Converts the given 32-bit operation to an i64 operation with sign extension
// semantics to reduce the number of sign extension instructions.
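// For example, an i32 ADD becomes
//   (trunc i32 (sext_inreg (add i64 (any_extend X), (any_extend Y)), i32))
// which matches the sign-extended result produced by addw.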
12175 | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
12176 | SDLoc DL(N); |
12177 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12178 | SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12179 | SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
12180 | SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp, |
12181 | N2: DAG.getValueType(MVT::i32)); |
12182 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes); |
12183 | } |
12184 | |
12185 | void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, |
12186 | SmallVectorImpl<SDValue> &Results, |
12187 | SelectionDAG &DAG) const { |
12188 | SDLoc DL(N); |
12189 | switch (N->getOpcode()) { |
12190 | default: |
12191 | llvm_unreachable("Don't know how to custom type legalize this operation!" ); |
12192 | case ISD::STRICT_FP_TO_SINT: |
12193 | case ISD::STRICT_FP_TO_UINT: |
12194 | case ISD::FP_TO_SINT: |
12195 | case ISD::FP_TO_UINT: { |
12196 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12197 | "Unexpected custom legalisation" ); |
12198 | bool IsStrict = N->isStrictFPOpcode(); |
12199 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || |
12200 | N->getOpcode() == ISD::STRICT_FP_TO_SINT; |
12201 | SDValue Op0 = IsStrict ? N->getOperand(Num: 1) : N->getOperand(Num: 0); |
12202 | if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) != |
12203 | TargetLowering::TypeSoftenFloat) { |
12204 | if (!isTypeLegal(VT: Op0.getValueType())) |
12205 | return; |
12206 | if (IsStrict) { |
12207 | SDValue Chain = N->getOperand(Num: 0); |
// In the absence of Zfh, promote f16 to f32, then convert.
12209 | if (Op0.getValueType() == MVT::f16 && |
12210 | !Subtarget.hasStdExtZfhOrZhinx()) { |
12211 | Op0 = DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {MVT::f32, MVT::Other}, |
12212 | Ops: {Chain, Op0}); |
12213 | Chain = Op0.getValue(R: 1); |
12214 | } |
12215 | unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64 |
12216 | : RISCVISD::STRICT_FCVT_WU_RV64; |
12217 | SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other); |
12218 | SDValue Res = DAG.getNode( |
12219 | Opcode: Opc, DL, VTList: VTs, N1: Chain, N2: Op0, |
12220 | N3: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: MVT::i64)); |
12221 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12222 | Results.push_back(Elt: Res.getValue(R: 1)); |
12223 | return; |
12224 | } |
// For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
// convert.
12227 | if ((Op0.getValueType() == MVT::f16 && |
12228 | !Subtarget.hasStdExtZfhOrZhinx()) || |
12229 | Op0.getValueType() == MVT::bf16) |
12230 | Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0); |
12231 | |
12232 | unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
12233 | SDValue Res = |
12234 | DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: Op0, |
12235 | N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: MVT::i64)); |
12236 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12237 | return; |
12238 | } |
12239 | // If the FP type needs to be softened, emit a library call using the 'si' |
12240 | // version. If we left it to default legalization we'd end up with 'di'. If |
12241 | // the FP type doesn't need to be softened just let generic type |
12242 | // legalization promote the result type. |
12243 | RTLIB::Libcall LC; |
12244 | if (IsSigned) |
12245 | LC = RTLIB::getFPTOSINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0)); |
12246 | else |
12247 | LC = RTLIB::getFPTOUINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0)); |
12248 | MakeLibCallOptions CallOptions; |
12249 | EVT OpVT = Op0.getValueType(); |
12250 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: N->getValueType(ResNo: 0), Value: true); |
12251 | SDValue Chain = IsStrict ? N->getOperand(Num: 0) : SDValue(); |
12252 | SDValue Result; |
12253 | std::tie(args&: Result, args&: Chain) = |
12254 | makeLibCall(DAG, LC, RetVT: N->getValueType(ResNo: 0), Ops: Op0, CallOptions, dl: DL, Chain); |
12255 | Results.push_back(Elt: Result); |
12256 | if (IsStrict) |
12257 | Results.push_back(Elt: Chain); |
12258 | break; |
12259 | } |
12260 | case ISD::LROUND: { |
12261 | SDValue Op0 = N->getOperand(Num: 0); |
12262 | EVT Op0VT = Op0.getValueType(); |
12263 | if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) != |
12264 | TargetLowering::TypeSoftenFloat) { |
12265 | if (!isTypeLegal(VT: Op0VT)) |
12266 | return; |
12267 | |
// In the absence of Zfh, promote f16 to f32, then convert.
12269 | if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) |
12270 | Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0); |
12271 | |
12272 | SDValue Res = |
12273 | DAG.getNode(Opcode: RISCVISD::FCVT_W_RV64, DL, VT: MVT::i64, N1: Op0, |
12274 | N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RMM, DL, VT: MVT::i64)); |
12275 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12276 | return; |
12277 | } |
12278 | // If the FP type needs to be softened, emit a library call to lround. We'll |
12279 | // need to truncate the result. We assume any value that doesn't fit in i32 |
12280 | // is allowed to return an unspecified value. |
12281 | RTLIB::Libcall LC = |
12282 | Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; |
12283 | MakeLibCallOptions CallOptions; |
12284 | EVT OpVT = Op0.getValueType(); |
12285 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64, Value: true); |
12286 | SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first; |
12287 | Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result); |
12288 | Results.push_back(Elt: Result); |
12289 | break; |
12290 | } |
12291 | case ISD::READCYCLECOUNTER: |
12292 | case ISD::READSTEADYCOUNTER: { |
12293 | assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only " |
12294 | "has custom type legalization on riscv32" ); |
12295 | |
12296 | SDValue LoCounter, HiCounter; |
12297 | MVT XLenVT = Subtarget.getXLenVT(); |
12298 | if (N->getOpcode() == ISD::READCYCLECOUNTER) { |
12299 | LoCounter = DAG.getTargetConstant( |
12300 | Val: RISCVSysReg::lookupSysRegByName(Name: "CYCLE" )->Encoding, DL, VT: XLenVT); |
12301 | HiCounter = DAG.getTargetConstant( |
12302 | Val: RISCVSysReg::lookupSysRegByName(Name: "CYCLEH" )->Encoding, DL, VT: XLenVT); |
12303 | } else { |
12304 | LoCounter = DAG.getTargetConstant( |
12305 | Val: RISCVSysReg::lookupSysRegByName(Name: "TIME" )->Encoding, DL, VT: XLenVT); |
12306 | HiCounter = DAG.getTargetConstant( |
12307 | Val: RISCVSysReg::lookupSysRegByName(Name: "TIMEH" )->Encoding, DL, VT: XLenVT); |
12308 | } |
12309 | SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other); |
12310 | SDValue RCW = DAG.getNode(Opcode: RISCVISD::READ_COUNTER_WIDE, DL, VTList: VTs, |
12311 | N1: N->getOperand(Num: 0), N2: LoCounter, N3: HiCounter); |
12312 | |
12313 | Results.push_back( |
12314 | Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: RCW, N2: RCW.getValue(R: 1))); |
12315 | Results.push_back(Elt: RCW.getValue(R: 2)); |
12316 | break; |
12317 | } |
12318 | case ISD::LOAD: { |
12319 | if (!ISD::isNON_EXTLoad(N)) |
12320 | return; |
12321 | |
12322 | // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the |
12323 | // sext_inreg we emit for ADD/SUB/MUL/SLLI. |
12324 | LoadSDNode *Ld = cast<LoadSDNode>(Val: N); |
12325 | |
12326 | SDLoc dl(N); |
12327 | SDValue Res = DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl, VT: MVT::i64, Chain: Ld->getChain(), |
12328 | Ptr: Ld->getBasePtr(), MemVT: Ld->getMemoryVT(), |
12329 | MMO: Ld->getMemOperand()); |
12330 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Res)); |
12331 | Results.push_back(Elt: Res.getValue(R: 1)); |
12332 | return; |
12333 | } |
12334 | case ISD::MUL: { |
12335 | unsigned Size = N->getSimpleValueType(ResNo: 0).getSizeInBits(); |
12336 | unsigned XLen = Subtarget.getXLen(); |
// This multiply needs to be expanded; try to use MULHSU+MUL if possible.
12338 | if (Size > XLen) { |
12339 | assert(Size == (XLen * 2) && "Unexpected custom legalisation" ); |
12340 | SDValue LHS = N->getOperand(Num: 0); |
12341 | SDValue RHS = N->getOperand(Num: 1); |
12342 | APInt HighMask = APInt::getHighBitsSet(numBits: Size, hiBitsSet: XLen); |
12343 | |
12344 | bool LHSIsU = DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask); |
12345 | bool RHSIsU = DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask); |
12346 | // We need exactly one side to be unsigned. |
12347 | if (LHSIsU == RHSIsU) |
12348 | return; |
12349 | |
12350 | auto MakeMULPair = [&](SDValue S, SDValue U) { |
12351 | MVT XLenVT = Subtarget.getXLenVT(); |
12352 | S = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: S); |
12353 | U = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: U); |
12354 | SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: S, N2: U); |
12355 | SDValue Hi = DAG.getNode(Opcode: RISCVISD::MULHSU, DL, VT: XLenVT, N1: S, N2: U); |
12356 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: N->getValueType(ResNo: 0), N1: Lo, N2: Hi); |
12357 | }; |
12358 | |
12359 | bool LHSIsS = DAG.ComputeNumSignBits(Op: LHS) > XLen; |
12360 | bool RHSIsS = DAG.ComputeNumSignBits(Op: RHS) > XLen; |
12361 | |
12362 | // The other operand should be signed, but still prefer MULH when |
12363 | // possible. |
12364 | if (RHSIsU && LHSIsS && !RHSIsS) |
12365 | Results.push_back(Elt: MakeMULPair(LHS, RHS)); |
12366 | else if (LHSIsU && RHSIsS && !LHSIsS) |
12367 | Results.push_back(Elt: MakeMULPair(RHS, LHS)); |
12368 | |
12369 | return; |
12370 | } |
12371 | [[fallthrough]]; |
12372 | } |
12373 | case ISD::ADD: |
12374 | case ISD::SUB: |
12375 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12376 | "Unexpected custom legalisation" ); |
12377 | Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG)); |
12378 | break; |
12379 | case ISD::SHL: |
12380 | case ISD::SRA: |
12381 | case ISD::SRL: |
12382 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12383 | "Unexpected custom legalisation" ); |
12384 | if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) { |
12385 | // If we can use a BSET instruction, allow default promotion to apply. |
12386 | if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && |
12387 | isOneConstant(V: N->getOperand(Num: 0))) |
12388 | break; |
12389 | Results.push_back(Elt: customLegalizeToWOp(N, DAG)); |
12390 | break; |
12391 | } |
12392 | |
12393 | // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is |
12394 | // similar to customLegalizeToWOpWithSExt, but we must zero_extend the |
12395 | // shift amount. |
12396 | if (N->getOpcode() == ISD::SHL) { |
12397 | SDLoc DL(N); |
12398 | SDValue NewOp0 = |
12399 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12400 | SDValue NewOp1 = |
12401 | DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12402 | SDValue NewWOp = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
12403 | SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp, |
12404 | N2: DAG.getValueType(MVT::i32)); |
12405 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes)); |
12406 | } |
12407 | |
12408 | break; |
12409 | case ISD::ROTL: |
12410 | case ISD::ROTR: |
12411 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12412 | "Unexpected custom legalisation" ); |
12413 | assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || |
12414 | Subtarget.hasVendorXTHeadBb()) && |
12415 | "Unexpected custom legalization" ); |
12416 | if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) && |
12417 | !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())) |
12418 | return; |
12419 | Results.push_back(Elt: customLegalizeToWOp(N, DAG)); |
12420 | break; |
12421 | case ISD::CTTZ: |
12422 | case ISD::CTTZ_ZERO_UNDEF: |
12423 | case ISD::CTLZ: |
12424 | case ISD::CTLZ_ZERO_UNDEF: { |
12425 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12426 | "Unexpected custom legalisation" ); |
12427 | |
12428 | SDValue NewOp0 = |
12429 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12430 | bool IsCTZ = |
12431 | N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; |
12432 | unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; |
12433 | SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: NewOp0); |
12434 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12435 | return; |
12436 | } |
12437 | case ISD::SDIV: |
12438 | case ISD::UDIV: |
12439 | case ISD::UREM: { |
12440 | MVT VT = N->getSimpleValueType(ResNo: 0); |
12441 | assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && |
12442 | Subtarget.is64Bit() && Subtarget.hasStdExtM() && |
12443 | "Unexpected custom legalisation" ); |
12444 | // Don't promote division/remainder by constant since we should expand those |
12445 | // to multiply by magic constant. |
12446 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
12447 | if (N->getOperand(Num: 1).getOpcode() == ISD::Constant && |
12448 | !isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr)) |
12449 | return; |
12450 | |
12451 | // If the input is i32, use ANY_EXTEND since the W instructions don't read |
12452 | // the upper 32 bits. For other types we need to sign or zero extend |
12453 | // based on the opcode. |
12454 | unsigned ExtOpc = ISD::ANY_EXTEND; |
12455 | if (VT != MVT::i32) |
12456 | ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND |
12457 | : ISD::ZERO_EXTEND; |
12458 | |
12459 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, ExtOpc)); |
12460 | break; |
12461 | } |
12462 | case ISD::SADDO: { |
12463 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12464 | "Unexpected custom legalisation" ); |
12465 | |
12466 | // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise |
12467 | // use the default legalization. |
12468 | if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1))) |
12469 | return; |
12470 | |
12471 | SDValue LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12472 | SDValue RHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12473 | SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: LHS, N2: RHS); |
12474 | Res = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: Res, |
12475 | N2: DAG.getValueType(MVT::i32)); |
12476 | |
12477 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: MVT::i64); |
12478 | |
12479 | // For an addition, the result should be less than one of the operands (LHS) |
12480 | // if and only if the other operand (RHS) is negative, otherwise there will |
12481 | // be overflow. |
12482 | // For a subtraction, the result should be less than one of the operands |
12483 | // (LHS) if and only if the other operand (RHS) is (non-zero) positive, |
12484 | // otherwise there will be overflow. |
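// For example, for i32 saddo of 0x7fffffff and 1, Res sign-extends to
// 0xffffffff80000000, so Res < LHS while RHS is non-negative, and the XOR
// below reports overflow.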
12485 | EVT OType = N->getValueType(ResNo: 1); |
12486 | SDValue ResultLowerThanLHS = DAG.getSetCC(DL, VT: OType, LHS: Res, RHS: LHS, Cond: ISD::SETLT); |
12487 | SDValue ConditionRHS = DAG.getSetCC(DL, VT: OType, LHS: RHS, RHS: Zero, Cond: ISD::SETLT); |
12488 | |
12489 | SDValue Overflow = |
12490 | DAG.getNode(Opcode: ISD::XOR, DL, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS); |
12491 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12492 | Results.push_back(Elt: Overflow); |
12493 | return; |
12494 | } |
12495 | case ISD::UADDO: |
12496 | case ISD::USUBO: { |
12497 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12498 | "Unexpected custom legalisation" ); |
12499 | bool IsAdd = N->getOpcode() == ISD::UADDO; |
12500 | // Create an ADDW or SUBW. |
12501 | SDValue LHS = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12502 | SDValue RHS = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12503 | SDValue Res = |
12504 | DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL, VT: MVT::i64, N1: LHS, N2: RHS); |
12505 | Res = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: Res, |
12506 | N2: DAG.getValueType(MVT::i32)); |
12507 | |
12508 | SDValue Overflow; |
12509 | if (IsAdd && isOneConstant(V: RHS)) { |
12510 | // Special case uaddo X, 1 overflowed if the addition result is 0. |
// The general case (X + C) < C is not necessarily beneficial. Although we
// reduce the live range of X, we may introduce the materialization of
// constant C, especially when the setcc result is used by a branch. We have
// no compare-with-constant-and-branch instructions.
12515 | Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: Res, |
12516 | RHS: DAG.getConstant(Val: 0, DL, VT: MVT::i64), Cond: ISD::SETEQ); |
12517 | } else if (IsAdd && isAllOnesConstant(V: RHS)) { |
12518 | // Special case uaddo X, -1 overflowed if X != 0. |
12519 | Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: N->getOperand(Num: 0), |
12520 | RHS: DAG.getConstant(Val: 0, DL, VT: MVT::i32), Cond: ISD::SETNE); |
12521 | } else { |
12522 | // Sign extend the LHS and perform an unsigned compare with the ADDW |
12523 | // result. Since the inputs are sign extended from i32, this is equivalent |
12524 | // to comparing the lower 32 bits. |
12525 | LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12526 | Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: Res, RHS: LHS, |
12527 | Cond: IsAdd ? ISD::SETULT : ISD::SETUGT); |
12528 | } |
12529 | |
12530 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12531 | Results.push_back(Elt: Overflow); |
12532 | return; |
12533 | } |
12534 | case ISD::UADDSAT: |
12535 | case ISD::USUBSAT: { |
12536 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12537 | "Unexpected custom legalisation" ); |
12538 | if (Subtarget.hasStdExtZbb()) { |
12539 | // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using |
12540 | // sign extend allows overflow of the lower 32 bits to be detected on |
12541 | // the promoted size. |
12542 | SDValue LHS = |
12543 | DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12544 | SDValue RHS = |
12545 | DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12546 | SDValue Res = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: LHS, N2: RHS); |
12547 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12548 | return; |
12549 | } |
12550 | |
12551 | // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom |
12552 | // promotion for UADDO/USUBO. |
12553 | Results.push_back(Elt: expandAddSubSat(Node: N, DAG)); |
12554 | return; |
12555 | } |
12556 | case ISD::SADDSAT: |
12557 | case ISD::SSUBSAT: { |
12558 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12559 | "Unexpected custom legalisation" ); |
12560 | Results.push_back(Elt: expandAddSubSat(Node: N, DAG)); |
12561 | return; |
12562 | } |
12563 | case ISD::ABS: { |
12564 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
12565 | "Unexpected custom legalisation" ); |
12566 | |
12567 | if (Subtarget.hasStdExtZbb()) { |
12568 | // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. |
12569 | // This allows us to remember that the result is sign extended. Expanding |
12570 | // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. |
12571 | SDValue Src = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, |
12572 | Operand: N->getOperand(Num: 0)); |
12573 | SDValue Abs = DAG.getNode(Opcode: RISCVISD::ABSW, DL, VT: MVT::i64, Operand: Src); |
12574 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Abs)); |
12575 | return; |
12576 | } |
12577 | |
12578 | // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) |
12579 | SDValue Src = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
12580 | |
// Freeze the source so we can increase its use count.
12582 | Src = DAG.getFreeze(V: Src); |
12583 | |
12584 | // Copy sign bit to all bits using the sraiw pattern. |
12585 | SDValue SignFill = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: Src, |
12586 | N2: DAG.getValueType(MVT::i32)); |
12587 | SignFill = DAG.getNode(Opcode: ISD::SRA, DL, VT: MVT::i64, N1: SignFill, |
12588 | N2: DAG.getConstant(Val: 31, DL, VT: MVT::i64)); |
12589 | |
12590 | SDValue NewRes = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, N1: Src, N2: SignFill); |
12591 | NewRes = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: NewRes, N2: SignFill); |
12592 | |
12593 | // NOTE: The result is only required to be anyextended, but sext is |
12594 | // consistent with type legalization of sub. |
12595 | NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewRes, |
12596 | N2: DAG.getValueType(MVT::i32)); |
12597 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes)); |
12598 | return; |
12599 | } |
12600 | case ISD::BITCAST: { |
12601 | EVT VT = N->getValueType(ResNo: 0); |
12602 | assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!" ); |
12603 | SDValue Op0 = N->getOperand(Num: 0); |
12604 | EVT Op0VT = Op0.getValueType(); |
12605 | MVT XLenVT = Subtarget.getXLenVT(); |
12606 | if (VT == MVT::i16 && Op0VT == MVT::f16 && |
12607 | Subtarget.hasStdExtZfhminOrZhinxmin()) { |
12608 | SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0); |
12609 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: FPConv)); |
12610 | } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && |
12611 | Subtarget.hasStdExtZfbfmin()) { |
12612 | SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0); |
12613 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: FPConv)); |
12614 | } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && |
12615 | Subtarget.hasStdExtFOrZfinx()) { |
12616 | SDValue FPConv = |
12617 | DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Op0); |
12618 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: FPConv)); |
12619 | } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) { |
12620 | SDValue NewReg = DAG.getNode(Opcode: RISCVISD::SplitF64, DL, |
12621 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Op0); |
12622 | SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, |
12623 | N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1)); |
12624 | Results.push_back(Elt: RetReg); |
12625 | } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && |
12626 | isTypeLegal(VT: Op0VT)) { |
12627 | // Custom-legalize bitcasts from fixed-length vector types to illegal |
12628 | // scalar types in order to improve codegen. Bitcast the vector to a |
12629 | // one-element vector type whose element type is the same as the result |
12630 | // type, and extract the first element. |
12631 | EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1); |
12632 | if (isTypeLegal(VT: BVT)) { |
12633 | SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0); |
12634 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: BVec, |
12635 | N2: DAG.getVectorIdxConstant(Val: 0, DL))); |
12636 | } |
12637 | } |
12638 | break; |
12639 | } |
12640 | case RISCVISD::BREV8: |
12641 | case RISCVISD::ORC_B: { |
12642 | MVT VT = N->getSimpleValueType(ResNo: 0); |
12643 | MVT XLenVT = Subtarget.getXLenVT(); |
12644 | assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
12645 | "Unexpected custom legalisation" ); |
12646 | assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) || |
12647 | (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) && |
12648 | "Unexpected extension" ); |
12649 | SDValue NewOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 0)); |
12650 | SDValue NewRes = DAG.getNode(Opcode: N->getOpcode(), DL, VT: XLenVT, Operand: NewOp); |
12651 | // ReplaceNodeResults requires we maintain the same type for the return |
12652 | // value. |
12653 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: NewRes)); |
12654 | break; |
12655 | } |
12656 | case ISD::EXTRACT_VECTOR_ELT: { |
12657 | // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element |
12658 | // type is illegal (currently only vXi64 RV32). |
12659 | // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are |
12660 | // transferred to the destination register. We issue two of these from the |
12661 | // upper- and lower- halves of the SEW-bit vector element, slid down to the |
12662 | // first element. |
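// For example, extracting element Idx from an nxv2i64 vector on RV32 slides
// the requested element down to position 0 (unless Idx is already 0), reads
// the low 32 bits with vmv.x.s, shifts the element right by 32 with vsrl and
// reads the high 32 bits with a second vmv.x.s, then reassembles the i64
// result with BUILD_PAIR.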
12663 | SDValue Vec = N->getOperand(Num: 0); |
12664 | SDValue Idx = N->getOperand(Num: 1); |
12665 | |
12666 | // The vector type hasn't been legalized yet so we can't issue target |
12667 | // specific nodes if it needs legalization. |
12668 | // FIXME: We would manually legalize if it's important. |
12669 | if (!isTypeLegal(VT: Vec.getValueType())) |
12670 | return; |
12671 | |
12672 | MVT VecVT = Vec.getSimpleValueType(); |
12673 | |
12674 | assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && |
12675 | VecVT.getVectorElementType() == MVT::i64 && |
12676 | "Unexpected EXTRACT_VECTOR_ELT legalization" ); |
12677 | |
12678 | // If this is a fixed vector, we need to convert it to a scalable vector. |
12679 | MVT ContainerVT = VecVT; |
12680 | if (VecVT.isFixedLengthVector()) { |
12681 | ContainerVT = getContainerForFixedLengthVector(VT: VecVT); |
12682 | Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget); |
12683 | } |
12684 | |
12685 | MVT XLenVT = Subtarget.getXLenVT(); |
12686 | |
12687 | // Use a VL of 1 to avoid processing more elements than we need. |
12688 | auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget); |
12689 | |
12690 | // Unless the index is known to be 0, we must slide the vector down to get |
12691 | // the desired element into index 0. |
12692 | if (!isNullConstant(V: Idx)) { |
12693 | Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, |
12694 | Merge: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL); |
12695 | } |
12696 | |
12697 | // Extract the lower XLEN bits of the correct vector element. |
12698 | SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec); |
12699 | |
12700 | // To extract the upper XLEN bits of the vector element, shift the first |
12701 | // element right by 32 bits and re-extract the lower XLEN bits. |
12702 | SDValue ThirtyTwoV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, |
12703 | N1: DAG.getUNDEF(VT: ContainerVT), |
12704 | N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL); |
12705 | SDValue LShr32 = |
12706 | DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: ContainerVT, N1: Vec, N2: ThirtyTwoV, |
12707 | N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL); |
12708 | |
12709 | SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32); |
12710 | |
12711 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: EltLo, N2: EltHi)); |
12712 | break; |
12713 | } |
12714 | case ISD::INTRINSIC_WO_CHAIN: { |
12715 | unsigned IntNo = N->getConstantOperandVal(Num: 0); |
12716 | switch (IntNo) { |
12717 | default: |
12718 | llvm_unreachable( |
12719 | "Don't know how to custom type legalize this intrinsic!" ); |
12720 | case Intrinsic::experimental_get_vector_length: { |
12721 | SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); |
12722 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12723 | return; |
12724 | } |
12725 | case Intrinsic::experimental_cttz_elts: { |
12726 | SDValue Res = lowerCttzElts(N, DAG, Subtarget); |
12727 | Results.push_back( |
12728 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res)); |
12729 | return; |
12730 | } |
12731 | case Intrinsic::riscv_orc_b: |
12732 | case Intrinsic::riscv_brev8: |
12733 | case Intrinsic::riscv_sha256sig0: |
12734 | case Intrinsic::riscv_sha256sig1: |
12735 | case Intrinsic::riscv_sha256sum0: |
12736 | case Intrinsic::riscv_sha256sum1: |
12737 | case Intrinsic::riscv_sm3p0: |
12738 | case Intrinsic::riscv_sm3p1: { |
12739 | if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32) |
12740 | return; |
12741 | unsigned Opc; |
12742 | switch (IntNo) { |
12743 | case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; |
12744 | case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; |
12745 | case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; |
12746 | case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; |
12747 | case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; |
12748 | case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; |
12749 | case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; |
12750 | case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; |
12751 | } |
12752 | |
12753 | SDValue NewOp = |
12754 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12755 | SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: NewOp); |
12756 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12757 | return; |
12758 | } |
12759 | case Intrinsic::riscv_sm4ks: |
12760 | case Intrinsic::riscv_sm4ed: { |
12761 | unsigned Opc = |
12762 | IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; |
12763 | SDValue NewOp0 = |
12764 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12765 | SDValue NewOp1 = |
12766 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2)); |
12767 | SDValue Res = |
12768 | DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1, N3: N->getOperand(Num: 3)); |
12769 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12770 | return; |
12771 | } |
12772 | case Intrinsic::riscv_mopr: { |
12773 | if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32) |
12774 | return; |
12775 | SDValue NewOp = |
12776 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12777 | SDValue Res = DAG.getNode( |
12778 | Opcode: RISCVISD::MOPR, DL, VT: MVT::i64, N1: NewOp, |
12779 | N2: DAG.getTargetConstant(Val: N->getConstantOperandVal(Num: 2), DL, VT: MVT::i64)); |
12780 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12781 | return; |
12782 | } |
12783 | case Intrinsic::riscv_moprr: { |
12784 | if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32) |
12785 | return; |
12786 | SDValue NewOp0 = |
12787 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12788 | SDValue NewOp1 = |
12789 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2)); |
12790 | SDValue Res = DAG.getNode( |
12791 | Opcode: RISCVISD::MOPRR, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1, |
12792 | N3: DAG.getTargetConstant(Val: N->getConstantOperandVal(Num: 3), DL, VT: MVT::i64)); |
12793 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12794 | return; |
12795 | } |
12796 | case Intrinsic::riscv_clmul: { |
12797 | if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32) |
12798 | return; |
12799 | |
12800 | SDValue NewOp0 = |
12801 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12802 | SDValue NewOp1 = |
12803 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2)); |
12804 | SDValue Res = DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
12805 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12806 | return; |
12807 | } |
12808 | case Intrinsic::riscv_clmulh: |
12809 | case Intrinsic::riscv_clmulr: { |
12810 | if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32) |
12811 | return; |
12812 | |
12813 | // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros |
12814 | // to the full 128-bit clmul result of multiplying two xlen values. |
12815 | // Perform clmulr or clmulh on the shifted values. Finally, extract the |
12816 | // upper 32 bits. |
12817 | // |
12818 | // The alternative is to mask the inputs to 32 bits and use clmul, but |
12819 | // that requires two shifts to mask each input without zext.w. |
12820 | // FIXME: If the inputs are known zero extended or could be freely |
12821 | // zero extended, the mask form would be better. |
12822 | SDValue NewOp0 = |
12823 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
12824 | SDValue NewOp1 = |
12825 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2)); |
12826 | NewOp0 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp0, |
12827 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
12828 | NewOp1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp1, |
12829 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
12830 | unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH |
12831 | : RISCVISD::CLMULR; |
12832 | SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
12833 | Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Res, |
12834 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
12835 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res)); |
12836 | return; |
12837 | } |
12838 | case Intrinsic::riscv_vmv_x_s: { |
12839 | EVT VT = N->getValueType(ResNo: 0); |
12840 | MVT XLenVT = Subtarget.getXLenVT(); |
12841 | if (VT.bitsLT(VT: XLenVT)) { |
12842 | // Simple case just extract using vmv.x.s and truncate. |
SDValue Extract = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL,
12844 | VT: Subtarget.getXLenVT(), Operand: N->getOperand(Num: 1)); |
12845 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Extract)); |
12846 | return; |
12847 | } |
12848 | |
12849 | assert(VT == MVT::i64 && !Subtarget.is64Bit() && |
12850 | "Unexpected custom legalization" ); |
12851 | |
12852 | // We need to do the move in two steps. |
12853 | SDValue Vec = N->getOperand(Num: 1); |
12854 | MVT VecVT = Vec.getSimpleValueType(); |
12855 | |
12856 | // First extract the lower XLEN bits of the element. |
12857 | SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec); |
12858 | |
12859 | // To extract the upper XLEN bits of the vector element, shift the first |
12860 | // element right by 32 bits and re-extract the lower XLEN bits. |
12861 | auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT: VecVT, DL, DAG, Subtarget); |
12862 | |
12863 | SDValue ThirtyTwoV = |
12864 | DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT), |
12865 | N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL); |
12866 | SDValue LShr32 = DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: VecVT, N1: Vec, N2: ThirtyTwoV, |
12867 | N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL); |
12868 | SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32); |
12869 | |
12870 | Results.push_back( |
12871 | Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: EltLo, N2: EltHi)); |
12872 | break; |
12873 | } |
12874 | } |
12875 | break; |
12876 | } |
12877 | case ISD::VECREDUCE_ADD: |
12878 | case ISD::VECREDUCE_AND: |
12879 | case ISD::VECREDUCE_OR: |
12880 | case ISD::VECREDUCE_XOR: |
12881 | case ISD::VECREDUCE_SMAX: |
12882 | case ISD::VECREDUCE_UMAX: |
12883 | case ISD::VECREDUCE_SMIN: |
12884 | case ISD::VECREDUCE_UMIN: |
12885 | if (SDValue V = lowerVECREDUCE(Op: SDValue(N, 0), DAG)) |
12886 | Results.push_back(Elt: V); |
12887 | break; |
12888 | case ISD::VP_REDUCE_ADD: |
12889 | case ISD::VP_REDUCE_AND: |
12890 | case ISD::VP_REDUCE_OR: |
12891 | case ISD::VP_REDUCE_XOR: |
12892 | case ISD::VP_REDUCE_SMAX: |
12893 | case ISD::VP_REDUCE_UMAX: |
12894 | case ISD::VP_REDUCE_SMIN: |
12895 | case ISD::VP_REDUCE_UMIN: |
12896 | if (SDValue V = lowerVPREDUCE(Op: SDValue(N, 0), DAG)) |
12897 | Results.push_back(Elt: V); |
12898 | break; |
12899 | case ISD::GET_ROUNDING: { |
12900 | SDVTList VTs = DAG.getVTList(VT1: Subtarget.getXLenVT(), VT2: MVT::Other); |
12901 | SDValue Res = DAG.getNode(Opcode: ISD::GET_ROUNDING, DL, VTList: VTs, N: N->getOperand(Num: 0)); |
12902 | Results.push_back(Elt: Res.getValue(R: 0)); |
12903 | Results.push_back(Elt: Res.getValue(R: 1)); |
12904 | break; |
12905 | } |
12906 | } |
12907 | } |
12908 | |
12909 | /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP |
12910 | /// which corresponds to it. |
12911 | static unsigned getVecReduceOpcode(unsigned Opc) { |
12912 | switch (Opc) { |
12913 | default: |
12914 | llvm_unreachable("Unhandled binary to transfrom reduction" ); |
12915 | case ISD::ADD: |
12916 | return ISD::VECREDUCE_ADD; |
12917 | case ISD::UMAX: |
12918 | return ISD::VECREDUCE_UMAX; |
12919 | case ISD::SMAX: |
12920 | return ISD::VECREDUCE_SMAX; |
12921 | case ISD::UMIN: |
12922 | return ISD::VECREDUCE_UMIN; |
12923 | case ISD::SMIN: |
12924 | return ISD::VECREDUCE_SMIN; |
12925 | case ISD::AND: |
12926 | return ISD::VECREDUCE_AND; |
12927 | case ISD::OR: |
12928 | return ISD::VECREDUCE_OR; |
12929 | case ISD::XOR: |
12930 | return ISD::VECREDUCE_XOR; |
12931 | case ISD::FADD: |
12932 | // Note: This is the associative form of the generic reduction opcode. |
12933 | return ISD::VECREDUCE_FADD; |
12934 | } |
12935 | } |
12936 | |
12937 | /// Perform two related transforms whose purpose is to incrementally recognize |
12938 | /// an explode_vector followed by scalar reduction as a vector reduction node. |
12939 | /// This exists to recover from a deficiency in SLP which can't handle |
12940 | /// forests with multiple roots sharing common nodes. In some cases, one |
12941 | /// of the trees will be vectorized, and the other will remain (unprofitably) |
12942 | /// scalarized. |
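// For example, (add (extract_vector_elt v, 0), (extract_vector_elt v, 1))
// becomes a VECREDUCE_ADD over the first two elements of v (extracted as a
// 2 x VT subvector), and each subsequent
// (add (vecreduce_add ...), (extract_vector_elt v, N)) grows that subvector
// by one element.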
12943 | static SDValue |
combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12945 | const RISCVSubtarget &Subtarget) { |
12946 | |
// This transform needs to run before all integer types have been legalized
// to i64 (so that the vector element type matches the add type), and while
// it's safe to introduce odd-sized vector types.
12950 | if (DAG.NewNodesMustHaveLegalTypes) |
12951 | return SDValue(); |
12952 | |
12953 | // Without V, this transform isn't useful. We could form the (illegal) |
12954 | // operations and let them be scalarized again, but there's really no point. |
12955 | if (!Subtarget.hasVInstructions()) |
12956 | return SDValue(); |
12957 | |
12958 | const SDLoc DL(N); |
12959 | const EVT VT = N->getValueType(ResNo: 0); |
12960 | const unsigned Opc = N->getOpcode(); |
12961 | |
12962 | // For FADD, we only handle the case with reassociation allowed. We |
12963 | // could handle strict reduction order, but at the moment, there's no |
12964 | // known reason to, and the complexity isn't worth it. |
12965 | // TODO: Handle fminnum and fmaxnum here |
12966 | if (!VT.isInteger() && |
12967 | (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation())) |
12968 | return SDValue(); |
12969 | |
12970 | const unsigned ReduceOpc = getVecReduceOpcode(Opc); |
12971 | assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) && |
12972 | "Inconsistent mappings" ); |
12973 | SDValue LHS = N->getOperand(Num: 0); |
12974 | SDValue RHS = N->getOperand(Num: 1); |
12975 | |
12976 | if (!LHS.hasOneUse() || !RHS.hasOneUse()) |
12977 | return SDValue(); |
12978 | |
12979 | if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
12980 | std::swap(a&: LHS, b&: RHS); |
12981 | |
12982 | if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
12983 | !isa<ConstantSDNode>(Val: RHS.getOperand(i: 1))) |
12984 | return SDValue(); |
12985 | |
12986 | uint64_t RHSIdx = cast<ConstantSDNode>(Val: RHS.getOperand(i: 1))->getLimitedValue(); |
12987 | SDValue SrcVec = RHS.getOperand(i: 0); |
12988 | EVT SrcVecVT = SrcVec.getValueType(); |
12989 | assert(SrcVecVT.getVectorElementType() == VT); |
12990 | if (SrcVecVT.isScalableVector()) |
12991 | return SDValue(); |
12992 | |
12993 | if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen()) |
12994 | return SDValue(); |
12995 | |
12996 | // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to |
12997 | // reduce_op (extract_subvector [2 x VT] from V). This will form the |
12998 | // root of our reduction tree. TODO: We could extend this to any two |
12999 | // adjacent aligned constant indices if desired. |
13000 | if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
13001 | LHS.getOperand(i: 0) == SrcVec && isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) { |
13002 | uint64_t LHSIdx = |
13003 | cast<ConstantSDNode>(Val: LHS.getOperand(i: 1))->getLimitedValue(); |
13004 | if (0 == std::min(a: LHSIdx, b: RHSIdx) && 1 == std::max(a: LHSIdx, b: RHSIdx)) { |
13005 | EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 2); |
13006 | SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec, |
13007 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
13008 | return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags: N->getFlags()); |
13009 | } |
13010 | } |
13011 | |
13012 | // Match (binop (reduce (extract_subvector V, 0), |
13013 | // (extract_vector_elt V, sizeof(SubVec)))) |
13014 | // into a reduction of one more element from the original vector V. |
13015 | if (LHS.getOpcode() != ReduceOpc) |
13016 | return SDValue(); |
13017 | |
13018 | SDValue ReduceVec = LHS.getOperand(i: 0); |
13019 | if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
13020 | ReduceVec.hasOneUse() && ReduceVec.getOperand(i: 0) == RHS.getOperand(i: 0) && |
13021 | isNullConstant(V: ReduceVec.getOperand(i: 1)) && |
13022 | ReduceVec.getValueType().getVectorNumElements() == RHSIdx) { |
13023 | // For illegal types (e.g. 3xi32), most will be combined again into a |
13024 | // wider (hopefully legal) type. If this is a terminal state, we are |
13025 | // relying on type legalization here to produce something reasonable |
13026 | // and this lowering quality could probably be improved. (TODO) |
13027 | EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: RHSIdx + 1); |
13028 | SDValue Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ReduceVT, N1: SrcVec, |
13029 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
13030 | auto Flags = ReduceVec->getFlags(); |
13031 | Flags.intersectWith(Flags: N->getFlags()); |
13032 | return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags); |
13033 | } |
13034 | |
13035 | return SDValue(); |
13036 | } |
13037 | |
13038 | |
13039 | // Try to fold (<bop> x, (reduction.<bop> vec, start)) |
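// into (reduction.<bop> vec, x), which is valid when the original start value
// is the neutral element of <bop> and the AVLs involved are known non-zero.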
13040 | static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, |
13041 | const RISCVSubtarget &Subtarget) { |
13042 | auto BinOpToRVVReduce = [](unsigned Opc) { |
13043 | switch (Opc) { |
13044 | default: |
13045 | llvm_unreachable("Unhandled binary to transfrom reduction" ); |
13046 | case ISD::ADD: |
13047 | return RISCVISD::VECREDUCE_ADD_VL; |
13048 | case ISD::UMAX: |
13049 | return RISCVISD::VECREDUCE_UMAX_VL; |
13050 | case ISD::SMAX: |
13051 | return RISCVISD::VECREDUCE_SMAX_VL; |
13052 | case ISD::UMIN: |
13053 | return RISCVISD::VECREDUCE_UMIN_VL; |
13054 | case ISD::SMIN: |
13055 | return RISCVISD::VECREDUCE_SMIN_VL; |
13056 | case ISD::AND: |
13057 | return RISCVISD::VECREDUCE_AND_VL; |
13058 | case ISD::OR: |
13059 | return RISCVISD::VECREDUCE_OR_VL; |
13060 | case ISD::XOR: |
13061 | return RISCVISD::VECREDUCE_XOR_VL; |
13062 | case ISD::FADD: |
13063 | return RISCVISD::VECREDUCE_FADD_VL; |
13064 | case ISD::FMAXNUM: |
13065 | return RISCVISD::VECREDUCE_FMAX_VL; |
13066 | case ISD::FMINNUM: |
13067 | return RISCVISD::VECREDUCE_FMIN_VL; |
13068 | } |
13069 | }; |
13070 | |
13071 | auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { |
13072 | return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
13073 | isNullConstant(V: V.getOperand(i: 1)) && |
13074 | V.getOperand(i: 0).getOpcode() == BinOpToRVVReduce(Opc); |
13075 | }; |
13076 | |
13077 | unsigned Opc = N->getOpcode(); |
13078 | unsigned ReduceIdx; |
13079 | if (IsReduction(N->getOperand(Num: 0), Opc)) |
13080 | ReduceIdx = 0; |
13081 | else if (IsReduction(N->getOperand(Num: 1), Opc)) |
13082 | ReduceIdx = 1; |
13083 | else |
13084 | return SDValue(); |
13085 | |
// Skip if FADD disallows reassociation but the combine requires it.
13087 | if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) |
13088 | return SDValue(); |
13089 | |
SDValue Extract = N->getOperand(Num: ReduceIdx);
13091 | SDValue Reduce = Extract.getOperand(i: 0); |
13092 | if (!Extract.hasOneUse() || !Reduce.hasOneUse()) |
13093 | return SDValue(); |
13094 | |
13095 | SDValue ScalarV = Reduce.getOperand(i: 2); |
13096 | EVT ScalarVT = ScalarV.getValueType(); |
13097 | if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && |
13098 | ScalarV.getOperand(i: 0)->isUndef() && |
13099 | isNullConstant(V: ScalarV.getOperand(i: 2))) |
13100 | ScalarV = ScalarV.getOperand(i: 1); |
13101 | |
13102 | // Make sure that ScalarV is a splat with VL=1. |
13103 | if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && |
13104 | ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && |
13105 | ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) |
13106 | return SDValue(); |
13107 | |
13108 | if (!isNonZeroAVL(AVL: ScalarV.getOperand(i: 2))) |
13109 | return SDValue(); |
13110 | |
// Check that the scalar of ScalarV is the neutral element.
// TODO: Deal with values other than the neutral element.
13113 | if (!isNeutralConstant(Opc: N->getOpcode(), Flags: N->getFlags(), V: ScalarV.getOperand(i: 1), |
13114 | OperandNo: 0)) |
13115 | return SDValue(); |
13116 | |
13117 | // If the AVL is zero, operand 0 will be returned. So it's not safe to fold. |
13118 | // FIXME: We might be able to improve this if operand 0 is undef. |
13119 | if (!isNonZeroAVL(AVL: Reduce.getOperand(i: 5))) |
13120 | return SDValue(); |
13121 | |
13122 | SDValue NewStart = N->getOperand(Num: 1 - ReduceIdx); |
13123 | |
13124 | SDLoc DL(N); |
13125 | SDValue NewScalarV = |
13126 | lowerScalarInsert(Scalar: NewStart, VL: ScalarV.getOperand(i: 2), |
13127 | VT: ScalarV.getSimpleValueType(), DL, DAG, Subtarget); |
13128 | |
13129 | // If we looked through an INSERT_SUBVECTOR we need to restore it. |
13130 | if (ScalarVT != ScalarV.getValueType()) |
13131 | NewScalarV = |
13132 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ScalarVT, N1: DAG.getUNDEF(VT: ScalarVT), |
13133 | N2: NewScalarV, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
13134 | |
13135 | SDValue Ops[] = {Reduce.getOperand(i: 0), Reduce.getOperand(i: 1), |
13136 | NewScalarV, Reduce.getOperand(i: 3), |
13137 | Reduce.getOperand(i: 4), Reduce.getOperand(i: 5)}; |
13138 | SDValue NewReduce = |
13139 | DAG.getNode(Opcode: Reduce.getOpcode(), DL, VT: Reduce.getValueType(), Ops); |
13140 | return DAG.getNode(Opcode: Extract.getOpcode(), DL, VT: Extract.getValueType(), N1: NewReduce, |
13141 | N2: Extract.getOperand(i: 1)); |
13142 | } |
13143 | |
// Optimize (add (shl x, c0), (shl y, c1)) ->
//          (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
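// For example, (add (shl x, 1), (shl y, 4)) becomes (shl (SH3ADD y, x), 1),
// since the shift amounts differ by 3.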
13146 | static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, |
13147 | const RISCVSubtarget &Subtarget) { |
13148 | // Perform this optimization only in the zba extension. |
13149 | if (!Subtarget.hasStdExtZba()) |
13150 | return SDValue(); |
13151 | |
13152 | // Skip for vector types and larger types. |
13153 | EVT VT = N->getValueType(ResNo: 0); |
13154 | if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) |
13155 | return SDValue(); |
13156 | |
13157 | // The two operand nodes must be SHL and have no other use. |
13158 | SDValue N0 = N->getOperand(Num: 0); |
13159 | SDValue N1 = N->getOperand(Num: 1); |
13160 | if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || |
13161 | !N0->hasOneUse() || !N1->hasOneUse()) |
13162 | return SDValue(); |
13163 | |
13164 | // Check c0 and c1. |
13165 | auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
13166 | auto *N1C = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1)); |
13167 | if (!N0C || !N1C) |
13168 | return SDValue(); |
13169 | int64_t C0 = N0C->getSExtValue(); |
13170 | int64_t C1 = N1C->getSExtValue(); |
13171 | if (C0 <= 0 || C1 <= 0) |
13172 | return SDValue(); |
13173 | |
13174 | // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. |
13175 | int64_t Bits = std::min(a: C0, b: C1); |
13176 | int64_t Diff = std::abs(i: C0 - C1); |
13177 | if (Diff != 1 && Diff != 2 && Diff != 3) |
13178 | return SDValue(); |
13179 | |
13180 | // Build nodes. |
13181 | SDLoc DL(N); |
13182 | SDValue NS = (C0 < C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0); |
13183 | SDValue NL = (C0 > C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0); |
13184 | SDValue SHADD = DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: NL, |
13185 | N2: DAG.getConstant(Val: Diff, DL, VT), N3: NS); |
13186 | return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: SHADD, N2: DAG.getConstant(Val: Bits, DL, VT)); |
13187 | } |
13188 | |
13189 | // Combine a constant select operand into its use: |
13190 | // |
13191 | // (and (select cond, -1, c), x) |
13192 | // -> (select cond, x, (and x, c)) [AllOnes=1] |
13193 | // (or (select cond, 0, c), x) |
13194 | // -> (select cond, x, (or x, c)) [AllOnes=0] |
13195 | // (xor (select cond, 0, c), x) |
13196 | // -> (select cond, x, (xor x, c)) [AllOnes=0] |
13197 | // (add (select cond, 0, c), x) |
13198 | // -> (select cond, x, (add x, c)) [AllOnes=0] |
13199 | // (sub x, (select cond, 0, c)) |
13200 | // -> (select cond, x, (sub x, c)) [AllOnes=0] |
13201 | static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, |
13202 | SelectionDAG &DAG, bool AllOnes, |
13203 | const RISCVSubtarget &Subtarget) { |
13204 | EVT VT = N->getValueType(ResNo: 0); |
13205 | |
13206 | // Skip vectors. |
13207 | if (VT.isVector()) |
13208 | return SDValue(); |
13209 | |
13210 | if (!Subtarget.hasConditionalMoveFusion()) { |
13211 | // (select cond, x, (and x, c)) has custom lowering with Zicond. |
13212 | if ((!Subtarget.hasStdExtZicond() && |
13213 | !Subtarget.hasVendorXVentanaCondOps()) || |
13214 | N->getOpcode() != ISD::AND) |
13215 | return SDValue(); |
13216 | |
// Maybe harmful when the condition code has multiple uses.
13218 | if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(i: 0).hasOneUse()) |
13219 | return SDValue(); |
13220 | |
13221 | // Maybe harmful when VT is wider than XLen. |
13222 | if (VT.getSizeInBits() > Subtarget.getXLen()) |
13223 | return SDValue(); |
13224 | } |
13225 | |
13226 | if ((Slct.getOpcode() != ISD::SELECT && |
13227 | Slct.getOpcode() != RISCVISD::SELECT_CC) || |
13228 | !Slct.hasOneUse()) |
13229 | return SDValue(); |
13230 | |
13231 | auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { |
13232 | return AllOnes ? isAllOnesConstant(V: N) : isNullConstant(V: N); |
13233 | }; |
13234 | |
13235 | bool SwapSelectOps; |
13236 | unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; |
13237 | SDValue TrueVal = Slct.getOperand(i: 1 + OpOffset); |
13238 | SDValue FalseVal = Slct.getOperand(i: 2 + OpOffset); |
13239 | SDValue NonConstantVal; |
13240 | if (isZeroOrAllOnes(TrueVal, AllOnes)) { |
13241 | SwapSelectOps = false; |
13242 | NonConstantVal = FalseVal; |
13243 | } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { |
13244 | SwapSelectOps = true; |
13245 | NonConstantVal = TrueVal; |
13246 | } else |
13247 | return SDValue(); |
13248 | |
// Slct is now known to be the desired identity constant when CC is true.
13250 | TrueVal = OtherOp; |
13251 | FalseVal = DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT, N1: OtherOp, N2: NonConstantVal); |
13252 | // Unless SwapSelectOps says the condition should be false. |
13253 | if (SwapSelectOps) |
13254 | std::swap(a&: TrueVal, b&: FalseVal); |
13255 | |
13256 | if (Slct.getOpcode() == RISCVISD::SELECT_CC) |
13257 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL: SDLoc(N), VT, |
13258 | Ops: {Slct.getOperand(i: 0), Slct.getOperand(i: 1), |
13259 | Slct.getOperand(i: 2), TrueVal, FalseVal}); |
13260 | |
13261 | return DAG.getNode(Opcode: ISD::SELECT, DL: SDLoc(N), VT, |
13262 | Ops: {Slct.getOperand(i: 0), TrueVal, FalseVal}); |
13263 | } |
13264 | |
13265 | // Attempt combineSelectAndUse on each operand of a commutative operator N. |
13266 | static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, |
13267 | bool AllOnes, |
13268 | const RISCVSubtarget &Subtarget) { |
13269 | SDValue N0 = N->getOperand(Num: 0); |
13270 | SDValue N1 = N->getOperand(Num: 1); |
13271 | if (SDValue Result = combineSelectAndUse(N, Slct: N0, OtherOp: N1, DAG, AllOnes, Subtarget)) |
13272 | return Result; |
13273 | if (SDValue Result = combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, AllOnes, Subtarget)) |
13274 | return Result; |
13275 | return SDValue(); |
13276 | } |
13277 | |
13278 | // Transform (add (mul x, c0), c1) -> |
13279 | // (add (mul (add x, c1/c0), c0), c1%c0). |
13280 | // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case |
13281 | // that should be excluded is when c0*(c1/c0) is simm12, which will lead |
13282 | // to an infinite loop in DAGCombine if transformed. |
13283 | // Or transform (add (mul x, c0), c1) -> |
13284 | // (add (mul (add x, c1/c0+1), c0), c1%c0-c0), |
13285 | // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner |
13286 | // case that should be excluded is when c0*(c1/c0+1) is simm12, which will |
13287 | // lead to an infinite loop in DAGCombine if transformed. |
13288 | // Or transform (add (mul x, c0), c1) -> |
13289 | // (add (mul (add x, c1/c0-1), c0), c1%c0+c0), |
13290 | // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner |
13291 | // case that should be excluded is when c0*(c1/c0-1) is simm12, which will |
13292 | // lead to an infinite loop in DAGCombine if transformed. |
13293 | // Or transform (add (mul x, c0), c1) -> |
13294 | // (mul (add x, c1/c0), c0). |
13295 | // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. |
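// For example, with c0 = 100 and c1 = 4123 (c1 is not simm12):
//   (add (mul x, 100), 4123) -> (add (mul (add x, 41), 100), 23),
// since 41 and 23 are simm12 and c0*(c1/c0) = 4100 is not.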
13296 | static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, |
13297 | const RISCVSubtarget &Subtarget) { |
13298 | // Skip for vector types and larger types. |
13299 | EVT VT = N->getValueType(ResNo: 0); |
13300 | if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) |
13301 | return SDValue(); |
// The first operand node must be a MUL and have no other use.
13303 | SDValue N0 = N->getOperand(Num: 0); |
13304 | if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) |
13305 | return SDValue(); |
13306 | // Check if c0 and c1 match above conditions. |
13307 | auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
13308 | auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
13309 | if (!N0C || !N1C) |
13310 | return SDValue(); |
13311 | // If N0C has multiple uses it's possible one of the cases in |
13312 | // DAGCombiner::isMulAddWithConstProfitable will be true, which would result |
13313 | // in an infinite loop. |
13314 | if (!N0C->hasOneUse()) |
13315 | return SDValue(); |
13316 | int64_t C0 = N0C->getSExtValue(); |
13317 | int64_t C1 = N1C->getSExtValue(); |
13318 | int64_t CA, CB; |
13319 | if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(x: C1)) |
13320 | return SDValue(); |
// Search for a proper CA (non-zero) and CB such that both are simm12.
13322 | if ((C1 / C0) != 0 && isInt<12>(x: C1 / C0) && isInt<12>(x: C1 % C0) && |
13323 | !isInt<12>(x: C0 * (C1 / C0))) { |
13324 | CA = C1 / C0; |
13325 | CB = C1 % C0; |
13326 | } else if ((C1 / C0 + 1) != 0 && isInt<12>(x: C1 / C0 + 1) && |
13327 | isInt<12>(x: C1 % C0 - C0) && !isInt<12>(x: C0 * (C1 / C0 + 1))) { |
13328 | CA = C1 / C0 + 1; |
13329 | CB = C1 % C0 - C0; |
13330 | } else if ((C1 / C0 - 1) != 0 && isInt<12>(x: C1 / C0 - 1) && |
13331 | isInt<12>(x: C1 % C0 + C0) && !isInt<12>(x: C0 * (C1 / C0 - 1))) { |
13332 | CA = C1 / C0 - 1; |
13333 | CB = C1 % C0 + C0; |
13334 | } else |
13335 | return SDValue(); |
13336 | // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). |
13337 | SDLoc DL(N); |
13338 | SDValue New0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0->getOperand(Num: 0), |
13339 | N2: DAG.getConstant(Val: CA, DL, VT)); |
13340 | SDValue New1 = |
13341 | DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: New0, N2: DAG.getConstant(Val: C0, DL, VT)); |
13342 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: New1, N2: DAG.getConstant(Val: CB, DL, VT)); |
13343 | } |
13344 | |
13345 | // add (zext, zext) -> zext (add (zext, zext)) |
13346 | // sub (zext, zext) -> sext (sub (zext, zext)) |
13347 | // mul (zext, zext) -> zext (mul (zext, zext)) |
13348 | // sdiv (zext, zext) -> zext (sdiv (zext, zext)) |
13349 | // udiv (zext, zext) -> zext (udiv (zext, zext)) |
13350 | // srem (zext, zext) -> zext (srem (zext, zext)) |
13351 | // urem (zext, zext) -> zext (urem (zext, zext)) |
13352 | // |
// where the sum of the extend widths matches, and the range of the bin op
13354 | // fits inside the width of the narrower bin op. (For profitability on rvv, we |
13355 | // use a power of two for both inner and outer extend.) |
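// For example (writing only the element types of the vectors):
//   add (zext i8 X to i32), (zext i8 Y to i32)
//     -> zext (add (zext i8 X to i16), (zext i8 Y to i16)) to i32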
13356 | static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) { |
13357 | |
13358 | EVT VT = N->getValueType(ResNo: 0); |
13359 | if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
13360 | return SDValue(); |
13361 | |
13362 | SDValue N0 = N->getOperand(Num: 0); |
13363 | SDValue N1 = N->getOperand(Num: 1); |
13364 | if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND) |
13365 | return SDValue(); |
13366 | if (!N0.hasOneUse() || !N1.hasOneUse()) |
13367 | return SDValue(); |
13368 | |
13369 | SDValue Src0 = N0.getOperand(i: 0); |
13370 | SDValue Src1 = N1.getOperand(i: 0); |
13371 | EVT SrcVT = Src0.getValueType(); |
13372 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT: SrcVT) || |
13373 | SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 || |
13374 | SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2) |
13375 | return SDValue(); |
13376 | |
13377 | LLVMContext &C = *DAG.getContext(); |
13378 | EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(Context&: C); |
13379 | EVT NarrowVT = EVT::getVectorVT(Context&: C, VT: ElemVT, EC: VT.getVectorElementCount()); |
13380 | |
13381 | Src0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src0), VT: NarrowVT, Operand: Src0); |
13382 | Src1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src1), VT: NarrowVT, Operand: Src1); |
13383 | |
13384 | // Src0 and Src1 are zero extended, so they're always positive if signed. |
13385 | // |
// sub can produce a negative from two positive operands, so it needs to be
// sign extended. Other nodes produce a positive from two positive operands, so
// zero extend instead.
13389 | unsigned OuterExtend = |
13390 | N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
13391 | |
13392 | return DAG.getNode( |
13393 | Opcode: OuterExtend, DL: SDLoc(N), VT, |
13394 | Operand: DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT: NarrowVT, N1: Src0, N2: Src1)); |
13395 | } |
13396 | |
// Try to turn (add (xor bool, 1), -1) into (neg bool).
13398 | static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { |
13399 | SDValue N0 = N->getOperand(Num: 0); |
13400 | SDValue N1 = N->getOperand(Num: 1); |
13401 | EVT VT = N->getValueType(ResNo: 0); |
13402 | SDLoc DL(N); |
13403 | |
13404 | // RHS should be -1. |
13405 | if (!isAllOnesConstant(V: N1)) |
13406 | return SDValue(); |
13407 | |
13408 | // Look for (xor X, 1). |
13409 | if (N0.getOpcode() != ISD::XOR || !isOneConstant(V: N0.getOperand(i: 1))) |
13410 | return SDValue(); |
13411 | |
13412 | // First xor input should be 0 or 1. |
13413 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1); |
13414 | if (!DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask)) |
13415 | return SDValue(); |
13416 | |
13417 | // Emit a negate of the setcc. |
13418 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), |
13419 | N2: N0.getOperand(i: 0)); |
13420 | } |
13421 | |
13422 | static SDValue performADDCombine(SDNode *N, |
13423 | TargetLowering::DAGCombinerInfo &DCI, |
13424 | const RISCVSubtarget &Subtarget) { |
13425 | SelectionDAG &DAG = DCI.DAG; |
13426 | if (SDValue V = combineAddOfBooleanXor(N, DAG)) |
13427 | return V; |
13428 | if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) |
13429 | return V; |
13430 | if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) |
13431 | if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) |
13432 | return V; |
13433 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13434 | return V; |
13435 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13436 | return V; |
13437 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
13438 | return V; |
13439 | |
13440 | // fold (add (select lhs, rhs, cc, 0, y), x) -> |
13441 | // (select lhs, rhs, cc, x, (add x, y)) |
13442 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
13443 | } |
13444 | |
// Try to turn a sub of a boolean RHS and a constant LHS into an addi.
13446 | static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { |
13447 | SDValue N0 = N->getOperand(Num: 0); |
13448 | SDValue N1 = N->getOperand(Num: 1); |
13449 | EVT VT = N->getValueType(ResNo: 0); |
13450 | SDLoc DL(N); |
13451 | |
13452 | // Require a constant LHS. |
13453 | auto *N0C = dyn_cast<ConstantSDNode>(Val&: N0); |
13454 | if (!N0C) |
13455 | return SDValue(); |
13456 | |
13457 | // All our optimizations involve subtracting 1 from the immediate and forming |
13458 | // an ADDI. Make sure the new immediate is valid for an ADDI. |
13459 | APInt ImmValMinus1 = N0C->getAPIntValue() - 1; |
13460 | if (!ImmValMinus1.isSignedIntN(N: 12)) |
13461 | return SDValue(); |
13462 | |
13463 | SDValue NewLHS; |
13464 | if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { |
13465 | // (sub constant, (setcc x, y, eq/neq)) -> |
13466 | // (add (setcc x, y, neq/eq), constant - 1) |
13467 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get(); |
13468 | EVT SetCCOpVT = N1.getOperand(i: 0).getValueType(); |
13469 | if (!isIntEqualitySetCC(Code: CCVal) || !SetCCOpVT.isInteger()) |
13470 | return SDValue(); |
13471 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT); |
13472 | NewLHS = |
13473 | DAG.getSetCC(DL: SDLoc(N1), VT, LHS: N1.getOperand(i: 0), RHS: N1.getOperand(i: 1), Cond: CCVal); |
13474 | } else if (N1.getOpcode() == ISD::XOR && isOneConstant(V: N1.getOperand(i: 1)) && |
13475 | N1.getOperand(i: 0).getOpcode() == ISD::SETCC) { |
13476 | // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). |
// Since setcc returns a bool, the xor is equivalent to 1-setcc.
13478 | NewLHS = N1.getOperand(i: 0); |
13479 | } else |
13480 | return SDValue(); |
13481 | |
13482 | SDValue NewRHS = DAG.getConstant(Val: ImmValMinus1, DL, VT); |
13483 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NewLHS, N2: NewRHS); |
13484 | } |
13485 | |
// Looks for (sub (shl X, 8), X) where only bits 0, 8, 16, 24, etc. of X can be
// non-zero. Replace with orc.b.
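// For example, for i32 X = 0x00010001:
//   (shl X, 8) - X = 0x01000100 - 0x00010001 = 0x00FF00FF = orc.b(X).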
13488 | static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, |
13489 | const RISCVSubtarget &Subtarget) { |
13490 | if (!Subtarget.hasStdExtZbb()) |
13491 | return SDValue(); |
13492 | |
13493 | EVT VT = N->getValueType(ResNo: 0); |
13494 | |
13495 | if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16) |
13496 | return SDValue(); |
13497 | |
13498 | SDValue N0 = N->getOperand(Num: 0); |
13499 | SDValue N1 = N->getOperand(Num: 1); |
13500 | |
13501 | if (N0.getOpcode() != ISD::SHL || N0.getOperand(i: 0) != N1 || !N0.hasOneUse()) |
13502 | return SDValue(); |
13503 | |
13504 | auto *ShAmtC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); |
13505 | if (!ShAmtC || ShAmtC->getZExtValue() != 8) |
13506 | return SDValue(); |
13507 | |
13508 | APInt Mask = APInt::getSplat(NewLen: VT.getSizeInBits(), V: APInt(8, 0xfe)); |
13509 | if (!DAG.MaskedValueIsZero(Op: N1, Mask)) |
13510 | return SDValue(); |
13511 | |
13512 | return DAG.getNode(Opcode: RISCVISD::ORC_B, DL: SDLoc(N), VT, Operand: N1); |
13513 | } |
13514 | |
13515 | static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, |
13516 | const RISCVSubtarget &Subtarget) { |
13517 | if (SDValue V = combineSubOfBoolean(N, DAG)) |
13518 | return V; |
13519 | |
13520 | EVT VT = N->getValueType(ResNo: 0); |
13521 | SDValue N0 = N->getOperand(Num: 0); |
13522 | SDValue N1 = N->getOperand(Num: 1); |
13523 | // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) |
13524 | if (isNullConstant(V: N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && |
13525 | isNullConstant(V: N1.getOperand(i: 1))) { |
13526 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get(); |
13527 | if (CCVal == ISD::SETLT) { |
13528 | SDLoc DL(N); |
13529 | unsigned ShAmt = N0.getValueSizeInBits() - 1; |
13530 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N1.getOperand(i: 0), |
13531 | N2: DAG.getConstant(Val: ShAmt, DL, VT)); |
13532 | } |
13533 | } |
13534 | |
13535 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
13536 | return V; |
13537 | if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget)) |
13538 | return V; |
13539 | |
13540 | // fold (sub x, (select lhs, rhs, cc, 0, y)) -> |
13541 | // (select lhs, rhs, cc, x, (sub x, y)) |
13542 | return combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, /*AllOnes*/ false, Subtarget); |
13543 | } |
13544 | |
13545 | // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. |
13546 | // Legalizing setcc can introduce xors like this. Doing this transform reduces |
13547 | // the number of xors and may allow the xor to fold into a branch condition. |
13548 | static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { |
13549 | SDValue N0 = N->getOperand(Num: 0); |
13550 | SDValue N1 = N->getOperand(Num: 1); |
13551 | bool IsAnd = N->getOpcode() == ISD::AND; |
13552 | |
13553 | if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) |
13554 | return SDValue(); |
13555 | |
13556 | if (!N0.hasOneUse() || !N1.hasOneUse()) |
13557 | return SDValue(); |
13558 | |
13559 | SDValue N01 = N0.getOperand(i: 1); |
13560 | SDValue N11 = N1.getOperand(i: 1); |
13561 | |
13562 | // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into |
13563 | // (xor X, -1) based on the upper bits of the other operand being 0. If the |
13564 | // operation is And, allow one of the Xors to use -1. |
13565 | if (isOneConstant(V: N01)) { |
13566 | if (!isOneConstant(V: N11) && !(IsAnd && isAllOnesConstant(V: N11))) |
13567 | return SDValue(); |
13568 | } else if (isOneConstant(V: N11)) { |
13569 | // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. |
13570 | if (!(IsAnd && isAllOnesConstant(V: N01))) |
13571 | return SDValue(); |
13572 | } else |
13573 | return SDValue(); |
13574 | |
13575 | EVT VT = N->getValueType(ResNo: 0); |
13576 | |
13577 | SDValue N00 = N0.getOperand(i: 0); |
13578 | SDValue N10 = N1.getOperand(i: 0); |
13579 | |
13580 | // The LHS of the xors needs to be 0/1. |
13581 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1); |
13582 | if (!DAG.MaskedValueIsZero(Op: N00, Mask) || !DAG.MaskedValueIsZero(Op: N10, Mask)) |
13583 | return SDValue(); |
13584 | |
13585 | // Invert the opcode and insert a new xor. |
13586 | SDLoc DL(N); |
13587 | unsigned Opc = IsAnd ? ISD::OR : ISD::AND; |
13588 | SDValue Logic = DAG.getNode(Opcode: Opc, DL, VT, N1: N00, N2: N10); |
13589 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Logic, N2: DAG.getConstant(Val: 1, DL, VT)); |
13590 | } |
13591 | |
13592 | // Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to |
13593 | // (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed |
// value to an unsigned value. This will be lowered to vmax and a series of
// vnclipu instructions later. This can be extended to truncated types other
// than i8 by replacing 256 and 255 with the equivalent constants for the
13597 | // type. |
13598 | static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) { |
13599 | EVT VT = N->getValueType(ResNo: 0); |
13600 | SDValue N0 = N->getOperand(Num: 0); |
13601 | EVT SrcVT = N0.getValueType(); |
13602 | |
13603 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
13604 | if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(VT: SrcVT)) |
13605 | return SDValue(); |
13606 | |
13607 | if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse()) |
13608 | return SDValue(); |
13609 | |
13610 | SDValue Cond = N0.getOperand(i: 0); |
13611 | SDValue True = N0.getOperand(i: 1); |
13612 | SDValue False = N0.getOperand(i: 2); |
13613 | |
13614 | if (Cond.getOpcode() != ISD::SETCC) |
13615 | return SDValue(); |
13616 | |
13617 | // FIXME: Support the version of this pattern with the select operands |
13618 | // swapped. |
13619 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get(); |
13620 | if (CCVal != ISD::SETULT) |
13621 | return SDValue(); |
13622 | |
13623 | SDValue CondLHS = Cond.getOperand(i: 0); |
13624 | SDValue CondRHS = Cond.getOperand(i: 1); |
13625 | |
13626 | if (CondLHS != True) |
13627 | return SDValue(); |
13628 | |
13629 | unsigned ScalarBits = VT.getScalarSizeInBits(); |
13630 | |
13631 | // FIXME: Support other constants. |
13632 | ConstantSDNode *CondRHSC = isConstOrConstSplat(N: CondRHS); |
13633 | if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits)) |
13634 | return SDValue(); |
13635 | |
13636 | if (False.getOpcode() != ISD::SIGN_EXTEND) |
13637 | return SDValue(); |
13638 | |
13639 | False = False.getOperand(i: 0); |
13640 | |
13641 | if (False.getOpcode() != ISD::SETCC || False.getOperand(i: 0) != True) |
13642 | return SDValue(); |
13643 | |
13644 | ConstantSDNode *FalseRHSC = isConstOrConstSplat(N: False.getOperand(i: 1)); |
13645 | if (!FalseRHSC || !FalseRHSC->isZero()) |
13646 | return SDValue(); |
13647 | |
13648 | ISD::CondCode CCVal2 = cast<CondCodeSDNode>(Val: False.getOperand(i: 2))->get(); |
13649 | if (CCVal2 != ISD::SETGT) |
13650 | return SDValue(); |
13651 | |
13652 | // Emit the signed to unsigned saturation pattern. |
13653 | SDLoc DL(N); |
13654 | SDValue Max = |
13655 | DAG.getNode(Opcode: ISD::SMAX, DL, VT: SrcVT, N1: True, N2: DAG.getConstant(Val: 0, DL, VT: SrcVT)); |
13656 | SDValue Min = |
13657 | DAG.getNode(Opcode: ISD::SMIN, DL, VT: SrcVT, N1: Max, |
13658 | N2: DAG.getConstant(Val: (1ULL << ScalarBits) - 1, DL, VT: SrcVT)); |
13659 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Min); |
13660 | } |
13661 | |
13662 | static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, |
13663 | const RISCVSubtarget &Subtarget) { |
13664 | SDValue N0 = N->getOperand(Num: 0); |
13665 | EVT VT = N->getValueType(ResNo: 0); |
13666 | |
13667 | // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero |
13668 | // extending X. This is safe since we only need the LSB after the shift and |
13669 | // shift amounts larger than 31 would produce poison. If we wait until |
13670 | // type legalization, we'll create RISCVISD::SRLW and we can't recover it |
13671 | // to use a BEXT instruction. |
13672 | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && |
13673 | N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && |
13674 | !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && N0.hasOneUse()) { |
13675 | SDLoc DL(N0); |
13676 | SDValue Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 0)); |
13677 | SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 1)); |
13678 | SDValue Srl = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Op0, N2: Op1); |
13679 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT, Operand: Srl); |
13680 | } |
13681 | |
13682 | return combineTruncSelectToSMaxUSat(N, DAG); |
13683 | } |
13684 | |
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and a logic operation. Returns the newly constructed
// node if the conditions for the optimization are satisfied.
13688 | static SDValue performANDCombine(SDNode *N, |
13689 | TargetLowering::DAGCombinerInfo &DCI, |
13690 | const RISCVSubtarget &Subtarget) { |
13691 | SelectionDAG &DAG = DCI.DAG; |
13692 | |
13693 | SDValue N0 = N->getOperand(Num: 0); |
13694 | // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero |
13695 | // extending X. This is safe since we only need the LSB after the shift and |
13696 | // shift amounts larger than 31 would produce poison. If we wait until |
13697 | // type legalization, we'll create RISCVISD::SRLW and we can't recover it |
13698 | // to use a BEXT instruction. |
13699 | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && |
13700 | N->getValueType(ResNo: 0) == MVT::i32 && isOneConstant(V: N->getOperand(Num: 1)) && |
13701 | N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && |
13702 | N0.hasOneUse()) { |
13703 | SDLoc DL(N); |
13704 | SDValue Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 0)); |
13705 | SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 1)); |
13706 | SDValue Srl = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Op0, N2: Op1); |
13707 | SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, N1: Srl, |
13708 | N2: DAG.getConstant(Val: 1, DL, VT: MVT::i64)); |
13709 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: And); |
13710 | } |
13711 | |
13712 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13713 | return V; |
13714 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13715 | return V; |
13716 | |
13717 | if (DCI.isAfterLegalizeDAG()) |
13718 | if (SDValue V = combineDeMorganOfBoolean(N, DAG)) |
13719 | return V; |
13720 | |
13721 | // fold (and (select lhs, rhs, cc, -1, y), x) -> |
13722 | // (select lhs, rhs, cc, x, (and x, y)) |
13723 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); |
13724 | } |
13725 | |
13726 | // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. |
13727 | // FIXME: Generalize to other binary operators with same operand. |
13728 | static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, |
13729 | SelectionDAG &DAG) { |
13730 | assert(N->getOpcode() == ISD::OR && "Unexpected opcode" ); |
13731 | |
13732 | if (N0.getOpcode() != RISCVISD::CZERO_EQZ || |
13733 | N1.getOpcode() != RISCVISD::CZERO_NEZ || |
13734 | !N0.hasOneUse() || !N1.hasOneUse()) |
13735 | return SDValue(); |
13736 | |
13737 | // Should have the same condition. |
13738 | SDValue Cond = N0.getOperand(i: 1); |
13739 | if (Cond != N1.getOperand(i: 1)) |
13740 | return SDValue(); |
13741 | |
13742 | SDValue TrueV = N0.getOperand(i: 0); |
13743 | SDValue FalseV = N1.getOperand(i: 0); |
13744 | |
13745 | if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR || |
13746 | TrueV.getOperand(i: 1) != FalseV.getOperand(i: 1) || |
13747 | !isOneConstant(V: TrueV.getOperand(i: 1)) || |
13748 | !TrueV.hasOneUse() || !FalseV.hasOneUse()) |
13749 | return SDValue(); |
13750 | |
13751 | EVT VT = N->getValueType(ResNo: 0); |
13752 | SDLoc DL(N); |
13753 | |
13754 | SDValue NewN0 = DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV.getOperand(i: 0), |
13755 | N2: Cond); |
13756 | SDValue NewN1 = DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV.getOperand(i: 0), |
13757 | N2: Cond); |
13758 | SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: NewN0, N2: NewN1); |
13759 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewOr, N2: TrueV.getOperand(i: 1)); |
13760 | } |
13761 | |
13762 | static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, |
13763 | const RISCVSubtarget &Subtarget) { |
13764 | SelectionDAG &DAG = DCI.DAG; |
13765 | |
13766 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13767 | return V; |
13768 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13769 | return V; |
13770 | |
13771 | if (DCI.isAfterLegalizeDAG()) |
13772 | if (SDValue V = combineDeMorganOfBoolean(N, DAG)) |
13773 | return V; |
13774 | |
// Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the select idiom.
13776 | // We may be able to pull a common operation out of the true and false value. |
13777 | SDValue N0 = N->getOperand(Num: 0); |
13778 | SDValue N1 = N->getOperand(Num: 1); |
13779 | if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG)) |
13780 | return V; |
13781 | if (SDValue V = combineOrOfCZERO(N, N0: N1, N1: N0, DAG)) |
13782 | return V; |
13783 | |
13784 | // fold (or (select cond, 0, y), x) -> |
13785 | // (select cond, x, (or x, y)) |
13786 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
13787 | } |
13788 | |
13789 | static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, |
13790 | const RISCVSubtarget &Subtarget) { |
13791 | SDValue N0 = N->getOperand(Num: 0); |
13792 | SDValue N1 = N->getOperand(Num: 1); |
13793 | |
13794 | // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use |
// (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
// RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13797 | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && |
13798 | N->getValueType(ResNo: 0) == MVT::i32 && isAllOnesConstant(V: N1) && |
13799 | N0.getOpcode() == ISD::SHL && isAllOnesConstant(V: N0.getOperand(i: 0)) && |
13800 | !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && N0.hasOneUse()) { |
13801 | SDLoc DL(N); |
13802 | SDValue Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 0)); |
13803 | SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 1)); |
13804 | SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: Op0, N2: Op1); |
13805 | SDValue And = DAG.getNOT(DL, Val: Shl, VT: MVT::i64); |
13806 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: And); |
13807 | } |
13808 | |
13809 | // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) |
13810 | // NOTE: Assumes ROL being legal means ROLW is legal. |
13811 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
13812 | if (N0.getOpcode() == RISCVISD::SLLW && |
13813 | isAllOnesConstant(V: N1) && isOneConstant(V: N0.getOperand(i: 0)) && |
13814 | TLI.isOperationLegal(Op: ISD::ROTL, VT: MVT::i64)) { |
13815 | SDLoc DL(N); |
13816 | return DAG.getNode(Opcode: RISCVISD::ROLW, DL, VT: MVT::i64, |
13817 | N1: DAG.getConstant(Val: ~1, DL, VT: MVT::i64), N2: N0.getOperand(i: 1)); |
13818 | } |
13819 | |
13820 | // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) |
13821 | if (N0.getOpcode() == ISD::SETCC && isOneConstant(V: N1) && N0.hasOneUse()) { |
13822 | auto *ConstN00 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)); |
13823 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get(); |
13824 | if (ConstN00 && CC == ISD::SETLT) { |
13825 | EVT VT = N0.getValueType(); |
13826 | SDLoc DL(N0); |
13827 | const APInt &Imm = ConstN00->getAPIntValue(); |
13828 | if ((Imm + 1).isSignedIntN(N: 12)) |
13829 | return DAG.getSetCC(DL, VT, LHS: N0.getOperand(i: 1), |
13830 | RHS: DAG.getConstant(Val: Imm + 1, DL, VT), Cond: CC); |
13831 | } |
13832 | } |
13833 | |
13834 | // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with |
13835 | // RV64LegalI32 when the setcc is created after type legalization. An i1 xor |
// would have been promoted to i32, but the setcc would have an i64 result.
13837 | if (N->getValueType(ResNo: 0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE && |
13838 | isOneConstant(V: N1) && N0.getOperand(i: 0).getOpcode() == ISD::SETCC) { |
13839 | SDValue N00 = N0.getOperand(i: 0); |
13840 | SDLoc DL(N); |
13841 | SDValue LHS = N00.getOperand(i: 0); |
13842 | SDValue RHS = N00.getOperand(i: 1); |
13843 | SDValue CC = N00.getOperand(i: 2); |
13844 | ISD::CondCode NotCC = ISD::getSetCCInverse(Operation: cast<CondCodeSDNode>(Val&: CC)->get(), |
13845 | Type: LHS.getValueType()); |
13846 | SDValue Setcc = DAG.getSetCC(DL: SDLoc(N00), VT: N0.getOperand(i: 0).getValueType(), |
13847 | LHS, RHS, Cond: NotCC); |
13848 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N0), VT: N->getValueType(ResNo: 0), Operand: Setcc); |
13849 | } |
13850 | |
13851 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
13852 | return V; |
13853 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
13854 | return V; |
13855 | |
13856 | // fold (xor (select cond, 0, y), x) -> |
13857 | // (select cond, x, (xor x, y)) |
13858 | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
13859 | } |
13860 | |
13861 | // Try to expand a scalar multiply to a faster sequence. |
13862 | static SDValue expandMul(SDNode *N, SelectionDAG &DAG, |
13863 | TargetLowering::DAGCombinerInfo &DCI, |
13864 | const RISCVSubtarget &Subtarget) { |
13865 | |
13866 | EVT VT = N->getValueType(ResNo: 0); |
13867 | |
13868 | // LI + MUL is usually smaller than the alternative sequence. |
13869 | if (DAG.getMachineFunction().getFunction().hasMinSize()) |
13870 | return SDValue(); |
13871 | |
13872 | if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) |
13873 | return SDValue(); |
13874 | |
13875 | if (VT != Subtarget.getXLenVT()) |
13876 | return SDValue(); |
13877 | |
13878 | const bool HasShlAdd = |
13879 | Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa(); |
13880 | |
13881 | ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
13882 | if (!CNode) |
13883 | return SDValue(); |
13884 | uint64_t MulAmt = CNode->getZExtValue(); |
13885 | |
// WARNING: The code below is knowingly incorrect with regard to undef
// semantics. We're adding additional uses of X here, and in principle, we should
// be freezing X before doing so. However, adding freeze here causes real
// regressions, and no other target properly freezes X in these cases either.
13890 | SDValue X = N->getOperand(Num: 0); |
13891 | |
13892 | if (HasShlAdd) { |
13893 | for (uint64_t Divisor : {3, 5, 9}) { |
13894 | if (MulAmt % Divisor != 0) |
13895 | continue; |
13896 | uint64_t MulAmt2 = MulAmt / Divisor; |
13897 | // 3/5/9 * 2^N -> shl (shXadd X, X), N |
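// e.g., 20 = 5 * 4 -> (shl (sh2add X, X), 2)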
13898 | if (isPowerOf2_64(Value: MulAmt2)) { |
13899 | SDLoc DL(N); |
13900 | SDValue X = N->getOperand(Num: 0); |
13901 | // Put the shift first if we can fold a zext into the |
13902 | // shift forming a slli.uw. |
13903 | if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: X.getOperand(i: 1)) && |
13904 | X.getConstantOperandVal(i: 1) == UINT64_C(0xffffffff)) { |
13905 | SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, |
13906 | N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2), DL, VT)); |
13907 | return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Shl, |
13908 | N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), |
13909 | N3: Shl); |
13910 | } |
// Otherwise, put the shl second so that it can fold with the following
13912 | // instructions (e.g. sext or add). |
13913 | SDValue Mul359 = |
13914 | DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13915 | N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X); |
13916 | return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Mul359, |
13917 | N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2), DL, VT)); |
13918 | } |
13919 | |
13920 | // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) |
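// e.g., 15 = 3 * 5 -> (sh2add (sh1add X, X), (sh1add X, X))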
13921 | if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) { |
13922 | SDLoc DL(N); |
13923 | SDValue Mul359 = |
13924 | DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13925 | N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X); |
13926 | return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359, |
13927 | N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2 - 1), DL, VT), |
13928 | N3: Mul359); |
13929 | } |
13930 | } |
13931 | |
// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this is a sum of two powers of 2 because that's
// easy. Then count the trailing zeros to determine the 2/4/8 factor.
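// e.g., 34 = 32 + 2 -> (sh1add X, (shl X, 5))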
13935 | if (isPowerOf2_64(Value: MulAmt & (MulAmt - 1))) { |
13936 | unsigned ScaleShift = llvm::countr_zero(Val: MulAmt); |
13937 | if (ScaleShift >= 1 && ScaleShift < 4) { |
13938 | unsigned ShiftAmt = Log2_64(Value: (MulAmt & (MulAmt - 1))); |
13939 | SDLoc DL(N); |
13940 | SDValue Shift1 = |
13941 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT)); |
13942 | return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13943 | N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: Shift1); |
13944 | } |
13945 | } |
13946 | |
13947 | // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) |
// This is the two instruction form; there are also three instruction
// variants we could implement, e.g.
13950 | // (2^(1,2,3) * 3,5,9 + 1) << C2 |
13951 | // 2^(C1>3) * 3,5,9 +/- 1 |
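// e.g., 11 = 2*5 + 1 -> (sh1add (sh2add X, X), X)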
13952 | for (uint64_t Divisor : {3, 5, 9}) { |
13953 | uint64_t C = MulAmt - 1; |
13954 | if (C <= Divisor) |
13955 | continue; |
13956 | unsigned TZ = llvm::countr_zero(Val: C); |
13957 | if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) { |
13958 | SDLoc DL(N); |
13959 | SDValue Mul359 = |
13960 | DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13961 | N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X); |
13962 | return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359, |
13963 | N2: DAG.getConstant(Val: TZ, DL, VT), N3: X); |
13964 | } |
13965 | } |
13966 | |
13967 | // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) |
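// e.g., 67 = 64 + 2 + 1 -> (add (shl X, 6), (sh1add X, X))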
13968 | if (MulAmt > 2 && isPowerOf2_64(Value: (MulAmt - 1) & (MulAmt - 2))) { |
13969 | unsigned ScaleShift = llvm::countr_zero(Val: MulAmt - 1); |
13970 | if (ScaleShift >= 1 && ScaleShift < 4) { |
13971 | unsigned ShiftAmt = Log2_64(Value: ((MulAmt - 1) & (MulAmt - 2))); |
13972 | SDLoc DL(N); |
13973 | SDValue Shift1 = |
13974 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT)); |
13975 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shift1, |
13976 | N2: DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13977 | N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: X)); |
13978 | } |
13979 | } |
13980 | |
// 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
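// e.g., 29 = 32 - 3 -> (sub (shl X, 5), (sh1add X, X))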
13982 | for (uint64_t Offset : {3, 5, 9}) { |
13983 | if (isPowerOf2_64(Value: MulAmt + Offset)) { |
13984 | SDLoc DL(N); |
13985 | SDValue Shift1 = |
13986 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, |
13987 | N2: DAG.getConstant(Val: Log2_64(Value: MulAmt + Offset), DL, VT)); |
13988 | SDValue Mul359 = |
13989 | DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X, |
13990 | N2: DAG.getConstant(Val: Log2_64(Value: Offset - 1), DL, VT), N3: X); |
13991 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Shift1, N2: Mul359); |
13992 | } |
13993 | } |
13994 | } |
13995 | |
13996 | // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2)) |
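// e.g., 56 = 64 - 8 -> (sub (shl X, 6), (shl X, 3))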
13997 | uint64_t MulAmtLowBit = MulAmt & (-MulAmt); |
13998 | if (isPowerOf2_64(Value: MulAmt + MulAmtLowBit)) { |
13999 | uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit; |
14000 | SDLoc DL(N); |
14001 | SDValue Shift1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0), |
14002 | N2: DAG.getConstant(Val: Log2_64(Value: ShiftAmt1), DL, VT)); |
14003 | SDValue Shift2 = |
14004 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0), |
14005 | N2: DAG.getConstant(Val: Log2_64(Value: MulAmtLowBit), DL, VT)); |
14006 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Shift1, N2: Shift2); |
14007 | } |
14008 | |
14009 | return SDValue(); |
14010 | } |
14011 | |
14012 | // Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) -> |
14013 | // (bitcast (sra (v2Xi16 (bitcast X)), 15)) |
14014 | // Same for other equivalent types with other equivalent constants. |
14015 | static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) { |
14016 | EVT VT = N->getValueType(ResNo: 0); |
14017 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
14018 | |
14019 | // Do this for legal vectors unless they are i1 or i8 vectors. |
14020 | if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16) |
14021 | return SDValue(); |
14022 | |
14023 | if (N->getOperand(Num: 0).getOpcode() != ISD::AND || |
14024 | N->getOperand(Num: 0).getOperand(i: 0).getOpcode() != ISD::SRL) |
14025 | return SDValue(); |
14026 | |
14027 | SDValue And = N->getOperand(Num: 0); |
14028 | SDValue Srl = And.getOperand(i: 0); |
14029 | |
14030 | APInt V1, V2, V3; |
14031 | if (!ISD::isConstantSplatVector(N: N->getOperand(Num: 1).getNode(), SplatValue&: V1) || |
14032 | !ISD::isConstantSplatVector(N: And.getOperand(i: 1).getNode(), SplatValue&: V2) || |
14033 | !ISD::isConstantSplatVector(N: Srl.getOperand(i: 1).getNode(), SplatValue&: V3)) |
14034 | return SDValue(); |
14035 | |
14036 | unsigned HalfSize = VT.getScalarSizeInBits() / 2; |
14037 | if (!V1.isMask(numBits: HalfSize) || V2 != (1ULL | 1ULL << HalfSize) || |
14038 | V3 != (HalfSize - 1)) |
14039 | return SDValue(); |
14040 | |
14041 | EVT HalfVT = EVT::getVectorVT(Context&: *DAG.getContext(), |
14042 | VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: HalfSize), |
14043 | EC: VT.getVectorElementCount() * 2); |
14044 | SDLoc DL(N); |
14045 | SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: HalfVT, Operand: Srl.getOperand(i: 0)); |
14046 | SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT: HalfVT, N1: Cast, |
14047 | N2: DAG.getConstant(Val: HalfSize - 1, DL, VT: HalfVT)); |
14048 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Sra); |
14049 | } |
14050 | |
14051 | static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, |
14052 | TargetLowering::DAGCombinerInfo &DCI, |
14053 | const RISCVSubtarget &Subtarget) { |
14054 | EVT VT = N->getValueType(ResNo: 0); |
14055 | if (!VT.isVector()) |
14056 | return expandMul(N, DAG, DCI, Subtarget); |
14057 | |
14058 | SDLoc DL(N); |
14059 | SDValue N0 = N->getOperand(Num: 0); |
14060 | SDValue N1 = N->getOperand(Num: 1); |
14061 | SDValue MulOper; |
14062 | unsigned AddSubOpc; |
14063 | |
14064 | // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) |
// (mul x, (add y, 1)) -> (add x, (mul x, y))
14066 | // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) |
14067 | // (mul x, (sub 1, y)) -> (sub x, (mul x, y)) |
14068 | auto IsAddSubWith1 = [&](SDValue V) -> bool { |
14069 | AddSubOpc = V->getOpcode(); |
14070 | if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { |
14071 | SDValue Opnd = V->getOperand(Num: 1); |
14072 | MulOper = V->getOperand(Num: 0); |
14073 | if (AddSubOpc == ISD::SUB) |
14074 | std::swap(a&: Opnd, b&: MulOper); |
14075 | if (isOneOrOneSplat(V: Opnd)) |
14076 | return true; |
14077 | } |
14078 | return false; |
14079 | }; |
14080 | |
14081 | if (IsAddSubWith1(N0)) { |
14082 | SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1, N2: MulOper); |
14083 | return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1, N2: MulVal); |
14084 | } |
14085 | |
14086 | if (IsAddSubWith1(N1)) { |
14087 | SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0, N2: MulOper); |
14088 | return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1: N0, N2: MulVal); |
14089 | } |
14090 | |
14091 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
14092 | return V; |
14093 | |
14094 | if (SDValue V = combineVectorMulToSraBitcast(N, DAG)) |
14095 | return V; |
14096 | |
14097 | return SDValue(); |
14098 | } |
14099 | |
/// Indexed load/store instructions zero-extend their indices, so try to
/// narrow the type of the index operand.
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType,
                        SelectionDAG &DAG) {
14103 | if (isIndexTypeSigned(IndexType)) |
14104 | return false; |
14105 | |
14106 | if (!N->hasOneUse()) |
14107 | return false; |
14108 | |
14109 | EVT VT = N.getValueType(); |
14110 | SDLoc DL(N); |
14111 | |
14112 | // In general, what we're doing here is seeing if we can sink a truncate to |
14113 | // a smaller element type into the expression tree building our index. |
14114 | // TODO: We can generalize this and handle a bunch more cases if useful. |
14115 | |
14116 | // Narrow a buildvector to the narrowest element type. This requires less |
14117 | // work and less register pressure at high LMUL, and creates smaller constants |
14118 | // which may be cheaper to materialize. |
14119 | if (ISD::isBuildVectorOfConstantSDNodes(N: N.getNode())) { |
14120 | KnownBits Known = DAG.computeKnownBits(Op: N); |
14121 | unsigned ActiveBits = std::max(a: 8u, b: Known.countMaxActiveBits()); |
14122 | LLVMContext &C = *DAG.getContext(); |
14123 | EVT ResultVT = EVT::getIntegerVT(Context&: C, BitWidth: ActiveBits).getRoundIntegerType(Context&: C); |
14124 | if (ResultVT.bitsLT(VT: VT.getVectorElementType())) { |
14125 | N = DAG.getNode(Opcode: ISD::TRUNCATE, DL, |
14126 | VT: VT.changeVectorElementType(EltVT: ResultVT), Operand: N); |
14127 | return true; |
14128 | } |
14129 | } |
14130 | |
// Handle the pattern (shl (zext x to ty), C) where bits(x) + C < bits(ty).
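// For example, (shl (zext nxv4i8 X to nxv4i64), splat 2) can be narrowed to
// (shl (zext nxv4i8 X to nxv4i16), splat 2), since 8 bits shifted left by 2
// still fit in 16 bits.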
14132 | if (N.getOpcode() != ISD::SHL) |
14133 | return false; |
14134 | |
14135 | SDValue N0 = N.getOperand(i: 0); |
14136 | if (N0.getOpcode() != ISD::ZERO_EXTEND && |
14137 | N0.getOpcode() != RISCVISD::VZEXT_VL) |
14138 | return false; |
14139 | if (!N0->hasOneUse()) |
14140 | return false; |
14141 | |
14142 | APInt ShAmt; |
14143 | SDValue N1 = N.getOperand(i: 1); |
14144 | if (!ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: ShAmt)) |
14145 | return false; |
14146 | |
14147 | SDValue Src = N0.getOperand(i: 0); |
14148 | EVT SrcVT = Src.getValueType(); |
14149 | unsigned SrcElen = SrcVT.getScalarSizeInBits(); |
14150 | unsigned ShAmtV = ShAmt.getZExtValue(); |
14151 | unsigned NewElen = PowerOf2Ceil(A: SrcElen + ShAmtV); |
14152 | NewElen = std::max(a: NewElen, b: 8U); |
14153 | |
14154 | // Skip if NewElen is not narrower than the original extended type. |
14155 | if (NewElen >= N0.getValueType().getScalarSizeInBits()) |
14156 | return false; |
14157 | |
14158 | EVT NewEltVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewElen); |
14159 | EVT NewVT = SrcVT.changeVectorElementType(EltVT: NewEltVT); |
14160 | |
14161 | SDValue NewExt = DAG.getNode(Opcode: N0->getOpcode(), DL, VT: NewVT, Ops: N0->ops()); |
14162 | SDValue NewShAmtVec = DAG.getConstant(Val: ShAmtV, DL, VT: NewVT); |
14163 | N = DAG.getNode(Opcode: ISD::SHL, DL, VT: NewVT, N1: NewExt, N2: NewShAmtVec); |
14164 | return true; |
14165 | } |
14166 | |
14167 | // Replace (seteq (i64 (and X, 0xffffffff)), C1) with |
14168 | // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from |
14169 | // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg |
14170 | // can become a sext.w instead of a shift pair. |
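// For example, (seteq (and X, 0xffffffff), 0xffff1234) becomes
// (seteq (sext_inreg X, i32), 0xffffffffffff1234).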
14171 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
14172 | const RISCVSubtarget &Subtarget) { |
14173 | SDValue N0 = N->getOperand(Num: 0); |
14174 | SDValue N1 = N->getOperand(Num: 1); |
14175 | EVT VT = N->getValueType(ResNo: 0); |
14176 | EVT OpVT = N0.getValueType(); |
14177 | |
14178 | if (OpVT != MVT::i64 || !Subtarget.is64Bit()) |
14179 | return SDValue(); |
14180 | |
14181 | // RHS needs to be a constant. |
14182 | auto *N1C = dyn_cast<ConstantSDNode>(Val&: N1); |
14183 | if (!N1C) |
14184 | return SDValue(); |
14185 | |
14186 | // LHS needs to be (and X, 0xffffffff). |
14187 | if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || |
14188 | !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) || |
14189 | N0.getConstantOperandVal(i: 1) != UINT64_C(0xffffffff)) |
14190 | return SDValue(); |
14191 | |
14192 | // Looking for an equality compare. |
14193 | ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
14194 | if (!isIntEqualitySetCC(Code: Cond)) |
14195 | return SDValue(); |
14196 | |
14197 | // Don't do this if the sign bit is provably zero, it will be turned back into |
14198 | // an AND. |
14199 | APInt SignMask = APInt::getOneBitSet(numBits: 64, BitNo: 31); |
14200 | if (DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask: SignMask)) |
14201 | return SDValue(); |
14202 | |
14203 | const APInt &C1 = N1C->getAPIntValue(); |
14204 | |
14205 | SDLoc dl(N); |
14206 | // If the constant is larger than 2^32 - 1 it is impossible for both sides |
14207 | // to be equal. |
14208 | if (C1.getActiveBits() > 32) |
14209 | return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT); |
14210 | |
14211 | SDValue SExtOp = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: N, VT: OpVT, |
14212 | N1: N0.getOperand(i: 0), N2: DAG.getValueType(MVT::i32)); |
14213 | return DAG.getSetCC(DL: dl, VT, LHS: SExtOp, RHS: DAG.getConstant(Val: C1.trunc(width: 32).sext(width: 64), |
14214 | DL: dl, VT: OpVT), Cond); |
14215 | } |
14216 | |
14217 | static SDValue |
14218 | performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, |
14219 | const RISCVSubtarget &Subtarget) { |
14220 | SDValue Src = N->getOperand(Num: 0); |
14221 | EVT VT = N->getValueType(ResNo: 0); |
14222 | |
14223 | // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) |
14224 | if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && |
14225 | cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().bitsGE(VT: MVT::i16)) |
14226 | return DAG.getNode(Opcode: RISCVISD::FMV_X_SIGNEXTH, DL: SDLoc(N), VT, |
14227 | Operand: Src.getOperand(i: 0)); |
14228 | |
14229 | return SDValue(); |
14230 | } |
14231 | |
14232 | namespace { |
14233 | // Forward declaration of the structure holding the necessary information to |
14234 | // apply a combine. |
14235 | struct CombineResult; |
14236 | |
14237 | enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 }; |
14238 | /// Helper class for folding sign/zero extensions. |
14239 | /// In particular, this class is used for the following combines: |
14240 | /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w |
14241 | /// sub | sub_vl -> vwsub(u) | vwsub(u)_w |
14242 | /// mul | mul_vl -> vwmul(u) | vwmul_su |
14243 | /// shl | shl_vl -> vwsll |
14244 | /// fadd -> vfwadd | vfwadd_w |
14245 | /// fsub -> vfwsub | vfwsub_w |
14246 | /// fmul -> vfwmul |
14247 | /// An object of this class represents an operand of the operation we want to |
14248 | /// combine. |
14249 | /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of |
14250 | /// NodeExtensionHelper for `a` and one for `b`. |
14251 | /// |
14252 | /// This class abstracts away how the extension is materialized and |
14253 | /// how its number of users affect the combines. |
14254 | /// |
14255 | /// In particular: |
14256 | /// - VWADD_W is conceptually == add(op0, sext(op1)) |
14257 | /// - VWADDU_W == add(op0, zext(op1)) |
14258 | /// - VWSUB_W == sub(op0, sext(op1)) |
14259 | /// - VWSUBU_W == sub(op0, zext(op1)) |
14260 | /// - VFWADD_W == fadd(op0, fpext(op1)) |
14261 | /// - VFWSUB_W == fsub(op0, fpext(op1)) |
14262 | /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to |
14263 | /// zext|sext(smaller_value). |
14264 | struct NodeExtensionHelper { |
14265 | /// Records if this operand is like being zero extended. |
14266 | bool SupportsZExt; |
14267 | /// Records if this operand is like being sign extended. |
14268 | /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For |
14269 | /// instance, a splat constant (e.g., 3), would support being both sign and |
14270 | /// zero extended. |
14271 | bool SupportsSExt; |
/// Records if this operand is like being floating-point extended.
14273 | bool SupportsFPExt; |
14274 | /// This boolean captures whether we care if this operand would still be |
14275 | /// around after the folding happens. |
14276 | bool EnforceOneUse; |
14277 | /// Original value that this NodeExtensionHelper represents. |
14278 | SDValue OrigOperand; |
14279 | |
14280 | /// Get the value feeding the extension or the value itself. |
14281 | /// E.g., for zext(a), this would return a. |
14282 | SDValue getSource() const { |
14283 | switch (OrigOperand.getOpcode()) { |
14284 | case ISD::ZERO_EXTEND: |
14285 | case ISD::SIGN_EXTEND: |
14286 | case RISCVISD::VSEXT_VL: |
14287 | case RISCVISD::VZEXT_VL: |
14288 | case RISCVISD::FP_EXTEND_VL: |
14289 | return OrigOperand.getOperand(i: 0); |
14290 | default: |
14291 | return OrigOperand; |
14292 | } |
14293 | } |
14294 | |
14295 | /// Check if this instance represents a splat. |
14296 | bool isSplat() const { |
14297 | return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL || |
14298 | OrigOperand.getOpcode() == ISD::SPLAT_VECTOR; |
14299 | } |
14300 | |
14301 | /// Get the extended opcode. |
14302 | unsigned getExtOpc(ExtKind SupportsExt) const { |
14303 | switch (SupportsExt) { |
14304 | case ExtKind::SExt: |
14305 | return RISCVISD::VSEXT_VL; |
14306 | case ExtKind::ZExt: |
14307 | return RISCVISD::VZEXT_VL; |
14308 | case ExtKind::FPExt: |
14309 | return RISCVISD::FP_EXTEND_VL; |
14310 | } |
14311 | llvm_unreachable("Unknown ExtKind enum" ); |
14312 | } |
14313 | |
14314 | /// Get or create a value that can feed \p Root with the given extension \p |
/// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of
/// this operand. \see ::getSource().
14317 | SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, |
14318 | const RISCVSubtarget &Subtarget, |
14319 | std::optional<ExtKind> SupportsExt) const { |
14320 | if (!SupportsExt.has_value()) |
14321 | return OrigOperand; |
14322 | |
14323 | MVT NarrowVT = getNarrowType(Root, SupportsExt: *SupportsExt); |
14324 | |
14325 | SDValue Source = getSource(); |
14326 | assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType())); |
14327 | if (Source.getValueType() == NarrowVT) |
14328 | return Source; |
14329 | |
14330 | unsigned ExtOpc = getExtOpc(SupportsExt: *SupportsExt); |
14331 | |
14332 | // If we need an extension, we should be changing the type. |
14333 | SDLoc DL(OrigOperand); |
14334 | auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); |
14335 | switch (OrigOperand.getOpcode()) { |
14336 | case ISD::ZERO_EXTEND: |
14337 | case ISD::SIGN_EXTEND: |
14338 | case RISCVISD::VSEXT_VL: |
14339 | case RISCVISD::VZEXT_VL: |
14340 | case RISCVISD::FP_EXTEND_VL: |
14341 | return DAG.getNode(Opcode: ExtOpc, DL, VT: NarrowVT, N1: Source, N2: Mask, N3: VL); |
14342 | case ISD::SPLAT_VECTOR: |
14343 | return DAG.getSplat(VT: NarrowVT, DL, Op: Source.getOperand(i: 0)); |
14344 | case RISCVISD::VMV_V_X_VL: |
14345 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: NarrowVT, |
14346 | N1: DAG.getUNDEF(VT: NarrowVT), N2: Source.getOperand(i: 1), N3: VL); |
14347 | case RISCVISD::VFMV_V_F_VL: |
14348 | Source = Source.getOperand(i: 1); |
14349 | assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source" ); |
14350 | Source = Source.getOperand(i: 0); |
14351 | assert(Source.getValueType() == NarrowVT.getVectorElementType()); |
14352 | return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: NarrowVT, |
14353 | N1: DAG.getUNDEF(VT: NarrowVT), N2: Source, N3: VL); |
14354 | default: |
14355 | // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL |
14356 | // and that operand should already have the right NarrowVT so no |
14357 | // extension should be required at this point. |
14358 | llvm_unreachable("Unsupported opcode" ); |
14359 | } |
14360 | } |
14361 | |
14362 | /// Helper function to get the narrow type for \p Root. |
14363 | /// The narrow type is the type of \p Root where we divided the size of each |
14364 | /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>. |
14365 | /// \pre Both the narrow type and the original type should be legal. |
14366 | static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) { |
14367 | MVT VT = Root->getSimpleValueType(ResNo: 0); |
14368 | |
14369 | // Determine the narrow size. |
14370 | unsigned NarrowSize = VT.getScalarSizeInBits() / 2; |
14371 | |
14372 | MVT EltVT = SupportsExt == ExtKind::FPExt |
14373 | ? MVT::getFloatingPointVT(BitWidth: NarrowSize) |
14374 | : MVT::getIntegerVT(BitWidth: NarrowSize); |
14375 | |
14376 | assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) && |
14377 | "Trying to extend something we can't represent" ); |
14378 | MVT NarrowVT = MVT::getVectorVT(VT: EltVT, EC: VT.getVectorElementCount()); |
14379 | return NarrowVT; |
14380 | } |
14381 | |
14382 | /// Get the opcode to materialize: |
14383 | /// Opcode(sext(a), sext(b)) -> newOpcode(a, b) |
14384 | static unsigned getSExtOpcode(unsigned Opcode) { |
14385 | switch (Opcode) { |
14386 | case ISD::ADD: |
14387 | case RISCVISD::ADD_VL: |
14388 | case RISCVISD::VWADD_W_VL: |
14389 | case RISCVISD::VWADDU_W_VL: |
14390 | case ISD::OR: |
14391 | return RISCVISD::VWADD_VL; |
14392 | case ISD::SUB: |
14393 | case RISCVISD::SUB_VL: |
14394 | case RISCVISD::VWSUB_W_VL: |
14395 | case RISCVISD::VWSUBU_W_VL: |
14396 | return RISCVISD::VWSUB_VL; |
14397 | case ISD::MUL: |
14398 | case RISCVISD::MUL_VL: |
14399 | return RISCVISD::VWMUL_VL; |
14400 | default: |
14401 | llvm_unreachable("Unexpected opcode" ); |
14402 | } |
14403 | } |
14404 | |
14405 | /// Get the opcode to materialize: |
14406 | /// Opcode(zext(a), zext(b)) -> newOpcode(a, b) |
14407 | static unsigned getZExtOpcode(unsigned Opcode) { |
14408 | switch (Opcode) { |
14409 | case ISD::ADD: |
14410 | case RISCVISD::ADD_VL: |
14411 | case RISCVISD::VWADD_W_VL: |
14412 | case RISCVISD::VWADDU_W_VL: |
14413 | case ISD::OR: |
14414 | return RISCVISD::VWADDU_VL; |
14415 | case ISD::SUB: |
14416 | case RISCVISD::SUB_VL: |
14417 | case RISCVISD::VWSUB_W_VL: |
14418 | case RISCVISD::VWSUBU_W_VL: |
14419 | return RISCVISD::VWSUBU_VL; |
14420 | case ISD::MUL: |
14421 | case RISCVISD::MUL_VL: |
14422 | return RISCVISD::VWMULU_VL; |
14423 | case ISD::SHL: |
14424 | case RISCVISD::SHL_VL: |
14425 | return RISCVISD::VWSLL_VL; |
14426 | default: |
14427 | llvm_unreachable("Unexpected opcode" ); |
14428 | } |
14429 | } |
14430 | |
14431 | /// Get the opcode to materialize: |
14432 | /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b) |
14433 | static unsigned getFPExtOpcode(unsigned Opcode) { |
14434 | switch (Opcode) { |
14435 | case RISCVISD::FADD_VL: |
14436 | case RISCVISD::VFWADD_W_VL: |
14437 | return RISCVISD::VFWADD_VL; |
14438 | case RISCVISD::FSUB_VL: |
14439 | case RISCVISD::VFWSUB_W_VL: |
14440 | return RISCVISD::VFWSUB_VL; |
14441 | case RISCVISD::FMUL_VL: |
14442 | return RISCVISD::VFWMUL_VL; |
14443 | default: |
14444 | llvm_unreachable("Unexpected opcode" ); |
14445 | } |
14446 | } |
14447 | |
14448 | /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> |
14449 | /// newOpcode(a, b). |
14450 | static unsigned getSUOpcode(unsigned Opcode) { |
14451 | assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && |
14452 | "SU is only supported for MUL" ); |
14453 | return RISCVISD::VWMULSU_VL; |
14454 | } |
14455 | |
14456 | /// Get the opcode to materialize |
14457 | /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b). |
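/// For example, ISD::ADD with ExtKind::ZExt maps to RISCVISD::VWADDU_W_VL,
/// i.e. (add a, (zext b)) can be rewritten as a vwaddu.wv-style node.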
14458 | static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) { |
14459 | switch (Opcode) { |
14460 | case ISD::ADD: |
14461 | case RISCVISD::ADD_VL: |
14462 | case ISD::OR: |
14463 | return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL |
14464 | : RISCVISD::VWADDU_W_VL; |
14465 | case ISD::SUB: |
14466 | case RISCVISD::SUB_VL: |
14467 | return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL |
14468 | : RISCVISD::VWSUBU_W_VL; |
14469 | case RISCVISD::FADD_VL: |
14470 | return RISCVISD::VFWADD_W_VL; |
14471 | case RISCVISD::FSUB_VL: |
14472 | return RISCVISD::VFWSUB_W_VL; |
14473 | default: |
14474 | llvm_unreachable("Unexpected opcode" ); |
14475 | } |
14476 | } |
14477 | |
14478 | using CombineToTry = std::function<std::optional<CombineResult>( |
14479 | SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, |
14480 | const NodeExtensionHelper & /*RHS*/, SelectionDAG &, |
14481 | const RISCVSubtarget &)>; |
14482 | |
14483 | /// Check if this node needs to be fully folded or extended for all users. |
14484 | bool needToPromoteOtherUsers() const { return EnforceOneUse; } |
14485 | |
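/// Set the extension support flags for a splat operand. For example
/// (illustrative), a VMV_V_X_VL splat of the constant 42 into an i32-element
/// vector, with an undef passthru, has well under 16 significant bits, so it
/// supports both sign and zero extension and EnforceOneUse is relaxed.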
14486 | void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG, |
14487 | const RISCVSubtarget &Subtarget) { |
14488 | unsigned Opc = OrigOperand.getOpcode(); |
14489 | MVT VT = OrigOperand.getSimpleValueType(); |
14490 | |
14491 | assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) && |
14492 | "Unexpected Opcode" ); |
14493 | |
14494 | // The passthru must be undef for tail agnostic.
14495 | if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(i: 0).isUndef()) |
14496 | return; |
14497 | |
14498 | // Get the scalar value. |
14499 | SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(i: 0) |
14500 | : OrigOperand.getOperand(i: 1); |
14501 | |
14502 | // See if we have enough sign bits or zero bits in the scalar to use a |
14503 | // widening opcode by splatting to a smaller element size.
14504 | unsigned EltBits = VT.getScalarSizeInBits(); |
14505 | unsigned ScalarBits = Op.getValueSizeInBits(); |
14506 | // If we're not getting all bits from the element, we need special handling. |
14507 | if (ScalarBits < EltBits) { |
14508 | // This should only occur on RV32. |
14509 | assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 && |
14510 | !Subtarget.is64Bit() && "Unexpected splat" ); |
14511 | // vmv.v.x sign extends narrow inputs. |
14512 | SupportsSExt = true; |
14513 | |
14514 | // If the input is positive, then sign extend is also zero extend. |
14515 | if (DAG.SignBitIsZero(Op)) |
14516 | SupportsZExt = true; |
14517 | |
14518 | EnforceOneUse = false; |
14519 | return; |
14520 | } |
14521 | |
14522 | unsigned NarrowSize = EltBits / 2; |
14523 | // If the narrow type cannot be expressed with a legal VMV, |
14524 | // this is not a valid candidate. |
14525 | if (NarrowSize < 8) |
14526 | return; |
14527 | |
14528 | if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) |
14529 | SupportsSExt = true; |
14530 | |
14531 | if (DAG.MaskedValueIsZero(Op, |
14532 | Mask: APInt::getBitsSetFrom(numBits: ScalarBits, loBit: NarrowSize))) |
14533 | SupportsZExt = true; |
14534 | |
14535 | EnforceOneUse = false; |
14536 | } |
14537 | |
14538 | /// Helper method to set the various fields of this struct based on the |
14539 | /// type of \p Root. |
14540 | void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, |
14541 | const RISCVSubtarget &Subtarget) { |
14542 | SupportsZExt = false; |
14543 | SupportsSExt = false; |
14544 | SupportsFPExt = false; |
14545 | EnforceOneUse = true; |
14546 | unsigned Opc = OrigOperand.getOpcode(); |
14547 | // For the nodes we handle below, we end up using their inputs directly: see |
14548 | // getSource(). However since they either don't have a passthru or we check |
14549 | // that their passthru is undef, we can safely ignore their mask and VL. |
14550 | switch (Opc) { |
14551 | case ISD::ZERO_EXTEND: |
14552 | case ISD::SIGN_EXTEND: { |
14553 | MVT VT = OrigOperand.getSimpleValueType(); |
14554 | if (!VT.isVector()) |
14555 | break; |
14556 | |
14557 | SDValue NarrowElt = OrigOperand.getOperand(i: 0); |
14558 | MVT NarrowVT = NarrowElt.getSimpleValueType(); |
14559 | // i1 types are legal but we can't select V{S,Z}EXT_VLs with them. |
14560 | if (NarrowVT.getVectorElementType() == MVT::i1) |
14561 | break; |
14562 | |
14563 | SupportsZExt = Opc == ISD::ZERO_EXTEND; |
14564 | SupportsSExt = Opc == ISD::SIGN_EXTEND; |
14565 | break; |
14566 | } |
14567 | case RISCVISD::VZEXT_VL: |
14568 | SupportsZExt = true; |
14569 | break; |
14570 | case RISCVISD::VSEXT_VL: |
14571 | SupportsSExt = true; |
14572 | break; |
14573 | case RISCVISD::FP_EXTEND_VL: |
14574 | SupportsFPExt = true; |
14575 | break; |
14576 | case ISD::SPLAT_VECTOR: |
14577 | case RISCVISD::VMV_V_X_VL: |
14578 | fillUpExtensionSupportForSplat(Root, DAG, Subtarget); |
14579 | break; |
14580 | case RISCVISD::VFMV_V_F_VL: { |
14581 | MVT VT = OrigOperand.getSimpleValueType(); |
14582 | |
14583 | if (!OrigOperand.getOperand(i: 0).isUndef()) |
14584 | break; |
14585 | |
14586 | SDValue Op = OrigOperand.getOperand(i: 1); |
14587 | if (Op.getOpcode() != ISD::FP_EXTEND) |
14588 | break; |
14589 | |
14590 | unsigned NarrowSize = VT.getScalarSizeInBits() / 2; |
14591 | unsigned ScalarBits = Op.getOperand(i: 0).getValueSizeInBits(); |
14592 | if (NarrowSize != ScalarBits) |
14593 | break; |
14594 | |
14595 | SupportsFPExt = true; |
14596 | break; |
14597 | } |
14598 | default: |
14599 | break; |
14600 | } |
14601 | } |
14602 | |
14603 | /// Check if \p Root supports any extension folding combines. |
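/// For example, a scalable-vector ISD::ADD or a RISCVISD::ADD_VL node is a
/// supported root; ISD::SHL is only supported on scalable vectors when Zvbb
/// (vwsll) is available, and ISD::OR additionally requires the disjoint flag.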
14604 | static bool isSupportedRoot(const SDNode *Root, |
14605 | const RISCVSubtarget &Subtarget) { |
14606 | switch (Root->getOpcode()) { |
14607 | case ISD::ADD: |
14608 | case ISD::SUB: |
14609 | case ISD::MUL: { |
14610 | return Root->getValueType(ResNo: 0).isScalableVector(); |
14611 | } |
14612 | case ISD::OR: { |
14613 | return Root->getValueType(ResNo: 0).isScalableVector() && |
14614 | Root->getFlags().hasDisjoint(); |
14615 | } |
14616 | // Vector Widening Integer Add/Sub/Mul Instructions |
14617 | case RISCVISD::ADD_VL: |
14618 | case RISCVISD::MUL_VL: |
14619 | case RISCVISD::VWADD_W_VL: |
14620 | case RISCVISD::VWADDU_W_VL: |
14621 | case RISCVISD::SUB_VL: |
14622 | case RISCVISD::VWSUB_W_VL: |
14623 | case RISCVISD::VWSUBU_W_VL: |
14624 | // Vector Widening Floating-Point Add/Sub/Mul Instructions |
14625 | case RISCVISD::FADD_VL: |
14626 | case RISCVISD::FSUB_VL: |
14627 | case RISCVISD::FMUL_VL: |
14628 | case RISCVISD::VFWADD_W_VL: |
14629 | case RISCVISD::VFWSUB_W_VL: |
14630 | return true; |
14631 | case ISD::SHL: |
14632 | return Root->getValueType(ResNo: 0).isScalableVector() && |
14633 | Subtarget.hasStdExtZvbb(); |
14634 | case RISCVISD::SHL_VL: |
14635 | return Subtarget.hasStdExtZvbb(); |
14636 | default: |
14637 | return false; |
14638 | } |
14639 | } |
14640 | |
14641 | /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). |
14642 | NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, |
14643 | const RISCVSubtarget &Subtarget) { |
14644 | assert(isSupportedRoot(Root, Subtarget) && |
14645 | "Trying to build an helper with an " |
14646 | "unsupported root" ); |
14647 | assert(OperandIdx < 2 && "Requesting something else than LHS or RHS" ); |
14648 | assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0))); |
14649 | OrigOperand = Root->getOperand(Num: OperandIdx); |
14650 | |
14651 | unsigned Opc = Root->getOpcode(); |
14652 | switch (Opc) { |
14653 | // We consider |
14654 | // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS)) |
14655 | // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS)) |
14656 | // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS)) |
14657 | case RISCVISD::VWADD_W_VL: |
14658 | case RISCVISD::VWADDU_W_VL: |
14659 | case RISCVISD::VWSUB_W_VL: |
14660 | case RISCVISD::VWSUBU_W_VL: |
14661 | case RISCVISD::VFWADD_W_VL: |
14662 | case RISCVISD::VFWSUB_W_VL: |
14663 | if (OperandIdx == 1) { |
14664 | SupportsZExt = |
14665 | Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; |
14666 | SupportsSExt = |
14667 | Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL; |
14668 | SupportsFPExt = |
14669 | Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL; |
14670 | // There's no existing extension here, so we don't have to worry about |
14671 | // making sure it gets removed. |
14672 | EnforceOneUse = false; |
14673 | break; |
14674 | } |
14675 | [[fallthrough]]; |
14676 | default: |
14677 | fillUpExtensionSupport(Root, DAG, Subtarget); |
14678 | break; |
14679 | } |
14680 | } |
14681 | |
14682 | /// Helper function to get the Mask and VL from \p Root. |
14683 | static std::pair<SDValue, SDValue> |
14684 | getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, |
14685 | const RISCVSubtarget &Subtarget) { |
14686 | assert(isSupportedRoot(Root, Subtarget) && "Unexpected root" ); |
14687 | switch (Root->getOpcode()) { |
14688 | case ISD::ADD: |
14689 | case ISD::SUB: |
14690 | case ISD::MUL: |
14691 | case ISD::OR: |
14692 | case ISD::SHL: { |
14693 | SDLoc DL(Root); |
14694 | MVT VT = Root->getSimpleValueType(ResNo: 0); |
14695 | return getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget); |
14696 | } |
14697 | default: |
14698 | return std::make_pair(x: Root->getOperand(Num: 3), y: Root->getOperand(Num: 4)); |
14699 | } |
14700 | } |
14701 | |
14702 | /// Helper function to check if \p N is commutative with respect to the |
14703 | /// foldings that are supported by this class. |
14704 | static bool isCommutative(const SDNode *N) { |
14705 | switch (N->getOpcode()) { |
14706 | case ISD::ADD: |
14707 | case ISD::MUL: |
14708 | case ISD::OR: |
14709 | case RISCVISD::ADD_VL: |
14710 | case RISCVISD::MUL_VL: |
14711 | case RISCVISD::VWADD_W_VL: |
14712 | case RISCVISD::VWADDU_W_VL: |
14713 | case RISCVISD::FADD_VL: |
14714 | case RISCVISD::FMUL_VL: |
14715 | case RISCVISD::VFWADD_W_VL: |
14716 | return true; |
14717 | case ISD::SUB: |
14718 | case RISCVISD::SUB_VL: |
14719 | case RISCVISD::VWSUB_W_VL: |
14720 | case RISCVISD::VWSUBU_W_VL: |
14721 | case RISCVISD::FSUB_VL: |
14722 | case RISCVISD::VFWSUB_W_VL: |
14723 | case ISD::SHL: |
14724 | case RISCVISD::SHL_VL: |
14725 | return false; |
14726 | default: |
14727 | llvm_unreachable("Unexpected opcode" ); |
14728 | } |
14729 | } |
14730 | |
14731 | /// Get a list of combine to try for folding extensions in \p Root. |
14732 | /// Note that each returned CombineToTry function doesn't actually modify
14733 | /// anything. Instead, it produces an optional CombineResult that, if not
14734 | /// std::nullopt, needs to be materialized for the combine to be applied.
14735 | /// \see CombineResult::materialize. |
14736 | /// If the related CombineToTry function returns std::nullopt, that means the |
14737 | /// combine didn't match. |
14738 | static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root); |
14739 | }; |
14740 | |
14741 | /// Helper structure that holds all the necessary information to materialize a |
14742 | /// combine that does some extension folding. |
14743 | struct CombineResult { |
14744 | /// Opcode to be generated when materializing the combine. |
14745 | unsigned TargetOpcode; |
14746 | // No value means no extension is needed. |
14747 | std::optional<ExtKind> LHSExt; |
14748 | std::optional<ExtKind> RHSExt; |
14749 | /// Root of the combine. |
14750 | SDNode *Root; |
14751 | /// LHS of the TargetOpcode. |
14752 | NodeExtensionHelper LHS; |
14753 | /// RHS of the TargetOpcode. |
14754 | NodeExtensionHelper RHS; |
14755 | |
14756 | CombineResult(unsigned TargetOpcode, SDNode *Root, |
14757 | const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt, |
14758 | const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt) |
14759 | : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root), |
14760 | LHS(LHS), RHS(RHS) {} |
14761 | |
14762 | /// Return a value that uses TargetOpcode and that can be used to replace |
14763 | /// Root. |
14764 | /// The actual replacement is *not* done in that method. |
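/// For example, materializing a vwadd combine for a RISCVISD::ADD_VL root
/// builds (VWADD_VL narrowLHS, narrowRHS, passthru, mask, vl), reusing the
/// root's passthru, mask, and VL operands.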
14765 | SDValue materialize(SelectionDAG &DAG, |
14766 | const RISCVSubtarget &Subtarget) const { |
14767 | SDValue Mask, VL, Merge; |
14768 | std::tie(args&: Mask, args&: VL) = |
14769 | NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); |
14770 | switch (Root->getOpcode()) { |
14771 | default: |
14772 | Merge = Root->getOperand(Num: 2); |
14773 | break; |
14774 | case ISD::ADD: |
14775 | case ISD::SUB: |
14776 | case ISD::MUL: |
14777 | case ISD::OR: |
14778 | case ISD::SHL: |
14779 | Merge = DAG.getUNDEF(VT: Root->getValueType(ResNo: 0)); |
14780 | break; |
14781 | } |
14782 | return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(Root), VT: Root->getValueType(ResNo: 0), |
14783 | N1: LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: LHSExt), |
14784 | N2: RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: RHSExt), |
14785 | N3: Merge, N4: Mask, N5: VL); |
14786 | } |
14787 | }; |
14788 | |
14789 | /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) |
14790 | /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both |
14791 | /// are zext) and LHS and RHS can be folded into Root. |
14792 | /// AllowExtMask defines which forms `ext` can take in this pattern.
14793 | /// |
14794 | /// \note If the pattern can match with both zext and sext, the returned |
14795 | /// CombineResult will feature the zext result. |
14796 | /// |
14797 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14798 | /// can be used to apply the pattern. |
14799 | static std::optional<CombineResult> |
14800 | canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, |
14801 | const NodeExtensionHelper &RHS, |
14802 | uint8_t AllowExtMask, SelectionDAG &DAG, |
14803 | const RISCVSubtarget &Subtarget) { |
14804 | if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt) |
14805 | return CombineResult(NodeExtensionHelper::getZExtOpcode(Opcode: Root->getOpcode()), |
14806 | Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS, |
14807 | /*RHSExt=*/{ExtKind::ZExt}); |
14808 | if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt) |
14809 | return CombineResult(NodeExtensionHelper::getSExtOpcode(Opcode: Root->getOpcode()), |
14810 | Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS, |
14811 | /*RHSExt=*/{ExtKind::SExt}); |
14812 | if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt) |
14813 | return CombineResult(NodeExtensionHelper::getFPExtOpcode(Opcode: Root->getOpcode()), |
14814 | Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS, |
14815 | /*RHSExt=*/{ExtKind::FPExt}); |
14816 | return std::nullopt; |
14817 | } |
14818 | |
14819 | /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) |
14820 | /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both |
14821 | /// are zext) and LHS and RHS can be folded into Root. |
14822 | /// |
14823 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14824 | /// can be used to apply the pattern. |
14825 | static std::optional<CombineResult> |
14826 | canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, |
14827 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14828 | const RISCVSubtarget &Subtarget) { |
14829 | return canFoldToVWWithSameExtensionImpl( |
14830 | Root, LHS, RHS, AllowExtMask: ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG, |
14831 | Subtarget); |
14832 | } |
14833 | |
14834 | /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) |
14835 | /// |
14836 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14837 | /// can be used to apply the pattern. |
14838 | static std::optional<CombineResult> |
14839 | canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, |
14840 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14841 | const RISCVSubtarget &Subtarget) { |
14842 | if (RHS.SupportsFPExt) |
14843 | return CombineResult( |
14844 | NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::FPExt), |
14845 | Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt}); |
14846 | |
14847 | // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar |
14848 | // sext/zext? |
14849 | // Control this behavior behind an option (AllowSplatInVW_W) for testing |
14850 | // purposes. |
14851 | if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) |
14852 | return CombineResult( |
14853 | NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::ZExt), Root, |
14854 | LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt}); |
14855 | if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) |
14856 | return CombineResult( |
14857 | NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::SExt), Root, |
14858 | LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt}); |
14859 | return std::nullopt; |
14860 | } |
14861 | |
14862 | /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) |
14863 | /// |
14864 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14865 | /// can be used to apply the pattern. |
14866 | static std::optional<CombineResult> |
14867 | canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
14868 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14869 | const RISCVSubtarget &Subtarget) { |
14870 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::SExt, DAG, |
14871 | Subtarget); |
14872 | } |
14873 | |
14874 | /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) |
14875 | /// |
14876 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14877 | /// can be used to apply the pattern. |
14878 | static std::optional<CombineResult> |
14879 | canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
14880 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14881 | const RISCVSubtarget &Subtarget) { |
14882 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::ZExt, DAG, |
14883 | Subtarget); |
14884 | } |
14885 | |
14886 | /// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS)) |
14887 | /// |
14888 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14889 | /// can be used to apply the pattern. |
14890 | static std::optional<CombineResult> |
14891 | canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
14892 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14893 | const RISCVSubtarget &Subtarget) { |
14894 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::FPExt, DAG, |
14895 | Subtarget); |
14896 | } |
14897 | |
14898 | /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) |
14899 | /// |
14900 | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
14901 | /// can be used to apply the pattern. |
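/// For example, (mul (sext a), (zext b)) matches and would be rewritten with
/// RISCVISD::VWMULSU_VL, i.e. a vwmulsu-style widening multiply.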
14902 | static std::optional<CombineResult> |
14903 | canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, |
14904 | const NodeExtensionHelper &RHS, SelectionDAG &DAG, |
14905 | const RISCVSubtarget &Subtarget) { |
14906 | |
14907 | if (!LHS.SupportsSExt || !RHS.SupportsZExt) |
14908 | return std::nullopt; |
14909 | return CombineResult(NodeExtensionHelper::getSUOpcode(Opcode: Root->getOpcode()), |
14910 | Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS, |
14911 | /*RHSExt=*/{ExtKind::ZExt}); |
14912 | } |
14913 | |
14914 | SmallVector<NodeExtensionHelper::CombineToTry> |
14915 | NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { |
14916 | SmallVector<CombineToTry> Strategies; |
14917 | switch (Root->getOpcode()) { |
14918 | case ISD::ADD: |
14919 | case ISD::SUB: |
14920 | case ISD::OR: |
14921 | case RISCVISD::ADD_VL: |
14922 | case RISCVISD::SUB_VL: |
14923 | case RISCVISD::FADD_VL: |
14924 | case RISCVISD::FSUB_VL: |
14925 | // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub |
14926 | Strategies.push_back(Elt: canFoldToVWWithSameExtension); |
14927 | // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14928 | Strategies.push_back(Elt: canFoldToVW_W); |
14929 | break; |
14930 | case RISCVISD::FMUL_VL: |
14931 | Strategies.push_back(Elt: canFoldToVWWithSameExtension); |
14932 | break; |
14933 | case ISD::MUL: |
14934 | case RISCVISD::MUL_VL: |
14935 | // mul -> vwmul(u) |
14936 | Strategies.push_back(Elt: canFoldToVWWithSameExtension); |
14937 | // mul -> vwmulsu |
14938 | Strategies.push_back(Elt: canFoldToVW_SU); |
14939 | break; |
14940 | case ISD::SHL: |
14941 | case RISCVISD::SHL_VL: |
14942 | // shl -> vwsll |
14943 | Strategies.push_back(Elt: canFoldToVWWithZEXT); |
14944 | break; |
14945 | case RISCVISD::VWADD_W_VL: |
14946 | case RISCVISD::VWSUB_W_VL: |
14947 | // vwadd_w|vwsub_w -> vwadd|vwsub |
14948 | Strategies.push_back(Elt: canFoldToVWWithSEXT); |
14949 | break; |
14950 | case RISCVISD::VWADDU_W_VL: |
14951 | case RISCVISD::VWSUBU_W_VL: |
14952 | // vwaddu_w|vwsubu_w -> vwaddu|vwsubu |
14953 | Strategies.push_back(Elt: canFoldToVWWithZEXT); |
14954 | break; |
14955 | case RISCVISD::VFWADD_W_VL: |
14956 | case RISCVISD::VFWSUB_W_VL: |
14957 | // vfwadd_w|vfwsub_w -> vfwadd|vfwsub |
14958 | Strategies.push_back(Elt: canFoldToVWWithFPEXT); |
14959 | break; |
14960 | default: |
14961 | llvm_unreachable("Unexpected opcode" ); |
14962 | } |
14963 | return Strategies; |
14964 | } |
14965 | } // End anonymous namespace. |
14966 | |
14967 | /// Combine a binary operation to its equivalent VW or VW_W form. |
14968 | /// The supported combines are: |
14969 | /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w |
14970 | /// sub | sub_vl -> vwsub(u) | vwsub(u)_w |
14971 | /// mul | mul_vl -> vwmul(u) | vwmul_su |
14972 | /// shl | shl_vl -> vwsll |
14973 | /// fadd_vl -> vfwadd | vfwadd_w |
14974 | /// fsub_vl -> vfwsub | vfwsub_w |
14975 | /// fmul_vl -> vfwmul |
14976 | /// vwadd(u)_w -> vwadd(u)
14977 | /// vwsub(u)_w -> vwsub(u)
14978 | /// vfwadd_w -> vfwadd |
14979 | /// vfwsub_w -> vfwsub |
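/// For example, (add_vl (vsext_vl X), (vsext_vl Y), undef, mask, vl) can be
/// rewritten as (vwadd_vl X, Y, undef, mask, vl), provided every other user of
/// the extensions can be rewritten as well; the combine is all or nothing.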
14980 | static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, |
14981 | TargetLowering::DAGCombinerInfo &DCI, |
14982 | const RISCVSubtarget &Subtarget) { |
14983 | SelectionDAG &DAG = DCI.DAG; |
14984 | if (DCI.isBeforeLegalize()) |
14985 | return SDValue(); |
14986 | |
14987 | if (!NodeExtensionHelper::isSupportedRoot(Root: N, Subtarget)) |
14988 | return SDValue(); |
14989 | |
14990 | SmallVector<SDNode *> Worklist; |
14991 | SmallSet<SDNode *, 8> Inserted; |
14992 | Worklist.push_back(Elt: N); |
14993 | Inserted.insert(Ptr: N); |
14994 | SmallVector<CombineResult> CombinesToApply; |
14995 | |
14996 | while (!Worklist.empty()) { |
14997 | SDNode *Root = Worklist.pop_back_val(); |
14998 | if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget)) |
14999 | return SDValue(); |
15000 | |
15001 | NodeExtensionHelper LHS(Root, 0, DAG, Subtarget); |
15002 | NodeExtensionHelper RHS(Root, 1, DAG, Subtarget); |
15003 | auto AppendUsersIfNeeded = [&Worklist, |
15004 | &Inserted](const NodeExtensionHelper &Op) { |
15005 | if (Op.needToPromoteOtherUsers()) { |
15006 | for (SDNode *TheUse : Op.OrigOperand->uses()) { |
15007 | if (Inserted.insert(Ptr: TheUse).second) |
15008 | Worklist.push_back(Elt: TheUse); |
15009 | } |
15010 | } |
15011 | }; |
15012 | |
15013 | // Control the compile time by limiting the number of nodes we look at in
15014 | // total. |
15015 | if (Inserted.size() > ExtensionMaxWebSize) |
15016 | return SDValue(); |
15017 | |
15018 | SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies = |
15019 | NodeExtensionHelper::getSupportedFoldings(Root); |
15020 | |
15021 | assert(!FoldingStrategies.empty() && "Nothing to be folded" ); |
15022 | bool Matched = false; |
15023 | for (int Attempt = 0; |
15024 | (Attempt != 1 + NodeExtensionHelper::isCommutative(N: Root)) && !Matched; |
15025 | ++Attempt) { |
15026 | |
15027 | for (NodeExtensionHelper::CombineToTry FoldingStrategy : |
15028 | FoldingStrategies) { |
15029 | std::optional<CombineResult> Res = |
15030 | FoldingStrategy(Root, LHS, RHS, DAG, Subtarget); |
15031 | if (Res) { |
15032 | Matched = true; |
15033 | CombinesToApply.push_back(Elt: *Res); |
15034 | // All the inputs that are extended need to be folded, otherwise |
15035 | // we would be leaving the old input (since it may still be used),
15036 | // and the new one. |
15037 | if (Res->LHSExt.has_value()) |
15038 | AppendUsersIfNeeded(LHS); |
15039 | if (Res->RHSExt.has_value()) |
15040 | AppendUsersIfNeeded(RHS); |
15041 | break; |
15042 | } |
15043 | } |
15044 | std::swap(a&: LHS, b&: RHS); |
15045 | } |
15046 | // Right now we do an all or nothing approach. |
15047 | if (!Matched) |
15048 | return SDValue(); |
15049 | } |
15050 | // Store the value for the replacement of the input node separately. |
15051 | SDValue InputRootReplacement; |
15052 | // We do the RAUW after we materialize all the combines, because some replaced |
15053 | // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, |
15054 | // some of these nodes may appear in the NodeExtensionHelpers of some of the |
15055 | // yet-to-be-visited CombinesToApply roots. |
15056 | SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; |
15057 | ValuesToReplace.reserve(N: CombinesToApply.size()); |
15058 | for (CombineResult Res : CombinesToApply) { |
15059 | SDValue NewValue = Res.materialize(DAG, Subtarget); |
15060 | if (!InputRootReplacement) { |
15061 | assert(Res.Root == N && |
15062 | "First element is expected to be the current node" ); |
15063 | InputRootReplacement = NewValue; |
15064 | } else { |
15065 | ValuesToReplace.emplace_back(Args: SDValue(Res.Root, 0), Args&: NewValue); |
15066 | } |
15067 | } |
15068 | for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { |
15069 | DAG.ReplaceAllUsesOfValueWith(From: OldNewValues.first, To: OldNewValues.second); |
15070 | DCI.AddToWorklist(N: OldNewValues.second.getNode()); |
15071 | } |
15072 | return InputRootReplacement; |
15073 | } |
15074 | |
15075 | // Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond |
15076 | // (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond |
15077 | // y will be the Passthru and cond will be the Mask. |
15078 | static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) { |
15079 | unsigned Opc = N->getOpcode(); |
15080 | assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL || |
15081 | Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL); |
15082 | |
15083 | SDValue Y = N->getOperand(Num: 0); |
15084 | SDValue MergeOp = N->getOperand(Num: 1); |
15085 | unsigned MergeOpc = MergeOp.getOpcode(); |
15086 | |
15087 | if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT) |
15088 | return SDValue(); |
15089 | |
15090 | SDValue X = MergeOp->getOperand(Num: 1); |
15091 | |
15092 | if (!MergeOp.hasOneUse()) |
15093 | return SDValue(); |
15094 | |
15095 | // Passthru should be undef |
15096 | SDValue Passthru = N->getOperand(Num: 2); |
15097 | if (!Passthru.isUndef()) |
15098 | return SDValue(); |
15099 | |
15100 | // Mask should be all ones |
15101 | SDValue Mask = N->getOperand(Num: 3); |
15102 | if (Mask.getOpcode() != RISCVISD::VMSET_VL) |
15103 | return SDValue(); |
15104 | |
15105 | // False value of MergeOp should be all zeros |
15106 | SDValue Z = MergeOp->getOperand(Num: 2); |
15107 | |
15108 | if (Z.getOpcode() == ISD::INSERT_SUBVECTOR && |
15109 | (isNullOrNullSplat(V: Z.getOperand(i: 0)) || Z.getOperand(i: 0).isUndef())) |
15110 | Z = Z.getOperand(i: 1); |
15111 | |
15112 | if (!ISD::isConstantSplatVectorAllZeros(N: Z.getNode())) |
15113 | return SDValue(); |
15114 | |
15115 | return DAG.getNode(Opcode: Opc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
15116 | Ops: {Y, X, Y, MergeOp->getOperand(Num: 0), N->getOperand(Num: 4)}, |
15117 | Flags: N->getFlags()); |
15118 | } |
15119 | |
15120 | static SDValue performVWADDSUBW_VLCombine(SDNode *N, |
15121 | TargetLowering::DAGCombinerInfo &DCI, |
15122 | const RISCVSubtarget &Subtarget) { |
15123 | [[maybe_unused]] unsigned Opc = N->getOpcode(); |
15124 | assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL || |
15125 | Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL); |
15126 | |
15127 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
15128 | return V; |
15129 | |
15130 | return combineVWADDSUBWSelect(N, DAG&: DCI.DAG); |
15131 | } |
15132 | |
15133 | // Helper function for performMemPairCombine. |
15134 | // Try to combine the memory loads/stores LSNode1 and LSNode2 |
15135 | // into a single memory pair operation. |
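// For example, two adjacent i64 loads at BasePtr+Imm and BasePtr+Imm+8 can be
// merged into a single RISCVISD::TH_LDD node that produces both loaded values
// plus a chain.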
15136 | static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, |
15137 | LSBaseSDNode *LSNode2, SDValue BasePtr, |
15138 | uint64_t Imm) { |
15139 | SmallPtrSet<const SDNode *, 32> Visited; |
15140 | SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2}; |
15141 | |
15142 | if (SDNode::hasPredecessorHelper(N: LSNode1, Visited, Worklist) || |
15143 | SDNode::hasPredecessorHelper(N: LSNode2, Visited, Worklist)) |
15144 | return SDValue(); |
15145 | |
15146 | MachineFunction &MF = DAG.getMachineFunction(); |
15147 | const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); |
15148 | |
15149 | // The new operation has twice the width. |
15150 | MVT XLenVT = Subtarget.getXLenVT(); |
15151 | EVT MemVT = LSNode1->getMemoryVT(); |
15152 | EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128; |
15153 | MachineMemOperand *MMO = LSNode1->getMemOperand(); |
15154 | MachineMemOperand *NewMMO = MF.getMachineMemOperand( |
15155 | MMO, PtrInfo: MMO->getPointerInfo(), Size: MemVT == MVT::i32 ? 8 : 16); |
15156 | |
15157 | if (LSNode1->getOpcode() == ISD::LOAD) { |
15158 | auto Ext = cast<LoadSDNode>(Val: LSNode1)->getExtensionType(); |
15159 | unsigned Opcode; |
15160 | if (MemVT == MVT::i32) |
15161 | Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD; |
15162 | else |
15163 | Opcode = RISCVISD::TH_LDD; |
15164 | |
15165 | SDValue Res = DAG.getMemIntrinsicNode( |
15166 | Opcode, dl: SDLoc(LSNode1), VTList: DAG.getVTList(VTs: {XLenVT, XLenVT, MVT::Other}), |
15167 | Ops: {LSNode1->getChain(), BasePtr, |
15168 | DAG.getConstant(Val: Imm, DL: SDLoc(LSNode1), VT: XLenVT)}, |
15169 | MemVT: NewMemVT, MMO: NewMMO); |
15170 | |
15171 | SDValue Node1 = |
15172 | DAG.getMergeValues(Ops: {Res.getValue(R: 0), Res.getValue(R: 2)}, dl: SDLoc(LSNode1)); |
15173 | SDValue Node2 = |
15174 | DAG.getMergeValues(Ops: {Res.getValue(R: 1), Res.getValue(R: 2)}, dl: SDLoc(LSNode2)); |
15175 | |
15176 | DAG.ReplaceAllUsesWith(From: LSNode2, To: Node2.getNode()); |
15177 | return Node1; |
15178 | } else { |
15179 | unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD; |
15180 | |
15181 | SDValue Res = DAG.getMemIntrinsicNode( |
15182 | Opcode, dl: SDLoc(LSNode1), VTList: DAG.getVTList(VT: MVT::Other), |
15183 | Ops: {LSNode1->getChain(), LSNode1->getOperand(Num: 1), LSNode2->getOperand(Num: 1), |
15184 | BasePtr, DAG.getConstant(Val: Imm, DL: SDLoc(LSNode1), VT: XLenVT)}, |
15185 | MemVT: NewMemVT, MMO: NewMMO); |
15186 | |
15187 | DAG.ReplaceAllUsesWith(From: LSNode2, To: Res.getNode()); |
15188 | return Res; |
15189 | } |
15190 | } |
15191 | |
15192 | // Try to combine two adjacent loads/stores to a single pair instruction from |
15193 | // the XTHeadMemPair vendor extension. |
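// For example, two i32 loads at offsets 8 and 12 from the same base satisfy
// the constraints checked below (adjacent values, first offset a multiple of 8
// that fits the 2-bit index) and are handed to tryMemPairCombine.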
15194 | static SDValue performMemPairCombine(SDNode *N, |
15195 | TargetLowering::DAGCombinerInfo &DCI) { |
15196 | SelectionDAG &DAG = DCI.DAG; |
15197 | MachineFunction &MF = DAG.getMachineFunction(); |
15198 | const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); |
15199 | |
15200 | // Target does not support load/store pair. |
15201 | if (!Subtarget.hasVendorXTHeadMemPair()) |
15202 | return SDValue(); |
15203 | |
15204 | LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(Val: N); |
15205 | EVT MemVT = LSNode1->getMemoryVT(); |
15206 | unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2; |
15207 | |
15208 | // No volatile, indexed or atomic loads/stores. |
15209 | if (!LSNode1->isSimple() || LSNode1->isIndexed()) |
15210 | return SDValue(); |
15211 | |
15212 | // Function to get a base + constant representation from a memory value. |
15213 | auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> { |
15214 | if (Ptr->getOpcode() == ISD::ADD) |
15215 | if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Ptr->getOperand(Num: 1))) |
15216 | return {Ptr->getOperand(Num: 0), C1->getZExtValue()}; |
15217 | return {Ptr, 0}; |
15218 | }; |
15219 | |
15220 | auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(Num: OpNum)); |
15221 | |
15222 | SDValue Chain = N->getOperand(Num: 0); |
15223 | for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); |
15224 | UI != UE; ++UI) { |
15225 | SDUse &Use = UI.getUse(); |
15226 | if (Use.getUser() != N && Use.getResNo() == 0 && |
15227 | Use.getUser()->getOpcode() == N->getOpcode()) { |
15228 | LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Val: Use.getUser()); |
15229 | |
15230 | // No volatile, indexed or atomic loads/stores. |
15231 | if (!LSNode2->isSimple() || LSNode2->isIndexed()) |
15232 | continue; |
15233 | |
15234 | // Check if LSNode1 and LSNode2 have the same type and extension. |
15235 | if (LSNode1->getOpcode() == ISD::LOAD) |
15236 | if (cast<LoadSDNode>(Val: LSNode2)->getExtensionType() != |
15237 | cast<LoadSDNode>(Val: LSNode1)->getExtensionType()) |
15238 | continue; |
15239 | |
15240 | if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT()) |
15241 | continue; |
15242 | |
15243 | auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(Num: OpNum)); |
15244 | |
15245 | // Check if the base pointer is the same for both instructions.
15246 | if (Base1 != Base2) |
15247 | continue; |
15248 | |
15249 | // Check if the offsets match the XTHeadMemPair encoding constraints.
15250 | bool Valid = false; |
15251 | if (MemVT == MVT::i32) { |
15252 | // Check for adjacent i32 values and a 2-bit index. |
15253 | if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(x: Offset1)) |
15254 | Valid = true; |
15255 | } else if (MemVT == MVT::i64) { |
15256 | // Check for adjacent i64 values and a 2-bit index. |
15257 | if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(x: Offset1)) |
15258 | Valid = true; |
15259 | } |
15260 | |
15261 | if (!Valid) |
15262 | continue; |
15263 | |
15264 | // Try to combine. |
15265 | if (SDValue Res = |
15266 | tryMemPairCombine(DAG, LSNode1, LSNode2, BasePtr: Base1, Imm: Offset1)) |
15267 | return Res; |
15268 | } |
15269 | } |
15270 | |
15271 | return SDValue(); |
15272 | } |
15273 | |
15274 | // Fold |
15275 | // (fp_to_int (froundeven X)) -> fcvt X, rne |
15276 | // (fp_to_int (ftrunc X)) -> fcvt X, rtz |
15277 | // (fp_to_int (ffloor X)) -> fcvt X, rdn |
15278 | // (fp_to_int (fceil X)) -> fcvt X, rup |
15279 | // (fp_to_int (fround X)) -> fcvt X, rmm |
15280 | // (fp_to_int (frint X)) -> fcvt X |
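// For example, on RV64 (fp_to_sint i32 (ffloor f32 X)) becomes
// (trunc (FCVT_W_RV64 X, rdn)), using the static round-down mode instead of a
// separate floor.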
15281 | static SDValue performFP_TO_INTCombine(SDNode *N, |
15282 | TargetLowering::DAGCombinerInfo &DCI, |
15283 | const RISCVSubtarget &Subtarget) { |
15284 | SelectionDAG &DAG = DCI.DAG; |
15285 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
15286 | MVT XLenVT = Subtarget.getXLenVT(); |
15287 | |
15288 | SDValue Src = N->getOperand(Num: 0); |
15289 | |
15290 | // Don't do this for strict-fp Src. |
15291 | if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) |
15292 | return SDValue(); |
15293 | |
15294 | // Ensure the FP type is legal. |
15295 | if (!TLI.isTypeLegal(VT: Src.getValueType())) |
15296 | return SDValue(); |
15297 | |
15298 | // Don't do this for f16 with Zfhmin and not Zfh. |
15299 | if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) |
15300 | return SDValue(); |
15301 | |
15302 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode()); |
15303 | // If the result is invalid, we didn't find a foldable instruction. |
15304 | if (FRM == RISCVFPRndMode::Invalid) |
15305 | return SDValue(); |
15306 | |
15307 | SDLoc DL(N); |
15308 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; |
15309 | EVT VT = N->getValueType(ResNo: 0); |
15310 | |
15311 | if (VT.isVector() && TLI.isTypeLegal(VT)) { |
15312 | MVT SrcVT = Src.getSimpleValueType(); |
15313 | MVT SrcContainerVT = SrcVT; |
15314 | MVT ContainerVT = VT.getSimpleVT(); |
15315 | SDValue XVal = Src.getOperand(i: 0); |
15316 | |
15317 | // For widening and narrowing conversions we just combine it into a |
15318 | // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They |
15319 | // end up getting lowered to their appropriate pseudo instructions based on |
15320 | // their operand types.
15321 | if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || |
15322 | VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) |
15323 | return SDValue(); |
15324 | |
15325 | // Make fixed-length vectors scalable first |
15326 | if (SrcVT.isFixedLengthVector()) { |
15327 | SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget); |
15328 | XVal = convertToScalableVector(VT: SrcContainerVT, V: XVal, DAG, Subtarget); |
15329 | ContainerVT = |
15330 | getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget); |
15331 | } |
15332 | |
15333 | auto [Mask, VL] = |
15334 | getDefaultVLOps(VecVT: SrcVT, ContainerVT: SrcContainerVT, DL, DAG, Subtarget); |
15335 | |
15336 | SDValue FpToInt; |
15337 | if (FRM == RISCVFPRndMode::RTZ) { |
15338 | // Use the dedicated trunc static rounding mode if we're truncating so we |
15339 | // don't need to generate calls to fsrmi/fsrm |
15340 | unsigned Opc = |
15341 | IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; |
15342 | FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL); |
15343 | } else if (FRM == RISCVFPRndMode::DYN) { |
15344 | unsigned Opc = |
15345 | IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL; |
15346 | FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL); |
15347 | } else { |
15348 | unsigned Opc = |
15349 | IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; |
15350 | FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, |
15351 | N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL); |
15352 | } |
15353 | |
15354 | // If converted from fixed-length to scalable, convert back |
15355 | if (VT.isFixedLengthVector()) |
15356 | FpToInt = convertFromScalableVector(VT, V: FpToInt, DAG, Subtarget); |
15357 | |
15358 | return FpToInt; |
15359 | } |
15360 | |
15361 | // Only handle XLen or i32 types. Other types narrower than XLen will |
15362 | // eventually be legalized to XLenVT. |
15363 | if (VT != MVT::i32 && VT != XLenVT) |
15364 | return SDValue(); |
15365 | |
15366 | unsigned Opc; |
15367 | if (VT == XLenVT) |
15368 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
15369 | else |
15370 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
15371 | |
15372 | SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src.getOperand(i: 0), |
15373 | N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT)); |
15374 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FpToInt); |
15375 | } |
15376 | |
15377 | // Fold |
15378 | // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) |
15379 | // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) |
15380 | // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) |
15381 | // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) |
15382 | // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) |
15383 | // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) |
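// For example, on RV64 (fp_to_sint_sat i64 (fceil f64 X)) with a 64-bit
// saturation width becomes (select (setuo X, X), 0, (FCVT_X X, rup)).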
15384 | static SDValue performFP_TO_INT_SATCombine(SDNode *N, |
15385 | TargetLowering::DAGCombinerInfo &DCI, |
15386 | const RISCVSubtarget &Subtarget) { |
15387 | SelectionDAG &DAG = DCI.DAG; |
15388 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
15389 | MVT XLenVT = Subtarget.getXLenVT(); |
15390 | |
15391 | // Only handle XLen types. Other types narrower than XLen will eventually be |
15392 | // legalized to XLenVT. |
15393 | EVT DstVT = N->getValueType(ResNo: 0); |
15394 | if (DstVT != XLenVT) |
15395 | return SDValue(); |
15396 | |
15397 | SDValue Src = N->getOperand(Num: 0); |
15398 | |
15399 | // Don't do this for strict-fp Src. |
15400 | if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) |
15401 | return SDValue(); |
15402 | |
15403 | // Ensure the FP type is also legal. |
15404 | if (!TLI.isTypeLegal(VT: Src.getValueType())) |
15405 | return SDValue(); |
15406 | |
15407 | // Don't do this for f16 with Zfhmin and not Zfh. |
15408 | if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) |
15409 | return SDValue(); |
15410 | |
15411 | EVT SatVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT(); |
15412 | |
15413 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode()); |
15414 | if (FRM == RISCVFPRndMode::Invalid) |
15415 | return SDValue(); |
15416 | |
15417 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; |
15418 | |
15419 | unsigned Opc; |
15420 | if (SatVT == DstVT) |
15421 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
15422 | else if (DstVT == MVT::i64 && SatVT == MVT::i32) |
15423 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
15424 | else |
15425 | return SDValue(); |
15426 | // FIXME: Support other SatVTs by clamping before or after the conversion. |
15427 | |
15428 | Src = Src.getOperand(i: 0); |
15429 | |
15430 | SDLoc DL(N); |
15431 | SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src, |
15432 | N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT)); |
15433 | |
15434 | // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero |
15435 | // extend. |
15436 | if (Opc == RISCVISD::FCVT_WU_RV64) |
15437 | FpToInt = DAG.getZeroExtendInReg(Op: FpToInt, DL, VT: MVT::i32); |
15438 | |
15439 | // RISC-V FP-to-int conversions saturate to the destination register size, but |
15440 | // don't produce 0 for nan. |
15441 | SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT); |
15442 | return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt, Cond: ISD::CondCode::SETUO); |
15443 | } |
15444 | |
15445 | // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is |
15446 | // smaller than XLenVT. |
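// For example, with Zbkb an i16 (bitreverse (bswap X)) is replaced by a single
// (BREV8 X).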
15447 | static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, |
15448 | const RISCVSubtarget &Subtarget) { |
15449 | assert(Subtarget.hasStdExtZbkb() && "Unexpected extension" ); |
15450 | |
15451 | SDValue Src = N->getOperand(Num: 0); |
15452 | if (Src.getOpcode() != ISD::BSWAP) |
15453 | return SDValue(); |
15454 | |
15455 | EVT VT = N->getValueType(ResNo: 0); |
15456 | if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || |
15457 | !llvm::has_single_bit<uint32_t>(Value: VT.getSizeInBits())) |
15458 | return SDValue(); |
15459 | |
15460 | SDLoc DL(N); |
15461 | return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: Src.getOperand(i: 0)); |
15462 | } |
15463 | |
15464 | // Convert from one FMA opcode to another based on whether we are negating the |
15465 | // multiply result and/or the accumulator. |
15466 | // NOTE: Only supports RVV operations with VL. |
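// For example, VFMADD_VL with both NegMul and NegAcc set becomes VFNMADD_VL.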
15467 | static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { |
15468 | // Negating the multiply result changes ADD<->SUB and toggles 'N'. |
15469 | if (NegMul) { |
15470 | // clang-format off |
15471 | switch (Opcode) { |
15472 | default: llvm_unreachable("Unexpected opcode" ); |
15473 | case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; |
15474 | case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; |
15475 | case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; |
15476 | case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; |
15477 | case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; |
15478 | case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; |
15479 | case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; |
15480 | case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; |
15481 | } |
15482 | // clang-format on |
15483 | } |
15484 | |
15485 | // Negating the accumulator changes ADD<->SUB. |
15486 | if (NegAcc) { |
15487 | // clang-format off |
15488 | switch (Opcode) { |
15489 | default: llvm_unreachable("Unexpected opcode" ); |
15490 | case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; |
15491 | case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; |
15492 | case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; |
15493 | case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; |
15494 | case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; |
15495 | case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; |
15496 | case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; |
15497 | case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; |
15498 | } |
15499 | // clang-format on |
15500 | } |
15501 | |
15502 | return Opcode; |
15503 | } |
15504 | |
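// For example, (VFMADD_VL (FNEG_VL A), B, (FNEG_VL C), Mask, VL), where both
// FNEG_VL nodes use the same Mask and VL as the FMA, is rewritten as
// (VFNMADD_VL A, B, C, Mask, VL).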
15505 | static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { |
15506 | // Fold FNEG_VL into FMA opcodes. |
15507 | // The first operand of strict-fp is chain. |
15508 | unsigned Offset = N->isTargetStrictFPOpcode(); |
15509 | SDValue A = N->getOperand(Num: 0 + Offset); |
15510 | SDValue B = N->getOperand(Num: 1 + Offset); |
15511 | SDValue C = N->getOperand(Num: 2 + Offset); |
15512 | SDValue Mask = N->getOperand(Num: 3 + Offset); |
15513 | SDValue VL = N->getOperand(Num: 4 + Offset); |
15514 | |
15515 | auto invertIfNegative = [&Mask, &VL](SDValue &V) { |
15516 | if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(i: 1) == Mask && |
15517 | V.getOperand(i: 2) == VL) { |
15518 | // Return the negated input. |
15519 | V = V.getOperand(i: 0); |
15520 | return true; |
15521 | } |
15522 | |
15523 | return false; |
15524 | }; |
15525 | |
15526 | bool NegA = invertIfNegative(A); |
15527 | bool NegB = invertIfNegative(B); |
15528 | bool NegC = invertIfNegative(C); |
15529 | |
15530 | // If no operands are negated, we're done. |
15531 | if (!NegA && !NegB && !NegC) |
15532 | return SDValue(); |
15533 | |
15534 | unsigned NewOpcode = negateFMAOpcode(Opcode: N->getOpcode(), NegMul: NegA != NegB, NegAcc: NegC); |
15535 | if (N->isTargetStrictFPOpcode()) |
15536 | return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VTList: N->getVTList(), |
15537 | Ops: {N->getOperand(Num: 0), A, B, C, Mask, VL}); |
15538 | return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: A, N2: B, N3: C, N4: Mask, |
15539 | N5: VL); |
15540 | } |
15541 | |
15542 | static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, |
15543 | const RISCVSubtarget &Subtarget) { |
15544 | if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) |
15545 | return V; |
15546 | |
15547 | if (N->getValueType(ResNo: 0).getVectorElementType() == MVT::f32 && |
15548 | !Subtarget.hasVInstructionsF16()) |
15549 | return SDValue(); |
15550 | |
15551 | // FIXME: Ignore strict opcodes for now. |
15552 | if (N->isTargetStrictFPOpcode()) |
15553 | return SDValue(); |
15554 | |
15555 | // Try to form widening FMA. |
15556 | SDValue Op0 = N->getOperand(Num: 0); |
15557 | SDValue Op1 = N->getOperand(Num: 1); |
15558 | SDValue Mask = N->getOperand(Num: 3); |
15559 | SDValue VL = N->getOperand(Num: 4); |
15560 | |
15561 | if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || |
15562 | Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) |
15563 | return SDValue(); |
15564 | |
15565 | // TODO: Refactor to handle more complex cases similar to |
15566 | // combineBinOp_VLToVWBinOp_VL. |
15567 | if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && |
15568 | (Op0 != Op1 || !Op0->hasNUsesOfValue(NUses: 2, Value: 0))) |
15569 | return SDValue(); |
15570 | |
15571 | // Check the mask and VL are the same. |
15572 | if (Op0.getOperand(i: 1) != Mask || Op0.getOperand(i: 2) != VL || |
15573 | Op1.getOperand(i: 1) != Mask || Op1.getOperand(i: 2) != VL) |
15574 | return SDValue(); |
15575 | |
15576 | unsigned NewOpc; |
15577 | switch (N->getOpcode()) { |
15578 | default: |
15579 | llvm_unreachable("Unexpected opcode" ); |
15580 | case RISCVISD::VFMADD_VL: |
15581 | NewOpc = RISCVISD::VFWMADD_VL; |
15582 | break; |
15583 | case RISCVISD::VFNMSUB_VL: |
15584 | NewOpc = RISCVISD::VFWNMSUB_VL; |
15585 | break; |
15586 | case RISCVISD::VFNMADD_VL: |
15587 | NewOpc = RISCVISD::VFWNMADD_VL; |
15588 | break; |
15589 | case RISCVISD::VFMSUB_VL: |
15590 | NewOpc = RISCVISD::VFWMSUB_VL; |
15591 | break; |
15592 | } |
15593 | |
15594 | Op0 = Op0.getOperand(i: 0); |
15595 | Op1 = Op1.getOperand(i: 0); |
15596 | |
15597 | return DAG.getNode(Opcode: NewOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Op0, N2: Op1, |
15598 | N3: N->getOperand(Num: 2), N4: Mask, N5: VL); |
15599 | } |
15600 | |
15601 | static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, |
15602 | const RISCVSubtarget &Subtarget) { |
15603 | assert(N->getOpcode() == ISD::SRA && "Unexpected opcode" ); |
15604 | |
15605 | if (N->getValueType(ResNo: 0) != MVT::i64 || !Subtarget.is64Bit()) |
15606 | return SDValue(); |
15607 | |
15608 | if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1))) |
15609 | return SDValue(); |
15610 | uint64_t ShAmt = N->getConstantOperandVal(Num: 1); |
15611 | if (ShAmt > 32) |
15612 | return SDValue(); |
15613 | |
15614 | SDValue N0 = N->getOperand(Num: 0); |
15615 | |
15616 | // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> |
15617 | // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of |
15618 | // SLLIW+SRAIW. SLLI+SRAI have compressed forms. |
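// For example, (sra (sext_inreg (shl X, 5), i32), 3) becomes
// (sra (shl X, 37), 35).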
15619 | if (ShAmt < 32 && |
15620 | N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && |
15621 | cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT() == MVT::i32 && |
15622 | N0.getOperand(i: 0).getOpcode() == ISD::SHL && N0.getOperand(i: 0).hasOneUse() && |
15623 | isa<ConstantSDNode>(Val: N0.getOperand(i: 0).getOperand(i: 1))) { |
15624 | uint64_t LShAmt = N0.getOperand(i: 0).getConstantOperandVal(i: 1); |
15625 | if (LShAmt < 32) { |
15626 | SDLoc ShlDL(N0.getOperand(i: 0)); |
15627 | SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL: ShlDL, VT: MVT::i64, |
15628 | N1: N0.getOperand(i: 0).getOperand(i: 0), |
15629 | N2: DAG.getConstant(Val: LShAmt + 32, DL: ShlDL, VT: MVT::i64)); |
15630 | SDLoc DL(N); |
15631 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: MVT::i64, N1: Shl, |
15632 | N2: DAG.getConstant(Val: ShAmt + 32, DL, VT: MVT::i64)); |
15633 | } |
15634 | } |
15635 | |
15636 | // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) |
15637 | // FIXME: Should this be a generic combine? There's a similar combine on X86. |
15638 | // |
15639 | // Also try these folds where an add or sub is in the middle. |
15640 | // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
15641 | // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
15642 | SDValue Shl; |
15643 | ConstantSDNode *AddC = nullptr; |
15644 | |
15645 | // We might have an ADD or SUB between the SRA and SHL. |
15646 | bool IsAdd = N0.getOpcode() == ISD::ADD; |
15647 | if ((IsAdd || N0.getOpcode() == ISD::SUB)) { |
15648 | // Other operand needs to be a constant we can modify. |
15649 | AddC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: IsAdd ? 1 : 0)); |
15650 | if (!AddC) |
15651 | return SDValue(); |
15652 | |
15653 | // AddC needs to have at least 32 trailing zeros. |
15654 | if (AddC->getAPIntValue().countr_zero() < 32) |
15655 | return SDValue(); |
15656 | |
15657 | // All users should be a shift by constant less than or equal to 32. This |
15658 | // ensures we'll do this optimization for each of them to produce an |
15659 | // add/sub+sext_inreg they can all share. |
15660 | for (SDNode *U : N0->uses()) { |
15661 | if (U->getOpcode() != ISD::SRA || |
15662 | !isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) || |
15663 | U->getConstantOperandVal(Num: 1) > 32) |
15664 | return SDValue(); |
15665 | } |
15666 | |
15667 | Shl = N0.getOperand(i: IsAdd ? 0 : 1); |
15668 | } else { |
15669 | // Not an ADD or SUB. |
15670 | Shl = N0; |
15671 | } |
15672 | |
15673 | // Look for a shift left by 32. |
15674 | if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: Shl.getOperand(i: 1)) || |
15675 | Shl.getConstantOperandVal(i: 1) != 32) |
15676 | return SDValue(); |
15677 | |
15678 | // If we didn't look through an add/sub, then the shl should have one use.
15679 | // If we did look through an add/sub, the sext_inreg we create is free so |
15680 | // we're only creating 2 new instructions. It's enough to only remove the |
15681 | // original sra+add/sub. |
15682 | if (!AddC && !Shl.hasOneUse()) |
15683 | return SDValue(); |
15684 | |
15685 | SDLoc DL(N); |
15686 | SDValue In = Shl.getOperand(i: 0); |
15687 | |
15688 | // If we looked through an ADD or SUB, we need to rebuild it with the shifted |
15689 | // constant. |
15690 | if (AddC) { |
15691 | SDValue ShiftedAddC = |
15692 | DAG.getConstant(Val: AddC->getAPIntValue().lshr(shiftAmt: 32), DL, VT: MVT::i64); |
15693 | if (IsAdd) |
15694 | In = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: In, N2: ShiftedAddC); |
15695 | else |
15696 | In = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: ShiftedAddC, N2: In); |
15697 | } |
15698 | |
15699 | SDValue SExt = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: In, |
15700 | N2: DAG.getValueType(MVT::i32)); |
15701 | if (ShAmt == 32) |
15702 | return SExt; |
15703 | |
15704 | return DAG.getNode( |
15705 | Opcode: ISD::SHL, DL, VT: MVT::i64, N1: SExt, |
15706 | N2: DAG.getConstant(Val: 32 - ShAmt, DL, VT: MVT::i64)); |
15707 | } |
15708 | |
// Invert (and/or (setcc X, Y, cc), (xor Z, 1)) to (or/and (setcc X, Y, !cc), Z)
// if the result is used as the condition of a br_cc or select_cc we can
// invert, inverting the setcc is free, and Z is 0/1. The caller will invert
// the br_cc/select_cc.
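// As an illustrative example (not from the original comment), for an integer
// equality compare:
//   (and (setcc X, Y, eq), (xor Z, 1)) -> (or (setcc X, Y, ne), Z)
// Once the caller inverts the br_cc/select_cc condition, this is simply
// De Morgan's law applied to the boolean expression.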
15713 | static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) { |
15714 | bool IsAnd = Cond.getOpcode() == ISD::AND; |
15715 | if (!IsAnd && Cond.getOpcode() != ISD::OR) |
15716 | return SDValue(); |
15717 | |
15718 | if (!Cond.hasOneUse()) |
15719 | return SDValue(); |
15720 | |
15721 | SDValue Setcc = Cond.getOperand(i: 0); |
15722 | SDValue Xor = Cond.getOperand(i: 1); |
15723 | // Canonicalize setcc to LHS. |
15724 | if (Setcc.getOpcode() != ISD::SETCC) |
15725 | std::swap(a&: Setcc, b&: Xor); |
15726 | // LHS should be a setcc and RHS should be an xor. |
15727 | if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() || |
15728 | Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) |
15729 | return SDValue(); |
15730 | |
15731 | // If the condition is an And, SimplifyDemandedBits may have changed |
15732 | // (xor Z, 1) to (not Z). |
15733 | SDValue Xor1 = Xor.getOperand(i: 1); |
15734 | if (!isOneConstant(V: Xor1) && !(IsAnd && isAllOnesConstant(V: Xor1))) |
15735 | return SDValue(); |
15736 | |
15737 | EVT VT = Cond.getValueType(); |
15738 | SDValue Xor0 = Xor.getOperand(i: 0); |
15739 | |
15740 | // The LHS of the xor needs to be 0/1. |
15741 | APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1); |
15742 | if (!DAG.MaskedValueIsZero(Op: Xor0, Mask)) |
15743 | return SDValue(); |
15744 | |
15745 | // We can only invert integer setccs. |
15746 | EVT SetCCOpVT = Setcc.getOperand(i: 0).getValueType(); |
15747 | if (!SetCCOpVT.isScalarInteger()) |
15748 | return SDValue(); |
15749 | |
15750 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Setcc.getOperand(i: 2))->get(); |
15751 | if (ISD::isIntEqualitySetCC(Code: CCVal)) { |
15752 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT); |
15753 | Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 0), |
15754 | RHS: Setcc.getOperand(i: 1), Cond: CCVal); |
15755 | } else if (CCVal == ISD::SETLT && isNullConstant(V: Setcc.getOperand(i: 0))) { |
15756 | // Invert (setlt 0, X) by converting to (setlt X, 1). |
15757 | Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 1), |
15758 | RHS: DAG.getConstant(Val: 1, DL: SDLoc(Setcc), VT), Cond: CCVal); |
15759 | } else if (CCVal == ISD::SETLT && isOneConstant(V: Setcc.getOperand(i: 1))) { |
// Invert (setlt X, 1) by converting to (setlt 0, X).
15761 | Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, |
15762 | LHS: DAG.getConstant(Val: 0, DL: SDLoc(Setcc), VT), |
15763 | RHS: Setcc.getOperand(i: 0), Cond: CCVal); |
15764 | } else |
15765 | return SDValue(); |
15766 | |
15767 | unsigned Opc = IsAnd ? ISD::OR : ISD::AND; |
15768 | return DAG.getNode(Opcode: Opc, DL: SDLoc(Cond), VT, N1: Setcc, N2: Xor.getOperand(i: 0)); |
15769 | } |
15770 | |
// Perform common combines for BR_CC and SELECT_CC conditions.
15772 | static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, |
15773 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
15774 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get(); |
15775 | |
// Since an arithmetic right shift always preserves the sign bit, the shift
// can be omitted when comparing against zero.
15778 | // Fold setlt (sra X, N), 0 -> setlt X, 0 and |
15779 | // setge (sra X, N), 0 -> setge X, 0 |
15780 | if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) && |
15781 | LHS.getOpcode() == ISD::SRA) { |
15782 | LHS = LHS.getOperand(i: 0); |
15783 | return true; |
15784 | } |
15785 | |
15786 | if (!ISD::isIntEqualitySetCC(Code: CCVal)) |
15787 | return false; |
15788 | |
15789 | // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) |
15790 | // Sometimes the setcc is introduced after br_cc/select_cc has been formed. |
15791 | if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) && |
15792 | LHS.getOperand(i: 0).getValueType() == Subtarget.getXLenVT()) { |
15793 | // If we're looking for eq 0 instead of ne 0, we need to invert the |
15794 | // condition. |
15795 | bool Invert = CCVal == ISD::SETEQ; |
15796 | CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get(); |
15797 | if (Invert) |
15798 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()); |
15799 | |
15800 | RHS = LHS.getOperand(i: 1); |
15801 | LHS = LHS.getOperand(i: 0); |
15802 | translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG); |
15803 | |
15804 | CC = DAG.getCondCode(Cond: CCVal); |
15805 | return true; |
15806 | } |
15807 | |
15808 | // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) |
15809 | if (LHS.getOpcode() == ISD::XOR && isNullConstant(V: RHS)) { |
15810 | RHS = LHS.getOperand(i: 1); |
15811 | LHS = LHS.getOperand(i: 0); |
15812 | return true; |
15813 | } |
15814 | |
15815 | // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) |
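// As an illustrative example, on RV64 with X of type i64:
//   ((srl (and X, 8), 3), 0, eq) -> ((shl X, 60), 0, ge)
// Bit 3 of X is moved into the sign bit, so the zero test becomes a sign
// test.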
15816 | if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && |
15817 | LHS.getOperand(i: 1).getOpcode() == ISD::Constant) { |
15818 | SDValue LHS0 = LHS.getOperand(i: 0); |
15819 | if (LHS0.getOpcode() == ISD::AND && |
15820 | LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) { |
15821 | uint64_t Mask = LHS0.getConstantOperandVal(i: 1); |
15822 | uint64_t ShAmt = LHS.getConstantOperandVal(i: 1); |
15823 | if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) { |
15824 | CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; |
15825 | CC = DAG.getCondCode(Cond: CCVal); |
15826 | |
15827 | ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; |
15828 | LHS = LHS0.getOperand(i: 0); |
15829 | if (ShAmt != 0) |
15830 | LHS = |
15831 | DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0), |
15832 | N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType())); |
15833 | return true; |
15834 | } |
15835 | } |
15836 | } |
15837 | |
// (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15839 | // This can occur when legalizing some floating point comparisons. |
15840 | APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1); |
15841 | if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) { |
15842 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()); |
15843 | CC = DAG.getCondCode(Cond: CCVal); |
15844 | RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType()); |
15845 | return true; |
15846 | } |
15847 | |
15848 | if (isNullConstant(V: RHS)) { |
15849 | if (SDValue NewCond = tryDemorganOfBooleanCondition(Cond: LHS, DAG)) { |
15850 | CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()); |
15851 | CC = DAG.getCondCode(Cond: CCVal); |
15852 | LHS = NewCond; |
15853 | return true; |
15854 | } |
15855 | } |
15856 | |
15857 | return false; |
15858 | } |
15859 | |
15860 | // Fold |
15861 | // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). |
15862 | // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). |
15863 | // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). |
15864 | // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). |
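// The same fold applies to the shifts, where selecting a shift amount of 0
// leaves Y unchanged. With Swapped set, the select arms are exchanged, e.g.
// (illustrative):
//   (select C, Y, (add Y, X)) -> (add Y, (select C, 0, X)).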
15865 | static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, |
15866 | SDValue TrueVal, SDValue FalseVal, |
15867 | bool Swapped) { |
15868 | bool Commutative = true; |
15869 | unsigned Opc = TrueVal.getOpcode(); |
15870 | switch (Opc) { |
15871 | default: |
15872 | return SDValue(); |
15873 | case ISD::SHL: |
15874 | case ISD::SRA: |
15875 | case ISD::SRL: |
15876 | case ISD::SUB: |
15877 | Commutative = false; |
15878 | break; |
15879 | case ISD::ADD: |
15880 | case ISD::OR: |
15881 | case ISD::XOR: |
15882 | break; |
15883 | } |
15884 | |
15885 | if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(Val: FalseVal)) |
15886 | return SDValue(); |
15887 | |
15888 | unsigned OpToFold; |
15889 | if (FalseVal == TrueVal.getOperand(i: 0)) |
15890 | OpToFold = 0; |
15891 | else if (Commutative && FalseVal == TrueVal.getOperand(i: 1)) |
15892 | OpToFold = 1; |
15893 | else |
15894 | return SDValue(); |
15895 | |
15896 | EVT VT = N->getValueType(ResNo: 0); |
15897 | SDLoc DL(N); |
15898 | SDValue OtherOp = TrueVal.getOperand(i: 1 - OpToFold); |
15899 | EVT OtherOpVT = OtherOp.getValueType(); |
15900 | SDValue IdentityOperand = |
15901 | DAG.getNeutralElement(Opcode: Opc, DL, VT: OtherOpVT, Flags: N->getFlags()); |
15902 | if (!Commutative) |
15903 | IdentityOperand = DAG.getConstant(Val: 0, DL, VT: OtherOpVT); |
15904 | assert(IdentityOperand && "No identity operand!" ); |
15905 | |
15906 | if (Swapped) |
15907 | std::swap(a&: OtherOp, b&: IdentityOperand); |
15908 | SDValue NewSel = |
15909 | DAG.getSelect(DL, VT: OtherOpVT, Cond: N->getOperand(Num: 0), LHS: OtherOp, RHS: IdentityOperand); |
15910 | return DAG.getNode(Opcode: TrueVal.getOpcode(), DL, VT, N1: FalseVal, N2: NewSel); |
15911 | } |
15912 | |
// This tries to get rid of the `select` and `icmp` that are used to handle
// targets that do not support `cttz(0)`/`ctlz(0)`.
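// As an illustrative example, for a 64-bit X:
//   (select (setcc X, 0, eq), 0, (cttz_zero_undef X)) -> (and (cttz X), 63)
// This works because cttz(0) is 64 and 64 & 63 == 0, matching the value the
// select would have produced for X == 0.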
15915 | static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { |
15916 | SDValue Cond = N->getOperand(Num: 0); |
15917 | |
15918 | // This represents either CTTZ or CTLZ instruction. |
15919 | SDValue CountZeroes; |
15920 | |
15921 | SDValue ValOnZero; |
15922 | |
15923 | if (Cond.getOpcode() != ISD::SETCC) |
15924 | return SDValue(); |
15925 | |
15926 | if (!isNullConstant(V: Cond->getOperand(Num: 1))) |
15927 | return SDValue(); |
15928 | |
15929 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond->getOperand(Num: 2))->get(); |
15930 | if (CCVal == ISD::CondCode::SETEQ) { |
15931 | CountZeroes = N->getOperand(Num: 2); |
15932 | ValOnZero = N->getOperand(Num: 1); |
15933 | } else if (CCVal == ISD::CondCode::SETNE) { |
15934 | CountZeroes = N->getOperand(Num: 1); |
15935 | ValOnZero = N->getOperand(Num: 2); |
15936 | } else { |
15937 | return SDValue(); |
15938 | } |
15939 | |
15940 | if (CountZeroes.getOpcode() == ISD::TRUNCATE || |
15941 | CountZeroes.getOpcode() == ISD::ZERO_EXTEND) |
15942 | CountZeroes = CountZeroes.getOperand(i: 0); |
15943 | |
15944 | if (CountZeroes.getOpcode() != ISD::CTTZ && |
15945 | CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF && |
15946 | CountZeroes.getOpcode() != ISD::CTLZ && |
15947 | CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF) |
15948 | return SDValue(); |
15949 | |
15950 | if (!isNullConstant(V: ValOnZero)) |
15951 | return SDValue(); |
15952 | |
15953 | SDValue CountZeroesArgument = CountZeroes->getOperand(Num: 0); |
15954 | if (Cond->getOperand(Num: 0) != CountZeroesArgument) |
15955 | return SDValue(); |
15956 | |
15957 | if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { |
15958 | CountZeroes = DAG.getNode(Opcode: ISD::CTTZ, DL: SDLoc(CountZeroes), |
15959 | VT: CountZeroes.getValueType(), Operand: CountZeroesArgument); |
15960 | } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { |
15961 | CountZeroes = DAG.getNode(Opcode: ISD::CTLZ, DL: SDLoc(CountZeroes), |
15962 | VT: CountZeroes.getValueType(), Operand: CountZeroesArgument); |
15963 | } |
15964 | |
15965 | unsigned BitWidth = CountZeroes.getValueSizeInBits(); |
15966 | SDValue BitWidthMinusOne = |
15967 | DAG.getConstant(Val: BitWidth - 1, DL: SDLoc(N), VT: CountZeroes.getValueType()); |
15968 | |
15969 | auto AndNode = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: CountZeroes.getValueType(), |
15970 | N1: CountZeroes, N2: BitWidthMinusOne); |
15971 | return DAG.getZExtOrTrunc(Op: AndNode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0)); |
15972 | } |
15973 | |
15974 | static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, |
15975 | const RISCVSubtarget &Subtarget) { |
15976 | SDValue Cond = N->getOperand(Num: 0); |
15977 | SDValue True = N->getOperand(Num: 1); |
15978 | SDValue False = N->getOperand(Num: 2); |
15979 | SDLoc DL(N); |
15980 | EVT VT = N->getValueType(ResNo: 0); |
15981 | EVT CondVT = Cond.getValueType(); |
15982 | |
15983 | if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) |
15984 | return SDValue(); |
15985 | |
// Replace (setcc (and X, C), 0, eq) with (setcc (and X, C), 0, ne) and swap
// the select arms to generate BEXTI, where C is a power of 2 that does not
// fit in a simm12.
15988 | if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && |
15989 | (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { |
15990 | SDValue LHS = Cond.getOperand(i: 0); |
15991 | SDValue RHS = Cond.getOperand(i: 1); |
15992 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get(); |
15993 | if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && |
15994 | isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) && isNullConstant(V: RHS)) { |
15995 | const APInt &MaskVal = LHS.getConstantOperandAPInt(i: 1); |
15996 | if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(N: 12)) |
15997 | return DAG.getSelect(DL, VT, |
15998 | Cond: DAG.getSetCC(DL, VT: CondVT, LHS, RHS, Cond: ISD::SETNE), |
15999 | LHS: False, RHS: True); |
16000 | } |
16001 | } |
16002 | return SDValue(); |
16003 | } |
16004 | |
16005 | static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, |
16006 | const RISCVSubtarget &Subtarget) { |
16007 | if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG)) |
16008 | return Folded; |
16009 | |
16010 | if (SDValue V = useInversedSetcc(N, DAG, Subtarget)) |
16011 | return V; |
16012 | |
16013 | if (Subtarget.hasConditionalMoveFusion()) |
16014 | return SDValue(); |
16015 | |
16016 | SDValue TrueVal = N->getOperand(Num: 1); |
16017 | SDValue FalseVal = N->getOperand(Num: 2); |
16018 | if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false)) |
16019 | return V; |
16020 | return tryFoldSelectIntoOp(N, DAG, TrueVal: FalseVal, FalseVal: TrueVal, /*Swapped*/true); |
16021 | } |
16022 | |
16023 | /// If we have a build_vector where each lane is binop X, C, where C |
16024 | /// is a constant (but not necessarily the same constant on all lanes), |
16025 | /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..). |
/// We assume that materializing a constant build vector will be no more
/// expensive than performing O(n) binops.
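/// As an illustrative example:
///   (build_vector (add X0, 1), (add X1, 2), (add X2, 3), (add X3, 4))
///     -> (add (build_vector X0, X1, X2, X3), (build_vector 1, 2, 3, 4))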
16028 | static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, |
16029 | const RISCVSubtarget &Subtarget, |
16030 | const RISCVTargetLowering &TLI) { |
16031 | SDLoc DL(N); |
16032 | EVT VT = N->getValueType(ResNo: 0); |
16033 | |
16034 | assert(!VT.isScalableVector() && "unexpected build vector" ); |
16035 | |
16036 | if (VT.getVectorNumElements() == 1) |
16037 | return SDValue(); |
16038 | |
16039 | const unsigned Opcode = N->op_begin()->getNode()->getOpcode(); |
16040 | if (!TLI.isBinOp(Opcode)) |
16041 | return SDValue(); |
16042 | |
16043 | if (!TLI.isOperationLegalOrCustom(Op: Opcode, VT) || !TLI.isTypeLegal(VT)) |
16044 | return SDValue(); |
16045 | |
16046 | // This BUILD_VECTOR involves an implicit truncation, and sinking |
16047 | // truncates through binops is non-trivial. |
16048 | if (N->op_begin()->getValueType() != VT.getVectorElementType()) |
16049 | return SDValue(); |
16050 | |
16051 | SmallVector<SDValue> LHSOps; |
16052 | SmallVector<SDValue> RHSOps; |
16053 | for (SDValue Op : N->ops()) { |
16054 | if (Op.isUndef()) { |
16055 | // We can't form a divide or remainder from undef. |
16056 | if (!DAG.isSafeToSpeculativelyExecute(Opcode)) |
16057 | return SDValue(); |
16058 | |
16059 | LHSOps.push_back(Elt: Op); |
16060 | RHSOps.push_back(Elt: Op); |
16061 | continue; |
16062 | } |
16063 | |
// TODO: We can handle operations which have a neutral rhs value
16065 | // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track |
16066 | // of profit in a more explicit manner. |
16067 | if (Op.getOpcode() != Opcode || !Op.hasOneUse()) |
16068 | return SDValue(); |
16069 | |
16070 | LHSOps.push_back(Elt: Op.getOperand(i: 0)); |
16071 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)) && |
16072 | !isa<ConstantFPSDNode>(Val: Op.getOperand(i: 1))) |
16073 | return SDValue(); |
16074 | // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may |
16075 | // have different LHS and RHS types. |
16076 | if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType()) |
16077 | return SDValue(); |
16078 | |
16079 | RHSOps.push_back(Elt: Op.getOperand(i: 1)); |
16080 | } |
16081 | |
16082 | return DAG.getNode(Opcode, DL, VT, N1: DAG.getBuildVector(VT, DL, Ops: LHSOps), |
16083 | N2: DAG.getBuildVector(VT, DL, Ops: RHSOps)); |
16084 | } |
16085 | |
16086 | static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, |
16087 | const RISCVSubtarget &Subtarget, |
16088 | const RISCVTargetLowering &TLI) { |
16089 | SDValue InVec = N->getOperand(Num: 0); |
16090 | SDValue InVal = N->getOperand(Num: 1); |
16091 | SDValue EltNo = N->getOperand(Num: 2); |
16092 | SDLoc DL(N); |
16093 | |
16094 | EVT VT = InVec.getValueType(); |
16095 | if (VT.isScalableVector()) |
16096 | return SDValue(); |
16097 | |
16098 | if (!InVec.hasOneUse()) |
16099 | return SDValue(); |
16100 | |
16101 | // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt |
16102 | // move the insert_vector_elts into the arms of the binop. Note that |
16103 | // the new RHS must be a constant. |
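// As an illustrative example (element values chosen arbitrarily):
//   insert_vector_elt (add A, <1,1,1,1>), (add b, 7), 2
//     -> add (insert_vector_elt A, b, 2), (insert_vector_elt <1,1,1,1>, 7, 2)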
16104 | const unsigned InVecOpcode = InVec->getOpcode(); |
16105 | if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(Opcode: InVecOpcode) && |
16106 | InVal.hasOneUse()) { |
16107 | SDValue InVecLHS = InVec->getOperand(Num: 0); |
16108 | SDValue InVecRHS = InVec->getOperand(Num: 1); |
16109 | SDValue InValLHS = InVal->getOperand(Num: 0); |
16110 | SDValue InValRHS = InVal->getOperand(Num: 1); |
16111 | |
16112 | if (!ISD::isBuildVectorOfConstantSDNodes(N: InVecRHS.getNode())) |
16113 | return SDValue(); |
16114 | if (!isa<ConstantSDNode>(Val: InValRHS) && !isa<ConstantFPSDNode>(Val: InValRHS)) |
16115 | return SDValue(); |
16116 | // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may |
16117 | // have different LHS and RHS types. |
16118 | if (InVec.getOperand(i: 0).getValueType() != InVec.getOperand(i: 1).getValueType()) |
16119 | return SDValue(); |
16120 | SDValue LHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, |
16121 | N1: InVecLHS, N2: InValLHS, N3: EltNo); |
16122 | SDValue RHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, |
16123 | N1: InVecRHS, N2: InValRHS, N3: EltNo); |
16124 | return DAG.getNode(Opcode: InVecOpcode, DL, VT, N1: LHS, N2: RHS); |
16125 | } |
16126 | |
16127 | // Given insert_vector_elt (concat_vectors ...), InVal, Elt |
16128 | // move the insert_vector_elt to the source operand of the concat_vector. |
16129 | if (InVec.getOpcode() != ISD::CONCAT_VECTORS) |
16130 | return SDValue(); |
16131 | |
16132 | auto *IndexC = dyn_cast<ConstantSDNode>(Val&: EltNo); |
16133 | if (!IndexC) |
16134 | return SDValue(); |
16135 | unsigned Elt = IndexC->getZExtValue(); |
16136 | |
16137 | EVT ConcatVT = InVec.getOperand(i: 0).getValueType(); |
16138 | if (ConcatVT.getVectorElementType() != InVal.getValueType()) |
16139 | return SDValue(); |
16140 | unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); |
16141 | SDValue NewIdx = DAG.getVectorIdxConstant(Val: Elt % ConcatNumElts, DL); |
16142 | |
16143 | unsigned ConcatOpIdx = Elt / ConcatNumElts; |
16144 | SDValue ConcatOp = InVec.getOperand(i: ConcatOpIdx); |
16145 | ConcatOp = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ConcatVT, |
16146 | N1: ConcatOp, N2: InVal, N3: NewIdx); |
16147 | |
16148 | SmallVector<SDValue> ConcatOps; |
16149 | ConcatOps.append(in_start: InVec->op_begin(), in_end: InVec->op_end()); |
16150 | ConcatOps[ConcatOpIdx] = ConcatOp; |
16151 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps); |
16152 | } |
16153 | |
// If we're concatenating a series of vector loads like
//   concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// then we can turn this into a strided load by widening the vector elements:
//   vlse32 p, stride=n
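// As an illustrative example, concatenating four v4i8 loads from p, p+n,
// p+2*n and p+3*n becomes a v4i32 strided VP load with stride n, which is
// then bitcast back to v16i8.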
16158 | static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, |
16159 | const RISCVSubtarget &Subtarget, |
16160 | const RISCVTargetLowering &TLI) { |
16161 | SDLoc DL(N); |
16162 | EVT VT = N->getValueType(ResNo: 0); |
16163 | |
16164 | // Only perform this combine on legal MVTs. |
16165 | if (!TLI.isTypeLegal(VT)) |
16166 | return SDValue(); |
16167 | |
16168 | // TODO: Potentially extend this to scalable vectors |
16169 | if (VT.isScalableVector()) |
16170 | return SDValue(); |
16171 | |
16172 | auto *BaseLd = dyn_cast<LoadSDNode>(Val: N->getOperand(Num: 0)); |
16173 | if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(N: BaseLd) || |
16174 | !SDValue(BaseLd, 0).hasOneUse()) |
16175 | return SDValue(); |
16176 | |
16177 | EVT BaseLdVT = BaseLd->getValueType(ResNo: 0); |
16178 | |
16179 | // Go through the loads and check that they're strided |
16180 | SmallVector<LoadSDNode *> Lds; |
16181 | Lds.push_back(Elt: BaseLd); |
16182 | Align Align = BaseLd->getAlign(); |
16183 | for (SDValue Op : N->ops().drop_front()) { |
16184 | auto *Ld = dyn_cast<LoadSDNode>(Val&: Op); |
16185 | if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || |
16186 | Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(N: Ld) || |
16187 | Ld->getValueType(ResNo: 0) != BaseLdVT) |
16188 | return SDValue(); |
16189 | |
16190 | Lds.push_back(Elt: Ld); |
16191 | |
16192 | // The common alignment is the most restrictive (smallest) of all the loads |
16193 | Align = std::min(a: Align, b: Ld->getAlign()); |
16194 | } |
16195 | |
16196 | using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>; |
16197 | auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, |
16198 | LoadSDNode *Ld2) -> std::optional<PtrDiff> { |
16199 | // If the load ptrs can be decomposed into a common (Base + Index) with a |
16200 | // common constant stride, then return the constant stride. |
16201 | BaseIndexOffset BIO1 = BaseIndexOffset::match(N: Ld1, DAG); |
16202 | BaseIndexOffset BIO2 = BaseIndexOffset::match(N: Ld2, DAG); |
16203 | if (BIO1.equalBaseIndex(Other: BIO2, DAG)) |
16204 | return {{BIO2.getOffset() - BIO1.getOffset(), false}}; |
16205 | |
16206 | // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) |
16207 | SDValue P1 = Ld1->getBasePtr(); |
16208 | SDValue P2 = Ld2->getBasePtr(); |
16209 | if (P2.getOpcode() == ISD::ADD && P2.getOperand(i: 0) == P1) |
16210 | return {{P2.getOperand(i: 1), false}}; |
16211 | if (P1.getOpcode() == ISD::ADD && P1.getOperand(i: 0) == P2) |
16212 | return {{P1.getOperand(i: 1), true}}; |
16213 | |
16214 | return std::nullopt; |
16215 | }; |
16216 | |
16217 | // Get the distance between the first and second loads |
16218 | auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); |
16219 | if (!BaseDiff) |
16220 | return SDValue(); |
16221 | |
16222 | // Check all the loads are the same distance apart |
16223 | for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) |
16224 | if (GetPtrDiff(*It, *std::next(x: It)) != BaseDiff) |
16225 | return SDValue(); |
16226 | |
16227 | // TODO: At this point, we've successfully matched a generalized gather |
16228 | // load. Maybe we should emit that, and then move the specialized |
16229 | // matchers above and below into a DAG combine? |
16230 | |
// Get the widened scalar type, e.g. v4i8 -> i32
16232 | unsigned WideScalarBitWidth = |
16233 | BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements(); |
16234 | MVT WideScalarVT = MVT::getIntegerVT(BitWidth: WideScalarBitWidth); |
16235 | |
// Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
16237 | MVT WideVecVT = MVT::getVectorVT(VT: WideScalarVT, NumElements: N->getNumOperands()); |
16238 | if (!TLI.isTypeLegal(VT: WideVecVT)) |
16239 | return SDValue(); |
16240 | |
16241 | // Check that the operation is legal |
16242 | if (!TLI.isLegalStridedLoadStore(DataType: WideVecVT, Alignment: Align)) |
16243 | return SDValue(); |
16244 | |
16245 | auto [StrideVariant, MustNegateStride] = *BaseDiff; |
16246 | SDValue Stride = std::holds_alternative<SDValue>(v: StrideVariant) |
16247 | ? std::get<SDValue>(v&: StrideVariant) |
16248 | : DAG.getConstant(Val: std::get<int64_t>(v&: StrideVariant), DL, |
16249 | VT: Lds[0]->getOffset().getValueType()); |
16250 | if (MustNegateStride) |
16251 | Stride = DAG.getNegative(Val: Stride, DL, VT: Stride.getValueType()); |
16252 | |
16253 | SDValue AllOneMask = |
16254 | DAG.getSplat(VT: WideVecVT.changeVectorElementType(EltVT: MVT::i1), DL, |
16255 | Op: DAG.getConstant(Val: 1, DL, VT: MVT::i1)); |
16256 | |
16257 | uint64_t MemSize; |
16258 | if (auto *ConstStride = dyn_cast<ConstantSDNode>(Val&: Stride); |
16259 | ConstStride && ConstStride->getSExtValue() >= 0) |
16260 | // total size = (elsize * n) + (stride - elsize) * (n-1) |
16261 | // = elsize + stride * (n-1) |
16262 | MemSize = WideScalarVT.getSizeInBits() + |
16263 | ConstStride->getSExtValue() * (N->getNumOperands() - 1); |
16264 | else |
16265 | // If Stride isn't constant, then we can't know how much it will load |
16266 | MemSize = MemoryLocation::UnknownSize; |
16267 | |
16268 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
16269 | PtrInfo: BaseLd->getPointerInfo(), F: BaseLd->getMemOperand()->getFlags(), Size: MemSize, |
16270 | BaseAlignment: Align); |
16271 | |
16272 | SDValue StridedLoad = DAG.getStridedLoadVP( |
16273 | VT: WideVecVT, DL, Chain: BaseLd->getChain(), Ptr: BaseLd->getBasePtr(), Stride, |
16274 | Mask: AllOneMask, |
16275 | EVL: DAG.getConstant(Val: N->getNumOperands(), DL, VT: Subtarget.getXLenVT()), MMO); |
16276 | |
16277 | for (SDValue Ld : N->ops()) |
16278 | DAG.makeEquivalentMemoryOrdering(OldLoad: cast<LoadSDNode>(Val&: Ld), NewMemOp: StridedLoad); |
16279 | |
16280 | return DAG.getBitcast(VT: VT.getSimpleVT(), V: StridedLoad); |
16281 | } |
16282 | |
16283 | static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, |
16284 | const RISCVSubtarget &Subtarget) { |
16285 | |
16286 | assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); |
16287 | |
16288 | if (N->getValueType(ResNo: 0).isFixedLengthVector()) |
16289 | return SDValue(); |
16290 | |
16291 | SDValue Addend = N->getOperand(Num: 0); |
16292 | SDValue MulOp = N->getOperand(Num: 1); |
16293 | |
16294 | if (N->getOpcode() == RISCVISD::ADD_VL) { |
16295 | SDValue AddMergeOp = N->getOperand(Num: 2); |
16296 | if (!AddMergeOp.isUndef()) |
16297 | return SDValue(); |
16298 | } |
16299 | |
16300 | auto IsVWMulOpc = [](unsigned Opc) { |
16301 | switch (Opc) { |
16302 | case RISCVISD::VWMUL_VL: |
16303 | case RISCVISD::VWMULU_VL: |
16304 | case RISCVISD::VWMULSU_VL: |
16305 | return true; |
16306 | default: |
16307 | return false; |
16308 | } |
16309 | }; |
16310 | |
16311 | if (!IsVWMulOpc(MulOp.getOpcode())) |
16312 | std::swap(a&: Addend, b&: MulOp); |
16313 | |
16314 | if (!IsVWMulOpc(MulOp.getOpcode())) |
16315 | return SDValue(); |
16316 | |
16317 | SDValue MulMergeOp = MulOp.getOperand(i: 2); |
16318 | |
16319 | if (!MulMergeOp.isUndef()) |
16320 | return SDValue(); |
16321 | |
16322 | auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, |
16323 | const RISCVSubtarget &Subtarget) { |
16324 | if (N->getOpcode() == ISD::ADD) { |
16325 | SDLoc DL(N); |
16326 | return getDefaultScalableVLOps(VecVT: N->getSimpleValueType(ResNo: 0), DL, DAG, |
16327 | Subtarget); |
16328 | } |
16329 | return std::make_pair(x: N->getOperand(Num: 3), y: N->getOperand(Num: 4)); |
16330 | }(N, DAG, Subtarget); |
16331 | |
16332 | SDValue MulMask = MulOp.getOperand(i: 3); |
16333 | SDValue MulVL = MulOp.getOperand(i: 4); |
16334 | |
16335 | if (AddMask != MulMask || AddVL != MulVL) |
16336 | return SDValue(); |
16337 | |
16338 | unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL; |
16339 | static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL, |
16340 | "Unexpected opcode after VWMACC_VL" ); |
16341 | static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL, |
16342 | "Unexpected opcode after VWMACC_VL!" ); |
16343 | static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL, |
16344 | "Unexpected opcode after VWMUL_VL!" ); |
16345 | static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL, |
16346 | "Unexpected opcode after VWMUL_VL!" ); |
16347 | |
16348 | SDLoc DL(N); |
16349 | EVT VT = N->getValueType(ResNo: 0); |
16350 | SDValue Ops[] = {MulOp.getOperand(i: 0), MulOp.getOperand(i: 1), Addend, AddMask, |
16351 | AddVL}; |
16352 | return DAG.getNode(Opcode: Opc, DL, VT, Ops); |
16353 | } |
16354 | |
16355 | static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, |
16356 | ISD::MemIndexType &IndexType, |
16357 | RISCVTargetLowering::DAGCombinerInfo &DCI) { |
16358 | if (!DCI.isBeforeLegalize()) |
16359 | return false; |
16360 | |
16361 | SelectionDAG &DAG = DCI.DAG; |
16362 | const MVT XLenVT = |
16363 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); |
16364 | |
16365 | const EVT IndexVT = Index.getValueType(); |
16366 | |
16367 | // RISC-V indexed loads only support the "unsigned unscaled" addressing |
16368 | // mode, so anything else must be manually legalized. |
16369 | if (!isIndexTypeSigned(IndexType)) |
16370 | return false; |
16371 | |
16372 | if (IndexVT.getVectorElementType().bitsLT(VT: XLenVT)) { |
16373 | // Any index legalization should first promote to XLenVT, so we don't lose |
16374 | // bits when scaling. This may create an illegal index type so we let |
16375 | // LLVM's legalization take care of the splitting. |
16376 | // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. |
16377 | Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, |
16378 | VT: IndexVT.changeVectorElementType(EltVT: XLenVT), Operand: Index); |
16379 | } |
16380 | IndexType = ISD::UNSIGNED_SCALED; |
16381 | return true; |
16382 | } |
16383 | |
16384 | /// Match the index vector of a scatter or gather node as the shuffle mask |
16385 | /// which performs the rearrangement if possible. Will only match if |
16386 | /// all lanes are touched, and thus replacing the scatter or gather with |
16387 | /// a unit strided access and shuffle is legal. |
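/// As an illustrative example, a gather of v4i32 with byte-offset index
/// <4, 0, 12, 8> matches as a unit-strided load plus the shuffle mask
/// <1, 0, 3, 2>, since every lane is covered exactly once.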
16388 | static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, |
16389 | SmallVector<int> &ShuffleMask) { |
16390 | if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode())) |
16391 | return false; |
16392 | if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode())) |
16393 | return false; |
16394 | |
16395 | const unsigned ElementSize = VT.getScalarStoreSize(); |
16396 | const unsigned NumElems = VT.getVectorNumElements(); |
16397 | |
16398 | // Create the shuffle mask and check all bits active |
16399 | assert(ShuffleMask.empty()); |
16400 | BitVector ActiveLanes(NumElems); |
16401 | for (unsigned i = 0; i < Index->getNumOperands(); i++) { |
16402 | // TODO: We've found an active bit of UB, and could be |
16403 | // more aggressive here if desired. |
16404 | if (Index->getOperand(Num: i)->isUndef()) |
16405 | return false; |
16406 | uint64_t C = Index->getConstantOperandVal(Num: i); |
16407 | if (C % ElementSize != 0) |
16408 | return false; |
16409 | C = C / ElementSize; |
16410 | if (C >= NumElems) |
16411 | return false; |
16412 | ShuffleMask.push_back(Elt: C); |
16413 | ActiveLanes.set(C); |
16414 | } |
16415 | return ActiveLanes.all(); |
16416 | } |
16417 | |
16418 | /// Match the index of a gather or scatter operation as an operation |
16419 | /// with twice the element width and half the number of elements. This is |
16420 | /// generally profitable (if legal) because these operations are linear |
/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
16422 | /// come out ahead. |
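/// As an illustrative example, a v4i32 gather with byte-offset index
/// <0, 4, 16, 20> can instead be done as a v2i64 gather with byte-offset
/// index <0, 16>, subject to the alignment and ELEN checks below.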
16423 | static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, |
16424 | Align BaseAlign, const RISCVSubtarget &ST) { |
16425 | if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode())) |
16426 | return false; |
16427 | if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode())) |
16428 | return false; |
16429 | |
// Attempt a doubling. If we can use an element type 4x or 8x in
// size, this will happen via multiple iterations of the transform.
16432 | const unsigned NumElems = VT.getVectorNumElements(); |
16433 | if (NumElems % 2 != 0) |
16434 | return false; |
16435 | |
16436 | const unsigned ElementSize = VT.getScalarStoreSize(); |
16437 | const unsigned WiderElementSize = ElementSize * 2; |
16438 | if (WiderElementSize > ST.getELen()/8) |
16439 | return false; |
16440 | |
16441 | if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize) |
16442 | return false; |
16443 | |
16444 | for (unsigned i = 0; i < Index->getNumOperands(); i++) { |
16445 | // TODO: We've found an active bit of UB, and could be |
16446 | // more aggressive here if desired. |
16447 | if (Index->getOperand(Num: i)->isUndef()) |
16448 | return false; |
16449 | // TODO: This offset check is too strict if we support fully |
16450 | // misaligned memory operations. |
16451 | uint64_t C = Index->getConstantOperandVal(Num: i); |
16452 | if (i % 2 == 0) { |
16453 | if (C % WiderElementSize != 0) |
16454 | return false; |
16455 | continue; |
16456 | } |
16457 | uint64_t Last = Index->getConstantOperandVal(Num: i-1); |
16458 | if (C != Last + ElementSize) |
16459 | return false; |
16460 | } |
16461 | return true; |
16462 | } |
16463 | |
// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
// This benefits cases where X and Y are both low-precision vectors of the
// same value type. Since the truncate is lowered into n levels of
// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
// a pattern would otherwise be expanded into a series of "vsetvli" and
// "vnsrl" instructions later.
16470 | static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) { |
16471 | SDValue Mask = N->getOperand(Num: 1); |
16472 | SDValue VL = N->getOperand(Num: 2); |
16473 | |
16474 | bool IsVLMAX = isAllOnesConstant(V: VL) || |
16475 | (isa<RegisterSDNode>(Val: VL) && |
16476 | cast<RegisterSDNode>(Val&: VL)->getReg() == RISCV::X0); |
16477 | if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL || |
16478 | Mask.getOperand(i: 0) != VL) |
16479 | return SDValue(); |
16480 | |
16481 | auto IsTruncNode = [&](SDValue V) { |
16482 | return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL && |
16483 | V.getOperand(i: 1) == Mask && V.getOperand(i: 2) == VL; |
16484 | }; |
16485 | |
16486 | SDValue Op = N->getOperand(Num: 0); |
16487 | |
// We first need to walk through the chain of TRUNCATE_VECTOR_VL nodes to
// find the innermost one before matching such a pattern.
16490 | while (IsTruncNode(Op)) { |
16491 | if (!Op.hasOneUse()) |
16492 | return SDValue(); |
16493 | Op = Op.getOperand(i: 0); |
16494 | } |
16495 | |
16496 | if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse()) |
16497 | return SDValue(); |
16498 | |
16499 | SDValue N0 = Op.getOperand(i: 0); |
16500 | SDValue N1 = Op.getOperand(i: 1); |
16501 | if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() || |
16502 | N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse()) |
16503 | return SDValue(); |
16504 | |
16505 | SDValue N00 = N0.getOperand(i: 0); |
16506 | SDValue N10 = N1.getOperand(i: 0); |
16507 | if (!N00.getValueType().isVector() || |
16508 | N00.getValueType() != N10.getValueType() || |
16509 | N->getValueType(ResNo: 0) != N10.getValueType()) |
16510 | return SDValue(); |
16511 | |
16512 | unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; |
16513 | SDValue SMin = |
16514 | DAG.getNode(Opcode: ISD::SMIN, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0), N1: N10, |
16515 | N2: DAG.getConstant(Val: MaxShAmt, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0))); |
16516 | return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: N00, N2: SMin); |
16517 | } |
16518 | |
16519 | // Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the |
16520 | // maximum value for the truncated type. |
16521 | // Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1 |
16522 | // is the signed maximum value for the truncated type and C2 is the signed |
16523 | // minimum value. |
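// As an illustrative example, truncating from SEW=16 to SEW=8:
//   (truncate_vector_vl (umin X, 255))              -> (vnclipu_vl X)
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> (vnclip_vl X)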
16524 | static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, |
16525 | const RISCVSubtarget &Subtarget) { |
16526 | assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL); |
16527 | |
16528 | MVT VT = N->getSimpleValueType(ResNo: 0); |
16529 | |
16530 | SDValue Mask = N->getOperand(Num: 1); |
16531 | SDValue VL = N->getOperand(Num: 2); |
16532 | |
16533 | auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL, |
16534 | APInt &SplatVal) { |
16535 | if (V.getOpcode() != Opc && |
16536 | !(V.getOpcode() == OpcVL && V.getOperand(i: 2).isUndef() && |
16537 | V.getOperand(i: 3) == Mask && V.getOperand(i: 4) == VL)) |
16538 | return SDValue(); |
16539 | |
16540 | SDValue Op = V.getOperand(i: 1); |
16541 | |
16542 | // Peek through conversion between fixed and scalable vectors. |
16543 | if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(i: 0).isUndef() && |
16544 | isNullConstant(V: Op.getOperand(i: 2)) && |
16545 | Op.getOperand(i: 1).getValueType().isFixedLengthVector() && |
16546 | Op.getOperand(i: 1).getOpcode() == ISD::EXTRACT_SUBVECTOR && |
16547 | Op.getOperand(i: 1).getOperand(i: 0).getValueType() == Op.getValueType() && |
16548 | isNullConstant(V: Op.getOperand(i: 1).getOperand(i: 1))) |
16549 | Op = Op.getOperand(i: 1).getOperand(i: 0); |
16550 | |
16551 | if (ISD::isConstantSplatVector(N: Op.getNode(), SplatValue&: SplatVal)) |
16552 | return V.getOperand(i: 0); |
16553 | |
16554 | if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(i: 0).isUndef() && |
16555 | Op.getOperand(i: 2) == VL) { |
16556 | if (auto *Op1 = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) { |
16557 | SplatVal = |
16558 | Op1->getAPIntValue().sextOrTrunc(width: Op.getScalarValueSizeInBits()); |
16559 | return V.getOperand(i: 0); |
16560 | } |
16561 | } |
16562 | |
16563 | return SDValue(); |
16564 | }; |
16565 | |
16566 | SDLoc DL(N); |
16567 | |
16568 | auto DetectUSatPattern = [&](SDValue V) { |
16569 | APInt LoC, HiC; |
16570 | |
16571 | // Simple case, V is a UMIN. |
16572 | if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC)) |
16573 | if (HiC.isMask(numBits: VT.getScalarSizeInBits())) |
16574 | return UMinOp; |
16575 | |
16576 | // If we have an SMAX that removes negative numbers first, then we can match |
16577 | // SMIN instead of UMIN. |
16578 | if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) |
16579 | if (SDValue SMaxOp = |
16580 | MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) |
16581 | if (LoC.isNonNegative() && HiC.isMask(numBits: VT.getScalarSizeInBits())) |
16582 | return SMinOp; |
16583 | |
16584 | // If we have an SMIN before an SMAX and the SMAX constant is less than or |
16585 | // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX |
16586 | // first. |
16587 | if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) |
16588 | if (SDValue SMinOp = |
16589 | MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) |
16590 | if (LoC.isNonNegative() && HiC.isMask(numBits: VT.getScalarSizeInBits()) && |
16591 | HiC.uge(RHS: LoC)) |
16592 | return DAG.getNode(Opcode: RISCVISD::SMAX_VL, DL, VT: V.getValueType(), N1: SMinOp, |
16593 | N2: V.getOperand(i: 1), N3: DAG.getUNDEF(VT: V.getValueType()), |
16594 | N4: Mask, N5: VL); |
16595 | |
16596 | return SDValue(); |
16597 | }; |
16598 | |
16599 | auto DetectSSatPattern = [&](SDValue V) { |
16600 | unsigned NumDstBits = VT.getScalarSizeInBits(); |
16601 | unsigned NumSrcBits = V.getScalarValueSizeInBits(); |
16602 | APInt SignedMax = APInt::getSignedMaxValue(numBits: NumDstBits).sext(width: NumSrcBits); |
16603 | APInt SignedMin = APInt::getSignedMinValue(numBits: NumDstBits).sext(width: NumSrcBits); |
16604 | |
16605 | APInt HiC, LoC; |
16606 | if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) |
16607 | if (SDValue SMaxOp = |
16608 | MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) |
16609 | if (HiC == SignedMax && LoC == SignedMin) |
16610 | return SMaxOp; |
16611 | |
16612 | if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC)) |
16613 | if (SDValue SMinOp = |
16614 | MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC)) |
16615 | if (HiC == SignedMax && LoC == SignedMin) |
16616 | return SMinOp; |
16617 | |
16618 | return SDValue(); |
16619 | }; |
16620 | |
16621 | SDValue Src = N->getOperand(Num: 0); |
16622 | |
16623 | // Look through multiple layers of truncates. |
16624 | while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL && |
16625 | Src.getOperand(i: 1) == Mask && Src.getOperand(i: 2) == VL && |
16626 | Src.hasOneUse()) |
16627 | Src = Src.getOperand(i: 0); |
16628 | |
16629 | SDValue Val; |
16630 | unsigned ClipOpc; |
16631 | if ((Val = DetectUSatPattern(Src))) |
16632 | ClipOpc = RISCVISD::VNCLIPU_VL; |
16633 | else if ((Val = DetectSSatPattern(Src))) |
16634 | ClipOpc = RISCVISD::VNCLIP_VL; |
16635 | else |
16636 | return SDValue(); |
16637 | |
16638 | MVT ValVT = Val.getSimpleValueType(); |
16639 | |
16640 | do { |
16641 | MVT ValEltVT = MVT::getIntegerVT(BitWidth: ValVT.getScalarSizeInBits() / 2); |
16642 | ValVT = ValVT.changeVectorElementType(EltVT: ValEltVT); |
16643 | // Rounding mode here is arbitrary since we aren't shifting out any bits. |
16644 | Val = DAG.getNode( |
16645 | Opcode: ClipOpc, DL, VT: ValVT, |
16646 | Ops: {Val, DAG.getConstant(Val: 0, DL, VT: ValVT), DAG.getUNDEF(VT), Mask, |
16647 | DAG.getTargetConstant(Val: RISCVVXRndMode::RNU, DL, VT: Subtarget.getXLenVT()), |
16648 | VL}); |
16649 | } while (ValVT != VT); |
16650 | |
16651 | return Val; |
16652 | } |
16653 | |
16654 | SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, |
16655 | DAGCombinerInfo &DCI) const { |
16656 | SelectionDAG &DAG = DCI.DAG; |
16657 | const MVT XLenVT = Subtarget.getXLenVT(); |
16658 | SDLoc DL(N); |
16659 | |
16660 | // Helper to call SimplifyDemandedBits on an operand of N where only some low |
16661 | // bits are demanded. N will be added to the Worklist if it was not deleted. |
16662 | // Caller should return SDValue(N, 0) if this returns true. |
16663 | auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { |
16664 | SDValue Op = N->getOperand(Num: OpNo); |
16665 | APInt Mask = APInt::getLowBitsSet(numBits: Op.getValueSizeInBits(), loBitsSet: LowBits); |
16666 | if (!SimplifyDemandedBits(Op, DemandedBits: Mask, DCI)) |
16667 | return false; |
16668 | |
16669 | if (N->getOpcode() != ISD::DELETED_NODE) |
16670 | DCI.AddToWorklist(N); |
16671 | return true; |
16672 | }; |
16673 | |
16674 | switch (N->getOpcode()) { |
16675 | default: |
16676 | break; |
16677 | case RISCVISD::SplitF64: { |
16678 | SDValue Op0 = N->getOperand(Num: 0); |
16679 | // If the input to SplitF64 is just BuildPairF64 then the operation is |
16680 | // redundant. Instead, use BuildPairF64's operands directly. |
16681 | if (Op0->getOpcode() == RISCVISD::BuildPairF64) |
16682 | return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1)); |
16683 | |
16684 | if (Op0->isUndef()) { |
16685 | SDValue Lo = DAG.getUNDEF(VT: MVT::i32); |
16686 | SDValue Hi = DAG.getUNDEF(VT: MVT::i32); |
16687 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
16688 | } |
16689 | |
16690 | // It's cheaper to materialise two 32-bit integers than to load a double |
16691 | // from the constant pool and transfer it to integer registers through the |
16692 | // stack. |
16693 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) { |
16694 | APInt V = C->getValueAPF().bitcastToAPInt(); |
16695 | SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32); |
16696 | SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32); |
16697 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
16698 | } |
16699 | |
16700 | // This is a target-specific version of a DAGCombine performed in |
16701 | // DAGCombiner::visitBITCAST. It performs the equivalent of: |
16702 | // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) |
16703 | // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) |
16704 | if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || |
16705 | !Op0.getNode()->hasOneUse()) |
16706 | break; |
16707 | SDValue NewSplitF64 = |
16708 | DAG.getNode(Opcode: RISCVISD::SplitF64, DL, VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), |
16709 | N: Op0.getOperand(i: 0)); |
16710 | SDValue Lo = NewSplitF64.getValue(R: 0); |
16711 | SDValue Hi = NewSplitF64.getValue(R: 1); |
16712 | APInt SignBit = APInt::getSignMask(BitWidth: 32); |
16713 | if (Op0.getOpcode() == ISD::FNEG) { |
16714 | SDValue NewHi = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Hi, |
16715 | N2: DAG.getConstant(Val: SignBit, DL, VT: MVT::i32)); |
16716 | return DCI.CombineTo(N, Res0: Lo, Res1: NewHi); |
16717 | } |
16718 | assert(Op0.getOpcode() == ISD::FABS); |
16719 | SDValue NewHi = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: Hi, |
16720 | N2: DAG.getConstant(Val: ~SignBit, DL, VT: MVT::i32)); |
16721 | return DCI.CombineTo(N, Res0: Lo, Res1: NewHi); |
16722 | } |
16723 | case RISCVISD::SLLW: |
16724 | case RISCVISD::SRAW: |
16725 | case RISCVISD::SRLW: |
16726 | case RISCVISD::RORW: |
16727 | case RISCVISD::ROLW: { |
16728 | // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. |
16729 | if (SimplifyDemandedLowBitsHelper(0, 32) || |
16730 | SimplifyDemandedLowBitsHelper(1, 5)) |
16731 | return SDValue(N, 0); |
16732 | |
16733 | break; |
16734 | } |
16735 | case RISCVISD::CLZW: |
16736 | case RISCVISD::CTZW: { |
16737 | // Only the lower 32 bits of the first operand are read |
16738 | if (SimplifyDemandedLowBitsHelper(0, 32)) |
16739 | return SDValue(N, 0); |
16740 | break; |
16741 | } |
16742 | case RISCVISD::FMV_W_X_RV64: { |
// If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16744 | // conversion is unnecessary and can be replaced with the |
16745 | // FMV_X_ANYEXTW_RV64 operand. |
16746 | SDValue Op0 = N->getOperand(Num: 0); |
16747 | if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64) |
16748 | return Op0.getOperand(i: 0); |
16749 | break; |
16750 | } |
16751 | case RISCVISD::FMV_X_ANYEXTH: |
16752 | case RISCVISD::FMV_X_ANYEXTW_RV64: { |
16753 | SDLoc DL(N); |
16754 | SDValue Op0 = N->getOperand(Num: 0); |
16755 | MVT VT = N->getSimpleValueType(ResNo: 0); |
16756 | // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the |
16757 | // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 |
16758 | // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. |
16759 | if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && |
16760 | Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || |
16761 | (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && |
16762 | Op0->getOpcode() == RISCVISD::FMV_H_X)) { |
16763 | assert(Op0.getOperand(0).getValueType() == VT && |
16764 | "Unexpected value type!" ); |
16765 | return Op0.getOperand(i: 0); |
16766 | } |
16767 | |
16768 | // This is a target-specific version of a DAGCombine performed in |
16769 | // DAGCombiner::visitBITCAST. It performs the equivalent of: |
16770 | // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) |
16771 | // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) |
16772 | if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || |
16773 | !Op0.getNode()->hasOneUse()) |
16774 | break; |
16775 | SDValue NewFMV = DAG.getNode(Opcode: N->getOpcode(), DL, VT, Operand: Op0.getOperand(i: 0)); |
16776 | unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; |
16777 | APInt SignBit = APInt::getSignMask(BitWidth: FPBits).sext(width: VT.getSizeInBits()); |
16778 | if (Op0.getOpcode() == ISD::FNEG) |
16779 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewFMV, |
16780 | N2: DAG.getConstant(Val: SignBit, DL, VT)); |
16781 | |
16782 | assert(Op0.getOpcode() == ISD::FABS); |
16783 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NewFMV, |
16784 | N2: DAG.getConstant(Val: ~SignBit, DL, VT)); |
16785 | } |
16786 | case ISD::ABS: { |
16787 | EVT VT = N->getValueType(ResNo: 0); |
16788 | SDValue N0 = N->getOperand(Num: 0); |
16789 | // abs (sext) -> zext (abs) |
16790 | // abs (zext) -> zext (handled elsewhere) |
16791 | if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) { |
16792 | SDValue Src = N0.getOperand(i: 0); |
16793 | SDLoc DL(N); |
16794 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, |
16795 | Operand: DAG.getNode(Opcode: ISD::ABS, DL, VT: Src.getValueType(), Operand: Src)); |
16796 | } |
16797 | break; |
16798 | } |
16799 | case ISD::ADD: { |
16800 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16801 | return V; |
16802 | if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) |
16803 | return V; |
16804 | return performADDCombine(N, DCI, Subtarget); |
16805 | } |
16806 | case ISD::SUB: { |
16807 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16808 | return V; |
16809 | return performSUBCombine(N, DAG, Subtarget); |
16810 | } |
16811 | case ISD::AND: |
16812 | return performANDCombine(N, DCI, Subtarget); |
16813 | case ISD::OR: { |
16814 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16815 | return V; |
16816 | return performORCombine(N, DCI, Subtarget); |
16817 | } |
16818 | case ISD::XOR: |
16819 | return performXORCombine(N, DAG, Subtarget); |
16820 | case ISD::MUL: |
16821 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
16822 | return V; |
16823 | return performMULCombine(N, DAG, DCI, Subtarget); |
16824 | case ISD::SDIV: |
16825 | case ISD::UDIV: |
16826 | case ISD::SREM: |
16827 | case ISD::UREM: |
16828 | if (SDValue V = combineBinOpOfZExt(N, DAG)) |
16829 | return V; |
16830 | break; |
16831 | case ISD::FADD: |
16832 | case ISD::UMAX: |
16833 | case ISD::UMIN: |
16834 | case ISD::SMAX: |
16835 | case ISD::SMIN: |
16836 | case ISD::FMAXNUM: |
16837 | case ISD::FMINNUM: { |
16838 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
16839 | return V; |
16840 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
16841 | return V; |
16842 | return SDValue(); |
16843 | } |
16844 | case ISD::SETCC: |
16845 | return performSETCCCombine(N, DAG, Subtarget); |
16846 | case ISD::SIGN_EXTEND_INREG: |
16847 | return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); |
16848 | case ISD::ZERO_EXTEND: |
16849 | // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during |
16850 | // type legalization. This is safe because fp_to_uint produces poison if |
16851 | // it overflows. |
16852 | if (N->getValueType(ResNo: 0) == MVT::i64 && Subtarget.is64Bit()) { |
16853 | SDValue Src = N->getOperand(Num: 0); |
16854 | if (Src.getOpcode() == ISD::FP_TO_UINT && |
16855 | isTypeLegal(VT: Src.getOperand(i: 0).getValueType())) |
16856 | return DAG.getNode(Opcode: ISD::FP_TO_UINT, DL: SDLoc(N), VT: MVT::i64, |
16857 | Operand: Src.getOperand(i: 0)); |
16858 | if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && |
16859 | isTypeLegal(VT: Src.getOperand(i: 1).getValueType())) { |
16860 | SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other); |
16861 | SDValue Res = DAG.getNode(Opcode: ISD::STRICT_FP_TO_UINT, DL: SDLoc(N), VTList: VTs, |
16862 | N1: Src.getOperand(i: 0), N2: Src.getOperand(i: 1)); |
16863 | DCI.CombineTo(N, Res); |
16864 | DAG.ReplaceAllUsesOfValueWith(From: Src.getValue(R: 1), To: Res.getValue(R: 1)); |
16865 | DCI.recursivelyDeleteUnusedNodes(N: Src.getNode()); |
16866 | return SDValue(N, 0); // Return N so it doesn't get rechecked. |
16867 | } |
16868 | } |
16869 | return SDValue(); |
16870 | case RISCVISD::TRUNCATE_VECTOR_VL: |
16871 | if (SDValue V = combineTruncOfSraSext(N, DAG)) |
16872 | return V; |
16873 | return combineTruncToVnclip(N, DAG, Subtarget); |
16874 | case ISD::TRUNCATE: |
16875 | return performTRUNCATECombine(N, DAG, Subtarget); |
16876 | case ISD::SELECT: |
16877 | return performSELECTCombine(N, DAG, Subtarget); |
16878 | case RISCVISD::CZERO_EQZ: |
16879 | case RISCVISD::CZERO_NEZ: { |
16880 | SDValue Val = N->getOperand(Num: 0); |
16881 | SDValue Cond = N->getOperand(Num: 1); |
16882 | |
16883 | unsigned Opc = N->getOpcode(); |
16884 | |
16885 | // czero_eqz x, x -> x |
16886 | if (Opc == RISCVISD::CZERO_EQZ && Val == Cond) |
16887 | return Val; |
16888 | |
16889 | unsigned InvOpc = |
16890 | Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ; |
16891 | |
16892 | // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1. |
16893 | // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1. |
16894 | if (Cond.getOpcode() == ISD::XOR && isOneConstant(V: Cond.getOperand(i: 1))) { |
16895 | SDValue NewCond = Cond.getOperand(i: 0); |
16896 | APInt Mask = APInt::getBitsSetFrom(numBits: NewCond.getValueSizeInBits(), loBit: 1); |
16897 | if (DAG.MaskedValueIsZero(Op: NewCond, Mask)) |
16898 | return DAG.getNode(Opcode: InvOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Val, N2: NewCond); |
16899 | } |
16900 | // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y |
16901 | // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y |
16902 | // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y |
16903 | // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y |
16904 | if (Cond.getOpcode() == ISD::SETCC && isNullConstant(V: Cond.getOperand(i: 1))) { |
16905 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get(); |
16906 | if (ISD::isIntEqualitySetCC(Code: CCVal)) |
16907 | return DAG.getNode(Opcode: CCVal == ISD::SETNE ? Opc : InvOpc, DL: SDLoc(N), |
16908 | VT: N->getValueType(ResNo: 0), N1: Val, N2: Cond.getOperand(i: 0)); |
16909 | } |
16910 | return SDValue(); |
16911 | } |
16912 | case RISCVISD::SELECT_CC: { |
// Try to fold or simplify this SELECT_CC.
16914 | SDValue LHS = N->getOperand(Num: 0); |
16915 | SDValue RHS = N->getOperand(Num: 1); |
16916 | SDValue CC = N->getOperand(Num: 2); |
16917 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get(); |
16918 | SDValue TrueV = N->getOperand(Num: 3); |
16919 | SDValue FalseV = N->getOperand(Num: 4); |
16920 | SDLoc DL(N); |
16921 | EVT VT = N->getValueType(ResNo: 0); |
16922 | |
16923 | // If the True and False values are the same, we don't need a select_cc. |
16924 | if (TrueV == FalseV) |
16925 | return TrueV; |
16926 | |
16927 | // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z |
16928 | // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y |
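// As an illustrative example, on RV64 with y = 5 and z = 3:
//   (select (x < 0), 5, 3) -> ((x >> 63) & 2) + 3
// The arithmetic shift produces all-ones when x is negative and zero
// otherwise, so the AND picks either (y - z) or 0 before the add.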
16929 | if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(Val: TrueV) && |
16930 | isa<ConstantSDNode>(Val: FalseV) && isNullConstant(V: RHS) && |
16931 | (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { |
16932 | if (CCVal == ISD::CondCode::SETGE) |
16933 | std::swap(a&: TrueV, b&: FalseV); |
16934 | |
16935 | int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue(); |
16936 | int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue(); |
// Only handle simm12 constants; values outside this range would have to be
// materialized in a register anyway.
16939 | if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) && |
16940 | isInt<12>(x: TrueSImm - FalseSImm)) { |
16941 | SDValue SRA = |
16942 | DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS, |
16943 | N2: DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT)); |
16944 | SDValue AND = |
16945 | DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA, |
16946 | N2: DAG.getConstant(Val: TrueSImm - FalseSImm, DL, VT)); |
16947 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV); |
16948 | } |
16949 | |
16950 | if (CCVal == ISD::CondCode::SETGE) |
16951 | std::swap(a&: TrueV, b&: FalseV); |
16952 | } |
16953 | |
16954 | if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) |
16955 | return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0), |
16956 | Ops: {LHS, RHS, CC, TrueV, FalseV}); |
16957 | |
16958 | if (!Subtarget.hasConditionalMoveFusion()) { |
16959 | // (select c, -1, y) -> -c | y |
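      // When c is 1, -c is all ones and the OR yields -1; when c is 0, -c is
      // 0 and the OR yields y. The folds below follow the same reasoning with
      // AND and/or the inverted condition.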
16960 | if (isAllOnesConstant(V: TrueV)) { |
16961 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal); |
16962 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16963 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: FalseV); |
16964 | } |
16965 | // (select c, y, -1) -> -!c | y |
16966 | if (isAllOnesConstant(V: FalseV)) { |
16967 | SDValue C = |
16968 | DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT)); |
16969 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16970 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: TrueV); |
16971 | } |
16972 | |
16973 | // (select c, 0, y) -> -!c & y |
16974 | if (isNullConstant(V: TrueV)) { |
16975 | SDValue C = |
16976 | DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT)); |
16977 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16978 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: FalseV); |
16979 | } |
16980 | // (select c, y, 0) -> -c & y |
16981 | if (isNullConstant(V: FalseV)) { |
16982 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal); |
16983 | SDValue Neg = DAG.getNegative(Val: C, DL, VT); |
16984 | return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: TrueV); |
16985 | } |
16986 | // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) |
16987 | // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) |
16988 | if (((isOneConstant(V: FalseV) && LHS == TrueV && |
16989 | CCVal == ISD::CondCode::SETNE) || |
16990 | (isOneConstant(V: TrueV) && LHS == FalseV && |
16991 | CCVal == ISD::CondCode::SETEQ)) && |
16992 | isNullConstant(V: RHS)) { |
        // LHS is used twice below, so freeze it to be safe.
16994 | LHS = DAG.getFreeze(V: LHS); |
16995 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::CondCode::SETEQ); |
16996 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: LHS, N2: C); |
16997 | } |
16998 | } |
16999 | |
17000 | // If both true/false are an xor with 1, pull through the select. |
17001 | // This can occur after op legalization if both operands are setccs that |
17002 | // require an xor to invert. |
17003 | // FIXME: Generalize to other binary ops with identical operand? |
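    // For example: (select_cc lhs, rhs, cc, (xor a, 1), (xor b, 1)) becomes
    // (xor (select_cc lhs, rhs, cc, a, b), 1).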
17004 | if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR && |
17005 | TrueV.getOperand(i: 1) == FalseV.getOperand(i: 1) && |
17006 | isOneConstant(V: TrueV.getOperand(i: 1)) && |
17007 | TrueV.hasOneUse() && FalseV.hasOneUse()) { |
17008 | SDValue NewSel = DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, N1: LHS, N2: RHS, N3: CC, |
17009 | N4: TrueV.getOperand(i: 0), N5: FalseV.getOperand(i: 0)); |
17010 | return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewSel, N2: TrueV.getOperand(i: 1)); |
17011 | } |
17012 | |
17013 | return SDValue(); |
17014 | } |
17015 | case RISCVISD::BR_CC: { |
17016 | SDValue LHS = N->getOperand(Num: 1); |
17017 | SDValue RHS = N->getOperand(Num: 2); |
17018 | SDValue CC = N->getOperand(Num: 3); |
17019 | SDLoc DL(N); |
17020 | |
17021 | if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) |
17022 | return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: N->getValueType(ResNo: 0), |
17023 | N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4)); |
17024 | |
17025 | return SDValue(); |
17026 | } |
17027 | case ISD::BITREVERSE: |
17028 | return performBITREVERSECombine(N, DAG, Subtarget); |
17029 | case ISD::FP_TO_SINT: |
17030 | case ISD::FP_TO_UINT: |
17031 | return performFP_TO_INTCombine(N, DCI, Subtarget); |
17032 | case ISD::FP_TO_SINT_SAT: |
17033 | case ISD::FP_TO_UINT_SAT: |
17034 | return performFP_TO_INT_SATCombine(N, DCI, Subtarget); |
17035 | case ISD::FCOPYSIGN: { |
17036 | EVT VT = N->getValueType(ResNo: 0); |
17037 | if (!VT.isVector()) |
17038 | break; |
17039 | // There is a form of VFSGNJ which injects the negated sign of its second |
17040 | // operand. Try and bubble any FNEG up after the extend/round to produce |
    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
    // its truncating flag (operand 1, TRUNC=1) set.
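    // For example: (fcopysign x, (fpext (fneg y))) is rewritten as
    // (fcopysign x, (fneg (fpext y))), which can then use the sign-negating
    // VFSGNJN form mentioned above.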
17043 | SDValue In2 = N->getOperand(Num: 1); |
17044 | // Avoid cases where the extend/round has multiple uses, as duplicating |
17045 | // those is typically more expensive than removing a fneg. |
17046 | if (!In2.hasOneUse()) |
17047 | break; |
17048 | if (In2.getOpcode() != ISD::FP_EXTEND && |
17049 | (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(i: 1) != 0)) |
17050 | break; |
17051 | In2 = In2.getOperand(i: 0); |
17052 | if (In2.getOpcode() != ISD::FNEG) |
17053 | break; |
17054 | SDLoc DL(N); |
17055 | SDValue NewFPExtRound = DAG.getFPExtendOrRound(Op: In2.getOperand(i: 0), DL, VT); |
17056 | return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: N->getOperand(Num: 0), |
17057 | N2: DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: NewFPExtRound)); |
17058 | } |
17059 | case ISD::MGATHER: { |
17060 | const auto *MGN = cast<MaskedGatherSDNode>(Val: N); |
17061 | const EVT VT = N->getValueType(ResNo: 0); |
17062 | SDValue Index = MGN->getIndex(); |
17063 | SDValue ScaleOp = MGN->getScale(); |
17064 | ISD::MemIndexType IndexType = MGN->getIndexType(); |
17065 | assert(!MGN->isIndexScaled() && |
17066 | "Scaled gather/scatter should not be formed" ); |
17067 | |
17068 | SDLoc DL(N); |
17069 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
17070 | return DAG.getMaskedGather( |
17071 | VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL, |
17072 | Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), |
17073 | MGN->getBasePtr(), Index, ScaleOp}, |
17074 | MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType()); |
17075 | |
17076 | if (narrowIndex(N&: Index, IndexType, DAG)) |
17077 | return DAG.getMaskedGather( |
17078 | VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL, |
17079 | Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), |
17080 | MGN->getBasePtr(), Index, ScaleOp}, |
17081 | MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType()); |
17082 | |
17083 | if (Index.getOpcode() == ISD::BUILD_VECTOR && |
17084 | MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) { |
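      // If the index operand is a constant arithmetic progression (a VID
      // sequence with an integral step), the gather is really a strided load.
      // For example (illustrative, treating the indices as byte offsets):
      // indices <0, 8, 16, 24> with base pointer p become a strided load from
      // p with stride 8, followed by a vp.select to apply the passthru.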
      // The sequence is computed in XLenVT, not in the type of Index. Tell
      // isSimpleVIDSequence this so we avoid overflow.
17087 | if (std::optional<VIDSequence> SimpleVID = |
17088 | isSimpleVIDSequence(Op: Index, EltSizeInBits: Subtarget.getXLen()); |
17089 | SimpleVID && SimpleVID->StepDenominator == 1) { |
17090 | const int64_t StepNumerator = SimpleVID->StepNumerator; |
17091 | const int64_t Addend = SimpleVID->Addend; |
17092 | |
17093 | // Note: We don't need to check alignment here since (by assumption |
        // from the existence of the gather), our offsets must be sufficiently
17095 | // aligned. |
17096 | |
17097 | const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
17098 | assert(MGN->getBasePtr()->getValueType(0) == PtrVT); |
17099 | assert(IndexType == ISD::UNSIGNED_SCALED); |
17100 | SDValue BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: MGN->getBasePtr(), |
17101 | N2: DAG.getConstant(Val: Addend, DL, VT: PtrVT)); |
17102 | |
17103 | SDValue EVL = DAG.getElementCount(DL, VT: Subtarget.getXLenVT(), |
17104 | EC: VT.getVectorElementCount()); |
17105 | SDValue StridedLoad = |
17106 | DAG.getStridedLoadVP(VT, DL, Chain: MGN->getChain(), Ptr: BasePtr, |
17107 | Stride: DAG.getConstant(Val: StepNumerator, DL, VT: XLenVT), |
17108 | Mask: MGN->getMask(), EVL, MMO: MGN->getMemOperand()); |
17109 | SDValue VPSelect = DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT, N1: MGN->getMask(), |
17110 | N2: StridedLoad, N3: MGN->getPassThru(), N4: EVL); |
17111 | return DAG.getMergeValues(Ops: {VPSelect, SDValue(StridedLoad.getNode(), 1)}, |
17112 | dl: DL); |
17113 | } |
17114 | } |
17115 | |
17116 | SmallVector<int> ShuffleMask; |
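    // If the index vector merely permutes a contiguous block of memory, the
    // gather can instead be a single contiguous masked load plus a shuffle.
    // For example (illustrative): i32 byte offsets <12, 8, 4, 0> read four
    // consecutive words and reverse them.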
17117 | if (MGN->getExtensionType() == ISD::NON_EXTLOAD && |
17118 | matchIndexAsShuffle(VT, Index, Mask: MGN->getMask(), ShuffleMask)) { |
17119 | SDValue Load = DAG.getMaskedLoad(VT, dl: DL, Chain: MGN->getChain(), |
17120 | Base: MGN->getBasePtr(), Offset: DAG.getUNDEF(VT: XLenVT), |
17121 | Mask: MGN->getMask(), Src0: DAG.getUNDEF(VT), |
17122 | MemVT: MGN->getMemoryVT(), MMO: MGN->getMemOperand(), |
17123 | AM: ISD::UNINDEXED, ISD::NON_EXTLOAD); |
17124 | SDValue Shuffle = |
17125 | DAG.getVectorShuffle(VT, dl: DL, N1: Load, N2: DAG.getUNDEF(VT), Mask: ShuffleMask); |
17126 | return DAG.getMergeValues(Ops: {Shuffle, Load.getValue(R: 1)}, dl: DL); |
17127 | } |
17128 | |
17129 | if (MGN->getExtensionType() == ISD::NON_EXTLOAD && |
17130 | matchIndexAsWiderOp(VT, Index, Mask: MGN->getMask(), |
17131 | BaseAlign: MGN->getMemOperand()->getBaseAlign(), ST: Subtarget)) { |
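      // If adjacent pairs of indices address adjacent memory, each pair can
      // be combined into one element of twice the width, halving the number
      // of indexed accesses. For example (illustrative): an i32 gather with
      // byte offsets <0, 4, 32, 36> becomes an i64 gather with offsets
      // <0, 32>, bitcast back to the original type, provided the mask allows
      // it.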
17132 | SmallVector<SDValue> NewIndices; |
17133 | for (unsigned i = 0; i < Index->getNumOperands(); i += 2) |
17134 | NewIndices.push_back(Elt: Index.getOperand(i)); |
17135 | EVT IndexVT = Index.getValueType() |
17136 | .getHalfNumVectorElementsVT(Context&: *DAG.getContext()); |
17137 | Index = DAG.getBuildVector(VT: IndexVT, DL, Ops: NewIndices); |
17138 | |
17139 | unsigned ElementSize = VT.getScalarStoreSize(); |
17140 | EVT WideScalarVT = MVT::getIntegerVT(BitWidth: ElementSize * 8 * 2); |
17141 | auto EltCnt = VT.getVectorElementCount(); |
17142 | assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!" ); |
17143 | EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideScalarVT, |
17144 | EC: EltCnt.divideCoefficientBy(RHS: 2)); |
17145 | SDValue Passthru = DAG.getBitcast(VT: WideVT, V: MGN->getPassThru()); |
17146 | EVT MaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1, |
17147 | EC: EltCnt.divideCoefficientBy(RHS: 2)); |
17148 | SDValue Mask = DAG.getSplat(VT: MaskVT, DL, Op: DAG.getConstant(Val: 1, DL, VT: MVT::i1)); |
17149 | |
17150 | SDValue Gather = |
17151 | DAG.getMaskedGather(VTs: DAG.getVTList(VT1: WideVT, VT2: MVT::Other), MemVT: WideVT, dl: DL, |
17152 | Ops: {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(), |
17153 | Index, ScaleOp}, |
17154 | MMO: MGN->getMemOperand(), IndexType, ExtTy: ISD::NON_EXTLOAD); |
17155 | SDValue Result = DAG.getBitcast(VT, V: Gather.getValue(R: 0)); |
17156 | return DAG.getMergeValues(Ops: {Result, Gather.getValue(R: 1)}, dl: DL); |
17157 | } |
17158 | break; |
17159 | } |
  case ISD::MSCATTER: {
17161 | const auto *MSN = cast<MaskedScatterSDNode>(Val: N); |
17162 | SDValue Index = MSN->getIndex(); |
17163 | SDValue ScaleOp = MSN->getScale(); |
17164 | ISD::MemIndexType IndexType = MSN->getIndexType(); |
17165 | assert(!MSN->isIndexScaled() && |
17166 | "Scaled gather/scatter should not be formed" ); |
17167 | |
17168 | SDLoc DL(N); |
17169 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
17170 | return DAG.getMaskedScatter( |
17171 | VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL, |
17172 | Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), |
17173 | Index, ScaleOp}, |
17174 | MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore()); |
17175 | |
17176 | if (narrowIndex(N&: Index, IndexType, DAG)) |
17177 | return DAG.getMaskedScatter( |
17178 | VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL, |
17179 | Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), |
17180 | Index, ScaleOp}, |
17181 | MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore()); |
17182 | |
17183 | EVT VT = MSN->getValue()->getValueType(ResNo: 0); |
17184 | SmallVector<int> ShuffleMask; |
17185 | if (!MSN->isTruncatingStore() && |
17186 | matchIndexAsShuffle(VT, Index, Mask: MSN->getMask(), ShuffleMask)) { |
17187 | SDValue Shuffle = DAG.getVectorShuffle(VT, dl: DL, N1: MSN->getValue(), |
17188 | N2: DAG.getUNDEF(VT), Mask: ShuffleMask); |
17189 | return DAG.getMaskedStore(Chain: MSN->getChain(), dl: DL, Val: Shuffle, Base: MSN->getBasePtr(), |
17190 | Offset: DAG.getUNDEF(VT: XLenVT), Mask: MSN->getMask(), |
17191 | MemVT: MSN->getMemoryVT(), MMO: MSN->getMemOperand(), |
17192 | AM: ISD::UNINDEXED, IsTruncating: false); |
17193 | } |
17194 | break; |
17195 | } |
17196 | case ISD::VP_GATHER: { |
17197 | const auto *VPGN = cast<VPGatherSDNode>(Val: N); |
17198 | SDValue Index = VPGN->getIndex(); |
17199 | SDValue ScaleOp = VPGN->getScale(); |
17200 | ISD::MemIndexType IndexType = VPGN->getIndexType(); |
17201 | assert(!VPGN->isIndexScaled() && |
17202 | "Scaled gather/scatter should not be formed" ); |
17203 | |
17204 | SDLoc DL(N); |
17205 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
17206 | return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL, |
17207 | Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index, |
17208 | ScaleOp, VPGN->getMask(), |
17209 | VPGN->getVectorLength()}, |
17210 | MMO: VPGN->getMemOperand(), IndexType); |
17211 | |
17212 | if (narrowIndex(N&: Index, IndexType, DAG)) |
17213 | return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL, |
17214 | Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index, |
17215 | ScaleOp, VPGN->getMask(), |
17216 | VPGN->getVectorLength()}, |
17217 | MMO: VPGN->getMemOperand(), IndexType); |
17218 | |
17219 | break; |
17220 | } |
17221 | case ISD::VP_SCATTER: { |
17222 | const auto *VPSN = cast<VPScatterSDNode>(Val: N); |
17223 | SDValue Index = VPSN->getIndex(); |
17224 | SDValue ScaleOp = VPSN->getScale(); |
17225 | ISD::MemIndexType IndexType = VPSN->getIndexType(); |
17226 | assert(!VPSN->isIndexScaled() && |
17227 | "Scaled gather/scatter should not be formed" ); |
17228 | |
17229 | SDLoc DL(N); |
17230 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
17231 | return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL, |
17232 | Ops: {VPSN->getChain(), VPSN->getValue(), |
17233 | VPSN->getBasePtr(), Index, ScaleOp, |
17234 | VPSN->getMask(), VPSN->getVectorLength()}, |
17235 | MMO: VPSN->getMemOperand(), IndexType); |
17236 | |
17237 | if (narrowIndex(N&: Index, IndexType, DAG)) |
17238 | return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL, |
17239 | Ops: {VPSN->getChain(), VPSN->getValue(), |
17240 | VPSN->getBasePtr(), Index, ScaleOp, |
17241 | VPSN->getMask(), VPSN->getVectorLength()}, |
17242 | MMO: VPSN->getMemOperand(), IndexType); |
17243 | break; |
17244 | } |
17245 | case RISCVISD::SHL_VL: |
17246 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
17247 | return V; |
17248 | [[fallthrough]]; |
17249 | case RISCVISD::SRA_VL: |
17250 | case RISCVISD::SRL_VL: { |
17251 | SDValue ShAmt = N->getOperand(Num: 1); |
17252 | if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { |
17253 | // We don't need the upper 32 bits of a 64-bit element for a shift amount. |
17254 | SDLoc DL(N); |
17255 | SDValue VL = N->getOperand(Num: 4); |
17256 | EVT VT = N->getValueType(ResNo: 0); |
17257 | ShAmt = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT), |
17258 | N2: ShAmt.getOperand(i: 1), N3: VL); |
17259 | return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt, |
17260 | N3: N->getOperand(Num: 2), N4: N->getOperand(Num: 3), N5: N->getOperand(Num: 4)); |
17261 | } |
17262 | break; |
17263 | } |
17264 | case ISD::SRA: |
17265 | if (SDValue V = performSRACombine(N, DAG, Subtarget)) |
17266 | return V; |
17267 | [[fallthrough]]; |
17268 | case ISD::SRL: |
17269 | case ISD::SHL: { |
17270 | if (N->getOpcode() == ISD::SHL) { |
17271 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
17272 | return V; |
17273 | } |
17274 | SDValue ShAmt = N->getOperand(Num: 1); |
17275 | if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { |
17276 | // We don't need the upper 32 bits of a 64-bit element for a shift amount. |
17277 | SDLoc DL(N); |
17278 | EVT VT = N->getValueType(ResNo: 0); |
17279 | ShAmt = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT), |
17280 | N2: ShAmt.getOperand(i: 1), |
17281 | N3: DAG.getRegister(Reg: RISCV::X0, VT: Subtarget.getXLenVT())); |
17282 | return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt); |
17283 | } |
17284 | break; |
17285 | } |
17286 | case RISCVISD::ADD_VL: |
17287 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) |
17288 | return V; |
17289 | return combineToVWMACC(N, DAG, Subtarget); |
17290 | case RISCVISD::VWADD_W_VL: |
17291 | case RISCVISD::VWADDU_W_VL: |
17292 | case RISCVISD::VWSUB_W_VL: |
17293 | case RISCVISD::VWSUBU_W_VL: |
17294 | return performVWADDSUBW_VLCombine(N, DCI, Subtarget); |
17295 | case RISCVISD::SUB_VL: |
17296 | case RISCVISD::MUL_VL: |
17297 | return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); |
17298 | case RISCVISD::VFMADD_VL: |
17299 | case RISCVISD::VFNMADD_VL: |
17300 | case RISCVISD::VFMSUB_VL: |
17301 | case RISCVISD::VFNMSUB_VL: |
17302 | case RISCVISD::STRICT_VFMADD_VL: |
17303 | case RISCVISD::STRICT_VFNMADD_VL: |
17304 | case RISCVISD::STRICT_VFMSUB_VL: |
17305 | case RISCVISD::STRICT_VFNMSUB_VL: |
17306 | return performVFMADD_VLCombine(N, DAG, Subtarget); |
17307 | case RISCVISD::FADD_VL: |
17308 | case RISCVISD::FSUB_VL: |
17309 | case RISCVISD::FMUL_VL: |
17310 | case RISCVISD::VFWADD_W_VL: |
17311 | case RISCVISD::VFWSUB_W_VL: { |
17312 | if (N->getValueType(ResNo: 0).getVectorElementType() == MVT::f32 && |
17313 | !Subtarget.hasVInstructionsF16()) |
17314 | return SDValue(); |
17315 | return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); |
17316 | } |
17317 | case ISD::LOAD: |
17318 | case ISD::STORE: { |
17319 | if (DCI.isAfterLegalizeDAG()) |
17320 | if (SDValue V = performMemPairCombine(N, DCI)) |
17321 | return V; |
17322 | |
17323 | if (N->getOpcode() != ISD::STORE) |
17324 | break; |
17325 | |
17326 | auto *Store = cast<StoreSDNode>(Val: N); |
17327 | SDValue Chain = Store->getChain(); |
17328 | EVT MemVT = Store->getMemoryVT(); |
17329 | SDValue Val = Store->getValue(); |
17330 | SDLoc DL(N); |
17331 | |
17332 | bool IsScalarizable = |
17333 | MemVT.isFixedLengthVector() && ISD::isNormalStore(N: Store) && |
17334 | Store->isSimple() && |
17335 | MemVT.getVectorElementType().bitsLE(VT: Subtarget.getXLenVT()) && |
17336 | isPowerOf2_64(Value: MemVT.getSizeInBits()) && |
17337 | MemVT.getSizeInBits() <= Subtarget.getXLen(); |
17338 | |
17339 | // If sufficiently aligned we can scalarize stores of constant vectors of |
17340 | // any power-of-two size up to XLen bits, provided that they aren't too |
17341 | // expensive to materialize. |
17342 | // vsetivli zero, 2, e8, m1, ta, ma |
17343 | // vmv.v.i v8, 4 |
    // vse8.v v8, (a0)
17345 | // -> |
17346 | // li a1, 1028 |
17347 | // sh a1, 0(a0) |
17348 | if (DCI.isBeforeLegalize() && IsScalarizable && |
17349 | ISD::isBuildVectorOfConstantSDNodes(N: Val.getNode())) { |
17350 | // Get the constant vector bits |
17351 | APInt NewC(Val.getValueSizeInBits(), 0); |
17352 | uint64_t EltSize = Val.getScalarValueSizeInBits(); |
17353 | for (unsigned i = 0; i < Val.getNumOperands(); i++) { |
17354 | if (Val.getOperand(i).isUndef()) |
17355 | continue; |
17356 | NewC.insertBits(SubBits: Val.getConstantOperandAPInt(i).trunc(width: EltSize), |
17357 | bitPosition: i * EltSize); |
17358 | } |
17359 | MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits()); |
17360 | |
17361 | if (RISCVMatInt::getIntMatCost(Val: NewC, Size: Subtarget.getXLen(), STI: Subtarget, |
17362 | CompressionCost: true) <= 2 && |
17363 | allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
17364 | VT: NewVT, MMO: *Store->getMemOperand())) { |
17365 | SDValue NewV = DAG.getConstant(Val: NewC, DL, VT: NewVT); |
17366 | return DAG.getStore(Chain, dl: DL, Val: NewV, Ptr: Store->getBasePtr(), |
17367 | PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(), |
17368 | MMOFlags: Store->getMemOperand()->getFlags()); |
17369 | } |
17370 | } |
17371 | |
17372 | // Similarly, if sufficiently aligned we can scalarize vector copies, e.g. |
17373 | // vsetivli zero, 2, e16, m1, ta, ma |
17374 | // vle16.v v8, (a0) |
17375 | // vse16.v v8, (a1) |
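    // ->
    // lw a2, 0(a0)
    // sw a2, 0(a1)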
17376 | if (auto *L = dyn_cast<LoadSDNode>(Val); |
17377 | L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() && |
17378 | L->hasNUsesOfValue(NUses: 1, Value: 0) && L->hasNUsesOfValue(NUses: 1, Value: 1) && |
17379 | Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(N: L) && |
17380 | L->getMemoryVT() == MemVT) { |
17381 | MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits()); |
17382 | if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
17383 | VT: NewVT, MMO: *Store->getMemOperand()) && |
17384 | allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), |
17385 | VT: NewVT, MMO: *L->getMemOperand())) { |
17386 | SDValue NewL = DAG.getLoad(VT: NewVT, dl: DL, Chain: L->getChain(), Ptr: L->getBasePtr(), |
17387 | PtrInfo: L->getPointerInfo(), Alignment: L->getOriginalAlign(), |
17388 | MMOFlags: L->getMemOperand()->getFlags()); |
17389 | return DAG.getStore(Chain, dl: DL, Val: NewL, Ptr: Store->getBasePtr(), |
17390 | PtrInfo: Store->getPointerInfo(), Alignment: Store->getOriginalAlign(), |
17391 | MMOFlags: Store->getMemOperand()->getFlags()); |
17392 | } |
17393 | } |
17394 | |
17395 | // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. |
17396 | // vfmv.f.s is represented as extract element from 0. Match it late to avoid |
17397 | // any illegal types. |
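    // For example (illustrative, RV64 with SEW=64):
    //   vmv.x.s a1, v8
    //   sd a1, 0(a0)
    // ->
    //   vsetivli zero, 1, e64, m1, ta, ma
    //   vse64.v v8, (a0)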
17398 | if (Val.getOpcode() == RISCVISD::VMV_X_S || |
17399 | (DCI.isAfterLegalizeDAG() && |
17400 | Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
17401 | isNullConstant(V: Val.getOperand(i: 1)))) { |
17402 | SDValue Src = Val.getOperand(i: 0); |
17403 | MVT VecVT = Src.getSimpleValueType(); |
17404 | // VecVT should be scalable and memory VT should match the element type. |
17405 | if (!Store->isIndexed() && VecVT.isScalableVector() && |
17406 | MemVT == VecVT.getVectorElementType()) { |
17407 | SDLoc DL(N); |
17408 | MVT MaskVT = getMaskTypeFor(VecVT); |
17409 | return DAG.getStoreVP( |
17410 | Chain: Store->getChain(), dl: DL, Val: Src, Ptr: Store->getBasePtr(), Offset: Store->getOffset(), |
17411 | Mask: DAG.getConstant(Val: 1, DL, VT: MaskVT), |
17412 | EVL: DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT()), MemVT, |
17413 | MMO: Store->getMemOperand(), AM: Store->getAddressingMode(), |
17414 | IsTruncating: Store->isTruncatingStore(), /*IsCompress*/ IsCompressing: false); |
17415 | } |
17416 | } |
17417 | |
17418 | break; |
17419 | } |
17420 | case ISD::SPLAT_VECTOR: { |
17421 | EVT VT = N->getValueType(ResNo: 0); |
17422 | // Only perform this combine on legal MVT types. |
17423 | if (!isTypeLegal(VT)) |
17424 | break; |
17425 | if (auto Gather = matchSplatAsGather(SplatVal: N->getOperand(Num: 0), VT: VT.getSimpleVT(), DL: N, |
17426 | DAG, Subtarget)) |
17427 | return Gather; |
17428 | break; |
17429 | } |
17430 | case ISD::BUILD_VECTOR: |
17431 | if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, TLI: *this)) |
17432 | return V; |
17433 | break; |
17434 | case ISD::CONCAT_VECTORS: |
17435 | if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, TLI: *this)) |
17436 | return V; |
17437 | break; |
17438 | case ISD::INSERT_VECTOR_ELT: |
17439 | if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, TLI: *this)) |
17440 | return V; |
17441 | break; |
17442 | case RISCVISD::VFMV_V_F_VL: { |
17443 | const MVT VT = N->getSimpleValueType(ResNo: 0); |
17444 | SDValue Passthru = N->getOperand(Num: 0); |
17445 | SDValue Scalar = N->getOperand(Num: 1); |
17446 | SDValue VL = N->getOperand(Num: 2); |
17447 | |
17448 | // If VL is 1, we can use vfmv.s.f. |
17449 | if (isOneConstant(V: VL)) |
17450 | return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
17451 | break; |
17452 | } |
17453 | case RISCVISD::VMV_V_X_VL: { |
17454 | const MVT VT = N->getSimpleValueType(ResNo: 0); |
17455 | SDValue Passthru = N->getOperand(Num: 0); |
17456 | SDValue Scalar = N->getOperand(Num: 1); |
17457 | SDValue VL = N->getOperand(Num: 2); |
17458 | |
17459 | // Tail agnostic VMV.V.X only demands the vector element bitwidth from the |
17460 | // scalar input. |
17461 | unsigned ScalarSize = Scalar.getValueSizeInBits(); |
17462 | unsigned EltWidth = VT.getScalarSizeInBits(); |
17463 | if (ScalarSize > EltWidth && Passthru.isUndef()) |
17464 | if (SimplifyDemandedLowBitsHelper(1, EltWidth)) |
17465 | return SDValue(N, 0); |
17466 | |
    // If VL is 1 and the scalar value won't benefit from an immediate, we can
    // use vmv.s.x.
17469 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar); |
17470 | if (isOneConstant(V: VL) && |
17471 | (!Const || Const->isZero() || |
17472 | !Const->getAPIntValue().sextOrTrunc(width: EltWidth).isSignedIntN(N: 5))) |
17473 | return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
17474 | |
17475 | break; |
17476 | } |
17477 | case RISCVISD::VFMV_S_F_VL: { |
17478 | SDValue Src = N->getOperand(Num: 1); |
17479 | // Try to remove vector->scalar->vector if the scalar->vector is inserting |
17480 | // into an undef vector. |
17481 | // TODO: Could use a vslide or vmv.v.v for non-undef. |
17482 | if (N->getOperand(Num: 0).isUndef() && |
17483 | Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
17484 | isNullConstant(V: Src.getOperand(i: 1)) && |
17485 | Src.getOperand(i: 0).getValueType().isScalableVector()) { |
17486 | EVT VT = N->getValueType(ResNo: 0); |
17487 | EVT SrcVT = Src.getOperand(i: 0).getValueType(); |
17488 | assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); |
17489 | // Widths match, just return the original vector. |
17490 | if (SrcVT == VT) |
17491 | return Src.getOperand(i: 0); |
17492 | // TODO: Use insert_subvector/extract_subvector to change widen/narrow? |
17493 | } |
17494 | [[fallthrough]]; |
17495 | } |
17496 | case RISCVISD::VMV_S_X_VL: { |
17497 | const MVT VT = N->getSimpleValueType(ResNo: 0); |
17498 | SDValue Passthru = N->getOperand(Num: 0); |
17499 | SDValue Scalar = N->getOperand(Num: 1); |
17500 | SDValue VL = N->getOperand(Num: 2); |
17501 | |
17502 | if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() && |
17503 | Scalar.getOperand(i: 0).getValueType() == N->getValueType(ResNo: 0)) |
17504 | return Scalar.getOperand(i: 0); |
17505 | |
    // Use M1 or smaller to avoid over-constraining register allocation.
17507 | const MVT M1VT = getLMUL1VT(VT); |
17508 | if (M1VT.bitsLT(VT)) { |
17509 | SDValue M1Passthru = |
17510 | DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Passthru, |
17511 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
17512 | SDValue Result = |
17513 | DAG.getNode(Opcode: N->getOpcode(), DL, VT: M1VT, N1: M1Passthru, N2: Scalar, N3: VL); |
17514 | Result = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: Passthru, N2: Result, |
17515 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
17516 | return Result; |
17517 | } |
17518 | |
17519 | // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or |
17520 | // higher would involve overly constraining the register allocator for |
17521 | // no purpose. |
17522 | if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar); |
17523 | Const && !Const->isZero() && isInt<5>(x: Const->getSExtValue()) && |
17524 | VT.bitsLE(VT: getLMUL1VT(VT)) && Passthru.isUndef()) |
17525 | return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL); |
17526 | |
17527 | break; |
17528 | } |
17529 | case RISCVISD::VMV_X_S: { |
17530 | SDValue Vec = N->getOperand(Num: 0); |
17531 | MVT VecVT = N->getOperand(Num: 0).getSimpleValueType(); |
17532 | const MVT M1VT = getLMUL1VT(VT: VecVT); |
17533 | if (M1VT.bitsLT(VT: VecVT)) { |
17534 | Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: M1VT, N1: Vec, |
17535 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
17536 | return DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: N->getSimpleValueType(ResNo: 0), Operand: Vec); |
17537 | } |
17538 | break; |
17539 | } |
17540 | case ISD::INTRINSIC_VOID: |
17541 | case ISD::INTRINSIC_W_CHAIN: |
17542 | case ISD::INTRINSIC_WO_CHAIN: { |
17543 | unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; |
17544 | unsigned IntNo = N->getConstantOperandVal(Num: IntOpNo); |
17545 | switch (IntNo) { |
17546 | // By default we do not combine any intrinsic. |
17547 | default: |
17548 | return SDValue(); |
17549 | case Intrinsic::riscv_masked_strided_load: { |
17550 | MVT VT = N->getSimpleValueType(ResNo: 0); |
17551 | auto *Load = cast<MemIntrinsicSDNode>(Val: N); |
17552 | SDValue PassThru = N->getOperand(Num: 2); |
17553 | SDValue Base = N->getOperand(Num: 3); |
17554 | SDValue Stride = N->getOperand(Num: 4); |
17555 | SDValue Mask = N->getOperand(Num: 5); |
17556 | |
17557 | // If the stride is equal to the element size in bytes, we can use |
17558 | // a masked.load. |
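      // For example (illustrative): with e32 elements and a stride of 4
      // bytes, consecutive lanes read consecutive words, which is exactly a
      // unit-stride masked load.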
17559 | const unsigned ElementSize = VT.getScalarStoreSize(); |
17560 | if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride); |
17561 | StrideC && StrideC->getZExtValue() == ElementSize) |
17562 | return DAG.getMaskedLoad(VT, dl: DL, Chain: Load->getChain(), Base, |
17563 | Offset: DAG.getUNDEF(VT: XLenVT), Mask, Src0: PassThru, |
17564 | MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand(), |
17565 | AM: ISD::UNINDEXED, ISD::NON_EXTLOAD); |
17566 | return SDValue(); |
17567 | } |
17568 | case Intrinsic::riscv_masked_strided_store: { |
17569 | auto *Store = cast<MemIntrinsicSDNode>(Val: N); |
17570 | SDValue Value = N->getOperand(Num: 2); |
17571 | SDValue Base = N->getOperand(Num: 3); |
17572 | SDValue Stride = N->getOperand(Num: 4); |
17573 | SDValue Mask = N->getOperand(Num: 5); |
17574 | |
17575 | // If the stride is equal to the element size in bytes, we can use |
17576 | // a masked.store. |
17577 | const unsigned ElementSize = Value.getValueType().getScalarStoreSize(); |
17578 | if (auto *StrideC = dyn_cast<ConstantSDNode>(Val&: Stride); |
17579 | StrideC && StrideC->getZExtValue() == ElementSize) |
17580 | return DAG.getMaskedStore(Chain: Store->getChain(), dl: DL, Val: Value, Base, |
17581 | Offset: DAG.getUNDEF(VT: XLenVT), Mask, |
17582 | MemVT: Value.getValueType(), MMO: Store->getMemOperand(), |
17583 | AM: ISD::UNINDEXED, IsTruncating: false); |
17584 | return SDValue(); |
17585 | } |
17586 | case Intrinsic::riscv_vcpop: |
17587 | case Intrinsic::riscv_vcpop_mask: |
17588 | case Intrinsic::riscv_vfirst: |
17589 | case Intrinsic::riscv_vfirst_mask: { |
17590 | SDValue VL = N->getOperand(Num: 2); |
17591 | if (IntNo == Intrinsic::riscv_vcpop_mask || |
17592 | IntNo == Intrinsic::riscv_vfirst_mask) |
17593 | VL = N->getOperand(Num: 3); |
17594 | if (!isNullConstant(V: VL)) |
17595 | return SDValue(); |
17596 | // If VL is 0, vcpop -> li 0, vfirst -> li -1. |
17597 | SDLoc DL(N); |
17598 | EVT VT = N->getValueType(ResNo: 0); |
17599 | if (IntNo == Intrinsic::riscv_vfirst || |
17600 | IntNo == Intrinsic::riscv_vfirst_mask) |
17601 | return DAG.getConstant(Val: -1, DL, VT); |
17602 | return DAG.getConstant(Val: 0, DL, VT); |
17603 | } |
17604 | } |
17605 | } |
17606 | case ISD::BITCAST: { |
17607 | assert(Subtarget.useRVVForFixedLengthVectors()); |
17608 | SDValue N0 = N->getOperand(Num: 0); |
17609 | EVT VT = N->getValueType(ResNo: 0); |
17610 | EVT SrcVT = N0.getValueType(); |
17611 | // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer |
17612 | // type, widen both sides to avoid a trip through memory. |
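    // For example: (i2 (bitcast v2i1 X)) becomes
    // (i2 (trunc (i8 (bitcast (v8i1 (concat_vectors X, undef, undef,
    // undef)))))).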
17613 | if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && |
17614 | VT.isScalarInteger()) { |
17615 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
17616 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT: SrcVT)); |
17617 | Ops[0] = N0; |
17618 | SDLoc DL(N); |
17619 | N0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: MVT::v8i1, Ops); |
17620 | N0 = DAG.getBitcast(VT: MVT::i8, V: N0); |
17621 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0); |
17622 | } |
17623 | |
17624 | return SDValue(); |
17625 | } |
17626 | } |
17627 | |
17628 | return SDValue(); |
17629 | } |
17630 | |
17631 | bool RISCVTargetLowering::shouldTransformSignedTruncationCheck( |
17632 | EVT XVT, unsigned KeptBits) const { |
  // For vectors, we don't have a preference.
17634 | if (XVT.isVector()) |
17635 | return false; |
17636 | |
17637 | if (XVT != MVT::i32 && XVT != MVT::i64) |
17638 | return false; |
17639 | |
17640 | // We can use sext.w for RV64 or an srai 31 on RV32. |
17641 | if (KeptBits == 32 || KeptBits == 64) |
17642 | return true; |
17643 | |
17644 | // With Zbb we can use sext.h/sext.b. |
17645 | return Subtarget.hasStdExtZbb() && |
17646 | ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) || |
17647 | KeptBits == 16); |
17648 | } |
17649 | |
17650 | bool RISCVTargetLowering::isDesirableToCommuteWithShift( |
17651 | const SDNode *N, CombineLevel Level) const { |
17652 | assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || |
17653 | N->getOpcode() == ISD::SRL) && |
17654 | "Expected shift op" ); |
17655 | |
17656 | // The following folds are only desirable if `(OP _, c1 << c2)` can be |
17657 | // materialised in fewer instructions than `(OP _, c1)`: |
17658 | // |
17659 | // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) |
17660 | // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) |
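  // For example (illustrative, RV64): in (shl (add x, 4095), 1) both 4095 and
  // 8190 need LUI+ADDI to materialise, so the fold is allowed; but in
  // (shl (add x, 2047), 1) the original constant fits an ADDI immediate while
  // the shifted one does not, so the fold is prevented.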
17661 | SDValue N0 = N->getOperand(Num: 0); |
17662 | EVT Ty = N0.getValueType(); |
17663 | if (Ty.isScalarInteger() && |
17664 | (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { |
17665 | auto *C1 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)); |
17666 | auto *C2 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
17667 | if (C1 && C2) { |
17668 | const APInt &C1Int = C1->getAPIntValue(); |
17669 | APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); |
17670 | |
17671 | // We can materialise `c1 << c2` into an add immediate, so it's "free", |
17672 | // and the combine should happen, to potentially allow further combines |
17673 | // later. |
17674 | if (ShiftedC1Int.getSignificantBits() <= 64 && |
17675 | isLegalAddImmediate(Imm: ShiftedC1Int.getSExtValue())) |
17676 | return true; |
17677 | |
17678 | // We can materialise `c1` in an add immediate, so it's "free", and the |
17679 | // combine should be prevented. |
17680 | if (C1Int.getSignificantBits() <= 64 && |
17681 | isLegalAddImmediate(Imm: C1Int.getSExtValue())) |
17682 | return false; |
17683 | |
17684 | // Neither constant will fit into an immediate, so find materialisation |
17685 | // costs. |
17686 | int C1Cost = |
17687 | RISCVMatInt::getIntMatCost(Val: C1Int, Size: Ty.getSizeInBits(), STI: Subtarget, |
17688 | /*CompressionCost*/ true); |
17689 | int ShiftedC1Cost = RISCVMatInt::getIntMatCost( |
17690 | Val: ShiftedC1Int, Size: Ty.getSizeInBits(), STI: Subtarget, |
17691 | /*CompressionCost*/ true); |
17692 | |
17693 | // Materialising `c1` is cheaper than materialising `c1 << c2`, so the |
17694 | // combine should be prevented. |
17695 | if (C1Cost < ShiftedC1Cost) |
17696 | return false; |
17697 | } |
17698 | } |
17699 | return true; |
17700 | } |
17701 | |
17702 | bool RISCVTargetLowering::targetShrinkDemandedConstant( |
17703 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
17704 | TargetLoweringOpt &TLO) const { |
17705 | // Delay this optimization as late as possible. |
17706 | if (!TLO.LegalOps) |
17707 | return false; |
17708 | |
17709 | EVT VT = Op.getValueType(); |
17710 | if (VT.isVector()) |
17711 | return false; |
17712 | |
17713 | unsigned Opcode = Op.getOpcode(); |
17714 | if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) |
17715 | return false; |
17716 | |
17717 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1)); |
17718 | if (!C) |
17719 | return false; |
17720 | |
17721 | const APInt &Mask = C->getAPIntValue(); |
17722 | |
17723 | // Clear all non-demanded bits initially. |
17724 | APInt ShrunkMask = Mask & DemandedBits; |
17725 | |
17726 | // Try to make a smaller immediate by setting undemanded bits. |
17727 | |
17728 | APInt ExpandedMask = Mask | ~DemandedBits; |
17729 | |
17730 | auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { |
17731 | return ShrunkMask.isSubsetOf(RHS: Mask) && Mask.isSubsetOf(RHS: ExpandedMask); |
17732 | }; |
17733 | auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { |
17734 | if (NewMask == Mask) |
17735 | return true; |
17736 | SDLoc DL(Op); |
17737 | SDValue NewC = TLO.DAG.getConstant(Val: NewMask, DL, VT: Op.getValueType()); |
17738 | SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), |
17739 | N1: Op.getOperand(i: 0), N2: NewC); |
17740 | return TLO.CombineTo(O: Op, N: NewOp); |
17741 | }; |
17742 | |
17743 | // If the shrunk mask fits in sign extended 12 bits, let the target |
17744 | // independent code apply it. |
17745 | if (ShrunkMask.isSignedIntN(N: 12)) |
17746 | return false; |
17747 | |
  // AND has a few special cases for zext.
17749 | if (Opcode == ISD::AND) { |
    // Preserve (and X, 0xffff); if zext.h exists use zext.h, otherwise
    // use SLLI + SRLI.
17752 | APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); |
17753 | if (IsLegalMask(NewMask)) |
17754 | return UseMask(NewMask); |
17755 | |
17756 | // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. |
17757 | if (VT == MVT::i64) { |
17758 | APInt NewMask = APInt(64, 0xffffffff); |
17759 | if (IsLegalMask(NewMask)) |
17760 | return UseMask(NewMask); |
17761 | } |
17762 | } |
17763 | |
17764 | // For the remaining optimizations, we need to be able to make a negative |
17765 | // number through a combination of mask and undemanded bits. |
17766 | if (!ExpandedMask.isNegative()) |
17767 | return false; |
17768 | |
  // Find the fewest number of bits needed to represent the negative number.
17770 | unsigned MinSignedBits = ExpandedMask.getSignificantBits(); |
17771 | |
17772 | // Try to make a 12 bit negative immediate. If that fails try to make a 32 |
17773 | // bit negative immediate unless the shrunk immediate already fits in 32 bits. |
17774 | // If we can't create a simm12, we shouldn't change opaque constants. |
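  // For example (illustrative, RV64): for (and X, 0xffffff00) where bits 32
  // and above of the result are not demanded, ExpandedMask is negative with
  // only 9 significant bits, so setting the bits from bit 11 upward turns the
  // mask into -256, which fits in an ANDI immediate.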
17775 | APInt NewMask = ShrunkMask; |
17776 | if (MinSignedBits <= 12) |
17777 | NewMask.setBitsFrom(11); |
17778 | else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(N: 32)) |
17779 | NewMask.setBitsFrom(31); |
17780 | else |
17781 | return false; |
17782 | |
17783 | // Check that our new mask is a subset of the demanded mask. |
17784 | assert(IsLegalMask(NewMask)); |
17785 | return UseMask(NewMask); |
17786 | } |
17787 | |
17788 | static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { |
17789 | static const uint64_t GREVMasks[] = { |
17790 | 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, |
17791 | 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; |
17792 | |
17793 | for (unsigned Stage = 0; Stage != 6; ++Stage) { |
17794 | unsigned Shift = 1 << Stage; |
17795 | if (ShAmt & Shift) { |
17796 | uint64_t Mask = GREVMasks[Stage]; |
17797 | uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); |
17798 | if (IsGORC) |
17799 | Res |= x; |
17800 | x = Res; |
17801 | } |
17802 | } |
17803 | |
17804 | return x; |
17805 | } |
17806 | |
17807 | void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
17808 | KnownBits &Known, |
17809 | const APInt &DemandedElts, |
17810 | const SelectionDAG &DAG, |
17811 | unsigned Depth) const { |
17812 | unsigned BitWidth = Known.getBitWidth(); |
17813 | unsigned Opc = Op.getOpcode(); |
17814 | assert((Opc >= ISD::BUILTIN_OP_END || |
17815 | Opc == ISD::INTRINSIC_WO_CHAIN || |
17816 | Opc == ISD::INTRINSIC_W_CHAIN || |
17817 | Opc == ISD::INTRINSIC_VOID) && |
17818 | "Should use MaskedValueIsZero if you don't know whether Op" |
17819 | " is a target node!" ); |
17820 | |
17821 | Known.resetAll(); |
17822 | switch (Opc) { |
17823 | default: break; |
17824 | case RISCVISD::SELECT_CC: { |
17825 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 4), Depth: Depth + 1); |
17826 | // If we don't know any bits, early out. |
17827 | if (Known.isUnknown()) |
17828 | break; |
17829 | KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 3), Depth: Depth + 1); |
17830 | |
17831 | // Only known if known in both the LHS and RHS. |
17832 | Known = Known.intersectWith(RHS: Known2); |
17833 | break; |
17834 | } |
17835 | case RISCVISD::CZERO_EQZ: |
17836 | case RISCVISD::CZERO_NEZ: |
17837 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17838 | // Result is either all zero or operand 0. We can propagate zeros, but not |
17839 | // ones. |
17840 | Known.One.clearAllBits(); |
17841 | break; |
17842 | case RISCVISD::REMUW: { |
17843 | KnownBits Known2; |
17844 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17845 | Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
17846 | // We only care about the lower 32 bits. |
17847 | Known = KnownBits::urem(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32)); |
17848 | // Restore the original width by sign extending. |
17849 | Known = Known.sext(BitWidth); |
17850 | break; |
17851 | } |
17852 | case RISCVISD::DIVUW: { |
17853 | KnownBits Known2; |
17854 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17855 | Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
17856 | // We only care about the lower 32 bits. |
17857 | Known = KnownBits::udiv(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32)); |
17858 | // Restore the original width by sign extending. |
17859 | Known = Known.sext(BitWidth); |
17860 | break; |
17861 | } |
17862 | case RISCVISD::SLLW: { |
17863 | KnownBits Known2; |
17864 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17865 | Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1); |
17866 | Known = KnownBits::shl(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 5).zext(BitWidth: 32)); |
17867 | // Restore the original width by sign extending. |
17868 | Known = Known.sext(BitWidth); |
17869 | break; |
17870 | } |
17871 | case RISCVISD::CTZW: { |
17872 | KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17873 | unsigned PossibleTZ = Known2.trunc(BitWidth: 32).countMaxTrailingZeros(); |
17874 | unsigned LowBits = llvm::bit_width(Value: PossibleTZ); |
17875 | Known.Zero.setBitsFrom(LowBits); |
17876 | break; |
17877 | } |
17878 | case RISCVISD::CLZW: { |
17879 | KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17880 | unsigned PossibleLZ = Known2.trunc(BitWidth: 32).countMaxLeadingZeros(); |
17881 | unsigned LowBits = llvm::bit_width(Value: PossibleLZ); |
17882 | Known.Zero.setBitsFrom(LowBits); |
17883 | break; |
17884 | } |
17885 | case RISCVISD::BREV8: |
17886 | case RISCVISD::ORC_B: { |
17887 | // FIXME: This is based on the non-ratified Zbp GREV and GORC where a |
17888 | // control value of 7 is equivalent to brev8 and orc.b. |
17889 | Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1); |
17890 | bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; |
17891 | // To compute zeros, we need to invert the value and invert it back after. |
17892 | Known.Zero = |
17893 | ~computeGREVOrGORC(x: ~Known.Zero.getZExtValue(), ShAmt: 7, IsGORC); |
17894 | Known.One = computeGREVOrGORC(x: Known.One.getZExtValue(), ShAmt: 7, IsGORC); |
17895 | break; |
17896 | } |
17897 | case RISCVISD::READ_VLENB: { |
17898 | // We can use the minimum and maximum VLEN values to bound VLENB. We |
17899 | // know VLEN must be a power of two. |
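    // For example (illustrative): with a minimum VLEN of 128 and a maximum of
    // 512, VLENB is a power of two between 16 and 64, so bits 0..3 and bits 7
    // and above are known zero.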
17900 | const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; |
17901 | const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; |
17902 | assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?" ); |
17903 | Known.Zero.setLowBits(Log2_32(Value: MinVLenB)); |
17904 | Known.Zero.setBitsFrom(Log2_32(Value: MaxVLenB)+1); |
17905 | if (MaxVLenB == MinVLenB) |
17906 | Known.One.setBit(Log2_32(Value: MinVLenB)); |
17907 | break; |
17908 | } |
17909 | case RISCVISD::FCLASS: { |
17910 | // fclass will only set one of the low 10 bits. |
17911 | Known.Zero.setBitsFrom(10); |
17912 | break; |
17913 | } |
17914 | case ISD::INTRINSIC_W_CHAIN: |
17915 | case ISD::INTRINSIC_WO_CHAIN: { |
17916 | unsigned IntNo = |
17917 | Op.getConstantOperandVal(i: Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); |
17918 | switch (IntNo) { |
17919 | default: |
17920 | // We can't do anything for most intrinsics. |
17921 | break; |
17922 | case Intrinsic::riscv_vsetvli: |
17923 | case Intrinsic::riscv_vsetvlimax: { |
17924 | bool HasAVL = IntNo == Intrinsic::riscv_vsetvli; |
17925 | unsigned VSEW = Op.getConstantOperandVal(i: HasAVL + 1); |
17926 | RISCVII::VLMUL VLMUL = |
17927 | static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(i: HasAVL + 2)); |
17928 | unsigned SEW = RISCVVType::decodeVSEW(VSEW); |
17929 | auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL); |
17930 | uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW; |
17931 | MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul; |
17932 | |
      // The result of vsetvli must not be larger than AVL.
17934 | if (HasAVL && isa<ConstantSDNode>(Val: Op.getOperand(i: 1))) |
17935 | MaxVL = std::min(a: MaxVL, b: Op.getConstantOperandVal(i: 1)); |
17936 | |
17937 | unsigned KnownZeroFirstBit = Log2_32(Value: MaxVL) + 1; |
17938 | if (BitWidth > KnownZeroFirstBit) |
17939 | Known.Zero.setBitsFrom(KnownZeroFirstBit); |
17940 | break; |
17941 | } |
17942 | } |
17943 | break; |
17944 | } |
17945 | } |
17946 | } |
17947 | |
17948 | unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( |
17949 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
17950 | unsigned Depth) const { |
17951 | switch (Op.getOpcode()) { |
17952 | default: |
17953 | break; |
17954 | case RISCVISD::SELECT_CC: { |
17955 | unsigned Tmp = |
17956 | DAG.ComputeNumSignBits(Op: Op.getOperand(i: 3), DemandedElts, Depth: Depth + 1); |
17957 | if (Tmp == 1) return 1; // Early out. |
17958 | unsigned Tmp2 = |
17959 | DAG.ComputeNumSignBits(Op: Op.getOperand(i: 4), DemandedElts, Depth: Depth + 1); |
17960 | return std::min(a: Tmp, b: Tmp2); |
17961 | } |
17962 | case RISCVISD::CZERO_EQZ: |
17963 | case RISCVISD::CZERO_NEZ: |
17964 | // Output is either all zero or operand 0. We can propagate sign bit count |
17965 | // from operand 0. |
17966 | return DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17967 | case RISCVISD::ABSW: { |
17968 | // We expand this at isel to negw+max. The result will have 33 sign bits |
17969 | // if the input has at least 33 sign bits. |
17970 | unsigned Tmp = |
17971 | DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1); |
17972 | if (Tmp < 33) return 1; |
17973 | return 33; |
17974 | } |
17975 | case RISCVISD::SLLW: |
17976 | case RISCVISD::SRAW: |
17977 | case RISCVISD::SRLW: |
17978 | case RISCVISD::DIVW: |
17979 | case RISCVISD::DIVUW: |
17980 | case RISCVISD::REMUW: |
17981 | case RISCVISD::ROLW: |
17982 | case RISCVISD::RORW: |
17983 | case RISCVISD::FCVT_W_RV64: |
17984 | case RISCVISD::FCVT_WU_RV64: |
17985 | case RISCVISD::STRICT_FCVT_W_RV64: |
17986 | case RISCVISD::STRICT_FCVT_WU_RV64: |
17987 | // TODO: As the result is sign-extended, this is conservatively correct. A |
17988 | // more precise answer could be calculated for SRAW depending on known |
17989 | // bits in the shift amount. |
17990 | return 33; |
17991 | case RISCVISD::VMV_X_S: { |
17992 | // The number of sign bits of the scalar result is computed by obtaining the |
17993 | // element type of the input vector operand, subtracting its width from the |
17994 | // XLEN, and then adding one (sign bit within the element type). If the |
17995 | // element type is wider than XLen, the least-significant XLEN bits are |
17996 | // taken. |
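    // For example, extracting from a vector of i8 elements on RV64 gives at
    // least 64 - 8 + 1 = 57 sign bits.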
17997 | unsigned XLen = Subtarget.getXLen(); |
17998 | unsigned EltBits = Op.getOperand(i: 0).getScalarValueSizeInBits(); |
17999 | if (EltBits <= XLen) |
18000 | return XLen - EltBits + 1; |
18001 | break; |
18002 | } |
18003 | case ISD::INTRINSIC_W_CHAIN: { |
18004 | unsigned IntNo = Op.getConstantOperandVal(i: 1); |
18005 | switch (IntNo) { |
18006 | default: |
18007 | break; |
18008 | case Intrinsic::riscv_masked_atomicrmw_xchg_i64: |
18009 | case Intrinsic::riscv_masked_atomicrmw_add_i64: |
18010 | case Intrinsic::riscv_masked_atomicrmw_sub_i64: |
18011 | case Intrinsic::riscv_masked_atomicrmw_nand_i64: |
18012 | case Intrinsic::riscv_masked_atomicrmw_max_i64: |
18013 | case Intrinsic::riscv_masked_atomicrmw_min_i64: |
18014 | case Intrinsic::riscv_masked_atomicrmw_umax_i64: |
18015 | case Intrinsic::riscv_masked_atomicrmw_umin_i64: |
18016 | case Intrinsic::riscv_masked_cmpxchg_i64: |
18017 | // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated |
18018 | // narrow atomic operation. These are implemented using atomic |
18019 | // operations at the minimum supported atomicrmw/cmpxchg width whose |
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 bits for both RV64 and RV32.
18022 | assert(Subtarget.getXLen() == 64); |
18023 | assert(getMinCmpXchgSizeInBits() == 32); |
18024 | assert(Subtarget.hasStdExtA()); |
18025 | return 33; |
18026 | } |
18027 | break; |
18028 | } |
18029 | } |
18030 | |
18031 | return 1; |
18032 | } |
18033 | |
18034 | bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode( |
18035 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
18036 | bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { |
18037 | |
18038 | // TODO: Add more target nodes. |
18039 | switch (Op.getOpcode()) { |
18040 | case RISCVISD::SELECT_CC: |
18041 | // Integer select_cc cannot create poison. |
18042 | // TODO: What are the FP poison semantics? |
18043 | // TODO: This instruction blocks poison from the unselected operand, can |
18044 | // we do anything with that? |
18045 | return !Op.getValueType().isInteger(); |
18046 | } |
18047 | return TargetLowering::canCreateUndefOrPoisonForTargetNode( |
18048 | Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth); |
18049 | } |
18050 | |
18051 | const Constant * |
18052 | RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { |
18053 | assert(Ld && "Unexpected null LoadSDNode" ); |
18054 | if (!ISD::isNormalLoad(N: Ld)) |
18055 | return nullptr; |
18056 | |
18057 | SDValue Ptr = Ld->getBasePtr(); |
18058 | |
18059 | // Only constant pools with no offset are supported. |
18060 | auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { |
18061 | auto *CNode = dyn_cast<ConstantPoolSDNode>(Val&: Ptr); |
18062 | if (!CNode || CNode->isMachineConstantPoolEntry() || |
18063 | CNode->getOffset() != 0) |
18064 | return nullptr; |
18065 | |
18066 | return CNode; |
18067 | }; |
18068 | |
18069 | // Simple case, LLA. |
18070 | if (Ptr.getOpcode() == RISCVISD::LLA) { |
18071 | auto *CNode = GetSupportedConstantPool(Ptr); |
18072 | if (!CNode || CNode->getTargetFlags() != 0) |
18073 | return nullptr; |
18074 | |
18075 | return CNode->getConstVal(); |
18076 | } |
18077 | |
18078 | // Look for a HI and ADD_LO pair. |
18079 | if (Ptr.getOpcode() != RISCVISD::ADD_LO || |
18080 | Ptr.getOperand(i: 0).getOpcode() != RISCVISD::HI) |
18081 | return nullptr; |
18082 | |
18083 | auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(i: 1)); |
18084 | auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(i: 0).getOperand(i: 0)); |
18085 | |
18086 | if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || |
18087 | !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) |
18088 | return nullptr; |
18089 | |
18090 | if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) |
18091 | return nullptr; |
18092 | |
18093 | return CNodeLo->getConstVal(); |
18094 | } |
18095 | |
18096 | static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI, |
18097 | MachineBasicBlock *BB) { |
18098 | assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction" ); |
18099 | |
18100 | // To read a 64-bit counter CSR on a 32-bit target, we read the two halves. |
18101 | // Should the count have wrapped while it was being read, we need to try |
18102 | // again. |
18103 | // For example: |
18104 | // ``` |
18105 | // read: |
18106 | // csrrs x3, counterh # load high word of counter |
18107 | // csrrs x2, counter # load low word of counter |
18108 | // csrrs x4, counterh # load high word of counter |
18109 | // bne x3, x4, read # check if high word reads match, otherwise try again |
18110 | // ``` |
18111 | |
18112 | MachineFunction &MF = *BB->getParent(); |
18113 | const BasicBlock *LLVMBB = BB->getBasicBlock(); |
18114 | MachineFunction::iterator It = ++BB->getIterator(); |
18115 | |
18116 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVMBB); |
18117 | MF.insert(MBBI: It, MBB: LoopMBB); |
18118 | |
18119 | MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(BB: LLVMBB); |
18120 | MF.insert(MBBI: It, MBB: DoneMBB); |
18121 | |
18122 | // Transfer the remainder of BB and its successor edges to DoneMBB. |
18123 | DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, |
18124 | From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end()); |
18125 | DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
18126 | |
18127 | BB->addSuccessor(Succ: LoopMBB); |
18128 | |
18129 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
18130 | Register ReadAgainReg = RegInfo.createVirtualRegister(RegClass: &RISCV::GPRRegClass); |
18131 | Register LoReg = MI.getOperand(i: 0).getReg(); |
18132 | Register HiReg = MI.getOperand(i: 1).getReg(); |
18133 | int64_t LoCounter = MI.getOperand(i: 2).getImm(); |
18134 | int64_t HiCounter = MI.getOperand(i: 3).getImm(); |
18135 | DebugLoc DL = MI.getDebugLoc(); |
18136 | |
18137 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
18138 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS), DestReg: HiReg) |
18139 | .addImm(Val: HiCounter) |
18140 | .addReg(RegNo: RISCV::X0); |
18141 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS), DestReg: LoReg) |
18142 | .addImm(Val: LoCounter) |
18143 | .addReg(RegNo: RISCV::X0); |
18144 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS), DestReg: ReadAgainReg) |
18145 | .addImm(Val: HiCounter) |
18146 | .addReg(RegNo: RISCV::X0); |
18147 | |
18148 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::BNE)) |
18149 | .addReg(RegNo: HiReg) |
18150 | .addReg(RegNo: ReadAgainReg) |
18151 | .addMBB(MBB: LoopMBB); |
18152 | |
18153 | LoopMBB->addSuccessor(Succ: LoopMBB); |
18154 | LoopMBB->addSuccessor(Succ: DoneMBB); |
18155 | |
18156 | MI.eraseFromParent(); |
18157 | |
18158 | return DoneMBB; |
18159 | } |
18160 | |
18161 | static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, |
18162 | MachineBasicBlock *BB, |
18163 | const RISCVSubtarget &Subtarget) { |
18164 | assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction" ); |
18165 | |
18166 | MachineFunction &MF = *BB->getParent(); |
18167 | DebugLoc DL = MI.getDebugLoc(); |
18168 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
18169 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
18170 | Register LoReg = MI.getOperand(i: 0).getReg(); |
18171 | Register HiReg = MI.getOperand(i: 1).getReg(); |
18172 | Register SrcReg = MI.getOperand(i: 2).getReg(); |
18173 | |
18174 | const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; |
18175 | int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); |
18176 | |
18177 | TII.storeRegToStackSlot(MBB&: *BB, MI, SrcReg, isKill: MI.getOperand(i: 2).isKill(), FrameIndex: FI, RC: SrcRC, |
18178 | TRI: RI, VReg: Register()); |
18179 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
18180 | MachineMemOperand *MMOLo = |
18181 | MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8)); |
18182 | MachineMemOperand *MMOHi = MF.getMachineMemOperand( |
18183 | PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8)); |
18184 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::LW), DestReg: LoReg) |
18185 | .addFrameIndex(Idx: FI) |
18186 | .addImm(Val: 0) |
18187 | .addMemOperand(MMO: MMOLo); |
18188 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::LW), DestReg: HiReg) |
18189 | .addFrameIndex(Idx: FI) |
18190 | .addImm(Val: 4) |
18191 | .addMemOperand(MMO: MMOHi); |
18192 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
18193 | return BB; |
18194 | } |
18195 | |
18196 | static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, |
18197 | MachineBasicBlock *BB, |
18198 | const RISCVSubtarget &Subtarget) { |
18199 | assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && |
18200 | "Unexpected instruction" ); |
18201 | |
18202 | MachineFunction &MF = *BB->getParent(); |
18203 | DebugLoc DL = MI.getDebugLoc(); |
18204 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
18205 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
18206 | Register DstReg = MI.getOperand(i: 0).getReg(); |
18207 | Register LoReg = MI.getOperand(i: 1).getReg(); |
18208 | Register HiReg = MI.getOperand(i: 2).getReg(); |
18209 | |
18210 | const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; |
18211 | int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); |
18212 | |
18213 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
18214 | MachineMemOperand *MMOLo = |
18215 | MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8)); |
18216 | MachineMemOperand *MMOHi = MF.getMachineMemOperand( |
18217 | PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8)); |
18218 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::SW)) |
18219 | .addReg(RegNo: LoReg, flags: getKillRegState(B: MI.getOperand(i: 1).isKill())) |
18220 | .addFrameIndex(Idx: FI) |
18221 | .addImm(Val: 0) |
18222 | .addMemOperand(MMO: MMOLo); |
18223 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::SW)) |
18224 | .addReg(RegNo: HiReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill())) |
18225 | .addFrameIndex(Idx: FI) |
18226 | .addImm(Val: 4) |
18227 | .addMemOperand(MMO: MMOHi); |
18228 | TII.loadRegFromStackSlot(MBB&: *BB, MI, DestReg: DstReg, FrameIndex: FI, RC: DstRC, TRI: RI, VReg: Register()); |
18229 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
18230 | return BB; |
18231 | } |
18232 | |
18233 | static bool isSelectPseudo(MachineInstr &MI) { |
18234 | switch (MI.getOpcode()) { |
18235 | default: |
18236 | return false; |
18237 | case RISCV::Select_GPR_Using_CC_GPR: |
18238 | case RISCV::Select_GPR_Using_CC_Imm: |
18239 | case RISCV::Select_FPR16_Using_CC_GPR: |
18240 | case RISCV::Select_FPR16INX_Using_CC_GPR: |
18241 | case RISCV::Select_FPR32_Using_CC_GPR: |
18242 | case RISCV::Select_FPR32INX_Using_CC_GPR: |
18243 | case RISCV::Select_FPR64_Using_CC_GPR: |
18244 | case RISCV::Select_FPR64INX_Using_CC_GPR: |
18245 | case RISCV::Select_FPR64IN32X_Using_CC_GPR: |
18246 | return true; |
18247 | } |
18248 | } |
18249 | |
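// Lower a quiet FP compare pseudo (PseudoQuietFLE_* / PseudoQuietFLT_*). FLT
// and FLE are signaling comparisons, so the compare is performed with FFLAGS
// saved and restored around it to discard any flags it raises, and a dummy
// FEQ (which only signals on signaling-NaN operands) is issued afterwards so
// that signaling NaNs still set the invalid flag, giving quiet semantics.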
18250 | static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, |
18251 | unsigned RelOpcode, unsigned EqOpcode, |
18252 | const RISCVSubtarget &Subtarget) { |
18253 | DebugLoc DL = MI.getDebugLoc(); |
18254 | Register DstReg = MI.getOperand(i: 0).getReg(); |
18255 | Register Src1Reg = MI.getOperand(i: 1).getReg(); |
18256 | Register Src2Reg = MI.getOperand(i: 2).getReg(); |
18257 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
18258 | Register SavedFFlags = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass); |
18259 | const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); |
18260 | |
18261 | // Save the current FFLAGS. |
18262 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::ReadFFLAGS), DestReg: SavedFFlags); |
18263 | |
18264 | auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RelOpcode), DestReg: DstReg) |
18265 | .addReg(RegNo: Src1Reg) |
18266 | .addReg(RegNo: Src2Reg); |
18267 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
18268 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
18269 | |
18270 | // Restore the FFLAGS. |
18271 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::WriteFFLAGS)) |
18272 | .addReg(RegNo: SavedFFlags, flags: RegState::Kill); |
18273 | |
// Issue a dummy FEQ opcode to raise the invalid exception for signaling NaNs.
18275 | auto MIB2 = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: EqOpcode), DestReg: RISCV::X0) |
18276 | .addReg(RegNo: Src1Reg, flags: getKillRegState(B: MI.getOperand(i: 1).isKill())) |
18277 | .addReg(RegNo: Src2Reg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill())); |
18278 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
18279 | MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept); |
18280 | |
18281 | // Erase the pseudoinstruction. |
18282 | MI.eraseFromParent(); |
18283 | return BB; |
18284 | } |
18285 | |
18286 | static MachineBasicBlock * |
18287 | EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, |
18288 | MachineBasicBlock *ThisMBB, |
18289 | const RISCVSubtarget &Subtarget) { |
// Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
18291 | // Without this, custom-inserter would have generated: |
18292 | // |
18293 | // A |
18294 | // | \ |
18295 | // | B |
18296 | // | / |
18297 | // C |
18298 | // | \ |
18299 | // | D |
18300 | // | / |
18301 | // E |
18302 | // |
18303 | // A: X = ...; Y = ... |
18304 | // B: empty |
18305 | // C: Z = PHI [X, A], [Y, B] |
18306 | // D: empty |
18307 | // E: PHI [X, C], [Z, D] |
18308 | // |
18309 | // If we lower both Select_FPRX_ in a single step, we can instead generate: |
18310 | // |
18311 | // A |
18312 | // | \ |
18313 | // | C |
18314 | // | /| |
18315 | // |/ | |
18316 | // | | |
18317 | // | D |
18318 | // | / |
18319 | // E |
18320 | // |
18321 | // A: X = ...; Y = ... |
18322 | // D: empty |
18323 | // E: PHI [X, A], [X, C], [Y, D] |
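//
// In C-like terms (illustrative only, using the operand names above), the
// nested selects compute roughly
//   E = (rs1 cc rs2) ? rs4 : ((rs1' cc' rs2') ? rs4' : rs5')
// where the primed operands belong to the inner Select_FPRX_, so a single
// three-way PHI in E replaces the two chained triangles.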
18324 | |
18325 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
18326 | const DebugLoc &DL = First.getDebugLoc(); |
18327 | const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); |
18328 | MachineFunction *F = ThisMBB->getParent(); |
18329 | MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
18330 | MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
18331 | MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
18332 | MachineFunction::iterator It = ++ThisMBB->getIterator(); |
18333 | F->insert(MBBI: It, MBB: FirstMBB); |
18334 | F->insert(MBBI: It, MBB: SecondMBB); |
18335 | F->insert(MBBI: It, MBB: SinkMBB); |
18336 | |
18337 | // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. |
18338 | SinkMBB->splice(Where: SinkMBB->begin(), Other: ThisMBB, |
18339 | From: std::next(x: MachineBasicBlock::iterator(First)), |
18340 | To: ThisMBB->end()); |
18341 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: ThisMBB); |
18342 | |
18343 | // Fallthrough block for ThisMBB. |
18344 | ThisMBB->addSuccessor(Succ: FirstMBB); |
18345 | // Fallthrough block for FirstMBB. |
18346 | FirstMBB->addSuccessor(Succ: SecondMBB); |
18347 | ThisMBB->addSuccessor(Succ: SinkMBB); |
18348 | FirstMBB->addSuccessor(Succ: SinkMBB); |
18349 | // This is fallthrough. |
18350 | SecondMBB->addSuccessor(Succ: SinkMBB); |
18351 | |
18352 | auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(i: 3).getImm()); |
18353 | Register FLHS = First.getOperand(i: 1).getReg(); |
18354 | Register FRHS = First.getOperand(i: 2).getReg(); |
18355 | // Insert appropriate branch. |
18356 | BuildMI(BB: FirstMBB, MIMD: DL, MCID: TII.getBrCond(CC: FirstCC)) |
18357 | .addReg(RegNo: FLHS) |
18358 | .addReg(RegNo: FRHS) |
18359 | .addMBB(MBB: SinkMBB); |
18360 | |
18361 | Register SLHS = Second.getOperand(i: 1).getReg(); |
18362 | Register SRHS = Second.getOperand(i: 2).getReg(); |
18363 | Register Op1Reg4 = First.getOperand(i: 4).getReg(); |
18364 | Register Op1Reg5 = First.getOperand(i: 5).getReg(); |
18365 | |
18366 | auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(i: 3).getImm()); |
18367 | // Insert appropriate branch. |
18368 | BuildMI(BB: ThisMBB, MIMD: DL, MCID: TII.getBrCond(CC: SecondCC)) |
18369 | .addReg(RegNo: SLHS) |
18370 | .addReg(RegNo: SRHS) |
18371 | .addMBB(MBB: SinkMBB); |
18372 | |
18373 | Register DestReg = Second.getOperand(i: 0).getReg(); |
18374 | Register Op2Reg4 = Second.getOperand(i: 4).getReg(); |
18375 | BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: RISCV::PHI), DestReg) |
18376 | .addReg(RegNo: Op2Reg4) |
18377 | .addMBB(MBB: ThisMBB) |
18378 | .addReg(RegNo: Op1Reg4) |
18379 | .addMBB(MBB: FirstMBB) |
18380 | .addReg(RegNo: Op1Reg5) |
18381 | .addMBB(MBB: SecondMBB); |
18382 | |
18383 | // Now remove the Select_FPRX_s. |
18384 | First.eraseFromParent(); |
18385 | Second.eraseFromParent(); |
18386 | return SinkMBB; |
18387 | } |
18388 | |
18389 | static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, |
18390 | MachineBasicBlock *BB, |
18391 | const RISCVSubtarget &Subtarget) { |
18392 | // To "insert" Select_* instructions, we actually have to insert the triangle |
18393 | // control-flow pattern. The incoming instructions know the destination vreg |
18394 | // to set, the condition code register to branch on, the true/false values to |
18395 | // select between, and the condcode to use to select the appropriate branch. |
18396 | // |
18397 | // We produce the following control flow: |
18398 | // HeadMBB |
18399 | // | \ |
18400 | // | IfFalseMBB |
18401 | // | / |
18402 | // TailMBB |
18403 | // |
18404 | // When we find a sequence of selects we attempt to optimize their emission |
18405 | // by sharing the control flow. Currently we only handle cases where we have |
18406 | // multiple selects with the exact same condition (same LHS, RHS and CC). |
18407 | // The selects may be interleaved with other instructions if the other |
18408 | // instructions meet some requirements we deem safe: |
// - They are debug instructions (these are simply skipped). Otherwise,
// - They are not pseudo instructions requiring custom insertion, they do not
//   have side-effects, do not access memory, and their inputs do not depend
//   on the results of the select pseudo-instructions.
18413 | // The TrueV/FalseV operands of the selects cannot depend on the result of |
18414 | // previous selects in the sequence. |
18415 | // These conditions could be further relaxed. See the X86 target for a |
18416 | // related approach and more information. |
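// For example (illustrative), two selects guarded by the same compare:
//   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t0, %f0
//   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
// share one conditional branch in HeadMBB and become two PHIs in TailMBB,
// rather than two separate triangles.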
18417 | // |
18418 | // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)) |
18419 | // is checked here and handled by a separate function - |
18420 | // EmitLoweredCascadedSelect. |
18421 | |
18422 | auto Next = next_nodbg(It: MI.getIterator(), End: BB->instr_end()); |
18423 | if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && |
18424 | MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) && |
18425 | Next != BB->end() && Next->getOpcode() == MI.getOpcode() && |
18426 | Next->getOperand(i: 5).getReg() == MI.getOperand(i: 0).getReg() && |
18427 | Next->getOperand(i: 5).isKill()) |
18428 | return EmitLoweredCascadedSelect(First&: MI, Second&: *Next, ThisMBB: BB, Subtarget); |
18429 | |
18430 | Register LHS = MI.getOperand(i: 1).getReg(); |
18431 | Register RHS; |
18432 | if (MI.getOperand(i: 2).isReg()) |
18433 | RHS = MI.getOperand(i: 2).getReg(); |
18434 | auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm()); |
18435 | |
18436 | SmallVector<MachineInstr *, 4> SelectDebugValues; |
18437 | SmallSet<Register, 4> SelectDests; |
18438 | SelectDests.insert(V: MI.getOperand(i: 0).getReg()); |
18439 | |
18440 | MachineInstr *LastSelectPseudo = &MI; |
18441 | for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); |
18442 | SequenceMBBI != E; ++SequenceMBBI) { |
18443 | if (SequenceMBBI->isDebugInstr()) |
18444 | continue; |
18445 | if (isSelectPseudo(MI&: *SequenceMBBI)) { |
18446 | if (SequenceMBBI->getOperand(i: 1).getReg() != LHS || |
18447 | !SequenceMBBI->getOperand(i: 2).isReg() || |
18448 | SequenceMBBI->getOperand(i: 2).getReg() != RHS || |
18449 | SequenceMBBI->getOperand(i: 3).getImm() != CC || |
18450 | SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) || |
18451 | SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg())) |
18452 | break; |
18453 | LastSelectPseudo = &*SequenceMBBI; |
18454 | SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues); |
18455 | SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg()); |
18456 | continue; |
18457 | } |
18458 | if (SequenceMBBI->hasUnmodeledSideEffects() || |
18459 | SequenceMBBI->mayLoadOrStore() || |
18460 | SequenceMBBI->usesCustomInsertionHook()) |
18461 | break; |
18462 | if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) { |
18463 | return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg()); |
18464 | })) |
18465 | break; |
18466 | } |
18467 | |
18468 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
18469 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
18470 | DebugLoc DL = MI.getDebugLoc(); |
18471 | MachineFunction::iterator I = ++BB->getIterator(); |
18472 | |
18473 | MachineBasicBlock *HeadMBB = BB; |
18474 | MachineFunction *F = BB->getParent(); |
18475 | MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
18476 | MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
18477 | |
18478 | F->insert(MBBI: I, MBB: IfFalseMBB); |
18479 | F->insert(MBBI: I, MBB: TailMBB); |
18480 | |
18481 | // Set the call frame size on entry to the new basic blocks. |
18482 | unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo); |
18483 | IfFalseMBB->setCallFrameSize(CallFrameSize); |
18484 | TailMBB->setCallFrameSize(CallFrameSize); |
18485 | |
18486 | // Transfer debug instructions associated with the selects to TailMBB. |
18487 | for (MachineInstr *DebugInstr : SelectDebugValues) { |
18488 | TailMBB->push_back(MI: DebugInstr->removeFromParent()); |
18489 | } |
18490 | |
18491 | // Move all instructions after the sequence to TailMBB. |
18492 | TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB, |
18493 | From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end()); |
18494 | // Update machine-CFG edges by transferring all successors of the current |
18495 | // block to the new block which will contain the Phi nodes for the selects. |
18496 | TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB); |
18497 | // Set the successors for HeadMBB. |
18498 | HeadMBB->addSuccessor(Succ: IfFalseMBB); |
18499 | HeadMBB->addSuccessor(Succ: TailMBB); |
18500 | |
18501 | // Insert appropriate branch. |
18502 | if (MI.getOperand(i: 2).isImm()) |
18503 | BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.getBrCond(CC, Imm: MI.getOperand(i: 2).isImm())) |
18504 | .addReg(RegNo: LHS) |
18505 | .addImm(Val: MI.getOperand(i: 2).getImm()) |
18506 | .addMBB(MBB: TailMBB); |
18507 | else |
18508 | BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.getBrCond(CC)) |
18509 | .addReg(RegNo: LHS) |
18510 | .addReg(RegNo: RHS) |
18511 | .addMBB(MBB: TailMBB); |
18512 | |
18513 | // IfFalseMBB just falls through to TailMBB. |
18514 | IfFalseMBB->addSuccessor(Succ: TailMBB); |
18515 | |
18516 | // Create PHIs for all of the select pseudo-instructions. |
18517 | auto SelectMBBI = MI.getIterator(); |
18518 | auto SelectEnd = std::next(x: LastSelectPseudo->getIterator()); |
18519 | auto InsertionPoint = TailMBB->begin(); |
18520 | while (SelectMBBI != SelectEnd) { |
18521 | auto Next = std::next(x: SelectMBBI); |
18522 | if (isSelectPseudo(MI&: *SelectMBBI)) { |
18523 | // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] |
18524 | BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(), |
18525 | MCID: TII.get(Opcode: RISCV::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg()) |
18526 | .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg()) |
18527 | .addMBB(MBB: HeadMBB) |
18528 | .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg()) |
18529 | .addMBB(MBB: IfFalseMBB); |
18530 | SelectMBBI->eraseFromParent(); |
18531 | } |
18532 | SelectMBBI = Next; |
18533 | } |
18534 | |
18535 | F->getProperties().reset(P: MachineFunctionProperties::Property::NoPHIs); |
18536 | return TailMBB; |
18537 | } |
18538 | |
// Helper to find the masked pseudo instruction for an MC instruction, given
// its LMUL and SEW.
18540 | static const RISCV::RISCVMaskedPseudoInfo * |
18541 | lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) { |
18542 | const RISCVVInversePseudosTable::PseudoInfo *Inverse = |
18543 | RISCVVInversePseudosTable::getBaseInfo(BaseInstr: MCOpcode, VLMul: LMul, SEW); |
18544 | assert(Inverse && "Unexpected LMUL and SEW pair for instruction" ); |
18545 | const RISCV::RISCVMaskedPseudoInfo *Masked = |
18546 | RISCV::lookupMaskedIntrinsicByUnmasked(UnmaskedPseudo: Inverse->Pseudo); |
18547 | assert(Masked && "Could not find masked instruction for LMUL and SEW pair" ); |
18548 | return Masked; |
18549 | } |
18550 | |
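// Lower a PseudoVFROUND_NOEXCEPT_V_*_MASK pseudo. The rounding is done by
// converting to integer and back: a masked VFCVT_X_F followed by the matching
// masked VFCVT_F_X for the same LMUL/SEW, both under the dynamic rounding
// mode, with FFLAGS saved beforehand and restored afterwards so that the
// conversions do not leave any exception flags behind.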
18551 | static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, |
18552 | MachineBasicBlock *BB, |
18553 | unsigned CVTXOpc) { |
18554 | DebugLoc DL = MI.getDebugLoc(); |
18555 | |
18556 | const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); |
18557 | |
18558 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
18559 | Register SavedFFLAGS = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass); |
18560 | |
18561 | // Save the old value of FFLAGS. |
18562 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::ReadFFLAGS), DestReg: SavedFFLAGS); |
18563 | |
18564 | assert(MI.getNumOperands() == 7); |
18565 | |
18566 | // Emit a VFCVT_X_F |
18567 | const TargetRegisterInfo *TRI = |
18568 | BB->getParent()->getSubtarget().getRegisterInfo(); |
18569 | const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx: 0, TII: &TII, TRI); |
18570 | Register Tmp = MRI.createVirtualRegister(RegClass: RC); |
18571 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: CVTXOpc), DestReg: Tmp) |
18572 | .add(MO: MI.getOperand(i: 1)) |
18573 | .add(MO: MI.getOperand(i: 2)) |
18574 | .add(MO: MI.getOperand(i: 3)) |
18575 | .add(MO: MachineOperand::CreateImm(Val: 7)) // frm = DYN |
18576 | .add(MO: MI.getOperand(i: 4)) |
18577 | .add(MO: MI.getOperand(i: 5)) |
18578 | .add(MO: MI.getOperand(i: 6)) |
18579 | .add(MO: MachineOperand::CreateReg(Reg: RISCV::FRM, |
18580 | /*IsDef*/ isDef: false, |
18581 | /*IsImp*/ isImp: true)); |
18582 | |
18583 | // Emit a VFCVT_F_X |
18584 | RISCVII::VLMUL LMul = RISCVII::getLMul(TSFlags: MI.getDesc().TSFlags); |
18585 | unsigned Log2SEW = MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm(); |
18586 | // There is no E8 variant for VFCVT_F_X. |
18587 | assert(Log2SEW >= 4); |
18588 | unsigned CVTFOpc = |
18589 | lookupMaskedIntrinsic(MCOpcode: RISCV::VFCVT_F_X_V, LMul, SEW: 1 << Log2SEW) |
18590 | ->MaskedPseudo; |
18591 | |
18592 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: CVTFOpc)) |
18593 | .add(MO: MI.getOperand(i: 0)) |
18594 | .add(MO: MI.getOperand(i: 1)) |
18595 | .addReg(RegNo: Tmp) |
18596 | .add(MO: MI.getOperand(i: 3)) |
18597 | .add(MO: MachineOperand::CreateImm(Val: 7)) // frm = DYN |
18598 | .add(MO: MI.getOperand(i: 4)) |
18599 | .add(MO: MI.getOperand(i: 5)) |
18600 | .add(MO: MI.getOperand(i: 6)) |
18601 | .add(MO: MachineOperand::CreateReg(Reg: RISCV::FRM, |
18602 | /*IsDef*/ isDef: false, |
18603 | /*IsImp*/ isImp: true)); |
18604 | |
18605 | // Restore FFLAGS. |
18606 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::WriteFFLAGS)) |
18607 | .addReg(RegNo: SavedFFLAGS, flags: RegState::Kill); |
18608 | |
18609 | // Erase the pseudoinstruction. |
18610 | MI.eraseFromParent(); |
18611 | return BB; |
18612 | } |
18613 | |
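// Lower a PseudoFROUND_* pseudo. If |Src| compares less than the bound in
// operand 2 (supplied by the caller; typically the magnitude at or above
// which every representable value is already integral), the value is rounded
// by converting to integer and back with the requested rounding mode, and the
// original sign is restored with FSGNJ. Otherwise (including NaNs, for which
// the FLT compare is false) the source passes through unchanged via the PHI
// in DoneMBB.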
18614 | static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, |
18615 | const RISCVSubtarget &Subtarget) { |
18616 | unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; |
18617 | const TargetRegisterClass *RC; |
18618 | switch (MI.getOpcode()) { |
18619 | default: |
18620 | llvm_unreachable("Unexpected opcode" ); |
18621 | case RISCV::PseudoFROUND_H: |
18622 | CmpOpc = RISCV::FLT_H; |
18623 | F2IOpc = RISCV::FCVT_W_H; |
18624 | I2FOpc = RISCV::FCVT_H_W; |
18625 | FSGNJOpc = RISCV::FSGNJ_H; |
18626 | FSGNJXOpc = RISCV::FSGNJX_H; |
18627 | RC = &RISCV::FPR16RegClass; |
18628 | break; |
18629 | case RISCV::PseudoFROUND_H_INX: |
18630 | CmpOpc = RISCV::FLT_H_INX; |
18631 | F2IOpc = RISCV::FCVT_W_H_INX; |
18632 | I2FOpc = RISCV::FCVT_H_W_INX; |
18633 | FSGNJOpc = RISCV::FSGNJ_H_INX; |
18634 | FSGNJXOpc = RISCV::FSGNJX_H_INX; |
18635 | RC = &RISCV::GPRF16RegClass; |
18636 | break; |
18637 | case RISCV::PseudoFROUND_S: |
18638 | CmpOpc = RISCV::FLT_S; |
18639 | F2IOpc = RISCV::FCVT_W_S; |
18640 | I2FOpc = RISCV::FCVT_S_W; |
18641 | FSGNJOpc = RISCV::FSGNJ_S; |
18642 | FSGNJXOpc = RISCV::FSGNJX_S; |
18643 | RC = &RISCV::FPR32RegClass; |
18644 | break; |
18645 | case RISCV::PseudoFROUND_S_INX: |
18646 | CmpOpc = RISCV::FLT_S_INX; |
18647 | F2IOpc = RISCV::FCVT_W_S_INX; |
18648 | I2FOpc = RISCV::FCVT_S_W_INX; |
18649 | FSGNJOpc = RISCV::FSGNJ_S_INX; |
18650 | FSGNJXOpc = RISCV::FSGNJX_S_INX; |
18651 | RC = &RISCV::GPRF32RegClass; |
18652 | break; |
18653 | case RISCV::PseudoFROUND_D: |
18654 | assert(Subtarget.is64Bit() && "Expected 64-bit GPR." ); |
18655 | CmpOpc = RISCV::FLT_D; |
18656 | F2IOpc = RISCV::FCVT_L_D; |
18657 | I2FOpc = RISCV::FCVT_D_L; |
18658 | FSGNJOpc = RISCV::FSGNJ_D; |
18659 | FSGNJXOpc = RISCV::FSGNJX_D; |
18660 | RC = &RISCV::FPR64RegClass; |
18661 | break; |
18662 | case RISCV::PseudoFROUND_D_INX: |
18663 | assert(Subtarget.is64Bit() && "Expected 64-bit GPR." ); |
18664 | CmpOpc = RISCV::FLT_D_INX; |
18665 | F2IOpc = RISCV::FCVT_L_D_INX; |
18666 | I2FOpc = RISCV::FCVT_D_L_INX; |
18667 | FSGNJOpc = RISCV::FSGNJ_D_INX; |
18668 | FSGNJXOpc = RISCV::FSGNJX_D_INX; |
18669 | RC = &RISCV::GPRRegClass; |
18670 | break; |
18671 | } |
18672 | |
18673 | const BasicBlock *BB = MBB->getBasicBlock(); |
18674 | DebugLoc DL = MI.getDebugLoc(); |
18675 | MachineFunction::iterator I = ++MBB->getIterator(); |
18676 | |
18677 | MachineFunction *F = MBB->getParent(); |
18678 | MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); |
18679 | MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); |
18680 | |
18681 | F->insert(MBBI: I, MBB: CvtMBB); |
18682 | F->insert(MBBI: I, MBB: DoneMBB); |
18683 | // Move all instructions after the sequence to DoneMBB. |
18684 | DoneMBB->splice(Where: DoneMBB->end(), Other: MBB, From: MachineBasicBlock::iterator(MI), |
18685 | To: MBB->end()); |
18686 | // Update machine-CFG edges by transferring all successors of the current |
18687 | // block to the new block which will contain the Phi nodes for the selects. |
18688 | DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
18689 | // Set the successors for MBB. |
18690 | MBB->addSuccessor(Succ: CvtMBB); |
18691 | MBB->addSuccessor(Succ: DoneMBB); |
18692 | |
18693 | Register DstReg = MI.getOperand(i: 0).getReg(); |
18694 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
18695 | Register MaxReg = MI.getOperand(i: 2).getReg(); |
18696 | int64_t FRM = MI.getOperand(i: 3).getImm(); |
18697 | |
18698 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
18699 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
18700 | |
18701 | Register FabsReg = MRI.createVirtualRegister(RegClass: RC); |
18702 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: FSGNJXOpc), DestReg: FabsReg).addReg(RegNo: SrcReg).addReg(RegNo: SrcReg); |
18703 | |
18704 | // Compare the FP value to the max value. |
18705 | Register CmpReg = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass); |
18706 | auto MIB = |
18707 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: CmpOpc), DestReg: CmpReg).addReg(RegNo: FabsReg).addReg(RegNo: MaxReg); |
18708 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
18709 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
18710 | |
18711 | // Insert branch. |
18712 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: RISCV::BEQ)) |
18713 | .addReg(RegNo: CmpReg) |
18714 | .addReg(RegNo: RISCV::X0) |
18715 | .addMBB(MBB: DoneMBB); |
18716 | |
18717 | CvtMBB->addSuccessor(Succ: DoneMBB); |
18718 | |
18719 | // Convert to integer. |
18720 | Register F2IReg = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass); |
18721 | MIB = BuildMI(BB: CvtMBB, MIMD: DL, MCID: TII.get(Opcode: F2IOpc), DestReg: F2IReg).addReg(RegNo: SrcReg).addImm(Val: FRM); |
18722 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
18723 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
18724 | |
18725 | // Convert back to FP. |
18726 | Register I2FReg = MRI.createVirtualRegister(RegClass: RC); |
18727 | MIB = BuildMI(BB: CvtMBB, MIMD: DL, MCID: TII.get(Opcode: I2FOpc), DestReg: I2FReg).addReg(RegNo: F2IReg).addImm(Val: FRM); |
18728 | if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept)) |
18729 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
18730 | |
18731 | // Restore the sign bit. |
18732 | Register CvtReg = MRI.createVirtualRegister(RegClass: RC); |
18733 | BuildMI(BB: CvtMBB, MIMD: DL, MCID: TII.get(Opcode: FSGNJOpc), DestReg: CvtReg).addReg(RegNo: I2FReg).addReg(RegNo: SrcReg); |
18734 | |
18735 | // Merge the results. |
18736 | BuildMI(BB&: *DoneMBB, I: DoneMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: RISCV::PHI), DestReg: DstReg) |
18737 | .addReg(RegNo: SrcReg) |
18738 | .addMBB(MBB) |
18739 | .addReg(RegNo: CvtReg) |
18740 | .addMBB(MBB: CvtMBB); |
18741 | |
18742 | MI.eraseFromParent(); |
18743 | return DoneMBB; |
18744 | } |
18745 | |
18746 | MachineBasicBlock * |
18747 | RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
18748 | MachineBasicBlock *BB) const { |
18749 | switch (MI.getOpcode()) { |
18750 | default: |
18751 | llvm_unreachable("Unexpected instr type to insert" ); |
18752 | case RISCV::ReadCounterWide: |
18753 | assert(!Subtarget.is64Bit() && |
18754 | "ReadCounterWide is only to be used on riscv32" ); |
18755 | return emitReadCounterWidePseudo(MI, BB); |
18756 | case RISCV::Select_GPR_Using_CC_GPR: |
18757 | case RISCV::Select_GPR_Using_CC_Imm: |
18758 | case RISCV::Select_FPR16_Using_CC_GPR: |
18759 | case RISCV::Select_FPR16INX_Using_CC_GPR: |
18760 | case RISCV::Select_FPR32_Using_CC_GPR: |
18761 | case RISCV::Select_FPR32INX_Using_CC_GPR: |
18762 | case RISCV::Select_FPR64_Using_CC_GPR: |
18763 | case RISCV::Select_FPR64INX_Using_CC_GPR: |
18764 | case RISCV::Select_FPR64IN32X_Using_CC_GPR: |
18765 | return emitSelectPseudo(MI, BB, Subtarget); |
18766 | case RISCV::BuildPairF64Pseudo: |
18767 | return emitBuildPairF64Pseudo(MI, BB, Subtarget); |
18768 | case RISCV::SplitF64Pseudo: |
18769 | return emitSplitF64Pseudo(MI, BB, Subtarget); |
18770 | case RISCV::PseudoQuietFLE_H: |
18771 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_H, EqOpcode: RISCV::FEQ_H, Subtarget); |
18772 | case RISCV::PseudoQuietFLE_H_INX: |
18773 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_H_INX, EqOpcode: RISCV::FEQ_H_INX, Subtarget); |
18774 | case RISCV::PseudoQuietFLT_H: |
18775 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_H, EqOpcode: RISCV::FEQ_H, Subtarget); |
18776 | case RISCV::PseudoQuietFLT_H_INX: |
18777 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_H_INX, EqOpcode: RISCV::FEQ_H_INX, Subtarget); |
18778 | case RISCV::PseudoQuietFLE_S: |
18779 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_S, EqOpcode: RISCV::FEQ_S, Subtarget); |
18780 | case RISCV::PseudoQuietFLE_S_INX: |
18781 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_S_INX, EqOpcode: RISCV::FEQ_S_INX, Subtarget); |
18782 | case RISCV::PseudoQuietFLT_S: |
18783 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_S, EqOpcode: RISCV::FEQ_S, Subtarget); |
18784 | case RISCV::PseudoQuietFLT_S_INX: |
18785 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_S_INX, EqOpcode: RISCV::FEQ_S_INX, Subtarget); |
18786 | case RISCV::PseudoQuietFLE_D: |
18787 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_D, EqOpcode: RISCV::FEQ_D, Subtarget); |
18788 | case RISCV::PseudoQuietFLE_D_INX: |
18789 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_D_INX, EqOpcode: RISCV::FEQ_D_INX, Subtarget); |
18790 | case RISCV::PseudoQuietFLE_D_IN32X: |
18791 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_D_IN32X, EqOpcode: RISCV::FEQ_D_IN32X, |
18792 | Subtarget); |
18793 | case RISCV::PseudoQuietFLT_D: |
18794 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_D, EqOpcode: RISCV::FEQ_D, Subtarget); |
18795 | case RISCV::PseudoQuietFLT_D_INX: |
18796 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_D_INX, EqOpcode: RISCV::FEQ_D_INX, Subtarget); |
18797 | case RISCV::PseudoQuietFLT_D_IN32X: |
18798 | return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_D_IN32X, EqOpcode: RISCV::FEQ_D_IN32X, |
18799 | Subtarget); |
18800 | |
18801 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: |
18802 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M1_MASK); |
18803 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: |
18804 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M2_MASK); |
18805 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: |
18806 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M4_MASK); |
18807 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: |
18808 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M8_MASK); |
18809 | case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: |
18810 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_MF2_MASK); |
18811 | case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: |
18812 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_MF4_MASK); |
18813 | case RISCV::PseudoFROUND_H: |
18814 | case RISCV::PseudoFROUND_H_INX: |
18815 | case RISCV::PseudoFROUND_S: |
18816 | case RISCV::PseudoFROUND_S_INX: |
18817 | case RISCV::PseudoFROUND_D: |
18818 | case RISCV::PseudoFROUND_D_INX: |
18819 | case RISCV::PseudoFROUND_D_IN32X: |
18820 | return emitFROUND(MI, MBB: BB, Subtarget); |
18821 | case TargetOpcode::STATEPOINT: |
// STATEPOINT is a pseudo instruction which has no implicit defs/uses,
// whereas the JAL call instruction (to which the statepoint is lowered at
// the end) has an implicit def of the return address register X1. This def
// is early-clobber as it is written at the moment of the call, before any
// use is read.
// Add this implicit dead def here as a workaround.
18827 | MI.addOperand(MF&: *MI.getMF(), |
18828 | Op: MachineOperand::CreateReg( |
18829 | Reg: RISCV::X1, /*isDef*/ true, |
18830 | /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, |
18831 | /*isUndef*/ false, /*isEarlyClobber*/ true)); |
18832 | [[fallthrough]]; |
18833 | case TargetOpcode::STACKMAP: |
18834 | case TargetOpcode::PATCHPOINT: |
18835 | if (!Subtarget.is64Bit()) |
18836 | report_fatal_error(reason: "STACKMAP, PATCHPOINT and STATEPOINT are only " |
18837 | "supported on 64-bit targets" ); |
18838 | return emitPatchPoint(MI, MBB: BB); |
18839 | } |
18840 | } |
18841 | |
18842 | void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, |
18843 | SDNode *Node) const { |
18844 | // Add FRM dependency to any instructions with dynamic rounding mode. |
18845 | int Idx = RISCV::getNamedOperandIdx(Opcode: MI.getOpcode(), NamedIndex: RISCV::OpName::frm); |
18846 | if (Idx < 0) { |
18847 | // Vector pseudos have FRM index indicated by TSFlags. |
18848 | Idx = RISCVII::getFRMOpNum(Desc: MI.getDesc()); |
18849 | if (Idx < 0) |
18850 | return; |
18851 | } |
18852 | if (MI.getOperand(i: Idx).getImm() != RISCVFPRndMode::DYN) |
18853 | return; |
18854 | // If the instruction already reads FRM, don't add another read. |
18855 | if (MI.readsRegister(Reg: RISCV::FRM, /*TRI=*/nullptr)) |
18856 | return; |
18857 | MI.addOperand( |
18858 | Op: MachineOperand::CreateReg(Reg: RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); |
18859 | } |
18860 | |
18861 | // Calling Convention Implementation. |
18862 | // The expectations for frontend ABI lowering vary from target to target. |
18863 | // Ideally, an LLVM frontend would be able to avoid worrying about many ABI |
18864 | // details, but this is a longer term goal. For now, we simply try to keep the |
18865 | // role of the frontend as simple and well-defined as possible. The rules can |
18866 | // be summarised as: |
18867 | // * Never split up large scalar arguments. We handle them here. |
18868 | // * If a hardfloat calling convention is being used, and the struct may be |
18869 | // passed in a pair of registers (fp+fp, int+fp), and both registers are |
18870 | // available, then pass as two separate arguments. If either the GPRs or FPRs |
18871 | // are exhausted, then pass according to the rule below. |
18872 | // * If a struct could never be passed in registers or directly in a stack |
18873 | // slot (as it is larger than 2*XLEN and the floating point rules don't |
18874 | // apply), then pass it using a pointer with the byval attribute. |
18875 | // * If a struct is less than 2*XLEN, then coerce to either a two-element |
18876 | // word-sized array or a 2*XLEN scalar (depending on alignment). |
18877 | // * The frontend can determine whether a struct is returned by reference or |
18878 | // not based on its size and fields. If it will be returned by reference, the |
18879 | // frontend must modify the prototype so a pointer with the sret annotation is |
18880 | // passed as the first argument. This is not necessary for large scalar |
18881 | // returns. |
18882 | // * Struct return values and varargs should be coerced to structs containing |
18883 | // register-size fields in the same situations they would be for fixed |
18884 | // arguments. |
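// For example (illustrative, hard-float LP64D with argument registers still
// available): struct { double d; int i; } may be passed as two separate
// arguments, the double in an FPR and the int in a GPR, whereas a struct of
// four doubles (larger than 2*XLEN and outside the fp+fp/int+fp rule) is
// passed byval via a pointer.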
18885 | |
18886 | static const MCPhysReg ArgFPR16s[] = { |
18887 | RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, |
18888 | RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H |
18889 | }; |
18890 | static const MCPhysReg ArgFPR32s[] = { |
18891 | RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, |
18892 | RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F |
18893 | }; |
18894 | static const MCPhysReg ArgFPR64s[] = { |
18895 | RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, |
18896 | RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D |
18897 | }; |
18898 | // This is an interim calling convention and it may be changed in the future. |
18899 | static const MCPhysReg ArgVRs[] = { |
18900 | RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, |
18901 | RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, |
18902 | RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; |
18903 | static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, |
18904 | RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, |
18905 | RISCV::V20M2, RISCV::V22M2}; |
18906 | static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, |
18907 | RISCV::V20M4}; |
18908 | static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; |
18909 | |
18910 | ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { |
// The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
// the ILP32E/LP64E ABIs.
18913 | static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
18914 | RISCV::X13, RISCV::X14, RISCV::X15, |
18915 | RISCV::X16, RISCV::X17}; |
// The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18917 | static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
18918 | RISCV::X13, RISCV::X14, RISCV::X15}; |
18919 | |
18920 | if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) |
18921 | return ArrayRef(ArgEGPRs); |
18922 | |
18923 | return ArrayRef(ArgIGPRs); |
18924 | } |
18925 | |
18926 | static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { |
// The GPRs used for passing arguments in FastCC. X5 and X6 might be used for
// the save-restore libcall, so we don't use them. Don't use X7 either, since
// Zicfilp uses X7 as the label register.
18930 | static const MCPhysReg FastCCIGPRs[] = { |
18931 | RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, |
18932 | RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31}; |
18933 | |
// The GPRs used for passing arguments in FastCC when using the ILP32E/LP64E ABIs.
18935 | static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
18936 | RISCV::X13, RISCV::X14, RISCV::X15}; |
18937 | |
18938 | if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) |
18939 | return ArrayRef(FastCCEGPRs); |
18940 | |
18941 | return ArrayRef(FastCCIGPRs); |
18942 | } |
18943 | |
18944 | // Pass a 2*XLEN argument that has been split into two XLEN values through |
18945 | // registers or the stack as necessary. |
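// For example (illustrative), for an i64 split into two i32 halves on RV32:
// if only a7 remains unallocated, the first half goes in a7 and the second
// half in a stack slot; if no argument GPRs remain, both halves go on the
// stack (the first slot aligned to the original argument's alignment).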
18946 | static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, |
18947 | ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, |
18948 | MVT ValVT2, MVT LocVT2, |
18949 | ISD::ArgFlagsTy ArgFlags2, bool EABI) { |
18950 | unsigned XLenInBytes = XLen / 8; |
18951 | const RISCVSubtarget &STI = |
18952 | State.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
18953 | ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI: STI.getTargetABI()); |
18954 | |
18955 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
18956 | // At least one half can be passed via register. |
18957 | State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg, |
18958 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
18959 | } else { |
18960 | // Both halves must be passed on the stack, with proper alignment. |
18961 | // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte |
18962 | // alignment. This behavior may be changed when RV32E/ILP32E is ratified. |
18963 | Align StackAlign(XLenInBytes); |
18964 | if (!EABI || XLen != 32) |
18965 | StackAlign = std::max(a: StackAlign, b: ArgFlags1.getNonZeroOrigAlign()); |
18966 | State.addLoc( |
18967 | V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), |
18968 | Offset: State.AllocateStack(Size: XLenInBytes, Alignment: StackAlign), |
18969 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
18970 | State.addLoc(V: CCValAssign::getMem( |
18971 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)), |
18972 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
18973 | return false; |
18974 | } |
18975 | |
18976 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
18977 | // The second half can also be passed via register. |
18978 | State.addLoc( |
18979 | V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full)); |
18980 | } else { |
18981 | // The second half is passed via the stack, without additional alignment. |
18982 | State.addLoc(V: CCValAssign::getMem( |
18983 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: XLenInBytes, Alignment: Align(XLenInBytes)), |
18984 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
18985 | } |
18986 | |
18987 | return false; |
18988 | } |
18989 | |
18990 | // Implements the RISC-V calling convention. Returns true upon failure. |
18991 | bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, |
18992 | MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, |
18993 | ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, |
18994 | bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, |
18995 | RVVArgDispatcher &RVVDispatcher) { |
18996 | unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); |
18997 | assert(XLen == 32 || XLen == 64); |
18998 | MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; |
18999 | |
19000 | // Static chain parameter must not be passed in normal argument registers, |
19001 | // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain |
19002 | if (ArgFlags.isNest()) { |
19003 | if (unsigned Reg = State.AllocateReg(Reg: RISCV::X7)) { |
19004 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19005 | return false; |
19006 | } |
19007 | } |
19008 | |
// Any return value split into more than two values can't be returned
19010 | // directly. Vectors are returned via the available vector registers. |
19011 | if (!LocVT.isVector() && IsRet && ValNo > 1) |
19012 | return true; |
19013 | |
19014 | // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a |
19015 | // variadic argument, or if no F16/F32 argument registers are available. |
19016 | bool UseGPRForF16_F32 = true; |
19017 | // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a |
19018 | // variadic argument, or if no F64 argument registers are available. |
19019 | bool UseGPRForF64 = true; |
19020 | |
19021 | switch (ABI) { |
19022 | default: |
19023 | llvm_unreachable("Unexpected ABI" ); |
19024 | case RISCVABI::ABI_ILP32: |
19025 | case RISCVABI::ABI_ILP32E: |
19026 | case RISCVABI::ABI_LP64: |
19027 | case RISCVABI::ABI_LP64E: |
19028 | break; |
19029 | case RISCVABI::ABI_ILP32F: |
19030 | case RISCVABI::ABI_LP64F: |
19031 | UseGPRForF16_F32 = !IsFixed; |
19032 | break; |
19033 | case RISCVABI::ABI_ILP32D: |
19034 | case RISCVABI::ABI_LP64D: |
19035 | UseGPRForF16_F32 = !IsFixed; |
19036 | UseGPRForF64 = !IsFixed; |
19037 | break; |
19038 | } |
19039 | |
19040 | // FPR16, FPR32, and FPR64 alias each other. |
19041 | if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s)) { |
19042 | UseGPRForF16_F32 = true; |
19043 | UseGPRForF64 = true; |
19044 | } |
19045 | |
19046 | // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and |
19047 | // similar local variables rather than directly checking against the target |
19048 | // ABI. |
19049 | |
19050 | if (UseGPRForF16_F32 && |
19051 | (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) { |
19052 | LocVT = XLenVT; |
19053 | LocInfo = CCValAssign::BCvt; |
19054 | } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { |
19055 | LocVT = MVT::i64; |
19056 | LocInfo = CCValAssign::BCvt; |
19057 | } |
19058 | |
19059 | ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); |
19060 | |
19061 | // If this is a variadic argument, the RISC-V calling convention requires |
19062 | // that it is assigned an 'even' or 'aligned' register if it has 8-byte |
19063 | // alignment (RV32) or 16-byte alignment (RV64). An aligned register should |
19064 | // be used regardless of whether the original argument was split during |
19065 | // legalisation or not. The argument will not be passed by registers if the |
19066 | // original type is larger than 2*XLEN, so the register alignment rule does |
19067 | // not apply. |
19068 | // TODO: To be compatible with GCC's behaviors, we don't align registers |
19069 | // currently if we are using ILP32E calling convention. This behavior may be |
19070 | // changed when RV32E/ILP32E is ratified. |
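// For example (illustrative, RV32): for printf("%f", d), a0 holds the format
// string, a1 is skipped because the variadic double requires an aligned
// (even, odd) register pair, and d is passed in a2/a3.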
19071 | unsigned TwoXLenInBytes = (2 * XLen) / 8; |
19072 | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && |
19073 | DL.getTypeAllocSize(Ty: OrigTy) == TwoXLenInBytes && |
19074 | ABI != RISCVABI::ABI_ILP32E) { |
19075 | unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs); |
19076 | // Skip 'odd' register if necessary. |
19077 | if (RegIdx != std::size(cont: ArgGPRs) && RegIdx % 2 == 1) |
19078 | State.AllocateReg(Regs: ArgGPRs); |
19079 | } |
19080 | |
19081 | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
19082 | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
19083 | State.getPendingArgFlags(); |
19084 | |
19085 | assert(PendingLocs.size() == PendingArgFlags.size() && |
19086 | "PendingLocs and PendingArgFlags out of sync" ); |
19087 | |
19088 | // Handle passing f64 on RV32D with a soft float ABI or when floating point |
19089 | // registers are exhausted. |
19090 | if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { |
19091 | assert(PendingLocs.empty() && "Can't lower f64 if it is split" ); |
// Depending on the available argument GPRs, f64 may be passed in a pair of
19093 | // GPRs, split between a GPR and the stack, or passed completely on the |
19094 | // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these |
19095 | // cases. |
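// For example (illustrative): with a0-a6 already allocated, the low word is
// passed in a7 and the high word in a 4-byte stack slot; with no argument
// GPRs left, the whole f64 goes in a single 8-byte stack slot.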
19096 | Register Reg = State.AllocateReg(Regs: ArgGPRs); |
19097 | if (!Reg) { |
19098 | unsigned StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8)); |
19099 | State.addLoc( |
19100 | V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
19101 | return false; |
19102 | } |
19103 | LocVT = MVT::i32; |
19104 | State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19105 | Register HiReg = State.AllocateReg(Regs: ArgGPRs); |
19106 | if (HiReg) { |
19107 | State.addLoc( |
19108 | V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: HiReg, LocVT, HTP: LocInfo)); |
19109 | } else { |
19110 | unsigned StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4)); |
19111 | State.addLoc( |
19112 | V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
19113 | } |
19114 | return false; |
19115 | } |
19116 | |
19117 | // Fixed-length vectors are located in the corresponding scalable-vector |
19118 | // container types. |
19119 | if (ValVT.isFixedLengthVector()) |
19120 | LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT); |
19121 | |
19122 | // Split arguments might be passed indirectly, so keep track of the pending |
19123 | // values. Split vectors are passed via a mix of registers and indirectly, so |
19124 | // treat them as we would any other argument. |
19125 | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
19126 | LocVT = XLenVT; |
19127 | LocInfo = CCValAssign::Indirect; |
19128 | PendingLocs.push_back( |
19129 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
19130 | PendingArgFlags.push_back(Elt: ArgFlags); |
19131 | if (!ArgFlags.isSplitEnd()) { |
19132 | return false; |
19133 | } |
19134 | } |
19135 | |
19136 | // If the split argument only had two elements, it should be passed directly |
19137 | // in registers or on the stack. |
19138 | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
19139 | PendingLocs.size() <= 2) { |
19140 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" ); |
19141 | // Apply the normal calling convention rules to the first half of the |
19142 | // split argument. |
19143 | CCValAssign VA = PendingLocs[0]; |
19144 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
19145 | PendingLocs.clear(); |
19146 | PendingArgFlags.clear(); |
19147 | return CC_RISCVAssign2XLen( |
19148 | XLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, ArgFlags2: ArgFlags, |
19149 | EABI: ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); |
19150 | } |
19151 | |
19152 | // Allocate to a register if possible, or else a stack slot. |
19153 | Register Reg; |
19154 | unsigned StoreSizeBytes = XLen / 8; |
19155 | Align StackAlign = Align(XLen / 8); |
19156 | |
19157 | if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32) |
19158 | Reg = State.AllocateReg(Regs: ArgFPR16s); |
19159 | else if (ValVT == MVT::f32 && !UseGPRForF16_F32) |
19160 | Reg = State.AllocateReg(Regs: ArgFPR32s); |
19161 | else if (ValVT == MVT::f64 && !UseGPRForF64) |
19162 | Reg = State.AllocateReg(Regs: ArgFPR64s); |
19163 | else if (ValVT.isVector()) { |
19164 | Reg = RVVDispatcher.getNextPhysReg(); |
19165 | if (!Reg) { |
19166 | // For return values, the vector must be passed fully via registers or |
19167 | // via the stack. |
19168 | // FIXME: The proposed vector ABI only mandates v8-v15 for return values, |
19169 | // but we're using all of them. |
19170 | if (IsRet) |
19171 | return true; |
19172 | // Try using a GPR to pass the address |
19173 | if ((Reg = State.AllocateReg(Regs: ArgGPRs))) { |
19174 | LocVT = XLenVT; |
19175 | LocInfo = CCValAssign::Indirect; |
19176 | } else if (ValVT.isScalableVector()) { |
19177 | LocVT = XLenVT; |
19178 | LocInfo = CCValAssign::Indirect; |
19179 | } else { |
19180 | // Pass fixed-length vectors on the stack. |
19181 | LocVT = ValVT; |
19182 | StoreSizeBytes = ValVT.getStoreSize(); |
19183 | // Align vectors to their element sizes, being careful for vXi1 |
19184 | // vectors. |
19185 | StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); |
19186 | } |
19187 | } |
19188 | } else { |
19189 | Reg = State.AllocateReg(Regs: ArgGPRs); |
19190 | } |
19191 | |
19192 | unsigned StackOffset = |
19193 | Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign); |
19194 | |
19195 | // If we reach this point and PendingLocs is non-empty, we must be at the |
19196 | // end of a split argument that must be passed indirectly. |
19197 | if (!PendingLocs.empty()) { |
19198 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" ); |
19199 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" ); |
19200 | |
19201 | for (auto &It : PendingLocs) { |
19202 | if (Reg) |
19203 | It.convertToReg(RegNo: Reg); |
19204 | else |
19205 | It.convertToMem(Offset: StackOffset); |
19206 | State.addLoc(V: It); |
19207 | } |
19208 | PendingLocs.clear(); |
19209 | PendingArgFlags.clear(); |
19210 | return false; |
19211 | } |
19212 | |
19213 | assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || |
19214 | (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && |
19215 | "Expected an XLenVT or vector types at this stage" ); |
19216 | |
19217 | if (Reg) { |
19218 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19219 | return false; |
19220 | } |
19221 | |
19222 | // When a scalar floating-point value is passed on the stack, no |
19223 | // bit-conversion is needed. |
19224 | if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) { |
19225 | assert(!ValVT.isVector()); |
19226 | LocVT = ValVT; |
19227 | LocInfo = CCValAssign::Full; |
19228 | } |
19229 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
19230 | return false; |
19231 | } |
19232 | |
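// Return the index of the first argument whose type is a vector of i1
// elements (i.e. a mask vector), if any.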
19233 | template <typename ArgTy> |
19234 | static std::optional<unsigned> preAssignMask(const ArgTy &Args) { |
19235 | for (const auto &ArgIdx : enumerate(Args)) { |
19236 | MVT ArgVT = ArgIdx.value().VT; |
19237 | if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) |
19238 | return ArgIdx.index(); |
19239 | } |
19240 | return std::nullopt; |
19241 | } |
19242 | |
19243 | void RISCVTargetLowering::analyzeInputArgs( |
19244 | MachineFunction &MF, CCState &CCInfo, |
19245 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
19246 | RISCVCCAssignFn Fn) const { |
19247 | unsigned NumArgs = Ins.size(); |
19248 | FunctionType *FType = MF.getFunction().getFunctionType(); |
19249 | |
19250 | RVVArgDispatcher Dispatcher; |
19251 | if (IsRet) { |
19252 | Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)}; |
19253 | } else { |
19254 | SmallVector<Type *, 4> TypeList; |
19255 | for (const Argument &Arg : MF.getFunction().args()) |
19256 | TypeList.push_back(Elt: Arg.getType()); |
19257 | Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)}; |
19258 | } |
19259 | |
19260 | for (unsigned i = 0; i != NumArgs; ++i) { |
19261 | MVT ArgVT = Ins[i].VT; |
19262 | ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; |
19263 | |
19264 | Type *ArgTy = nullptr; |
19265 | if (IsRet) |
19266 | ArgTy = FType->getReturnType(); |
19267 | else if (Ins[i].isOrigArg()) |
19268 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
19269 | |
19270 | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
19271 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, |
19272 | ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, |
19273 | Dispatcher)) { |
19274 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " |
19275 | << ArgVT << '\n'); |
19276 | llvm_unreachable(nullptr); |
19277 | } |
19278 | } |
19279 | } |
19280 | |
19281 | void RISCVTargetLowering::analyzeOutputArgs( |
19282 | MachineFunction &MF, CCState &CCInfo, |
19283 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
19284 | CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { |
19285 | unsigned NumArgs = Outs.size(); |
19286 | |
19287 | SmallVector<Type *, 4> TypeList; |
19288 | if (IsRet) |
19289 | TypeList.push_back(Elt: MF.getFunction().getReturnType()); |
19290 | else if (CLI) |
19291 | for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs()) |
19292 | TypeList.push_back(Elt: Arg.Ty); |
19293 | RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)}; |
19294 | |
19295 | for (unsigned i = 0; i != NumArgs; i++) { |
19296 | MVT ArgVT = Outs[i].VT; |
19297 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
19298 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
19299 | |
19300 | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
19301 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, |
19302 | ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, |
19303 | Dispatcher)) { |
19304 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " |
19305 | << ArgVT << "\n" ); |
19306 | llvm_unreachable(nullptr); |
19307 | } |
19308 | } |
19309 | } |
19310 | |
19311 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
19312 | // values. |
19313 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
19314 | const CCValAssign &VA, const SDLoc &DL, |
19315 | const RISCVSubtarget &Subtarget) { |
19316 | switch (VA.getLocInfo()) { |
19317 | default: |
19318 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
19319 | case CCValAssign::Full: |
19320 | if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) |
19321 | Val = convertFromScalableVector(VT: VA.getValVT(), V: Val, DAG, Subtarget); |
19322 | break; |
19323 | case CCValAssign::BCvt: |
19324 | if (VA.getLocVT().isInteger() && |
19325 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { |
19326 | Val = DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: VA.getValVT(), Operand: Val); |
19327 | } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { |
19328 | if (RV64LegalI32) { |
19329 | Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Val); |
19330 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val); |
19331 | } else { |
19332 | Val = DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, Operand: Val); |
19333 | } |
19334 | } else { |
19335 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
19336 | } |
19337 | break; |
19338 | } |
19339 | return Val; |
19340 | } |
19341 | |
19342 | // The caller is responsible for loading the full value if the argument is |
19343 | // passed with CCValAssign::Indirect. |
19344 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
19345 | const CCValAssign &VA, const SDLoc &DL, |
19346 | const ISD::InputArg &In, |
19347 | const RISCVTargetLowering &TLI) { |
19348 | MachineFunction &MF = DAG.getMachineFunction(); |
19349 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
19350 | EVT LocVT = VA.getLocVT(); |
19351 | SDValue Val; |
19352 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
19353 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
19354 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
19355 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
19356 | |
19357 | // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. |
19358 | if (In.isOrigArg()) { |
19359 | Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex()); |
19360 | if (OrigArg->getType()->isIntegerTy()) { |
19361 | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
19362 | // An input zero extended from i31 can also be considered sign extended. |
19363 | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
19364 | (BitWidth < 32 && In.Flags.isZExt())) { |
19365 | RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); |
19366 | RVFI->addSExt32Register(Reg: VReg); |
19367 | } |
19368 | } |
19369 | } |
19370 | |
19371 | if (VA.getLocInfo() == CCValAssign::Indirect) |
19372 | return Val; |
19373 | |
19374 | return convertLocVTToValVT(DAG, Val, VA, DL, Subtarget: TLI.getSubtarget()); |
19375 | } |
19376 | |
19377 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
19378 | const CCValAssign &VA, const SDLoc &DL, |
19379 | const RISCVSubtarget &Subtarget) { |
19380 | EVT LocVT = VA.getLocVT(); |
19381 | |
19382 | switch (VA.getLocInfo()) { |
19383 | default: |
19384 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
19385 | case CCValAssign::Full: |
19386 | if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) |
19387 | Val = convertToScalableVector(VT: LocVT, V: Val, DAG, Subtarget); |
19388 | break; |
19389 | case CCValAssign::BCvt: |
19390 | if (LocVT.isInteger() && |
19391 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { |
19392 | Val = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: LocVT, Operand: Val); |
19393 | } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) { |
19394 | if (RV64LegalI32) { |
19395 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val); |
19396 | Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Val); |
19397 | } else { |
19398 | Val = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Val); |
19399 | } |
19400 | } else { |
19401 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
19402 | } |
19403 | break; |
19404 | } |
19405 | return Val; |
19406 | } |
19407 | |
19408 | // The caller is responsible for loading the full value if the argument is |
19409 | // passed with CCValAssign::Indirect. |
19410 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
19411 | const CCValAssign &VA, const SDLoc &DL) { |
19412 | MachineFunction &MF = DAG.getMachineFunction(); |
19413 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
19414 | EVT LocVT = VA.getLocVT(); |
19415 | EVT ValVT = VA.getValVT(); |
19416 | EVT PtrVT = MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)); |
19417 | if (ValVT.isScalableVector()) { |
19418 | // When the value is a scalable vector, the stack slot holds a pointer to
19419 | // the scalable vector value rather than the value itself, so use the
19420 | // pointer type (LocVT) as ValVT instead of the scalable vector type.
19421 | ValVT = LocVT; |
19422 | } |
19423 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
19424 | /*IsImmutable=*/true); |
19425 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
19426 | SDValue Val; |
19427 | |
19428 | ISD::LoadExtType ExtType; |
19429 | switch (VA.getLocInfo()) { |
19430 | default: |
19431 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
19432 | case CCValAssign::Full: |
19433 | case CCValAssign::Indirect: |
19434 | case CCValAssign::BCvt: |
19435 | ExtType = ISD::NON_EXTLOAD; |
19436 | break; |
19437 | } |
19438 | Val = DAG.getExtLoad( |
19439 | ExtType, dl: DL, VT: LocVT, Chain, Ptr: FIN, |
19440 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
19441 | return Val; |
19442 | } |
19443 | |
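      | // Unpack an f64 argument that was passed as two i32 halves under a
      | // soft-double ABI on RV32 (e.g. ilp32/ilp32f): the low half arrives in a
      | // GPR, and the high half arrives either in the next GPR or, if no GPR was
      | // left, on the stack.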
19444 | static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, |
19445 | const CCValAssign &VA, |
19446 | const CCValAssign &HiVA, |
19447 | const SDLoc &DL) { |
19448 | assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && |
19449 | "Unexpected VA" ); |
19450 | MachineFunction &MF = DAG.getMachineFunction(); |
19451 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
19452 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
19453 | |
19454 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
19455 | |
19456 | Register LoVReg = RegInfo.createVirtualRegister(RegClass: &RISCV::GPRRegClass); |
19457 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg); |
19458 | SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32); |
19459 | SDValue Hi; |
19460 | if (HiVA.isMemLoc()) { |
19461 | // Second half of f64 is passed on the stack. |
19462 | int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(), |
19463 | /*IsImmutable=*/true); |
19464 | SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32); |
19465 | Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN, |
19466 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
19467 | } else { |
19468 | // Second half of f64 is passed in another GPR. |
19469 | Register HiVReg = RegInfo.createVirtualRegister(RegClass: &RISCV::GPRRegClass); |
19470 | RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg); |
19471 | Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32); |
19472 | } |
19473 | return DAG.getNode(Opcode: RISCVISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
19474 | } |
19475 | |
19476 | // FastCC gives less than a 1% performance improvement on some particular
19477 | // benchmarks, but it may theoretically benefit other cases.
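      | // For example, compared to the standard convention, the FPR16/FPR32/FPR64
      | // lists below use the temporary registers ft0-ft7 and ft8-ft11 in addition
      | // to the usual argument registers fa0-fa7.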
19478 | bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, |
19479 | unsigned ValNo, MVT ValVT, MVT LocVT, |
19480 | CCValAssign::LocInfo LocInfo, |
19481 | ISD::ArgFlagsTy ArgFlags, CCState &State, |
19482 | bool IsFixed, bool IsRet, Type *OrigTy, |
19483 | const RISCVTargetLowering &TLI, |
19484 | RVVArgDispatcher &RVVDispatcher) { |
19485 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
19486 | if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) { |
19487 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19488 | return false; |
19489 | } |
19490 | } |
19491 | |
19492 | const RISCVSubtarget &Subtarget = TLI.getSubtarget(); |
19493 | |
19494 | if (LocVT == MVT::f16 && |
19495 | (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { |
19496 | static const MCPhysReg FPR16List[] = { |
19497 | RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, |
19498 | RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, |
19499 | RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, |
19500 | RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; |
19501 | if (unsigned Reg = State.AllocateReg(Regs: FPR16List)) { |
19502 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19503 | return false; |
19504 | } |
19505 | } |
19506 | |
19507 | if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { |
19508 | static const MCPhysReg FPR32List[] = { |
19509 | RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, |
19510 | RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, |
19511 | RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, |
19512 | RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; |
19513 | if (unsigned Reg = State.AllocateReg(Regs: FPR32List)) { |
19514 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19515 | return false; |
19516 | } |
19517 | } |
19518 | |
19519 | if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { |
19520 | static const MCPhysReg FPR64List[] = { |
19521 | RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, |
19522 | RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, |
19523 | RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, |
19524 | RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; |
19525 | if (unsigned Reg = State.AllocateReg(Regs: FPR64List)) { |
19526 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19527 | return false; |
19528 | } |
19529 | } |
19530 | |
19531 | // Check if there is an available GPR before hitting the stack. |
19532 | if ((LocVT == MVT::f16 && |
19533 | (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || |
19534 | (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || |
19535 | (LocVT == MVT::f64 && Subtarget.is64Bit() && |
19536 | Subtarget.hasStdExtZdinx())) { |
19537 | if (unsigned Reg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) { |
19538 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19539 | return false; |
19540 | } |
19541 | } |
19542 | |
19543 | if (LocVT == MVT::f16) { |
19544 | unsigned Offset2 = State.AllocateStack(Size: 2, Alignment: Align(2)); |
19545 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset2, LocVT, HTP: LocInfo)); |
19546 | return false; |
19547 | } |
19548 | |
19549 | if (LocVT == MVT::i32 || LocVT == MVT::f32) { |
19550 | unsigned Offset4 = State.AllocateStack(Size: 4, Alignment: Align(4)); |
19551 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset4, LocVT, HTP: LocInfo)); |
19552 | return false; |
19553 | } |
19554 | |
19555 | if (LocVT == MVT::i64 || LocVT == MVT::f64) { |
19556 | unsigned Offset5 = State.AllocateStack(Size: 8, Alignment: Align(8)); |
19557 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: Offset5, LocVT, HTP: LocInfo)); |
19558 | return false; |
19559 | } |
19560 | |
19561 | if (LocVT.isVector()) { |
19562 | MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg(); |
19563 | if (AllocatedVReg) { |
19564 | // Fixed-length vectors are located in the corresponding scalable-vector |
19565 | // container types. |
19566 | if (ValVT.isFixedLengthVector()) |
19567 | LocVT = TLI.getContainerForFixedLengthVector(VT: LocVT); |
19568 | State.addLoc( |
19569 | V: CCValAssign::getReg(ValNo, ValVT, RegNo: AllocatedVReg, LocVT, HTP: LocInfo)); |
19570 | } else { |
19571 | // Try to pass the address via a "fast" GPR.
19572 | if (unsigned GPRReg = State.AllocateReg(Regs: getFastCCArgGPRs(ABI))) { |
19573 | LocInfo = CCValAssign::Indirect; |
19574 | LocVT = TLI.getSubtarget().getXLenVT(); |
19575 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: GPRReg, LocVT, HTP: LocInfo)); |
19576 | } else if (ValVT.isFixedLengthVector()) { |
19577 | auto StackAlign = |
19578 | MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); |
19579 | unsigned StackOffset = |
19580 | State.AllocateStack(Size: ValVT.getStoreSize(), Alignment: StackAlign); |
19581 | State.addLoc( |
19582 | V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
19583 | } else { |
19584 | // Can't pass scalable vectors on the stack. |
19585 | return true; |
19586 | } |
19587 | } |
19588 | |
19589 | return false; |
19590 | } |
19591 | |
19592 | return true; // CC didn't match. |
19593 | } |
19594 | |
19595 | bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
19596 | CCValAssign::LocInfo LocInfo, |
19597 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
19598 | if (ArgFlags.isNest()) { |
19599 | report_fatal_error( |
19600 | reason: "Attribute 'nest' is not supported in GHC calling convention" ); |
19601 | } |
19602 | |
19603 | static const MCPhysReg GPRList[] = { |
19604 | RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, |
19605 | RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; |
19606 | |
19607 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
19608 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim |
19609 | // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 |
19610 | if (unsigned Reg = State.AllocateReg(Regs: GPRList)) { |
19611 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19612 | return false; |
19613 | } |
19614 | } |
19615 | |
19616 | const RISCVSubtarget &Subtarget = |
19617 | State.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
19618 | |
19619 | if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { |
19620 | // Pass in STG registers: F1, ..., F6 |
19621 | // fs0 ... fs5 |
19622 | static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, |
19623 | RISCV::F18_F, RISCV::F19_F, |
19624 | RISCV::F20_F, RISCV::F21_F}; |
19625 | if (unsigned Reg = State.AllocateReg(Regs: FPR32List)) { |
19626 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19627 | return false; |
19628 | } |
19629 | } |
19630 | |
19631 | if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { |
19632 | // Pass in STG registers: D1, ..., D6 |
19633 | // fs6 ... fs11 |
19634 | static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, |
19635 | RISCV::F24_D, RISCV::F25_D, |
19636 | RISCV::F26_D, RISCV::F27_D}; |
19637 | if (unsigned Reg = State.AllocateReg(Regs: FPR64List)) { |
19638 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19639 | return false; |
19640 | } |
19641 | } |
19642 | |
19643 | if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || |
19644 | (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && |
19645 | Subtarget.is64Bit())) { |
19646 | if (unsigned Reg = State.AllocateReg(Regs: GPRList)) { |
19647 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
19648 | return false; |
19649 | } |
19650 | } |
19651 | |
19652 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
19653 | return true; |
19654 | } |
19655 | |
19656 | // Transform physical registers into virtual registers. |
19657 | SDValue RISCVTargetLowering::LowerFormalArguments( |
19658 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
19659 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
19660 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
19661 | |
19662 | MachineFunction &MF = DAG.getMachineFunction(); |
19663 | |
19664 | switch (CallConv) { |
19665 | default: |
19666 | report_fatal_error(reason: "Unsupported calling convention" ); |
19667 | case CallingConv::C: |
19668 | case CallingConv::Fast: |
19669 | case CallingConv::SPIR_KERNEL: |
19670 | case CallingConv::GRAAL: |
19671 | case CallingConv::RISCV_VectorCall: |
19672 | break; |
19673 | case CallingConv::GHC: |
19674 | if (Subtarget.hasStdExtE()) |
19675 | report_fatal_error(reason: "GHC calling convention is not supported on RVE!" ); |
19676 | if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) |
19677 | report_fatal_error(reason: "GHC calling convention requires the (Zfinx/F) and " |
19678 | "(Zdinx/D) instruction set extensions" ); |
19679 | } |
19680 | |
19681 | const Function &Func = MF.getFunction(); |
19682 | if (Func.hasFnAttribute(Kind: "interrupt" )) { |
19683 | if (!Func.arg_empty()) |
19684 | report_fatal_error( |
19685 | reason: "Functions with the interrupt attribute cannot have arguments!" ); |
19686 | |
19687 | StringRef Kind = |
19688 | MF.getFunction().getFnAttribute(Kind: "interrupt" ).getValueAsString(); |
19689 | |
19690 | if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine" )) |
19691 | report_fatal_error( |
19692 | reason: "Function interrupt attribute argument not supported!" ); |
19693 | } |
19694 | |
19695 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
19696 | MVT XLenVT = Subtarget.getXLenVT(); |
19697 | unsigned XLenInBytes = Subtarget.getXLen() / 8; |
19698 | // Used with varargs to accumulate store chains.
19699 | std::vector<SDValue> OutChains; |
19700 | |
19701 | // Assign locations to all of the incoming arguments. |
19702 | SmallVector<CCValAssign, 16> ArgLocs; |
19703 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
19704 | |
19705 | if (CallConv == CallingConv::GHC) |
19706 | CCInfo.AnalyzeFormalArguments(Ins, Fn: RISCV::CC_RISCV_GHC); |
19707 | else |
19708 | analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, |
19709 | Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC |
19710 | : RISCV::CC_RISCV); |
19711 | |
19712 | for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { |
19713 | CCValAssign &VA = ArgLocs[i]; |
19714 | SDValue ArgValue; |
19715 | // Passing f64 on RV32D with a soft float ABI must be handled as a special |
19716 | // case. |
19717 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
19718 | assert(VA.needsCustom()); |
19719 | ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL); |
19720 | } else if (VA.isRegLoc()) |
19721 | ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this); |
19722 | else |
19723 | ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); |
19724 | |
19725 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
19726 | // If the original argument was split and passed by reference (e.g. i128 |
19727 | // on RV32), we need to load all parts of it here (using the same |
19728 | // address). Vectors may be partly split to registers and partly to the |
19729 | // stack, in which case the base address is partly offset and subsequent |
19730 | // stores are relative to that. |
19731 | InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue, |
19732 | PtrInfo: MachinePointerInfo())); |
19733 | unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; |
19734 | unsigned ArgPartOffset = Ins[InsIdx].PartOffset; |
19735 | assert(VA.getValVT().isVector() || ArgPartOffset == 0); |
19736 | while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { |
19737 | CCValAssign &PartVA = ArgLocs[i + 1]; |
19738 | unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; |
19739 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
19740 | if (PartVA.getValVT().isScalableVector()) |
19741 | Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset); |
19742 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset); |
19743 | InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address, |
19744 | PtrInfo: MachinePointerInfo())); |
19745 | ++i; |
19746 | ++InsIdx; |
19747 | } |
19748 | continue; |
19749 | } |
19750 | InVals.push_back(Elt: ArgValue); |
19751 | } |
19752 | |
19753 | if (any_of(Range&: ArgLocs, |
19754 | P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) |
19755 | MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); |
19756 | |
19757 | if (IsVarArg) { |
19758 | ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(ABI: Subtarget.getTargetABI()); |
19759 | unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs); |
19760 | const TargetRegisterClass *RC = &RISCV::GPRRegClass; |
19761 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
19762 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
19763 | RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); |
19764 | |
19765 | // Size of the vararg save area. For now, the varargs save area is either |
19766 | // zero or large enough to hold a0-a7. |
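      | // For example, on LP64 a function whose fixed arguments consume a0-a2
      | // (Idx == 3) saves a3-a7 here: 5 registers * 8 bytes = 40 bytes, plus one
      | // extra 8-byte slot created below to keep offsets 2*XLEN aligned.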
19767 | int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); |
19768 | int FI; |
19769 | |
19770 | // If all registers are allocated, then all varargs must be passed on the |
19771 | // stack and we don't need to save any argregs. |
19772 | if (VarArgsSaveSize == 0) { |
19773 | int VaArgOffset = CCInfo.getStackSize(); |
19774 | FI = MFI.CreateFixedObject(Size: XLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
19775 | } else { |
19776 | int VaArgOffset = -VarArgsSaveSize; |
19777 | FI = MFI.CreateFixedObject(Size: VarArgsSaveSize, SPOffset: VaArgOffset, IsImmutable: true); |
19778 | |
19779 | // If saving an odd number of registers, create an extra stack slot to
19780 | // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19781 | // offsets to even-numbered registers remain 2*XLEN-aligned.
19782 | if (Idx % 2) { |
19783 | MFI.CreateFixedObject( |
19784 | Size: XLenInBytes, SPOffset: VaArgOffset - static_cast<int>(XLenInBytes), IsImmutable: true); |
19785 | VarArgsSaveSize += XLenInBytes; |
19786 | } |
19787 | |
19788 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
19789 | |
19790 | // Copy the integer registers that may have been used for passing varargs |
19791 | // to the vararg save area. |
19792 | for (unsigned I = Idx; I < ArgRegs.size(); ++I) { |
19793 | const Register Reg = RegInfo.createVirtualRegister(RegClass: RC); |
19794 | RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg); |
19795 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: XLenVT); |
19796 | SDValue Store = DAG.getStore( |
19797 | Chain, dl: DL, Val: ArgValue, Ptr: FIN, |
19798 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset: (I - Idx) * XLenInBytes)); |
19799 | OutChains.push_back(x: Store); |
19800 | FIN = |
19801 | DAG.getMemBasePlusOffset(Base: FIN, Offset: TypeSize::getFixed(ExactSize: XLenInBytes), DL); |
19802 | } |
19803 | } |
19804 | |
19805 | // Record the frame index of the first variable argument,
19806 | // which is needed for lowering VASTART.
19807 | RVFI->setVarArgsFrameIndex(FI); |
19808 | RVFI->setVarArgsSaveSize(VarArgsSaveSize); |
19809 | } |
19810 | |
19811 | // All stores are grouped into one node so that the sizes of Ins and
19812 | // InVals can still be matched up. This only happens for vararg functions.
19813 | if (!OutChains.empty()) { |
19814 | OutChains.push_back(x: Chain); |
19815 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains); |
19816 | } |
19817 | |
19818 | return Chain; |
19819 | } |
19820 | |
19821 | /// isEligibleForTailCallOptimization - Check whether the call is eligible |
19822 | /// for tail call optimization. |
19823 | /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. |
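      | /// For example, a call is rejected here if it needs stack space for its
      | /// outgoing arguments, passes any argument indirectly or byval, uses sret
      | /// on either side, is made from an "interrupt" handler, or uses a calling
      | /// convention that does not preserve every register the caller relies on.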
19824 | bool RISCVTargetLowering::isEligibleForTailCallOptimization( |
19825 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
19826 | const SmallVector<CCValAssign, 16> &ArgLocs) const { |
19827 | |
19828 | auto CalleeCC = CLI.CallConv; |
19829 | auto &Outs = CLI.Outs; |
19830 | auto &Caller = MF.getFunction(); |
19831 | auto CallerCC = Caller.getCallingConv(); |
19832 | |
19833 | // Exception-handling functions need a special set of instructions to |
19834 | // indicate a return to the hardware. Tail-calling another function would |
19835 | // probably break this. |
19836 | // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This |
19837 | // should be expanded as new function attributes are introduced. |
19838 | if (Caller.hasFnAttribute(Kind: "interrupt" )) |
19839 | return false; |
19840 | |
19841 | // Do not tail call opt if the stack is used to pass parameters. |
19842 | if (CCInfo.getStackSize() != 0) |
19843 | return false; |
19844 | |
19845 | // Do not tail call opt if any parameters need to be passed indirectly.
19846 | // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19847 | // passed indirectly. The address of the value is then passed in a
19848 | // register or, if no register is available, on the stack. Passing a value
19849 | // indirectly usually also requires stack space to store the value, but
19850 | // that space is not counted by CCInfo.getStackSize(), so the check above
19851 | // is not enough on its own; we also need to check whether any CCValAssign
19852 | // in ArgLocs is passed CCValAssign::Indirect.
19853 | for (auto &VA : ArgLocs) |
19854 | if (VA.getLocInfo() == CCValAssign::Indirect) |
19855 | return false; |
19856 | |
19857 | // Do not tail call opt if either caller or callee uses struct return |
19858 | // semantics. |
19859 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
19860 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
19861 | if (IsCallerStructRet || IsCalleeStructRet) |
19862 | return false; |
19863 | |
19864 | // The callee has to preserve all registers the caller needs to preserve. |
19865 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
19866 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
19867 | if (CalleeCC != CallerCC) { |
19868 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
19869 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
19870 | return false; |
19871 | } |
19872 | |
19873 | // Byval parameters hand the function a pointer directly into the stack area |
19874 | // we want to reuse during a tail call. Working around this *is* possible |
19875 | // but less efficient and uglier in LowerCall. |
19876 | for (auto &Arg : Outs) |
19877 | if (Arg.Flags.isByVal()) |
19878 | return false; |
19879 | |
19880 | return true; |
19881 | } |
19882 | |
19883 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
19884 | return DAG.getDataLayout().getPrefTypeAlign( |
19885 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
19886 | } |
19887 | |
19888 | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
19889 | // and output parameter nodes. |
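      | // The emitted sequence is roughly: callseq_start, stores for byval copies
      | // and stack-passed arguments, glued CopyToReg nodes for register arguments,
      | // a RISCVISD::CALL, callseq_end, and CopyFromReg nodes for the results;
      | // tail calls instead end in a single RISCVISD::TAIL node with no callseq
      | // markers or result copies.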
19890 | SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, |
19891 | SmallVectorImpl<SDValue> &InVals) const { |
19892 | SelectionDAG &DAG = CLI.DAG; |
19893 | SDLoc &DL = CLI.DL; |
19894 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
19895 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
19896 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
19897 | SDValue Chain = CLI.Chain; |
19898 | SDValue Callee = CLI.Callee; |
19899 | bool &IsTailCall = CLI.IsTailCall; |
19900 | CallingConv::ID CallConv = CLI.CallConv; |
19901 | bool IsVarArg = CLI.IsVarArg; |
19902 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
19903 | MVT XLenVT = Subtarget.getXLenVT(); |
19904 | |
19905 | MachineFunction &MF = DAG.getMachineFunction(); |
19906 | |
19907 | // Analyze the operands of the call, assigning locations to each operand. |
19908 | SmallVector<CCValAssign, 16> ArgLocs; |
19909 | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
19910 | |
19911 | if (CallConv == CallingConv::GHC) { |
19912 | if (Subtarget.hasStdExtE()) |
19913 | report_fatal_error(reason: "GHC calling convention is not supported on RVE!" ); |
19914 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: RISCV::CC_RISCV_GHC); |
19915 | } else |
19916 | analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, |
19917 | Fn: CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC |
19918 | : RISCV::CC_RISCV); |
19919 | |
19920 | // Check if it's really possible to do a tail call. |
19921 | if (IsTailCall) |
19922 | IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs); |
19923 | |
19924 | if (IsTailCall) |
19925 | ++NumTailCalls; |
19926 | else if (CLI.CB && CLI.CB->isMustTailCall()) |
19927 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
19928 | "site marked musttail" ); |
19929 | |
19930 | // Get a count of how many bytes are to be pushed on the stack. |
19931 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
19932 | |
19933 | // Create local copies for byval args |
19934 | SmallVector<SDValue, 8> ByValArgs; |
19935 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
19936 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
19937 | if (!Flags.isByVal()) |
19938 | continue; |
19939 | |
19940 | SDValue Arg = OutVals[i]; |
19941 | unsigned Size = Flags.getByValSize(); |
19942 | Align Alignment = Flags.getNonZeroByValAlign(); |
19943 | |
19944 | int FI = |
19945 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false); |
19946 | SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
19947 | SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: XLenVT); |
19948 | |
19949 | Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment, |
19950 | /*IsVolatile=*/isVol: false, |
19951 | /*AlwaysInline=*/false, /*CI*/ nullptr, OverrideTailCall: IsTailCall, |
19952 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
19953 | ByValArgs.push_back(Elt: FIPtr); |
19954 | } |
19955 | |
19956 | if (!IsTailCall) |
19957 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL); |
19958 | |
19959 | // Copy argument values to their designated locations. |
19960 | SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; |
19961 | SmallVector<SDValue, 8> MemOpChains; |
19962 | SDValue StackPtr; |
19963 | for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; |
19964 | ++i, ++OutIdx) { |
19965 | CCValAssign &VA = ArgLocs[i]; |
19966 | SDValue ArgValue = OutVals[OutIdx]; |
19967 | ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; |
19968 | |
19969 | // Handle passing f64 on RV32D with a soft float ABI as a special case. |
19970 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
19971 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
19972 | assert(VA.needsCustom()); |
19973 | SDValue SplitF64 = DAG.getNode( |
19974 | Opcode: RISCVISD::SplitF64, DL, VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue); |
19975 | SDValue Lo = SplitF64.getValue(R: 0); |
19976 | SDValue Hi = SplitF64.getValue(R: 1); |
19977 | |
19978 | Register RegLo = VA.getLocReg(); |
19979 | RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo)); |
19980 | |
19981 | // Get the CCValAssign for the Hi part. |
19982 | CCValAssign &HiVA = ArgLocs[++i]; |
19983 | |
19984 | if (HiVA.isMemLoc()) { |
19985 | // Second half of f64 is passed on the stack. |
19986 | if (!StackPtr.getNode()) |
19987 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: RISCV::X2, VT: PtrVT); |
19988 | SDValue Address = |
19989 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
19990 | N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL)); |
19991 | // Emit the store. |
19992 | MemOpChains.push_back( |
19993 | Elt: DAG.getStore(Chain, dl: DL, Val: Hi, Ptr: Address, PtrInfo: MachinePointerInfo())); |
19994 | } else { |
19995 | // Second half of f64 is passed in another GPR. |
19996 | Register RegHigh = HiVA.getLocReg(); |
19997 | RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi)); |
19998 | } |
19999 | continue; |
20000 | } |
20001 | |
20002 | // Promote the value if needed. |
20003 | // For now, only handle fully promoted and indirect arguments. |
20004 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
20005 | // Store the argument in a stack slot and pass its address. |
20006 | Align StackAlign = |
20007 | std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG), |
20008 | b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG)); |
20009 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
20010 | // If the original argument was split (e.g. i128), we need |
20011 | // to store the required parts of it here (and pass just one address). |
20012 | // Vectors may be partly split to registers and partly to the stack, in |
20013 | // which case the base address is partly offset and subsequent stores are |
20014 | // relative to that. |
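      | // For example, an i128 argument on RV32 reaches this point split into
      | // XLen-sized pieces (four i32 parts) sharing one OrigArgIndex; all the
      | // pieces are stored into a single stack temporary and only that
      | // temporary's address is passed.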
20015 | unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; |
20016 | unsigned ArgPartOffset = Outs[OutIdx].PartOffset; |
20017 | assert(VA.getValVT().isVector() || ArgPartOffset == 0); |
20018 | // Calculate the total size to store. We don't know exactly what we're
20019 | // storing until we walk the remaining parts in the loop below and collect
20020 | // that information.
20021 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
20022 | while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { |
20023 | SDValue PartValue = OutVals[OutIdx + 1]; |
20024 | unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; |
20025 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
20026 | EVT PartVT = PartValue.getValueType(); |
20027 | if (PartVT.isScalableVector()) |
20028 | Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset); |
20029 | StoredSize += PartVT.getStoreSize(); |
20030 | StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG)); |
20031 | Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset)); |
20032 | ++i; |
20033 | ++OutIdx; |
20034 | } |
20035 | SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign); |
20036 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
20037 | MemOpChains.push_back( |
20038 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
20039 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
20040 | for (const auto &Part : Parts) { |
20041 | SDValue PartValue = Part.first; |
20042 | SDValue PartOffset = Part.second; |
20043 | SDValue Address = |
20044 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset); |
20045 | MemOpChains.push_back( |
20046 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
20047 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
20048 | } |
20049 | ArgValue = SpillSlot; |
20050 | } else { |
20051 | ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL, Subtarget); |
20052 | } |
20053 | |
20054 | // Use local copy if it is a byval arg. |
20055 | if (Flags.isByVal()) |
20056 | ArgValue = ByValArgs[j++]; |
20057 | |
20058 | if (VA.isRegLoc()) { |
20059 | // Queue up the argument copies and emit them at the end. |
20060 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
20061 | } else { |
20062 | assert(VA.isMemLoc() && "Argument not register or memory" ); |
20063 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
20064 | "for passing parameters" ); |
20065 | |
20066 | // Work out the address of the stack slot. |
20067 | if (!StackPtr.getNode()) |
20068 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: RISCV::X2, VT: PtrVT); |
20069 | SDValue Address = |
20070 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
20071 | N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL)); |
20072 | |
20073 | // Emit the store. |
20074 | MemOpChains.push_back( |
20075 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
20076 | } |
20077 | } |
20078 | |
20079 | // Join the stores, which are independent of one another. |
20080 | if (!MemOpChains.empty()) |
20081 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains); |
20082 | |
20083 | SDValue Glue; |
20084 | |
20085 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
20086 | for (auto &Reg : RegsToPass) { |
20087 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue); |
20088 | Glue = Chain.getValue(R: 1); |
20089 | } |
20090 | |
20091 | // Validate that none of the argument registers have been marked as
20092 | // reserved; if so, report an error. Do the same for the return address
20093 | // register if this is not a tail call.
20094 | validateCCReservedRegs(Regs: RegsToPass, MF); |
20095 | if (!IsTailCall && |
20096 | MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(i: RISCV::X1)) |
20097 | MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
20098 | MF.getFunction(), |
20099 | "Return address register required, but has been reserved." }); |
20100 | |
20101 | // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20102 | // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20103 | // split it, and so that the direct call can be matched by PseudoCALL.
20104 | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
20105 | const GlobalValue *GV = S->getGlobal(); |
20106 | Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: RISCVII::MO_CALL); |
20107 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
20108 | Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: RISCVII::MO_CALL); |
20109 | } |
20110 | |
20111 | // The first call operand is the chain and the second is the target address. |
20112 | SmallVector<SDValue, 8> Ops; |
20113 | Ops.push_back(Elt: Chain); |
20114 | Ops.push_back(Elt: Callee); |
20115 | |
20116 | // Add argument registers to the end of the list so that they are |
20117 | // known live into the call. |
20118 | for (auto &Reg : RegsToPass) |
20119 | Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType())); |
20120 | |
20121 | if (!IsTailCall) { |
20122 | // Add a register mask operand representing the call-preserved registers. |
20123 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
20124 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
20125 | assert(Mask && "Missing call preserved mask for calling convention" ); |
20126 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
20127 | } |
20128 | |
20129 | // Glue the call to the argument copies, if any. |
20130 | if (Glue.getNode()) |
20131 | Ops.push_back(Elt: Glue); |
20132 | |
20133 | assert((!CLI.CFIType || CLI.CB->isIndirectCall()) && |
20134 | "Unexpected CFI type for a direct call" ); |
20135 | |
20136 | // Emit the call. |
20137 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
20138 | |
20139 | if (IsTailCall) { |
20140 | MF.getFrameInfo().setHasTailCall(); |
20141 | SDValue Ret = DAG.getNode(Opcode: RISCVISD::TAIL, DL, VTList: NodeTys, Ops); |
20142 | if (CLI.CFIType) |
20143 | Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
20144 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
20145 | return Ret; |
20146 | } |
20147 | |
20148 | Chain = DAG.getNode(Opcode: RISCVISD::CALL, DL, VTList: NodeTys, Ops); |
20149 | if (CLI.CFIType) |
20150 | Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
20151 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
20152 | Glue = Chain.getValue(R: 1); |
20153 | |
20154 | // Mark the end of the call, which is glued to the call itself. |
20155 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
20156 | Glue = Chain.getValue(R: 1); |
20157 | |
20158 | // Assign locations to each value returned by this call. |
20159 | SmallVector<CCValAssign, 16> RVLocs; |
20160 | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
20161 | analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: RISCV::CC_RISCV); |
20162 | |
20163 | // Copy all of the result registers out of their specified physreg. |
20164 | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
20165 | auto &VA = RVLocs[i]; |
20166 | // Copy the value out |
20167 | SDValue RetValue = |
20168 | DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue); |
20169 | // Glue the RetValue to the end of the call sequence |
20170 | Chain = RetValue.getValue(R: 1); |
20171 | Glue = RetValue.getValue(R: 2); |
20172 | |
20173 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
20174 | assert(VA.needsCustom()); |
20175 | SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(), |
20176 | VT: MVT::i32, Glue); |
20177 | Chain = RetValue2.getValue(R: 1); |
20178 | Glue = RetValue2.getValue(R: 2); |
20179 | RetValue = DAG.getNode(Opcode: RISCVISD::BuildPairF64, DL, VT: MVT::f64, N1: RetValue, |
20180 | N2: RetValue2); |
20181 | } |
20182 | |
20183 | RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL, Subtarget); |
20184 | |
20185 | InVals.push_back(Elt: RetValue); |
20186 | } |
20187 | |
20188 | return Chain; |
20189 | } |
20190 | |
20191 | bool RISCVTargetLowering::CanLowerReturn( |
20192 | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
20193 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
20194 | SmallVector<CCValAssign, 16> RVLocs; |
20195 | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
20196 | |
20197 | RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)}; |
20198 | |
20199 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
20200 | MVT VT = Outs[i].VT; |
20201 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
20202 | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
20203 | if (RISCV::CC_RISCV(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: VT, LocVT: VT, LocInfo: CCValAssign::Full, |
20204 | ArgFlags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true, |
20205 | OrigTy: nullptr, TLI: *this, RVVDispatcher&: Dispatcher)) |
20206 | return false; |
20207 | } |
20208 | return true; |
20209 | } |
20210 | |
20211 | SDValue |
20212 | RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
20213 | bool IsVarArg, |
20214 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
20215 | const SmallVectorImpl<SDValue> &OutVals, |
20216 | const SDLoc &DL, SelectionDAG &DAG) const { |
20217 | MachineFunction &MF = DAG.getMachineFunction(); |
20218 | const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); |
20219 | |
20220 | // Stores the assignment of the return value to a location. |
20221 | SmallVector<CCValAssign, 16> RVLocs; |
20222 | |
20223 | // Info about the registers and stack slot. |
20224 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
20225 | *DAG.getContext()); |
20226 | |
20227 | analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
20228 | CLI: nullptr, Fn: RISCV::CC_RISCV); |
20229 | |
20230 | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
20231 | report_fatal_error(reason: "GHC functions return void only" ); |
20232 | |
20233 | SDValue Glue; |
20234 | SmallVector<SDValue, 4> RetOps(1, Chain); |
20235 | |
20236 | // Copy the result values into the output registers. |
20237 | for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { |
20238 | SDValue Val = OutVals[OutIdx]; |
20239 | CCValAssign &VA = RVLocs[i]; |
20240 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
20241 | |
20242 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
20243 | // Handle returning f64 on RV32D with a soft float ABI. |
20244 | assert(VA.isRegLoc() && "Expected return via registers" ); |
20245 | assert(VA.needsCustom()); |
20246 | SDValue SplitF64 = DAG.getNode(Opcode: RISCVISD::SplitF64, DL, |
20247 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val); |
20248 | SDValue Lo = SplitF64.getValue(R: 0); |
20249 | SDValue Hi = SplitF64.getValue(R: 1); |
20250 | Register RegLo = VA.getLocReg(); |
20251 | Register RegHi = RVLocs[++i].getLocReg(); |
20252 | |
20253 | if (STI.isRegisterReservedByUser(i: RegLo) || |
20254 | STI.isRegisterReservedByUser(i: RegHi)) |
20255 | MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
20256 | MF.getFunction(), |
20257 | "Return value register required, but has been reserved." }); |
20258 | |
20259 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue); |
20260 | Glue = Chain.getValue(R: 1); |
20261 | RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32)); |
20262 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue); |
20263 | Glue = Chain.getValue(R: 1); |
20264 | RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32)); |
20265 | } else { |
20266 | // Handle a 'normal' return. |
20267 | Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); |
20268 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue); |
20269 | |
20270 | if (STI.isRegisterReservedByUser(i: VA.getLocReg())) |
20271 | MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
20272 | MF.getFunction(), |
20273 | "Return value register required, but has been reserved." }); |
20274 | |
20275 | // Guarantee that all emitted copies are stuck together. |
20276 | Glue = Chain.getValue(R: 1); |
20277 | RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT())); |
20278 | } |
20279 | } |
20280 | |
20281 | RetOps[0] = Chain; // Update chain. |
20282 | |
20283 | // Add the glue node if we have it. |
20284 | if (Glue.getNode()) { |
20285 | RetOps.push_back(Elt: Glue); |
20286 | } |
20287 | |
20288 | if (any_of(Range&: RVLocs, |
20289 | P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) |
20290 | MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); |
20291 | |
20292 | unsigned RetOpc = RISCVISD::RET_GLUE; |
20293 | // Interrupt service routines use different return instructions. |
20294 | const Function &Func = DAG.getMachineFunction().getFunction(); |
20295 | if (Func.hasFnAttribute(Kind: "interrupt" )) { |
20296 | if (!Func.getReturnType()->isVoidTy()) |
20297 | report_fatal_error( |
20298 | reason: "Functions with the interrupt attribute must have void return type!" ); |
20299 | |
20300 | MachineFunction &MF = DAG.getMachineFunction(); |
20301 | StringRef Kind = |
20302 | MF.getFunction().getFnAttribute(Kind: "interrupt" ).getValueAsString(); |
20303 | |
20304 | if (Kind == "supervisor" ) |
20305 | RetOpc = RISCVISD::SRET_GLUE; |
20306 | else |
20307 | RetOpc = RISCVISD::MRET_GLUE; |
20308 | } |
20309 | |
20310 | return DAG.getNode(Opcode: RetOpc, DL, VT: MVT::Other, Ops: RetOps); |
20311 | } |
20312 | |
20313 | void RISCVTargetLowering::validateCCReservedRegs( |
20314 | const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, |
20315 | MachineFunction &MF) const { |
20316 | const Function &F = MF.getFunction(); |
20317 | const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); |
20318 | |
20319 | if (llvm::any_of(Range: Regs, P: [&STI](auto Reg) { |
20320 | return STI.isRegisterReservedByUser(i: Reg.first); |
20321 | })) |
20322 | F.getContext().diagnose(DI: DiagnosticInfoUnsupported{ |
20323 | F, "Argument register required, but has been reserved." }); |
20324 | } |
20325 | |
20326 | // Check if the result of the node is only used as a return value, as |
20327 | // otherwise we can't perform a tail-call. |
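      | // Concretely, N must feed (possibly through bitcasts) a single CopyToReg
      | // that has no glue operand and whose only users are RISCVISD::RET_GLUE
      | // nodes.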
20328 | bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
20329 | if (N->getNumValues() != 1) |
20330 | return false; |
20331 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
20332 | return false; |
20333 | |
20334 | SDNode *Copy = *N->use_begin(); |
20335 | |
20336 | if (Copy->getOpcode() == ISD::BITCAST) { |
20337 | return isUsedByReturnOnly(N: Copy, Chain); |
20338 | } |
20339 | |
20340 | // TODO: Handle additional opcodes in order to support tail-calling libcalls |
20341 | // with soft float ABIs. |
20342 | if (Copy->getOpcode() != ISD::CopyToReg) { |
20343 | return false; |
20344 | } |
20345 | |
20346 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
20347 | // isn't safe to perform a tail call. |
20348 | if (Copy->getOperand(Num: Copy->getNumOperands() - 1).getValueType() == MVT::Glue) |
20349 | return false; |
20350 | |
20351 | // The copy must be used by a RISCVISD::RET_GLUE, and nothing else. |
20352 | bool HasRet = false; |
20353 | for (SDNode *Node : Copy->uses()) { |
20354 | if (Node->getOpcode() != RISCVISD::RET_GLUE) |
20355 | return false; |
20356 | HasRet = true; |
20357 | } |
20358 | if (!HasRet) |
20359 | return false; |
20360 | |
20361 | Chain = Copy->getOperand(Num: 0); |
20362 | return true; |
20363 | } |
20364 | |
20365 | bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
20366 | return CI->isTailCall(); |
20367 | } |
20368 | |
20369 | const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { |
20370 | #define NODE_NAME_CASE(NODE) \ |
20371 | case RISCVISD::NODE: \ |
20372 | return "RISCVISD::" #NODE; |
20373 | // clang-format off |
20374 | switch ((RISCVISD::NodeType)Opcode) { |
20375 | case RISCVISD::FIRST_NUMBER: |
20376 | break; |
20377 | NODE_NAME_CASE(RET_GLUE) |
20378 | NODE_NAME_CASE(SRET_GLUE) |
20379 | NODE_NAME_CASE(MRET_GLUE) |
20380 | NODE_NAME_CASE(CALL) |
20381 | NODE_NAME_CASE(SELECT_CC) |
20382 | NODE_NAME_CASE(BR_CC) |
20383 | NODE_NAME_CASE(BuildPairF64) |
20384 | NODE_NAME_CASE(SplitF64) |
20385 | NODE_NAME_CASE(TAIL) |
20386 | NODE_NAME_CASE(ADD_LO) |
20387 | NODE_NAME_CASE(HI) |
20388 | NODE_NAME_CASE(LLA) |
20389 | NODE_NAME_CASE(ADD_TPREL) |
20390 | NODE_NAME_CASE(MULHSU) |
20391 | NODE_NAME_CASE(SHL_ADD) |
20392 | NODE_NAME_CASE(SLLW) |
20393 | NODE_NAME_CASE(SRAW) |
20394 | NODE_NAME_CASE(SRLW) |
20395 | NODE_NAME_CASE(DIVW) |
20396 | NODE_NAME_CASE(DIVUW) |
20397 | NODE_NAME_CASE(REMUW) |
20398 | NODE_NAME_CASE(ROLW) |
20399 | NODE_NAME_CASE(RORW) |
20400 | NODE_NAME_CASE(CLZW) |
20401 | NODE_NAME_CASE(CTZW) |
20402 | NODE_NAME_CASE(ABSW) |
20403 | NODE_NAME_CASE(FMV_H_X) |
20404 | NODE_NAME_CASE(FMV_X_ANYEXTH) |
20405 | NODE_NAME_CASE(FMV_X_SIGNEXTH) |
20406 | NODE_NAME_CASE(FMV_W_X_RV64) |
20407 | NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) |
20408 | NODE_NAME_CASE(FCVT_X) |
20409 | NODE_NAME_CASE(FCVT_XU) |
20410 | NODE_NAME_CASE(FCVT_W_RV64) |
20411 | NODE_NAME_CASE(FCVT_WU_RV64) |
20412 | NODE_NAME_CASE(STRICT_FCVT_W_RV64) |
20413 | NODE_NAME_CASE(STRICT_FCVT_WU_RV64) |
20414 | NODE_NAME_CASE(FP_ROUND_BF16) |
20415 | NODE_NAME_CASE(FP_EXTEND_BF16) |
20416 | NODE_NAME_CASE(FROUND) |
20417 | NODE_NAME_CASE(FCLASS) |
20418 | NODE_NAME_CASE(FMAX) |
20419 | NODE_NAME_CASE(FMIN) |
20420 | NODE_NAME_CASE(READ_COUNTER_WIDE) |
20421 | NODE_NAME_CASE(BREV8) |
20422 | NODE_NAME_CASE(ORC_B) |
20423 | NODE_NAME_CASE(ZIP) |
20424 | NODE_NAME_CASE(UNZIP) |
20425 | NODE_NAME_CASE(CLMUL) |
20426 | NODE_NAME_CASE(CLMULH) |
20427 | NODE_NAME_CASE(CLMULR) |
20428 | NODE_NAME_CASE(MOPR) |
20429 | NODE_NAME_CASE(MOPRR) |
20430 | NODE_NAME_CASE(SHA256SIG0) |
20431 | NODE_NAME_CASE(SHA256SIG1) |
20432 | NODE_NAME_CASE(SHA256SUM0) |
20433 | NODE_NAME_CASE(SHA256SUM1) |
20434 | NODE_NAME_CASE(SM4KS) |
20435 | NODE_NAME_CASE(SM4ED) |
20436 | NODE_NAME_CASE(SM3P0) |
20437 | NODE_NAME_CASE(SM3P1) |
20438 | NODE_NAME_CASE(TH_LWD) |
20439 | NODE_NAME_CASE(TH_LWUD) |
20440 | NODE_NAME_CASE(TH_LDD) |
20441 | NODE_NAME_CASE(TH_SWD) |
20442 | NODE_NAME_CASE(TH_SDD) |
20443 | NODE_NAME_CASE(VMV_V_V_VL) |
20444 | NODE_NAME_CASE(VMV_V_X_VL) |
20445 | NODE_NAME_CASE(VFMV_V_F_VL) |
20446 | NODE_NAME_CASE(VMV_X_S) |
20447 | NODE_NAME_CASE(VMV_S_X_VL) |
20448 | NODE_NAME_CASE(VFMV_S_F_VL) |
20449 | NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) |
20450 | NODE_NAME_CASE(READ_VLENB) |
20451 | NODE_NAME_CASE(TRUNCATE_VECTOR_VL) |
20452 | NODE_NAME_CASE(VSLIDEUP_VL) |
20453 | NODE_NAME_CASE(VSLIDE1UP_VL) |
20454 | NODE_NAME_CASE(VSLIDEDOWN_VL) |
20455 | NODE_NAME_CASE(VSLIDE1DOWN_VL) |
20456 | NODE_NAME_CASE(VFSLIDE1UP_VL) |
20457 | NODE_NAME_CASE(VFSLIDE1DOWN_VL) |
20458 | NODE_NAME_CASE(VID_VL) |
20459 | NODE_NAME_CASE(VFNCVT_ROD_VL) |
20460 | NODE_NAME_CASE(VECREDUCE_ADD_VL) |
20461 | NODE_NAME_CASE(VECREDUCE_UMAX_VL) |
20462 | NODE_NAME_CASE(VECREDUCE_SMAX_VL) |
20463 | NODE_NAME_CASE(VECREDUCE_UMIN_VL) |
20464 | NODE_NAME_CASE(VECREDUCE_SMIN_VL) |
20465 | NODE_NAME_CASE(VECREDUCE_AND_VL) |
20466 | NODE_NAME_CASE(VECREDUCE_OR_VL) |
20467 | NODE_NAME_CASE(VECREDUCE_XOR_VL) |
20468 | NODE_NAME_CASE(VECREDUCE_FADD_VL) |
20469 | NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) |
20470 | NODE_NAME_CASE(VECREDUCE_FMIN_VL) |
20471 | NODE_NAME_CASE(VECREDUCE_FMAX_VL) |
20472 | NODE_NAME_CASE(ADD_VL) |
20473 | NODE_NAME_CASE(AND_VL) |
20474 | NODE_NAME_CASE(MUL_VL) |
20475 | NODE_NAME_CASE(OR_VL) |
20476 | NODE_NAME_CASE(SDIV_VL) |
20477 | NODE_NAME_CASE(SHL_VL) |
20478 | NODE_NAME_CASE(SREM_VL) |
20479 | NODE_NAME_CASE(SRA_VL) |
20480 | NODE_NAME_CASE(SRL_VL) |
20481 | NODE_NAME_CASE(ROTL_VL) |
20482 | NODE_NAME_CASE(ROTR_VL) |
20483 | NODE_NAME_CASE(SUB_VL) |
20484 | NODE_NAME_CASE(UDIV_VL) |
20485 | NODE_NAME_CASE(UREM_VL) |
20486 | NODE_NAME_CASE(XOR_VL) |
20487 | NODE_NAME_CASE(AVGFLOORS_VL) |
20488 | NODE_NAME_CASE(AVGFLOORU_VL) |
20489 | NODE_NAME_CASE(AVGCEILS_VL) |
20490 | NODE_NAME_CASE(AVGCEILU_VL) |
20491 | NODE_NAME_CASE(SADDSAT_VL) |
20492 | NODE_NAME_CASE(UADDSAT_VL) |
20493 | NODE_NAME_CASE(SSUBSAT_VL) |
20494 | NODE_NAME_CASE(USUBSAT_VL) |
20495 | NODE_NAME_CASE(VNCLIP_VL) |
20496 | NODE_NAME_CASE(VNCLIPU_VL) |
20497 | NODE_NAME_CASE(FADD_VL) |
20498 | NODE_NAME_CASE(FSUB_VL) |
20499 | NODE_NAME_CASE(FMUL_VL) |
20500 | NODE_NAME_CASE(FDIV_VL) |
20501 | NODE_NAME_CASE(FNEG_VL) |
20502 | NODE_NAME_CASE(FABS_VL) |
20503 | NODE_NAME_CASE(FSQRT_VL) |
20504 | NODE_NAME_CASE(FCLASS_VL) |
20505 | NODE_NAME_CASE(VFMADD_VL) |
20506 | NODE_NAME_CASE(VFNMADD_VL) |
20507 | NODE_NAME_CASE(VFMSUB_VL) |
20508 | NODE_NAME_CASE(VFNMSUB_VL) |
20509 | NODE_NAME_CASE(VFWMADD_VL) |
20510 | NODE_NAME_CASE(VFWNMADD_VL) |
20511 | NODE_NAME_CASE(VFWMSUB_VL) |
20512 | NODE_NAME_CASE(VFWNMSUB_VL) |
20513 | NODE_NAME_CASE(FCOPYSIGN_VL) |
20514 | NODE_NAME_CASE(SMIN_VL) |
20515 | NODE_NAME_CASE(SMAX_VL) |
20516 | NODE_NAME_CASE(UMIN_VL) |
20517 | NODE_NAME_CASE(UMAX_VL) |
20518 | NODE_NAME_CASE(BITREVERSE_VL) |
20519 | NODE_NAME_CASE(BSWAP_VL) |
20520 | NODE_NAME_CASE(CTLZ_VL) |
20521 | NODE_NAME_CASE(CTTZ_VL) |
20522 | NODE_NAME_CASE(CTPOP_VL) |
20523 | NODE_NAME_CASE(VFMIN_VL) |
20524 | NODE_NAME_CASE(VFMAX_VL) |
20525 | NODE_NAME_CASE(MULHS_VL) |
20526 | NODE_NAME_CASE(MULHU_VL) |
20527 | NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) |
20528 | NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL) |
20529 | NODE_NAME_CASE(VFCVT_RM_X_F_VL) |
20530 | NODE_NAME_CASE(VFCVT_RM_XU_F_VL) |
20531 | NODE_NAME_CASE(VFCVT_X_F_VL) |
20532 | NODE_NAME_CASE(VFCVT_XU_F_VL) |
20533 | NODE_NAME_CASE(VFROUND_NOEXCEPT_VL) |
20534 | NODE_NAME_CASE(SINT_TO_FP_VL) |
20535 | NODE_NAME_CASE(UINT_TO_FP_VL) |
20536 | NODE_NAME_CASE(VFCVT_RM_F_XU_VL) |
20537 | NODE_NAME_CASE(VFCVT_RM_F_X_VL) |
20538 | NODE_NAME_CASE(FP_EXTEND_VL) |
20539 | NODE_NAME_CASE(FP_ROUND_VL) |
20540 | NODE_NAME_CASE(STRICT_FADD_VL) |
20541 | NODE_NAME_CASE(STRICT_FSUB_VL) |
20542 | NODE_NAME_CASE(STRICT_FMUL_VL) |
20543 | NODE_NAME_CASE(STRICT_FDIV_VL) |
20544 | NODE_NAME_CASE(STRICT_FSQRT_VL) |
20545 | NODE_NAME_CASE(STRICT_VFMADD_VL) |
20546 | NODE_NAME_CASE(STRICT_VFNMADD_VL) |
20547 | NODE_NAME_CASE(STRICT_VFMSUB_VL) |
20548 | NODE_NAME_CASE(STRICT_VFNMSUB_VL) |
20549 | NODE_NAME_CASE(STRICT_FP_ROUND_VL) |
20550 | NODE_NAME_CASE(STRICT_FP_EXTEND_VL) |
20551 | NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL) |
20552 | NODE_NAME_CASE(STRICT_SINT_TO_FP_VL) |
20553 | NODE_NAME_CASE(STRICT_UINT_TO_FP_VL) |
20554 | NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL) |
20555 | NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL) |
20556 | NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL) |
20557 | NODE_NAME_CASE(STRICT_FSETCC_VL) |
20558 | NODE_NAME_CASE(STRICT_FSETCCS_VL) |
20559 | NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL) |
20560 | NODE_NAME_CASE(VWMUL_VL) |
20561 | NODE_NAME_CASE(VWMULU_VL) |
20562 | NODE_NAME_CASE(VWMULSU_VL) |
20563 | NODE_NAME_CASE(VWADD_VL) |
20564 | NODE_NAME_CASE(VWADDU_VL) |
20565 | NODE_NAME_CASE(VWSUB_VL) |
20566 | NODE_NAME_CASE(VWSUBU_VL) |
20567 | NODE_NAME_CASE(VWADD_W_VL) |
20568 | NODE_NAME_CASE(VWADDU_W_VL) |
20569 | NODE_NAME_CASE(VWSUB_W_VL) |
20570 | NODE_NAME_CASE(VWSUBU_W_VL) |
20571 | NODE_NAME_CASE(VWSLL_VL) |
20572 | NODE_NAME_CASE(VFWMUL_VL) |
20573 | NODE_NAME_CASE(VFWADD_VL) |
20574 | NODE_NAME_CASE(VFWSUB_VL) |
20575 | NODE_NAME_CASE(VFWADD_W_VL) |
20576 | NODE_NAME_CASE(VFWSUB_W_VL) |
20577 | NODE_NAME_CASE(VWMACC_VL) |
20578 | NODE_NAME_CASE(VWMACCU_VL) |
20579 | NODE_NAME_CASE(VWMACCSU_VL) |
20580 | NODE_NAME_CASE(VNSRL_VL) |
20581 | NODE_NAME_CASE(SETCC_VL) |
20582 | NODE_NAME_CASE(VMERGE_VL) |
20583 | NODE_NAME_CASE(VMAND_VL) |
20584 | NODE_NAME_CASE(VMOR_VL) |
20585 | NODE_NAME_CASE(VMXOR_VL) |
20586 | NODE_NAME_CASE(VMCLR_VL) |
20587 | NODE_NAME_CASE(VMSET_VL) |
20588 | NODE_NAME_CASE(VRGATHER_VX_VL) |
20589 | NODE_NAME_CASE(VRGATHER_VV_VL) |
20590 | NODE_NAME_CASE(VRGATHEREI16_VV_VL) |
20591 | NODE_NAME_CASE(VSEXT_VL) |
20592 | NODE_NAME_CASE(VZEXT_VL) |
20593 | NODE_NAME_CASE(VCPOP_VL) |
20594 | NODE_NAME_CASE(VFIRST_VL) |
20595 | NODE_NAME_CASE(READ_CSR) |
20596 | NODE_NAME_CASE(WRITE_CSR) |
20597 | NODE_NAME_CASE(SWAP_CSR) |
20598 | NODE_NAME_CASE(CZERO_EQZ) |
20599 | NODE_NAME_CASE(CZERO_NEZ) |
20600 | NODE_NAME_CASE(SW_GUARDED_BRIND) |
20601 | NODE_NAME_CASE(SF_VC_XV_SE) |
20602 | NODE_NAME_CASE(SF_VC_IV_SE) |
20603 | NODE_NAME_CASE(SF_VC_VV_SE) |
20604 | NODE_NAME_CASE(SF_VC_FV_SE) |
20605 | NODE_NAME_CASE(SF_VC_XVV_SE) |
20606 | NODE_NAME_CASE(SF_VC_IVV_SE) |
20607 | NODE_NAME_CASE(SF_VC_VVV_SE) |
20608 | NODE_NAME_CASE(SF_VC_FVV_SE) |
20609 | NODE_NAME_CASE(SF_VC_XVW_SE) |
20610 | NODE_NAME_CASE(SF_VC_IVW_SE) |
20611 | NODE_NAME_CASE(SF_VC_VVW_SE) |
20612 | NODE_NAME_CASE(SF_VC_FVW_SE) |
20613 | NODE_NAME_CASE(SF_VC_V_X_SE) |
20614 | NODE_NAME_CASE(SF_VC_V_I_SE) |
20615 | NODE_NAME_CASE(SF_VC_V_XV_SE) |
20616 | NODE_NAME_CASE(SF_VC_V_IV_SE) |
20617 | NODE_NAME_CASE(SF_VC_V_VV_SE) |
20618 | NODE_NAME_CASE(SF_VC_V_FV_SE) |
20619 | NODE_NAME_CASE(SF_VC_V_XVV_SE) |
20620 | NODE_NAME_CASE(SF_VC_V_IVV_SE) |
20621 | NODE_NAME_CASE(SF_VC_V_VVV_SE) |
20622 | NODE_NAME_CASE(SF_VC_V_FVV_SE) |
20623 | NODE_NAME_CASE(SF_VC_V_XVW_SE) |
20624 | NODE_NAME_CASE(SF_VC_V_IVW_SE) |
20625 | NODE_NAME_CASE(SF_VC_V_VVW_SE) |
20626 | NODE_NAME_CASE(SF_VC_V_FVW_SE) |
20627 | } |
20628 | // clang-format on |
20629 | return nullptr; |
20630 | #undef NODE_NAME_CASE |
20631 | } |
20632 | |
20633 | /// getConstraintType - Given a constraint letter, return the type of |
20634 | /// constraint it is for this target. |
20635 | RISCVTargetLowering::ConstraintType |
20636 | RISCVTargetLowering::getConstraintType(StringRef Constraint) const { |
20637 | if (Constraint.size() == 1) { |
20638 | switch (Constraint[0]) { |
20639 | default: |
20640 | break; |
20641 | case 'f': |
20642 | return C_RegisterClass; |
20643 | case 'I': |
20644 | case 'J': |
20645 | case 'K': |
20646 | return C_Immediate; |
20647 | case 'A': |
20648 | return C_Memory; |
20649 | case 's': |
20650 | case 'S': // A symbolic address |
20651 | return C_Other; |
20652 | } |
20653 | } else { |
20654 | if (Constraint == "vr" || Constraint == "vm" ) |
20655 | return C_RegisterClass; |
20656 | } |
20657 | return TargetLowering::getConstraintType(Constraint); |
20658 | } |
20659 | |
20660 | std::pair<unsigned, const TargetRegisterClass *> |
20661 | RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
20662 | StringRef Constraint, |
20663 | MVT VT) const { |
20664 | // First, see if this is a constraint that directly corresponds to a RISC-V |
20665 | // register class. |
20666 | if (Constraint.size() == 1) { |
20667 | switch (Constraint[0]) { |
20668 | case 'r': |
20669 | // TODO: Support fixed vectors up to XLen for P extension? |
20670 | if (VT.isVector()) |
20671 | break; |
20672 | if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) |
20673 | return std::make_pair(x: 0U, y: &RISCV::GPRF16RegClass); |
20674 | if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) |
20675 | return std::make_pair(x: 0U, y: &RISCV::GPRF32RegClass); |
20676 | if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) |
20677 | return std::make_pair(x: 0U, y: &RISCV::GPRPairRegClass); |
20678 | return std::make_pair(x: 0U, y: &RISCV::GPRNoX0RegClass); |
20679 | case 'f': |
20680 | if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) |
20681 | return std::make_pair(x: 0U, y: &RISCV::FPR16RegClass); |
20682 | if (Subtarget.hasStdExtF() && VT == MVT::f32) |
20683 | return std::make_pair(x: 0U, y: &RISCV::FPR32RegClass); |
20684 | if (Subtarget.hasStdExtD() && VT == MVT::f64) |
20685 | return std::make_pair(x: 0U, y: &RISCV::FPR64RegClass); |
20686 | break; |
20687 | default: |
20688 | break; |
20689 | } |
20690 | } else if (Constraint == "vr" ) { |
20691 | for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, |
20692 | &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { |
20693 | if (TRI->isTypeLegalForClass(RC: *RC, T: VT.SimpleTy)) |
20694 | return std::make_pair(x: 0U, y&: RC); |
20695 | } |
20696 | } else if (Constraint == "vm" ) { |
20697 | if (TRI->isTypeLegalForClass(RC: RISCV::VMV0RegClass, T: VT.SimpleTy)) |
20698 | return std::make_pair(x: 0U, y: &RISCV::VMV0RegClass); |
20699 | } |
20700 | |
20701 | // Clang will correctly decode the usage of register name aliases into their |
20702 | // official names. However, other frontends like `rustc` do not. This allows |
20703 | // users of these frontends to use the ABI names for registers in LLVM-style |
20704 | // register constraints. |
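  // For example, both "{x10}" and its ABI alias "{a0}" should select X10; an
  // illustrative IR use of the alias form is:
  //   call void asm sideeffect "", "{a0}"(i64 %val)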
20705 | unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) |
20706 | .Case(S: "{zero}" , Value: RISCV::X0) |
20707 | .Case(S: "{ra}" , Value: RISCV::X1) |
20708 | .Case(S: "{sp}" , Value: RISCV::X2) |
20709 | .Case(S: "{gp}" , Value: RISCV::X3) |
20710 | .Case(S: "{tp}" , Value: RISCV::X4) |
20711 | .Case(S: "{t0}" , Value: RISCV::X5) |
20712 | .Case(S: "{t1}" , Value: RISCV::X6) |
20713 | .Case(S: "{t2}" , Value: RISCV::X7) |
20714 | .Cases(S0: "{s0}" , S1: "{fp}" , Value: RISCV::X8) |
20715 | .Case(S: "{s1}" , Value: RISCV::X9) |
20716 | .Case(S: "{a0}" , Value: RISCV::X10) |
20717 | .Case(S: "{a1}" , Value: RISCV::X11) |
20718 | .Case(S: "{a2}" , Value: RISCV::X12) |
20719 | .Case(S: "{a3}" , Value: RISCV::X13) |
20720 | .Case(S: "{a4}" , Value: RISCV::X14) |
20721 | .Case(S: "{a5}" , Value: RISCV::X15) |
20722 | .Case(S: "{a6}" , Value: RISCV::X16) |
20723 | .Case(S: "{a7}" , Value: RISCV::X17) |
20724 | .Case(S: "{s2}" , Value: RISCV::X18) |
20725 | .Case(S: "{s3}" , Value: RISCV::X19) |
20726 | .Case(S: "{s4}" , Value: RISCV::X20) |
20727 | .Case(S: "{s5}" , Value: RISCV::X21) |
20728 | .Case(S: "{s6}" , Value: RISCV::X22) |
20729 | .Case(S: "{s7}" , Value: RISCV::X23) |
20730 | .Case(S: "{s8}" , Value: RISCV::X24) |
20731 | .Case(S: "{s9}" , Value: RISCV::X25) |
20732 | .Case(S: "{s10}" , Value: RISCV::X26) |
20733 | .Case(S: "{s11}" , Value: RISCV::X27) |
20734 | .Case(S: "{t3}" , Value: RISCV::X28) |
20735 | .Case(S: "{t4}" , Value: RISCV::X29) |
20736 | .Case(S: "{t5}" , Value: RISCV::X30) |
20737 | .Case(S: "{t6}" , Value: RISCV::X31) |
20738 | .Default(Value: RISCV::NoRegister); |
20739 | if (XRegFromAlias != RISCV::NoRegister) |
20740 | return std::make_pair(x&: XRegFromAlias, y: &RISCV::GPRRegClass); |
20741 | |
  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, and since we want to match those names to the widest
  // floating-point register type available, manually select floating-point
  // registers here.
20746 | // |
20747 | // The second case is the ABI name of the register, so that frontends can also |
20748 | // use the ABI names in register constraint lists. |
20749 | if (Subtarget.hasStdExtF()) { |
20750 | unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) |
20751 | .Cases(S0: "{f0}" , S1: "{ft0}" , Value: RISCV::F0_F) |
20752 | .Cases(S0: "{f1}" , S1: "{ft1}" , Value: RISCV::F1_F) |
20753 | .Cases(S0: "{f2}" , S1: "{ft2}" , Value: RISCV::F2_F) |
20754 | .Cases(S0: "{f3}" , S1: "{ft3}" , Value: RISCV::F3_F) |
20755 | .Cases(S0: "{f4}" , S1: "{ft4}" , Value: RISCV::F4_F) |
20756 | .Cases(S0: "{f5}" , S1: "{ft5}" , Value: RISCV::F5_F) |
20757 | .Cases(S0: "{f6}" , S1: "{ft6}" , Value: RISCV::F6_F) |
20758 | .Cases(S0: "{f7}" , S1: "{ft7}" , Value: RISCV::F7_F) |
20759 | .Cases(S0: "{f8}" , S1: "{fs0}" , Value: RISCV::F8_F) |
20760 | .Cases(S0: "{f9}" , S1: "{fs1}" , Value: RISCV::F9_F) |
20761 | .Cases(S0: "{f10}" , S1: "{fa0}" , Value: RISCV::F10_F) |
20762 | .Cases(S0: "{f11}" , S1: "{fa1}" , Value: RISCV::F11_F) |
20763 | .Cases(S0: "{f12}" , S1: "{fa2}" , Value: RISCV::F12_F) |
20764 | .Cases(S0: "{f13}" , S1: "{fa3}" , Value: RISCV::F13_F) |
20765 | .Cases(S0: "{f14}" , S1: "{fa4}" , Value: RISCV::F14_F) |
20766 | .Cases(S0: "{f15}" , S1: "{fa5}" , Value: RISCV::F15_F) |
20767 | .Cases(S0: "{f16}" , S1: "{fa6}" , Value: RISCV::F16_F) |
20768 | .Cases(S0: "{f17}" , S1: "{fa7}" , Value: RISCV::F17_F) |
20769 | .Cases(S0: "{f18}" , S1: "{fs2}" , Value: RISCV::F18_F) |
20770 | .Cases(S0: "{f19}" , S1: "{fs3}" , Value: RISCV::F19_F) |
20771 | .Cases(S0: "{f20}" , S1: "{fs4}" , Value: RISCV::F20_F) |
20772 | .Cases(S0: "{f21}" , S1: "{fs5}" , Value: RISCV::F21_F) |
20773 | .Cases(S0: "{f22}" , S1: "{fs6}" , Value: RISCV::F22_F) |
20774 | .Cases(S0: "{f23}" , S1: "{fs7}" , Value: RISCV::F23_F) |
20775 | .Cases(S0: "{f24}" , S1: "{fs8}" , Value: RISCV::F24_F) |
20776 | .Cases(S0: "{f25}" , S1: "{fs9}" , Value: RISCV::F25_F) |
20777 | .Cases(S0: "{f26}" , S1: "{fs10}" , Value: RISCV::F26_F) |
20778 | .Cases(S0: "{f27}" , S1: "{fs11}" , Value: RISCV::F27_F) |
20779 | .Cases(S0: "{f28}" , S1: "{ft8}" , Value: RISCV::F28_F) |
20780 | .Cases(S0: "{f29}" , S1: "{ft9}" , Value: RISCV::F29_F) |
20781 | .Cases(S0: "{f30}" , S1: "{ft10}" , Value: RISCV::F30_F) |
20782 | .Cases(S0: "{f31}" , S1: "{ft11}" , Value: RISCV::F31_F) |
20783 | .Default(Value: RISCV::NoRegister); |
20784 | if (FReg != RISCV::NoRegister) { |
20785 | assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg" ); |
20786 | if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) { |
20787 | unsigned RegNo = FReg - RISCV::F0_F; |
20788 | unsigned DReg = RISCV::F0_D + RegNo; |
20789 | return std::make_pair(x&: DReg, y: &RISCV::FPR64RegClass); |
20790 | } |
20791 | if (VT == MVT::f32 || VT == MVT::Other) |
20792 | return std::make_pair(x&: FReg, y: &RISCV::FPR32RegClass); |
20793 | if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) { |
20794 | unsigned RegNo = FReg - RISCV::F0_F; |
20795 | unsigned HReg = RISCV::F0_H + RegNo; |
20796 | return std::make_pair(x&: HReg, y: &RISCV::FPR16RegClass); |
20797 | } |
20798 | } |
20799 | } |
20800 | |
20801 | if (Subtarget.hasVInstructions()) { |
20802 | Register VReg = StringSwitch<Register>(Constraint.lower()) |
20803 | .Case(S: "{v0}" , Value: RISCV::V0) |
20804 | .Case(S: "{v1}" , Value: RISCV::V1) |
20805 | .Case(S: "{v2}" , Value: RISCV::V2) |
20806 | .Case(S: "{v3}" , Value: RISCV::V3) |
20807 | .Case(S: "{v4}" , Value: RISCV::V4) |
20808 | .Case(S: "{v5}" , Value: RISCV::V5) |
20809 | .Case(S: "{v6}" , Value: RISCV::V6) |
20810 | .Case(S: "{v7}" , Value: RISCV::V7) |
20811 | .Case(S: "{v8}" , Value: RISCV::V8) |
20812 | .Case(S: "{v9}" , Value: RISCV::V9) |
20813 | .Case(S: "{v10}" , Value: RISCV::V10) |
20814 | .Case(S: "{v11}" , Value: RISCV::V11) |
20815 | .Case(S: "{v12}" , Value: RISCV::V12) |
20816 | .Case(S: "{v13}" , Value: RISCV::V13) |
20817 | .Case(S: "{v14}" , Value: RISCV::V14) |
20818 | .Case(S: "{v15}" , Value: RISCV::V15) |
20819 | .Case(S: "{v16}" , Value: RISCV::V16) |
20820 | .Case(S: "{v17}" , Value: RISCV::V17) |
20821 | .Case(S: "{v18}" , Value: RISCV::V18) |
20822 | .Case(S: "{v19}" , Value: RISCV::V19) |
20823 | .Case(S: "{v20}" , Value: RISCV::V20) |
20824 | .Case(S: "{v21}" , Value: RISCV::V21) |
20825 | .Case(S: "{v22}" , Value: RISCV::V22) |
20826 | .Case(S: "{v23}" , Value: RISCV::V23) |
20827 | .Case(S: "{v24}" , Value: RISCV::V24) |
20828 | .Case(S: "{v25}" , Value: RISCV::V25) |
20829 | .Case(S: "{v26}" , Value: RISCV::V26) |
20830 | .Case(S: "{v27}" , Value: RISCV::V27) |
20831 | .Case(S: "{v28}" , Value: RISCV::V28) |
20832 | .Case(S: "{v29}" , Value: RISCV::V29) |
20833 | .Case(S: "{v30}" , Value: RISCV::V30) |
20834 | .Case(S: "{v31}" , Value: RISCV::V31) |
20835 | .Default(Value: RISCV::NoRegister); |
20836 | if (VReg != RISCV::NoRegister) { |
20837 | if (TRI->isTypeLegalForClass(RC: RISCV::VMRegClass, T: VT.SimpleTy)) |
20838 | return std::make_pair(x&: VReg, y: &RISCV::VMRegClass); |
20839 | if (TRI->isTypeLegalForClass(RC: RISCV::VRRegClass, T: VT.SimpleTy)) |
20840 | return std::make_pair(x&: VReg, y: &RISCV::VRRegClass); |
20841 | for (const auto *RC : |
20842 | {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { |
20843 | if (TRI->isTypeLegalForClass(RC: *RC, T: VT.SimpleTy)) { |
20844 | VReg = TRI->getMatchingSuperReg(Reg: VReg, SubIdx: RISCV::sub_vrm1_0, RC); |
20845 | return std::make_pair(x&: VReg, y&: RC); |
20846 | } |
20847 | } |
20848 | } |
20849 | } |
20850 | |
20851 | std::pair<Register, const TargetRegisterClass *> Res = |
20852 | TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
20853 | |
20854 | // If we picked one of the Zfinx register classes, remap it to the GPR class. |
20855 | // FIXME: When Zfinx is supported in CodeGen this will need to take the |
20856 | // Subtarget into account. |
20857 | if (Res.second == &RISCV::GPRF16RegClass || |
20858 | Res.second == &RISCV::GPRF32RegClass || |
20859 | Res.second == &RISCV::GPRPairRegClass) |
20860 | return std::make_pair(x&: Res.first, y: &RISCV::GPRRegClass); |
20861 | |
20862 | return Res; |
20863 | } |
20864 | |
20865 | InlineAsm::ConstraintCode |
20866 | RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
20867 | // Currently only support length 1 constraints. |
20868 | if (ConstraintCode.size() == 1) { |
20869 | switch (ConstraintCode[0]) { |
20870 | case 'A': |
20871 | return InlineAsm::ConstraintCode::A; |
20872 | default: |
20873 | break; |
20874 | } |
20875 | } |
20876 | |
20877 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
20878 | } |
20879 | |
20880 | void RISCVTargetLowering::LowerAsmOperandForConstraint( |
20881 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
20882 | SelectionDAG &DAG) const { |
20883 | // Currently only support length 1 constraints. |
20884 | if (Constraint.size() == 1) { |
20885 | switch (Constraint[0]) { |
20886 | case 'I': |
20887 | // Validate & create a 12-bit signed immediate operand. |
20888 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
20889 | uint64_t CVal = C->getSExtValue(); |
20890 | if (isInt<12>(x: CVal)) |
20891 | Ops.push_back( |
20892 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT())); |
20893 | } |
20894 | return; |
20895 | case 'J': |
20896 | // Validate & create an integer zero operand. |
20897 | if (isNullConstant(V: Op)) |
20898 | Ops.push_back( |
20899 | x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getXLenVT())); |
20900 | return; |
20901 | case 'K': |
20902 | // Validate & create a 5-bit unsigned immediate operand. |
20903 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
20904 | uint64_t CVal = C->getZExtValue(); |
20905 | if (isUInt<5>(x: CVal)) |
20906 | Ops.push_back( |
20907 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT())); |
20908 | } |
20909 | return; |
20910 | case 'S': |
20911 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint: "s" , Ops, DAG); |
20912 | return; |
20913 | default: |
20914 | break; |
20915 | } |
20916 | } |
20917 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
20918 | } |
20919 | |
20920 | Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, |
20921 | Instruction *Inst, |
20922 | AtomicOrdering Ord) const { |
20923 | if (Subtarget.hasStdExtZtso()) { |
20924 | if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
20925 | return Builder.CreateFence(Ordering: Ord); |
20926 | return nullptr; |
20927 | } |
20928 | |
20929 | if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
20930 | return Builder.CreateFence(Ordering: Ord); |
20931 | if (isa<StoreInst>(Val: Inst) && isReleaseOrStronger(AO: Ord)) |
20932 | return Builder.CreateFence(Ordering: AtomicOrdering::Release); |
20933 | return nullptr; |
20934 | } |
20935 | |
20936 | Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, |
20937 | Instruction *Inst, |
20938 | AtomicOrdering Ord) const { |
20939 | if (Subtarget.hasStdExtZtso()) { |
20940 | if (isa<StoreInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
20941 | return Builder.CreateFence(Ordering: Ord); |
20942 | return nullptr; |
20943 | } |
20944 | |
20945 | if (isa<LoadInst>(Val: Inst) && isAcquireOrStronger(AO: Ord)) |
20946 | return Builder.CreateFence(Ordering: AtomicOrdering::Acquire); |
20947 | if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Val: Inst) && |
20948 | Ord == AtomicOrdering::SequentiallyConsistent) |
20949 | return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent); |
20950 | return nullptr; |
20951 | } |
20952 | |
20953 | TargetLowering::AtomicExpansionKind |
20954 | RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as
  // floating-point operations can't be used in an LR/SC sequence without
  // breaking the forward-progress guarantee. uinc_wrap and udec_wrap are
  // likewise expanded, as they have no AMO or LR/SC-friendly form.
20958 | if (AI->isFloatingPointOperation() || |
20959 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
20960 | AI->getOperation() == AtomicRMWInst::UDecWrap) |
20961 | return AtomicExpansionKind::CmpXChg; |
20962 | |
20963 | // Don't expand forced atomics, we want to have __sync libcalls instead. |
20964 | if (Subtarget.hasForcedAtomics()) |
20965 | return AtomicExpansionKind::None; |
20966 | |
20967 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
20968 | if (AI->getOperation() == AtomicRMWInst::Nand) { |
20969 | if (Subtarget.hasStdExtZacas() && |
20970 | (Size >= 32 || Subtarget.hasStdExtZabha())) |
20971 | return AtomicExpansionKind::CmpXChg; |
20972 | if (Size < 32) |
20973 | return AtomicExpansionKind::MaskedIntrinsic; |
20974 | } |
20975 | |
20976 | if (Size < 32 && !Subtarget.hasStdExtZabha()) |
20977 | return AtomicExpansionKind::MaskedIntrinsic; |
20978 | |
20979 | return AtomicExpansionKind::None; |
20980 | } |
20981 | |
20982 | static Intrinsic::ID |
20983 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { |
20984 | if (XLen == 32) { |
20985 | switch (BinOp) { |
20986 | default: |
20987 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
20988 | case AtomicRMWInst::Xchg: |
20989 | return Intrinsic::riscv_masked_atomicrmw_xchg_i32; |
20990 | case AtomicRMWInst::Add: |
20991 | return Intrinsic::riscv_masked_atomicrmw_add_i32; |
20992 | case AtomicRMWInst::Sub: |
20993 | return Intrinsic::riscv_masked_atomicrmw_sub_i32; |
20994 | case AtomicRMWInst::Nand: |
20995 | return Intrinsic::riscv_masked_atomicrmw_nand_i32; |
20996 | case AtomicRMWInst::Max: |
20997 | return Intrinsic::riscv_masked_atomicrmw_max_i32; |
20998 | case AtomicRMWInst::Min: |
20999 | return Intrinsic::riscv_masked_atomicrmw_min_i32; |
21000 | case AtomicRMWInst::UMax: |
21001 | return Intrinsic::riscv_masked_atomicrmw_umax_i32; |
21002 | case AtomicRMWInst::UMin: |
21003 | return Intrinsic::riscv_masked_atomicrmw_umin_i32; |
21004 | } |
21005 | } |
21006 | |
21007 | if (XLen == 64) { |
21008 | switch (BinOp) { |
21009 | default: |
21010 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
21011 | case AtomicRMWInst::Xchg: |
21012 | return Intrinsic::riscv_masked_atomicrmw_xchg_i64; |
21013 | case AtomicRMWInst::Add: |
21014 | return Intrinsic::riscv_masked_atomicrmw_add_i64; |
21015 | case AtomicRMWInst::Sub: |
21016 | return Intrinsic::riscv_masked_atomicrmw_sub_i64; |
21017 | case AtomicRMWInst::Nand: |
21018 | return Intrinsic::riscv_masked_atomicrmw_nand_i64; |
21019 | case AtomicRMWInst::Max: |
21020 | return Intrinsic::riscv_masked_atomicrmw_max_i64; |
21021 | case AtomicRMWInst::Min: |
21022 | return Intrinsic::riscv_masked_atomicrmw_min_i64; |
21023 | case AtomicRMWInst::UMax: |
21024 | return Intrinsic::riscv_masked_atomicrmw_umax_i64; |
21025 | case AtomicRMWInst::UMin: |
21026 | return Intrinsic::riscv_masked_atomicrmw_umin_i64; |
21027 | } |
21028 | } |
21029 | |
21030 | llvm_unreachable("Unexpected XLen\n" ); |
21031 | } |
21032 | |
21033 | Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( |
21034 | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
21035 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
21036 | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
21037 | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
21038 | // mask, as this produces better code than the LR/SC loop emitted by |
21039 | // int_riscv_masked_atomicrmw_xchg. |
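  // Illustratively (IR approximate, names as in the code below), an i8
  // exchange with zero such as:
  //   %old = atomicrmw xchg ptr %p, i8 0 monotonic
  // becomes a word-sized clear of the byte's lane:
  //   %old.word = atomicrmw and ptr %AlignedAddr, i32 %Inv_Mask monotonic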
21040 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
21041 | isa<ConstantInt>(Val: AI->getValOperand())) { |
21042 | ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand()); |
21043 | if (CVal->isZero()) |
21044 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr, |
21045 | Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ), |
21046 | Align: AI->getAlign(), Ordering: Ord); |
21047 | if (CVal->isMinusOne()) |
21048 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask, |
21049 | Align: AI->getAlign(), Ordering: Ord); |
21050 | } |
21051 | |
21052 | unsigned XLen = Subtarget.getXLen(); |
21053 | Value *Ordering = |
21054 | Builder.getIntN(N: XLen, C: static_cast<uint64_t>(AI->getOrdering())); |
21055 | Type *Tys[] = {AlignedAddr->getType()}; |
21056 | Function *LrwOpScwLoop = Intrinsic::getDeclaration( |
21057 | M: AI->getModule(), |
21058 | id: getIntrinsicForMaskedAtomicRMWBinOp(XLen, BinOp: AI->getOperation()), Tys); |
21059 | |
21060 | if (XLen == 64) { |
21061 | Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty()); |
21062 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
21063 | ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty()); |
21064 | } |
21065 | |
21066 | Value *Result; |
21067 | |
21068 | // Must pass the shift amount needed to sign extend the loaded value prior |
21069 | // to performing a signed comparison for min/max. ShiftAmt is the number of |
21070 | // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which |
21071 | // is the number of bits to left+right shift the value in order to |
21072 | // sign-extend. |
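  // For example, with XLen = 32 and an i8 field at ShiftAmt = 16, SextShamt is
  // 32 - 8 - 16 = 8: the expanded loop can shift left by 8 and then
  // arithmetic-shift right by 8 to sign extend the field in place before the
  // signed comparison (illustrative; the exact sequence is produced by the
  // pseudo expansion pass).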
21073 | if (AI->getOperation() == AtomicRMWInst::Min || |
21074 | AI->getOperation() == AtomicRMWInst::Max) { |
21075 | const DataLayout &DL = AI->getDataLayout(); |
21076 | unsigned ValWidth = |
21077 | DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType()); |
21078 | Value *SextShamt = |
21079 | Builder.CreateSub(LHS: Builder.getIntN(N: XLen, C: XLen - ValWidth), RHS: ShiftAmt); |
21080 | Result = Builder.CreateCall(Callee: LrwOpScwLoop, |
21081 | Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
21082 | } else { |
21083 | Result = |
21084 | Builder.CreateCall(Callee: LrwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering}); |
21085 | } |
21086 | |
21087 | if (XLen == 64) |
21088 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
21089 | return Result; |
21090 | } |
21091 | |
21092 | TargetLowering::AtomicExpansionKind |
21093 | RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( |
21094 | AtomicCmpXchgInst *CI) const { |
21095 | // Don't expand forced atomics, we want to have __sync libcalls instead. |
21096 | if (Subtarget.hasForcedAtomics()) |
21097 | return AtomicExpansionKind::None; |
21098 | |
21099 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
21100 | if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) && |
21101 | (Size == 8 || Size == 16)) |
21102 | return AtomicExpansionKind::MaskedIntrinsic; |
21103 | return AtomicExpansionKind::None; |
21104 | } |
21105 | |
21106 | Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
21107 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
21108 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
21109 | unsigned XLen = Subtarget.getXLen(); |
21110 | Value *Ordering = Builder.getIntN(N: XLen, C: static_cast<uint64_t>(Ord)); |
21111 | Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; |
21112 | if (XLen == 64) { |
21113 | CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty()); |
21114 | NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty()); |
21115 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
21116 | CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; |
21117 | } |
21118 | Type *Tys[] = {AlignedAddr->getType()}; |
21119 | Function *MaskedCmpXchg = |
21120 | Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys); |
21121 | Value *Result = Builder.CreateCall( |
21122 | Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); |
21123 | if (XLen == 64) |
21124 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
21125 | return Result; |
21126 | } |
21127 | |
21128 | bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, |
21129 | EVT DataVT) const { |
21130 | // We have indexed loads for all supported EEW types. Indices are always |
21131 | // zero extended. |
21132 | return Extend.getOpcode() == ISD::ZERO_EXTEND && |
21133 | isTypeLegal(VT: Extend.getValueType()) && |
21134 | isTypeLegal(VT: Extend.getOperand(i: 0).getValueType()) && |
21135 | Extend.getOperand(i: 0).getValueType().getVectorElementType() != MVT::i1; |
21136 | } |
21137 | |
21138 | bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, |
21139 | EVT VT) const { |
21140 | if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) |
21141 | return false; |
21142 | |
21143 | switch (FPVT.getSimpleVT().SimpleTy) { |
21144 | case MVT::f16: |
21145 | return Subtarget.hasStdExtZfhmin(); |
21146 | case MVT::f32: |
21147 | return Subtarget.hasStdExtF(); |
21148 | case MVT::f64: |
21149 | return Subtarget.hasStdExtD(); |
21150 | default: |
21151 | return false; |
21152 | } |
21153 | } |
21154 | |
21155 | unsigned RISCVTargetLowering::getJumpTableEncoding() const { |
  // If we are using the small code model, we can reduce the size of each jump
  // table entry to 4 bytes.
21158 | if (Subtarget.is64Bit() && !isPositionIndependent() && |
21159 | getTargetMachine().getCodeModel() == CodeModel::Small) { |
21160 | return MachineJumpTableInfo::EK_Custom32; |
21161 | } |
21162 | return TargetLowering::getJumpTableEncoding(); |
21163 | } |
21164 | |
21165 | const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( |
21166 | const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, |
21167 | unsigned uid, MCContext &Ctx) const { |
21168 | assert(Subtarget.is64Bit() && !isPositionIndependent() && |
21169 | getTargetMachine().getCodeModel() == CodeModel::Small); |
21170 | return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx); |
21171 | } |
21172 | |
21173 | bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { |
21174 | // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power |
21175 | // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be |
21176 | // a power of two as well. |
21177 | // FIXME: This doesn't work for zve32, but that's already broken |
21178 | // elsewhere for the same reason. |
21179 | assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported" ); |
21180 | static_assert(RISCV::RVVBitsPerBlock == 64, |
21181 | "RVVBitsPerBlock changed, audit needed" ); |
21182 | return true; |
21183 | } |
21184 | |
21185 | bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, |
21186 | SDValue &Offset, |
21187 | ISD::MemIndexedMode &AM, |
21188 | SelectionDAG &DAG) const { |
  // Indexed loads/stores are only supported with the XTHeadMemIdx extension.
21190 | if (!Subtarget.hasVendorXTHeadMemIdx()) |
21191 | return false; |
21192 | |
21193 | if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) |
21194 | return false; |
21195 | |
21196 | Base = Op->getOperand(Num: 0); |
21197 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: 1))) { |
21198 | int64_t RHSC = RHS->getSExtValue(); |
21199 | if (Op->getOpcode() == ISD::SUB) |
21200 | RHSC = -(uint64_t)RHSC; |
21201 | |
21202 | // The constants that can be encoded in the THeadMemIdx instructions |
21203 | // are of the form (sign_extend(imm5) << imm2). |
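    // For example, offsets such as 16 (2 << 3) or -96 (-12 << 3) are
    // encodable, whereas 17 is not, since no imm5/imm2 pair produces it.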
21204 | bool isLegalIndexedOffset = false; |
21205 | for (unsigned i = 0; i < 4; i++) |
21206 | if (isInt<5>(x: RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { |
21207 | isLegalIndexedOffset = true; |
21208 | break; |
21209 | } |
21210 | |
21211 | if (!isLegalIndexedOffset) |
21212 | return false; |
21213 | |
21214 | Offset = Op->getOperand(Num: 1); |
21215 | return true; |
21216 | } |
21217 | |
21218 | return false; |
21219 | } |
21220 | |
21221 | bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, |
21222 | SDValue &Offset, |
21223 | ISD::MemIndexedMode &AM, |
21224 | SelectionDAG &DAG) const { |
21225 | EVT VT; |
21226 | SDValue Ptr; |
21227 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) { |
21228 | VT = LD->getMemoryVT(); |
21229 | Ptr = LD->getBasePtr(); |
21230 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) { |
21231 | VT = ST->getMemoryVT(); |
21232 | Ptr = ST->getBasePtr(); |
21233 | } else |
21234 | return false; |
21235 | |
21236 | if (!getIndexedAddressParts(Op: Ptr.getNode(), Base, Offset, AM, DAG)) |
21237 | return false; |
21238 | |
21239 | AM = ISD::PRE_INC; |
21240 | return true; |
21241 | } |
21242 | |
21243 | bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, |
21244 | SDValue &Base, |
21245 | SDValue &Offset, |
21246 | ISD::MemIndexedMode &AM, |
21247 | SelectionDAG &DAG) const { |
21248 | if (Subtarget.hasVendorXCVmem()) { |
21249 | if (Op->getOpcode() != ISD::ADD) |
21250 | return false; |
21251 | |
21252 | if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Val: N)) |
21253 | Base = LS->getBasePtr(); |
21254 | else |
21255 | return false; |
21256 | |
21257 | if (Base == Op->getOperand(Num: 0)) |
21258 | Offset = Op->getOperand(Num: 1); |
21259 | else if (Base == Op->getOperand(Num: 1)) |
21260 | Offset = Op->getOperand(Num: 0); |
21261 | else |
21262 | return false; |
21263 | |
21264 | AM = ISD::POST_INC; |
21265 | return true; |
21266 | } |
21267 | |
21268 | EVT VT; |
21269 | SDValue Ptr; |
21270 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) { |
21271 | VT = LD->getMemoryVT(); |
21272 | Ptr = LD->getBasePtr(); |
21273 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) { |
21274 | VT = ST->getMemoryVT(); |
21275 | Ptr = ST->getBasePtr(); |
21276 | } else |
21277 | return false; |
21278 | |
21279 | if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG)) |
21280 | return false; |
21281 | // Post-indexing updates the base, so it's not a valid transform |
21282 | // if that's not the same as the load's pointer. |
21283 | if (Ptr != Base) |
21284 | return false; |
21285 | |
21286 | AM = ISD::POST_INC; |
21287 | return true; |
21288 | } |
21289 | |
21290 | bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
21291 | EVT VT) const { |
21292 | EVT SVT = VT.getScalarType(); |
21293 | |
21294 | if (!SVT.isSimple()) |
21295 | return false; |
21296 | |
21297 | switch (SVT.getSimpleVT().SimpleTy) { |
21298 | case MVT::f16: |
21299 | return VT.isVector() ? Subtarget.hasVInstructionsF16() |
21300 | : Subtarget.hasStdExtZfhOrZhinx(); |
21301 | case MVT::f32: |
21302 | return Subtarget.hasStdExtFOrZfinx(); |
21303 | case MVT::f64: |
21304 | return Subtarget.hasStdExtDOrZdinx(); |
21305 | default: |
21306 | break; |
21307 | } |
21308 | |
21309 | return false; |
21310 | } |
21311 | |
21312 | ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { |
21313 | // Zacas will use amocas.w which does not require extension. |
21314 | return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; |
21315 | } |
21316 | |
21317 | Register RISCVTargetLowering::getExceptionPointerRegister( |
21318 | const Constant *PersonalityFn) const { |
21319 | return RISCV::X10; |
21320 | } |
21321 | |
21322 | Register RISCVTargetLowering::getExceptionSelectorRegister( |
21323 | const Constant *PersonalityFn) const { |
21324 | return RISCV::X11; |
21325 | } |
21326 | |
21327 | bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
  // Return false to suppress unnecessary extensions if a LibCall argument or
  // return value is a float narrower than XLen on a soft-float ABI.
21330 | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
21331 | Type.getSizeInBits() < Subtarget.getXLen())) |
21332 | return false; |
21333 | |
21334 | return true; |
21335 | } |
21336 | |
21337 | bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
21338 | if (Subtarget.is64Bit() && Type == MVT::i32) |
21339 | return true; |
21340 | |
21341 | return IsSigned; |
21342 | } |
21343 | |
21344 | bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, |
21345 | SDValue C) const { |
21346 | // Check integral scalar types. |
21347 | const bool HasZmmul = Subtarget.hasStdExtZmmul(); |
21348 | if (!VT.isScalarInteger()) |
21349 | return false; |
21350 | |
  // Omit the optimization if the subtarget has the Zmmul extension (implied by
  // M) and the data size exceeds XLen.
21353 | if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen()) |
21354 | return false; |
21355 | |
21356 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) { |
21357 | // Break the MUL to a SLLI and an ADD/SUB. |
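    // For example, x * 9 can become (x << 3) + x and x * 7 can become
    // (x << 3) - x.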
21358 | const APInt &Imm = ConstNode->getAPIntValue(); |
21359 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
21360 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
21361 | return true; |
21362 | |
21363 | // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. |
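    // For example, x * 2050 = (x << 11) + (x << 1), which maps to
    // sh1add x, (slli x, 11) when Zba is available.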
21364 | if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(N: 12) && |
21365 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
21366 | (Imm - 8).isPowerOf2())) |
21367 | return true; |
21368 | |
21369 | // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs |
21370 | // a pair of LUI/ADDI. |
21371 | if (!Imm.isSignedIntN(N: 12) && Imm.countr_zero() < 12 && |
21372 | ConstNode->hasOneUse()) { |
21373 | APInt ImmS = Imm.ashr(ShiftAmt: Imm.countr_zero()); |
21374 | if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || |
21375 | (1 - ImmS).isPowerOf2()) |
21376 | return true; |
21377 | } |
21378 | } |
21379 | |
21380 | return false; |
21381 | } |
21382 | |
21383 | bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, |
21384 | SDValue ConstNode) const { |
21385 | // Let the DAGCombiner decide for vectors. |
21386 | EVT VT = AddNode.getValueType(); |
21387 | if (VT.isVector()) |
21388 | return true; |
21389 | |
21390 | // Let the DAGCombiner decide for larger types. |
21391 | if (VT.getScalarSizeInBits() > Subtarget.getXLen()) |
21392 | return true; |
21393 | |
21394 | // It is worse if c1 is simm12 while c1*c2 is not. |
21395 | ConstantSDNode *C1Node = cast<ConstantSDNode>(Val: AddNode.getOperand(i: 1)); |
21396 | ConstantSDNode *C2Node = cast<ConstantSDNode>(Val&: ConstNode); |
21397 | const APInt &C1 = C1Node->getAPIntValue(); |
21398 | const APInt &C2 = C2Node->getAPIntValue(); |
21399 | if (C1.isSignedIntN(N: 12) && !(C1 * C2).isSignedIntN(N: 12)) |
21400 | return false; |
21401 | |
21402 | // Default to true and let the DAGCombiner decide. |
21403 | return true; |
21404 | } |
21405 | |
21406 | bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( |
21407 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
21408 | unsigned *Fast) const { |
21409 | if (!VT.isVector()) { |
21410 | if (Fast) |
21411 | *Fast = Subtarget.enableUnalignedScalarMem(); |
21412 | return Subtarget.enableUnalignedScalarMem(); |
21413 | } |
21414 | |
  // All vector implementations must support accesses aligned to the element
  // size.
21416 | EVT ElemVT = VT.getVectorElementType(); |
21417 | if (Alignment >= ElemVT.getStoreSize()) { |
21418 | if (Fast) |
21419 | *Fast = 1; |
21420 | return true; |
21421 | } |
21422 | |
  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed and of considering them fast.
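  // For example, a misaligned v4i32 access can be performed as a same-width
  // v16i8 access, which only needs byte alignment.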
21427 | if (Fast) |
21428 | *Fast = Subtarget.enableUnalignedVectorMem(); |
21429 | return Subtarget.enableUnalignedVectorMem(); |
21430 | } |
21431 | |
21432 | |
21433 | EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, |
21434 | const AttributeList &FuncAttributes) const { |
21435 | if (!Subtarget.hasVInstructions()) |
21436 | return MVT::Other; |
21437 | |
21438 | if (FuncAttributes.hasFnAttr(Kind: Attribute::NoImplicitFloat)) |
21439 | return MVT::Other; |
21440 | |
21441 | // We use LMUL1 memory operations here for a non-obvious reason. Our caller |
21442 | // has an expansion threshold, and we want the number of hardware memory |
21443 | // operations to correspond roughly to that threshold. LMUL>1 operations |
21444 | // are typically expanded linearly internally, and thus correspond to more |
21445 | // than one actual memory operation. Note that store merging and load |
21446 | // combining will typically form larger LMUL operations from the LMUL1 |
21447 | // operations emitted here, and that's okay because combining isn't |
21448 | // introducing new memory operations; it's just merging existing ones. |
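  // For example (roughly, assuming VLEN=128, ELEN=64 and sufficient
  // alignment), this returns v2i64, so a 64-byte memcpy is expanded by the
  // caller into four LMUL1 vector loads/stores; store merging may later fold
  // those into a single larger-LMUL access.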
21449 | const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8; |
21450 | if (Op.size() < MinVLenInBytes) |
21451 | // TODO: Figure out short memops. For the moment, do the default thing |
21452 | // which ends up using scalar sequences. |
21453 | return MVT::Other; |
21454 | |
21455 | // Prefer i8 for non-zero memset as it allows us to avoid materializing |
21456 | // a large scalar constant and instead use vmv.v.x/i to do the |
21457 | // broadcast. For everything else, prefer ELenVT to minimize VL and thus |
21458 | // maximize the chance we can encode the size in the vsetvli. |
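  // For example, memset(p, 0x2a, n) is better served by an i8 splat of 0x2a
  // than by first materializing 0x2a2a2a2a2a2a2a2a in a scalar register.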
21459 | MVT ELenVT = MVT::getIntegerVT(BitWidth: Subtarget.getELen()); |
21460 | MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT; |
21461 | |
21462 | // Do we have sufficient alignment for our preferred VT? If not, revert |
21463 | // to largest size allowed by our alignment criteria. |
21464 | if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) { |
21465 | Align RequiredAlign(PreferredVT.getStoreSize()); |
21466 | if (Op.isFixedDstAlign()) |
21467 | RequiredAlign = std::min(a: RequiredAlign, b: Op.getDstAlign()); |
21468 | if (Op.isMemcpy()) |
21469 | RequiredAlign = std::min(a: RequiredAlign, b: Op.getSrcAlign()); |
21470 | PreferredVT = MVT::getIntegerVT(BitWidth: RequiredAlign.value() * 8); |
21471 | } |
21472 | return MVT::getVectorVT(VT: PreferredVT, NumElements: MinVLenInBytes/PreferredVT.getStoreSize()); |
21473 | } |
21474 | |
21475 | bool RISCVTargetLowering::splitValueIntoRegisterParts( |
21476 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
21477 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
21478 | bool IsABIRegCopy = CC.has_value(); |
21479 | EVT ValueVT = Val.getValueType(); |
21480 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
21481 | PartVT == MVT::f32) { |
21482 | // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float |
21483 | // nan, and cast to f32. |
21484 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val); |
21485 | Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val); |
21486 | Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val, |
21487 | N2: DAG.getConstant(Val: 0xFFFF0000, DL, VT: MVT::i32)); |
21488 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val); |
21489 | Parts[0] = Val; |
21490 | return true; |
21491 | } |
21492 | |
21493 | if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { |
21494 | LLVMContext &Context = *DAG.getContext(); |
21495 | EVT ValueEltVT = ValueVT.getVectorElementType(); |
21496 | EVT PartEltVT = PartVT.getVectorElementType(); |
21497 | unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); |
21498 | unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); |
21499 | if (PartVTBitSize % ValueVTBitSize == 0) { |
21500 | assert(PartVTBitSize >= ValueVTBitSize); |
      // If the element types differ, first widen the value within its own
      // element type and then bitcast to PartVT. For example, to copy a
      // <vscale x 1 x i8> value into <vscale x 4 x i16>, widen
      // <vscale x 1 x i8> to <vscale x 8 x i8> with an insert_subvector and
      // then bitcast that to <vscale x 4 x i16>.
21507 | if (ValueEltVT != PartEltVT) { |
21508 | if (PartVTBitSize > ValueVTBitSize) { |
21509 | unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); |
21510 | assert(Count != 0 && "The number of element should not be zero." ); |
21511 | EVT SameEltTypeVT = |
21512 | EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true); |
21513 | Val = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SameEltTypeVT, |
21514 | N1: DAG.getUNDEF(VT: SameEltTypeVT), N2: Val, |
21515 | N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
21516 | } |
21517 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val); |
21518 | } else { |
21519 | Val = |
21520 | DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: PartVT, N1: DAG.getUNDEF(VT: PartVT), |
21521 | N2: Val, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
21522 | } |
21523 | Parts[0] = Val; |
21524 | return true; |
21525 | } |
21526 | } |
21527 | return false; |
21528 | } |
21529 | |
21530 | SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( |
21531 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
21532 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
21533 | bool IsABIRegCopy = CC.has_value(); |
21534 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
21535 | PartVT == MVT::f32) { |
21536 | SDValue Val = Parts[0]; |
21537 | |
21538 | // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. |
21539 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val); |
21540 | Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val); |
21541 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val); |
21542 | return Val; |
21543 | } |
21544 | |
21545 | if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { |
21546 | LLVMContext &Context = *DAG.getContext(); |
21547 | SDValue Val = Parts[0]; |
21548 | EVT ValueEltVT = ValueVT.getVectorElementType(); |
21549 | EVT PartEltVT = PartVT.getVectorElementType(); |
21550 | unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); |
21551 | unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); |
21552 | if (PartVTBitSize % ValueVTBitSize == 0) { |
21553 | assert(PartVTBitSize >= ValueVTBitSize); |
21554 | EVT SameEltTypeVT = ValueVT; |
      // If the element types differ, first bitcast the part to a vector with
      // the value's element type. For example, to recover a <vscale x 1 x i8>
      // value from <vscale x 4 x i16>, bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8> and then extract the <vscale x 1 x i8> subvector.
21561 | if (ValueEltVT != PartEltVT) { |
21562 | unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); |
21563 | assert(Count != 0 && "The number of element should not be zero." ); |
21564 | SameEltTypeVT = |
21565 | EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true); |
21566 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: SameEltTypeVT, Operand: Val); |
21567 | } |
21568 | Val = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ValueVT, N1: Val, |
21569 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
21570 | return Val; |
21571 | } |
21572 | } |
21573 | return SDValue(); |
21574 | } |
21575 | |
21576 | bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { |
21577 | // When aggressively optimizing for code size, we prefer to use a div |
21578 | // instruction, as it is usually smaller than the alternative sequence. |
21579 | // TODO: Add vector division? |
21580 | bool OptSize = Attr.hasFnAttr(Kind: Attribute::MinSize); |
21581 | return OptSize && !VT.isVector(); |
21582 | } |
21583 | |
21584 | bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const { |
  // Scalarizing a zero_extend or sign_extend splat can prevent it from being
  // matched to a widening instruction in some situations.
21587 | unsigned Opc = N->getOpcode(); |
21588 | if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND) |
21589 | return false; |
21590 | return true; |
21591 | } |
21592 | |
21593 | static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) { |
21594 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
21595 | Function *ThreadPointerFunc = |
21596 | Intrinsic::getDeclaration(M, id: Intrinsic::thread_pointer); |
21597 | return IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), |
21598 | Ptr: IRB.CreateCall(Callee: ThreadPointerFunc), Idx0: Offset); |
21599 | } |
21600 | |
21601 | Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { |
21602 | // Fuchsia provides a fixed TLS slot for the stack cookie. |
21603 | // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value. |
21604 | if (Subtarget.isTargetFuchsia()) |
21605 | return useTpOffset(IRB, Offset: -0x10); |
21606 | |
21607 | // Android provides a fixed TLS slot for the stack cookie. See the definition |
21608 | // of TLS_SLOT_STACK_GUARD in |
21609 | // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h |
21610 | if (Subtarget.isTargetAndroid()) |
21611 | return useTpOffset(IRB, Offset: -0x18); |
21612 | |
21613 | return TargetLowering::getIRStackGuard(IRB); |
21614 | } |
21615 | |
21616 | bool RISCVTargetLowering::isLegalInterleavedAccessType( |
21617 | VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, |
21618 | const DataLayout &DL) const { |
21619 | EVT VT = getValueType(DL, Ty: VTy); |
21620 | // Don't lower vlseg/vsseg for vector types that can't be split. |
21621 | if (!isTypeLegal(VT)) |
21622 | return false; |
21623 | |
21624 | if (!isLegalElementTypeForRVV(ScalarTy: VT.getScalarType()) || |
21625 | !allowsMemoryAccessForAlignment(Context&: VTy->getContext(), DL, VT, AddrSpace, |
21626 | Alignment)) |
21627 | return false; |
21628 | |
21629 | MVT ContainerVT = VT.getSimpleVT(); |
21630 | |
21631 | if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) { |
21632 | if (!Subtarget.useRVVForFixedLengthVectors()) |
21633 | return false; |
21634 | // Sometimes the interleaved access pass picks up splats as interleaves of |
21635 | // one element. Don't lower these. |
21636 | if (FVTy->getNumElements() < 2) |
21637 | return false; |
21638 | |
21639 | ContainerVT = getContainerForFixedLengthVector(VT: VT.getSimpleVT()); |
21640 | } else { |
21641 | // The intrinsics for scalable vectors are not overloaded on pointer type |
21642 | // and can only handle the default address space. |
21643 | if (AddrSpace) |
21644 | return false; |
21645 | } |
21646 | |
21647 | // Need to make sure that EMUL * NFIELDS ≤ 8 |
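  // For example, a factor-4 segment access at LMUL=2 (4 * 2 = 8) is allowed,
  // while factor 4 at LMUL=4 would exceed the limit.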
21648 | auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMUL: getLMUL(VT: ContainerVT)); |
21649 | if (Fractional) |
21650 | return true; |
21651 | return Factor * LMUL <= 8; |
21652 | } |
21653 | |
21654 | bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, |
21655 | Align Alignment) const { |
21656 | if (!Subtarget.hasVInstructions()) |
21657 | return false; |
21658 | |
21659 | // Only support fixed vectors if we know the minimum vector size. |
21660 | if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors()) |
21661 | return false; |
21662 | |
21663 | EVT ScalarType = DataType.getScalarType(); |
21664 | if (!isLegalElementTypeForRVV(ScalarTy: ScalarType)) |
21665 | return false; |
21666 | |
21667 | if (!Subtarget.enableUnalignedVectorMem() && |
21668 | Alignment < ScalarType.getStoreSize()) |
21669 | return false; |
21670 | |
21671 | return true; |
21672 | } |
21673 | |
21674 | static const Intrinsic::ID FixedVlsegIntrIds[] = { |
21675 | Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load, |
21676 | Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load, |
21677 | Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load, |
21678 | Intrinsic::riscv_seg8_load}; |
21679 | |
21680 | /// Lower an interleaved load into a vlsegN intrinsic. |
21681 | /// |
21682 | /// E.g. Lower an interleaved load (Factor = 2): |
21683 | /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr |
21684 | /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements |
21685 | /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements |
21686 | /// |
21687 | /// Into: |
/// %ld2 = call { <4 x i32>, <4 x i32> } @llvm.riscv.seg2.load.v4i32.p0.i64(
///                                          %ptr, i64 4)
/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
21692 | bool RISCVTargetLowering::lowerInterleavedLoad( |
21693 | LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, |
21694 | ArrayRef<unsigned> Indices, unsigned Factor) const { |
21695 | IRBuilder<> Builder(LI); |
21696 | |
21697 | auto *VTy = cast<FixedVectorType>(Val: Shuffles[0]->getType()); |
21698 | if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: LI->getAlign(), |
21699 | AddrSpace: LI->getPointerAddressSpace(), |
21700 | DL: LI->getDataLayout())) |
21701 | return false; |
21702 | |
21703 | auto *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen()); |
21704 | |
21705 | Function *VlsegNFunc = |
21706 | Intrinsic::getDeclaration(M: LI->getModule(), id: FixedVlsegIntrIds[Factor - 2], |
21707 | Tys: {VTy, LI->getPointerOperandType(), XLenTy}); |
21708 | |
21709 | Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements()); |
21710 | |
21711 | CallInst *VlsegN = |
21712 | Builder.CreateCall(Callee: VlsegNFunc, Args: {LI->getPointerOperand(), VL}); |
21713 | |
21714 | for (unsigned i = 0; i < Shuffles.size(); i++) { |
21715 | Value *SubVec = Builder.CreateExtractValue(Agg: VlsegN, Idxs: Indices[i]); |
21716 | Shuffles[i]->replaceAllUsesWith(V: SubVec); |
21717 | } |
21718 | |
21719 | return true; |
21720 | } |
21721 | |
21722 | static const Intrinsic::ID FixedVssegIntrIds[] = { |
21723 | Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store, |
21724 | Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store, |
21725 | Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store, |
21726 | Intrinsic::riscv_seg8_store}; |
21727 | |
21728 | /// Lower an interleaved store into a vssegN intrinsic. |
21729 | /// |
21730 | /// E.g. Lower an interleaved store (Factor = 3): |
21731 | /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, |
21732 | /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> |
21733 | /// store <12 x i32> %i.vec, <12 x i32>* %ptr |
21734 | /// |
21735 | /// Into: |
21736 | /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> |
21737 | /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> |
21738 | /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> |
/// call void @llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                               %ptr, i64 4)
21741 | /// |
21742 | /// Note that the new shufflevectors will be removed and we'll only generate one |
21743 | /// vsseg3 instruction in CodeGen. |
21744 | bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, |
21745 | ShuffleVectorInst *SVI, |
21746 | unsigned Factor) const { |
21747 | IRBuilder<> Builder(SI); |
21748 | auto *ShuffleVTy = cast<FixedVectorType>(Val: SVI->getType()); |
21749 | // Given SVI : <n*factor x ty>, then VTy : <n x ty> |
21750 | auto *VTy = FixedVectorType::get(ElementType: ShuffleVTy->getElementType(), |
21751 | NumElts: ShuffleVTy->getNumElements() / Factor); |
21752 | if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: SI->getAlign(), |
21753 | AddrSpace: SI->getPointerAddressSpace(), |
21754 | DL: SI->getDataLayout())) |
21755 | return false; |
21756 | |
21757 | auto *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen()); |
21758 | |
21759 | Function *VssegNFunc = |
21760 | Intrinsic::getDeclaration(M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2], |
21761 | Tys: {VTy, SI->getPointerOperandType(), XLenTy}); |
21762 | |
21763 | auto Mask = SVI->getShuffleMask(); |
21764 | SmallVector<Value *, 10> Ops; |
21765 | |
21766 | for (unsigned i = 0; i < Factor; i++) { |
21767 | Value *Shuffle = Builder.CreateShuffleVector( |
21768 | V1: SVI->getOperand(i_nocapture: 0), V2: SVI->getOperand(i_nocapture: 1), |
21769 | Mask: createSequentialMask(Start: Mask[i], NumInts: VTy->getNumElements(), NumUndefs: 0)); |
21770 | Ops.push_back(Elt: Shuffle); |
21771 | } |
  // This VL should be OK (it should be executable in one vsseg instruction,
  // potentially under a larger LMUL) because we checked above that the fixed
  // vector type fits in isLegalInterleavedAccessType.
21775 | Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements()); |
21776 | Ops.append(IL: {SI->getPointerOperand(), VL}); |
21777 | |
21778 | Builder.CreateCall(Callee: VssegNFunc, Args: Ops); |
21779 | |
21780 | return true; |
21781 | } |
21782 | |
21783 | bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
21784 | LoadInst *LI) const { |
21785 | assert(LI->isSimple()); |
21786 | IRBuilder<> Builder(LI); |
21787 | |
21788 | // Only deinterleave2 supported at present. |
21789 | if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2) |
21790 | return false; |
21791 | |
21792 | unsigned Factor = 2; |
21793 | |
21794 | VectorType *VTy = cast<VectorType>(Val: DI->getOperand(i_nocapture: 0)->getType()); |
21795 | VectorType *ResVTy = cast<VectorType>(Val: DI->getType()->getContainedType(i: 0)); |
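  // Illustratively, for Factor = 2 the matched pattern is (IR approximate):
  //   %wide = load <vscale x 8 x i32>, ptr %p
  //   %dei = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
  //            @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide)
  // which is rewritten here into a single vlseg2 intrinsic call on %p.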
21796 | |
21797 | if (!isLegalInterleavedAccessType(VTy: ResVTy, Factor, Alignment: LI->getAlign(), |
21798 | AddrSpace: LI->getPointerAddressSpace(), |
21799 | DL: LI->getDataLayout())) |
21800 | return false; |
21801 | |
21802 | Function *VlsegNFunc; |
21803 | Value *VL; |
21804 | Type *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen()); |
21805 | SmallVector<Value *, 10> Ops; |
21806 | |
21807 | if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) { |
21808 | VlsegNFunc = Intrinsic::getDeclaration( |
21809 | M: LI->getModule(), id: FixedVlsegIntrIds[Factor - 2], |
21810 | Tys: {ResVTy, LI->getPointerOperandType(), XLenTy}); |
21811 | VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements()); |
21812 | } else { |
21813 | static const Intrinsic::ID IntrIds[] = { |
21814 | Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, |
21815 | Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, |
21816 | Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, |
21817 | Intrinsic::riscv_vlseg8}; |
21818 | |
21819 | VlsegNFunc = Intrinsic::getDeclaration(M: LI->getModule(), id: IntrIds[Factor - 2], |
21820 | Tys: {ResVTy, XLenTy}); |
21821 | VL = Constant::getAllOnesValue(Ty: XLenTy); |
21822 | Ops.append(NumInputs: Factor, Elt: PoisonValue::get(T: ResVTy)); |
21823 | } |
21824 | |
21825 | Ops.append(IL: {LI->getPointerOperand(), VL}); |
21826 | |
21827 | Value *Vlseg = Builder.CreateCall(Callee: VlsegNFunc, Args: Ops); |
21828 | DI->replaceAllUsesWith(V: Vlseg); |
21829 | |
21830 | return true; |
21831 | } |
21832 | |
21833 | bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
21834 | StoreInst *SI) const { |
21835 | assert(SI->isSimple()); |
21836 | IRBuilder<> Builder(SI); |
21837 | |
21838 | // Only interleave2 supported at present. |
21839 | if (II->getIntrinsicID() != Intrinsic::vector_interleave2) |
21840 | return false; |
21841 | |
21842 | unsigned Factor = 2; |
21843 | |
21844 | VectorType *VTy = cast<VectorType>(Val: II->getType()); |
21845 | VectorType *InVTy = cast<VectorType>(Val: II->getOperand(i_nocapture: 0)->getType()); |
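  // Illustratively, for Factor = 2 the matched pattern is (IR approximate):
  //   %ilv = call <vscale x 8 x i32>
  //            @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a,
  //                                             <vscale x 4 x i32> %b)
  //   store <vscale x 8 x i32> %ilv, ptr %p
  // which is rewritten here into a single vsseg2 intrinsic call.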
21846 | |
21847 | if (!isLegalInterleavedAccessType(VTy: InVTy, Factor, Alignment: SI->getAlign(), |
21848 | AddrSpace: SI->getPointerAddressSpace(), |
21849 | DL: SI->getDataLayout())) |
21850 | return false; |
21851 | |
21852 | Function *VssegNFunc; |
21853 | Value *VL; |
21854 | Type *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen()); |
21855 | |
21856 | if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) { |
21857 | VssegNFunc = Intrinsic::getDeclaration( |
21858 | M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2], |
21859 | Tys: {InVTy, SI->getPointerOperandType(), XLenTy}); |
21860 | VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements()); |
21861 | } else { |
21862 | static const Intrinsic::ID IntrIds[] = { |
21863 | Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, |
21864 | Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, |
21865 | Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, |
21866 | Intrinsic::riscv_vsseg8}; |
21867 | |
21868 | VssegNFunc = Intrinsic::getDeclaration(M: SI->getModule(), id: IntrIds[Factor - 2], |
21869 | Tys: {InVTy, XLenTy}); |
21870 | VL = Constant::getAllOnesValue(Ty: XLenTy); |
21871 | } |
21872 | |
21873 | Builder.CreateCall(Callee: VssegNFunc, Args: {II->getOperand(i_nocapture: 0), II->getOperand(i_nocapture: 1), |
21874 | SI->getPointerOperand(), VL}); |
21875 | |
21876 | return true; |
21877 | } |
21878 | |
21879 | MachineInstr * |
21880 | RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, |
21881 | MachineBasicBlock::instr_iterator &MBBI, |
21882 | const TargetInstrInfo *TII) const { |
21883 | assert(MBBI->isCall() && MBBI->getCFIType() && |
21884 | "Invalid call instruction for a KCFI check" ); |
21885 | assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect}, |
21886 | MBBI->getOpcode())); |
21887 | |
21888 | MachineOperand &Target = MBBI->getOperand(i: 0); |
21889 | Target.setIsRenamable(false); |
21890 | |
21891 | return BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII->get(Opcode: RISCV::KCFI_CHECK)) |
21892 | .addReg(RegNo: Target.getReg()) |
21893 | .addImm(Val: MBBI->getCFIType()) |
21894 | .getInstr(); |
21895 | } |
21896 | |
21897 | #define GET_REGISTER_MATCHER |
21898 | #include "RISCVGenAsmMatcher.inc" |
21899 | |
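// Resolve a register by name (ABI name or architectural name), as used for
// named-register accesses such as the llvm.read_register and
// llvm.write_register intrinsics. Only registers that are reserved, either
// globally or explicitly by the user, may be obtained this way.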
21900 | Register |
21901 | RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
21902 | const MachineFunction &MF) const { |
21903 | Register Reg = MatchRegisterAltName(Name: RegName); |
21904 | if (Reg == RISCV::NoRegister) |
21905 | Reg = MatchRegisterName(Name: RegName); |
21906 | if (Reg == RISCV::NoRegister) |
21907 | report_fatal_error( |
21908 | reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"." )); |
21909 | BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); |
21910 | if (!ReservedRegs.test(Idx: Reg) && !Subtarget.isRegisterReservedByUser(i: Reg)) |
21911 | report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" + |
21912 | StringRef(RegName) + "\"." )); |
21913 | return Reg; |
21914 | } |
21915 | |
21916 | MachineMemOperand::Flags |
21917 | RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const { |
21918 | const MDNode *NontemporalInfo = I.getMetadata(KindID: LLVMContext::MD_nontemporal); |
21919 | |
21920 | if (NontemporalInfo == nullptr) |
21921 | return MachineMemOperand::MONone; |
21922 | |
21923 | // 1 -> the default when no level is given; behaves as __RISCV_NTLH_ALL
21924 | // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE |
21925 | // 3 -> __RISCV_NTLH_ALL_PRIVATE |
21926 | // 4 -> __RISCV_NTLH_INNERMOST_SHARED |
21927 | // 5 -> __RISCV_NTLH_ALL |
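// After the "level - 2" adjustment below, the two low bits select the MMO
// flags. For example, level 4 (__RISCV_NTLH_INNERMOST_SHARED) becomes 0b10 and
// sets only MONontemporalBit1, while level 5 (__RISCV_NTLH_ALL) becomes 0b11
// and sets both bits.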
21928 | int NontemporalLevel = 5; |
21929 | const MDNode *RISCVNontemporalInfo = |
21930 | I.getMetadata(Kind: "riscv-nontemporal-domain" ); |
21931 | if (RISCVNontemporalInfo != nullptr) |
21932 | NontemporalLevel = |
21933 | cast<ConstantInt>( |
21934 | Val: cast<ConstantAsMetadata>(Val: RISCVNontemporalInfo->getOperand(I: 0)) |
21935 | ->getValue()) |
21936 | ->getZExtValue(); |
21937 | |
21938 | assert((1 <= NontemporalLevel && NontemporalLevel <= 5) && |
21939 | "RISC-V target doesn't support this non-temporal domain." ); |
21940 | |
21941 | NontemporalLevel -= 2; |
21942 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone; |
21943 | if (NontemporalLevel & 0b1) |
21944 | Flags |= MONontemporalBit0; |
21945 | if (NontemporalLevel & 0b10) |
21946 | Flags |= MONontemporalBit1; |
21947 | |
21948 | return Flags; |
21949 | } |
21950 | |
21951 | MachineMemOperand::Flags |
21952 | RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const { |
21953 | |
21954 | MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags(); |
21955 | MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone; |
21956 | TargetFlags |= (NodeFlags & MONontemporalBit0); |
21957 | TargetFlags |= (NodeFlags & MONontemporalBit1); |
21958 | return TargetFlags; |
21959 | } |
21960 | |
21961 | bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable( |
21962 | const MemSDNode &NodeX, const MemSDNode &NodeY) const { |
21963 | return getTargetMMOFlags(Node: NodeX) == getTargetMMOFlags(Node: NodeY); |
21964 | } |
21965 | |
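// Treat ctpop as fast when it should lower to a single instruction: a Zvbb
// vector population count for vector types, or a Zbb scalar cpop/cpopw for
// i32/i64.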
21966 | bool RISCVTargetLowering::isCtpopFast(EVT VT) const { |
21967 | if (VT.isScalableVector()) |
21968 | return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); |
21969 | if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) |
21970 | return true; |
21971 | return Subtarget.hasStdExtZbb() && |
21972 | (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); |
21973 | } |
21974 | |
21975 | unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, |
21976 | ISD::CondCode Cond) const { |
21977 | return isCtpopFast(VT) ? 0 : 1; |
21978 | } |
21979 | |
21980 | bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { |
21981 | |
21982 | // GISel support is in progress or complete for these opcodes. |
21983 | unsigned Op = Inst.getOpcode(); |
21984 | if (Op == Instruction::Add || Op == Instruction::Sub || |
21985 | Op == Instruction::And || Op == Instruction::Or || |
21986 | Op == Instruction::Xor || Op == Instruction::InsertElement || |
21987 | Op == Instruction::ShuffleVector || Op == Instruction::Load || |
21988 | Op == Instruction::Freeze || Op == Instruction::Store) |
21989 | return false; |
21990 | |
21991 | if (Inst.getType()->isScalableTy()) |
21992 | return true; |
21993 | |
21994 | for (unsigned i = 0; i < Inst.getNumOperands(); ++i) |
21995 | if (Inst.getOperand(i)->getType()->isScalableTy() && |
21996 | !isa<ReturnInst>(Val: &Inst)) |
21997 | return true; |
21998 | |
21999 | if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: &Inst)) { |
22000 | if (AI->getAllocatedType()->isScalableTy()) |
22001 | return true; |
22002 | } |
22003 | |
22004 | return false; |
22005 | } |
22006 | |
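// Build sdiv-by-power-of-two without a real branch when the short forward
// branch optimization is available. The generic helper emits, roughly (a
// sketch for a divide by 2^k):
//
//   %bias = add %x, (2^k - 1)          ; adjust negative dividends
//   %sel  = select (%x < 0), %bias, %x ; lowered via a short forward branch
//   %res  = ashr %sel, k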
22007 | SDValue |
22008 | RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
22009 | SelectionDAG &DAG, |
22010 | SmallVectorImpl<SDNode *> &Created) const { |
22011 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
22012 | if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr)) |
22013 | return SDValue(N, 0); // Lower SDIV as SDIV |
22014 | |
22015 | // Only perform this transform if short forward branch opt is supported. |
22016 | if (!Subtarget.hasShortForwardBranchOpt()) |
22017 | return SDValue(); |
22018 | EVT VT = N->getValueType(ResNo: 0); |
22019 | if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) |
22020 | return SDValue(); |
22021 | |
22022 | // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw. |
22023 | if (Divisor.sgt(RHS: 2048) || Divisor.slt(RHS: -2048)) |
22024 | return SDValue(); |
22025 | return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created); |
22026 | } |
22027 | |
22028 | bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( |
22029 | EVT VT, const APInt &AndMask) const { |
22030 | if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) |
22031 | return !Subtarget.hasStdExtZbs() && AndMask.ugt(RHS: 1024); |
22032 | return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); |
22033 | } |
22034 | |
22035 | unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const { |
22036 | return Subtarget.getMinimumJumpTableEntries(); |
22037 | } |
22038 | |
22039 | // Handle single arg such as return value. |
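// If the per-register types form a homogeneous scalable vector sequence, the
// argument is recorded as a single tuple-like RVVArgInfo whose NF is the
// number of registers; otherwise each vector register is recorded on its own,
// with the first mask (i1-element) vector flagged so it can be given V0.
// For example, a value split into two nxv2i32 register types is recorded as
// one entry with NF = 2 rather than as two independent entries.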
22040 | template <typename Arg> |
22041 | void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) { |
22042 | // This lambda determines whether the list of register types that makes up an
22043 | // argument consists of homogeneous scalable vector types.
22044 | auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) { |
22045 | // First, extract the first element in the argument type. |
22046 | auto It = ArgList.begin(); |
22047 | MVT FirstArgRegType = It->VT; |
22048 | |
22049 | // Return false if the list is empty or the first type needs to be split.
22050 | if (It == ArgList.end() || It->Flags.isSplit()) |
22051 | return false; |
22052 | |
22053 | ++It; |
22054 | |
22055 | // Return false if this argument consists of a single element, or if the
22056 | // first element is not a scalable vector type.
22057 | if (It == ArgList.end() || !FirstArgRegType.isScalableVector()) |
22058 | return false; |
22059 | |
22060 | // Second, check that all remaining elements have the same type as the first
22061 | // and that none of them needs to be split.
22062 | for (; It != ArgList.end(); ++It) |
22063 | if (It->Flags.isSplit() || It->VT != FirstArgRegType) |
22064 | return false; |
22065 | |
22066 | return true; |
22067 | }; |
22068 | |
22069 | if (isHomogeneousScalableVectorType(ArgList)) { |
22070 | // Handle as tuple type |
22071 | RVVArgInfos.push_back(Elt: {(unsigned)ArgList.size(), ArgList[0].VT, false}); |
22072 | } else { |
22073 | // Handle as normal vector type |
22074 | bool FirstVMaskAssigned = false; |
22075 | for (const auto &OutArg : ArgList) { |
22076 | MVT RegisterVT = OutArg.VT; |
22077 | |
22078 | // Skip non-RVV register type |
22079 | if (!RegisterVT.isVector()) |
22080 | continue; |
22081 | |
22082 | if (RegisterVT.isFixedLengthVector()) |
22083 | RegisterVT = TLI->getContainerForFixedLengthVector(VT: RegisterVT); |
22084 | |
22085 | if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) { |
22086 | RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: true}); |
22087 | FirstVMaskAssigned = true; |
22088 | continue; |
22089 | } |
22090 | |
22091 | RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: false}); |
22092 | } |
22093 | } |
22094 | } |
22095 | |
22096 | // Handle multiple args. |
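// A struct made up entirely of identical scalable vector types (e.g.
// { nxv4i32, nxv4i32, nxv4i32 }) is treated as one tuple entry covering
// NumRegs * NumElements registers so its members can land in consecutive
// registers; any other type is flattened into individual register entries.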
22097 | template <> |
22098 | void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) { |
22099 | const DataLayout &DL = MF->getDataLayout(); |
22100 | const Function &F = MF->getFunction(); |
22101 | LLVMContext &Context = F.getContext(); |
22102 | |
22103 | bool FirstVMaskAssigned = false; |
22104 | for (Type *Ty : TypeList) { |
22105 | StructType *STy = dyn_cast<StructType>(Val: Ty); |
22106 | if (STy && STy->containsHomogeneousScalableVectorTypes()) { |
22107 | Type *ElemTy = STy->getTypeAtIndex(N: 0U); |
22108 | EVT VT = TLI->getValueType(DL, Ty: ElemTy); |
22109 | MVT RegisterVT = |
22110 | TLI->getRegisterTypeForCallingConv(Context, CC: F.getCallingConv(), VT); |
22111 | unsigned NumRegs = |
22112 | TLI->getNumRegistersForCallingConv(Context, CC: F.getCallingConv(), VT); |
22113 | |
22114 | RVVArgInfos.push_back( |
22115 | Elt: {.NF: NumRegs * STy->getNumElements(), .VT: RegisterVT, .FirstVMask: false}); |
22116 | } else { |
22117 | SmallVector<EVT, 4> ValueVTs; |
22118 | ComputeValueVTs(TLI: *TLI, DL, Ty, ValueVTs); |
22119 | |
22120 | for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; |
22121 | ++Value) { |
22122 | EVT VT = ValueVTs[Value]; |
22123 | MVT RegisterVT = |
22124 | TLI->getRegisterTypeForCallingConv(Context, CC: F.getCallingConv(), VT); |
22125 | unsigned NumRegs = |
22126 | TLI->getNumRegistersForCallingConv(Context, CC: F.getCallingConv(), VT); |
22127 | |
22128 | // Skip non-RVV register type |
22129 | if (!RegisterVT.isVector()) |
22130 | continue; |
22131 | |
22132 | if (RegisterVT.isFixedLengthVector()) |
22133 | RegisterVT = TLI->getContainerForFixedLengthVector(VT: RegisterVT); |
22134 | |
22135 | if (!FirstVMaskAssigned && |
22136 | RegisterVT.getVectorElementType() == MVT::i1) { |
22137 | RVVArgInfos.push_back(Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: true}); |
22138 | FirstVMaskAssigned = true; |
22139 | --NumRegs; |
22140 | } |
22141 | |
22142 | RVVArgInfos.insert(I: RVVArgInfos.end(), NumToInsert: NumRegs, Elt: {.NF: 1, .VT: RegisterVT, .FirstVMask: false}); |
22143 | } |
22144 | } |
22145 | } |
22146 | } |
22147 | |
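// Record NF consecutive register-group assignments starting at StartReg
// (a register number, with v8 being the first argument VR); StartReg == 0
// means the argument is not assigned to a VR. For example, assuming ArgVRM2s
// lists v8m2, v10m2, v12m2, ..., a call with NF = 2, LMul = 2, StartReg = 10
// records v10m2 followed by v12m2.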
22148 | void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul, |
22149 | unsigned StartReg) { |
22150 | assert((StartReg % LMul) == 0 && |
22151 | "Start register number should be multiple of lmul" ); |
22152 | const MCPhysReg *VRArrays; |
22153 | switch (LMul) { |
22154 | default: |
22155 | report_fatal_error(reason: "Invalid lmul" ); |
22156 | case 1: |
22157 | VRArrays = ArgVRs; |
22158 | break; |
22159 | case 2: |
22160 | VRArrays = ArgVRM2s; |
22161 | break; |
22162 | case 4: |
22163 | VRArrays = ArgVRM4s; |
22164 | break; |
22165 | case 8: |
22166 | VRArrays = ArgVRM8s; |
22167 | break; |
22168 | } |
22169 | |
22170 | for (unsigned i = 0; i < NF; ++i) |
22171 | if (StartReg) |
22172 | AllocatedPhysRegs.push_back(Elt: VRArrays[(StartReg - 8) / LMul + i]); |
22173 | else |
22174 | AllocatedPhysRegs.push_back(Elt: MCPhysReg()); |
22175 | } |
22176 | |
22177 | /// Determine, for each RVV argument, whether it is passed in a register: if
22178 | /// the argument can be assigned to a VR (or group of VRs), give it a specific
22179 | /// register; otherwise assign it 0, which is an invalid MCPhysReg.
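/// Registers are tracked in AssignedMap with one bit per argument VR (bit 0
/// corresponds to v8). For example, a tuple with NF = 2 and LMUL = 2 needs
/// four consecutive registers starting at an even offset: if v8-v11 are free
/// it claims them (mask 0b1111); otherwise the search advances in LMUL-sized
/// steps and, if nothing fits, the argument falls back to "no register".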
22180 | void RVVArgDispatcher::compute() { |
22181 | uint32_t AssignedMap = 0; |
22182 | auto allocate = [&](const RVVArgInfo &ArgInfo) { |
22183 | // Allocate first vector mask argument to V0. |
22184 | if (ArgInfo.FirstVMask) { |
22185 | AllocatedPhysRegs.push_back(Elt: RISCV::V0); |
22186 | return; |
22187 | } |
22188 | |
22189 | unsigned RegsNeeded = divideCeil( |
22190 | Numerator: ArgInfo.VT.getSizeInBits().getKnownMinValue(), Denominator: RISCV::RVVBitsPerBlock); |
22191 | unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded; |
22192 | for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs; |
22193 | StartReg += RegsNeeded) { |
22194 | uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg; |
22195 | if ((AssignedMap & Map) == 0) { |
22196 | allocatePhysReg(NF: ArgInfo.NF, LMul: RegsNeeded, StartReg: StartReg + 8); |
22197 | AssignedMap |= Map; |
22198 | return; |
22199 | } |
22200 | } |
22201 | |
22202 | allocatePhysReg(NF: ArgInfo.NF, LMul: RegsNeeded, StartReg: 0); |
22203 | }; |
22204 | |
22205 | for (unsigned i = 0; i < RVVArgInfos.size(); ++i) |
22206 | allocate(RVVArgInfos[i]); |
22207 | } |
22208 | |
22209 | MCPhysReg RVVArgDispatcher::getNextPhysReg() { |
22210 | assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range" ); |
22211 | return AllocatedPhysRegs[CurIdx++]; |
22212 | } |
22213 | |
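// With Zicfilp, jump-table dispatch is emitted as a software-guarded indirect
// branch (SW_GUARDED_BRIND) instead of a plain BRIND, the intent being that
// the jump-table targets do not each need a landing pad.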
22214 | SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl, |
22215 | SDValue Value, SDValue Addr, |
22216 | int JTI, |
22217 | SelectionDAG &DAG) const { |
22218 | if (Subtarget.hasStdExtZicfilp()) { |
22219 | // When Zicfilp enabled, we need to use software guarded branch for jump |
22220 | // table branch. |
22221 | SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Chain: Value, DL: dl); |
22222 | return DAG.getNode(Opcode: RISCVISD::SW_GUARDED_BRIND, DL: dl, VT: MVT::Other, N1: JTInfo, |
22223 | N2: Addr); |
22224 | } |
22225 | return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG); |
22226 | } |
22227 | |
22228 | namespace llvm::RISCVVIntrinsicsTable { |
22229 | |
22230 | #define GET_RISCVVIntrinsicsTable_IMPL |
22231 | #include "RISCVGenSearchableTables.inc" |
22232 | |
22233 | } // namespace llvm::RISCVVIntrinsicsTable |
22234 | |