1 | //===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that RISC-V uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H |
16 | |
17 | #include "RISCV.h" |
18 | #include "llvm/CodeGen/CallingConvLower.h" |
19 | #include "llvm/CodeGen/SelectionDAG.h" |
20 | #include "llvm/CodeGen/TargetLowering.h" |
21 | #include <optional> |
22 | |
23 | namespace llvm { |
24 | class InstructionCost; |
25 | class RISCVSubtarget; |
26 | struct RISCVRegisterInfo; |
27 | class RVVArgDispatcher; |
28 | |
29 | namespace RISCVISD { |
30 | // clang-format off |
31 | enum NodeType : unsigned { |
32 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
33 | RET_GLUE, |
34 | SRET_GLUE, |
35 | MRET_GLUE, |
36 | CALL, |
37 | /// Select with condition operator - This selects between a true value and |
38 | /// a false value (ops #3 and #4) based on the boolean result of comparing |
39 | /// the lhs and rhs (ops #0 and #1) of a conditional expression with the |
40 | /// condition code in op #2, a XLenVT constant from the ISD::CondCode enum. |
41 | /// The lhs and rhs are XLenVT integers. The true and false values can be |
42 | /// integer or floating point. |
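/// For example (illustrative): (SELECT_CC lhs, rhs, SETLT, tval, fval)
/// evaluates to tval when lhs is signed-less-than rhs, and to fval otherwise.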
43 | SELECT_CC, |
44 | BR_CC, |
45 | BuildPairF64, |
46 | SplitF64, |
47 | TAIL, |
48 | |
49 | // Add the Lo 12 bits from an address. Selected to ADDI. |
50 | ADD_LO, |
51 | // Get the Hi 20 bits from an address. Selected to LUI. |
52 | HI, |
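// Used together for the LUI+ADDI (%hi/%lo) absolute addressing pattern,
// e.g. (ADD_LO (HI sym), sym) (illustrative).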
53 | |
54 | // Represents an AUIPC+ADDI pair. Selected to PseudoLLA. |
55 | LLA, |
56 | |
57 | // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation. |
58 | ADD_TPREL, |
59 | |
// Multiply high for signed x unsigned.
61 | MULHSU, |
62 | |
63 | // Represents (ADD (SHL a, b), c) with the arguments appearing in the order |
64 | // a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with zba |
65 | // or addsl with XTheadBa. |
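// For example, (SHL_ADD a, 2, c) computes ((a << 2) + c) and can select to
// sh2add (illustrative).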
66 | SHL_ADD, |
67 | |
68 | // RV64I shifts, directly matching the semantics of the named RISC-V |
69 | // instructions. |
70 | SLLW, |
71 | SRAW, |
72 | SRLW, |
73 | // 32-bit operations from RV64M that can't be simply matched with a pattern |
74 | // at instruction selection time. These have undefined behavior for division |
75 | // by 0 or overflow (divw) like their target independent counterparts. |
76 | DIVW, |
77 | DIVUW, |
78 | REMUW, |
79 | // RV64IB rotates, directly matching the semantics of the named RISC-V |
80 | // instructions. |
81 | ROLW, |
82 | RORW, |
83 | // RV64IZbb bit counting instructions directly matching the semantics of the |
84 | // named RISC-V instructions. |
85 | CLZW, |
86 | CTZW, |
87 | |
88 | // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel. |
89 | ABSW, |
90 | |
91 | // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as |
92 | // XLEN is the only legal integer width. |
93 | // |
94 | // FMV_H_X matches the semantics of the FMV.H.X. |
95 | // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result. |
96 | // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result. |
97 | // FMV_W_X_RV64 matches the semantics of the FMV.W.X. |
98 | // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result. |
99 | // |
100 | // This is a more convenient semantic for producing dagcombines that remove |
101 | // unnecessary GPR->FPR->GPR moves. |
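// For example (illustrative), a DAG combine can fold
// (FMV_X_ANYEXTW_RV64 (FMV_W_X_RV64 X)) back to X.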
102 | FMV_H_X, |
103 | FMV_X_ANYEXTH, |
104 | FMV_X_SIGNEXTH, |
105 | FMV_W_X_RV64, |
106 | FMV_X_ANYEXTW_RV64, |
107 | // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and |
108 | // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of |
109 | // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode |
110 | // is passed as a TargetConstant operand using the RISCVFPRndMode enum. |
111 | FCVT_X, |
112 | FCVT_XU, |
113 | // FP to 32 bit int conversions for RV64. These are used to keep track of the |
114 | // result being sign extended to 64 bit. These saturate out of range inputs. |
115 | // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode |
116 | // is passed as a TargetConstant operand using the RISCVFPRndMode enum. |
117 | FCVT_W_RV64, |
118 | FCVT_WU_RV64, |
119 | |
120 | FP_ROUND_BF16, |
121 | FP_EXTEND_BF16, |
122 | |
123 | // Rounds an FP value to its corresponding integer in the same FP format. |
124 | // First operand is the value to round, the second operand is the largest |
125 | // integer that can be represented exactly in the FP format. This will be |
126 | // expanded into multiple instructions and basic blocks with a custom |
127 | // inserter. |
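// For example (illustrative), for f32 that bound is 2^23 and for f64 it is
// 2^52, since all values of larger magnitude are already integers.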
128 | FROUND, |
129 | |
130 | FCLASS, |
131 | |
132 | // Floating point fmax and fmin matching the RISC-V instruction semantics. |
133 | FMAX, FMIN, |
134 | |
135 | // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)). |
136 | // It takes a chain operand and another two target constant operands (the |
137 | // CSR numbers of the low and high parts of the counter). |
138 | READ_COUNTER_WIDE, |
139 | |
140 | // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or |
141 | // XLenVT. |
142 | BREV8, |
143 | ORC_B, |
144 | ZIP, |
145 | UNZIP, |
146 | |
147 | // Scalar cryptography |
148 | CLMUL, CLMULH, CLMULR, |
149 | SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1, |
150 | SM4KS, SM4ED, |
151 | SM3P0, SM3P1, |
152 | |
153 | // May-Be-Operations |
154 | MOPR, MOPRR, |
155 | |
156 | // Vector Extension |
157 | FIRST_VL_VECTOR_OP, |
158 | // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand |
159 | // for the VL value to be used for the operation. The first operand is |
// the passthru operand.
161 | VMV_V_V_VL = FIRST_VL_VECTOR_OP, |
162 | // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand |
163 | // for the VL value to be used for the operation. The first operand is |
// the passthru operand.
165 | VMV_V_X_VL, |
166 | // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand |
167 | // for the VL value to be used for the operation. The first operand is |
// the passthru operand.
169 | VFMV_V_F_VL, |
170 | // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign |
171 | // extended from the vector element size. |
172 | VMV_X_S, |
173 | // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand. |
174 | VMV_S_X_VL, |
175 | // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand. |
176 | VFMV_S_F_VL, |
// Splats a 64-bit value that has been split into two i32 parts. This is
178 | // expanded late to two scalar stores and a stride 0 vector load. |
// The first operand is the passthru operand.
180 | SPLAT_VECTOR_SPLIT_I64_VL, |
// Truncates an RVV integer vector by one power-of-two. Carries both an extra
182 | // mask and VL operand. |
183 | TRUNCATE_VECTOR_VL, |
184 | // Matches the semantics of vslideup/vslidedown. The first operand is the |
185 | // pass-thru operand, the second is the source vector, the third is the XLenVT |
186 | // index (either constant or non-constant), the fourth is the mask, the fifth |
187 | // is the VL and the sixth is the policy. |
188 | VSLIDEUP_VL, |
189 | VSLIDEDOWN_VL, |
// Matches the semantics of vslide1up/vslide1down. The first operand is the
// passthru operand, the second is the source vector, and the third is the
// XLenVT scalar value. The fourth and fifth operands are the mask and VL
// operands.
193 | VSLIDE1UP_VL, |
194 | VSLIDE1DOWN_VL, |
// Matches the semantics of vfslide1up/vfslide1down. The first operand is the
// passthru operand, the second is the source vector, and the third is a
// scalar value whose type matches the element type of the vectors. The fourth
// and fifth operands are the mask and VL operands.
199 | VFSLIDE1UP_VL, |
200 | VFSLIDE1DOWN_VL, |
201 | // Matches the semantics of the vid.v instruction, with a mask and VL |
202 | // operand. |
203 | VID_VL, |
// Matches the semantics of the vfncvt.rod instruction (convert double-width
// float to single-width float, rounding towards odd). Takes a double-width
// float vector and produces a single-width float vector. Also has a mask and
// VL operand.
208 | VFNCVT_ROD_VL, |
209 | // These nodes match the semantics of the corresponding RVV vector reduction |
210 | // instructions. They produce a vector result which is the reduction |
211 | // performed over the second vector operand plus the first element of the |
212 | // third vector operand. The first operand is the pass-thru operand. The |
213 | // second operand is an unconstrained vector type, and the result, first, and |
214 | // third operand's types are expected to be the corresponding full-width |
215 | // LMUL=1 type for the second operand: |
216 | // nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8 |
217 | // nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32 |
// The difference in types does introduce extra vsetvli instructions, but it
// also reduces the number of registers consumed per reduction.
220 | // Also has a mask and VL operand. |
221 | VECREDUCE_ADD_VL, |
222 | VECREDUCE_UMAX_VL, |
223 | VECREDUCE_SMAX_VL, |
224 | VECREDUCE_UMIN_VL, |
225 | VECREDUCE_SMIN_VL, |
226 | VECREDUCE_AND_VL, |
227 | VECREDUCE_OR_VL, |
228 | VECREDUCE_XOR_VL, |
229 | VECREDUCE_FADD_VL, |
230 | VECREDUCE_SEQ_FADD_VL, |
231 | VECREDUCE_FMIN_VL, |
232 | VECREDUCE_FMAX_VL, |
233 | |
234 | // Vector binary ops with a merge as a third operand, a mask as a fourth |
235 | // operand, and VL as a fifth operand. |
236 | ADD_VL, |
237 | AND_VL, |
238 | MUL_VL, |
239 | OR_VL, |
240 | SDIV_VL, |
241 | SHL_VL, |
242 | SREM_VL, |
243 | SRA_VL, |
244 | SRL_VL, |
245 | ROTL_VL, |
246 | ROTR_VL, |
247 | SUB_VL, |
248 | UDIV_VL, |
249 | UREM_VL, |
250 | XOR_VL, |
251 | SMIN_VL, |
252 | SMAX_VL, |
253 | UMIN_VL, |
254 | UMAX_VL, |
255 | |
256 | BITREVERSE_VL, |
257 | BSWAP_VL, |
258 | CTLZ_VL, |
259 | CTTZ_VL, |
260 | CTPOP_VL, |
261 | |
262 | SADDSAT_VL, |
263 | UADDSAT_VL, |
264 | SSUBSAT_VL, |
265 | USUBSAT_VL, |
266 | |
267 | // Averaging adds of signed integers. |
268 | AVGFLOORS_VL, |
269 | // Averaging adds of unsigned integers. |
270 | AVGFLOORU_VL, |
271 | // Rounding averaging adds of signed integers. |
272 | AVGCEILS_VL, |
273 | // Rounding averaging adds of unsigned integers. |
274 | AVGCEILU_VL, |
275 | |
276 | // Operands are (source, shift, merge, mask, roundmode, vl) |
277 | VNCLIPU_VL, |
278 | VNCLIP_VL, |
279 | |
280 | MULHS_VL, |
281 | MULHU_VL, |
282 | FADD_VL, |
283 | FSUB_VL, |
284 | FMUL_VL, |
285 | FDIV_VL, |
286 | VFMIN_VL, |
287 | VFMAX_VL, |
288 | |
289 | // Vector unary ops with a mask as a second operand and VL as a third operand. |
290 | FNEG_VL, |
291 | FABS_VL, |
292 | FSQRT_VL, |
293 | FCLASS_VL, |
294 | FCOPYSIGN_VL, // Has a merge operand |
295 | VFCVT_RTZ_X_F_VL, |
296 | VFCVT_RTZ_XU_F_VL, |
297 | VFCVT_X_F_VL, |
298 | VFCVT_XU_F_VL, |
299 | VFROUND_NOEXCEPT_VL, |
300 | VFCVT_RM_X_F_VL, // Has a rounding mode operand. |
301 | VFCVT_RM_XU_F_VL, // Has a rounding mode operand. |
302 | SINT_TO_FP_VL, |
303 | UINT_TO_FP_VL, |
304 | VFCVT_RM_F_X_VL, // Has a rounding mode operand. |
305 | VFCVT_RM_F_XU_VL, // Has a rounding mode operand. |
306 | FP_ROUND_VL, |
307 | FP_EXTEND_VL, |
308 | |
309 | // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand. |
310 | VFMADD_VL, |
311 | VFNMADD_VL, |
312 | VFMSUB_VL, |
313 | VFNMSUB_VL, |
314 | |
315 | // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth |
316 | // operand. |
317 | VFWMADD_VL, |
318 | VFWNMADD_VL, |
319 | VFWMSUB_VL, |
320 | VFWNMSUB_VL, |
321 | |
// Widening instructions with a merge value as a third operand, a mask as a
323 | // fourth operand, and VL as a fifth operand. |
324 | VWMUL_VL, |
325 | VWMULU_VL, |
326 | VWMULSU_VL, |
327 | VWADD_VL, |
328 | VWADDU_VL, |
329 | VWSUB_VL, |
330 | VWSUBU_VL, |
331 | VWADD_W_VL, |
332 | VWADDU_W_VL, |
333 | VWSUB_W_VL, |
334 | VWSUBU_W_VL, |
335 | VWSLL_VL, |
336 | |
337 | VFWMUL_VL, |
338 | VFWADD_VL, |
339 | VFWSUB_VL, |
340 | VFWADD_W_VL, |
341 | VFWSUB_W_VL, |
342 | |
343 | // Widening ternary operations with a mask as the fourth operand and VL as the |
344 | // fifth operand. |
345 | VWMACC_VL, |
346 | VWMACCU_VL, |
347 | VWMACCSU_VL, |
348 | |
349 | // Narrowing logical shift right. |
350 | // Operands are (source, shift, passthru, mask, vl) |
351 | VNSRL_VL, |
352 | |
353 | // Vector compare producing a mask. Fourth operand is input mask. Fifth |
354 | // operand is VL. |
355 | SETCC_VL, |
356 | |
357 | // General vmerge node with mask, true, false, passthru, and vl operands. |
358 | // Tail agnostic vselect can be implemented by setting passthru to undef. |
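// For example (illustrative), a tail-agnostic vselect(c, t, f) can be
// expressed as (VMERGE_VL c, t, f, undef, vl).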
359 | VMERGE_VL, |
360 | |
361 | // Mask binary operators. |
362 | VMAND_VL, |
363 | VMOR_VL, |
364 | VMXOR_VL, |
365 | |
366 | // Set mask vector to all zeros or ones. |
367 | VMCLR_VL, |
368 | VMSET_VL, |
369 | |
370 | // Matches the semantics of vrgather.vx and vrgather.vv with extra operands |
371 | // for passthru and VL. Operands are (src, index, mask, passthru, vl). |
372 | VRGATHER_VX_VL, |
373 | VRGATHER_VV_VL, |
374 | VRGATHEREI16_VV_VL, |
375 | |
376 | // Vector sign/zero extend with additional mask & VL operands. |
377 | VSEXT_VL, |
378 | VZEXT_VL, |
379 | |
380 | // vcpop.m with additional mask and VL operands. |
381 | VCPOP_VL, |
382 | |
383 | // vfirst.m with additional mask and VL operands. |
384 | VFIRST_VL, |
385 | |
386 | LAST_VL_VECTOR_OP = VFIRST_VL, |
387 | |
388 | // Read VLENB CSR |
389 | READ_VLENB, |
390 | // Reads value of CSR. |
391 | // The first operand is a chain pointer. The second specifies address of the |
392 | // required CSR. Two results are produced, the read value and the new chain |
393 | // pointer. |
394 | READ_CSR, |
395 | // Write value to CSR. |
396 | // The first operand is a chain pointer, the second specifies address of the |
397 | // required CSR and the third is the value to write. The result is the new |
398 | // chain pointer. |
399 | WRITE_CSR, |
400 | // Read and write value of CSR. |
401 | // The first operand is a chain pointer, the second specifies address of the |
402 | // required CSR and the third is the value to write. Two results are produced, |
403 | // the value read before the modification and the new chain pointer. |
404 | SWAP_CSR, |
405 | |
406 | // Branchless select operations, matching the semantics of the instructions |
407 | // defined in Zicond or XVentanaCondOps. |
408 | CZERO_EQZ, // vt.maskc for XVentanaCondOps. |
409 | CZERO_NEZ, // vt.maskcn for XVentanaCondOps. |
410 | |
411 | /// Software guarded BRIND node. Operand 0 is the chain operand and |
412 | /// operand 1 is the target address. |
413 | SW_GUARDED_BRIND, |
414 | |
415 | // FP to 32 bit int conversions for RV64. These are used to keep track of the |
416 | // result being sign extended to 64 bit. These saturate out of range inputs. |
417 | STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE, |
418 | STRICT_FCVT_WU_RV64, |
419 | STRICT_FADD_VL, |
420 | STRICT_FSUB_VL, |
421 | STRICT_FMUL_VL, |
422 | STRICT_FDIV_VL, |
423 | STRICT_FSQRT_VL, |
424 | STRICT_VFMADD_VL, |
425 | STRICT_VFNMADD_VL, |
426 | STRICT_VFMSUB_VL, |
427 | STRICT_VFNMSUB_VL, |
428 | STRICT_FP_ROUND_VL, |
429 | STRICT_FP_EXTEND_VL, |
430 | STRICT_VFNCVT_ROD_VL, |
431 | STRICT_SINT_TO_FP_VL, |
432 | STRICT_UINT_TO_FP_VL, |
433 | STRICT_VFCVT_RM_X_F_VL, |
434 | STRICT_VFCVT_RTZ_X_F_VL, |
435 | STRICT_VFCVT_RTZ_XU_F_VL, |
436 | STRICT_FSETCC_VL, |
437 | STRICT_FSETCCS_VL, |
438 | STRICT_VFROUND_NOEXCEPT_VL, |
439 | LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL, |
440 | |
441 | SF_VC_XV_SE, |
442 | SF_VC_IV_SE, |
443 | SF_VC_VV_SE, |
444 | SF_VC_FV_SE, |
445 | SF_VC_XVV_SE, |
446 | SF_VC_IVV_SE, |
447 | SF_VC_VVV_SE, |
448 | SF_VC_FVV_SE, |
449 | SF_VC_XVW_SE, |
450 | SF_VC_IVW_SE, |
451 | SF_VC_VVW_SE, |
452 | SF_VC_FVW_SE, |
453 | SF_VC_V_X_SE, |
454 | SF_VC_V_I_SE, |
455 | SF_VC_V_XV_SE, |
456 | SF_VC_V_IV_SE, |
457 | SF_VC_V_VV_SE, |
458 | SF_VC_V_FV_SE, |
459 | SF_VC_V_XVV_SE, |
460 | SF_VC_V_IVV_SE, |
461 | SF_VC_V_VVV_SE, |
462 | SF_VC_V_FVV_SE, |
463 | SF_VC_V_XVW_SE, |
464 | SF_VC_V_IVW_SE, |
465 | SF_VC_V_VVW_SE, |
466 | SF_VC_V_FVW_SE, |
467 | |
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
// opcodes will be treated as target memory ops!
471 | |
472 | TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE, |
473 | TH_LWUD, |
474 | TH_LDD, |
475 | TH_SWD, |
476 | TH_SDD, |
477 | }; |
478 | // clang-format on |
479 | } // namespace RISCVISD |
480 | |
481 | class RISCVTargetLowering : public TargetLowering { |
482 | const RISCVSubtarget &Subtarget; |
483 | |
484 | public: |
485 | explicit RISCVTargetLowering(const TargetMachine &TM, |
486 | const RISCVSubtarget &STI); |
487 | |
488 | const RISCVSubtarget &getSubtarget() const { return Subtarget; } |
489 | |
490 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
491 | MachineFunction &MF, |
492 | unsigned Intrinsic) const override; |
493 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
494 | unsigned AS, |
495 | Instruction *I = nullptr) const override; |
496 | bool isLegalICmpImmediate(int64_t Imm) const override; |
497 | bool isLegalAddImmediate(int64_t Imm) const override; |
498 | bool isTruncateFree(Type *SrcTy, Type *DstTy) const override; |
499 | bool isTruncateFree(EVT SrcVT, EVT DstVT) const override; |
500 | bool isTruncateFree(SDValue Val, EVT VT2) const override; |
501 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
502 | bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; |
503 | bool signExtendConstant(const ConstantInt *CI) const override; |
504 | bool isCheapToSpeculateCttz(Type *Ty) const override; |
505 | bool isCheapToSpeculateCtlz(Type *Ty) const override; |
506 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
507 | bool hasAndNotCompare(SDValue Y) const override; |
508 | bool hasBitTest(SDValue X, SDValue Y) const override; |
509 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
510 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
511 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
512 | SelectionDAG &DAG) const override; |
513 | /// Return true if the (vector) instruction I will be lowered to an instruction |
514 | /// with a scalar splat operand for the given Operand number. |
515 | bool canSplatOperand(Instruction *I, int Operand) const; |
516 | /// Return true if a vector instruction will lower to a target instruction |
517 | /// able to splat the given operand. |
518 | bool canSplatOperand(unsigned Opcode, int Operand) const; |
519 | bool shouldSinkOperands(Instruction *I, |
520 | SmallVectorImpl<Use *> &Ops) const override; |
521 | bool shouldScalarizeBinop(SDValue VecOp) const override; |
522 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
523 | std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const; |
524 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
525 | bool ForCodeSize) const override; |
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                             unsigned Index) const override;
528 | |
529 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
530 | |
531 | bool preferScalarizeSplat(SDNode *N) const override; |
532 | |
533 | bool softPromoteHalfType() const override { return true; } |
534 | |
535 | /// Return the register type for a given MVT, ensuring vectors are treated |
536 | /// as a series of gpr sized integers. |
537 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
538 | EVT VT) const override; |
539 | |
540 | /// Return the number of registers for a given MVT, ensuring vectors are |
541 | /// treated as a series of gpr sized integers. |
542 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
543 | CallingConv::ID CC, |
544 | EVT VT) const override; |
545 | |
546 | unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, |
547 | CallingConv::ID CC, EVT VT, |
548 | EVT &IntermediateVT, |
549 | unsigned &NumIntermediates, |
550 | MVT &RegisterVT) const override; |
551 | |
552 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
553 | EVT VT) const override; |
554 | |
555 | /// Return true if the given shuffle mask can be codegen'd directly, or if it |
556 | /// should be stack expanded. |
557 | bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; |
558 | |
559 | bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { |
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
// add only one extra store instruction. There is potentially a more
// significant benefit because it avoids the float->int domain switch
// for the input value, so it is more likely a win.
565 | if ((LTy.isFloatingPoint() && HTy.isInteger()) || |
566 | (LTy.isInteger() && HTy.isFloatingPoint())) |
567 | return true; |
// If the pair only contains int values, we will save two bitwise
// instructions at the cost of one extra store instruction (and one more
// store buffer entry). Since the benefit is less clear-cut, we leave such
// a pair out until we have a testcase proving it is a win.
572 | return false; |
573 | } |
574 | |
575 | bool |
576 | shouldExpandBuildVectorWithShuffles(EVT VT, |
577 | unsigned DefinedValues) const override; |
578 | |
579 | bool shouldExpandCttzElements(EVT VT) const override; |
580 | |
581 | /// Return the cost of LMUL for linear operations. |
582 | InstructionCost getLMULCost(MVT VT) const; |
583 | |
584 | InstructionCost getVRGatherVVCost(MVT VT) const; |
585 | InstructionCost getVRGatherVICost(MVT VT) const; |
586 | InstructionCost getVSlideVXCost(MVT VT) const; |
587 | InstructionCost getVSlideVICost(MVT VT) const; |
588 | |
589 | // Provide custom lowering hooks for some operations. |
590 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
591 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
592 | SelectionDAG &DAG) const override; |
593 | |
594 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
595 | |
596 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
597 | const APInt &DemandedElts, |
598 | TargetLoweringOpt &TLO) const override; |
599 | |
600 | void computeKnownBitsForTargetNode(const SDValue Op, |
601 | KnownBits &Known, |
602 | const APInt &DemandedElts, |
603 | const SelectionDAG &DAG, |
604 | unsigned Depth) const override; |
605 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
606 | const APInt &DemandedElts, |
607 | const SelectionDAG &DAG, |
608 | unsigned Depth) const override; |
609 | |
610 | bool canCreateUndefOrPoisonForTargetNode(SDValue Op, |
611 | const APInt &DemandedElts, |
612 | const SelectionDAG &DAG, |
613 | bool PoisonOnly, bool ConsiderFlags, |
614 | unsigned Depth) const override; |
615 | |
616 | const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; |
617 | |
618 | // This method returns the name of a target specific DAG node. |
619 | const char *getTargetNodeName(unsigned Opcode) const override; |
620 | |
621 | MachineMemOperand::Flags |
622 | getTargetMMOFlags(const Instruction &I) const override; |
623 | |
624 | MachineMemOperand::Flags |
625 | getTargetMMOFlags(const MemSDNode &Node) const override; |
626 | |
627 | bool |
628 | areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, |
629 | const MemSDNode &NodeY) const override; |
630 | |
631 | ConstraintType getConstraintType(StringRef Constraint) const override; |
632 | |
633 | InlineAsm::ConstraintCode |
634 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override; |
635 | |
636 | std::pair<unsigned, const TargetRegisterClass *> |
637 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
638 | StringRef Constraint, MVT VT) const override; |
639 | |
640 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
641 | std::vector<SDValue> &Ops, |
642 | SelectionDAG &DAG) const override; |
643 | |
644 | MachineBasicBlock * |
645 | EmitInstrWithCustomInserter(MachineInstr &MI, |
646 | MachineBasicBlock *BB) const override; |
647 | |
648 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
649 | SDNode *Node) const override; |
650 | |
651 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
652 | EVT VT) const override; |
653 | |
654 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
655 | bool MathUsed) const override { |
656 | if (VT == MVT::i8 || VT == MVT::i16) |
657 | return false; |
658 | |
659 | return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed); |
660 | } |
661 | |
662 | bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, |
663 | unsigned AddrSpace) const override { |
664 | // If we can replace 4 or more scalar stores, there will be a reduction |
665 | // in instructions even after we add a vector constant load. |
666 | return NumElem >= 4; |
667 | } |
668 | |
669 | bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { |
670 | return VT.isScalarInteger(); |
671 | } |
672 | bool convertSelectOfConstantsToMath(EVT VT) const override { return true; } |
673 | |
674 | bool isCtpopFast(EVT VT) const override; |
675 | |
676 | unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override; |
677 | |
678 | bool preferZeroCompareBranch() const override { return true; } |
679 | |
680 | bool shouldInsertFencesForAtomic(const Instruction *I) const override { |
return isa<LoadInst>(I) || isa<StoreInst>(I);
682 | } |
683 | Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, |
684 | AtomicOrdering Ord) const override; |
685 | Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, |
686 | AtomicOrdering Ord) const override; |
687 | |
688 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
689 | EVT VT) const override; |
690 | |
691 | ISD::NodeType getExtendForAtomicOps() const override { |
692 | return ISD::SIGN_EXTEND; |
693 | } |
694 | |
695 | ISD::NodeType getExtendForAtomicCmpSwapArg() const override; |
696 | |
697 | bool shouldTransformSignedTruncationCheck(EVT XVT, |
698 | unsigned KeptBits) const override; |
699 | |
700 | TargetLowering::ShiftLegalizationStrategy |
701 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
702 | unsigned ExpansionFactor) const override { |
703 | if (DAG.getMachineFunction().getFunction().hasMinSize()) |
704 | return ShiftLegalizationStrategy::LowerToLibcall; |
705 | return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, |
706 | ExpansionFactor); |
707 | } |
708 | |
709 | bool isDesirableToCommuteWithShift(const SDNode *N, |
710 | CombineLevel Level) const override; |
711 | |
712 | /// If a physical register, this returns the register that receives the |
713 | /// exception address on entry to an EH pad. |
714 | Register |
715 | getExceptionPointerRegister(const Constant *PersonalityFn) const override; |
716 | |
717 | /// If a physical register, this returns the register that receives the |
718 | /// exception typeid on entry to a landing pad. |
719 | Register |
720 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override; |
721 | |
722 | bool shouldExtendTypeInLibCall(EVT Type) const override; |
723 | bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; |
724 | |
725 | /// Returns the register with the specified architectural or ABI name. This |
726 | /// method is necessary to lower the llvm.read_register.* and |
727 | /// llvm.write_register.* intrinsics. Allocatable registers must be reserved |
728 | /// with the clang -ffixed-xX flag for access to be allowed. |
729 | Register getRegisterByName(const char *RegName, LLT VT, |
730 | const MachineFunction &MF) const override; |
731 | |
732 | // Lower incoming arguments, copy physregs into vregs |
733 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
734 | bool IsVarArg, |
735 | const SmallVectorImpl<ISD::InputArg> &Ins, |
736 | const SDLoc &DL, SelectionDAG &DAG, |
737 | SmallVectorImpl<SDValue> &InVals) const override; |
738 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
739 | bool IsVarArg, |
740 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
741 | LLVMContext &Context) const override; |
742 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
743 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
744 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
745 | SelectionDAG &DAG) const override; |
746 | SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, |
747 | SmallVectorImpl<SDValue> &InVals) const override; |
748 | |
749 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
750 | Type *Ty) const override; |
751 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
752 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
753 | bool shouldConsiderGEPOffsetSplit() const override { return true; } |
754 | |
755 | bool decomposeMulByConstant(LLVMContext &Context, EVT VT, |
756 | SDValue C) const override; |
757 | |
758 | bool isMulAddWithConstProfitable(SDValue AddNode, |
759 | SDValue ConstNode) const override; |
760 | |
761 | TargetLowering::AtomicExpansionKind |
762 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
763 | Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, |
764 | Value *AlignedAddr, Value *Incr, |
765 | Value *Mask, Value *ShiftAmt, |
766 | AtomicOrdering Ord) const override; |
767 | TargetLowering::AtomicExpansionKind |
768 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override; |
769 | Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, |
770 | AtomicCmpXchgInst *CI, |
771 | Value *AlignedAddr, Value *CmpVal, |
772 | Value *NewVal, Value *Mask, |
773 | AtomicOrdering Ord) const override; |
774 | |
775 | /// Returns true if the target allows unaligned memory accesses of the |
776 | /// specified type. |
777 | bool allowsMisalignedMemoryAccesses( |
778 | EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
779 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
780 | unsigned *Fast = nullptr) const override; |
781 | |
782 | EVT getOptimalMemOpType(const MemOp &Op, |
783 | const AttributeList &FuncAttributes) const override; |
784 | |
785 | bool splitValueIntoRegisterParts( |
786 | SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
787 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) |
788 | const override; |
789 | |
790 | SDValue joinRegisterPartsIntoValue( |
791 | SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, |
792 | unsigned NumParts, MVT PartVT, EVT ValueVT, |
793 | std::optional<CallingConv::ID> CC) const override; |
794 | |
795 | // Return the value of VLMax for the given vector type (i.e. SEW and LMUL) |
796 | SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const; |
797 | |
798 | static RISCVII::VLMUL getLMUL(MVT VT); |
799 | inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, |
800 | unsigned MinSize) { |
801 | // Original equation: |
802 | // VLMAX = (VectorBits / EltSize) * LMUL |
803 | // where LMUL = MinSize / RISCV::RVVBitsPerBlock |
804 | // The following equations have been reordered to prevent loss of precision |
805 | // when calculating fractional LMUL. |
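// For example (illustrative): VectorBits=128, EltSize=32, MinSize=64 (an
// LMUL=1 type such as nxv2i32) gives ((128 / 32) * 64) / 64 = 4.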
806 | return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; |
807 | } |
808 | |
809 | // Return inclusive (low, high) bounds on the value of VLMAX for the |
810 | // given scalable container type given known bounds on VLEN. |
811 | static std::pair<unsigned, unsigned> |
812 | computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget); |
813 | |
814 | static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul); |
815 | static unsigned getSubregIndexByMVT(MVT VT, unsigned Index); |
816 | static unsigned getRegClassIDForVecVT(MVT VT); |
817 | static std::pair<unsigned, unsigned> |
decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                         unsigned InsertExtractIdx,
                                         const RISCVRegisterInfo *TRI);
821 | MVT getContainerForFixedLengthVector(MVT VT) const; |
822 | |
823 | bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; |
824 | |
825 | bool isLegalElementTypeForRVV(EVT ScalarTy) const; |
826 | |
827 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; |
828 | |
829 | unsigned getJumpTableEncoding() const override; |
830 | |
831 | const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
832 | const MachineBasicBlock *MBB, |
833 | unsigned uid, |
834 | MCContext &Ctx) const override; |
835 | |
836 | bool isVScaleKnownToBeAPowerOfTwo() const override; |
837 | |
838 | bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, |
839 | ISD::MemIndexedMode &AM, SelectionDAG &DAG) const; |
840 | bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, |
841 | ISD::MemIndexedMode &AM, |
842 | SelectionDAG &DAG) const override; |
843 | bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
844 | SDValue &Offset, ISD::MemIndexedMode &AM, |
845 | SelectionDAG &DAG) const override; |
846 | |
847 | bool isLegalScaleForGatherScatter(uint64_t Scale, |
848 | uint64_t ElemSize) const override { |
// Scaled addressing is not supported on indexed loads/stores.
850 | return Scale == 1; |
851 | } |
852 | |
853 | /// If the target has a standard location for the stack protector cookie, |
854 | /// returns the address of that location. Otherwise, returns nullptr. |
855 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
856 | |
/// Returns whether or not generating an interleaved load/store intrinsic for
858 | /// this type will be legal. |
859 | bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, |
860 | Align Alignment, unsigned AddrSpace, |
861 | const DataLayout &) const; |
862 | |
/// Return true if a strided load/store of the given result type and
864 | /// alignment is legal. |
865 | bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const; |
866 | |
867 | unsigned getMaxSupportedInterleaveFactor() const override { return 8; } |
868 | |
869 | bool fallBackToDAGISel(const Instruction &Inst) const override; |
870 | |
871 | bool lowerInterleavedLoad(LoadInst *LI, |
872 | ArrayRef<ShuffleVectorInst *> Shuffles, |
873 | ArrayRef<unsigned> Indices, |
874 | unsigned Factor) const override; |
875 | |
876 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
877 | unsigned Factor) const override; |
878 | |
879 | bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, |
880 | LoadInst *LI) const override; |
881 | |
882 | bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
883 | StoreInst *SI) const override; |
884 | |
885 | bool supportKCFIBundles() const override { return true; } |
886 | |
887 | SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, |
888 | int JTI, SelectionDAG &DAG) const override; |
889 | |
890 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
891 | MachineBasicBlock::instr_iterator &MBBI, |
892 | const TargetInstrInfo *TII) const override; |
893 | |
894 | /// RISCVCCAssignFn - This target-specific function extends the default |
895 | /// CCValAssign with additional information used to lower RISC-V calling |
896 | /// conventions. |
897 | typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI, |
898 | unsigned ValNo, MVT ValVT, MVT LocVT, |
899 | CCValAssign::LocInfo LocInfo, |
900 | ISD::ArgFlagsTy ArgFlags, CCState &State, |
901 | bool IsFixed, bool IsRet, Type *OrigTy, |
902 | const RISCVTargetLowering &TLI, |
903 | RVVArgDispatcher &RVVDispatcher); |
904 | |
905 | private: |
906 | void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, |
907 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
908 | RISCVCCAssignFn Fn) const; |
909 | void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo, |
910 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
911 | bool IsRet, CallLoweringInfo *CLI, |
912 | RISCVCCAssignFn Fn) const; |
913 | |
914 | template <class NodeTy> |
915 | SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true, |
916 | bool IsExternWeak = false) const; |
917 | SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, |
918 | bool UseGOT) const; |
919 | SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const; |
920 | SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const; |
921 | |
922 | SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
923 | SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
924 | SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
925 | SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
926 | SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
927 | SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
928 | SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const; |
929 | SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
930 | SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
931 | SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
932 | SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; |
933 | SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; |
934 | SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const; |
935 | SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const; |
936 | SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, |
937 | int64_t ExtTrueVal) const; |
938 | SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const; |
939 | SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const; |
940 | SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const; |
941 | SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
943 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
944 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
945 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
946 | SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const; |
947 | SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; |
948 | SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG, |
949 | bool IsVP) const; |
950 | SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const; |
951 | SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
953 | SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
954 | SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
955 | SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
956 | SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const; |
957 | SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; |
958 | SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const; |
959 | SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const; |
960 | SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const; |
961 | SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op, |
962 | SelectionDAG &DAG) const; |
963 | SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const; |
964 | SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const; |
965 | SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const; |
966 | SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const; |
967 | SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const; |
968 | SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op, |
969 | SelectionDAG &DAG) const; |
970 | SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; |
971 | SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; |
972 | SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const; |
973 | SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const; |
974 | SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const; |
975 | SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const; |
976 | SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const; |
977 | SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const; |
978 | SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const; |
979 | SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const; |
980 | SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const; |
981 | SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const; |
982 | SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const; |
983 | SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, |
984 | unsigned ExtendOpc) const; |
985 | SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
986 | SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
987 | |
988 | SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; |
989 | SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const; |
990 | |
991 | SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const; |
992 | |
993 | SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const; |
994 | |
995 | SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const; |
996 | SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const; |
997 | |
998 | bool isEligibleForTailCallOptimization( |
999 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
1000 | const SmallVector<CCValAssign, 16> &ArgLocs) const; |
1001 | |
1002 | /// Generate error diagnostics if any register used by CC has been marked |
1003 | /// reserved. |
1004 | void validateCCReservedRegs( |
1005 | const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, |
1006 | MachineFunction &MF) const; |
1007 | |
1008 | bool useRVVForFixedLengthVectorVT(MVT VT) const; |
1009 | |
1010 | MVT getVPExplicitVectorLengthTy() const override; |
1011 | |
1012 | bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF, |
1013 | bool IsScalable) const override; |
1014 | |
1015 | /// RVV code generation for fixed length vectors does not lower all |
1016 | /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to |
1017 | /// merge. However, merging them creates a BUILD_VECTOR that is just as |
1018 | /// illegal as the original, thus leading to an infinite legalisation loop. |
1019 | /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types, |
1020 | /// this override can be removed. |
1021 | bool mergeStoresAfterLegalization(EVT VT) const override; |
1022 | |
1023 | /// Disable normalizing |
1024 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
/// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)).
/// RISC-V doesn't have flags, so it's better to perform the and/or in a GPR.
1027 | bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override { |
1028 | return false; |
1029 | } |
1030 | |
1031 | /// For available scheduling models FDIV + two independent FMULs are much |
1032 | /// faster than two FDIVs. |
1033 | unsigned combineRepeatedFPDivisors() const override; |
1034 | |
1035 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1036 | SmallVectorImpl<SDNode *> &Created) const override; |
1037 | |
1038 | bool shouldFoldSelectWithSingleBitTest(EVT VT, |
1039 | const APInt &AndMask) const override; |
1040 | |
1041 | unsigned getMinimumJumpTableEntries() const override; |
1042 | |
1043 | SDValue emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue Start, |
1044 | SDValue End, SDValue Flags, SDLoc DL) const; |
1045 | }; |
1046 | |
1047 | /// As per the spec, the rules for passing vector arguments are as follows: |
1048 | /// |
1049 | /// 1. For the first vector mask argument, use v0 to pass it. |
/// 2. For vector data arguments or the remaining vector mask arguments,
/// starting from the v8 register, if a vector register group between v8-v23
/// that has not been allocated can be found and the first register number is
/// a multiple of LMUL, then allocate this vector register group to the
/// argument and mark these registers as allocated. Otherwise, the argument is
/// passed by reference and is replaced in the argument list with the address.
/// 3. For tuple vector data arguments, starting from the v8 register, if
/// NFIELDS consecutive vector register groups between v8-v23 that have not
/// been allocated can be found and the first register number is a multiple of
/// LMUL, then allocate these vector register groups to the argument and mark
/// these registers as allocated. Otherwise, the argument is passed by
/// reference and is replaced in the argument list with the address.
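/// For example (illustrative): an LMUL=4 vector data argument may only be
/// assigned one of the register groups starting at v8, v12, v16 or v20; if no
/// such group is free, it is passed indirectly by address.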
1062 | class RVVArgDispatcher { |
1063 | public: |
1064 | static constexpr unsigned NumArgVRs = 16; |
1065 | |
1066 | struct RVVArgInfo { |
1067 | unsigned NF; |
1068 | MVT VT; |
1069 | bool FirstVMask = false; |
1070 | }; |
1071 | |
1072 | template <typename Arg> |
1073 | RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI, |
1074 | ArrayRef<Arg> ArgList) |
1075 | : MF(MF), TLI(TLI) { |
1076 | constructArgInfos(ArgList); |
1077 | compute(); |
1078 | } |
1079 | |
1080 | RVVArgDispatcher() = default; |
1081 | |
1082 | MCPhysReg getNextPhysReg(); |
1083 | |
1084 | private: |
1085 | SmallVector<RVVArgInfo, 4> RVVArgInfos; |
1086 | SmallVector<MCPhysReg, 4> AllocatedPhysRegs; |
1087 | |
1088 | const MachineFunction *MF = nullptr; |
1089 | const RISCVTargetLowering *TLI = nullptr; |
1090 | |
1091 | unsigned CurIdx = 0; |
1092 | |
1093 | template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret); |
1094 | void compute(); |
1095 | void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1, |
1096 | unsigned StartReg = 0); |
1097 | }; |
1098 | |
1099 | namespace RISCV { |
1100 | |
1101 | bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, |
1102 | MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, |
1103 | ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, |
1104 | bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, |
1105 | RVVArgDispatcher &RVVDispatcher); |
1106 | |
1107 | bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, |
1108 | MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, |
1109 | ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, |
1110 | bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, |
1111 | RVVArgDispatcher &RVVDispatcher); |
1112 | |
1113 | bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
1114 | CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, |
1115 | CCState &State); |
1116 | |
1117 | ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI); |
1118 | |
1119 | } // end namespace RISCV |
1120 | |
1121 | namespace RISCVVIntrinsicsTable { |
1122 | |
1123 | struct RISCVVIntrinsicInfo { |
1124 | unsigned IntrinsicID; |
1125 | uint8_t ScalarOperand; |
1126 | uint8_t VLOperand; |
1127 | bool hasScalarOperand() const { |
1128 | // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td. |
1129 | return ScalarOperand != 0xF; |
1130 | } |
1131 | bool hasVLOperand() const { |
1132 | // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td. |
1133 | return VLOperand != 0x1F; |
1134 | } |
1135 | }; |
1136 | |
1137 | using namespace RISCV; |
1138 | |
1139 | #define GET_RISCVVIntrinsicsTable_DECL |
1140 | #include "RISCVGenSearchableTables.inc" |
1141 | #undef GET_RISCVVIntrinsicsTable_DECL |
1142 | |
1143 | } // end namespace RISCVVIntrinsicsTable |
1144 | |
1145 | } // end namespace llvm |
1146 | |
1147 | #endif |
1148 | |