1 | //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that AArch64 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
16 | |
17 | #include "AArch64.h" |
18 | #include "Utils/AArch64SMEAttributes.h" |
19 | #include "llvm/CodeGen/CallingConvLower.h" |
20 | #include "llvm/CodeGen/MachineFunction.h" |
21 | #include "llvm/CodeGen/SelectionDAG.h" |
22 | #include "llvm/CodeGen/TargetLowering.h" |
23 | #include "llvm/IR/CallingConv.h" |
24 | #include "llvm/IR/Instruction.h" |
25 | |
26 | namespace llvm { |
27 | |
28 | namespace AArch64ISD { |
29 | |
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        provide a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
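// For example (an illustrative reading aid using nodes defined below):
// FDIV_PRED leaves the inactive lanes of its result undefined,
// SETCC_MERGE_ZERO zeroes them, and FABS_MERGE_PASSTHRU takes them from its
// trailing passthru operand.
//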
50 | enum NodeType : unsigned { |
51 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
52 | WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. |
53 | CALL, // Function call. |
54 | |
  // Pseudo for an OBJC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
57 | CALL_RVMARKER, |
58 | |
59 | CALL_BTI, // Function call followed by a BTI instruction. |
60 | |
61 | // Function call, authenticating the callee value first: |
62 | // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands. |
63 | AUTH_CALL, |
64 | // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc, |
65 | // operands. |
66 | AUTH_TC_RETURN, |
67 | |
68 | // Authenticated variant of CALL_RVMARKER. |
69 | AUTH_CALL_RVMARKER, |
70 | |
71 | COALESCER_BARRIER, |
72 | |
73 | VG_SAVE, |
74 | VG_RESTORE, |
75 | |
76 | SMSTART, |
77 | SMSTOP, |
78 | RESTORE_ZA, |
79 | RESTORE_ZT, |
80 | SAVE_ZT, |
81 | |
82 | // A call with the callee in x16, i.e. "blr x16". |
83 | CALL_ARM64EC_TO_X64, |
84 | |
85 | // Produces the full sequence of instructions for getting the thread pointer |
86 | // offset of a variable into X0, using the TLSDesc model. |
87 | TLSDESC_CALLSEQ, |
88 | ADRP, // Page address of a TargetGlobalAddress operand. |
89 | ADR, // ADR |
90 | ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. |
91 | LOADgot, // Load from automatically generated descriptor (e.g. Global |
92 | // Offset Table, TLS record). |
93 | RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. |
94 | BRCOND, // Conditional branch instruction; "b.cond". |
95 | CSEL, |
96 | CSINV, // Conditional select invert. |
97 | CSNEG, // Conditional select negate. |
98 | CSINC, // Conditional select increment. |
99 | |
100 | // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on |
101 | // ELF. |
102 | THREAD_POINTER, |
103 | ADC, |
104 | SBC, // adc, sbc instructions |
105 | |
106 | // To avoid stack clash, allocation is performed by block and each block is |
107 | // probed. |
108 | PROBED_ALLOCA, |
109 | |
110 | // Predicated instructions where inactive lanes produce undefined results. |
111 | ABDS_PRED, |
112 | ABDU_PRED, |
113 | FADD_PRED, |
114 | FDIV_PRED, |
115 | FMA_PRED, |
116 | FMAX_PRED, |
117 | FMAXNM_PRED, |
118 | FMIN_PRED, |
119 | FMINNM_PRED, |
120 | FMUL_PRED, |
121 | FSUB_PRED, |
122 | HADDS_PRED, |
123 | HADDU_PRED, |
124 | MUL_PRED, |
125 | MULHS_PRED, |
126 | MULHU_PRED, |
127 | RHADDS_PRED, |
128 | RHADDU_PRED, |
129 | SDIV_PRED, |
130 | SHL_PRED, |
131 | SMAX_PRED, |
132 | SMIN_PRED, |
133 | SRA_PRED, |
134 | SRL_PRED, |
135 | UDIV_PRED, |
136 | UMAX_PRED, |
137 | UMIN_PRED, |
138 | |
139 | // Unpredicated vector instructions |
140 | BIC, |
141 | |
142 | SRAD_MERGE_OP1, |
143 | |
144 | // Predicated instructions with the result of inactive lanes provided by the |
145 | // last operand. |
146 | FABS_MERGE_PASSTHRU, |
147 | FCEIL_MERGE_PASSTHRU, |
148 | FFLOOR_MERGE_PASSTHRU, |
149 | FNEARBYINT_MERGE_PASSTHRU, |
150 | FNEG_MERGE_PASSTHRU, |
151 | FRECPX_MERGE_PASSTHRU, |
152 | FRINT_MERGE_PASSTHRU, |
153 | FROUND_MERGE_PASSTHRU, |
154 | FROUNDEVEN_MERGE_PASSTHRU, |
155 | FSQRT_MERGE_PASSTHRU, |
156 | FTRUNC_MERGE_PASSTHRU, |
157 | FP_ROUND_MERGE_PASSTHRU, |
158 | FP_EXTEND_MERGE_PASSTHRU, |
159 | UINT_TO_FP_MERGE_PASSTHRU, |
160 | SINT_TO_FP_MERGE_PASSTHRU, |
161 | FCVTZU_MERGE_PASSTHRU, |
162 | FCVTZS_MERGE_PASSTHRU, |
163 | SIGN_EXTEND_INREG_MERGE_PASSTHRU, |
164 | ZERO_EXTEND_INREG_MERGE_PASSTHRU, |
165 | ABS_MERGE_PASSTHRU, |
166 | NEG_MERGE_PASSTHRU, |
167 | |
168 | SETCC_MERGE_ZERO, |
169 | |
170 | // Arithmetic instructions which write flags. |
171 | ADDS, |
172 | SUBS, |
173 | ADCS, |
174 | SBCS, |
175 | ANDS, |
176 | |
177 | // Conditional compares. Operands: left,right,falsecc,cc,flags |
178 | CCMP, |
179 | CCMN, |
180 | FCCMP, |
181 | |
182 | // Floating point comparison |
183 | FCMP, |
184 | |
185 | // Scalar-to-vector duplication |
186 | DUP, |
187 | DUPLANE8, |
188 | DUPLANE16, |
189 | DUPLANE32, |
190 | DUPLANE64, |
191 | DUPLANE128, |
192 | |
  // Vector immediate moves
194 | MOVI, |
195 | MOVIshift, |
196 | MOVIedit, |
197 | MOVImsl, |
198 | FMOV, |
199 | MVNIshift, |
200 | MVNImsl, |
201 | |
202 | // Vector immediate ops |
203 | BICi, |
204 | ORRi, |
205 | |
206 | // Vector bitwise select: similar to ISD::VSELECT but not all bits within an |
207 | // element must be identical. |
208 | BSP, |
209 | |
210 | // Vector shuffles |
211 | ZIP1, |
212 | ZIP2, |
213 | UZP1, |
214 | UZP2, |
215 | TRN1, |
216 | TRN2, |
217 | REV16, |
218 | REV32, |
219 | REV64, |
220 | EXT, |
221 | SPLICE, |
222 | |
223 | // Vector shift by scalar |
224 | VSHL, |
225 | VLSHR, |
226 | VASHR, |
227 | |
228 | // Vector shift by scalar (again) |
229 | SQSHL_I, |
230 | UQSHL_I, |
231 | SQSHLU_I, |
232 | SRSHR_I, |
233 | URSHR_I, |
234 | URSHR_I_PRED, |
235 | |
236 | // Vector narrowing shift by immediate (bottom) |
237 | RSHRNB_I, |
238 | |
239 | // Vector shift by constant and insert |
240 | VSLI, |
241 | VSRI, |
242 | |
243 | // Vector comparisons |
244 | CMEQ, |
245 | CMGE, |
246 | CMGT, |
247 | CMHI, |
248 | CMHS, |
249 | FCMEQ, |
250 | FCMGE, |
251 | FCMGT, |
252 | |
253 | // Vector zero comparisons |
254 | CMEQz, |
255 | CMGEz, |
256 | CMGTz, |
257 | CMLEz, |
258 | CMLTz, |
259 | FCMEQz, |
260 | FCMGEz, |
261 | FCMGTz, |
262 | FCMLEz, |
263 | FCMLTz, |
264 | |
265 | // Round wide FP to narrow FP with inexact results to odd. |
266 | FCVTXN, |
267 | |
268 | // Vector across-lanes addition |
269 | // Only the lower result lane is defined. |
270 | SADDV, |
271 | UADDV, |
272 | |
  // Sum long across vector, unsigned (UADDLV) and signed (SADDLV)
274 | UADDLV, |
275 | SADDLV, |
276 | |
277 | // Add Pairwise of two vectors |
278 | ADDP, |
279 | // Add Long Pairwise |
280 | SADDLP, |
281 | UADDLP, |
282 | |
283 | // udot/sdot instructions |
284 | UDOT, |
285 | SDOT, |
286 | |
287 | // Vector across-lanes min/max |
288 | // Only the lower result lane is defined. |
289 | SMINV, |
290 | UMINV, |
291 | SMAXV, |
292 | UMAXV, |
293 | |
294 | SADDV_PRED, |
295 | UADDV_PRED, |
296 | SMAXV_PRED, |
297 | UMAXV_PRED, |
298 | SMINV_PRED, |
299 | UMINV_PRED, |
300 | ORV_PRED, |
301 | EORV_PRED, |
302 | ANDV_PRED, |
303 | |
304 | // Compare-and-branch |
305 | CBZ, |
306 | CBNZ, |
307 | TBZ, |
308 | TBNZ, |
309 | |
310 | // Tail calls |
311 | TC_RETURN, |
312 | |
313 | // Custom prefetch handling |
314 | PREFETCH, |
315 | |
316 | // {s|u}int to FP within a FP register. |
317 | SITOF, |
318 | UITOF, |
319 | |
320 | /// Natural vector cast. ISD::BITCAST is not natural in the big-endian |
321 | /// world w.r.t vectors; which causes additional REV instructions to be |
322 | /// generated to compensate for the byte-swapping. But sometimes we do |
323 | /// need to re-interpret the data in SIMD vector registers in big-endian |
324 | /// mode without emitting such REV instructions. |
325 | NVCAST, |
326 | |
327 | MRS, // MRS, also sets the flags via a glue. |
328 | |
329 | SMULL, |
330 | UMULL, |
331 | |
332 | PMULL, |
333 | |
334 | // Reciprocal estimates and steps. |
335 | FRECPE, |
336 | FRECPS, |
337 | FRSQRTE, |
338 | FRSQRTS, |
339 | |
340 | SUNPKHI, |
341 | SUNPKLO, |
342 | UUNPKHI, |
343 | UUNPKLO, |
344 | |
345 | CLASTA_N, |
346 | CLASTB_N, |
347 | LASTA, |
348 | LASTB, |
349 | TBL, |
350 | |
351 | // Floating-point reductions. |
352 | FADDA_PRED, |
353 | FADDV_PRED, |
354 | FMAXV_PRED, |
355 | FMAXNMV_PRED, |
356 | FMINV_PRED, |
357 | FMINNMV_PRED, |
358 | |
359 | INSR, |
360 | PTEST, |
361 | PTEST_ANY, |
362 | PTRUE, |
363 | |
364 | CTTZ_ELTS, |
365 | |
366 | BITREVERSE_MERGE_PASSTHRU, |
367 | BSWAP_MERGE_PASSTHRU, |
368 | REVH_MERGE_PASSTHRU, |
369 | REVW_MERGE_PASSTHRU, |
370 | CTLZ_MERGE_PASSTHRU, |
371 | CTPOP_MERGE_PASSTHRU, |
372 | DUP_MERGE_PASSTHRU, |
373 | INDEX_VECTOR, |
374 | |
  // Cast between vectors of the same element type that differ in length.
376 | REINTERPRET_CAST, |
377 | |
378 | // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa |
379 | LS64_BUILD, |
  LS64_EXTRACT,
381 | |
382 | LD1_MERGE_ZERO, |
383 | LD1S_MERGE_ZERO, |
384 | LDNF1_MERGE_ZERO, |
385 | LDNF1S_MERGE_ZERO, |
386 | LDFF1_MERGE_ZERO, |
387 | LDFF1S_MERGE_ZERO, |
388 | LD1RQ_MERGE_ZERO, |
389 | LD1RO_MERGE_ZERO, |
390 | |
391 | // Structured loads. |
392 | SVE_LD2_MERGE_ZERO, |
393 | SVE_LD3_MERGE_ZERO, |
394 | SVE_LD4_MERGE_ZERO, |
395 | |
396 | // Unsigned gather loads. |
397 | GLD1_MERGE_ZERO, |
398 | GLD1_SCALED_MERGE_ZERO, |
399 | GLD1_UXTW_MERGE_ZERO, |
400 | GLD1_SXTW_MERGE_ZERO, |
401 | GLD1_UXTW_SCALED_MERGE_ZERO, |
402 | GLD1_SXTW_SCALED_MERGE_ZERO, |
403 | GLD1_IMM_MERGE_ZERO, |
404 | GLD1Q_MERGE_ZERO, |
405 | GLD1Q_INDEX_MERGE_ZERO, |
406 | |
407 | // Signed gather loads |
408 | GLD1S_MERGE_ZERO, |
409 | GLD1S_SCALED_MERGE_ZERO, |
410 | GLD1S_UXTW_MERGE_ZERO, |
411 | GLD1S_SXTW_MERGE_ZERO, |
412 | GLD1S_UXTW_SCALED_MERGE_ZERO, |
413 | GLD1S_SXTW_SCALED_MERGE_ZERO, |
414 | GLD1S_IMM_MERGE_ZERO, |
415 | |
  // First-faulting unsigned gather loads.
417 | GLDFF1_MERGE_ZERO, |
418 | GLDFF1_SCALED_MERGE_ZERO, |
419 | GLDFF1_UXTW_MERGE_ZERO, |
420 | GLDFF1_SXTW_MERGE_ZERO, |
421 | GLDFF1_UXTW_SCALED_MERGE_ZERO, |
422 | GLDFF1_SXTW_SCALED_MERGE_ZERO, |
423 | GLDFF1_IMM_MERGE_ZERO, |
424 | |
  // First-faulting signed gather loads.
426 | GLDFF1S_MERGE_ZERO, |
427 | GLDFF1S_SCALED_MERGE_ZERO, |
428 | GLDFF1S_UXTW_MERGE_ZERO, |
429 | GLDFF1S_SXTW_MERGE_ZERO, |
430 | GLDFF1S_UXTW_SCALED_MERGE_ZERO, |
431 | GLDFF1S_SXTW_SCALED_MERGE_ZERO, |
432 | GLDFF1S_IMM_MERGE_ZERO, |
433 | |
434 | // Non-temporal gather loads |
435 | GLDNT1_MERGE_ZERO, |
436 | GLDNT1_INDEX_MERGE_ZERO, |
437 | GLDNT1S_MERGE_ZERO, |
438 | |
439 | // Contiguous masked store. |
440 | ST1_PRED, |
441 | |
442 | // Scatter store |
443 | SST1_PRED, |
444 | SST1_SCALED_PRED, |
445 | SST1_UXTW_PRED, |
446 | SST1_SXTW_PRED, |
447 | SST1_UXTW_SCALED_PRED, |
448 | SST1_SXTW_SCALED_PRED, |
449 | SST1_IMM_PRED, |
450 | SST1Q_PRED, |
451 | SST1Q_INDEX_PRED, |
452 | |
453 | // Non-temporal scatter store |
454 | SSTNT1_PRED, |
455 | SSTNT1_INDEX_PRED, |
456 | |
457 | // SME |
458 | RDSVL, |
459 | REVD_MERGE_PASSTHRU, |
460 | ALLOCATE_ZA_BUFFER, |
461 | INIT_TPIDR2OBJ, |
462 | |
463 | // Asserts that a function argument (i32) is zero-extended to i8 by |
464 | // the caller |
465 | ASSERT_ZEXT_BOOL, |
466 | |
467 | // 128-bit system register accesses |
468 | // lo64, hi64, chain = MRRS(chain, sysregname) |
469 | MRRS, |
470 | // chain = MSRR(chain, sysregname, lo64, hi64) |
471 | MSRR, |
472 | |
473 | // Strict (exception-raising) floating point comparison |
474 | STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, |
475 | STRICT_FCMPE, |
476 | |
477 | // SME ZA loads and stores |
478 | SME_ZA_LDR, |
479 | SME_ZA_STR, |
480 | |
481 | // NEON Load/Store with post-increment base updates |
482 | LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, |
483 | LD3post, |
484 | LD4post, |
485 | ST2post, |
486 | ST3post, |
487 | ST4post, |
488 | LD1x2post, |
489 | LD1x3post, |
490 | LD1x4post, |
491 | ST1x2post, |
492 | ST1x3post, |
493 | ST1x4post, |
494 | LD1DUPpost, |
495 | LD2DUPpost, |
496 | LD3DUPpost, |
497 | LD4DUPpost, |
498 | LD1LANEpost, |
499 | LD2LANEpost, |
500 | LD3LANEpost, |
501 | LD4LANEpost, |
502 | ST2LANEpost, |
503 | ST3LANEpost, |
504 | ST4LANEpost, |
505 | |
506 | STG, |
507 | STZG, |
508 | ST2G, |
509 | STZ2G, |
510 | |
511 | LDP, |
512 | LDIAPP, |
513 | LDNP, |
514 | STP, |
515 | STILP, |
516 | STNP, |
517 | |
518 | // Memory Operations |
519 | MOPS_MEMSET, |
520 | MOPS_MEMSET_TAGGING, |
521 | MOPS_MEMCOPY, |
522 | MOPS_MEMMOVE, |
523 | }; |
524 | |
525 | } // end namespace AArch64ISD |
526 | |
527 | namespace AArch64 { |
528 | /// Possible values of current rounding mode, which is specified in bits |
529 | /// 23:22 of FPCR. |
530 | enum Rounding { |
531 | RN = 0, // Round to Nearest |
532 | RP = 1, // Round towards Plus infinity |
533 | RM = 2, // Round towards Minus infinity |
534 | RZ = 3, // Round towards Zero |
535 | rmMask = 3 // Bit mask selecting rounding mode |
536 | }; |
537 | |
538 | // Bit position of rounding mode bits in FPCR. |
539 | const unsigned RoundingBitsPos = 22; |
540 | |
541 | // Reserved bits should be preserved when modifying FPCR. |
542 | const uint64_t ReservedFPControlBits = 0xfffffffff80040f8; |
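
// For example (illustrative only): the current rounding mode can be derived
// from a raw FPCR value as
//   Rounding RM = static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);
// and any new value written back to FPCR is expected to leave the bits
// selected by ReservedFPControlBits unchanged.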
543 | |
544 | // Registers used to pass function arguments. |
545 | ArrayRef<MCPhysReg> getGPRArgRegs(); |
546 | ArrayRef<MCPhysReg> getFPRArgRegs(); |
547 | |
548 | /// Maximum allowed number of unprobed bytes above SP at an ABI |
549 | /// boundary. |
550 | const unsigned StackProbeMaxUnprobedStack = 1024; |
551 | |
552 | /// Maximum number of iterations to unroll for a constant size probing loop. |
553 | const unsigned StackProbeMaxLoopUnroll = 4; |
554 | |
555 | } // namespace AArch64 |
556 | |
557 | class AArch64Subtarget; |
558 | |
559 | class AArch64TargetLowering : public TargetLowering { |
560 | public: |
561 | explicit AArch64TargetLowering(const TargetMachine &TM, |
562 | const AArch64Subtarget &STI); |
563 | |
564 | /// Control the following reassociation of operands: (op (op x, c1), y) -> (op |
565 | /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. |
566 | bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
567 | SDValue N1) const override; |
568 | |
569 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
570 | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; |
571 | |
572 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
573 | CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; |
574 | |
575 | /// Determine which of the bits specified in Mask are known to be either zero |
576 | /// or one and return them in the KnownZero/KnownOne bitsets. |
577 | void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, |
578 | const APInt &DemandedElts, |
579 | const SelectionDAG &DAG, |
580 | unsigned Depth = 0) const override; |
581 | |
582 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
583 | const APInt &DemandedElts, |
584 | const SelectionDAG &DAG, |
585 | unsigned Depth) const override; |
586 | |
587 | MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { |
588 | // Returning i64 unconditionally here (i.e. even for ILP32) means that the |
589 | // *DAG* representation of pointers will always be 64-bits. They will be |
590 | // truncated and extended when transferred to memory, but the 64-bit DAG |
591 | // allows us to use AArch64's addressing modes much more easily. |
    return MVT::getIntegerVT(64);
593 | } |
594 | |
595 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
596 | const APInt &DemandedElts, |
597 | TargetLoweringOpt &TLO) const override; |
598 | |
599 | MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; |
600 | |
601 | /// Returns true if the target allows unaligned memory accesses of the |
602 | /// specified type. |
603 | bool allowsMisalignedMemoryAccesses( |
604 | EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
605 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
606 | unsigned *Fast = nullptr) const override; |
607 | /// LLT variant. |
608 | bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, |
609 | Align Alignment, |
610 | MachineMemOperand::Flags Flags, |
611 | unsigned *Fast = nullptr) const override; |
612 | |
613 | /// Provide custom lowering hooks for some operations. |
614 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
615 | |
616 | const char *getTargetNodeName(unsigned Opcode) const override; |
617 | |
618 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
619 | |
620 | /// This method returns a target specific FastISel object, or null if the |
621 | /// target does not support "fast" ISel. |
622 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
623 | const TargetLibraryInfo *libInfo) const override; |
624 | |
625 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
626 | |
627 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
628 | bool ForCodeSize) const override; |
629 | |
630 | /// Return true if the given shuffle mask can be codegen'd directly, or if it |
631 | /// should be stack expanded. |
632 | bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; |
633 | |
  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
636 | bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; |
637 | |
638 | /// Return the ISD::SETCC ValueType. |
639 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
640 | EVT VT) const override; |
641 | |
642 | SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; |
643 | |
644 | MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, |
645 | MachineBasicBlock *BB) const; |
646 | |
647 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
648 | MachineBasicBlock *BB) const; |
649 | |
650 | MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, |
651 | MachineBasicBlock *MBB) const; |
652 | |
653 | MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, |
654 | MachineInstr &MI, |
655 | MachineBasicBlock *BB) const; |
656 | MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; |
657 | MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, |
658 | MachineInstr &MI, MachineBasicBlock *BB) const; |
659 | MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, |
660 | unsigned Opcode, bool Op0IsDef) const; |
661 | MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; |
662 | MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI, |
663 | MachineBasicBlock *BB) const; |
664 | MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI, |
665 | MachineBasicBlock *BB) const; |
666 | |
667 | MachineBasicBlock * |
668 | EmitInstrWithCustomInserter(MachineInstr &MI, |
669 | MachineBasicBlock *MBB) const override; |
670 | |
671 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
672 | MachineFunction &MF, |
673 | unsigned Intrinsic) const override; |
674 | |
675 | bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
676 | EVT NewVT) const override; |
677 | |
678 | bool shouldRemoveRedundantExtend(SDValue Op) const override; |
679 | |
680 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
681 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
682 | |
683 | bool isProfitableToHoist(Instruction *I) const override; |
684 | |
685 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
686 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
687 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
688 | |
689 | bool shouldSinkOperands(Instruction *I, |
690 | SmallVectorImpl<Use *> &Ops) const override; |
691 | |
692 | bool optimizeExtendOrTruncateConversion( |
693 | Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override; |
694 | |
695 | bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override; |
696 | |
697 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
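
  // Illustrative note: an interleaved access group of factor 4 maps onto a
  // single ld4/st4 structured memory operation; larger factors have no
  // matching instruction and are not treated as interleaved accesses.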
698 | |
699 | bool lowerInterleavedLoad(LoadInst *LI, |
700 | ArrayRef<ShuffleVectorInst *> Shuffles, |
701 | ArrayRef<unsigned> Indices, |
702 | unsigned Factor) const override; |
703 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
704 | unsigned Factor) const override; |
705 | |
706 | bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
707 | LoadInst *LI) const override; |
708 | |
709 | bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
710 | StoreInst *SI) const override; |
711 | |
712 | bool isLegalAddImmediate(int64_t) const override; |
713 | bool isLegalAddScalableImmediate(int64_t) const override; |
714 | bool isLegalICmpImmediate(int64_t) const override; |
715 | |
716 | bool isMulAddWithConstProfitable(SDValue AddNode, |
717 | SDValue ConstNode) const override; |
718 | |
719 | bool shouldConsiderGEPOffsetSplit() const override; |
720 | |
721 | EVT getOptimalMemOpType(const MemOp &Op, |
722 | const AttributeList &FuncAttributes) const override; |
723 | |
724 | LLT getOptimalMemOpLLT(const MemOp &Op, |
725 | const AttributeList &FuncAttributes) const override; |
726 | |
727 | /// Return true if the addressing mode represented by AM is legal for this |
728 | /// target, for a load/store of the specified type. |
729 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
730 | unsigned AS, |
731 | Instruction *I = nullptr) const override; |
732 | |
733 | int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, |
734 | int64_t MaxOffset) const override; |
735 | |
736 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
737 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
738 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
739 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
740 | EVT VT) const override; |
741 | bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; |
742 | |
743 | bool generateFMAsInMachineCombiner(EVT VT, |
744 | CodeGenOptLevel OptLevel) const override; |
745 | |
746 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
747 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
748 | |
749 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
750 | bool isDesirableToCommuteWithShift(const SDNode *N, |
751 | CombineLevel Level) const override; |
752 | |
753 | bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override { |
754 | return false; |
755 | } |
756 | |
757 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
758 | bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; |
759 | |
760 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
761 | bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
762 | CombineLevel Level) const override; |
763 | |
764 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
765 | EVT VT) const override; |
766 | |
767 | /// Returns true if it is beneficial to convert a load of a constant |
768 | /// to just the constant itself. |
769 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
770 | Type *Ty) const override; |
771 | |
772 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
773 | /// with this index. |
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;
776 | |
777 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
778 | bool MathUsed) const override { |
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
782 | } |
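
  // Illustrative note: an overflow check such as llvm.sadd.with.overflow
  // typically maps onto the flag-setting ADDS node above, with the overflow
  // result consumed from the V flag (e.g. via CSINC or B.VS), so forming the
  // overflow op is cheap even when only the overflow bit is used.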
783 | |
784 | Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, |
785 | AtomicOrdering Ord) const override; |
786 | Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, |
787 | AtomicOrdering Ord) const override; |
788 | |
789 | void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override; |
790 | |
791 | bool isOpSuitableForLDPSTP(const Instruction *I) const; |
792 | bool isOpSuitableForLSE128(const Instruction *I) const; |
793 | bool isOpSuitableForRCPC3(const Instruction *I) const; |
794 | bool shouldInsertFencesForAtomic(const Instruction *I) const override; |
795 | bool |
796 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override; |
797 | |
798 | TargetLoweringBase::AtomicExpansionKind |
799 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
800 | TargetLoweringBase::AtomicExpansionKind |
801 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
802 | TargetLoweringBase::AtomicExpansionKind |
803 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
804 | |
805 | TargetLoweringBase::AtomicExpansionKind |
806 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; |
807 | |
808 | bool useLoadStackGuardNode() const override; |
809 | TargetLoweringBase::LegalizeTypeAction |
810 | getPreferredVectorAction(MVT VT) const override; |
811 | |
812 | /// If the target has a standard location for the stack protector cookie, |
813 | /// returns the address of that location. Otherwise, returns nullptr. |
814 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
815 | |
816 | void insertSSPDeclarations(Module &M) const override; |
817 | Value *getSDagStackGuard(const Module &M) const override; |
818 | Function *getSSPStackGuardCheck(const Module &M) const override; |
819 | |
820 | /// If the target has a standard location for the unsafe stack pointer, |
821 | /// returns the address of that location. Otherwise, returns nullptr. |
822 | Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; |
823 | |
824 | /// If a physical register, this returns the register that receives the |
825 | /// exception address on entry to an EH pad. |
826 | Register |
827 | getExceptionPointerRegister(const Constant *PersonalityFn) const override { |
828 | // FIXME: This is a guess. Has this been defined yet? |
829 | return AArch64::X0; |
830 | } |
831 | |
832 | /// If a physical register, this returns the register that receives the |
833 | /// exception typeid on entry to a landing pad. |
834 | Register |
835 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override { |
836 | // FIXME: This is a guess. Has this been defined yet? |
837 | return AArch64::X1; |
838 | } |
839 | |
840 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
841 | |
842 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
843 | const MachineFunction &MF) const override { |
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
848 | |
849 | if (NoFloat) |
850 | return (MemVT.getSizeInBits() <= 64); |
851 | return true; |
852 | } |
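
  // Illustrative note: under noimplicitfloat, merging four i32 stores into a
  // single 128-bit store would typically be lowered through a Q (FP/SIMD)
  // register, so only merges of up to 64 bits (a plain X-register store) are
  // permitted above.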
853 | |
854 | bool isCheapToSpeculateCttz(Type *) const override { |
855 | return true; |
856 | } |
857 | |
858 | bool isCheapToSpeculateCtlz(Type *) const override { |
859 | return true; |
860 | } |
861 | |
862 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
863 | |
864 | bool hasAndNotCompare(SDValue V) const override { |
865 | // We can use bics for any scalar. |
866 | return V.getValueType().isScalarInteger(); |
867 | } |
868 | |
869 | bool hasAndNot(SDValue Y) const override { |
870 | EVT VT = Y.getValueType(); |
871 | |
872 | if (!VT.isVector()) |
      return hasAndNotCompare(Y);
874 | |
875 | TypeSize TS = VT.getSizeInBits(); |
876 | // TODO: We should be able to use bic/bif too for SVE. |
877 | return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic' |
878 | } |
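
  // Illustrative note: the and-not pattern this enables is
  // (and x, (xor y, -1)), which selects to BIC for scalars or the vector BIC
  // mentioned above, avoiding a separate MVN of y.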
879 | |
880 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
881 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
882 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
883 | SelectionDAG &DAG) const override; |
884 | |
885 | ShiftLegalizationStrategy |
886 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
887 | unsigned ExpansionFactor) const override; |
888 | |
889 | bool shouldTransformSignedTruncationCheck(EVT XVT, |
890 | unsigned KeptBits) const override { |
    // For vectors, we don't have a preference.
892 | if (XVT.isVector()) |
893 | return false; |
894 | |
895 | auto VTIsOk = [](EVT VT) -> bool { |
896 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
897 | VT == MVT::i64; |
898 | }; |
899 | |
    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
903 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
904 | } |
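
  // Illustrative note: a signed truncation check that keeps, say, 32 bits of
  // an i64 value can be rewritten around a sign-extension of the kept bits
  // (SXTW here) followed by a compare with the original value, which is why
  // only the extension widths SXT supports are accepted above.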
905 | |
906 | bool preferIncOfAddToSubOfNot(EVT VT) const override; |
907 | |
908 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; |
909 | |
910 | bool shouldExpandCmpUsingSelects() const override { return true; } |
911 | |
912 | bool isComplexDeinterleavingSupported() const override; |
913 | bool isComplexDeinterleavingOperationSupported( |
914 | ComplexDeinterleavingOperation Operation, Type *Ty) const override; |
915 | |
916 | Value *createComplexDeinterleavingIR( |
917 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
918 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
919 | Value *Accumulator = nullptr) const override; |
920 | |
921 | bool supportSplitCSR(MachineFunction *MF) const override { |
922 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
924 | } |
925 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
926 | void insertCopiesSplitCSR( |
927 | MachineBasicBlock *Entry, |
928 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
929 | |
930 | bool supportSwiftError() const override { |
931 | return true; |
932 | } |
933 | |
934 | bool supportPtrAuthBundles() const override { return true; } |
935 | |
936 | bool supportKCFIBundles() const override { return true; } |
937 | |
938 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
939 | MachineBasicBlock::instr_iterator &MBBI, |
940 | const TargetInstrInfo *TII) const override; |
941 | |
942 | /// Enable aggressive FMA fusion on targets that want it. |
943 | bool enableAggressiveFMAFusion(EVT VT) const override; |
944 | |
945 | /// Returns the size of the platform's va_list object. |
946 | unsigned getVaListSizeInBits(const DataLayout &DL) const override; |
947 | |
948 | /// Returns true if \p VecTy is a legal interleaved access type. This |
949 | /// function checks the vector element type and the overall width of the |
950 | /// vector. |
951 | bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, |
952 | bool &UseScalable) const; |
953 | |
954 | /// Returns the number of interleaved accesses that will be generated when |
955 | /// lowering accesses of the given type. |
956 | unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, |
957 | bool UseScalable) const; |
958 | |
959 | MachineMemOperand::Flags getTargetMMOFlags( |
960 | const Instruction &I) const override; |
961 | |
962 | bool functionArgumentNeedsConsecutiveRegisters( |
963 | Type *Ty, CallingConv::ID CallConv, bool isVarArg, |
964 | const DataLayout &DL) const override; |
965 | |
966 | /// Used for exception handling on Win64. |
967 | bool needsFixedCatchObjects() const override; |
968 | |
969 | bool fallBackToDAGISel(const Instruction &Inst) const override; |
970 | |
971 | /// SVE code generation for fixed length vectors does not custom lower |
972 | /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to |
973 | /// merge. However, merging them creates a BUILD_VECTOR that is just as |
974 | /// illegal as the original, thus leading to an infinite legalisation loop. |
975 | /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal |
976 | /// vector types this override can be removed. |
977 | bool mergeStoresAfterLegalization(EVT VT) const override; |
978 | |
979 | // If the platform/function should have a redzone, return the size in bytes. |
980 | unsigned getRedZoneSize(const Function &F) const { |
    if (F.hasFnAttribute(Attribute::NoRedZone))
982 | return 0; |
983 | return 128; |
984 | } |
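
  // Illustrative note: the red zone is the region immediately below SP (128
  // bytes here) that a function may use for short-lived data without
  // adjusting SP; functions marked noredzone opt out and report 0 above.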
985 | |
986 | bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const; |
987 | EVT getPromotedVTForPredicate(EVT VT) const; |
988 | |
989 | EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
990 | bool AllowUnknown = false) const override; |
991 | |
992 | bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override; |
993 | |
994 | bool shouldExpandCttzElements(EVT VT) const override; |
995 | |
996 | /// If a change in streaming mode is required on entry to/return from a |
997 | /// function call it emits and returns the corresponding SMSTART or SMSTOP |
998 | /// node. \p Condition should be one of the enum values from |
999 | /// AArch64SME::ToggleCondition. |
1000 | SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, |
1001 | SDValue Chain, SDValue InGlue, unsigned Condition, |
1002 | SDValue PStateSM = SDValue()) const; |
1003 | |
1004 | bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } |
1005 | |
1006 | // Normally SVE is only used for byte size vectors that do not fit within a |
1007 | // NEON vector. This changes when OverrideNEON is true, allowing SVE to be |
1008 | // used for 64bit and 128bit vectors as well. |
1009 | bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; |
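
  // For example (illustrative only): a fixed-length v8i32 (256 bits) does not
  // fit in a 128-bit NEON register and so takes the SVE path when available,
  // whereas 64-bit and 128-bit fixed vectors only do so when OverrideNEON is
  // true.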
1010 | |
1011 | // Follow NEON ABI rules even when using SVE for fixed length vectors. |
1012 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
1013 | EVT VT) const override; |
1014 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
1015 | CallingConv::ID CC, |
1016 | EVT VT) const override; |
1017 | unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, |
1018 | CallingConv::ID CC, EVT VT, |
1019 | EVT &IntermediateVT, |
1020 | unsigned &NumIntermediates, |
1021 | MVT &RegisterVT) const override; |
1022 | |
  /// True if stack clash protection is enabled for this function.
1024 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
1025 | |
1026 | #ifndef NDEBUG |
1027 | void verifyTargetSDNode(const SDNode *N) const override; |
1028 | #endif |
1029 | |
1030 | private: |
1031 | /// Keep a pointer to the AArch64Subtarget around so that we can |
1032 | /// make the right decision when generating code for different targets. |
1033 | const AArch64Subtarget *Subtarget; |
1034 | |
1035 | llvm::BumpPtrAllocator BumpAlloc; |
1036 | llvm::StringSaver Saver{BumpAlloc}; |
1037 | |
1038 | bool isExtFreeImpl(const Instruction *Ext) const override; |
1039 | |
1040 | void addTypeForNEON(MVT VT); |
1041 | void addTypeForFixedLengthSVE(MVT VT); |
1042 | void addDRType(MVT VT); |
1043 | void addQRType(MVT VT); |
1044 | |
1045 | bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override; |
1046 | |
1047 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
1048 | bool isVarArg, |
1049 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1050 | const SDLoc &DL, SelectionDAG &DAG, |
1051 | SmallVectorImpl<SDValue> &InVals) const override; |
1052 | |
1053 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
1054 | SDNode *Node) const override; |
1055 | |
1056 | SDValue LowerCall(CallLoweringInfo & /*CLI*/, |
1057 | SmallVectorImpl<SDValue> &InVals) const override; |
1058 | |
1059 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
1060 | CallingConv::ID CallConv, bool isVarArg, |
1061 | const SmallVectorImpl<CCValAssign> &RVLocs, |
1062 | const SDLoc &DL, SelectionDAG &DAG, |
1063 | SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
1064 | SDValue ThisVal, bool RequiresSMChange) const; |
1065 | |
1066 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
1067 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
1068 | SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; |
1069 | SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; |
1070 | |
1071 | SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; |
1072 | SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; |
1073 | |
1074 | SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; |
1075 | |
1076 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1077 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1078 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
1079 | |
1080 | bool |
1081 | isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const; |
1082 | |
1083 | /// Finds the incoming stack arguments which overlap the given fixed stack |
1084 | /// object and incorporates their load into the current chain. This prevents |
1085 | /// an upcoming store from clobbering the stack argument before it's used. |
1086 | SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, |
1087 | MachineFrameInfo &MFI, int ClobberedFI) const; |
1088 | |
1089 | bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; |
1090 | |
1091 | void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, |
1092 | SDValue &Chain) const; |
1093 | |
1094 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
1095 | bool isVarArg, |
1096 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1097 | LLVMContext &Context) const override; |
1098 | |
1099 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1100 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1101 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
1102 | SelectionDAG &DAG) const override; |
1103 | |
1104 | SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1105 | unsigned Flag) const; |
1106 | SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, |
1107 | unsigned Flag) const; |
1108 | SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1109 | unsigned Flag) const; |
1110 | SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1111 | unsigned Flag) const; |
1112 | SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1113 | unsigned Flag) const; |
1114 | template <class NodeTy> |
1115 | SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1116 | template <class NodeTy> |
1117 | SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1118 | template <class NodeTy> |
1119 | SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1120 | template <class NodeTy> |
1121 | SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1122 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1123 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1124 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1125 | SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1126 | SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1127 | SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase, |
1128 | const SDLoc &DL, SelectionDAG &DAG) const; |
1129 | SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, |
1130 | SelectionDAG &DAG) const; |
1131 | SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1132 | SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1133 | SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT, |
1134 | AArch64PACKey::ID Key, |
1135 | SDValue Discriminator, |
1136 | SDValue AddrDiscriminator, |
1137 | SelectionDAG &DAG) const; |
1138 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
1139 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
1140 | SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; |
1141 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
1142 | SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; |
1143 | SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, |
1144 | SDValue TVal, SDValue FVal, const SDLoc &dl, |
1145 | SelectionDAG &DAG) const; |
1146 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
1147 | SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
1148 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
1149 | SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; |
1150 | SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const; |
1151 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
1152 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
1153 | SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1154 | SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1155 | SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1156 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
1157 | SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; |
1158 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
1159 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
1160 | SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; |
1161 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1162 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1163 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1164 | SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
1165 | SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
1166 | SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
1167 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1169 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1170 | SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; |
1171 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
1172 | SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1173 | SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; |
1174 | SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, |
1175 | unsigned NewOp) const; |
1176 | SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; |
1177 | SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; |
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1179 | SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
1180 | SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1181 | SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1182 | SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const; |
1183 | SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; |
1184 | SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; |
1185 | SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; |
1186 | SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; |
1187 | SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; |
1188 | SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const; |
1189 | SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; |
1190 | SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; |
1191 | SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; |
1192 | SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; |
1193 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
1194 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
1195 | SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1196 | SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1197 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1198 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1199 | SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const; |
1200 | SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1201 | SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1202 | SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; |
1203 | SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; |
1204 | SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; |
1205 | SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; |
1206 | SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
1207 | SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; |
1208 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
1209 | SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; |
1210 | SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; |
1211 | SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1212 | SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1213 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1214 | |
1215 | SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; |
1216 | |
1217 | SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, |
1218 | SelectionDAG &DAG) const; |
1219 | SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, |
1220 | SelectionDAG &DAG) const; |
1221 | SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1222 | SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1223 | SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; |
1224 | SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; |
1225 | SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, |
1226 | SelectionDAG &DAG) const; |
1227 | SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; |
1228 | SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; |
1229 | SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const; |
1230 | SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op, |
1231 | SelectionDAG &DAG) const; |
1232 | SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op, |
1233 | SelectionDAG &DAG) const; |
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1235 | SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const; |
1236 | SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const; |
1237 | SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op, |
1238 | SelectionDAG &DAG) const; |
1239 | SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const; |
1240 | SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const; |
1241 | SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const; |
1242 | SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const; |
1243 | SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op, |
1244 | SelectionDAG &DAG) const; |
1245 | |
1246 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1247 | SmallVectorImpl<SDNode *> &Created) const override; |
1248 | SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1249 | SmallVectorImpl<SDNode *> &Created) const override; |
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
1255 | SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
1256 | const DenormalMode &Mode) const override; |
1257 | SDValue getSqrtResultForDenormInput(SDValue Operand, |
1258 | SelectionDAG &DAG) const override; |
1259 | unsigned combineRepeatedFPDivisors() const override; |
1260 | |
1261 | ConstraintType getConstraintType(StringRef Constraint) const override; |
1262 | Register getRegisterByName(const char* RegName, LLT VT, |
1263 | const MachineFunction &MF) const override; |
1264 | |
1265 | /// Examine constraint string and operand type and determine a weight value. |
1266 | /// The operand object must already have been set up with the operand type. |
1267 | ConstraintWeight |
1268 | getSingleConstraintMatchWeight(AsmOperandInfo &info, |
1269 | const char *constraint) const override; |
1270 | |
1271 | std::pair<unsigned, const TargetRegisterClass *> |
1272 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
1273 | StringRef Constraint, MVT VT) const override; |
1274 | |
1275 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
1276 | |
1277 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
1278 | std::vector<SDValue> &Ops, |
1279 | SelectionDAG &DAG) const override; |
1280 | |
1281 | InlineAsm::ConstraintCode |
1282 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
    if (ConstraintCode == "Q")
1284 | return InlineAsm::ConstraintCode::Q; |
1285 | // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are |
1286 | // followed by llvm_unreachable so we'll leave them unimplemented in |
1287 | // the backend for now. |
1288 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
1289 | } |
1290 | |
1291 | /// Handle Lowering flag assembly outputs. |
1292 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
1293 | const SDLoc &DL, |
1294 | const AsmOperandInfo &Constraint, |
1295 | SelectionDAG &DAG) const override; |
1296 | |
1297 | bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override; |
1298 | bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; |
1299 | bool isVectorLoadExtDesirable(SDValue ExtVal) const override; |
1300 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
1301 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
1302 | bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1303 | SDValue &Offset, SelectionDAG &DAG) const; |
1304 | bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, |
1305 | ISD::MemIndexedMode &AM, |
1306 | SelectionDAG &DAG) const override; |
1307 | bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1308 | SDValue &Offset, ISD::MemIndexedMode &AM, |
1309 | SelectionDAG &DAG) const override; |
1310 | bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
1311 | bool IsPre, MachineRegisterInfo &MRI) const override; |
1312 | |
1313 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1314 | SelectionDAG &DAG) const override; |
1315 | void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1316 | SelectionDAG &DAG) const; |
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;
1320 | |
1321 | bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; |
1322 | |
1323 | void finalizeLowering(MachineFunction &MF) const override; |
1324 | |
1325 | bool shouldLocalize(const MachineInstr &MI, |
1326 | const TargetTransformInfo *TTI) const override; |
1327 | |
1328 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
1329 | const APInt &OriginalDemandedBits, |
1330 | const APInt &OriginalDemandedElts, |
1331 | KnownBits &Known, |
1332 | TargetLoweringOpt &TLO, |
1333 | unsigned Depth) const override; |
1334 | |
1335 | bool isTargetCanonicalConstantNode(SDValue Op) const override; |
1336 | |
1337 | // With the exception of data-predicate transitions, no instructions are |
1338 | // required to cast between legal scalable vector types. However: |
1339 | // 1. Packed and unpacked types have different bit lengths, meaning BITCAST |
1340 | // is not universally useable. |
1341 | // 2. Most unpacked integer types are not legal and thus integer extends |
1342 | // cannot be used to convert between unpacked and packed types. |
1343 | // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used |
1344 | // to transition between unpacked and packed types of the same element type, |
1345 | // with BITCAST used otherwise. |
1346 | // This function does not handle predicate bitcasts. |
1347 | SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; |
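
  // Illustrative example (not a fixed recipe): bitcasting nxv2i64 to the
  // unpacked nxv2f32 may be emitted as a BITCAST from nxv2i64 to the packed
  // nxv4f32 followed by a REINTERPRET_CAST from nxv4f32 down to nxv2f32,
  // following the multiphase scheme described above.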
1348 | |
1349 | // Returns the runtime value for PSTATE.SM by generating a call to |
1350 | // __arm_sme_state. |
1351 | SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL, |
1352 | EVT VT) const; |
1353 | |
1354 | bool preferScalarizeSplat(SDNode *N) const override; |
1355 | |
1356 | unsigned getMinimumJumpTableEntries() const override; |
1357 | |
1358 | bool softPromoteHalfType() const override { return true; } |
1359 | }; |
1360 | |
1361 | namespace AArch64 { |
1362 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1363 | const TargetLibraryInfo *libInfo); |
1364 | } // end namespace AArch64 |
1365 | |
1366 | } // end namespace llvm |
1367 | |
1368 | #endif |
1369 | |