//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        be a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
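// As an illustrative (non-exhaustive) example of the convention: FADD_PRED
// below follows the _PRED naming, so its inactive lanes produce undefined
// results, whereas FABS_MERGE_PASSTHRU takes an extra passthru operand that
// supplies the inactive lanes of its result.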
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Function call, authenticating the callee value first:
  // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
  AUTH_CALL,
  // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
  // operands.
  AUTH_TC_RETURN,

  // Authenticated variant of CALL_RVMARKER.
  AUTH_CALL_RVMARKER,

  COALESCER_BARRIER,

  VG_SAVE,
  VG_RESTORE,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed by block and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  URSHR_I_PRED,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Round wide FP to narrow FP with inexact results to odd.
  FCVTXN,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned/signed sum long across vector
  UADDLV,
  SADDLV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // First-faulting unsigned gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // First-faulting signed gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,
  ALLOCATE_ZA_BUFFER,
  INIT_TPIDR2OBJ,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;

// Reserved bits should be preserved when modifying FPCR.
const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;
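
// For illustration only (a minimal sketch, not part of this interface): the
// current rounding mode can be read out of a raw FPCR value as
//   Rounding RoundMode = static_cast<Rounding>((FPCRValue >> RoundingBitsPos) & rmMask);
// and any value written back to FPCR should leave the bits selected by
// ReservedFPControlBits unchanged ("FPCRValue" is a hypothetical local).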

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                         int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOptLevel OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, /*MathUsed=*/true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }
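
  // For example (illustrative only): with a v2i64 operand, returning true here
  // allows (and x, (xor y, -1)) to be selected as a single NEON BIC rather than
  // a separate NOT followed by an AND.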

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool shouldExpandCmpUsingSelects() const override { return true; }

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportPtrAuthBundles() const override { return true; }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
  /// AArch64SME::ToggleCondition.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue, unsigned Condition,
                              SDValue PStateSM = SDValue()) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

#ifndef NDEBUG
  void verifyTargetSDNode(const SDNode *N) const override;
#endif

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRType(MVT VT);
  void addQRType(MVT VT);

  bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT,
                                              AArch64PACKey::ID Key,
                                              SDValue Discriminator,
                                              SDValue AddrDiscriminator,
                                              SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle lowering of flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
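
  // For illustration only (a hypothetical example, not a description of the
  // implementation): casting an unpacked nxv2f32 value to nxv2i64 could first
  // REINTERPRET_CAST it to the packed nxv4f32 type and then BITCAST the result
  // to nxv2i64, since the two packed types have the same bit length.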

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  bool softPromoteHalfType() const override { return true; }
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif