| 1 | //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the interfaces that AArch64 uses to lower LLVM code into a |
| 10 | // selection DAG. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
| 15 | #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
| 16 | |
| 17 | #include "llvm/CodeGen/CallingConvLower.h" |
| 18 | #include "llvm/CodeGen/MachineFunction.h" |
| 19 | #include "llvm/CodeGen/SelectionDAG.h" |
| 20 | #include "llvm/CodeGen/TargetLowering.h" |
| 21 | #include "llvm/IR/CallingConv.h" |
| 22 | #include "llvm/IR/Instruction.h" |
| 23 | |
| 24 | namespace llvm { |
| 25 | |
| 26 | class AArch64TargetMachine; |
| 27 | |
| 28 | namespace AArch64 { |
| 29 | /// Possible values of current rounding mode, which is specified in bits |
| 30 | /// 23:22 of FPCR. |
| 31 | enum Rounding { |
| 32 | RN = 0, // Round to Nearest |
| 33 | RP = 1, // Round towards Plus infinity |
| 34 | RM = 2, // Round towards Minus infinity |
| 35 | RZ = 3, // Round towards Zero |
| 36 | rmMask = 3 // Bit mask selecting rounding mode |
| 37 | }; |
| 38 | |
| 39 | // Bit position of rounding mode bits in FPCR. |
| 40 | const unsigned RoundingBitsPos = 22; |
| 41 | |
| 42 | // Reserved bits should be preserved when modifying FPCR. |
| 43 | const uint64_t ReservedFPControlBits = 0xfffffffff80040f8; |
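| | // For example, the current rounding mode can be read with an expression like |
| | //   (FPCR >> RoundingBitsPos) & rmMask |
| | // which yields one of the Rounding values above; when writing FPCR, all bits |
| | // covered by ReservedFPControlBits must be left unchanged (illustrative note, |
| | // not the lowering code itself). |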
| 44 | |
| 45 | // Registers used to pass function arguments. |
| 46 | ArrayRef<MCPhysReg> getGPRArgRegs(); |
| 47 | ArrayRef<MCPhysReg> getFPRArgRegs(); |
| 48 | |
| 49 | /// Maximum allowed number of unprobed bytes above SP at an ABI |
| 50 | /// boundary. |
| 51 | const unsigned StackProbeMaxUnprobedStack = 1024; |
| 52 | |
| 53 | /// Maximum number of iterations to unroll for a constant size probing loop. |
| 54 | const unsigned StackProbeMaxLoopUnroll = 4; |
| 55 | |
| 56 | } // namespace AArch64 |
| 57 | |
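| | // Address-space numbers used to model mixed-size pointers: PTR32_SPTR and |
| | // PTR32_UPTR are sign- and zero-extended 32-bit pointers (e.g. from the |
| | // __ptr32 extension mentioned in getPointerTy() below), while PTR64 is an |
| | // explicit 64-bit pointer. |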
| 58 | namespace ARM64AS { |
| 59 | enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 }; |
| 60 | } |
| 61 | |
| 62 | class AArch64Subtarget; |
| 63 | |
| 64 | class AArch64TargetLowering : public TargetLowering { |
| 65 | public: |
| 66 | explicit AArch64TargetLowering(const TargetMachine &TM, |
| 67 | const AArch64Subtarget &STI); |
| 68 | |
| 69 | const AArch64TargetMachine &getTM() const; |
| 70 | |
| 71 | /// Control the following reassociation of operands: (op (op x, c1), y) -> (op |
| 72 | /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. |
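| | /// For example, (add (add x, 42), y) may become (add (add x, y), 42) when |
| | /// this hook reports the reassociation as profitable. |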
| 73 | bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
| 74 | SDValue N1) const override; |
| 75 | |
| 76 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
| 77 | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; |
| 78 | |
| 79 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
| 80 | CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; |
| 81 | |
| 82 | /// Determine which of the bits specified in Mask are known to be either zero |
| 83 | /// or one and return them in the KnownZero/KnownOne bitsets. |
| 84 | void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, |
| 85 | const APInt &DemandedElts, |
| 86 | const SelectionDAG &DAG, |
| 87 | unsigned Depth = 0) const override; |
| 88 | |
| 89 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
| 90 | const APInt &DemandedElts, |
| 91 | const SelectionDAG &DAG, |
| 92 | unsigned Depth) const override; |
| 93 | |
| 94 | MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { |
| 95 | if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) { |
| 96 | // These are 32-bit pointers created using the `__ptr32` extension or |
| 97 | // similar. They are handled by marking them as being in a different |
| 98 | // address space, and will be extended to 64-bits when used as the target |
| 99 | // of a load or store operation, or cast to a 64-bit pointer type. |
| 100 | return MVT::i32; |
| 101 | } else { |
| 102 | // Returning i64 unconditionally here (i.e. even for ILP32) means that the |
| 103 | // *DAG* representation of pointers will always be 64-bits. They will be |
| 104 | // truncated and extended when transferred to memory, but the 64-bit DAG |
| 105 | // allows us to use AArch64's addressing modes much more easily. |
| 106 | return MVT::i64; |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | unsigned getVectorIdxWidth(const DataLayout &DL) const override { |
| 111 | // The VectorIdx type is i64 under both the standard and ILP32 ABIs. |
| 112 | return 64; |
| 113 | } |
| 114 | |
| 115 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
| 116 | const APInt &DemandedElts, |
| 117 | TargetLoweringOpt &TLO) const override; |
| 118 | |
| 119 | MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; |
| 120 | |
| 121 | /// Returns true if the target allows unaligned memory accesses of the |
| 122 | /// specified type. |
| 123 | bool allowsMisalignedMemoryAccesses( |
| 124 | EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
| 125 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
| 126 | unsigned *Fast = nullptr) const override; |
| 127 | /// LLT variant. |
| 128 | bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, |
| 129 | Align Alignment, |
| 130 | MachineMemOperand::Flags Flags, |
| 131 | unsigned *Fast = nullptr) const override; |
| 132 | |
| 133 | /// Provide custom lowering hooks for some operations. |
| 134 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
| 135 | |
| 136 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
| 137 | |
| 138 | /// This method returns a target specific FastISel object, or null if the |
| 139 | /// target does not support "fast" ISel. |
| 140 | FastISel * |
| 141 | createFastISel(FunctionLoweringInfo &funcInfo, |
| 142 | const TargetLibraryInfo *libInfo, |
| 143 | const LibcallLoweringInfo *libcallLowering) const override; |
| 144 | |
| 145 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
| 146 | |
| 147 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
| 148 | bool ForCodeSize) const override; |
| 149 | |
| 150 | /// Return true if the given shuffle mask can be codegen'd directly, or if it |
| 151 | /// should be stack expanded. |
| 152 | bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; |
| 153 | |
| 154 | /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero' |
| 155 | /// shuffle mask can be codegen'd directly. |
| 156 | bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; |
| 157 | |
| 158 | /// Return the ISD::SETCC ValueType. |
| 159 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
| 160 | EVT VT) const override; |
| 161 | |
| 162 | SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; |
| 163 | |
| 164 | MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, |
| 165 | MachineBasicBlock *BB) const; |
| 166 | |
| 167 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
| 168 | MachineBasicBlock *BB) const; |
| 169 | |
| 170 | MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, |
| 171 | MachineBasicBlock *MBB) const; |
| 172 | |
| 173 | MachineBasicBlock *EmitCheckMatchingVL(MachineInstr &MI, |
| 174 | MachineBasicBlock *MBB) const; |
| 175 | |
| 176 | MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, |
| 177 | MachineInstr &MI, |
| 178 | MachineBasicBlock *BB) const; |
| 179 | MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; |
| 180 | MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, |
| 181 | MachineInstr &MI, MachineBasicBlock *BB) const; |
| 182 | MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, |
| 183 | unsigned Opcode, bool Op0IsDef) const; |
| 184 | MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; |
| 185 | |
| 186 | // Note: The following group of functions are only used as part of the old SME |
| 187 | // ABI lowering. They will be removed once -aarch64-new-sme-abi=true is the |
| 188 | // default. |
| 189 | MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI, |
| 190 | MachineBasicBlock *BB) const; |
| 191 | MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI, |
| 192 | MachineBasicBlock *BB) const; |
| 193 | MachineBasicBlock *EmitAllocateSMESaveBuffer(MachineInstr &MI, |
| 194 | MachineBasicBlock *BB) const; |
| 195 | MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI, |
| 196 | MachineBasicBlock *BB) const; |
| 197 | MachineBasicBlock *EmitEntryPStateSM(MachineInstr &MI, |
| 198 | MachineBasicBlock *BB) const; |
| 199 | |
| 200 | /// Replace (0, vreg) discriminator components with the operands of blend |
| 201 | /// or with (immediate, NoRegister) when possible. |
| 202 | void fixupPtrauthDiscriminator(MachineInstr &MI, MachineBasicBlock *BB, |
| 203 | MachineOperand &IntDiscOp, |
| 204 | MachineOperand &AddrDiscOp, |
| 205 | const TargetRegisterClass *AddrDiscRC) const; |
| 206 | |
| 207 | MachineBasicBlock * |
| 208 | EmitInstrWithCustomInserter(MachineInstr &MI, |
| 209 | MachineBasicBlock *MBB) const override; |
| 210 | |
| 211 | void getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos, |
| 212 | const CallBase &I, MachineFunction &MF, |
| 213 | unsigned Intrinsic) const override; |
| 214 | |
| 215 | bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, |
| 216 | std::optional<unsigned> ByteOffset) const override; |
| 217 | |
| 218 | bool shouldRemoveRedundantExtend(SDValue Op) const override; |
| 219 | |
| 220 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
| 221 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
| 222 | |
| 223 | bool isProfitableToHoist(Instruction *I) const override; |
| 224 | |
| 225 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
| 226 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
| 227 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
| 228 | |
| 229 | bool optimizeExtendOrTruncateConversion( |
| 230 | Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override; |
| 231 | |
| 232 | bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override; |
| 233 | |
| 234 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
| 235 | |
| 236 | bool lowerInterleavedLoad(Instruction *Load, Value *Mask, |
| 237 | ArrayRef<ShuffleVectorInst *> Shuffles, |
| 238 | ArrayRef<unsigned> Indices, unsigned Factor, |
| 239 | const APInt &GapMask) const override; |
| 240 | bool lowerInterleavedStore(Instruction *Store, Value *Mask, |
| 241 | ShuffleVectorInst *SVI, unsigned Factor, |
| 242 | const APInt &GapMask) const override; |
| 243 | |
| 244 | bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, |
| 245 | IntrinsicInst *DI) const override; |
| 246 | |
| 247 | bool lowerInterleaveIntrinsicToStore( |
| 248 | Instruction *Store, Value *Mask, |
| 249 | ArrayRef<Value *> InterleaveValues) const override; |
| 250 | |
| 251 | bool isLegalAddImmediate(int64_t) const override; |
| 252 | bool isLegalAddScalableImmediate(int64_t) const override; |
| 253 | bool isLegalICmpImmediate(int64_t) const override; |
| 254 | |
| 255 | bool isMulAddWithConstProfitable(SDValue AddNode, |
| 256 | SDValue ConstNode) const override; |
| 257 | |
| 258 | bool shouldConsiderGEPOffsetSplit() const override; |
| 259 | |
| 260 | EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, |
| 261 | const AttributeList &FuncAttributes) const override; |
| 262 | |
| 263 | LLT getOptimalMemOpLLT(const MemOp &Op, |
| 264 | const AttributeList &FuncAttributes) const override; |
| 265 | |
| 266 | bool findOptimalMemOpLowering(LLVMContext &Context, std::vector<EVT> &MemOps, |
| 267 | unsigned Limit, const MemOp &Op, unsigned DstAS, |
| 268 | unsigned SrcAS, |
| 269 | const AttributeList &FuncAttributes, |
| 270 | EVT *LargestVT = nullptr) const override; |
| 271 | |
| 272 | /// Return true if the addressing mode represented by AM is legal for this |
| 273 | /// target, for a load/store of the specified type. |
| 274 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
| 275 | unsigned AS, |
| 276 | Instruction *I = nullptr) const override; |
| 277 | |
| 278 | int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, |
| 279 | int64_t MaxOffset) const override; |
| 280 | |
| 281 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
| 282 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
| 283 | /// returns true; otherwise, fmuladd is expanded to fmul + fadd. |
| 284 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
| 285 | EVT VT) const override; |
| 286 | bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; |
| 287 | |
| 288 | bool generateFMAsInMachineCombiner(EVT VT, |
| 289 | CodeGenOptLevel OptLevel) const override; |
| 290 | |
| 291 | /// Return true if the target has native support for |
| 292 | /// the specified value type and it is 'desirable' to use the type for the |
| 293 | /// given node type. |
| 294 | bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; |
| 295 | |
| 296 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
| 297 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
| 298 | |
| 299 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
| 300 | bool isDesirableToCommuteWithShift(const SDNode *N, |
| 301 | CombineLevel Level) const override; |
| 302 | |
| 303 | bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override { |
| 304 | return false; |
| 305 | } |
| 306 | |
| 307 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
| 308 | bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; |
| 309 | |
| 310 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
| 311 | bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override; |
| 312 | |
| 313 | /// Return true if folding a mask x & (-1 << y) into the shift pair (x >> y) << y is profitable. |
| 314 | bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override { |
| 315 | EVT VT = Y.getValueType(); |
| 316 | |
| 317 | if (VT.isVector()) |
| 318 | return false; |
| 319 | |
| 320 | return VT.getScalarSizeInBits() <= 64; |
| 321 | } |
| 322 | |
| 323 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, |
| 324 | unsigned SelectOpcode, SDValue X, |
| 325 | SDValue Y) const override; |
| 326 | |
| 327 | /// Returns true if it is beneficial to convert a load of a constant |
| 328 | /// to just the constant itself. |
| 329 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
| 330 | Type *Ty) const override; |
| 331 | |
| 332 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
| 333 | /// with this index. |
| 334 | bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
| 335 | unsigned Index) const override; |
| 336 | |
| 337 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
| 338 | bool MathUsed) const override { |
| 339 | // Using overflow ops for overflow checks only should be beneficial on |
| 340 | // AArch64. |
| 341 | return TargetLowering::shouldFormOverflowOp(Opcode, VT, /*MathUsed=*/true); |
| 342 | } |
| 343 | |
| 344 | // Return true if the target wants to optimize the mul overflow intrinsic |
| 345 | // for the given \p VT. |
| 346 | bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, |
| 347 | EVT VT) const override; |
| 348 | |
| 349 | Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, |
| 350 | AtomicOrdering Ord) const override; |
| 351 | Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, |
| 352 | AtomicOrdering Ord) const override; |
| 353 | |
| 354 | void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override; |
| 355 | |
| 356 | bool isOpSuitableForLDPSTP(const Instruction *I) const; |
| 357 | bool isOpSuitableForLSE128(const Instruction *I) const; |
| 358 | bool isOpSuitableForRCPC3(const Instruction *I) const; |
| 359 | bool shouldInsertFencesForAtomic(const Instruction *I) const override; |
| 360 | bool shouldInsertTrailingSeqCstFenceForAtomicStore( |
| 361 | const Instruction *I) const override; |
| 362 | |
| 363 | TargetLoweringBase::AtomicExpansionKind |
| 364 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
| 365 | TargetLoweringBase::AtomicExpansionKind |
| 366 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
| 367 | TargetLoweringBase::AtomicExpansionKind |
| 368 | shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override; |
| 369 | |
| 370 | TargetLoweringBase::AtomicExpansionKind |
| 371 | shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override; |
| 372 | |
| 373 | bool useLoadStackGuardNode(const Module &M) const override; |
| 374 | TargetLoweringBase::LegalizeTypeAction |
| 375 | getPreferredVectorAction(MVT VT) const override; |
| 376 | |
| 377 | /// If the target has a standard location for the stack protector cookie, |
| 378 | /// returns the address of that location. Otherwise, returns nullptr. |
| 379 | Value *getIRStackGuard(IRBuilderBase &IRB, |
| 380 | const LibcallLoweringInfo &Libcalls) const override; |
| 381 | |
| 382 | void |
| 383 | insertSSPDeclarations(Module &M, |
| 384 | const LibcallLoweringInfo &Libcalls) const override; |
| 385 | |
| 386 | /// If the target has a standard location for the unsafe stack pointer, |
| 387 | /// returns the address of that location. Otherwise, returns nullptr. |
| 388 | Value *getSafeStackPointerLocation( |
| 389 | IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override; |
| 390 | |
| 391 | /// If a physical register, this returns the register that receives the |
| 392 | /// exception address on entry to an EH pad. |
| 393 | Register |
| 394 | getExceptionPointerRegister(const Constant *PersonalityFn) const override; |
| 395 | |
| 396 | /// If a physical register, this returns the register that receives the |
| 397 | /// exception typeid on entry to a landing pad. |
| 398 | Register |
| 399 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override; |
| 400 | |
| 401 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
| 402 | |
| 403 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
| 404 | const MachineFunction &MF) const override; |
| 405 | |
| 406 | bool isCheapToSpeculateCttz(Type *) const override { |
| 407 | return true; |
| 408 | } |
| 409 | |
| 410 | bool isCheapToSpeculateCtlz(Type *) const override { |
| 411 | return true; |
| 412 | } |
| 413 | |
| 414 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
| 415 | |
| 416 | bool hasAndNotCompare(SDValue V) const override { |
| 417 | // We can use bics for any scalar. |
| 418 | return V.getValueType().isScalarInteger(); |
| 419 | } |
| 420 | |
| 421 | bool hasAndNot(SDValue Y) const override { |
| 422 | EVT VT = Y.getValueType(); |
| 423 | |
| 424 | if (!VT.isVector()) |
| 425 | return hasAndNotCompare(Y); |
| 426 | |
| 427 | if (VT.isScalableVector()) |
| 428 | return true; |
| 429 | |
| 430 | return VT.getFixedSizeInBits() >= 64; // vector 'bic' |
| 431 | } |
| 432 | |
| 433 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
| 434 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
| 435 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
| 436 | SelectionDAG &DAG) const override; |
| 437 | |
| 438 | ShiftLegalizationStrategy |
| 439 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
| 440 | unsigned ExpansionFactor) const override; |
| 441 | |
| 442 | bool shouldTransformSignedTruncationCheck(EVT XVT, |
| 443 | unsigned KeptBits) const override { |
| 444 | // For vectors, we don't have a preference. |
| 445 | if (XVT.isVector()) |
| 446 | return false; |
| 447 | |
| 448 | auto VTIsOk = [](EVT VT) -> bool { |
| 449 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
| 450 | VT == MVT::i64; |
| 451 | }; |
| 452 | |
| 453 | // We are ok with KeptBitsVT being byte/word/dword, what SXT supports. |
| 454 | // XVT will be larger than KeptBitsVT. |
| 455 | MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); |
| 456 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
| 457 | } |
| 458 | |
| 459 | bool preferIncOfAddToSubOfNot(EVT VT) const override; |
| 460 | |
| 461 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; |
| 462 | |
| 463 | bool preferSelectsOverBooleanArithmetic(EVT VT) const override; |
| 464 | |
| 465 | bool isComplexDeinterleavingSupported() const override; |
| 466 | bool isComplexDeinterleavingOperationSupported( |
| 467 | ComplexDeinterleavingOperation Operation, Type *Ty) const override; |
| 468 | |
| 469 | Value *createComplexDeinterleavingIR( |
| 470 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
| 471 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
| 472 | Value *Accumulator = nullptr) const override; |
| 473 | |
| 474 | bool supportSplitCSR(MachineFunction *MF) const override { |
| 475 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
| 476 | MF->getFunction().hasFnAttribute(Attribute::NoUnwind); |
| 477 | } |
| 478 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
| 479 | void insertCopiesSplitCSR( |
| 480 | MachineBasicBlock *Entry, |
| 481 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
| 482 | |
| 483 | bool supportSwiftError() const override { |
| 484 | return true; |
| 485 | } |
| 486 | |
| 487 | bool supportPtrAuthBundles() const override { return true; } |
| 488 | |
| 489 | bool supportKCFIBundles() const override { return true; } |
| 490 | |
| 491 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
| 492 | MachineBasicBlock::instr_iterator &MBBI, |
| 493 | const TargetInstrInfo *TII) const override; |
| 494 | |
| 495 | bool shallExtractConstSplatVectorElementToStore( |
| 496 | Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override; |
| 497 | |
| 498 | /// Enable aggressive FMA fusion on targets that want it. |
| 499 | bool enableAggressiveFMAFusion(EVT VT) const override; |
| 500 | |
| 501 | bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { |
| 502 | return true; |
| 503 | } |
| 504 | |
| 505 | /// Returns the size of the platform's va_list object. |
| 506 | unsigned getVaListSizeInBits(const DataLayout &DL) const override; |
| 507 | |
| 508 | /// Returns true if \p VecTy is a legal interleaved access type. This |
| 509 | /// function checks the vector element type and the overall width of the |
| 510 | /// vector. |
| 511 | bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, |
| 512 | bool &UseScalable) const; |
| 513 | |
| 514 | /// Returns the number of interleaved accesses that will be generated when |
| 515 | /// lowering accesses of the given type. |
| 516 | unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, |
| 517 | bool UseScalable) const; |
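| | // For example, a 256-bit fixed-length vector access is typically split into |
| | // two 128-bit interleaved accesses on a NEON target (illustrative; the exact |
| | // count depends on the element type and on whether SVE lowering is used). |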
| 518 | |
| 519 | MachineMemOperand::Flags getTargetMMOFlags( |
| 520 | const Instruction &I) const override; |
| 521 | |
| 522 | bool functionArgumentNeedsConsecutiveRegisters( |
| 523 | Type *Ty, CallingConv::ID CallConv, bool isVarArg, |
| 524 | const DataLayout &DL) const override; |
| 525 | |
| 526 | /// Used for exception handling on Win64. |
| 527 | bool needsFixedCatchObjects() const override; |
| 528 | |
| 529 | bool fallBackToDAGISel(const Instruction &Inst) const override; |
| 530 | |
| 531 | /// SVE code generation for fixed length vectors does not custom lower |
| 532 | /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to |
| 533 | /// merge. However, merging them creates a BUILD_VECTOR that is just as |
| 534 | /// illegal as the original, thus leading to an infinite legalisation loop. |
| 535 | /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal |
| 536 | /// vector types this override can be removed. |
| 537 | bool mergeStoresAfterLegalization(EVT VT) const override; |
| 538 | |
| 539 | // If the platform/function should have a redzone, return the size in bytes. |
| 540 | unsigned getRedZoneSize(const Function &F) const { |
| 541 | if (F.hasFnAttribute(Attribute::NoRedZone)) |
| 542 | return 0; |
| 543 | return 128; |
| 544 | } |
| 545 | |
| 546 | bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const; |
| 547 | EVT getPromotedVTForPredicate(EVT VT) const; |
| 548 | |
| 549 | EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
| 550 | bool AllowUnknown = false) const override; |
| 551 | |
| 552 | bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override; |
| 553 | |
| 554 | bool shouldExpandCttzElements(EVT VT) const override; |
| 555 | |
| 556 | bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override; |
| 557 | |
| 558 | /// If a change in streaming mode is required on entry to/return from a |
| 559 | /// function call it emits and returns the corresponding SMSTART or SMSTOP |
| 560 | /// node. \p Condition should be one of the enum values from |
| 561 | /// AArch64SME::ToggleCondition. |
| 562 | SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, |
| 563 | SDValue Chain, SDValue InGlue, unsigned Condition, |
| 564 | bool InsertVectorLengthCheck = false) const; |
| 565 | |
| 566 | bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } |
| 567 | |
| 568 | // Normally SVE is only used for byte size vectors that do not fit within a |
| 569 | // NEON vector. This changes when OverrideNEON is true, allowing SVE to be |
| 570 | // used for 64-bit and 128-bit vectors as well. |
| 571 | bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; |
| 572 | |
| 573 | // Follow NEON ABI rules even when using SVE for fixed length vectors. |
| 574 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
| 575 | EVT VT) const override; |
| 576 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
| 577 | CallingConv::ID CC, |
| 578 | EVT VT) const override; |
| 579 | unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, |
| 580 | CallingConv::ID CC, EVT VT, |
| 581 | EVT &IntermediateVT, |
| 582 | unsigned &NumIntermediates, |
| 583 | MVT &RegisterVT) const override; |
| 584 | |
| 585 | /// True if stack clash protection is enabled for this function. |
| 586 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
| 587 | |
| 588 | /// In AArch64, true if FEAT_CPA is present. Allows pointer arithmetic |
| 589 | /// semantics to be preserved for instruction selection. |
| 590 | bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override; |
| 591 | |
| 592 | private: |
| 593 | /// Keep a pointer to the AArch64Subtarget around so that we can |
| 594 | /// make the right decision when generating code for different targets. |
| 595 | const AArch64Subtarget *Subtarget; |
| 596 | |
| 597 | bool isExtFreeImpl(const Instruction *Ext) const override; |
| 598 | |
| 599 | void addTypeForNEON(MVT VT); |
| 600 | void addTypeForFixedLengthSVE(MVT VT); |
| 601 | void addDRType(MVT VT); |
| 602 | void addQRType(MVT VT); |
| 603 | |
| 604 | bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override; |
| 605 | |
| 606 | SDValue lowerEHPadEntry(SDValue Chain, SDLoc const &DL, |
| 607 | SelectionDAG &DAG) const override; |
| 608 | |
| 609 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
| 610 | bool isVarArg, |
| 611 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 612 | const SDLoc &DL, SelectionDAG &DAG, |
| 613 | SmallVectorImpl<SDValue> &InVals) const override; |
| 614 | |
| 615 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
| 616 | SDNode *Node) const override; |
| 617 | |
| 618 | SDValue LowerCall(CallLoweringInfo & /*CLI*/, |
| 619 | SmallVectorImpl<SDValue> &InVals) const override; |
| 620 | |
| 621 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
| 622 | CallingConv::ID CallConv, bool isVarArg, |
| 623 | const SmallVectorImpl<CCValAssign> &RVLocs, |
| 624 | const SDLoc &DL, SelectionDAG &DAG, |
| 625 | SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
| 626 | SDValue ThisVal, bool RequiresSMChange) const; |
| 627 | |
| 628 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
| 629 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
| 630 | SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; |
| 631 | SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; |
| 632 | SDValue LowerFMUL(SDValue Op, SelectionDAG &DAG) const; |
| 633 | SDValue LowerFMA(SDValue Op, SelectionDAG &DAG) const; |
| 634 | |
| 635 | SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; |
| 636 | SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; |
| 637 | |
| 638 | SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; |
| 639 | |
| 640 | SDValue LowerVECTOR_COMPRESS(SDValue Op, SelectionDAG &DAG) const; |
| 641 | |
| 642 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
| 643 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
| 644 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
| 645 | |
| 646 | bool |
| 647 | isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const; |
| 648 | |
| 649 | /// Finds the incoming stack arguments which overlap the given fixed stack |
| 650 | /// object and incorporates their load into the current chain. This prevents |
| 651 | /// an upcoming store from clobbering the stack argument before it's used. |
| 652 | SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, |
| 653 | MachineFrameInfo &MFI, int ClobberedFI) const; |
| 654 | |
| 655 | bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; |
| 656 | |
| 657 | void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, |
| 658 | SDValue &Chain) const; |
| 659 | |
| 660 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
| 661 | bool isVarArg, |
| 662 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 663 | LLVMContext &Context, const Type *RetTy) const override; |
| 664 | |
| 665 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 666 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 667 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
| 668 | SelectionDAG &DAG) const override; |
| 669 | |
| 670 | SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
| 671 | unsigned Flag) const; |
| 672 | SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, |
| 673 | unsigned Flag) const; |
| 674 | SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, |
| 675 | unsigned Flag) const; |
| 676 | SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
| 677 | unsigned Flag) const; |
| 678 | SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, |
| 679 | unsigned Flag) const; |
| 680 | template <class NodeTy> |
| 681 | SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
| 682 | template <class NodeTy> |
| 683 | SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
| 684 | template <class NodeTy> |
| 685 | SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
| 686 | template <class NodeTy> |
| 687 | SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
| 688 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
| 689 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
| 690 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
| 691 | SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
| 692 | SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
| 693 | SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase, |
| 694 | const SDLoc &DL, SelectionDAG &DAG) const; |
| 695 | SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, |
| 696 | SelectionDAG &DAG) const; |
| 697 | SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
| 698 | SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
| 699 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
| 700 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
| 701 | SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; |
| 702 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
| 703 | SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; |
| 704 | SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, |
| 705 | SDValue TVal, SDValue FVal, |
| 706 | iterator_range<SDNode::user_iterator> Users, |
| 707 | SDNodeFlags Flags, const SDLoc &dl, |
| 708 | SelectionDAG &DAG) const; |
| 709 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
| 710 | SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
| 711 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
| 712 | SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; |
| 713 | SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const; |
| 714 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
| 715 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
| 716 | SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; |
| 717 | SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; |
| 718 | SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; |
| 719 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
| 720 | SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; |
| 721 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
| 722 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
| 723 | SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; |
| 724 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
| 725 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
| 726 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
| 727 | SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
| 728 | SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
| 729 | SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
| 730 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
| 731 | SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
| 732 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 733 | SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; |
| 734 | SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; |
| 735 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
| 736 | SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 737 | SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; |
| 738 | SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, |
| 739 | unsigned NewOp) const; |
| 740 | SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; |
| 741 | SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; |
| 742 | SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 743 | SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 744 | SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
| 745 | SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
| 746 | SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const; |
| 747 | SDValue LowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const; |
| 748 | SDValue LowerGET_ACTIVE_LANE_MASK(SDValue Op, SelectionDAG &DAG) const; |
| 749 | SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; |
| 750 | SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; |
| 751 | SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; |
| 752 | SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; |
| 753 | SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; |
| 754 | SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const; |
| 755 | SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; |
| 756 | SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; |
| 757 | SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; |
| 758 | SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; |
| 759 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
| 760 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
| 761 | SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
| 762 | SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
| 763 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
| 764 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
| 765 | SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const; |
| 766 | SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
| 767 | SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
| 768 | SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; |
| 769 | SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; |
| 770 | SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; |
| 771 | SDValue LowerLOOP_DEPENDENCE_MASK(SDValue Op, SelectionDAG &DAG) const; |
| 772 | SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
| 773 | SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; |
| 774 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
| 775 | SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; |
| 776 | SDValue LowerVECREDUCE_MUL(SDValue Op, SelectionDAG &DAG) const; |
| 777 | SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; |
| 778 | SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
| 779 | SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
| 780 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
| 781 | SDValue LowerMSTORE(SDValue Op, SelectionDAG &DAG) const; |
| 782 | |
| 783 | SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; |
| 784 | |
| 785 | SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, |
| 786 | SelectionDAG &DAG) const; |
| 787 | SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, |
| 788 | SelectionDAG &DAG) const; |
| 789 | SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 790 | SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 791 | SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; |
| 792 | SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; |
| 793 | SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, |
| 794 | SelectionDAG &DAG) const; |
| 795 | SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 796 | SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 797 | SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 798 | SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op, |
| 799 | SelectionDAG &DAG) const; |
| 800 | SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op, |
| 801 | SelectionDAG &DAG) const; |
| 802 | SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const; |
| 803 | SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const; |
| 804 | SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 805 | SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op, |
| 806 | SelectionDAG &DAG) const; |
| 807 | SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 808 | SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 809 | SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 810 | SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 811 | SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op, |
| 812 | SelectionDAG &DAG) const; |
| 813 | SDValue LowerFixedLengthBuildVectorToSVE(SDValue Op, SelectionDAG &DAG) const; |
| 814 | SDValue LowerFixedLengthVectorCompressToSVE(SDValue Op, |
| 815 | SelectionDAG &DAG) const; |
| 816 | |
| 817 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
| 818 | SmallVectorImpl<SDNode *> &Created) const override; |
| 819 | SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
| 820 | SmallVectorImpl<SDNode *> &Created) const override; |
| 821 | SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
| 822 | int &ExtraSteps, bool &UseOneConst, |
| 823 | bool Reciprocal) const override; |
| 824 | SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
| 825 | int &ExtraSteps) const override; |
| 826 | SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
| 827 | const DenormalMode &Mode) const override; |
| 828 | SDValue getSqrtResultForDenormInput(SDValue Operand, |
| 829 | SelectionDAG &DAG) const override; |
| 830 | unsigned combineRepeatedFPDivisors() const override; |
| 831 | |
| 832 | ConstraintType getConstraintType(StringRef Constraint) const override; |
| 833 | Register getRegisterByName(const char* RegName, LLT VT, |
| 834 | const MachineFunction &MF) const override; |
| 835 | |
| 836 | /// Examine constraint string and operand type and determine a weight value. |
| 837 | /// The operand object must already have been set up with the operand type. |
| 838 | ConstraintWeight |
| 839 | getSingleConstraintMatchWeight(AsmOperandInfo &info, |
| 840 | const char *constraint) const override; |
| 841 | |
| 842 | std::pair<unsigned, const TargetRegisterClass *> |
| 843 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
| 844 | StringRef Constraint, MVT VT) const override; |
| 845 | |
| 846 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
| 847 | |
| 848 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
| 849 | std::vector<SDValue> &Ops, |
| 850 | SelectionDAG &DAG) const override; |
| 851 | |
| 852 | InlineAsm::ConstraintCode |
| 853 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
| 854 | if (ConstraintCode == "Q") |
| 855 | return InlineAsm::ConstraintCode::Q; |
| 856 | // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are |
| 857 | // followed by llvm_unreachable so we'll leave them unimplemented in |
| 858 | // the backend for now. |
| 859 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
| 860 | } |
| 861 | |
| 862 | /// Handle Lowering flag assembly outputs. |
| 863 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
| 864 | const SDLoc &DL, |
| 865 | const AsmOperandInfo &Constraint, |
| 866 | SelectionDAG &DAG) const override; |
| 867 | |
| 868 | bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override; |
| 869 | bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; |
| 870 | bool isVectorLoadExtDesirable(SDValue ExtVal) const override; |
| 871 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
| 872 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
| 873 | bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
| 874 | SDValue &Offset, SelectionDAG &DAG) const; |
| 875 | bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, |
| 876 | ISD::MemIndexedMode &AM, |
| 877 | SelectionDAG &DAG) const override; |
| 878 | bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
| 879 | SDValue &Offset, ISD::MemIndexedMode &AM, |
| 880 | SelectionDAG &DAG) const override; |
| 881 | bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
| 882 | bool IsPre, MachineRegisterInfo &MRI) const override; |
| 883 | |
| 884 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
| 885 | SelectionDAG &DAG) const override; |
| 886 | void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
| 887 | SelectionDAG &DAG) const; |
| 888 | void ReplaceExtractSubVectorResults(SDNode *N, |
| 889 | SmallVectorImpl<SDValue> &Results, |
| 890 | SelectionDAG &DAG) const; |
| 891 | void ReplaceGetActiveLaneMaskResults(SDNode *N, |
| 892 | SmallVectorImpl<SDValue> &Results, |
| 893 | SelectionDAG &DAG) const; |
| 894 | |
| 895 | bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; |
| 896 | |
| 897 | void finalizeLowering(MachineFunction &MF) const override; |
| 898 | |
| 899 | bool shouldLocalize(const MachineInstr &MI, |
| 900 | const TargetTransformInfo *TTI) const override; |
| 901 | |
| 902 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
| 903 | const APInt &OriginalDemandedBits, |
| 904 | const APInt &OriginalDemandedElts, |
| 905 | KnownBits &Known, |
| 906 | TargetLoweringOpt &TLO, |
| 907 | unsigned Depth) const override; |
| 908 | |
| 909 | bool canCreateUndefOrPoisonForTargetNode(SDValue Op, |
| 910 | const APInt &DemandedElts, |
| 911 | const SelectionDAG &DAG, |
| 912 | bool PoisonOnly, bool ConsiderFlags, |
| 913 | unsigned Depth) const override; |
| 914 | |
| 915 | bool isTargetCanonicalConstantNode(SDValue Op) const override; |
| 916 | |
| 917 | // With the exception of data-predicate transitions, no instructions are |
| 918 | // required to cast between legal scalable vector types. However: |
| 919 | // 1. Packed and unpacked types have different bit lengths, meaning BITCAST |
| 920 | // is not universally useable. |
| 921 | // 2. Most unpacked integer types are not legal and thus integer extends |
| 922 | // cannot be used to convert between unpacked and packed types. |
| 923 | // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used |
| 924 | // to transition between unpacked and packed types of the same element type, |
| 925 | // with BITCAST used otherwise. |
| 926 | // This function does not handle predicate bitcasts. |
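| | // For example, a cast from nxv2f32 (unpacked) to nxv2i64 may be emitted as a |
| | // REINTERPRET_CAST to the packed nxv4f32 followed by a BITCAST to nxv2i64 |
| | // (illustrative sketch of the multiphase process described above). |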
| 927 | SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; |
| 928 | |
| 929 | // Returns the runtime value for PSTATE.SM by generating a call to |
| 930 | // __arm_sme_state. |
| 931 | SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL, |
| 932 | EVT VT) const; |
| 933 | |
| 934 | bool preferScalarizeSplat(SDNode *N) const override; |
| 935 | |
| 936 | unsigned getMinimumJumpTableEntries() const override; |
| 937 | |
| 938 | bool shouldScalarizeBinop(SDValue VecOp) const override { |
| 939 | return VecOp.getOpcode() == ISD::SETCC; |
| 940 | } |
| 941 | |
| 942 | bool hasMultipleConditionRegisters(EVT VT) const override { |
| 943 | return VT.isScalableVector(); |
| 944 | } |
| 945 | }; |
| 946 | |
| 947 | namespace AArch64 { |
| 948 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
| 949 | const TargetLibraryInfo *libInfo, |
| 950 | const LibcallLoweringInfo *libcallLowering); |
| 951 | } // end namespace AArch64 |
| 952 | |
| 953 | } // end namespace llvm |
| 954 | |
| 955 | #endif |
| 956 | |