//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;

// Reserved bits should be preserved when modifying FPCR.
const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;
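
// Illustrative only (a sketch, not part of this interface): with the
// constants above, the rounding-mode field of a raw FPCR value can be read
// and rewritten as:
//   unsigned Mode = (FPCR >> RoundingBitsPos) & rmMask;         // one of RN..RZ
//   uint64_t New = (FPCR & ~(uint64_t(rmMask) << RoundingBitsPos)) |
//                  (uint64_t(RZ) << RoundingBitsPos);           // select RZ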

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

namespace ARM64AS {
enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 };
}

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) ->
  /// (op (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;
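
  // For example (illustrative): rewriting
  //   (add (add x, c1), y) -> (add (add x, y), c1)
  // hoists the constant c1 to the outer operation, where it can fold with a
  // later constant operand; here N0 is (add x, c1) and N1 is y.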

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) {
      // These are 32-bit pointers created using the `__ptr32` extension or
      // similar. They are handled by marking them as being in a different
      // address space, and will be extended to 64-bits when used as the target
      // of a load or store operation, or cast to a 64-bit pointer type.
      return MVT::i32;
    } else {
      // Returning i64 unconditionally here (i.e. even for ILP32) means that
      // the *DAG* representation of pointers will always be 64-bits. They will
      // be truncated and extended when transferred to memory, but the 64-bit
      // DAG allows us to use AArch64's addressing modes much more easily.
      return MVT::i64;
    }
  }
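
  // Illustrative note (the source-level spelling is an assumption): MSVC-style
  // mixed-pointer declarations such as
  //   int *__ptr32 __sptr P; // address space 270, ARM64AS::PTR32_SPTR
  //   int *__ptr32 __uptr Q; // address space 271, ARM64AS::PTR32_UPTR
  // reach getPointerTy above with the given address spaces, receive the i32
  // pointer type, and are sign- or zero-extended to 64 bits at their uses.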

  unsigned getVectorIdxWidth(const DataLayout &DL) const override {
    // The VectorIdx type is i64 for both the normal and ILP32 ABIs.
    return 64;
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with
  /// zero' shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateSMESaveBuffer(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
                             std::optional<unsigned> ByteOffset) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(
      LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;

  bool lowerInterleaveIntrinsicToStore(
      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                         int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
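
  // For example: when the hooks above return true, a call such as
  // @llvm.fmuladd.f64(a, b, c) is expanded to ISD::FMA, which on AArch64 can
  // be selected as a single FMADD rather than an FMUL followed by an FADD.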

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOptLevel OptLevel) const override;

  /// Return true if the target has native support for
  /// the specified value type and it is 'desirable' to use the type for the
  /// given node type.
  bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                            unsigned SelectOpcode, SDValue X,
                                            SDValue Y) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, /*MathUsed=*/true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode(const Module &M) const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override;

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    if (VT.isScalableVector())
      return true;

    return VT.getFixedSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }
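
  // For example (illustrative): an IR check of the form
  //   %t = trunc i64 %x to i8
  //   %s = sext i8 %t to i64
  //   %c = icmp eq i64 %s, %x
  // is a signed truncation check that keeps 8 bits; i8 is one of the widths
  // the SXT{B,H,W} family covers, so the hook above returns true for it.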

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool shouldExpandCmpUsingSelects(EVT VT) const override;

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportPtrAuthBundles() const override { return true; }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
    return true;
  }

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool
  shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
  /// AArch64SME::ToggleCondition.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue, unsigned Condition,
                              SDValue PStateSM = SDValue()) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64bit and 128bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

  /// In AArch64, true if FEAT_CPA is present. Allows pointer arithmetic
  /// semantics to be preserved for instruction selection.
  bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRType(MVT VT);
  void addQRType(MVT VT);

  bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerVECTOR_COMPRESS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context, const Type *RetTy) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal,
                         iterator_range<SDNode::user_iterator> Users,
                         bool HasNoNans, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ACTIVE_LANE_MASK(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBuildVectorToSVE(SDValue Op, SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle lowering of flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;
  void ReplaceGetActiveLaneMaskResults(SDNode *N,
                                       SmallVectorImpl<SDValue> &Results,
                                       SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
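
  // For example (illustrative): casting nxv2f32 to nxv2i64 cannot be a plain
  // BITCAST, because the unpacked nxv2f32 and the packed nxv2i64 have
  // different bit lengths; instead the value is first REINTERPRET_CAST to the
  // packed nxv4f32 (same f32 element type) and then BITCAST to nxv2i64.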

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  bool softPromoteHalfType() const override { return true; }

  bool shouldScalarizeBinop(SDValue VecOp) const override {
    return VecOp.getOpcode() == ISD::SETCC;
  }
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif