1 | //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that AArch64 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H |
16 | |
17 | #include "AArch64.h" |
18 | #include "Utils/AArch64SMEAttributes.h" |
19 | #include "llvm/CodeGen/CallingConvLower.h" |
20 | #include "llvm/CodeGen/MachineFunction.h" |
21 | #include "llvm/CodeGen/SelectionDAG.h" |
22 | #include "llvm/CodeGen/TargetLowering.h" |
23 | #include "llvm/IR/CallingConv.h" |
24 | #include "llvm/IR/Instruction.h" |
25 | |
26 | namespace llvm { |
27 | |
28 | namespace AArch64ISD { |
29 | |
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        provide a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
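// For example (an illustrative reading aid using nodes defined below):
// FDIV_PRED leaves the inactive lanes of its result undefined,
// SETCC_MERGE_ZERO zeroes them, and FABS_MERGE_PASSTHRU takes them from its
// trailing passthru operand.
//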
50 | enum NodeType : unsigned { |
51 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
52 | WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. |
53 | CALL, // Function call. |
54 | |
  // Pseudo for an OBJC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
57 | CALL_RVMARKER, |
58 | |
59 | CALL_BTI, // Function call followed by a BTI instruction. |
60 | |
61 | // Function call, authenticating the callee value first: |
62 | // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands. |
63 | AUTH_CALL, |
64 | // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc, |
65 | // operands. |
66 | AUTH_TC_RETURN, |
67 | |
68 | // Authenticated variant of CALL_RVMARKER. |
69 | AUTH_CALL_RVMARKER, |
70 | |
71 | COALESCER_BARRIER, |
72 | |
73 | VG_SAVE, |
74 | VG_RESTORE, |
75 | |
76 | SMSTART, |
77 | SMSTOP, |
78 | RESTORE_ZA, |
79 | RESTORE_ZT, |
80 | SAVE_ZT, |
81 | |
82 | // A call with the callee in x16, i.e. "blr x16". |
83 | CALL_ARM64EC_TO_X64, |
84 | |
85 | // Produces the full sequence of instructions for getting the thread pointer |
86 | // offset of a variable into X0, using the TLSDesc model. |
87 | TLSDESC_CALLSEQ, |
88 | ADRP, // Page address of a TargetGlobalAddress operand. |
89 | ADR, // ADR |
90 | ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand. |
91 | LOADgot, // Load from automatically generated descriptor (e.g. Global |
92 | // Offset Table, TLS record). |
93 | RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand. |
94 | BRCOND, // Conditional branch instruction; "b.cond". |
95 | CSEL, |
96 | CSINV, // Conditional select invert. |
97 | CSNEG, // Conditional select negate. |
98 | CSINC, // Conditional select increment. |
99 | |
100 | // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on |
101 | // ELF. |
102 | THREAD_POINTER, |
103 | ADC, |
104 | SBC, // adc, sbc instructions |
105 | |
106 | // To avoid stack clash, allocation is performed by block and each block is |
107 | // probed. |
108 | PROBED_ALLOCA, |
109 | |
110 | // Predicated instructions where inactive lanes produce undefined results. |
111 | ABDS_PRED, |
112 | ABDU_PRED, |
113 | FADD_PRED, |
114 | FDIV_PRED, |
115 | FMA_PRED, |
116 | FMAX_PRED, |
117 | FMAXNM_PRED, |
118 | FMIN_PRED, |
119 | FMINNM_PRED, |
120 | FMUL_PRED, |
121 | FSUB_PRED, |
122 | HADDS_PRED, |
123 | HADDU_PRED, |
124 | MUL_PRED, |
125 | MULHS_PRED, |
126 | MULHU_PRED, |
127 | RHADDS_PRED, |
128 | RHADDU_PRED, |
129 | SDIV_PRED, |
130 | SHL_PRED, |
131 | SMAX_PRED, |
132 | SMIN_PRED, |
133 | SRA_PRED, |
134 | SRL_PRED, |
135 | UDIV_PRED, |
136 | UMAX_PRED, |
137 | UMIN_PRED, |
138 | |
139 | // Unpredicated vector instructions |
140 | BIC, |
141 | |
142 | SRAD_MERGE_OP1, |
143 | |
144 | // Predicated instructions with the result of inactive lanes provided by the |
145 | // last operand. |
146 | FABS_MERGE_PASSTHRU, |
147 | FCEIL_MERGE_PASSTHRU, |
148 | FFLOOR_MERGE_PASSTHRU, |
149 | FNEARBYINT_MERGE_PASSTHRU, |
150 | FNEG_MERGE_PASSTHRU, |
151 | FRECPX_MERGE_PASSTHRU, |
152 | FRINT_MERGE_PASSTHRU, |
153 | FROUND_MERGE_PASSTHRU, |
154 | FROUNDEVEN_MERGE_PASSTHRU, |
155 | FSQRT_MERGE_PASSTHRU, |
156 | FTRUNC_MERGE_PASSTHRU, |
157 | FP_ROUND_MERGE_PASSTHRU, |
158 | FP_EXTEND_MERGE_PASSTHRU, |
159 | UINT_TO_FP_MERGE_PASSTHRU, |
160 | SINT_TO_FP_MERGE_PASSTHRU, |
161 | FCVTZU_MERGE_PASSTHRU, |
162 | FCVTZS_MERGE_PASSTHRU, |
163 | SIGN_EXTEND_INREG_MERGE_PASSTHRU, |
164 | ZERO_EXTEND_INREG_MERGE_PASSTHRU, |
165 | ABS_MERGE_PASSTHRU, |
166 | NEG_MERGE_PASSTHRU, |
167 | |
168 | SETCC_MERGE_ZERO, |
169 | |
170 | // Arithmetic instructions which write flags. |
171 | ADDS, |
172 | SUBS, |
173 | ADCS, |
174 | SBCS, |
175 | ANDS, |
176 | |
177 | // Conditional compares. Operands: left,right,falsecc,cc,flags |
178 | CCMP, |
179 | CCMN, |
180 | FCCMP, |
181 | |
182 | // Floating point comparison |
183 | FCMP, |
184 | |
185 | // Scalar-to-vector duplication |
186 | DUP, |
187 | DUPLANE8, |
188 | DUPLANE16, |
189 | DUPLANE32, |
190 | DUPLANE64, |
191 | DUPLANE128, |
192 | |
  // Vector immediate moves
194 | MOVI, |
195 | MOVIshift, |
196 | MOVIedit, |
197 | MOVImsl, |
198 | FMOV, |
199 | MVNIshift, |
200 | MVNImsl, |
201 | |
202 | // Vector immediate ops |
203 | BICi, |
204 | ORRi, |
205 | |
206 | // Vector bitwise select: similar to ISD::VSELECT but not all bits within an |
207 | // element must be identical. |
208 | BSP, |
209 | |
210 | // Vector shuffles |
211 | ZIP1, |
212 | ZIP2, |
213 | UZP1, |
214 | UZP2, |
215 | TRN1, |
216 | TRN2, |
217 | REV16, |
218 | REV32, |
219 | REV64, |
220 | EXT, |
221 | SPLICE, |
222 | |
223 | // Vector shift by scalar |
224 | VSHL, |
225 | VLSHR, |
226 | VASHR, |
227 | |
228 | // Vector shift by scalar (again) |
229 | SQSHL_I, |
230 | UQSHL_I, |
231 | SQSHLU_I, |
232 | SRSHR_I, |
233 | URSHR_I, |
234 | URSHR_I_PRED, |
235 | |
236 | // Vector narrowing shift by immediate (bottom) |
237 | RSHRNB_I, |
238 | |
239 | // Vector shift by constant and insert |
240 | VSLI, |
241 | VSRI, |
242 | |
243 | // Vector comparisons |
244 | CMEQ, |
245 | CMGE, |
246 | CMGT, |
247 | CMHI, |
248 | CMHS, |
249 | FCMEQ, |
250 | FCMGE, |
251 | FCMGT, |
252 | |
253 | // Vector zero comparisons |
254 | CMEQz, |
255 | CMGEz, |
256 | CMGTz, |
257 | CMLEz, |
258 | CMLTz, |
259 | FCMEQz, |
260 | FCMGEz, |
261 | FCMGTz, |
262 | FCMLEz, |
263 | FCMLTz, |
264 | |
265 | // Round wide FP to narrow FP with inexact results to odd. |
266 | FCVTXN, |
267 | |
268 | // Vector across-lanes addition |
269 | // Only the lower result lane is defined. |
270 | SADDV, |
271 | UADDV, |
272 | |
  // Sum long across vector, unsigned (UADDLV) and signed (SADDLV)
274 | UADDLV, |
275 | SADDLV, |
276 | |
277 | // Add Pairwise of two vectors |
278 | ADDP, |
279 | // Add Long Pairwise |
280 | SADDLP, |
281 | UADDLP, |
282 | |
283 | // udot/sdot instructions |
284 | UDOT, |
285 | SDOT, |
286 | |
287 | // Vector across-lanes min/max |
288 | // Only the lower result lane is defined. |
289 | SMINV, |
290 | UMINV, |
291 | SMAXV, |
292 | UMAXV, |
293 | |
294 | SADDV_PRED, |
295 | UADDV_PRED, |
296 | SMAXV_PRED, |
297 | UMAXV_PRED, |
298 | SMINV_PRED, |
299 | UMINV_PRED, |
300 | ORV_PRED, |
301 | EORV_PRED, |
302 | ANDV_PRED, |
303 | |
304 | // Compare-and-branch |
305 | CBZ, |
306 | CBNZ, |
307 | TBZ, |
308 | TBNZ, |
309 | |
310 | // Tail calls |
311 | TC_RETURN, |
312 | |
313 | // Custom prefetch handling |
314 | PREFETCH, |
315 | |
316 | // {s|u}int to FP within a FP register. |
317 | SITOF, |
318 | UITOF, |
319 | |
320 | /// Natural vector cast. ISD::BITCAST is not natural in the big-endian |
321 | /// world w.r.t vectors; which causes additional REV instructions to be |
322 | /// generated to compensate for the byte-swapping. But sometimes we do |
323 | /// need to re-interpret the data in SIMD vector registers in big-endian |
324 | /// mode without emitting such REV instructions. |
325 | NVCAST, |
326 | |
327 | MRS, // MRS, also sets the flags via a glue. |
328 | |
329 | SMULL, |
330 | UMULL, |
331 | |
332 | PMULL, |
333 | |
334 | // Reciprocal estimates and steps. |
335 | FRECPE, |
336 | FRECPS, |
337 | FRSQRTE, |
338 | FRSQRTS, |
339 | |
340 | SUNPKHI, |
341 | SUNPKLO, |
342 | UUNPKHI, |
343 | UUNPKLO, |
344 | |
345 | CLASTA_N, |
346 | CLASTB_N, |
347 | LASTA, |
348 | LASTB, |
349 | TBL, |
350 | |
351 | // Floating-point reductions. |
352 | FADDA_PRED, |
353 | FADDV_PRED, |
354 | FMAXV_PRED, |
355 | FMAXNMV_PRED, |
356 | FMINV_PRED, |
357 | FMINNMV_PRED, |
358 | |
359 | INSR, |
360 | PTEST, |
361 | PTEST_ANY, |
362 | PTRUE, |
363 | |
364 | CTTZ_ELTS, |
365 | |
366 | BITREVERSE_MERGE_PASSTHRU, |
367 | BSWAP_MERGE_PASSTHRU, |
368 | REVH_MERGE_PASSTHRU, |
369 | REVW_MERGE_PASSTHRU, |
370 | CTLZ_MERGE_PASSTHRU, |
371 | CTPOP_MERGE_PASSTHRU, |
372 | DUP_MERGE_PASSTHRU, |
373 | INDEX_VECTOR, |
374 | |
  // Cast between vectors of the same element type that differ in length.
376 | REINTERPRET_CAST, |
377 | |
378 | // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa |
379 | LS64_BUILD, |
  LS64_EXTRACT,
381 | |
382 | LD1_MERGE_ZERO, |
383 | LD1S_MERGE_ZERO, |
384 | LDNF1_MERGE_ZERO, |
385 | LDNF1S_MERGE_ZERO, |
386 | LDFF1_MERGE_ZERO, |
387 | LDFF1S_MERGE_ZERO, |
388 | LD1RQ_MERGE_ZERO, |
389 | LD1RO_MERGE_ZERO, |
390 | |
391 | // Structured loads. |
392 | SVE_LD2_MERGE_ZERO, |
393 | SVE_LD3_MERGE_ZERO, |
394 | SVE_LD4_MERGE_ZERO, |
395 | |
396 | // Unsigned gather loads. |
397 | GLD1_MERGE_ZERO, |
398 | GLD1_SCALED_MERGE_ZERO, |
399 | GLD1_UXTW_MERGE_ZERO, |
400 | GLD1_SXTW_MERGE_ZERO, |
401 | GLD1_UXTW_SCALED_MERGE_ZERO, |
402 | GLD1_SXTW_SCALED_MERGE_ZERO, |
403 | GLD1_IMM_MERGE_ZERO, |
404 | GLD1Q_MERGE_ZERO, |
405 | GLD1Q_INDEX_MERGE_ZERO, |
406 | |
407 | // Signed gather loads |
408 | GLD1S_MERGE_ZERO, |
409 | GLD1S_SCALED_MERGE_ZERO, |
410 | GLD1S_UXTW_MERGE_ZERO, |
411 | GLD1S_SXTW_MERGE_ZERO, |
412 | GLD1S_UXTW_SCALED_MERGE_ZERO, |
413 | GLD1S_SXTW_SCALED_MERGE_ZERO, |
414 | GLD1S_IMM_MERGE_ZERO, |
415 | |
  // First-faulting unsigned gather loads.
417 | GLDFF1_MERGE_ZERO, |
418 | GLDFF1_SCALED_MERGE_ZERO, |
419 | GLDFF1_UXTW_MERGE_ZERO, |
420 | GLDFF1_SXTW_MERGE_ZERO, |
421 | GLDFF1_UXTW_SCALED_MERGE_ZERO, |
422 | GLDFF1_SXTW_SCALED_MERGE_ZERO, |
423 | GLDFF1_IMM_MERGE_ZERO, |
424 | |
  // First-faulting signed gather loads.
426 | GLDFF1S_MERGE_ZERO, |
427 | GLDFF1S_SCALED_MERGE_ZERO, |
428 | GLDFF1S_UXTW_MERGE_ZERO, |
429 | GLDFF1S_SXTW_MERGE_ZERO, |
430 | GLDFF1S_UXTW_SCALED_MERGE_ZERO, |
431 | GLDFF1S_SXTW_SCALED_MERGE_ZERO, |
432 | GLDFF1S_IMM_MERGE_ZERO, |
433 | |
434 | // Non-temporal gather loads |
435 | GLDNT1_MERGE_ZERO, |
436 | GLDNT1_INDEX_MERGE_ZERO, |
437 | GLDNT1S_MERGE_ZERO, |
438 | |
439 | // Contiguous masked store. |
440 | ST1_PRED, |
441 | |
442 | // Scatter store |
443 | SST1_PRED, |
444 | SST1_SCALED_PRED, |
445 | SST1_UXTW_PRED, |
446 | SST1_SXTW_PRED, |
447 | SST1_UXTW_SCALED_PRED, |
448 | SST1_SXTW_SCALED_PRED, |
449 | SST1_IMM_PRED, |
450 | SST1Q_PRED, |
451 | SST1Q_INDEX_PRED, |
452 | |
453 | // Non-temporal scatter store |
454 | SSTNT1_PRED, |
455 | SSTNT1_INDEX_PRED, |
456 | |
457 | // SME |
458 | RDSVL, |
459 | REVD_MERGE_PASSTHRU, |
460 | ALLOCATE_ZA_BUFFER, |
461 | INIT_TPIDR2OBJ, |
462 | |
463 | // Asserts that a function argument (i32) is zero-extended to i8 by |
464 | // the caller |
465 | ASSERT_ZEXT_BOOL, |
466 | |
467 | // 128-bit system register accesses |
468 | // lo64, hi64, chain = MRRS(chain, sysregname) |
469 | MRRS, |
470 | // chain = MSRR(chain, sysregname, lo64, hi64) |
471 | MSRR, |
472 | |
473 | // Strict (exception-raising) floating point comparison |
474 | STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, |
475 | STRICT_FCMPE, |
476 | |
477 | // SME ZA loads and stores |
478 | SME_ZA_LDR, |
479 | SME_ZA_STR, |
480 | |
481 | // NEON Load/Store with post-increment base updates |
482 | LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, |
483 | LD3post, |
484 | LD4post, |
485 | ST2post, |
486 | ST3post, |
487 | ST4post, |
488 | LD1x2post, |
489 | LD1x3post, |
490 | LD1x4post, |
491 | ST1x2post, |
492 | ST1x3post, |
493 | ST1x4post, |
494 | LD1DUPpost, |
495 | LD2DUPpost, |
496 | LD3DUPpost, |
497 | LD4DUPpost, |
498 | LD1LANEpost, |
499 | LD2LANEpost, |
500 | LD3LANEpost, |
501 | LD4LANEpost, |
502 | ST2LANEpost, |
503 | ST3LANEpost, |
504 | ST4LANEpost, |
505 | |
506 | STG, |
507 | STZG, |
508 | ST2G, |
509 | STZ2G, |
510 | |
511 | LDP, |
512 | LDIAPP, |
513 | LDNP, |
514 | STP, |
515 | STILP, |
516 | STNP, |
517 | |
518 | // Memory Operations |
519 | MOPS_MEMSET, |
520 | MOPS_MEMSET_TAGGING, |
521 | MOPS_MEMCOPY, |
522 | MOPS_MEMMOVE, |
523 | }; |
524 | |
525 | } // end namespace AArch64ISD |
526 | |
527 | namespace AArch64 { |
528 | /// Possible values of current rounding mode, which is specified in bits |
529 | /// 23:22 of FPCR. |
530 | enum Rounding { |
531 | RN = 0, // Round to Nearest |
532 | RP = 1, // Round towards Plus infinity |
533 | RM = 2, // Round towards Minus infinity |
534 | RZ = 3, // Round towards Zero |
535 | rmMask = 3 // Bit mask selecting rounding mode |
536 | }; |
537 | |
538 | // Bit position of rounding mode bits in FPCR. |
539 | const unsigned RoundingBitsPos = 22; |
540 | |
541 | // Reserved bits should be preserved when modifying FPCR. |
542 | const uint64_t ReservedFPControlBits = 0xfffffffff80040f8; |
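
// For example (illustrative only): the current rounding mode can be derived
// from a raw FPCR value as
//   Rounding RM = static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);
// and any new value written back to FPCR is expected to leave the bits
// selected by ReservedFPControlBits unchanged.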
543 | |
544 | // Registers used to pass function arguments. |
545 | ArrayRef<MCPhysReg> getGPRArgRegs(); |
546 | ArrayRef<MCPhysReg> getFPRArgRegs(); |
547 | |
548 | /// Maximum allowed number of unprobed bytes above SP at an ABI |
549 | /// boundary. |
550 | const unsigned StackProbeMaxUnprobedStack = 1024; |
551 | |
552 | /// Maximum number of iterations to unroll for a constant size probing loop. |
553 | const unsigned StackProbeMaxLoopUnroll = 4; |
554 | |
555 | } // namespace AArch64 |
556 | |
557 | class AArch64Subtarget; |
558 | |
559 | class AArch64TargetLowering : public TargetLowering { |
560 | public: |
561 | explicit AArch64TargetLowering(const TargetMachine &TM, |
562 | const AArch64Subtarget &STI); |
563 | |
564 | /// Control the following reassociation of operands: (op (op x, c1), y) -> (op |
565 | /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. |
566 | bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
567 | SDValue N1) const override; |
568 | |
569 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
570 | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; |
571 | |
572 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
573 | CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; |
574 | |
575 | /// Determine which of the bits specified in Mask are known to be either zero |
576 | /// or one and return them in the KnownZero/KnownOne bitsets. |
577 | void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, |
578 | const APInt &DemandedElts, |
579 | const SelectionDAG &DAG, |
580 | unsigned Depth = 0) const override; |
581 | |
582 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
583 | const APInt &DemandedElts, |
584 | const SelectionDAG &DAG, |
585 | unsigned Depth) const override; |
586 | |
587 | MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { |
588 | // Returning i64 unconditionally here (i.e. even for ILP32) means that the |
589 | // *DAG* representation of pointers will always be 64-bits. They will be |
590 | // truncated and extended when transferred to memory, but the 64-bit DAG |
591 | // allows us to use AArch64's addressing modes much more easily. |
    return MVT::getIntegerVT(64);
593 | } |
594 | |
595 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
596 | const APInt &DemandedElts, |
597 | TargetLoweringOpt &TLO) const override; |
598 | |
599 | MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; |
600 | |
601 | /// Returns true if the target allows unaligned memory accesses of the |
602 | /// specified type. |
603 | bool allowsMisalignedMemoryAccesses( |
604 | EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
605 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
606 | unsigned *Fast = nullptr) const override; |
607 | /// LLT variant. |
608 | bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, |
609 | Align Alignment, |
610 | MachineMemOperand::Flags Flags, |
611 | unsigned *Fast = nullptr) const override; |
612 | |
613 | /// Provide custom lowering hooks for some operations. |
614 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
615 | |
616 | const char *getTargetNodeName(unsigned Opcode) const override; |
617 | |
618 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
619 | |
620 | /// This method returns a target specific FastISel object, or null if the |
621 | /// target does not support "fast" ISel. |
622 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
623 | const TargetLibraryInfo *libInfo) const override; |
624 | |
625 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
626 | |
627 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
628 | bool ForCodeSize) const override; |
629 | |
630 | /// Return true if the given shuffle mask can be codegen'd directly, or if it |
631 | /// should be stack expanded. |
632 | bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; |
633 | |
  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
636 | bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; |
637 | |
638 | /// Return the ISD::SETCC ValueType. |
639 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
640 | EVT VT) const override; |
641 | |
642 | SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; |
643 | |
644 | MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, |
645 | MachineBasicBlock *BB) const; |
646 | |
647 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
648 | MachineBasicBlock *BB) const; |
649 | |
650 | MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, |
651 | MachineBasicBlock *MBB) const; |
652 | |
653 | MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, |
654 | MachineInstr &MI, |
655 | MachineBasicBlock *BB) const; |
656 | MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; |
657 | MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, |
658 | MachineInstr &MI, MachineBasicBlock *BB) const; |
659 | MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, |
660 | unsigned Opcode, bool Op0IsDef) const; |
661 | MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; |
662 | MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI, |
663 | MachineBasicBlock *BB) const; |
664 | MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI, |
665 | MachineBasicBlock *BB) const; |
666 | |
667 | MachineBasicBlock * |
668 | EmitInstrWithCustomInserter(MachineInstr &MI, |
669 | MachineBasicBlock *MBB) const override; |
670 | |
671 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
672 | MachineFunction &MF, |
673 | unsigned Intrinsic) const override; |
674 | |
675 | bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
676 | EVT NewVT) const override; |
677 | |
678 | bool shouldRemoveRedundantExtend(SDValue Op) const override; |
679 | |
680 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
681 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
682 | |
683 | bool isProfitableToHoist(Instruction *I) const override; |
684 | |
685 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
686 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
687 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
688 | |
689 | bool shouldSinkOperands(Instruction *I, |
690 | SmallVectorImpl<Use *> &Ops) const override; |
691 | |
692 | bool optimizeExtendOrTruncateConversion( |
693 | Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override; |
694 | |
695 | bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override; |
696 | |
697 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
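
  // Illustrative note: an interleaved access group of factor 4 maps onto a
  // single ld4/st4 structured memory operation; larger factors have no
  // matching instruction and are not treated as interleaved accesses.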
698 | |
699 | bool lowerInterleavedLoad(LoadInst *LI, |
700 | ArrayRef<ShuffleVectorInst *> Shuffles, |
701 | ArrayRef<unsigned> Indices, |
702 | unsigned Factor) const override; |
703 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
704 | unsigned Factor) const override; |
705 | |
706 | bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
707 | LoadInst *LI) const override; |
708 | |
709 | bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
710 | StoreInst *SI) const override; |
711 | |
712 | bool isLegalAddImmediate(int64_t) const override; |
713 | bool isLegalAddScalableImmediate(int64_t) const override; |
714 | bool isLegalICmpImmediate(int64_t) const override; |
715 | |
716 | bool isMulAddWithConstProfitable(SDValue AddNode, |
717 | SDValue ConstNode) const override; |
718 | |
719 | bool shouldConsiderGEPOffsetSplit() const override; |
720 | |
721 | EVT getOptimalMemOpType(const MemOp &Op, |
722 | const AttributeList &FuncAttributes) const override; |
723 | |
724 | LLT getOptimalMemOpLLT(const MemOp &Op, |
725 | const AttributeList &FuncAttributes) const override; |
726 | |
727 | /// Return true if the addressing mode represented by AM is legal for this |
728 | /// target, for a load/store of the specified type. |
729 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
730 | unsigned AS, |
731 | Instruction *I = nullptr) const override; |
732 | |
733 | int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, |
734 | int64_t MaxOffset) const override; |
735 | |
736 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
737 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
738 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
739 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
740 | EVT VT) const override; |
741 | bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; |
742 | |
743 | bool generateFMAsInMachineCombiner(EVT VT, |
744 | CodeGenOptLevel OptLevel) const override; |
745 | |
746 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
747 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
748 | |
749 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
750 | bool isDesirableToCommuteWithShift(const SDNode *N, |
751 | CombineLevel Level) const override; |
752 | |
753 | bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override { |
754 | return false; |
755 | } |
756 | |
757 | /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. |
758 | bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; |
759 | |
760 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
761 | bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
762 | CombineLevel Level) const override; |
763 | |
764 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
765 | EVT VT) const override; |
766 | |
767 | /// Returns true if it is beneficial to convert a load of a constant |
768 | /// to just the constant itself. |
769 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
770 | Type *Ty) const override; |
771 | |
772 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
773 | /// with this index. |
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;
776 | |
777 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
778 | bool MathUsed) const override { |
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
782 | } |
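
  // Illustrative note: an overflow check such as llvm.sadd.with.overflow
  // typically maps onto the flag-setting ADDS node above, with the overflow
  // result consumed from the V flag (e.g. via CSINC or B.VS), so forming the
  // overflow op is cheap even when only the overflow bit is used.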
783 | |
784 | Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, |
785 | AtomicOrdering Ord) const override; |
786 | Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, |
787 | AtomicOrdering Ord) const override; |
788 | |
789 | void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override; |
790 | |
791 | bool isOpSuitableForLDPSTP(const Instruction *I) const; |
792 | bool isOpSuitableForLSE128(const Instruction *I) const; |
793 | bool isOpSuitableForRCPC3(const Instruction *I) const; |
794 | bool shouldInsertFencesForAtomic(const Instruction *I) const override; |
795 | bool |
796 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override; |
797 | |
798 | TargetLoweringBase::AtomicExpansionKind |
799 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
800 | TargetLoweringBase::AtomicExpansionKind |
801 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
802 | TargetLoweringBase::AtomicExpansionKind |
803 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
804 | |
805 | TargetLoweringBase::AtomicExpansionKind |
806 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; |
807 | |
808 | bool useLoadStackGuardNode() const override; |
809 | TargetLoweringBase::LegalizeTypeAction |
810 | getPreferredVectorAction(MVT VT) const override; |
811 | |
812 | /// If the target has a standard location for the stack protector cookie, |
813 | /// returns the address of that location. Otherwise, returns nullptr. |
814 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
815 | |
816 | void insertSSPDeclarations(Module &M) const override; |
817 | Value *getSDagStackGuard(const Module &M) const override; |
818 | Function *getSSPStackGuardCheck(const Module &M) const override; |
819 | |
820 | /// If the target has a standard location for the unsafe stack pointer, |
821 | /// returns the address of that location. Otherwise, returns nullptr. |
822 | Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; |
823 | |
824 | /// If a physical register, this returns the register that receives the |
825 | /// exception address on entry to an EH pad. |
826 | Register |
827 | getExceptionPointerRegister(const Constant *PersonalityFn) const override { |
828 | // FIXME: This is a guess. Has this been defined yet? |
829 | return AArch64::X0; |
830 | } |
831 | |
832 | /// If a physical register, this returns the register that receives the |
833 | /// exception typeid on entry to a landing pad. |
834 | Register |
835 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override { |
836 | // FIXME: This is a guess. Has this been defined yet? |
837 | return AArch64::X1; |
838 | } |
839 | |
840 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
841 | |
842 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
843 | const MachineFunction &MF) const override { |
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
848 | |
849 | if (NoFloat) |
850 | return (MemVT.getSizeInBits() <= 64); |
851 | return true; |
852 | } |
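
  // Illustrative note: under noimplicitfloat, merging four i32 stores into a
  // single 128-bit store would typically be lowered through a Q (FP/SIMD)
  // register, so only merges of up to 64 bits (a plain X-register store) are
  // permitted above.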
853 | |
854 | bool isCheapToSpeculateCttz(Type *) const override { |
855 | return true; |
856 | } |
857 | |
858 | bool isCheapToSpeculateCtlz(Type *) const override { |
859 | return true; |
860 | } |
861 | |
862 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
863 | |
864 | bool hasAndNotCompare(SDValue V) const override { |
865 | // We can use bics for any scalar. |
866 | return V.getValueType().isScalarInteger(); |
867 | } |
868 | |
869 | bool hasAndNot(SDValue Y) const override { |
870 | EVT VT = Y.getValueType(); |
871 | |
872 | if (!VT.isVector()) |
      return hasAndNotCompare(Y);
874 | |
875 | TypeSize TS = VT.getSizeInBits(); |
876 | // TODO: We should be able to use bic/bif too for SVE. |
877 | return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic' |
878 | } |
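
  // Illustrative note: the and-not pattern this enables is
  // (and x, (xor y, -1)), which selects to BIC for scalars or the vector BIC
  // mentioned above, avoiding a separate MVN of y.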
879 | |
880 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
881 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
882 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
883 | SelectionDAG &DAG) const override; |
884 | |
885 | ShiftLegalizationStrategy |
886 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
887 | unsigned ExpansionFactor) const override; |
888 | |
889 | bool shouldTransformSignedTruncationCheck(EVT XVT, |
890 | unsigned KeptBits) const override { |
    // For vectors, we don't have a preference.
892 | if (XVT.isVector()) |
893 | return false; |
894 | |
895 | auto VTIsOk = [](EVT VT) -> bool { |
896 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
897 | VT == MVT::i64; |
898 | }; |
899 | |
    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
903 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
904 | } |
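
  // Illustrative note: a signed truncation check that keeps, say, 32 bits of
  // an i64 value can be rewritten around a sign-extension of the kept bits
  // (SXTW here) followed by a compare with the original value, which is why
  // only the extension widths SXT supports are accepted above.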
905 | |
906 | bool preferIncOfAddToSubOfNot(EVT VT) const override; |
907 | |
908 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; |
909 | |
910 | bool shouldExpandCmpUsingSelects() const override { return true; } |
911 | |
912 | bool isComplexDeinterleavingSupported() const override; |
913 | bool isComplexDeinterleavingOperationSupported( |
914 | ComplexDeinterleavingOperation Operation, Type *Ty) const override; |
915 | |
916 | Value *createComplexDeinterleavingIR( |
917 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
918 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
919 | Value *Accumulator = nullptr) const override; |
920 | |
921 | bool supportSplitCSR(MachineFunction *MF) const override { |
922 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
924 | } |
925 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
926 | void insertCopiesSplitCSR( |
927 | MachineBasicBlock *Entry, |
928 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
929 | |
930 | bool supportSwiftError() const override { |
931 | return true; |
932 | } |
933 | |
934 | bool supportPtrAuthBundles() const override { return true; } |
935 | |
936 | bool supportKCFIBundles() const override { return true; } |
937 | |
938 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
939 | MachineBasicBlock::instr_iterator &MBBI, |
940 | const TargetInstrInfo *TII) const override; |
941 | |
942 | /// Enable aggressive FMA fusion on targets that want it. |
943 | bool enableAggressiveFMAFusion(EVT VT) const override; |
944 | |
945 | /// Returns the size of the platform's va_list object. |
946 | unsigned getVaListSizeInBits(const DataLayout &DL) const override; |
947 | |
948 | /// Returns true if \p VecTy is a legal interleaved access type. This |
949 | /// function checks the vector element type and the overall width of the |
950 | /// vector. |
951 | bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, |
952 | bool &UseScalable) const; |
953 | |
954 | /// Returns the number of interleaved accesses that will be generated when |
955 | /// lowering accesses of the given type. |
956 | unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, |
957 | bool UseScalable) const; |
958 | |
959 | MachineMemOperand::Flags getTargetMMOFlags( |
960 | const Instruction &I) const override; |
961 | |
962 | bool functionArgumentNeedsConsecutiveRegisters( |
963 | Type *Ty, CallingConv::ID CallConv, bool isVarArg, |
964 | const DataLayout &DL) const override; |
965 | |
966 | /// Used for exception handling on Win64. |
967 | bool needsFixedCatchObjects() const override; |
968 | |
969 | bool fallBackToDAGISel(const Instruction &Inst) const override; |
970 | |
971 | /// SVE code generation for fixed length vectors does not custom lower |
972 | /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to |
973 | /// merge. However, merging them creates a BUILD_VECTOR that is just as |
974 | /// illegal as the original, thus leading to an infinite legalisation loop. |
975 | /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal |
976 | /// vector types this override can be removed. |
977 | bool mergeStoresAfterLegalization(EVT VT) const override; |
978 | |
979 | // If the platform/function should have a redzone, return the size in bytes. |
980 | unsigned getRedZoneSize(const Function &F) const { |
    if (F.hasFnAttribute(Attribute::NoRedZone))
982 | return 0; |
983 | return 128; |
984 | } |
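
  // Illustrative note: the red zone is the region immediately below SP (128
  // bytes here) that a function may use for short-lived data without
  // adjusting SP; functions marked noredzone opt out and report 0 above.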
985 | |
986 | bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const; |
987 | EVT getPromotedVTForPredicate(EVT VT) const; |
988 | |
989 | EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
990 | bool AllowUnknown = false) const override; |
991 | |
992 | bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override; |
993 | |
994 | bool shouldExpandCttzElements(EVT VT) const override; |
995 | |
996 | /// If a change in streaming mode is required on entry to/return from a |
997 | /// function call it emits and returns the corresponding SMSTART or SMSTOP |
998 | /// node. \p Condition should be one of the enum values from |
999 | /// AArch64SME::ToggleCondition. |
1000 | SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, |
1001 | SDValue Chain, SDValue InGlue, unsigned Condition, |
1002 | SDValue PStateSM = SDValue()) const; |
1003 | |
1004 | bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } |
1005 | |
1006 | // Normally SVE is only used for byte size vectors that do not fit within a |
1007 | // NEON vector. This changes when OverrideNEON is true, allowing SVE to be |
1008 | // used for 64bit and 128bit vectors as well. |
1009 | bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; |
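
  // For example (illustrative only): a fixed-length v8i32 (256 bits) does not
  // fit in a 128-bit NEON register and so takes the SVE path when available,
  // whereas 64-bit and 128-bit fixed vectors only do so when OverrideNEON is
  // true.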
1010 | |
1011 | // Follow NEON ABI rules even when using SVE for fixed length vectors. |
1012 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
1013 | EVT VT) const override; |
1014 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
1015 | CallingConv::ID CC, |
1016 | EVT VT) const override; |
1017 | unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, |
1018 | CallingConv::ID CC, EVT VT, |
1019 | EVT &IntermediateVT, |
1020 | unsigned &NumIntermediates, |
1021 | MVT &RegisterVT) const override; |
1022 | |
  /// True if stack clash protection is enabled for this function.
1024 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
1025 | |
1026 | #ifndef NDEBUG |
1027 | void verifyTargetSDNode(const SDNode *N) const override; |
1028 | #endif |
1029 | |
1030 | private: |
1031 | /// Keep a pointer to the AArch64Subtarget around so that we can |
1032 | /// make the right decision when generating code for different targets. |
1033 | const AArch64Subtarget *Subtarget; |
1034 | |
1035 | llvm::BumpPtrAllocator BumpAlloc; |
1036 | llvm::StringSaver Saver{BumpAlloc}; |
1037 | |
1038 | bool isExtFreeImpl(const Instruction *Ext) const override; |
1039 | |
1040 | void addTypeForNEON(MVT VT); |
1041 | void addTypeForFixedLengthSVE(MVT VT); |
1042 | void addDRType(MVT VT); |
1043 | void addQRType(MVT VT); |
1044 | |
1045 | bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override; |
1046 | |
1047 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
1048 | bool isVarArg, |
1049 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1050 | const SDLoc &DL, SelectionDAG &DAG, |
1051 | SmallVectorImpl<SDValue> &InVals) const override; |
1052 | |
1053 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
1054 | SDNode *Node) const override; |
1055 | |
1056 | SDValue LowerCall(CallLoweringInfo & /*CLI*/, |
1057 | SmallVectorImpl<SDValue> &InVals) const override; |
1058 | |
1059 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
1060 | CallingConv::ID CallConv, bool isVarArg, |
1061 | const SmallVectorImpl<CCValAssign> &RVLocs, |
1062 | const SDLoc &DL, SelectionDAG &DAG, |
1063 | SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
1064 | SDValue ThisVal, bool RequiresSMChange) const; |
1065 | |
1066 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
1067 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
1068 | SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; |
1069 | SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; |
1070 | |
1071 | SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; |
1072 | SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; |
1073 | |
1074 | SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; |
1075 | |
1076 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1077 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1078 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
1079 | |
1080 | bool |
1081 | isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const; |
1082 | |
1083 | /// Finds the incoming stack arguments which overlap the given fixed stack |
1084 | /// object and incorporates their load into the current chain. This prevents |
1085 | /// an upcoming store from clobbering the stack argument before it's used. |
1086 | SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, |
1087 | MachineFrameInfo &MFI, int ClobberedFI) const; |
1088 | |
1089 | bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; |
1090 | |
1091 | void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, |
1092 | SDValue &Chain) const; |
1093 | |
1094 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
1095 | bool isVarArg, |
1096 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1097 | LLVMContext &Context) const override; |
1098 | |
1099 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1100 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1101 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
1102 | SelectionDAG &DAG) const override; |
1103 | |
1104 | SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1105 | unsigned Flag) const; |
1106 | SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, |
1107 | unsigned Flag) const; |
1108 | SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1109 | unsigned Flag) const; |
1110 | SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, |
1111 | unsigned Flag) const; |
1112 | SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, |
1113 | unsigned Flag) const; |
1114 | template <class NodeTy> |
1115 | SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1116 | template <class NodeTy> |
1117 | SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1118 | template <class NodeTy> |
1119 | SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1120 | template <class NodeTy> |
1121 | SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; |
1122 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1123 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1124 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1125 | SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1126 | SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1127 | SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase, |
1128 | const SDLoc &DL, SelectionDAG &DAG) const; |
1129 | SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, |
1130 | SelectionDAG &DAG) const; |
1131 | SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1132 | SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1133 | SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT, |
1134 | AArch64PACKey::ID Key, |
1135 | SDValue Discriminator, |
1136 | SDValue AddrDiscriminator, |
1137 | SelectionDAG &DAG) const; |
1138 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
1139 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
1140 | SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; |
1141 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
1142 | SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; |
1143 | SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, |
1144 | SDValue TVal, SDValue FVal, const SDLoc &dl, |
1145 | SelectionDAG &DAG) const; |
1146 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
1147 | SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
1148 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
1149 | SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; |
1150 | SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const; |
1151 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
1152 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
1153 | SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1154 | SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1155 | SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; |
1156 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
1157 | SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; |
1158 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
1159 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
1160 | SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; |
1161 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1162 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1163 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1164 | SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
1165 | SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
1166 | SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const; |
1167 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1169 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1170 | SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; |
1171 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
1172 | SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1173 | SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; |
1174 | SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, |
1175 | unsigned NewOp) const; |
1176 | SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; |
1177 | SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; |
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1179 | SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
1180 | SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1181 | SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; |
1182 | SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const; |
1183 | SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; |
1184 | SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; |
1185 | SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; |
1186 | SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; |
1187 | SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; |
1188 | SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const; |
1189 | SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; |
1190 | SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; |
1191 | SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; |
1192 | SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; |
1193 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
1194 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
1195 | SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1196 | SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1197 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1198 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1199 | SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const; |
1200 | SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1201 | SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1202 | SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; |
1203 | SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; |
1204 | SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; |
1205 | SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; |
1206 | SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
1207 | SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; |
1208 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
1209 | SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; |
1210 | SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; |
1211 | SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1212 | SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1213 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1214 | |
1215 | SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; |
1216 | |
1217 | SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, |
1218 | SelectionDAG &DAG) const; |
1219 | SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, |
1220 | SelectionDAG &DAG) const; |
1221 | SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1222 | SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const; |
1223 | SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; |
1224 | SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; |
1225 | SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, |
1226 | SelectionDAG &DAG) const; |
1227 | SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; |
1228 | SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; |
1229 | SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const; |
1230 | SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op, |
1231 | SelectionDAG &DAG) const; |
1232 | SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op, |
1233 | SelectionDAG &DAG) const; |
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1235 | SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const; |
1236 | SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const; |
1237 | SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op, |
1238 | SelectionDAG &DAG) const; |
1239 | SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const; |
1240 | SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const; |
1241 | SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const; |
1242 | SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const; |
1243 | SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op, |
1244 | SelectionDAG &DAG) const; |
1245 | |
1246 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1247 | SmallVectorImpl<SDNode *> &Created) const override; |
1248 | SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1249 | SmallVectorImpl<SDNode *> &Created) const override; |
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
1255 | SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
1256 | const DenormalMode &Mode) const override; |
1257 | SDValue getSqrtResultForDenormInput(SDValue Operand, |
1258 | SelectionDAG &DAG) const override; |
1259 | unsigned combineRepeatedFPDivisors() const override; |
1260 | |
1261 | ConstraintType getConstraintType(StringRef Constraint) const override; |
1262 | Register getRegisterByName(const char* RegName, LLT VT, |
1263 | const MachineFunction &MF) const override; |
1264 | |
1265 | /// Examine constraint string and operand type and determine a weight value. |
1266 | /// The operand object must already have been set up with the operand type. |
1267 | ConstraintWeight |
1268 | getSingleConstraintMatchWeight(AsmOperandInfo &info, |
1269 | const char *constraint) const override; |
1270 | |
1271 | std::pair<unsigned, const TargetRegisterClass *> |
1272 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
1273 | StringRef Constraint, MVT VT) const override; |
1274 | |
1275 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
1276 | |
1277 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
1278 | std::vector<SDValue> &Ops, |
1279 | SelectionDAG &DAG) const override; |
1280 | |
1281 | InlineAsm::ConstraintCode |
1282 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
    if (ConstraintCode == "Q")
1284 | return InlineAsm::ConstraintCode::Q; |
1285 | // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are |
1286 | // followed by llvm_unreachable so we'll leave them unimplemented in |
1287 | // the backend for now. |
1288 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
1289 | } |
1290 | |
1291 | /// Handle Lowering flag assembly outputs. |
1292 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
1293 | const SDLoc &DL, |
1294 | const AsmOperandInfo &Constraint, |
1295 | SelectionDAG &DAG) const override; |
1296 | |
1297 | bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override; |
1298 | bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; |
1299 | bool isVectorLoadExtDesirable(SDValue ExtVal) const override; |
1300 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
1301 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
1302 | bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1303 | SDValue &Offset, SelectionDAG &DAG) const; |
1304 | bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, |
1305 | ISD::MemIndexedMode &AM, |
1306 | SelectionDAG &DAG) const override; |
1307 | bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, |
1308 | SDValue &Offset, ISD::MemIndexedMode &AM, |
1309 | SelectionDAG &DAG) const override; |
1310 | bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
1311 | bool IsPre, MachineRegisterInfo &MRI) const override; |
1312 | |
1313 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1314 | SelectionDAG &DAG) const override; |
1315 | void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
1316 | SelectionDAG &DAG) const; |
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;
1320 | |
1321 | bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; |
1322 | |
1323 | void finalizeLowering(MachineFunction &MF) const override; |
1324 | |
1325 | bool shouldLocalize(const MachineInstr &MI, |
1326 | const TargetTransformInfo *TTI) const override; |
1327 | |
1328 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
1329 | const APInt &OriginalDemandedBits, |
1330 | const APInt &OriginalDemandedElts, |
1331 | KnownBits &Known, |
1332 | TargetLoweringOpt &TLO, |
1333 | unsigned Depth) const override; |
1334 | |
1335 | bool isTargetCanonicalConstantNode(SDValue Op) const override; |
1336 | |
1337 | // With the exception of data-predicate transitions, no instructions are |
1338 | // required to cast between legal scalable vector types. However: |
1339 | // 1. Packed and unpacked types have different bit lengths, meaning BITCAST |
1340 | // is not universally useable. |
1341 | // 2. Most unpacked integer types are not legal and thus integer extends |
1342 | // cannot be used to convert between unpacked and packed types. |
1343 | // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used |
1344 | // to transition between unpacked and packed types of the same element type, |
1345 | // with BITCAST used otherwise. |
1346 | // This function does not handle predicate bitcasts. |
1347 | SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; |
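
  // Illustrative example (not a fixed recipe): bitcasting nxv2i64 to the
  // unpacked nxv2f32 may be emitted as a BITCAST from nxv2i64 to the packed
  // nxv4f32 followed by a REINTERPRET_CAST from nxv4f32 down to nxv2f32,
  // following the multiphase scheme described above.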
1348 | |
1349 | // Returns the runtime value for PSTATE.SM by generating a call to |
1350 | // __arm_sme_state. |
1351 | SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL, |
1352 | EVT VT) const; |
1353 | |
1354 | bool preferScalarizeSplat(SDNode *N) const override; |
1355 | |
1356 | unsigned getMinimumJumpTableEntries() const override; |
1357 | |
1358 | bool softPromoteHalfType() const override { return true; } |
1359 | }; |
1360 | |
1361 | namespace AArch64 { |
1362 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1363 | const TargetLibraryInfo *libInfo); |
1364 | } // end namespace AArch64 |
1365 | |
1366 | } // end namespace llvm |
1367 | |
1368 | #endif |
1369 | |