AArch64Subtarget.h source code [llvm_projects/llvm/lib/Target/AArch64/AArch64Subtarget.h]

1	//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file declares the AArch64 specific subclass of TargetSubtarget.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14	#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15
16	#include "AArch64FrameLowering.h"
17	#include "AArch64ISelLowering.h"
18	#include "AArch64InstrInfo.h"
19	#include "AArch64PointerAuth.h"
20	#include "AArch64RegisterInfo.h"
21	#include "AArch64SelectionDAGInfo.h"
22	#include "llvm/CodeGen/GlobalISel/CallLowering.h"
23	#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
24	#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
25	#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
26	#include "llvm/CodeGen/RegisterBankInfo.h"
27	#include "llvm/CodeGen/TargetSubtargetInfo.h"
28	#include "llvm/IR/DataLayout.h"
29	#include "llvm/TargetParser/Triple.h"
30
31	#define GET_SUBTARGETINFO_HEADER
32	#include "AArch64GenSubtargetInfo.inc"
33
34	namespace llvm {
35	class GlobalValue;
36	class StringRef;
37
38	class AArch64Subtarget final : public AArch64GenSubtargetInfo {
39	public:
40	enum ARMProcFamilyEnum : uint8_t {
41	Generic,
42	#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
43	#include "llvm/TargetParser/AArch64TargetParserDef.inc"
44	#undef ARM_PROCESSOR_FAMILY
45	};
46
47	protected:
48	/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
49	ARMProcFamilyEnum ARMProcFamily = Generic;
50
51	// Enable 64-bit vectorization in SLP.
52	unsigned MinVectorRegisterBitWidth = `64`;
53
54	// Bool members corresponding to the SubtargetFeatures defined in tablegen
55	#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
56	bool ATTRIBUTE = DEFAULT;
57	#include "AArch64GenSubtargetInfo.inc"
58
59	unsigned EpilogueVectorizationMinVF = `16`;
60	uint8_t MaxInterleaveFactor = `2`;
61	uint8_t VectorInsertExtractBaseCost = `2`;
62	uint16_t CacheLineSize = `0`;
63	// Default scatter/gather overhead.
64	unsigned ScatterOverhead = `10`;
65	unsigned GatherOverhead = `10`;
66	uint16_t PrefetchDistance = `0`;
67	uint16_t MinPrefetchStride = `1`;
68	unsigned MaxPrefetchIterationsAhead = UINT_MAX;
69	Align PrefFunctionAlignment;
70	Align PrefLoopAlignment;
71	unsigned MaxBytesForLoopAlignment = `0`;
72	unsigned MinimumJumpTableEntries = `4`;
73	unsigned MaxJumpTableSize = `0`;
74
75	// ReserveXRegister[i] - X#i is not available as a general purpose register.
76	BitVector ReserveXRegister;
77
78	// ReserveXRegisterForRA[i] - X#i is not available for register allocator.
79	BitVector ReserveXRegisterForRA;
80
81	// CustomCallUsedXRegister[i] - X#i call saved.
82	BitVector CustomCallSavedXRegs;
83
84	bool IsLittle;
85
86	bool IsStreaming;
87	bool IsStreamingCompatible;
88	std::optional<unsigned> StreamingHazardSize;
89	unsigned MinSVEVectorSizeInBits;
90	unsigned MaxSVEVectorSizeInBits;
91	bool EnableSRLTSubregToRegMitigation;
92	unsigned VScaleForTuning = `1`;
93	TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
94
95	bool EnableSubregLiveness;
96
97	/// TargetTriple - What processor and OS we're targeting.
98	Triple TargetTriple;
99
100	AArch64FrameLowering FrameLowering;
101	AArch64InstrInfo InstrInfo;
102	AArch64SelectionDAGInfo TSInfo;
103	AArch64TargetLowering TLInfo;
104
105	/// GlobalISel related APIs.
106	std::unique_ptr<CallLowering> CallLoweringInfo;
107	std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
108	std::unique_ptr<InstructionSelector> InstSelector;
109	std::unique_ptr<LegalizerInfo> Legalizer;
110	std::unique_ptr<RegisterBankInfo> RegBankInfo;
111
112	private:
113	/// initializeSubtargetDependencies - Initializes using CPUString and the
114	/// passed in feature string so that we can use initializer lists for
115	/// subtarget initialization.
116	AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
117	StringRef CPUString,
118	StringRef TuneCPUString,
119	bool HasMinSize);
120
121	/// Initialize properties based on the selected processor family.
122	void initializeProperties(bool HasMinSize);
123
124	public:
125	/// This constructor initializes the data members to match that
126	/// of the specified triple.
127	AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
128	StringRef FS, const TargetMachine &TM, bool LittleEndian,
129	unsigned MinSVEVectorSizeInBitsOverride = `0`,
130	unsigned MaxSVEVectorSizeInBitsOverride = `0`,
131	bool IsStreaming = false, bool IsStreamingCompatible = false,
132	bool HasMinSize = false,
133	bool EnableSRLTSubregToRegMitigation = false);
134
135	// Getters for SubtargetFeatures defined in tablegen
136	#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
137	bool GETTER() const { return ATTRIBUTE; }
138	#include "AArch64GenSubtargetInfo.inc"
139
140	const AArch64SelectionDAGInfo getSelectionDAGInfo() const* override {
141	return &TSInfo;
142	}
143	const AArch64FrameLowering getFrameLowering() const* override {
144	return &FrameLowering;
145	}
146	const AArch64TargetLowering getTargetLowering() const* override {
147	return &TLInfo;
148	}
149	const AArch64InstrInfo getInstrInfo() const* override { return &InstrInfo; }
150	const AArch64RegisterInfo getRegisterInfo() const* override {
151	return &getInstrInfo()->getRegisterInfo();
152	}
153	const CallLowering getCallLowering() const* override;
154	const InlineAsmLowering getInlineAsmLowering() const* override;
155	InstructionSelector getInstructionSelector() const* override;
156	const LegalizerInfo getLegalizerInfo() const* override;
157	const RegisterBankInfo getRegBankInfo() const* override;
158	const Triple &getTargetTriple() const { return TargetTriple; }
159	bool enableMachineScheduler() const override { return true; }
160	bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
161	bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
162
163	bool enableMachinePipeliner() const override;
164	bool useDFAforSMS() const override { return false; }
165
166	/// Returns ARM processor family.
167	/// Avoid this function! CPU specifics should be kept local to this class
168	/// and preferably modeled with SubtargetFeatures or properties in
169	/// initializeProperties().
170	ARMProcFamilyEnum getProcFamily() const {
171	return ARMProcFamily;
172	}
173
174	/// Returns true if the processor is an Apple M-series or aligned A-series
175	/// (A14 or newer).
176	bool isAppleMLike() const {
177	switch (ARMProcFamily) {
178	case AppleA14:
179	case AppleA15:
180	case AppleA16:
181	case AppleA17:
182	case AppleM4:
183	case AppleM5:
184	return true;
185	default:
186	return false;
187	}
188	}
189
190	bool isXRaySupported() const override { return true; }
191
192	/// Returns true if the function has a streaming body.
193	bool isStreaming() const { return IsStreaming; }
194
195	/// Returns true if the function has a streaming-compatible body.
196	bool isStreamingCompatible() const { return IsStreamingCompatible; }
197
198	/// Returns the size of memory region that if accessed by both the CPU and
199	/// the SME unit could result in a hazard. 0 = disabled.
200	unsigned getStreamingHazardSize() const {
201	return StreamingHazardSize.value_or(
202	u: !hasSMEFA64() && hasSME() && hasSVE() ? `1024` : `0`);
203	}
204
205	/// Returns true if the target has NEON and the function at runtime is known
206	/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
207	/// mode, which disables NEON instructions).
208	bool isNeonAvailable() const {
209	return hasNEON() &&
210	(hasSMEFA64() \|\| (!isStreaming() && !isStreamingCompatible()));
211	}
212
213	/// Returns true if the target has SVE and can use the full range of SVE
214	/// instructions, for example because it knows the function is known not to be
215	/// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
216	bool isSVEAvailable() const {
217	return hasSVE() &&
218	(hasSMEFA64() \|\| (!isStreaming() && !isStreamingCompatible()));
219	}
220
221	/// Returns true if the target has access to the streaming-compatible subset
222	/// of SVE instructions.
223	bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); }
224
225	/// Returns true if the target has access to either the full range of SVE
226	/// instructions, or the streaming-compatible subset of SVE instructions.
227	bool isSVEorStreamingSVEAvailable() const {
228	return hasSVE() \|\| isStreamingSVEAvailable();
229	}
230
231	/// Returns true if the target has access to either the full range of SVE
232	/// instructions, or the streaming-compatible subset of SVE instructions
233	/// available to SME2.
234	bool isNonStreamingSVEorSME2Available() const {
235	return isSVEAvailable() \|\| (isSVEorStreamingSVEAvailable() && hasSME2());
236	}
237
238	unsigned getMinVectorRegisterBitWidth() const {
239	// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
240	// we don't yet support streaming-compatible codegen support that we trust
241	// is safe for functions that may be executed in streaming-SVE mode.
242	// By returning '0' here, we disable vectorization.
243	if (!isSVEAvailable() && !isNeonAvailable())
244	return `0`;
245	return MinVectorRegisterBitWidth;
246	}
247
248	bool isXRegisterReserved(size_t i) const { return ReserveXRegister [i]; }
249	bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA [i]; }
250	unsigned getNumXRegisterReserved() const {
251	BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
252	AllReservedX \|= ReserveXRegister;
253	AllReservedX \|= ReserveXRegisterForRA;
254	return AllReservedX.count();
255	}
256	bool isLRReservedForRA() const { return ReserveLRForRA; }
257	bool isXRegCustomCalleeSaved(size_t i) const {
258	return CustomCallSavedXRegs [i];
259	}
260	bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
261
262	/// Return true if the CPU supports any kind of instruction fusion.
263	bool hasFusion() const {
264	return hasArithmeticBccFusion() \|\| hasArithmeticCbzFusion() \|\|
265	hasFuseAES() \|\| hasFuseArithmeticLogic() \|\| hasFuseCmpCSel() \|\|
266	hasFuseFCmpFCSel() \|\| hasFuseCmpCSet() \|\| hasFuseAdrpAdd() \|\|
267	hasFuseLiterals();
268	}
269
270	unsigned getEpilogueVectorizationMinVF() const {
271	return EpilogueVectorizationMinVF;
272	}
273	unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
274	unsigned getVectorInsertExtractBaseCost() const;
275	unsigned getCacheLineSize() const override { return CacheLineSize; }
276	unsigned getScatterOverhead() const { return ScatterOverhead; }
277	unsigned getGatherOverhead() const { return GatherOverhead; }
278	unsigned getPrefetchDistance() const override { return PrefetchDistance; }
279	unsigned getMinPrefetchStride(unsigned NumMemAccesses,
280	unsigned NumStridedMemAccesses,
281	unsigned NumPrefetches,
282	bool HasCall) const override {
283	return MinPrefetchStride;
284	}
285	unsigned getMaxPrefetchIterationsAhead() const override {
286	return MaxPrefetchIterationsAhead;
287	}
288	Align getPrefFunctionAlignment() const {
289	return PrefFunctionAlignment;
290	}
291	Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
292
293	unsigned getMaxBytesForLoopAlignment() const {
294	return MaxBytesForLoopAlignment;
295	}
296
297	unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
298	unsigned getMinimumJumpTableEntries() const {
299	return MinimumJumpTableEntries;
300	}
301
302	/// CPU has TBI (top byte of addresses is ignored during HW address
303	/// translation) and OS enables it.
304	bool supportsAddressTopByteIgnored() const;
305
306	bool isLittleEndian() const { return IsLittle; }
307
308	bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
309	bool isTargetIOS() const { return TargetTriple.isiOS(); }
310	bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
311	bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
312	bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
313	bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
314	bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }
315	bool isLFI() const { return TargetTriple.isLFI(); }
316
317	bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
318	bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
319	bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
320
321	bool isTargetILP32() const {
322	return TargetTriple.isArch32Bit() \|\|
323	TargetTriple.getEnvironment() == Triple::GNUILP32;
324	}
325
326	bool useAA() const override;
327
328	bool addrSinkUsingGEPs() const override {
329	// Keeping GEPs inbounds is important for exploiting AArch64
330	// addressing-modes in ILP32 mode.
331	return useAA() \|\| isTargetILP32();
332	}
333
334	bool useSmallAddressing() const {
335	switch (TLInfo.getTargetMachine().getCodeModel()) {
336	case CodeModel::Kernel:
337	// Kernel is currently allowed only for Fuchsia targets,
338	// where it is the same as Small for almost all purposes.
339	case CodeModel::Small:
340	return true;
341	default:
342	return false;
343	}
344	}
345
346	/// Returns whether the operating system makes it safer to store sensitive
347	/// values in x16 and x17 as opposed to other registers.
348	bool isX16X17Safer() const;
349
350	/// ParseSubtargetFeatures - Parses features string setting specified
351	/// subtarget options. Definition of function is auto generated by tblgen.
352	void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
353
354	/// ClassifyGlobalReference - Find the target operand flags that describe
355	/// how a global value should be referenced for the current subtarget.
356	unsigned ClassifyGlobalReference(const GlobalValue *GV,
357	const TargetMachine &TM) const;
358
359	unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
360	const TargetMachine &TM) const;
361
362	/// This function is design to compatible with the function def in other
363	/// targets and escape build error about the virtual function def in base
364	/// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it.
365	unsigned char
366	classifyGlobalFunctionReference(const GlobalValue GV) const* override {
367	return `0`;
368	}
369
370	void overrideSchedPolicy(MachineSchedPolicy &Policy,
371	const SchedRegion &Region) const override;
372
373	void adjustSchedDependency(SUnit Def, int* DefOpIdx, SUnit Use, int* UseOpIdx,
374	SDep &Dep,
375	const TargetSchedModel SchedModel) const* override;
376
377	bool enableEarlyIfConversion() const override;
378
379	std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
380
381	bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
382	switch (CC) {
383	case CallingConv::C:
384	case CallingConv::Fast:
385	case CallingConv::Swift:
386	case CallingConv::SwiftTail:
387	return isTargetWindows();
388	case CallingConv::PreserveNone:
389	return IsVarArg && isTargetWindows();
390	case CallingConv::Win64:
391	return true;
392	default:
393	return false;
394	}
395	}
396
397	/// Return whether FrameLowering should always set the "extended frame
398	/// present" bit in FP, or set it based on a symbol in the runtime.
399	bool swiftAsyncContextIsDynamicallySet() const {
400	// Older OS versions (particularly system unwinders) are confused by the
401	// Swift extended frame, so when building code that might be run on them we
402	// must dynamically query the concurrency library to determine whether
403	// extended frames should be flagged as present.
404	const Triple &TT = getTargetTriple();
405
406	unsigned Major = TT.getOSVersion().getMajor();
407	switch(TT.getOS()) {
408	default:
409	return false;
410	case Triple::IOS:
411	case Triple::TvOS:
412	return Major < `15`;
413	case Triple::WatchOS:
414	return Major < `8`;
415	case Triple::MacOSX:
416	case Triple::Darwin:
417	return Major < `12`;
418	}
419	}
420
421	void mirFileLoaded(MachineFunction &MF) const override;
422
423	// Return the known range for the bit length of SVE data registers. A value
424	// of 0 means nothing is known about that particular limit beyond what's
425	// implied by the architecture.
426	unsigned getMaxSVEVectorSizeInBits() const {
427	assert(isSVEorStreamingSVEAvailable() &&
428	"Tried to get SVE vector length without SVE support!");
429	return MaxSVEVectorSizeInBits;
430	}
431
432	unsigned getMinSVEVectorSizeInBits() const {
433	assert(isSVEorStreamingSVEAvailable() &&
434	"Tried to get SVE vector length without SVE support!");
435	return MinSVEVectorSizeInBits;
436	}
437
438	// Return the known bit length of SVE data registers. A value of 0 means the
439	// length is unknown beyond what's implied by the architecture.
440	unsigned getSVEVectorSizeInBits() const {
441	assert(isSVEorStreamingSVEAvailable() &&
442	"Tried to get SVE vector length without SVE support!");
443	if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits)
444	return MaxSVEVectorSizeInBits;
445	return `0`;
446	}
447
448	bool useSVEForFixedLengthVectors() const {
449	if (!isSVEorStreamingSVEAvailable())
450	return false;
451
452	// Prefer NEON unless larger SVE registers are available.
453	return !isNeonAvailable() \|\| getMinSVEVectorSizeInBits() >= `256`;
454	}
455
456	bool useSVEForFixedLengthVectors(EVT VT) const {
457	if (!useSVEForFixedLengthVectors() \|\| !VT.isFixedLengthVector())
458	return false;
459	return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock \|\|
460	!isNeonAvailable();
461	}
462
463	unsigned getVScaleForTuning() const { return VScaleForTuning; }
464
465	TailFoldingOpts getSVETailFoldingDefaultOpts() const {
466	return DefaultSVETFOpts;
467	}
468
469	/// Returns true to use the addvl/inc/dec instructions, as opposed to separate
470	/// add + cnt instructions.
471	bool useScalarIncVL() const;
472
473	bool enableSRLTSubregToRegMitigation() const {
474	return EnableSRLTSubregToRegMitigation;
475	}
476
477	/// Choose a method of checking LR before performing a tail call.
478	AArch64PAuth::AuthCheckMethod
479	getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
480
481	/// Compute the integer discriminator for a given BlockAddress constant, if
482	/// blockaddress signing is enabled, or std::nullopt otherwise.
483	/// Blockaddress signing is controlled by the function attribute
484	/// "ptrauth-indirect-gotos" on the parent function.
485	/// Note that this assumes the discriminator is independent of the indirect
486	/// goto branch site itself, i.e., it's the same for all BlockAddresses in
487	/// a function.
488	std::optional<uint16_t>
489	getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
490
491	bool enableAggressiveInterleaving() const { return AggressiveInterleaving; }
492	};
493	} // End llvm namespace
494
495	#endif
496

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64Subtarget.h