1//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the AArch64 specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15
16#include "AArch64FrameLowering.h"
17#include "AArch64ISelLowering.h"
18#include "AArch64InstrInfo.h"
19#include "AArch64PointerAuth.h"
20#include "AArch64RegisterInfo.h"
21#include "AArch64SelectionDAGInfo.h"
22#include "llvm/CodeGen/GlobalISel/CallLowering.h"
23#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
24#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
25#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
26#include "llvm/CodeGen/RegisterBankInfo.h"
27#include "llvm/CodeGen/TargetSubtargetInfo.h"
28#include "llvm/IR/DataLayout.h"
29#include "llvm/TargetParser/Triple.h"
30
31#define GET_SUBTARGETINFO_HEADER
32#include "AArch64GenSubtargetInfo.inc"
33
34namespace llvm {
35class GlobalValue;
36class StringRef;
37
38class AArch64Subtarget final : public AArch64GenSubtargetInfo {
39public:
40 enum ARMProcFamilyEnum : uint8_t {
41 Generic,
42#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
43#include "llvm/TargetParser/AArch64TargetParserDef.inc"
44#undef ARM_PROCESSOR_FAMILY
45 };
46
47protected:
48 /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
49 ARMProcFamilyEnum ARMProcFamily = Generic;
50
51 // Enable 64-bit vectorization in SLP.
52 unsigned MinVectorRegisterBitWidth = 64;
53
54// Bool members corresponding to the SubtargetFeatures defined in tablegen
55#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
56 bool ATTRIBUTE = DEFAULT;
57#include "AArch64GenSubtargetInfo.inc"
58
59 unsigned EpilogueVectorizationMinVF = 16;
60 uint8_t MaxInterleaveFactor = 2;
61 uint8_t VectorInsertExtractBaseCost = 2;
62 uint16_t CacheLineSize = 64;
63 // Default scatter/gather overhead.
64 unsigned ScatterOverhead = 10;
65 unsigned GatherOverhead = 10;
66 uint16_t PrefetchDistance = 0;
67 uint16_t MinPrefetchStride = 1;
68 unsigned MaxPrefetchIterationsAhead = UINT_MAX;
69 Align PrefFunctionAlignment;
70 Align PrefLoopAlignment;
71 unsigned MaxBytesForLoopAlignment = 0;
72 unsigned MinimumJumpTableEntries = 4;
73 unsigned MaxJumpTableSize = 0;
74
75 // ReserveXRegister[i] - X#i is not available as a general purpose register.
76 BitVector ReserveXRegister;
77
78 // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
79 BitVector ReserveXRegisterForRA;
80
81 // CustomCallUsedXRegister[i] - X#i call saved.
82 BitVector CustomCallSavedXRegs;
83
84 bool IsLittle;
85
86 bool IsStreaming;
87 bool IsStreamingCompatible;
88 std::optional<unsigned> StreamingHazardSize;
89 unsigned MinSVEVectorSizeInBits;
90 unsigned MaxSVEVectorSizeInBits;
91 bool EnableSRLTSubregToRegMitigation;
92 unsigned VScaleForTuning = 1;
93 TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
94
95 bool EnableSubregLiveness;
96
97 /// TargetTriple - What processor and OS we're targeting.
98 Triple TargetTriple;
99
100 AArch64FrameLowering FrameLowering;
101 AArch64InstrInfo InstrInfo;
102 AArch64SelectionDAGInfo TSInfo;
103 AArch64TargetLowering TLInfo;
104
105 /// GlobalISel related APIs.
106 std::unique_ptr<CallLowering> CallLoweringInfo;
107 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
108 std::unique_ptr<InstructionSelector> InstSelector;
109 std::unique_ptr<LegalizerInfo> Legalizer;
110 std::unique_ptr<RegisterBankInfo> RegBankInfo;
111
112private:
113 /// initializeSubtargetDependencies - Initializes using CPUString and the
114 /// passed in feature string so that we can use initializer lists for
115 /// subtarget initialization.
116 AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
117 StringRef CPUString,
118 StringRef TuneCPUString,
119 bool HasMinSize);
120
121 /// Initialize properties based on the selected processor family.
122 void initializeProperties(bool HasMinSize);
123
124public:
125 /// This constructor initializes the data members to match that
126 /// of the specified triple.
127 AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
128 StringRef FS, const TargetMachine &TM, bool LittleEndian,
129 unsigned MinSVEVectorSizeInBitsOverride = 0,
130 unsigned MaxSVEVectorSizeInBitsOverride = 0,
131 bool IsStreaming = false, bool IsStreamingCompatible = false,
132 bool HasMinSize = false,
133 bool EnableSRLTSubregToRegMitigation = false);
134
135// Getters for SubtargetFeatures defined in tablegen
136#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
137 bool GETTER() const { return ATTRIBUTE; }
138#include "AArch64GenSubtargetInfo.inc"
139
140 const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
141 return &TSInfo;
142 }
143 const AArch64FrameLowering *getFrameLowering() const override {
144 return &FrameLowering;
145 }
146 const AArch64TargetLowering *getTargetLowering() const override {
147 return &TLInfo;
148 }
149 const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
150 const AArch64RegisterInfo *getRegisterInfo() const override {
151 return &getInstrInfo()->getRegisterInfo();
152 }
153 const CallLowering *getCallLowering() const override;
154 const InlineAsmLowering *getInlineAsmLowering() const override;
155 InstructionSelector *getInstructionSelector() const override;
156 const LegalizerInfo *getLegalizerInfo() const override;
157 const RegisterBankInfo *getRegBankInfo() const override;
158 const Triple &getTargetTriple() const { return TargetTriple; }
159 bool enableMachineScheduler() const override { return true; }
160 bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
161 bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
162 bool enableSpillageCopyElimination() const override { return true; }
163
164 bool enableMachinePipeliner() const override;
165 bool useDFAforSMS() const override { return false; }
166
167 /// Returns ARM processor family.
168 /// Avoid this function! CPU specifics should be kept local to this class
169 /// and preferably modeled with SubtargetFeatures or properties in
170 /// initializeProperties().
171 ARMProcFamilyEnum getProcFamily() const {
172 return ARMProcFamily;
173 }
174
175 /// Returns true if the processor is an Apple M-series or aligned A-series
176 /// (A14 or newer).
177 bool isAppleMLike() const {
178 switch (ARMProcFamily) {
179 case AppleA14:
180 case AppleA15:
181 case AppleA16:
182 case AppleA17:
183 case AppleM4:
184 case AppleM5:
185 return true;
186 default:
187 return false;
188 }
189 }
190
191 bool isXRaySupported() const override { return true; }
192
193 /// Returns true if the function has a streaming body.
194 bool isStreaming() const { return IsStreaming; }
195
196 /// Returns true if the function has a streaming-compatible body.
197 bool isStreamingCompatible() const { return IsStreamingCompatible; }
198
199 /// Returns the size of memory region that if accessed by both the CPU and
200 /// the SME unit could result in a hazard. 0 = disabled.
201 unsigned getStreamingHazardSize() const {
202 return StreamingHazardSize.value_or(
203 u: !hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0);
204 }
205
206 /// Returns true if the target has NEON and the function at runtime is known
207 /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
208 /// mode, which disables NEON instructions).
209 bool isNeonAvailable() const {
210 return hasNEON() &&
211 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
212 }
213
214 /// Returns true if the target has SVE and can use the full range of SVE
215 /// instructions, for example because it knows the function is known not to be
216 /// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
217 bool isSVEAvailable() const {
218 return hasSVE() &&
219 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
220 }
221
222 /// Returns true if the target has access to the streaming-compatible subset
223 /// of SVE instructions.
224 bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); }
225
226 /// Returns true if the target has access to either the full range of SVE
227 /// instructions, or the streaming-compatible subset of SVE instructions.
228 bool isSVEorStreamingSVEAvailable() const {
229 return hasSVE() || isStreamingSVEAvailable();
230 }
231
232 /// Returns true if the target has access to either the full range of SVE
233 /// instructions, or the streaming-compatible subset of SVE instructions
234 /// available to SME2.
235 bool isNonStreamingSVEorSME2Available() const {
236 return isSVEAvailable() || (isSVEorStreamingSVEAvailable() && hasSME2());
237 }
238
239 unsigned getMinVectorRegisterBitWidth() const {
240 // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
241 // we don't yet support streaming-compatible codegen support that we trust
242 // is safe for functions that may be executed in streaming-SVE mode.
243 // By returning '0' here, we disable vectorization.
244 if (!isSVEAvailable() && !isNeonAvailable())
245 return 0;
246 return MinVectorRegisterBitWidth;
247 }
248
249 bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
250 bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
251 unsigned getNumXRegisterReserved() const {
252 BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
253 AllReservedX |= ReserveXRegister;
254 AllReservedX |= ReserveXRegisterForRA;
255 return AllReservedX.count();
256 }
257 bool isLRReservedForRA() const { return ReserveLRForRA; }
258 bool isXRegCustomCalleeSaved(size_t i) const {
259 return CustomCallSavedXRegs[i];
260 }
261 bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
262
263 /// Return true if the CPU supports any kind of instruction fusion.
264 bool hasFusion() const {
265 return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
266 hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCmpCSel() ||
267 hasFuseFCmpFCSel() || hasFuseCmpCSet() || hasFuseAdrpAdd() ||
268 hasFuseLiterals();
269 }
270
271 unsigned getEpilogueVectorizationMinVF() const {
272 return EpilogueVectorizationMinVF;
273 }
274 unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
275 unsigned getVectorInsertExtractBaseCost() const;
276 unsigned getCacheLineSize() const override { return CacheLineSize; }
277 unsigned getScatterOverhead() const { return ScatterOverhead; }
278 unsigned getGatherOverhead() const { return GatherOverhead; }
279 unsigned getPrefetchDistance() const override { return PrefetchDistance; }
280 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
281 unsigned NumStridedMemAccesses,
282 unsigned NumPrefetches,
283 bool HasCall) const override {
284 return MinPrefetchStride;
285 }
286 unsigned getMaxPrefetchIterationsAhead() const override {
287 return MaxPrefetchIterationsAhead;
288 }
289 Align getPrefFunctionAlignment() const {
290 return PrefFunctionAlignment;
291 }
292 Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
293
294 unsigned getMaxBytesForLoopAlignment() const {
295 return MaxBytesForLoopAlignment;
296 }
297
298 unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
299 unsigned getMinimumJumpTableEntries() const {
300 return MinimumJumpTableEntries;
301 }
302
303 /// CPU has TBI (top byte of addresses is ignored during HW address
304 /// translation) and OS enables it.
305 bool supportsAddressTopByteIgnored() const;
306
307 bool isLittleEndian() const { return IsLittle; }
308
309 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
310 bool isTargetIOS() const { return TargetTriple.isiOS(); }
311 bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
312 bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
313 bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
314 bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
315 bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }
316 bool isLFI() const { return TargetTriple.isLFI(); }
317
318 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
319 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
320 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
321
322 bool isTargetILP32() const {
323 return TargetTriple.isArch32Bit() ||
324 TargetTriple.getEnvironment() == Triple::GNUILP32;
325 }
326
327 bool useAA() const override;
328
329 bool addrSinkUsingGEPs() const override {
330 // Keeping GEPs inbounds is important for exploiting AArch64
331 // addressing-modes in ILP32 mode.
332 return useAA() || isTargetILP32();
333 }
334
335 bool useSmallAddressing() const {
336 switch (TLInfo.getTargetMachine().getCodeModel()) {
337 case CodeModel::Kernel:
338 // Kernel is currently allowed only for Fuchsia targets,
339 // where it is the same as Small for almost all purposes.
340 case CodeModel::Small:
341 return true;
342 default:
343 return false;
344 }
345 }
346
347 /// Returns whether the operating system makes it safer to store sensitive
348 /// values in x16 and x17 as opposed to other registers.
349 bool isX16X17Safer() const;
350
351 /// ParseSubtargetFeatures - Parses features string setting specified
352 /// subtarget options. Definition of function is auto generated by tblgen.
353 void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
354
355 /// ClassifyGlobalReference - Find the target operand flags that describe
356 /// how a global value should be referenced for the current subtarget.
357 unsigned ClassifyGlobalReference(const GlobalValue *GV,
358 const TargetMachine &TM) const;
359
360 unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
361 const TargetMachine &TM) const;
362
363 /// This function is design to compatible with the function def in other
364 /// targets and escape build error about the virtual function def in base
365 /// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it.
366 unsigned char
367 classifyGlobalFunctionReference(const GlobalValue *GV) const override {
368 return 0;
369 }
370
371 void overrideSchedPolicy(MachineSchedPolicy &Policy,
372 const SchedRegion &Region) const override;
373
374 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
375 SDep &Dep,
376 const TargetSchedModel *SchedModel) const override;
377
378 bool enableEarlyIfConversion() const override;
379
380 std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
381
382 bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
383 switch (CC) {
384 case CallingConv::C:
385 case CallingConv::Fast:
386 case CallingConv::Swift:
387 case CallingConv::SwiftTail:
388 return isTargetWindows();
389 case CallingConv::PreserveNone:
390 return IsVarArg && isTargetWindows();
391 case CallingConv::Win64:
392 return true;
393 default:
394 return false;
395 }
396 }
397
398 /// Return whether FrameLowering should always set the "extended frame
399 /// present" bit in FP, or set it based on a symbol in the runtime.
400 bool swiftAsyncContextIsDynamicallySet() const {
401 // Older OS versions (particularly system unwinders) are confused by the
402 // Swift extended frame, so when building code that might be run on them we
403 // must dynamically query the concurrency library to determine whether
404 // extended frames should be flagged as present.
405 const Triple &TT = getTargetTriple();
406
407 unsigned Major = TT.getOSVersion().getMajor();
408 switch(TT.getOS()) {
409 default:
410 return false;
411 case Triple::IOS:
412 case Triple::TvOS:
413 return Major < 15;
414 case Triple::WatchOS:
415 return Major < 8;
416 case Triple::MacOSX:
417 case Triple::Darwin:
418 return Major < 12;
419 }
420 }
421
422 void mirFileLoaded(MachineFunction &MF) const override;
423
424 // Return the known range for the bit length of SVE data registers. A value
425 // of 0 means nothing is known about that particular limit beyond what's
426 // implied by the architecture.
427 unsigned getMaxSVEVectorSizeInBits() const {
428 assert(isSVEorStreamingSVEAvailable() &&
429 "Tried to get SVE vector length without SVE support!");
430 return MaxSVEVectorSizeInBits;
431 }
432
433 unsigned getMinSVEVectorSizeInBits() const {
434 assert(isSVEorStreamingSVEAvailable() &&
435 "Tried to get SVE vector length without SVE support!");
436 return MinSVEVectorSizeInBits;
437 }
438
439 // Return the known bit length of SVE data registers. A value of 0 means the
440 // length is unknown beyond what's implied by the architecture.
441 unsigned getSVEVectorSizeInBits() const {
442 assert(isSVEorStreamingSVEAvailable() &&
443 "Tried to get SVE vector length without SVE support!");
444 if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits)
445 return MaxSVEVectorSizeInBits;
446 return 0;
447 }
448
449 // Return the known bit length of SVE predicate registers. A value of 0 means
450 // the length is unknown beyond what's implied by the architecture.
451 unsigned getSVEPredicateSizeInBits() const {
452 return getSVEVectorSizeInBits() / 8;
453 }
454
455 bool useSVEForFixedLengthVectors() const {
456 if (!isSVEorStreamingSVEAvailable())
457 return false;
458
459 // Prefer NEON unless larger SVE registers are available.
460 return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
461 }
462
463 bool useSVEForFixedLengthVectors(EVT VT) const {
464 if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
465 return false;
466 return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
467 !isNeonAvailable();
468 }
469
470 unsigned getVScaleForTuning() const { return VScaleForTuning; }
471
472 TailFoldingOpts getSVETailFoldingDefaultOpts() const {
473 return DefaultSVETFOpts;
474 }
475
476 /// Returns true to use the addvl/inc/dec instructions, as opposed to separate
477 /// add + cnt instructions.
478 bool useScalarIncVL() const;
479
480 bool enableSRLTSubregToRegMitigation() const {
481 return EnableSRLTSubregToRegMitigation;
482 }
483
484 /// Choose a method of checking LR before performing a tail call.
485 AArch64PAuth::AuthCheckMethod
486 getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
487
488 /// Compute the integer discriminator for a given BlockAddress constant, if
489 /// blockaddress signing is enabled, or std::nullopt otherwise.
490 /// Blockaddress signing is controlled by the function attribute
491 /// "ptrauth-indirect-gotos" on the parent function.
492 /// Note that this assumes the discriminator is independent of the indirect
493 /// goto branch site itself, i.e., it's the same for all BlockAddresses in
494 /// a function.
495 std::optional<uint16_t>
496 getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
497
498 bool enableAggressiveInterleaving() const { return AggressiveInterleaving; }
499};
500} // End llvm namespace
501
502#endif
503