1//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the AArch64 specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15
16#include "AArch64FrameLowering.h"
17#include "AArch64ISelLowering.h"
18#include "AArch64InstrInfo.h"
19#include "AArch64PointerAuth.h"
20#include "AArch64RegisterInfo.h"
21#include "AArch64SelectionDAGInfo.h"
22#include "llvm/CodeGen/GlobalISel/CallLowering.h"
23#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
24#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
25#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
26#include "llvm/CodeGen/RegisterBankInfo.h"
27#include "llvm/CodeGen/TargetSubtargetInfo.h"
28#include "llvm/IR/DataLayout.h"
29
30#define GET_SUBTARGETINFO_HEADER
31#include "AArch64GenSubtargetInfo.inc"
32
33namespace llvm {
34class GlobalValue;
35class StringRef;
36class Triple;
37
38class AArch64Subtarget final : public AArch64GenSubtargetInfo {
39public:
/// Processor families known to this subtarget, stored in a uint8_t.
/// `Others` is the first enumerator; every further enumerator is produced by
/// expanding ARM_PROCESSOR_FAMILY(ENUM) over AArch64TargetParserDef.inc. The
/// helper macro is undefined again immediately after the include.
40 enum ARMProcFamilyEnum : uint8_t {
41 Others,
42#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
43#include "llvm/TargetParser/AArch64TargetParserDef.inc"
44#undef ARM_PROCESSOR_FAMILY
45 };
46
47protected:
48 /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
/// Defaults to Others and is exposed read-only through getProcFamily().
49 ARMProcFamilyEnum ARMProcFamily = Others;
50
51 // Enable 64-bit vectorization in SLP.
// This is the value getMinVectorRegisterBitWidth() reports whenever NEON or
// SVE is usable; that getter reports 0 otherwise.
52 unsigned MinVectorRegisterBitWidth = 64;
53
54// Bool members corresponding to the SubtargetFeatures defined in tablegen
// Each GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry in the
// generated .inc expands to one `bool ATTRIBUTE = DEFAULT;` member.
55#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
56 bool ATTRIBUTE = DEFAULT;
57#include "AArch64GenSubtargetInfo.inc"
58
// Tuning parameters with their in-class defaults. Each one is handed out by
// the like-named getter further down in this class
// (getVectorInsertExtractBaseCost() is only declared in this header, so its
// exact result is defined out of line).
// NOTE(review): presumably adjusted per CPU by initializeProperties() -
// confirm in the .cpp.
59 uint8_t MaxInterleaveFactor = 2;
60 uint8_t VectorInsertExtractBaseCost = 2;
61 uint16_t CacheLineSize = 0;
62 uint16_t PrefetchDistance = 0;
63 uint16_t MinPrefetchStride = 1;
64 unsigned MaxPrefetchIterationsAhead = UINT_MAX;
// The two Align members have no explicit initializer and are therefore
// default-constructed.
65 Align PrefFunctionAlignment;
66 Align PrefLoopAlignment;
67 unsigned MaxBytesForLoopAlignment = 0;
68 unsigned MinimumJumpTableEntries = 4;
69 unsigned MaxJumpTableSize = 0;
70
71 // ReserveXRegister[i] - X#i is not available as a general purpose register.
72 BitVector ReserveXRegister;
73
74 // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
75 BitVector ReserveXRegisterForRA;
76
77 // CustomCallSavedXRegs[i] - X#i is treated as callee-saved (custom calling
// convention). Queried through isXRegCustomCalleeSaved(i); any set bit makes
// hasCustomCallingConv() return true.
78 BitVector CustomCallSavedXRegs;
79
// Endianness flag returned by isLittleEndian(). No in-class initializer.
80 bool IsLittle;
81
// Streaming-mode flags returned by isStreaming() / isStreamingCompatible().
// No in-class initializers.
// NOTE(review): presumably set from the like-named constructor arguments -
// confirm in the .cpp.
82 bool IsStreaming;
83 bool IsStreamingCompatible;
// Known bounds, in bits, on the SVE data register length; returned by
// getMinSVEVectorSizeInBits() / getMaxSVEVectorSizeInBits(). No in-class
// initializers.
84 unsigned MinSVEVectorSizeInBits;
85 unsigned MaxSVEVectorSizeInBits;
// Returned by getVScaleForTuning(); defaults to 2.
86 unsigned VScaleForTuning = 2;
// Returned by getSVETailFoldingDefaultOpts(); tail folding is disabled unless
// this default is changed elsewhere.
87 TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
88
89 /// TargetTriple - What processor and OS we're targeting.
/// Returned by getTargetTriple() and consulted by the isTarget*() predicates.
90 Triple TargetTriple;
91
// Lowering/info objects held by value in this subtarget. Their addresses are
// returned by getFrameLowering(), getInstrInfo(), getSelectionDAGInfo() and
// getTargetLowering() respectively.
92 AArch64FrameLowering FrameLowering;
93 AArch64InstrInfo InstrInfo;
94 AArch64SelectionDAGInfo TSInfo;
95 AArch64TargetLowering TLInfo;
96
97 /// GlobalISel related APIs.
/// Each object is owned through a std::unique_ptr. The matching accessors
/// (getCallLowering(), getInlineAsmLowering(), getInstructionSelector(),
/// getLegalizerInfo(), getRegBankInfo()) are declared in this class and
/// defined out of line.
98 std::unique_ptr<CallLowering> CallLoweringInfo;
99 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
100 std::unique_ptr<InstructionSelector> InstSelector;
101 std::unique_ptr<LegalizerInfo> Legalizer;
102 std::unique_ptr<RegisterBankInfo> RegBankInfo;
103
104private:
105 /// initializeSubtargetDependencies - Initializes using CPUString and the
106 /// passed in feature string so that we can use initializer lists for
107 /// subtarget initialization.
/// Takes the feature string, the CPU name, the tuning CPU name and a
/// HasMinSize flag, and returns a reference to an AArch64Subtarget.
/// NOTE(review): presumably returns *this so the call can be used inside the
/// constructor's member-initializer list - confirm in the .cpp.
108 AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
109 StringRef CPUString,
110 StringRef TuneCPUString,
111 bool HasMinSize);
112
113 /// Initialize properties based on the selected processor family.
/// Declared here only; the definition lives out of line.
114 void initializeProperties(bool HasMinSize);
115
116public:
117 /// This constructor initializes the data members to match that
118 /// of the specified triple.
/// The two SVE size overrides default to 0, and the IsStreaming,
/// IsStreamingCompatible and HasMinSize flags default to false, so existing
/// callers may omit them. Defined out of line.
119 AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
120 StringRef FS, const TargetMachine &TM, bool LittleEndian,
121 unsigned MinSVEVectorSizeInBitsOverride = 0,
122 unsigned MaxSVEVectorSizeInBitsOverride = 0,
123 bool IsStreaming = false, bool IsStreamingCompatible = false,
124 bool HasMinSize = false);
125
126// Getters for SubtargetFeatures defined in tablegen
// Each GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry in the
// generated .inc expands to `bool GETTER() const { return ATTRIBUTE; }`, i.e.
// one const accessor per feature bool declared earlier in the class.
127#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
128 bool GETTER() const { return ATTRIBUTE; }
129#include "AArch64GenSubtargetInfo.inc"
130
// Accessors for the lowering/info objects held by this subtarget. Each one
// returns the address of the corresponding member.
131 const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
132 return &TSInfo;
133 }
134 const AArch64FrameLowering *getFrameLowering() const override {
135 return &FrameLowering;
136 }
137 const AArch64TargetLowering *getTargetLowering() const override {
138 return &TLInfo;
139 }
140 const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
// The register info is not a separate member; it is obtained from the
// instruction info object.
141 const AArch64RegisterInfo *getRegisterInfo() const override {
142 return &getInstrInfo()->getRegisterInfo();
143 }
// GlobalISel accessors; declared here and defined out of line.
144 const CallLowering *getCallLowering() const override;
145 const InlineAsmLowering *getInlineAsmLowering() const override;
146 InstructionSelector *getInstructionSelector() const override;
147 const LegalizerInfo *getLegalizerInfo() const override;
148 const RegisterBankInfo *getRegBankInfo() const override;
/// Returns the triple this subtarget was created for.
149 const Triple &getTargetTriple() const { return TargetTriple; }
// The machine scheduler is always enabled; the post-RA scheduler follows the
// usePostRAScheduler() feature getter.
150 bool enableMachineScheduler() const override { return true; }
151 bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
152
// enableMachinePipeliner() is defined out of line; useDFAforSMS() always
// answers false.
153 bool enableMachinePipeliner() const override;
154 bool useDFAforSMS() const override { return false; }
155
156 /// Returns ARM processor family.
157 /// Avoid this function! CPU specifics should be kept local to this class
158 /// and preferably modeled with SubtargetFeatures or properties in
159 /// initializeProperties().
/// The value returned is the ARMProcFamily member, which defaults to Others.
160 ARMProcFamilyEnum getProcFamily() const {
161 return ARMProcFamily;
162 }
163
/// XRay is reported as supported unconditionally.
164 bool isXRaySupported() const override { return true; }
165
166 /// Returns true if the function has a streaming body.
/// Simply reports the IsStreaming member.
167 bool isStreaming() const { return IsStreaming; }
168
169 /// Returns true if the function has a streaming-compatible body.
/// Simply reports the IsStreamingCompatible member.
170 bool isStreamingCompatible() const { return IsStreamingCompatible; }
171
172 /// Returns true if the target has NEON and the function at runtime is known
173 /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
174 /// mode, which disables NEON instructions).
175 bool isNeonAvailable() const {
176 return hasNEON() &&
177 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
178 }
179
180 /// Returns true if the target has SVE and can use the full range of SVE
181 /// instructions, for example because it knows the function is known not to be
182 /// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
183 bool isSVEAvailable() const {
184 return hasSVE() &&
185 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
186 }
187
188 /// Returns true if the target has access to either the full range of SVE instructions,
189 /// or the streaming-compatible subset of SVE instructions.
190 bool isSVEorStreamingSVEAvailable() const {
191 return hasSVE() || (hasSME() && isStreaming());
192 }
193
194 unsigned getMinVectorRegisterBitWidth() const {
195 // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
196 // we don't yet support streaming-compatible codegen support that we trust
197 // is safe for functions that may be executed in streaming-SVE mode.
198 // By returning '0' here, we disable vectorization.
199 if (!isSVEAvailable() && !isNeonAvailable())
200 return 0;
201 return MinVectorRegisterBitWidth;
202 }
203
/// Returns bit \p i of ReserveXRegister, i.e. whether X#i is unavailable as a
/// general purpose register. \p i is used directly as the bit index; this
/// accessor performs no range check of its own.
204 bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
/// Returns bit \p i of ReserveXRegisterForRA, i.e. whether X#i is unavailable
/// to the register allocator. \p i is used directly as the bit index.
205 bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
206 unsigned getNumXRegisterReserved() const {
207 BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
208 AllReservedX |= ReserveXRegister;
209 AllReservedX |= ReserveXRegisterForRA;
210 return AllReservedX.count();
211 }
/// Returns the ReserveLRForRA flag.
/// NOTE(review): ReserveLRForRA is not declared in the visible part of this
/// header; presumably it is one of the tablegen-generated feature bools -
/// confirm against the generated .inc.
212 bool isLRReservedForRA() const { return ReserveLRForRA; }
/// Returns bit \p i of CustomCallSavedXRegs. \p i is used directly as the bit
/// index; this accessor performs no range check of its own.
213 bool isXRegCustomCalleeSaved(size_t i) const {
214 return CustomCallSavedXRegs[i];
215 }
/// Returns true if any bit of CustomCallSavedXRegs is set.
216 bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
217
218 /// Return true if the CPU supports any kind of instruction fusion.
219 bool hasFusion() const {
220 return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
221 hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
222 hasFuseAdrpAdd() || hasFuseLiterals();
223 }
224
// Read-only accessors for the tuning parameters stored in this subtarget.
// Unless noted otherwise each one returns the like-named member unchanged
// (the narrower uint8_t/uint16_t members are widened to unsigned).
225 unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
// Declared here only; the value returned is computed out of line.
226 unsigned getVectorInsertExtractBaseCost() const;
227 unsigned getCacheLineSize() const override { return CacheLineSize; }
228 unsigned getPrefetchDistance() const override { return PrefetchDistance; }
// All four arguments are ignored; the result is always MinPrefetchStride.
229 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
230 unsigned NumStridedMemAccesses,
231 unsigned NumPrefetches,
232 bool HasCall) const override {
233 return MinPrefetchStride;
234 }
235 unsigned getMaxPrefetchIterationsAhead() const override {
236 return MaxPrefetchIterationsAhead;
237 }
238 Align getPrefFunctionAlignment() const {
239 return PrefFunctionAlignment;
240 }
241 Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
242
243 unsigned getMaxBytesForLoopAlignment() const {
244 return MaxBytesForLoopAlignment;
245 }
246
// Note the naming difference: the getter says "Maximum" while the member is
// called MaxJumpTableSize.
247 unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
248 unsigned getMinimumJumpTableEntries() const {
249 return MinimumJumpTableEntries;
250 }
251
252 /// CPU has TBI (top byte of addresses is ignored during HW address
253 /// translation) and OS enables it.
/// Declared here only; the definition lives out of line.
254 bool supportsAddressTopByteIgnored() const;
255
/// Returns the IsLittle flag.
256 bool isLittleEndian() const { return IsLittle; }
257
// Operating-system / environment predicates. Each forwards to the
// corresponding query on TargetTriple.
258 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
259 bool isTargetIOS() const { return TargetTriple.isiOS(); }
260 bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
261 bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
262 bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
263 bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
264 bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }
265
// Object-file-format predicates. Each forwards to the corresponding query on
// TargetTriple.
266 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
267 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
268 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
269
270 bool isTargetILP32() const {
271 return TargetTriple.isArch32Bit() ||
272 TargetTriple.getEnvironment() == Triple::GNUILP32;
273 }
274
/// Override declared here and defined out of line. Its result also feeds
/// addrSinkUsingGEPs().
275 bool useAA() const override;
276
277 bool addrSinkUsingGEPs() const override {
278 // Keeping GEPs inbounds is important for exploiting AArch64
279 // addressing-modes in ILP32 mode.
280 return useAA() || isTargetILP32();
281 }
282
283 bool useSmallAddressing() const {
284 switch (TLInfo.getTargetMachine().getCodeModel()) {
285 case CodeModel::Kernel:
286 // Kernel is currently allowed only for Fuchsia targets,
287 // where it is the same as Small for almost all purposes.
288 case CodeModel::Small:
289 return true;
290 default:
291 return false;
292 }
293 }
294
295 /// ParseSubtargetFeatures - Parses features string setting specified
296 /// subtarget options. Definition of function is auto generated by tblgen.
297 void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
298
299 /// ClassifyGlobalReference - Find the target operand flags that describe
300 /// how a global value should be referenced for the current subtarget.
/// Declared here only; the definition lives out of line.
301 unsigned ClassifyGlobalReference(const GlobalValue *GV,
302 const TargetMachine &TM) const;
303
/// Two-argument, AArch64-specific overload; declared here and defined out of
/// line. It is distinct from the single-argument virtual override that is
/// also declared in this class.
/// NOTE(review): presumably the function-reference counterpart of
/// ClassifyGlobalReference - confirm in the .cpp.
304 unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
305 const TargetMachine &TM) const;
306
307 /// This function is designed to be compatible with the function definition in
308 /// other targets and to avoid a build error about the virtual function
309 /// definition in the base class TargetSubtargetInfo. Update me if the AArch64
/// target needs to use it.
///
/// The argument is ignored and the result is always 0.
310 unsigned char
311 classifyGlobalFunctionReference(const GlobalValue *GV) const override {
312 return 0;
313 }
314
// Scheduling-related overrides; all declared here and defined out of line.
/// Receives the scheduling policy by mutable reference together with the
/// number of instructions in the region.
315 void overrideSchedPolicy(MachineSchedPolicy &Policy,
316 unsigned NumRegionInstrs) const override;
/// Receives the defining and using scheduling units with their operand
/// indices, the dependency edge by mutable reference, and the scheduling
/// model.
317 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
318 SDep &Dep,
319 const TargetSchedModel *SchedModel) const override;
320
321 bool enableEarlyIfConversion() const override;
322
/// Returns an owning pointer to a PBQPRAConstraint.
323 std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
324
325 bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
326 switch (CC) {
327 case CallingConv::C:
328 case CallingConv::Fast:
329 case CallingConv::Swift:
330 case CallingConv::SwiftTail:
331 return isTargetWindows();
332 case CallingConv::PreserveNone:
333 return IsVarArg && isTargetWindows();
334 case CallingConv::Win64:
335 return true;
336 default:
337 return false;
338 }
339 }
340
341 /// Return whether FrameLowering should always set the "extended frame
342 /// present" bit in FP, or set it based on a symbol in the runtime.
343 bool swiftAsyncContextIsDynamicallySet() const {
344 // Older OS versions (particularly system unwinders) are confused by the
345 // Swift extended frame, so when building code that might be run on them we
346 // must dynamically query the concurrency library to determine whether
347 // extended frames should be flagged as present.
348 const Triple &TT = getTargetTriple();
349
350 unsigned Major = TT.getOSVersion().getMajor();
351 switch(TT.getOS()) {
352 default:
353 return false;
354 case Triple::IOS:
355 case Triple::TvOS:
356 return Major < 15;
357 case Triple::WatchOS:
358 return Major < 8;
359 case Triple::MacOSX:
360 case Triple::Darwin:
361 return Major < 12;
362 }
363 }
364
/// Override taking the machine function by mutable reference; declared here
/// and defined out of line.
365 void mirFileLoaded(MachineFunction &MF) const override;
366
367 // Return the known range for the bit length of SVE data registers. A value
368 // of 0 means nothing is known about that particular limit beyond what's
369 // implied by the architecture.
// Upper bound of that range. Asserts that SVE or streaming SVE is available.
370 unsigned getMaxSVEVectorSizeInBits() const {
371 assert(isSVEorStreamingSVEAvailable() &&
372 "Tried to get SVE vector length without SVE support!");
373 return MaxSVEVectorSizeInBits;
374 }
375
// Lower bound of the known SVE register length range, in bits. Asserts that
// SVE or streaming SVE is available.
376 unsigned getMinSVEVectorSizeInBits() const {
377 assert(isSVEorStreamingSVEAvailable() &&
378 "Tried to get SVE vector length without SVE support!");
379 return MinSVEVectorSizeInBits;
380 }
381
382 bool useSVEForFixedLengthVectors() const {
383 if (!isSVEorStreamingSVEAvailable())
384 return false;
385
386 // Prefer NEON unless larger SVE registers are available.
387 return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
388 }
389
390 bool useSVEForFixedLengthVectors(EVT VT) const {
391 if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
392 return false;
393 return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
394 !isNeonAvailable();
395 }
396
/// Returns the VScaleForTuning member (in-class default: 2).
397 unsigned getVScaleForTuning() const { return VScaleForTuning; }
398
/// Returns the DefaultSVETFOpts member (in-class default:
/// TailFoldingOpts::Disabled).
399 TailFoldingOpts getSVETailFoldingDefaultOpts() const {
400 return DefaultSVETFOpts;
401 }
402
403 const char* getChkStkName() const {
404 if (isWindowsArm64EC())
405 return "#__chkstk_arm64ec";
406 return "__chkstk";
407 }
408
409 const char* getSecurityCheckCookieName() const {
410 if (isWindowsArm64EC())
411 return "#__security_check_cookie_arm64ec";
412 return "__security_check_cookie";
413 }
414
415 /// Choose a method of checking LR before performing a tail call.
/// Declared here only; the definition lives out of line.
416 AArch64PAuth::AuthCheckMethod
417 getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
418
419 /// Compute the integer discriminator for a given BlockAddress constant, if
420 /// blockaddress signing is enabled, or std::nullopt otherwise.
421 /// Blockaddress signing is controlled by the function attribute
422 /// "ptrauth-indirect-gotos" on the parent function.
423 /// Note that this assumes the discriminator is independent of the indirect
424 /// goto branch site itself, i.e., it's the same for all BlockAddresses in
425 /// a function.
/// Declared here only; the definition lives out of line.
426 std::optional<uint16_t>
427 getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
428
/// Returns a non-owning pointer to the address-check pseudo source value held
/// in AddressCheckPSV. The result is null if that unique_ptr is empty.
429 const PseudoSourceValue *getAddressCheckPSV() const {
430 return AddressCheckPSV.get();
431 }
432
433private:
434 /// Pseudo value representing memory load performed to check an address.
435 ///
436 /// This load operation is solely used for its side-effects: if the address
437 /// is not mapped (or not readable), it triggers CPU exception, otherwise
438 /// execution proceeds and the value is not used.
439 class AddressCheckPseudoSourceValue : public PseudoSourceValue {
440 public:
// Constructs the base PseudoSourceValue with kind TargetCustom.
441 AddressCheckPseudoSourceValue(const TargetMachine &TM)
442 : PseudoSourceValue(TargetCustom, TM) {}
443
// The value is reported as non-constant, aliased and possibly aliasing; the
// MachineFrameInfo argument is ignored in all three queries.
444 bool isConstant(const MachineFrameInfo *) const override { return false; }
445 bool isAliased(const MachineFrameInfo *) const override { return true; }
446 bool mayAlias(const MachineFrameInfo *) const override { return true; }
// Prints the fixed label used to identify this pseudo source value.
447 void printCustom(raw_ostream &OS) const override { OS << "AddressCheck"; }
448 };
449
// Owning pointer to the pseudo source value; handed out, non-owning, by
// getAddressCheckPSV().
450 std::unique_ptr<AddressCheckPseudoSourceValue> AddressCheckPSV;
451};
452} // End llvm namespace
453
454#endif
455