1 | //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares the AArch64 specific subclass of TargetSubtarget. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H |
14 | #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H |
15 | |
16 | #include "AArch64FrameLowering.h" |
17 | #include "AArch64ISelLowering.h" |
18 | #include "AArch64InstrInfo.h" |
19 | #include "AArch64PointerAuth.h" |
20 | #include "AArch64RegisterInfo.h" |
21 | #include "AArch64SelectionDAGInfo.h" |
22 | #include "llvm/CodeGen/GlobalISel/CallLowering.h" |
23 | #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" |
24 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
25 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
26 | #include "llvm/CodeGen/RegisterBankInfo.h" |
27 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
28 | #include "llvm/IR/DataLayout.h" |
29 | #include "llvm/TargetParser/Triple.h" |
30 | |
31 | #define |
32 | #include "AArch64GenSubtargetInfo.inc" |
33 | |
34 | namespace llvm { |
// Forward declarations for types that appear in the member function
// signatures of AArch64Subtarget.
class GlobalValue;
class StringRef;
37 | |
38 | class AArch64Subtarget final : public AArch64GenSubtargetInfo { |
39 | public: |
/// Processor family identifiers, stored in a uint8_t. `Generic` is the first
/// enumerator; the remaining enumerators are produced by expanding
/// ARM_PROCESSOR_FAMILY over AArch64TargetParserDef.inc.
enum ARMProcFamilyEnum : uint8_t {
  Generic,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
};
46 | |
47 | protected: |
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
/// Defaults to Generic; exposed through getProcFamily().
ARMProcFamilyEnum ARMProcFamily = Generic;

// Enable 64-bit vectorization in SLP.
// Returned by getMinVectorRegisterBitWidth() when NEON or SVE is available.
unsigned MinVectorRegisterBitWidth = 64;

// Bool members corresponding to the SubtargetFeatures defined in tablegen.
// Every GET_SUBTARGETINFO_MACRO entry in the generated file expands to one
// `bool ATTRIBUTE = DEFAULT;` data member.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "AArch64GenSubtargetInfo.inc"
58 | |
59 | unsigned EpilogueVectorizationMinVF = 16; |
60 | uint8_t MaxInterleaveFactor = 2; |
61 | uint8_t = 2; |
62 | uint16_t CacheLineSize = 0; |
63 | // Default scatter/gather overhead. |
64 | unsigned ScatterOverhead = 10; |
65 | unsigned GatherOverhead = 10; |
66 | uint16_t PrefetchDistance = 0; |
67 | uint16_t MinPrefetchStride = 1; |
68 | unsigned MaxPrefetchIterationsAhead = UINT_MAX; |
69 | Align PrefFunctionAlignment; |
70 | Align PrefLoopAlignment; |
71 | unsigned MaxBytesForLoopAlignment = 0; |
72 | unsigned MinimumJumpTableEntries = 4; |
73 | unsigned MaxJumpTableSize = 0; |
74 | |
// ReserveXRegister[i] - X#i is not available as a general purpose register.
BitVector ReserveXRegister;

// ReserveXRegisterForRA[i] - X#i is not available for register allocator.
BitVector ReserveXRegisterForRA;

// CustomCallSavedXRegs[i] - X#i call saved.
BitVector CustomCallSavedXRegs;
83 | |
// Endianness flag returned by isLittleEndian().
bool IsLittle;

// Streaming-mode flags returned by isStreaming() / isStreamingCompatible().
bool IsStreaming;
bool IsStreamingCompatible;
// Optional explicit hazard size; when empty, getStreamingHazardSize() derives
// a value from the SME/SVE feature bits instead.
std::optional<unsigned> StreamingHazardSize;
// Known bounds, in bits, for the SVE data register length (see
// getMinSVEVectorSizeInBits() / getMaxSVEVectorSizeInBits()).
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
// Returned by getVScaleForTuning().
unsigned VScaleForTuning = 1;
// Returned by getSVETailFoldingDefaultOpts().
TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;

// Returned by enableSubRegLiveness().
bool EnableSubregLiveness;

/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
98 | |
// Target-specific helper objects owned by value; their addresses are handed
// out by getFrameLowering(), getInstrInfo(), getSelectionDAGInfo() and
// getTargetLowering().
AArch64FrameLowering FrameLowering;
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
AArch64TargetLowering TLInfo;

/// GlobalISel related APIs.
/// Held through owning pointers to the generic base interfaces.
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
110 | |
111 | private: |
/// initializeSubtargetDependencies - Initializes using CPUString and the
/// passed in feature string so that we can use initializer lists for
/// subtarget initialization.
/// Returns a reference to an AArch64Subtarget.
/// NOTE(review): presumably *this, so the call can appear in the constructor's
/// member initializer list -- confirm in the .cpp file.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
                                                  StringRef CPUString,
                                                  StringRef TuneCPUString,
                                                  bool HasMinSize);

/// Initialize properties based on the selected processor family.
void initializeProperties(bool HasMinSize);
122 | |
123 | public: |
/// This constructor initializes the data members to match that
/// of the specified triple.
/// The two SVE vector-size overrides default to 0, and the streaming,
/// streaming-compatible and min-size flags default to false.
AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                 StringRef FS, const TargetMachine &TM, bool LittleEndian,
                 unsigned MinSVEVectorSizeInBitsOverride = 0,
                 unsigned MaxSVEVectorSizeInBitsOverride = 0,
                 bool IsStreaming = false, bool IsStreamingCompatible = false,
                 bool HasMinSize = false);
132 | |
133 | virtual unsigned getHwModeSet() const override; |
134 | |
// Getters for SubtargetFeatures defined in tablegen.
// Every GET_SUBTARGETINFO_MACRO entry in the generated file expands to one
// const accessor named GETTER that returns the matching ATTRIBUTE member.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "AArch64GenSubtargetInfo.inc"
139 | |
140 | const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { |
141 | return &TSInfo; |
142 | } |
143 | const AArch64FrameLowering *getFrameLowering() const override { |
144 | return &FrameLowering; |
145 | } |
146 | const AArch64TargetLowering *getTargetLowering() const override { |
147 | return &TLInfo; |
148 | } |
149 | const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } |
150 | const AArch64RegisterInfo *getRegisterInfo() const override { |
151 | return &getInstrInfo()->getRegisterInfo(); |
152 | } |
153 | const CallLowering *getCallLowering() const override; |
154 | const InlineAsmLowering *getInlineAsmLowering() const override; |
155 | InstructionSelector *getInstructionSelector() const override; |
156 | const LegalizerInfo *getLegalizerInfo() const override; |
157 | const RegisterBankInfo *getRegBankInfo() const override; |
158 | const Triple &getTargetTriple() const { return TargetTriple; } |
159 | bool enableMachineScheduler() const override { return true; } |
160 | bool enablePostRAScheduler() const override { return usePostRAScheduler(); } |
161 | bool enableSubRegLiveness() const override { return EnableSubregLiveness; } |
162 | |
// Software-pipelining hooks. enableMachinePipeliner() is defined out of line;
// useDFAforSMS() always answers false.
bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }
165 | |
/// Returns ARM processor family.
/// Avoid this function! CPU specifics should be kept local to this class
/// and preferably modeled with SubtargetFeatures or properties in
/// initializeProperties().
ARMProcFamilyEnum getProcFamily() const {
  return ARMProcFamily;
}

/// Always reports XRay as supported.
bool isXRaySupported() const override { return true; }

/// Returns true if the function has a streaming body.
bool isStreaming() const { return IsStreaming; }

/// Returns true if the function has a streaming-compatible body.
bool isStreamingCompatible() const { return IsStreamingCompatible; }
181 | |
182 | /// Returns the size of memory region that if accessed by both the CPU and |
183 | /// the SME unit could result in a hazard. 0 = disabled. |
184 | unsigned getStreamingHazardSize() const { |
185 | return StreamingHazardSize.value_or( |
186 | u: !hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0); |
187 | } |
188 | |
189 | /// Returns true if the target has NEON and the function at runtime is known |
190 | /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE |
191 | /// mode, which disables NEON instructions). |
192 | bool isNeonAvailable() const { |
193 | return hasNEON() && |
194 | (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); |
195 | } |
196 | |
197 | /// Returns true if the target has SVE and can use the full range of SVE |
198 | /// instructions, for example because it knows the function is known not to be |
199 | /// in streaming-SVE mode or when the target has FEAT_FA64 enabled. |
200 | bool isSVEAvailable() const { |
201 | return hasSVE() && |
202 | (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); |
203 | } |
204 | |
205 | /// Returns true if the target has access to the streaming-compatible subset |
206 | /// of SVE instructions. |
207 | bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); } |
208 | |
209 | /// Returns true if the target has access to either the full range of SVE |
210 | /// instructions, or the streaming-compatible subset of SVE instructions. |
211 | bool isSVEorStreamingSVEAvailable() const { |
212 | return hasSVE() || isStreamingSVEAvailable(); |
213 | } |
214 | |
215 | unsigned getMinVectorRegisterBitWidth() const { |
216 | // Don't assume any minimum vector size when PSTATE.SM may not be 0, because |
217 | // we don't yet support streaming-compatible codegen support that we trust |
218 | // is safe for functions that may be executed in streaming-SVE mode. |
219 | // By returning '0' here, we disable vectorization. |
220 | if (!isSVEAvailable() && !isNeonAvailable()) |
221 | return 0; |
222 | return MinVectorRegisterBitWidth; |
223 | } |
224 | |
225 | bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; } |
226 | bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; } |
227 | unsigned getNumXRegisterReserved() const { |
228 | BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs()); |
229 | AllReservedX |= ReserveXRegister; |
230 | AllReservedX |= ReserveXRegisterForRA; |
231 | return AllReservedX.count(); |
232 | } |
233 | bool isLRReservedForRA() const { return ReserveLRForRA; } |
234 | bool isXRegCustomCalleeSaved(size_t i) const { |
235 | return CustomCallSavedXRegs[i]; |
236 | } |
237 | bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } |
238 | |
239 | /// Return true if the CPU supports any kind of instruction fusion. |
240 | bool hasFusion() const { |
241 | return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || |
242 | hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || |
243 | hasFuseAdrpAdd() || hasFuseLiterals(); |
244 | } |
245 | |
246 | unsigned getEpilogueVectorizationMinVF() const { |
247 | return EpilogueVectorizationMinVF; |
248 | } |
249 | unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } |
250 | unsigned () const; |
251 | unsigned getCacheLineSize() const override { return CacheLineSize; } |
252 | unsigned getScatterOverhead() const { return ScatterOverhead; } |
253 | unsigned getGatherOverhead() const { return GatherOverhead; } |
// Software prefetch tuning getters; each returns the stored member.
unsigned getPrefetchDistance() const override { return PrefetchDistance; }
// The four parameters are part of the overridden signature and are not
// consulted here: the stored MinPrefetchStride is returned unconditionally.
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                              unsigned NumStridedMemAccesses,
                              unsigned NumPrefetches,
                              bool HasCall) const override {
  return MinPrefetchStride;
}
unsigned getMaxPrefetchIterationsAhead() const override {
  return MaxPrefetchIterationsAhead;
}
// Preferred function and loop alignments.
Align getPrefFunctionAlignment() const {
  return PrefFunctionAlignment;
}
Align getPrefLoopAlignment() const { return PrefLoopAlignment; }

unsigned getMaxBytesForLoopAlignment() const {
  return MaxBytesForLoopAlignment;
}

// Jump table limits.
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
unsigned getMinimumJumpTableEntries() const {
  return MinimumJumpTableEntries;
}
277 | |
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;

/// Returns the endianness flag stored in IsLittle.
bool isLittleEndian() const { return IsLittle; }

// Operating system / environment predicates; each forwards to the
// corresponding query on TargetTriple.
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }

// Object file format predicates, also answered by TargetTriple.
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
295 | |
296 | bool isTargetILP32() const { |
297 | return TargetTriple.isArch32Bit() || |
298 | TargetTriple.getEnvironment() == Triple::GNUILP32; |
299 | } |
300 | |
301 | bool useAA() const override; |
302 | |
303 | bool addrSinkUsingGEPs() const override { |
304 | // Keeping GEPs inbounds is important for exploiting AArch64 |
305 | // addressing-modes in ILP32 mode. |
306 | return useAA() || isTargetILP32(); |
307 | } |
308 | |
309 | bool useSmallAddressing() const { |
310 | switch (TLInfo.getTargetMachine().getCodeModel()) { |
311 | case CodeModel::Kernel: |
312 | // Kernel is currently allowed only for Fuchsia targets, |
313 | // where it is the same as Small for almost all purposes. |
314 | case CodeModel::Small: |
315 | return true; |
316 | default: |
317 | return false; |
318 | } |
319 | } |
320 | |
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned ClassifyGlobalReference(const GlobalValue *GV,
                                 const TargetMachine &TM) const;

/// Two-argument overload taking the target machine; defined out of line.
unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
                                         const TargetMachine &TM) const;

/// This function is designed to be compatible with the definition in other
/// targets and to avoid a build error about the virtual function declared in
/// the base class TargetSubtargetInfo. Update me if the AArch64 target needs
/// to use it. It currently always returns 0.
unsigned char
classifyGlobalFunctionReference(const GlobalValue *GV) const override {
  return 0;
}
340 | |
// Scheduling hooks overridden from the base class; defined out of line.
void overrideSchedPolicy(MachineSchedPolicy &Policy,
                         unsigned NumRegionInstrs) const override;
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                           SDep &Dep,
                           const TargetSchedModel *SchedModel) const override;

// Early if-conversion switch; defined out of line.
bool enableEarlyIfConversion() const override;

// Returns an owning pointer to the custom PBQP register allocation
// constraints; defined out of line.
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
350 | |
351 | bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const { |
352 | switch (CC) { |
353 | case CallingConv::C: |
354 | case CallingConv::Fast: |
355 | case CallingConv::Swift: |
356 | case CallingConv::SwiftTail: |
357 | return isTargetWindows(); |
358 | case CallingConv::PreserveNone: |
359 | return IsVarArg && isTargetWindows(); |
360 | case CallingConv::Win64: |
361 | return true; |
362 | default: |
363 | return false; |
364 | } |
365 | } |
366 | |
367 | /// Return whether FrameLowering should always set the "extended frame |
368 | /// present" bit in FP, or set it based on a symbol in the runtime. |
369 | bool swiftAsyncContextIsDynamicallySet() const { |
370 | // Older OS versions (particularly system unwinders) are confused by the |
371 | // Swift extended frame, so when building code that might be run on them we |
372 | // must dynamically query the concurrency library to determine whether |
373 | // extended frames should be flagged as present. |
374 | const Triple &TT = getTargetTriple(); |
375 | |
376 | unsigned Major = TT.getOSVersion().getMajor(); |
377 | switch(TT.getOS()) { |
378 | default: |
379 | return false; |
380 | case Triple::IOS: |
381 | case Triple::TvOS: |
382 | return Major < 15; |
383 | case Triple::WatchOS: |
384 | return Major < 8; |
385 | case Triple::MacOSX: |
386 | case Triple::Darwin: |
387 | return Major < 12; |
388 | } |
389 | } |
390 | |
391 | void mirFileLoaded(MachineFunction &MF) const override; |
392 | |
393 | // Return the known range for the bit length of SVE data registers. A value |
394 | // of 0 means nothing is known about that particular limit beyond what's |
395 | // implied by the architecture. |
396 | unsigned getMaxSVEVectorSizeInBits() const { |
397 | assert(isSVEorStreamingSVEAvailable() && |
398 | "Tried to get SVE vector length without SVE support!" ); |
399 | return MaxSVEVectorSizeInBits; |
400 | } |
401 | |
402 | unsigned getMinSVEVectorSizeInBits() const { |
403 | assert(isSVEorStreamingSVEAvailable() && |
404 | "Tried to get SVE vector length without SVE support!" ); |
405 | return MinSVEVectorSizeInBits; |
406 | } |
407 | |
408 | // Return the known bit length of SVE data registers. A value of 0 means the |
409 | // length is unknown beyond what's implied by the architecture. |
410 | unsigned getSVEVectorSizeInBits() const { |
411 | assert(isSVEorStreamingSVEAvailable() && |
412 | "Tried to get SVE vector length without SVE support!" ); |
413 | if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits) |
414 | return MaxSVEVectorSizeInBits; |
415 | return 0; |
416 | } |
417 | |
418 | bool useSVEForFixedLengthVectors() const { |
419 | if (!isSVEorStreamingSVEAvailable()) |
420 | return false; |
421 | |
422 | // Prefer NEON unless larger SVE registers are available. |
423 | return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256; |
424 | } |
425 | |
426 | bool useSVEForFixedLengthVectors(EVT VT) const { |
427 | if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector()) |
428 | return false; |
429 | return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock || |
430 | !isNeonAvailable(); |
431 | } |
432 | |
/// Returns the stored VScaleForTuning value.
unsigned getVScaleForTuning() const { return VScaleForTuning; }

/// Returns the stored default SVE tail-folding options.
TailFoldingOpts getSVETailFoldingDefaultOpts() const {
  return DefaultSVETFOpts;
}

/// Returns true to use the addvl/inc/dec instructions, as opposed to separate
/// add + cnt instructions.
bool useScalarIncVL() const;
442 | |
443 | const char* getChkStkName() const { |
444 | if (isWindowsArm64EC()) |
445 | return "#__chkstk_arm64ec" ; |
446 | return "__chkstk" ; |
447 | } |
448 | |
449 | const char* getSecurityCheckCookieName() const { |
450 | if (isWindowsArm64EC()) |
451 | return "#__security_check_cookie_arm64ec" ; |
452 | return "__security_check_cookie" ; |
453 | } |
454 | |
/// Choose a method of checking LR before performing a tail call.
/// Declared here only; the selection logic lives in the out-of-line
/// definition.
AArch64PAuth::AuthCheckMethod
getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;

/// Compute the integer discriminator for a given BlockAddress constant, if
/// blockaddress signing is enabled, or std::nullopt otherwise.
/// Blockaddress signing is controlled by the function attribute
/// "ptrauth-indirect-gotos" on the parent function.
/// Note that this assumes the discriminator is independent of the indirect
/// goto branch site itself, i.e., it's the same for all BlockAddresses in
/// a function.
std::optional<uint16_t>
getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
468 | }; |
469 | } // End llvm namespace |
470 | |
471 | #endif |
472 | |