//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H

#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64PointerAuth.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"

#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"

namespace llvm {
class GlobalValue;
class StringRef;
class Triple;

class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
  enum ARMProcFamilyEnum : uint8_t {
    Others,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
  };

protected:
  /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
  ARMProcFamilyEnum ARMProcFamily = Others;

  // Enable 64-bit vectorization in SLP.
  unsigned MinVectorRegisterBitWidth = 64;

  // Bool members corresponding to the SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
  bool ATTRIBUTE = DEFAULT;
#include "AArch64GenSubtargetInfo.inc"
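  // For illustration only (not verbatim tablegen output): for a feature whose
  // attribute is "HasSVE", the generated .inc is expected to expand the macro
  // above to
  //   bool HasSVE = false;
  // yielding one bool member per subtarget feature.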

  uint8_t MaxInterleaveFactor = 2;
  uint8_t VectorInsertExtractBaseCost = 2;
  uint16_t CacheLineSize = 0;
  uint16_t PrefetchDistance = 0;
  uint16_t MinPrefetchStride = 1;
  unsigned MaxPrefetchIterationsAhead = UINT_MAX;
  Align PrefFunctionAlignment;
  Align PrefLoopAlignment;
  unsigned MaxBytesForLoopAlignment = 0;
  unsigned MinimumJumpTableEntries = 4;
  unsigned MaxJumpTableSize = 0;

  // ReserveXRegister[i] - X#i is not available as a general purpose register.
  BitVector ReserveXRegister;

  // ReserveXRegisterForRA[i] - X#i is not available for the register allocator.
  BitVector ReserveXRegisterForRA;

  // CustomCallSavedXRegs[i] - X#i is call saved.
  BitVector CustomCallSavedXRegs;

  bool IsLittle;

  bool IsStreaming;
  bool IsStreamingCompatible;
  unsigned MinSVEVectorSizeInBits;
  unsigned MaxSVEVectorSizeInBits;
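  // Estimate of vscale used for cost modelling and vectorization heuristics;
  // not a correctness bound on the runtime vector length.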
  unsigned VScaleForTuning = 2;
  TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;

  /// TargetTriple - What processor and OS we're targeting.
  Triple TargetTriple;

  AArch64FrameLowering FrameLowering;
  AArch64InstrInfo InstrInfo;
  AArch64SelectionDAGInfo TSInfo;
  AArch64TargetLowering TLInfo;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

private:
  /// initializeSubtargetDependencies - Initializes using CPUString and the
  /// passed in feature string so that we can use initializer lists for
  /// subtarget initialization.
  AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
                                                    StringRef CPUString,
                                                    StringRef TuneCPUString,
                                                    bool HasMinSize);

  /// Initialize properties based on the selected processor family.
  void initializeProperties(bool HasMinSize);

public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                   StringRef FS, const TargetMachine &TM, bool LittleEndian,
                   unsigned MinSVEVectorSizeInBitsOverride = 0,
                   unsigned MaxSVEVectorSizeInBitsOverride = 0,
                   bool IsStreaming = false, bool IsStreamingCompatible = false,
                   bool HasMinSize = false);
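  // Note: subtarget objects are normally created by AArch64TargetMachine;
  // most code retrieves one via MF.getSubtarget<AArch64Subtarget>().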

  // Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
  bool GETTER() const { return ATTRIBUTE; }
#include "AArch64GenSubtargetInfo.inc"

  const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }
  const AArch64FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }
  const AArch64TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }
  const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
  const AArch64RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }
  const CallLowering *getCallLowering() const override;
  const InlineAsmLowering *getInlineAsmLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;
  const Triple &getTargetTriple() const { return TargetTriple; }
  bool enableMachineScheduler() const override { return true; }
  bool enablePostRAScheduler() const override { return usePostRAScheduler(); }

  bool enableMachinePipeliner() const override;
  bool useDFAforSMS() const override { return false; }

  /// Returns ARM processor family.
  /// Avoid this function! CPU specifics should be kept local to this class
  /// and preferably modeled with SubtargetFeatures or properties in
  /// initializeProperties().
  ARMProcFamilyEnum getProcFamily() const {
    return ARMProcFamily;
  }

  bool isXRaySupported() const override { return true; }

  /// Returns true if the function has a streaming body.
  bool isStreaming() const { return IsStreaming; }

  /// Returns true if the function has a streaming-compatible body.
  bool isStreamingCompatible() const { return IsStreamingCompatible; }

  /// Returns true if the target has NEON and the function at runtime is known
  /// to have NEON enabled (e.g. the function is known not to be in
  /// streaming-SVE mode, which disables NEON instructions).
  bool isNeonAvailable() const {
    return hasNEON() &&
           (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
  }

  /// Returns true if the target has SVE and can use the full range of SVE
  /// instructions, for example because the function is known not to be in
  /// streaming-SVE mode or because the target has FEAT_FA64 enabled.
  bool isSVEAvailable() const {
    return hasSVE() &&
           (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
  }

  /// Returns true if the target has access to either the full range of SVE
  /// instructions, or the streaming-compatible subset of SVE instructions.
  bool isSVEorStreamingSVEAvailable() const {
    return hasSVE() || (hasSME() && isStreaming());
  }

  unsigned getMinVectorRegisterBitWidth() const {
    // Don't assume any minimum vector size when PSTATE.SM may not be 0,
    // because we do not yet have streaming-compatible codegen that we trust to
    // be safe for functions that may be executed in streaming-SVE mode.
    // Returning 0 here disables vectorization.
    if (!isSVEAvailable() && !isNeonAvailable())
      return 0;
    return MinVectorRegisterBitWidth;
  }

  bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
  bool isXRegisterReservedForRA(size_t i) const {
    return ReserveXRegisterForRA[i];
  }
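  /// Returns the number of X registers that are unavailable, counting both
  /// user-reserved registers and registers withheld from the register
  /// allocator.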
  unsigned getNumXRegisterReserved() const {
    BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
    AllReservedX |= ReserveXRegister;
    AllReservedX |= ReserveXRegisterForRA;
    return AllReservedX.count();
  }
  bool isLRReservedForRA() const { return ReserveLRForRA; }
  bool isXRegCustomCalleeSaved(size_t i) const {
    return CustomCallSavedXRegs[i];
  }
  bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }

  /// Return true if the CPU supports any kind of instruction fusion.
  bool hasFusion() const {
    return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
           hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
           hasFuseAdrpAdd() || hasFuseLiterals();
  }

  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
  unsigned getVectorInsertExtractBaseCost() const;
  unsigned getCacheLineSize() const override { return CacheLineSize; }
  unsigned getPrefetchDistance() const override { return PrefetchDistance; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return MinPrefetchStride;
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return MaxPrefetchIterationsAhead;
  }
  Align getPrefFunctionAlignment() const {
    return PrefFunctionAlignment;
  }
  Align getPrefLoopAlignment() const { return PrefLoopAlignment; }

  unsigned getMaxBytesForLoopAlignment() const {
    return MaxBytesForLoopAlignment;
  }

  unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
  unsigned getMinimumJumpTableEntries() const {
    return MinimumJumpTableEntries;
  }

  /// CPU has TBI (top byte of addresses is ignored during HW address
  /// translation) and OS enables it.
  bool supportsAddressTopByteIgnored() const;

  bool isLittleEndian() const { return IsLittle; }

  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetIOS() const { return TargetTriple.isiOS(); }
  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
  bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }

  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  bool isTargetILP32() const {
    return TargetTriple.isArch32Bit() ||
           TargetTriple.getEnvironment() == Triple::GNUILP32;
  }

  bool useAA() const override;

  bool addrSinkUsingGEPs() const override {
    // Keeping GEPs inbounds is important for exploiting AArch64
    // addressing-modes in ILP32 mode.
    return useAA() || isTargetILP32();
  }

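  /// Returns true if the selected code model permits small-code-model
  /// addressing assumptions (Small, or Kernel which is treated like Small).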
  bool useSmallAddressing() const {
    switch (TLInfo.getTargetMachine().getCodeModel()) {
    case CodeModel::Kernel:
      // Kernel is currently allowed only for Fuchsia targets,
      // where it is the same as Small for almost all purposes.
    case CodeModel::Small:
      return true;
    default:
      return false;
    }
  }

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options. Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// ClassifyGlobalReference - Find the target operand flags that describe
  /// how a global value should be referenced for the current subtarget.
  unsigned ClassifyGlobalReference(const GlobalValue *GV,
                                   const TargetMachine &TM) const;

  unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
                                           const TargetMachine &TM) const;

  /// This function is designed to be compatible with the equivalent function
  /// definitions in other targets and to avoid build errors related to the
  /// virtual function definition in the base class TargetSubtargetInfo.
  /// Update this if the AArch64 target needs to use it.
  unsigned char
  classifyGlobalFunctionReference(const GlobalValue *GV) const override {
    return 0;
  }

  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           unsigned NumRegionInstrs) const override;
  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                             SDep &Dep,
                             const TargetSchedModel *SchedModel) const override;

  bool enableEarlyIfConversion() const override;

  std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;

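  /// Returns true if a call using the given calling convention follows the
  /// Windows AArch64 (Win64) calling convention on this target.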
  bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
    switch (CC) {
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
      return isTargetWindows();
    case CallingConv::PreserveNone:
      return IsVarArg && isTargetWindows();
    case CallingConv::Win64:
      return true;
    default:
      return false;
    }
  }

  /// Return whether FrameLowering should always set the "extended frame
  /// present" bit in FP, or set it based on a symbol in the runtime.
  bool swiftAsyncContextIsDynamicallySet() const {
    // Older OS versions (particularly system unwinders) are confused by the
    // Swift extended frame, so when building code that might be run on them we
    // must dynamically query the concurrency library to determine whether
    // extended frames should be flagged as present.
    const Triple &TT = getTargetTriple();

    unsigned Major = TT.getOSVersion().getMajor();
    switch (TT.getOS()) {
    default:
      return false;
    case Triple::IOS:
    case Triple::TvOS:
      return Major < 15;
    case Triple::WatchOS:
      return Major < 8;
    case Triple::MacOSX:
    case Triple::Darwin:
      return Major < 12;
    }
  }

  void mirFileLoaded(MachineFunction &MF) const override;

  // Return the known range for the bit length of SVE data registers. A value
  // of 0 means nothing is known about that particular limit beyond what's
  // implied by the architecture.
  unsigned getMaxSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MaxSVEVectorSizeInBits;
  }

  unsigned getMinSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MinSVEVectorSizeInBits;
  }

  bool useSVEForFixedLengthVectors() const {
    if (!isSVEorStreamingSVEAvailable())
      return false;

    // Prefer NEON unless larger SVE registers are available.
    return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
  }

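  /// Returns true if the given fixed-length vector type should be lowered
  /// using SVE rather than NEON.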
  bool useSVEForFixedLengthVectors(EVT VT) const {
    if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
      return false;
    return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
           !isNeonAvailable();
  }

  unsigned getVScaleForTuning() const { return VScaleForTuning; }

  TailFoldingOpts getSVETailFoldingDefaultOpts() const {
    return DefaultSVETFOpts;
  }

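  /// Returns the symbol name of the stack-probing helper; Arm64EC targets use
  /// the mangled, "#"-prefixed name.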
  const char *getChkStkName() const {
    if (isWindowsArm64EC())
      return "#__chkstk_arm64ec";
    return "__chkstk";
  }

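  /// Returns the symbol name of the stack-protector cookie check helper,
  /// likewise mangled for Arm64EC.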
  const char *getSecurityCheckCookieName() const {
    if (isWindowsArm64EC())
      return "#__security_check_cookie_arm64ec";
    return "__security_check_cookie";
  }

  /// Choose a method of checking LR before performing a tail call.
  AArch64PAuth::AuthCheckMethod
  getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;

  /// Compute the integer discriminator for a given BlockAddress constant, if
  /// blockaddress signing is enabled, or std::nullopt otherwise.
  /// Blockaddress signing is controlled by the function attribute
  /// "ptrauth-indirect-gotos" on the parent function.
  /// Note that this assumes the discriminator is independent of the indirect
  /// goto branch site itself, i.e., it's the same for all BlockAddresses in
  /// a function.
  std::optional<uint16_t>
  getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;

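  /// Returns the pseudo source value describing the memory operand of an
  /// address-check load (see AddressCheckPseudoSourceValue below).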
  const PseudoSourceValue *getAddressCheckPSV() const {
    return AddressCheckPSV.get();
  }

private:
  /// Pseudo value representing a memory load performed to check an address.
  ///
  /// This load is used solely for its side effects: if the address is not
  /// mapped (or not readable), it triggers a CPU exception; otherwise
  /// execution proceeds and the loaded value is unused.
  class AddressCheckPseudoSourceValue : public PseudoSourceValue {
  public:
    AddressCheckPseudoSourceValue(const TargetMachine &TM)
        : PseudoSourceValue(TargetCustom, TM) {}

    bool isConstant(const MachineFrameInfo *) const override { return false; }
    bool isAliased(const MachineFrameInfo *) const override { return true; }
    bool mayAlias(const MachineFrameInfo *) const override { return true; }
    void printCustom(raw_ostream &OS) const override { OS << "AddressCheck"; }
  };

  std::unique_ptr<AddressCheckPseudoSourceValue> AddressCheckPSV;
};
} // End llvm namespace

#endif