//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/SipHash.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);
// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool> MachOUseNonLazyBind(
    "aarch64-macho-enable-nonlazybind",
    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
    cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used where the ABI requires them, such as for
// passing arguments to a function call.
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by the register allocator. "
                  "Should only be used for testing the register allocator."),
                  cl::CommaSeparated, cl::Hidden);
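// For example (hypothetical invocation, for illustration only):
//   llc -mtriple=aarch64 -reserve-regs-for-regalloc=X10,X11 ...
// keeps the register allocator from assigning X10/X11, while the calling
// convention may still place values in them.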

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(10), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

static cl::opt<unsigned> AArch64StreamingHazardSize(
    "aarch64-streaming-hazard-size",
    cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
    cl::init(0), cl::Hidden);

static cl::alias AArch64StreamingStackHazardSize(
    "aarch64-stack-hazard-size",
    cl::desc("alias for -aarch64-streaming-hazard-size"),
    cl::aliasopt(AArch64StreamingHazardSize));
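// (For example, -aarch64-streaming-hazard-size=1024 or, equivalently through
//  the alias, -aarch64-stack-hazard-size=1024 sets StreamingHazardSize to 1024
//  for the subtarget constructed below.)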

static cl::opt<unsigned>
    VScaleForTuningOpt("sve-vscale-for-tuning", cl::Hidden,
                       cl::desc("Force a vscale for tuning factor for SVE"));

// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
    EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Enable subreg liveness tracking"));

static cl::opt<bool>
    UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(false), cl::Hidden,
                   cl::desc("Prefer add+cnt over addvl/inc/dec"));

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}
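// (For example, -aarch64-insert-extract-base-cost=0 overrides the per-CPU
//  VectorInsertExtractBaseCost set in initializeProperties(), which can be
//  useful when experimenting with the vectorizer's cost model.)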

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Generic:
    // With TuneCPU=generic we avoid ldapur instructions, to line up with the
    // CPUs that use the AvoidLDAPUR feature. We don't want this to be on
    // forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
    if (hasV8_4aOps() && !hasV8_8aOps())
      AvoidLDAPUR = true;
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
  case CortexR82:
  case CortexR82AE:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78AE:
  case CortexA78C:
  case CortexX1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA320:
  case CortexA510:
  case CortexA520:
  case C1Nano:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexA725:
  case C1Pro:
  case CortexX2:
  case CortexX3:
  case CortexX4:
  case CortexX925:
  case C1Premium:
  case C1Ultra:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case MONAKA:
    VScaleForTuning = 2;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
  case AppleM4:
  case AppleM5:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    break;
  case ExynosM3:
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseV2:
  case NeoverseV3:
    CacheLineSize = 64;
    EpilogueVectorizationMinVF = 8;
    ScatterOverhead = 13;
    [[fallthrough]];
  case NeoverseN2:
  case NeoverseN3:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    break;
  case Saphira:
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
  case Ampere1B:
  case Ampere1C:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    break;
  case Oryon:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    break;
  case Olympus:
    EpilogueVectorizationMinVF = 8;
    ScatterOverhead = 13;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  }

  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
  if (VScaleForTuningOpt.getNumOccurrences() > 0)
    VScaleForTuning = VScaleForTuningOpt;
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool IsStreaming, bool IsStreamingCompatible,
                                   bool HasMinSize,
                                   bool EnableSRLTSubregToRegMitigation)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), IsStreaming(IsStreaming),
      IsStreamingCompatible(IsStreamingCompatible),
      StreamingHazardSize(
          AArch64StreamingHazardSize.getNumOccurrences() > 0
              ? std::optional<unsigned>(AArch64StreamingHazardSize)
              : std::nullopt),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride),
      EnableSRLTSubregToRegMitigation(EnableSRLTSubregToRegMitigation),
      TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames(llvm::from_range, ReservedRegsForRA);
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  // To benefit from SME2's strided-register multi-vector load/store
  // instructions we'll need to enable subreg liveness. Our longer
  // term aim is to make this the default, regardless of streaming
  // mode, but there are still some outstanding issues, see:
  // https://github.com/llvm/llvm-project/pull/174188
  // and:
  // https://github.com/llvm/llvm-project/pull/168353
  if (IsStreaming)
    EnableSubregLiveness = true;
  else
    EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
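  // (Illustrative detail: a MO_GOT reference on Mach-O typically lowers to an
  //  adrp/ldr pair through _sym@GOTPAGE / _sym@GOTPAGEOFF rather than
  //  materializing the address directly.)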
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(GV)) {
    if (GV->hasDLLImportStorageClass()) {
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
      F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
      if (GV->hasDLLImportStorageClass()) {
        // On Arm64EC, if we're calling a symbol from the import table
        // directly, use MO_ARM64EC_CALLMANGLE.
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
               AArch64II::MO_ARM64EC_CALLMANGLE;
      }
      if (GV->hasExternalLinkage()) {
        // If we're calling a symbol directly, use the mangled form in the
        // call instruction.
        return AArch64II::MO_ARM64EC_CALLMANGLE;
      }
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           const SchedRegion &Region) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures; on the other hand
  // it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

void AArch64Subtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr() ||
      (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
       Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
    return;

  // If the Def is a BUNDLE, find the last instruction in the bundle that defs
  // the register.
  const MachineInstr *DefMI = Def->getInstr();
  if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = DefMI->getOperand(DefOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*DefMI)) {
      if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
        DefMI = Op.getParent();
        DefOpIdx = Op.getOperandNo();
      }
    }
  }

  // If the Use is a BUNDLE, find the first instruction that uses the Reg.
  const MachineInstr *UseMI = Use->getInstr();
  if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = UseMI->getOperand(UseOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*UseMI)) {
      if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
        UseMI = Op.getParent();
        UseOpIdx = Op.getOperandNo();
        break;
      }
    }
  }

  Dep.setLatency(
      SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute the max call frame size after ISel. Do the computation
  // now if the .mir file didn't specify it. Note that this will probably give
  // you bogus values after PEI has eliminated the callframe setup/destroy
  // pseudo instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

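// (Illustrative note: a pointer can be advanced by one SVE vector length in
//  bytes either with "incb x0" / "addvl x0, x0, #1" or with the scalar pair
//  "cntb x8; add x0, x0, x8"; this hook decides which form codegen prefers.)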
bool AArch64Subtarget::useScalarIncVL() const {
  // If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
  // is not otherwise set, enable it by default.
  if (UseScalarIncVL.getNumOccurrences())
    return UseScalarIncVL;
  return hasSVE2() || hasSME();
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
// <authenticate LR>
// <check LR>
// TCRETURN ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC is implemented, the valid PAC replaces
// the higher bits of LR, thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
    const MachineFunction &MF) const {
  // TODO: Check subtarget for the scheme. Present variant is a default for
  // pauthtest ABI.
  if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
      MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
    return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}

std::optional<uint16_t>
AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
    const Function &ParentFn) const {
  if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos"))
    return std::nullopt;
  // We currently have one simple mechanism for all targets.
  // This isn't ABI, so we can always do better in the future.
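  // (For example, a parent function named "foo" would get the 16-bit stable
  //  SipHash of the string "foo blockaddress" as its discriminator.)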
  return getPointerAuthStableSipHash(
      (Twine(ParentFn.getName()) + " blockaddress").str());
}

bool AArch64Subtarget::isX16X17Safer() const {
  // The Darwin kernel implements special protections for x16 and x17 so we
  // should prefer to use those registers on that platform.
  return isTargetDarwin();
}

bool AArch64Subtarget::enableMachinePipeliner() const {
  return getSchedModel().hasInstrSchedModel();
}