1//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64 specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64Subtarget.h"
14
15#include "AArch64.h"
16#include "AArch64InstrInfo.h"
17#include "AArch64PBQPRegAlloc.h"
18#include "AArch64TargetMachine.h"
19#include "GISel/AArch64CallLowering.h"
20#include "GISel/AArch64LegalizerInfo.h"
21#include "GISel/AArch64RegisterBankInfo.h"
22#include "MCTargetDesc/AArch64AddressingModes.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24#include "llvm/CodeGen/MachineFrameInfo.h"
25#include "llvm/CodeGen/MachineScheduler.h"
26#include "llvm/IR/GlobalValue.h"
27#include "llvm/Support/SipHash.h"
28#include "llvm/TargetParser/AArch64TargetParser.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "aarch64-subtarget"
33
34#define GET_SUBTARGETINFO_CTOR
35#define GET_SUBTARGETINFO_TARGET_DESC
36#include "AArch64GenSubtargetInfo.inc"
37
38static cl::opt<bool>
39EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
40 "converter pass"), cl::init(Val: true), cl::Hidden);
41
42// If OS supports TBI, use this flag to enable it.
43static cl::opt<bool>
44UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
45 "an address is ignored"), cl::init(Val: false), cl::Hidden);
46
47static cl::opt<bool> MachOUseNonLazyBind(
48 "aarch64-macho-enable-nonlazybind",
49 cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
50 cl::Hidden);
51
52static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(Val: true),
53 cl::desc("Enable the use of AA during codegen."));
54
55static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
56 "aarch64-insert-extract-base-cost",
57 cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
58
59// Reserve a list of X# registers, so they are unavailable for register
60// allocator, but can still be used as ABI requests, such as passing arguments
61// to function call.
62static cl::list<std::string>
63ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
64 "registers, so they can't be used by register allocator. "
65 "Should only be used for testing register allocator."),
66 cl::CommaSeparated, cl::Hidden);
67
68static cl::opt<AArch64PAuth::AuthCheckMethod>
69 AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
70 cl::Hidden,
71 cl::desc("Override the variant of check applied "
72 "to authenticated LR during tail call"),
73 cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
74
75static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
76 "aarch64-min-jump-table-entries", cl::init(Val: 10), cl::Hidden,
77 cl::desc("Set minimum number of entries to use a jump table on AArch64"));
78
79static cl::opt<unsigned> AArch64StreamingHazardSize(
80 "aarch64-streaming-hazard-size",
81 cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
82 cl::init(Val: 0), cl::Hidden);
83
84static cl::alias AArch64StreamingStackHazardSize(
85 "aarch64-stack-hazard-size",
86 cl::desc("alias for -aarch64-streaming-hazard-size"),
87 cl::aliasopt(AArch64StreamingHazardSize));
88
89static cl::opt<unsigned>
90 VScaleForTuningOpt("sve-vscale-for-tuning", cl::Hidden,
91 cl::desc("Force a vscale for tuning factor for SVE"));
92
93// Subreg liveness tracking is disabled by default for now until all issues
94// are ironed out. This option allows the feature to be used in tests.
95static cl::opt<bool>
96 EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
97 cl::init(Val: false), cl::Hidden,
98 cl::desc("Enable subreg liveness tracking"));
99
100static cl::opt<bool>
101 UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(Val: false), cl::Hidden,
102 cl::desc("Prefer add+cnt over addvl/inc/dec"));
103
104unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
105 if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
106 return OverrideVectorInsertExtractBaseCost;
107 return VectorInsertExtractBaseCost;
108}
109
110AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
111 StringRef FS, StringRef CPUString, StringRef TuneCPUString,
112 bool HasMinSize) {
113 // Determine default and user-specified characteristics
114
115 if (CPUString.empty())
116 CPUString = "generic";
117
118 if (TuneCPUString.empty())
119 TuneCPUString = CPUString;
120
121 ParseSubtargetFeatures(CPU: CPUString, TuneCPU: TuneCPUString, FS);
122 initializeProperties(HasMinSize);
123
124 return *this;
125}
126
127void AArch64Subtarget::initializeProperties(bool HasMinSize) {
128 // Initialize CPU specific properties. We should add a tablegen feature for
129 // this in the future so we can specify it together with the subtarget
130 // features.
131 switch (ARMProcFamily) {
132 case Generic:
133 // Using TuneCPU=generic we avoid ldapur instructions to line up with the
134 // cpus that use the AvoidLDAPUR feature. We don't want this to be on
135 // forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
136 if (hasV8_4aOps() && !hasV8_8aOps())
137 AvoidLDAPUR = true;
138 break;
139 case Carmel:
140 break;
141 case CortexA35:
142 case CortexA53:
143 case CortexA55:
144 case CortexR82:
145 case CortexR82AE:
146 PrefFunctionAlignment = Align(16);
147 PrefLoopAlignment = Align(16);
148 MaxBytesForLoopAlignment = 8;
149 break;
150 case CortexA57:
151 PrefFunctionAlignment = Align(16);
152 PrefLoopAlignment = Align(16);
153 MaxBytesForLoopAlignment = 8;
154 break;
155 case CortexA65:
156 PrefFunctionAlignment = Align(8);
157 break;
158 case CortexA72:
159 case CortexA73:
160 case CortexA75:
161 PrefFunctionAlignment = Align(16);
162 PrefLoopAlignment = Align(16);
163 MaxBytesForLoopAlignment = 8;
164 break;
165 case CortexA76:
166 case CortexA77:
167 case CortexA78:
168 case CortexA78AE:
169 case CortexA78C:
170 case CortexX1:
171 PrefFunctionAlignment = Align(16);
172 PrefLoopAlignment = Align(32);
173 MaxBytesForLoopAlignment = 16;
174 break;
175 case CortexA320:
176 case CortexA510:
177 case CortexA520:
178 case C1Nano:
179 PrefFunctionAlignment = Align(16);
180 VScaleForTuning = 1;
181 PrefLoopAlignment = Align(16);
182 MaxBytesForLoopAlignment = 8;
183 break;
184 case CortexA710:
185 case CortexA715:
186 case CortexA720:
187 case CortexA725:
188 case C1Pro:
189 case CortexX2:
190 case CortexX3:
191 case CortexX4:
192 case CortexX925:
193 case C1Premium:
194 case C1Ultra:
195 PrefFunctionAlignment = Align(16);
196 VScaleForTuning = 1;
197 PrefLoopAlignment = Align(32);
198 MaxBytesForLoopAlignment = 16;
199 break;
200 case A64FX:
201 CacheLineSize = 256;
202 PrefFunctionAlignment = Align(8);
203 PrefLoopAlignment = Align(4);
204 PrefetchDistance = 128;
205 MinPrefetchStride = 1024;
206 MaxPrefetchIterationsAhead = 4;
207 VScaleForTuning = 4;
208 break;
209 case MONAKA:
210 VScaleForTuning = 2;
211 break;
212 case AppleA7:
213 case AppleA10:
214 case AppleA11:
215 case AppleA12:
216 case AppleA13:
217 case AppleA14:
218 case AppleA15:
219 case AppleA16:
220 case AppleA17:
221 case AppleM4:
222 case AppleM5:
223 PrefetchDistance = 280;
224 MinPrefetchStride = 2048;
225 MaxPrefetchIterationsAhead = 3;
226 break;
227 case ExynosM3:
228 MaxJumpTableSize = 20;
229 PrefFunctionAlignment = Align(32);
230 PrefLoopAlignment = Align(16);
231 break;
232 case Falkor:
233 // FIXME: remove this to enable 64-bit SLP if performance looks good.
234 MinVectorRegisterBitWidth = 128;
235 CacheLineSize = 128;
236 PrefetchDistance = 820;
237 MinPrefetchStride = 2048;
238 MaxPrefetchIterationsAhead = 8;
239 break;
240 case Kryo:
241 VectorInsertExtractBaseCost = 2;
242 CacheLineSize = 128;
243 PrefetchDistance = 740;
244 MinPrefetchStride = 1024;
245 MaxPrefetchIterationsAhead = 11;
246 // FIXME: remove this to enable 64-bit SLP if performance looks good.
247 MinVectorRegisterBitWidth = 128;
248 break;
249 case NeoverseE1:
250 PrefFunctionAlignment = Align(8);
251 break;
252 case NeoverseN1:
253 PrefFunctionAlignment = Align(16);
254 PrefLoopAlignment = Align(32);
255 MaxBytesForLoopAlignment = 16;
256 break;
257 case NeoverseV2:
258 case NeoverseV3:
259 EpilogueVectorizationMinVF = 8;
260 ScatterOverhead = 13;
261 [[fallthrough]];
262 case NeoverseN2:
263 case NeoverseN3:
264 case NeoverseV3AE:
265 PrefFunctionAlignment = Align(16);
266 PrefLoopAlignment = Align(32);
267 MaxBytesForLoopAlignment = 16;
268 VScaleForTuning = 1;
269 break;
270 case NeoverseV1:
271 PrefFunctionAlignment = Align(16);
272 PrefLoopAlignment = Align(32);
273 MaxBytesForLoopAlignment = 16;
274 VScaleForTuning = 2;
275 DefaultSVETFOpts = TailFoldingOpts::Simple;
276 break;
277 case Neoverse512TVB:
278 PrefFunctionAlignment = Align(16);
279 VScaleForTuning = 1;
280 break;
281 case Saphira:
282 // FIXME: remove this to enable 64-bit SLP if performance looks good.
283 MinVectorRegisterBitWidth = 128;
284 break;
285 case ThunderX2T99:
286 PrefFunctionAlignment = Align(8);
287 PrefLoopAlignment = Align(4);
288 PrefetchDistance = 128;
289 MinPrefetchStride = 1024;
290 MaxPrefetchIterationsAhead = 4;
291 // FIXME: remove this to enable 64-bit SLP if performance looks good.
292 MinVectorRegisterBitWidth = 128;
293 break;
294 case ThunderX:
295 case ThunderXT88:
296 case ThunderXT81:
297 case ThunderXT83:
298 CacheLineSize = 128;
299 PrefFunctionAlignment = Align(8);
300 PrefLoopAlignment = Align(4);
301 // FIXME: remove this to enable 64-bit SLP if performance looks good.
302 MinVectorRegisterBitWidth = 128;
303 break;
304 case TSV110:
305 PrefFunctionAlignment = Align(16);
306 PrefLoopAlignment = Align(4);
307 break;
308 case HIP12:
309 PrefFunctionAlignment = Align(16);
310 PrefLoopAlignment = Align(4);
311 VScaleForTuning = 2;
312 DefaultSVETFOpts = TailFoldingOpts::Simple;
313 break;
314 case ThunderX3T110:
315 PrefFunctionAlignment = Align(16);
316 PrefLoopAlignment = Align(4);
317 PrefetchDistance = 128;
318 MinPrefetchStride = 1024;
319 MaxPrefetchIterationsAhead = 4;
320 // FIXME: remove this to enable 64-bit SLP if performance looks good.
321 MinVectorRegisterBitWidth = 128;
322 break;
323 case Ampere1:
324 case Ampere1A:
325 case Ampere1B:
326 case Ampere1C:
327 PrefFunctionAlignment = Align(64);
328 PrefLoopAlignment = Align(64);
329 break;
330 case Oryon:
331 PrefFunctionAlignment = Align(16);
332 PrefetchDistance = 128;
333 MinPrefetchStride = 1024;
334 break;
335 case Olympus:
336 EpilogueVectorizationMinVF = 8;
337 ScatterOverhead = 13;
338 PrefFunctionAlignment = Align(16);
339 PrefLoopAlignment = Align(32);
340 MaxBytesForLoopAlignment = 16;
341 VScaleForTuning = 1;
342 break;
343 }
344
345 if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
346 MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
347 if (VScaleForTuningOpt.getNumOccurrences() > 0)
348 VScaleForTuning = VScaleForTuningOpt;
349}
350
351AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
352 StringRef TuneCPU, StringRef FS,
353 const TargetMachine &TM, bool LittleEndian,
354 unsigned MinSVEVectorSizeInBitsOverride,
355 unsigned MaxSVEVectorSizeInBitsOverride,
356 bool IsStreaming, bool IsStreamingCompatible,
357 bool HasMinSize,
358 bool EnableSRLTSubregToRegMitigation)
359 : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
360 ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
361 ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
362 CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
363 IsLittle(LittleEndian), IsStreaming(IsStreaming),
364 IsStreamingCompatible(IsStreamingCompatible),
365 StreamingHazardSize(
366 AArch64StreamingHazardSize.getNumOccurrences() > 0
367 ? std::optional<unsigned>(AArch64StreamingHazardSize)
368 : std::nullopt),
369 MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
370 MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride),
371 EnableSRLTSubregToRegMitigation(EnableSRLTSubregToRegMitigation),
372 // To benefit from SME2's strided-register multi-vector load/store
373 // instructions we'll need to enable subreg liveness. Our longer
374 // term aim is to make this the default, regardless of streaming
375 // mode, but there are still some outstanding issues, see:
376 // https://github.com/llvm/llvm-project/pull/174188
377 // and:
378 // https://github.com/llvm/llvm-project/pull/168353
379 EnableSubregLiveness(IsStreaming || EnableSubregLivenessTracking),
380 TargetTriple(TT),
381 InstrInfo(initializeSubtargetDependencies(FS, CPUString: CPU, TuneCPUString: TuneCPU, HasMinSize)),
382 TLInfo(TM, *this) {
383 if (AArch64::isX18ReservedByDefault(TT))
384 ReserveXRegister.set(18);
385
386 CallLoweringInfo.reset(p: new AArch64CallLowering(*getTargetLowering()));
387 InlineAsmLoweringInfo.reset(p: new InlineAsmLowering(getTargetLowering()));
388 Legalizer.reset(p: new AArch64LegalizerInfo(*this));
389
390 auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
391
392 // FIXME: At this point, we can't rely on Subtarget having RBI.
393 // It's awkward to mix passing RBI and the Subtarget; should we pass
394 // TII/TRI as well?
395 InstSelector.reset(p: createAArch64InstructionSelector(
396 *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
397
398 RegBankInfo.reset(p: RBI);
399
400 auto TRI = getRegisterInfo();
401 StringSet<> ReservedRegNames(llvm::from_range, ReservedRegsForRA);
402 for (unsigned i = 0; i < 29; ++i) {
403 if (ReservedRegNames.count(Key: TRI->getName(RegNo: AArch64::X0 + i)))
404 ReserveXRegisterForRA.set(i);
405 }
406 // X30 is named LR, so we can't use TRI->getName to check X30.
407 if (ReservedRegNames.count(Key: "X30") || ReservedRegNames.count(Key: "LR"))
408 ReserveXRegisterForRA.set(30);
409 // X29 is named FP, so we can't use TRI->getName to check X29.
410 if (ReservedRegNames.count(Key: "X29") || ReservedRegNames.count(Key: "FP"))
411 ReserveXRegisterForRA.set(29);
412}
413
414const CallLowering *AArch64Subtarget::getCallLowering() const {
415 return CallLoweringInfo.get();
416}
417
418const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
419 return InlineAsmLoweringInfo.get();
420}
421
422InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
423 return InstSelector.get();
424}
425
426const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
427 return Legalizer.get();
428}
429
430const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
431 return RegBankInfo.get();
432}
433
434/// Find the target operand flags that describe how a global value should be
435/// referenced for the current subtarget.
436unsigned
437AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
438 const TargetMachine &TM) const {
439 // MachO large model always goes via a GOT, simply to get a single 8-byte
440 // absolute relocation on all global addresses.
441 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
442 return AArch64II::MO_GOT;
443
444 // All globals dynamically protected by MTE must have their address tags
445 // synthesized. This is done by having the loader stash the tag in the GOT
446 // entry. Force all tagged globals (even ones with internal linkage) through
447 // the GOT.
448 if (GV->isTagged())
449 return AArch64II::MO_GOT;
450
451 if (!TM.shouldAssumeDSOLocal(GV)) {
452 if (GV->hasDLLImportStorageClass()) {
453 return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
454 }
455 if (getTargetTriple().isOSWindows())
456 return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
457 return AArch64II::MO_GOT;
458 }
459
460 // The small code model's direct accesses use ADRP, which cannot
461 // necessarily produce the value 0 (if the code is above 4GB).
462 // Same for the tiny code model, where we have a pc relative LDR.
463 if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
464 GV->hasExternalWeakLinkage())
465 return AArch64II::MO_GOT;
466
467 // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
468 // that their nominal addresses are tagged and outside of the code model. In
469 // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
470 // tag if necessary based on MO_TAGGED.
471 if (AllowTaggedGlobals && !isa<FunctionType>(Val: GV->getValueType()))
472 return AArch64II::MO_NC | AArch64II::MO_TAGGED;
473
474 return AArch64II::MO_NO_FLAG;
475}
476
477unsigned AArch64Subtarget::classifyGlobalFunctionReference(
478 const GlobalValue *GV, const TargetMachine &TM) const {
479 // MachO large model always goes via a GOT, because we don't have the
480 // relocations available to do anything else..
481 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
482 !GV->hasInternalLinkage())
483 return AArch64II::MO_GOT;
484
485 // NonLazyBind goes via GOT unless we know it's available locally.
486 auto *F = dyn_cast<Function>(Val: GV);
487 if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
488 F->hasFnAttribute(Kind: Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
489 return AArch64II::MO_GOT;
490
491 if (getTargetTriple().isOSWindows()) {
492 if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
493 if (GV->hasDLLImportStorageClass()) {
494 // On Arm64EC, if we're calling a symbol from the import table
495 // directly, use MO_ARM64EC_CALLMANGLE.
496 return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
497 AArch64II::MO_ARM64EC_CALLMANGLE;
498 }
499 if (GV->hasExternalLinkage()) {
500 // If we're calling a symbol directly, use the mangled form in the
501 // call instruction.
502 return AArch64II::MO_ARM64EC_CALLMANGLE;
503 }
504 }
505
506 // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
507 return ClassifyGlobalReference(GV, TM);
508 }
509
510 return AArch64II::MO_NO_FLAG;
511}
512
513void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
514 const SchedRegion &Region) const {
515 // LNT run (at least on Cyclone) showed reasonably significant gains for
516 // bi-directional scheduling. 253.perlbmk.
517 Policy.OnlyTopDown = false;
518 Policy.OnlyBottomUp = false;
519 // Enabling or Disabling the latency heuristic is a close call: It seems to
520 // help nearly no benchmark on out-of-order architectures, on the other hand
521 // it regresses register pressure on a few benchmarking.
522 Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
523}
524
525void AArch64Subtarget::adjustSchedDependency(
526 SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
527 const TargetSchedModel *SchedModel) const {
528 if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
529 !Def->isInstr() || !Use->isInstr() ||
530 (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
531 Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
532 return;
533
534 // If the Def is a BUNDLE, find the last instruction in the bundle that defs
535 // the register.
536 const MachineInstr *DefMI = Def->getInstr();
537 if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
538 Register Reg = DefMI->getOperand(i: DefOpIdx).getReg();
539 for (const auto &Op : const_mi_bundle_ops(MI: *DefMI)) {
540 if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
541 DefMI = Op.getParent();
542 DefOpIdx = Op.getOperandNo();
543 }
544 }
545 }
546
547 // If the Use is a BUNDLE, find the first instruction that uses the Reg.
548 const MachineInstr *UseMI = Use->getInstr();
549 if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
550 Register Reg = UseMI->getOperand(i: UseOpIdx).getReg();
551 for (const auto &Op : const_mi_bundle_ops(MI: *UseMI)) {
552 if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
553 UseMI = Op.getParent();
554 UseOpIdx = Op.getOperandNo();
555 break;
556 }
557 }
558 }
559
560 Dep.setLatency(
561 SchedModel->computeOperandLatency(DefMI, DefOperIdx: DefOpIdx, UseMI, UseOperIdx: UseOpIdx));
562}
563
564bool AArch64Subtarget::enableEarlyIfConversion() const {
565 return EnableEarlyIfConvert;
566}
567
568bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
569 if (!UseAddressTopByteIgnored)
570 return false;
571
572 if (TargetTriple.isDriverKit())
573 return true;
574 if (TargetTriple.isiOS()) {
575 return TargetTriple.getiOSVersion() >= VersionTuple(8);
576 }
577
578 return false;
579}
580
581std::unique_ptr<PBQPRAConstraint>
582AArch64Subtarget::getCustomPBQPConstraints() const {
583 return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
584}
585
586void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
587 // We usually compute max call frame size after ISel. Do the computation now
588 // if the .mir file didn't specify it. Note that this will probably give you
589 // bogus values after PEI has eliminated the callframe setup/destroy pseudo
590 // instructions, specify explicitly if you need it to be correct.
591 MachineFrameInfo &MFI = MF.getFrameInfo();
592 if (!MFI.isMaxCallFrameSizeComputed())
593 MFI.computeMaxCallFrameSize(MF);
594}
595
596bool AArch64Subtarget::useAA() const { return UseAA; }
597
598bool AArch64Subtarget::useScalarIncVL() const {
599 // If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
600 // is not otherwise set, enable it by default.
601 if (UseScalarIncVL.getNumOccurrences())
602 return UseScalarIncVL;
603 return hasSVE2() || hasSME();
604}
605
606// If return address signing is enabled, tail calls are emitted as follows:
607//
608// ```
609// <authenticate LR>
610// <check LR>
611// TCRETURN ; the callee may sign and spill the LR in its prologue
612// ```
613//
614// LR may require explicit checking because if FEAT_FPAC is not implemented
615// and LR was tampered with, then `<authenticate LR>` will not generate an
616// exception on its own. Later, if the callee spills the signed LR value and
617// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
618// the higher bits of LR thus hiding the authentication failure.
619AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
620 const MachineFunction &MF) const {
621 // TODO: Check subtarget for the scheme. Present variant is a default for
622 // pauthtest ABI.
623 if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-returns") &&
624 MF.getFunction().hasFnAttribute(Kind: "ptrauth-auth-traps"))
625 return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
626 if (AuthenticatedLRCheckMethod.getNumOccurrences())
627 return AuthenticatedLRCheckMethod;
628
629 // At now, use None by default because checks may introduce an unexpected
630 // performance regression or incompatibility with execute-only mappings.
631 return AArch64PAuth::AuthCheckMethod::None;
632}
633
634std::optional<uint16_t>
635AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
636 const Function &ParentFn) const {
637 if (!ParentFn.hasFnAttribute(Kind: "ptrauth-indirect-gotos"))
638 return std::nullopt;
639 // We currently have one simple mechanism for all targets.
640 // This isn't ABI, so we can always do better in the future.
641 return getPointerAuthStableSipHash(
642 S: (Twine(ParentFn.getName()) + " blockaddress").str());
643}
644
645bool AArch64Subtarget::isX16X17Safer() const {
646 // The Darwin kernel implements special protections for x16 and x17 so we
647 // should prefer to use those registers on that platform.
648 return isTargetDarwin();
649}
650
651bool AArch64Subtarget::enableMachinePipeliner() const {
652 return getSchedModel().hasInstrSchedModel();
653}
654