//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/SipHash.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool> MachOUseNonLazyBind(
    "aarch64-macho-enable-nonlazybind",
    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
    cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used to satisfy ABI requirements, such as
// passing arguments to a function call.
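// Registers are given by name as a comma-separated list; an illustrative
// invocation (typically via llc, or clang's -mllvm) might look like:
//   -reserve-regs-for-regalloc=X9,X10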
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(10), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

static cl::opt<unsigned> AArch64StreamingHazardSize(
    "aarch64-streaming-hazard-size",
    cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
    cl::init(0), cl::Hidden);

static cl::alias AArch64StreamingStackHazardSize(
    "aarch64-stack-hazard-size",
    cl::desc("alias for -aarch64-streaming-hazard-size"),
    cl::aliasopt(AArch64StreamingHazardSize));

static cl::opt<bool> EnableZPRPredicateSpills(
    "aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden,
    cl::desc(
        "Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));

// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
    EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Enable subreg liveness tracking"));

static cl::opt<bool>
    UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(false), cl::Hidden,
                   cl::desc("Prefer add+cnt over addvl/inc/dec"));

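// Return the base cost of a vector insert/extract element. A value given via
// -aarch64-insert-extract-base-cost takes precedence over the CPU-specific
// default.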
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

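// Parse the subtarget features and initialize the CPU-specific tuning
// properties. An empty CPU name defaults to "generic", and the tuning CPU
// falls back to the target CPU when not given. Returns *this so the call can
// be chained in the constructor's member initializer list.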
AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Generic:
    // Using TuneCPU=generic we avoid ldapur instructions to line up with the
    // CPUs that use the AvoidLDAPUR feature. We don't want this to be on
    // forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
    if (hasV8_4aOps() && !hasV8_8aOps())
      AvoidLDAPUR = true;
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
  case CortexR82:
  case CortexR82AE:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78AE:
  case CortexA78C:
  case CortexX1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA320:
  case CortexA510:
  case CortexA520:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexA725:
  case CortexX2:
  case CortexX3:
  case CortexX4:
  case CortexX925:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case MONAKA:
    VScaleForTuning = 2;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
  case AppleM4:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
    case AppleA17:
    case AppleM4:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseV2:
  case NeoverseV3:
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    LLVM_FALLTHROUGH;
  case NeoverseN2:
  case NeoverseN3:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
  case Ampere1B:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    MaxInterleaveFactor = 4;
    break;
  case Oryon:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    break;
  case Olympus:
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  }

  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}

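// Construct the subtarget: parse the feature string, record the SVE
// vector-length and streaming-mode overrides, and set up the GlobalISel
// support objects (call lowering, inline-asm lowering, legalizer, register
// bank info, and instruction selector).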
AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool IsStreaming, bool IsStreamingCompatible,
                                   bool HasMinSize)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), IsStreaming(IsStreaming),
      IsStreamingCompatible(IsStreamingCompatible),
      StreamingHazardSize(
          AArch64StreamingHazardSize.getNumOccurrences() > 0
              ? std::optional<unsigned>(AArch64StreamingHazardSize)
              : std::nullopt),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames(llvm::from_range, ReservedRegsForRA);
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}

unsigned AArch64Subtarget::getHwModeSet() const {
  AArch64HwModeBits Modes = AArch64HwModeBits::DefaultMode;

  // Use a special hardware mode in streaming[-compatible] functions with
  // aarch64-enable-zpr-predicate-spills. This changes the spill size (and
  // alignment) for the predicate register class.
  if (EnableZPRPredicateSpills.getValue() &&
      (isStreaming() || isStreamingCompatible())) {
    Modes |= AArch64HwModeBits::SMEWithZPRPredicateSpills;
  }

  return to_underlying(Modes);
}

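// Plain accessors for the GlobalISel support objects created in the
// constructor.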
const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(GV)) {
    if (GV->hasDLLImportStorageClass()) {
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

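// Like ClassifyGlobalReference, but for a global value used as a call target,
// where Mach-O non-lazy binding and Arm64EC call mangling also come into play.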
unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
      F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
      if (GV->hasDLLImportStorageClass()) {
        // On Arm64EC, if we're calling a symbol from the import table
        // directly, use MO_ARM64EC_CALLMANGLE.
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
               AArch64II::MO_ARM64EC_CALLMANGLE;
      }
      if (GV->hasExternalLinkage()) {
        // If we're calling a symbol directly, use the mangled form in the
        // call instruction.
        return AArch64II::MO_ARM64EC_CALLMANGLE;
      }
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

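// Tweak the generic MachineScheduler policy for AArch64: allow bi-directional
// scheduling and optionally drop the latency heuristic (see comments below).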
void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures, but it regresses
  // register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

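// If either end of a data dependence is a BUNDLE, refine the dependence
// latency using the bundled instruction that actually defines or uses the
// register.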
void AArch64Subtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr() ||
      (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
       Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
    return;

  // If the Def is a BUNDLE, find the last instruction in the bundle that defs
  // the register.
  const MachineInstr *DefMI = Def->getInstr();
  if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = DefMI->getOperand(DefOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*DefMI)) {
      if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
        DefMI = Op.getParent();
        DefOpIdx = Op.getOperandNo();
      }
    }
  }

  // If the Use is a BUNDLE, find the first instruction that uses the Reg.
  const MachineInstr *UseMI = Use->getInstr();
  if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = UseMI->getOperand(UseOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*UseMI)) {
      if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
        UseMI = Op.getParent();
        UseOpIdx = Op.getOperandNo();
        break;
      }
    }
  }

  Dep.setLatency(
      SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

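// Only assume top-byte-ignore semantics when explicitly requested via
// -aarch64-use-tbi, and only on OSes known to preserve tagged addresses
// (DriverKit, or iOS 8 and later).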
bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

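// The only custom PBQP constraint currently provided is the Cortex-A57
// FP-operation chaining constraint, enabled when the subtarget asks to
// balance FP ops.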
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::useScalarIncVL() const {
  // If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
  // is not otherwise set, enable it by default.
  if (UseScalarIncVL.getNumOccurrences())
    return UseScalarIncVL;
  return hasSVE2() || hasSME();
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
// <authenticate LR>
// <check LR>
// TCRETURN ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
// the higher bits of LR thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
    const MachineFunction &MF) const {
  // TODO: Check subtarget for the scheme. Present variant is a default for
  // pauthtest ABI.
  if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
      MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
    return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}

std::optional<uint16_t>
AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
    const Function &ParentFn) const {
  if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos"))
    return std::nullopt;
  // We currently have one simple mechanism for all targets.
  // This isn't ABI, so we can always do better in the future.
  return getPointerAuthStableSipHash(
      (Twine(ParentFn.getName()) + " blockaddress").str());
}

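// Enable the MachinePipeliner only when the CPU provides a detailed
// per-instruction scheduling model.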
bool AArch64Subtarget::enableMachinePipeliner() const {
  return getSchedModel().hasInstrSchedModel();
}