AArch64Subtarget.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64Subtarget.cpp]

1	//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the AArch64 specific subclass of TargetSubtarget.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AArch64Subtarget.h"
14
15	#include "AArch64.h"
16	#include "AArch64InstrInfo.h"
17	#include "AArch64PBQPRegAlloc.h"
18	#include "AArch64TargetMachine.h"
19	#include "GISel/AArch64CallLowering.h"
20	#include "GISel/AArch64LegalizerInfo.h"
21	#include "GISel/AArch64RegisterBankInfo.h"
22	#include "MCTargetDesc/AArch64AddressingModes.h"
23	#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24	#include "llvm/CodeGen/MachineFrameInfo.h"
25	#include "llvm/CodeGen/MachineScheduler.h"
26	#include "llvm/IR/GlobalValue.h"
27	#include "llvm/Support/SipHash.h"
28	#include "llvm/TargetParser/AArch64TargetParser.h"
29
30	using namespace llvm;
31
32	#define DEBUG_TYPE "aarch64-subtarget"
33
34	#define GET_SUBTARGETINFO_CTOR
35	#define GET_SUBTARGETINFO_TARGET_DESC
36	#include "AArch64GenSubtargetInfo.inc"
37
38	static cl::opt<bool>
39	EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc ("Enable the early if "
40	"converter pass"), cl::init(Val: true), cl::Hidden);
41
42	// If OS supports TBI, use this flag to enable it.
43	static cl::opt<bool>
44	UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc ("Assume that top byte of "
45	"an address is ignored"), cl::init(Val: false), cl::Hidden);
46
47	static cl::opt<bool> MachOUseNonLazyBind(
48	"aarch64-macho-enable-nonlazybind",
49	cl::desc ("Call nonlazybind functions via direct GOT load for Mach-O"),
50	cl::Hidden);
51
52	static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(Val: true),
53	cl::desc ("Enable the use of AA during codegen."));
54
55	static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
56	"aarch64-insert-extract-base-cost",
57	cl::desc ("Base cost of vector insert/extract element"), cl::Hidden);
58
59	// Reserve a list of X# registers, so they are unavailable for register
60	// allocator, but can still be used as ABI requests, such as passing arguments
61	// to function call.
62	static cl::list<std::string>
63	ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc ("Reserve physical "
64	"registers, so they can't be used by register allocator. "
65	"Should only be used for testing register allocator."),
66	cl::CommaSeparated, cl::Hidden);
67
68	static cl::opt<AArch64PAuth::AuthCheckMethod>
69	AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
70	cl::Hidden,
71	cl::desc ("Override the variant of check applied "
72	"to authenticated LR during tail call"),
73	cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
74
75	static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
76	"aarch64-min-jump-table-entries", cl::init(Val: `10`), cl::Hidden,
77	cl::desc ("Set minimum number of entries to use a jump table on AArch64"));
78
79	static cl::opt<unsigned> AArch64StreamingHazardSize(
80	"aarch64-streaming-hazard-size",
81	cl::desc ("Hazard size for streaming mode memory accesses. 0 = disabled."),
82	cl::init(Val: `0`), cl::Hidden);
83
84	static cl::alias AArch64StreamingStackHazardSize(
85	"aarch64-stack-hazard-size",
86	cl::desc ("alias for -aarch64-streaming-hazard-size"),
87	cl::aliasopt (AArch64StreamingHazardSize));
88
89	static cl::opt<unsigned>
90	VScaleForTuningOpt("sve-vscale-for-tuning", cl::Hidden,
91	cl::desc ("Force a vscale for tuning factor for SVE"));
92
93	// Subreg liveness tracking is disabled by default for now until all issues
94	// are ironed out. This option allows the feature to be used in tests.
95	static cl::opt<bool>
96	EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
97	cl::init(Val: false), cl::Hidden,
98	cl::desc ("Enable subreg liveness tracking"));
99
100	static cl::opt<bool>
101	UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(Val: false), cl::Hidden,
102	cl::desc ("Prefer add+cnt over addvl/inc/dec"));
103
104	unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
105	if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > `0`)
106	return OverrideVectorInsertExtractBaseCost;
107	return VectorInsertExtractBaseCost;
108	}
109
110	AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
111	StringRef FS, StringRef CPUString, StringRef TuneCPUString,
112	bool HasMinSize) {
113	// Determine default and user-specified characteristics
114
115	if (CPUString.empty())
116	CPUString = "generic";
117
118	if (TuneCPUString.empty())
119	TuneCPUString = CPUString;
120
121	ParseSubtargetFeatures(CPU: CPUString, TuneCPU: TuneCPUString, FS);
122	initializeProperties(HasMinSize);
123
124	return *this;
125	}
126
127	void AArch64Subtarget::initializeProperties(bool HasMinSize) {
128	// Initialize CPU specific properties. We should add a tablegen feature for
129	// this in the future so we can specify it together with the subtarget
130	// features.
131	switch (ARMProcFamily) {
132	case Generic:
133	// Using TuneCPU=generic we avoid ldapur instructions to line up with the
134	// cpus that use the AvoidLDAPUR feature. We don't want this to be on
135	// forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
136	if (hasV8_4aOps() && !hasV8_8aOps())
137	AvoidLDAPUR = true;
138	break;
139	case Carmel:
140	CacheLineSize = `64`;
141	break;
142	case CortexA35:
143	case CortexA53:
144	case CortexA55:
145	case CortexR82:
146	case CortexR82AE:
147	PrefFunctionAlignment = Align (`16`);
148	PrefLoopAlignment = Align (`16`);
149	MaxBytesForLoopAlignment = `8`;
150	break;
151	case CortexA57:
152	PrefFunctionAlignment = Align (`16`);
153	PrefLoopAlignment = Align (`16`);
154	MaxBytesForLoopAlignment = `8`;
155	break;
156	case CortexA65:
157	PrefFunctionAlignment = Align (`8`);
158	break;
159	case CortexA72:
160	case CortexA73:
161	case CortexA75:
162	PrefFunctionAlignment = Align (`16`);
163	PrefLoopAlignment = Align (`16`);
164	MaxBytesForLoopAlignment = `8`;
165	break;
166	case CortexA76:
167	case CortexA77:
168	case CortexA78:
169	case CortexA78AE:
170	case CortexA78C:
171	case CortexX1:
172	PrefFunctionAlignment = Align (`16`);
173	PrefLoopAlignment = Align (`32`);
174	MaxBytesForLoopAlignment = `16`;
175	break;
176	case CortexA320:
177	case CortexA510:
178	case CortexA520:
179	case C1Nano:
180	PrefFunctionAlignment = Align (`16`);
181	VScaleForTuning = `1`;
182	PrefLoopAlignment = Align (`16`);
183	MaxBytesForLoopAlignment = `8`;
184	break;
185	case CortexA710:
186	case CortexA715:
187	case CortexA720:
188	case CortexA725:
189	case C1Pro:
190	case CortexX2:
191	case CortexX3:
192	case CortexX4:
193	case CortexX925:
194	case C1Premium:
195	case C1Ultra:
196	PrefFunctionAlignment = Align (`16`);
197	VScaleForTuning = `1`;
198	PrefLoopAlignment = Align (`32`);
199	MaxBytesForLoopAlignment = `16`;
200	break;
201	case A64FX:
202	CacheLineSize = `256`;
203	PrefFunctionAlignment = Align (`8`);
204	PrefLoopAlignment = Align (`4`);
205	PrefetchDistance = `128`;
206	MinPrefetchStride = `1024`;
207	MaxPrefetchIterationsAhead = `4`;
208	VScaleForTuning = `4`;
209	break;
210	case MONAKA:
211	VScaleForTuning = `2`;
212	break;
213	case AppleA7:
214	case AppleA10:
215	case AppleA11:
216	case AppleA12:
217	case AppleA13:
218	case AppleA14:
219	case AppleA15:
220	case AppleA16:
221	case AppleA17:
222	case AppleM4:
223	case AppleM5:
224	CacheLineSize = `64`;
225	PrefetchDistance = `280`;
226	MinPrefetchStride = `2048`;
227	MaxPrefetchIterationsAhead = `3`;
228	break;
229	case ExynosM3:
230	MaxJumpTableSize = `20`;
231	PrefFunctionAlignment = Align (`32`);
232	PrefLoopAlignment = Align (`16`);
233	break;
234	case Falkor:
235	// FIXME: remove this to enable 64-bit SLP if performance looks good.
236	MinVectorRegisterBitWidth = `128`;
237	CacheLineSize = `128`;
238	PrefetchDistance = `820`;
239	MinPrefetchStride = `2048`;
240	MaxPrefetchIterationsAhead = `8`;
241	break;
242	case Kryo:
243	VectorInsertExtractBaseCost = `2`;
244	CacheLineSize = `128`;
245	PrefetchDistance = `740`;
246	MinPrefetchStride = `1024`;
247	MaxPrefetchIterationsAhead = `11`;
248	// FIXME: remove this to enable 64-bit SLP if performance looks good.
249	MinVectorRegisterBitWidth = `128`;
250	break;
251	case NeoverseE1:
252	PrefFunctionAlignment = Align (`8`);
253	break;
254	case NeoverseN1:
255	PrefFunctionAlignment = Align (`16`);
256	PrefLoopAlignment = Align (`32`);
257	MaxBytesForLoopAlignment = `16`;
258	break;
259	case NeoverseV2:
260	case NeoverseV3:
261	CacheLineSize = `64`;
262	EpilogueVectorizationMinVF = `8`;
263	ScatterOverhead = `13`;
264	[[fallthrough]];
265	case NeoverseN2:
266	case NeoverseN3:
267	PrefFunctionAlignment = Align (`16`);
268	PrefLoopAlignment = Align (`32`);
269	MaxBytesForLoopAlignment = `16`;
270	VScaleForTuning = `1`;
271	break;
272	case NeoverseV1:
273	PrefFunctionAlignment = Align (`16`);
274	PrefLoopAlignment = Align (`32`);
275	MaxBytesForLoopAlignment = `16`;
276	VScaleForTuning = `2`;
277	DefaultSVETFOpts = TailFoldingOpts::Simple;
278	break;
279	case Neoverse512TVB:
280	PrefFunctionAlignment = Align (`16`);
281	VScaleForTuning = `1`;
282	break;
283	case Saphira:
284	// FIXME: remove this to enable 64-bit SLP if performance looks good.
285	MinVectorRegisterBitWidth = `128`;
286	break;
287	case ThunderX2T99:
288	CacheLineSize = `64`;
289	PrefFunctionAlignment = Align (`8`);
290	PrefLoopAlignment = Align (`4`);
291	PrefetchDistance = `128`;
292	MinPrefetchStride = `1024`;
293	MaxPrefetchIterationsAhead = `4`;
294	// FIXME: remove this to enable 64-bit SLP if performance looks good.
295	MinVectorRegisterBitWidth = `128`;
296	break;
297	case ThunderX:
298	case ThunderXT88:
299	case ThunderXT81:
300	case ThunderXT83:
301	CacheLineSize = `128`;
302	PrefFunctionAlignment = Align (`8`);
303	PrefLoopAlignment = Align (`4`);
304	// FIXME: remove this to enable 64-bit SLP if performance looks good.
305	MinVectorRegisterBitWidth = `128`;
306	break;
307	case TSV110:
308	CacheLineSize = `64`;
309	PrefFunctionAlignment = Align (`16`);
310	PrefLoopAlignment = Align (`4`);
311	break;
312	case ThunderX3T110:
313	CacheLineSize = `64`;
314	PrefFunctionAlignment = Align (`16`);
315	PrefLoopAlignment = Align (`4`);
316	PrefetchDistance = `128`;
317	MinPrefetchStride = `1024`;
318	MaxPrefetchIterationsAhead = `4`;
319	// FIXME: remove this to enable 64-bit SLP if performance looks good.
320	MinVectorRegisterBitWidth = `128`;
321	break;
322	case Ampere1:
323	case Ampere1A:
324	case Ampere1B:
325	case Ampere1C:
326	CacheLineSize = `64`;
327	PrefFunctionAlignment = Align (`64`);
328	PrefLoopAlignment = Align (`64`);
329	break;
330	case Oryon:
331	CacheLineSize = `64`;
332	PrefFunctionAlignment = Align (`16`);
333	PrefetchDistance = `128`;
334	MinPrefetchStride = `1024`;
335	break;
336	case Olympus:
337	EpilogueVectorizationMinVF = `8`;
338	ScatterOverhead = `13`;
339	PrefFunctionAlignment = Align (`16`);
340	PrefLoopAlignment = Align (`32`);
341	MaxBytesForLoopAlignment = `16`;
342	VScaleForTuning = `1`;
343	break;
344	}
345
346	if (AArch64MinimumJumpTableEntries.getNumOccurrences() > `0` \|\| !HasMinSize)
347	MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
348	if (VScaleForTuningOpt.getNumOccurrences() > `0`)
349	VScaleForTuning = VScaleForTuningOpt;
350	}
351
352	AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
353	StringRef TuneCPU, StringRef FS,
354	const TargetMachine &TM, bool LittleEndian,
355	unsigned MinSVEVectorSizeInBitsOverride,
356	unsigned MaxSVEVectorSizeInBitsOverride,
357	bool IsStreaming, bool IsStreamingCompatible,
358	bool HasMinSize,
359	bool EnableSRLTSubregToRegMitigation)
360	: AArch64GenSubtargetInfo (TT, CPU, TuneCPU, FS),
361	ReserveXRegister (AArch64::GPR64commonRegClass.getNumRegs()),
362	ReserveXRegisterForRA (AArch64::GPR64commonRegClass.getNumRegs()),
363	CustomCallSavedXRegs (AArch64::GPR64commonRegClass.getNumRegs()),
364	IsLittle(LittleEndian), IsStreaming(IsStreaming),
365	IsStreamingCompatible(IsStreamingCompatible),
366	StreamingHazardSize(
367	AArch64StreamingHazardSize.getNumOccurrences() > `0`
368	? std::optional<unsigned>(AArch64StreamingHazardSize)
369	: std::nullopt),
370	MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
371	MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride),
372	EnableSRLTSubregToRegMitigation(EnableSRLTSubregToRegMitigation),
373	TargetTriple (TT),
374	InstrInfo (initializeSubtargetDependencies(FS, CPUString: CPU, TuneCPUString: TuneCPU, HasMinSize)),
375	TLInfo (TM, *this) {
376	if (AArch64::isX18ReservedByDefault(TT))
377	ReserveXRegister.set(`18`);
378
379	CallLoweringInfo.reset(p: new AArch64CallLowering (*getTargetLowering()));
380	InlineAsmLoweringInfo.reset(p: new InlineAsmLowering (getTargetLowering()));
381	Legalizer.reset(p: new AArch64LegalizerInfo (*this));
382
383	auto RBI = new* AArch64RegisterBankInfo (*getRegisterInfo());
384
385	// FIXME: At this point, we can't rely on Subtarget having RBI.
386	// It's awkward to mix passing RBI and the Subtarget; should we pass
387	// TII/TRI as well?
388	InstSelector.reset(p: createAArch64InstructionSelector(
389	*static_cast<const AArch64TargetMachine >(&TM), this, *RBI));
390
391	RegBankInfo.reset(p: RBI);
392
393	auto TRI = getRegisterInfo();
394	StringSet<> ReservedRegNames(llvm::from_range, ReservedRegsForRA);
395	for (unsigned i = `0`; i < `29`; ++i) {
396	if (ReservedRegNames.count(Key: TRI->getName(RegNo: AArch64::X0 + i)))
397	ReserveXRegisterForRA.set(i);
398	}
399	// X30 is named LR, so we can't use TRI->getName to check X30.
400	if (ReservedRegNames.count(Key: "X30") \|\| ReservedRegNames.count(Key: "LR"))
401	ReserveXRegisterForRA.set(`30`);
402	// X29 is named FP, so we can't use TRI->getName to check X29.
403	if (ReservedRegNames.count(Key: "X29") \|\| ReservedRegNames.count(Key: "FP"))
404	ReserveXRegisterForRA.set(`29`);
405
406	// To benefit from SME2's strided-register multi-vector load/store
407	// instructions we'll need to enable subreg liveness. Our longer
408	// term aim is to make this the default, regardless of streaming
409	// mode, but there are still some outstanding issues, see:
410	// https://github.com/llvm/llvm-project/pull/174188
411	// and:
412	// https://github.com/llvm/llvm-project/pull/168353
413	if (IsStreaming)
414	EnableSubregLiveness = true;
415	else
416	EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
417	}
418
419	const CallLowering AArch64Subtarget::getCallLowering() const* {
420	return CallLoweringInfo.get();
421	}
422
423	const InlineAsmLowering AArch64Subtarget::getInlineAsmLowering() const* {
424	return InlineAsmLoweringInfo.get();
425	}
426
427	InstructionSelector AArch64Subtarget::getInstructionSelector() const* {
428	return InstSelector.get();
429	}
430
431	const LegalizerInfo AArch64Subtarget::getLegalizerInfo() const* {
432	return Legalizer.get();
433	}
434
435	const RegisterBankInfo AArch64Subtarget::getRegBankInfo() const* {
436	return RegBankInfo.get();
437	}
438
439	/// Find the target operand flags that describe how a global value should be
440	/// referenced for the current subtarget.
441	unsigned
442	AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
443	const TargetMachine &TM) const {
444	// MachO large model always goes via a GOT, simply to get a single 8-byte
445	// absolute relocation on all global addresses.
446	if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
447	return AArch64II::MO_GOT;
448
449	// All globals dynamically protected by MTE must have their address tags
450	// synthesized. This is done by having the loader stash the tag in the GOT
451	// entry. Force all tagged globals (even ones with internal linkage) through
452	// the GOT.
453	if (GV->isTagged())
454	return AArch64II::MO_GOT;
455
456	if (!TM.shouldAssumeDSOLocal(GV)) {
457	if (GV->hasDLLImportStorageClass()) {
458	return AArch64II::MO_GOT \| AArch64II::MO_DLLIMPORT;
459	}
460	if (getTargetTriple().isOSWindows())
461	return AArch64II::MO_GOT \| AArch64II::MO_COFFSTUB;
462	return AArch64II::MO_GOT;
463	}
464
465	// The small code model's direct accesses use ADRP, which cannot
466	// necessarily produce the value 0 (if the code is above 4GB).
467	// Same for the tiny code model, where we have a pc relative LDR.
468	if ((useSmallAddressing() \|\| TM.getCodeModel() == CodeModel::Tiny) &&
469	GV->hasExternalWeakLinkage())
470	return AArch64II::MO_GOT;
471
472	// References to tagged globals are marked with MO_NC \| MO_TAGGED to indicate
473	// that their nominal addresses are tagged and outside of the code model. In
474	// AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
475	// tag if necessary based on MO_TAGGED.
476	if (AllowTaggedGlobals && !isa<FunctionType>(Val: GV->getValueType()))
477	return AArch64II::MO_NC \| AArch64II::MO_TAGGED;
478
479	return AArch64II::MO_NO_FLAG;
480	}
481
482	unsigned AArch64Subtarget::classifyGlobalFunctionReference(
483	const GlobalValue GV, const* TargetMachine &TM) const {
484	// MachO large model always goes via a GOT, because we don't have the
485	// relocations available to do anything else..
486	if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
487	!GV->hasInternalLinkage())
488	return AArch64II::MO_GOT;
489
490	// NonLazyBind goes via GOT unless we know it's available locally.
491	auto *F = dyn_cast<Function>(Val: GV);
492	if ((!isTargetMachO() \|\| MachOUseNonLazyBind) && F &&
493	F->hasFnAttribute(Kind: Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
494	return AArch64II::MO_GOT;
495
496	if (getTargetTriple().isOSWindows()) {
497	if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
498	if (GV->hasDLLImportStorageClass()) {
499	// On Arm64EC, if we're calling a symbol from the import table
500	// directly, use MO_ARM64EC_CALLMANGLE.
501	return AArch64II::MO_GOT \| AArch64II::MO_DLLIMPORT \|
502	AArch64II::MO_ARM64EC_CALLMANGLE;
503	}
504	if (GV->hasExternalLinkage()) {
505	// If we're calling a symbol directly, use the mangled form in the
506	// call instruction.
507	return AArch64II::MO_ARM64EC_CALLMANGLE;
508	}
509	}
510
511	// Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
512	return ClassifyGlobalReference(GV, TM);
513	}
514
515	return AArch64II::MO_NO_FLAG;
516	}
517
518	void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
519	const SchedRegion &Region) const {
520	// LNT run (at least on Cyclone) showed reasonably significant gains for
521	// bi-directional scheduling. 253.perlbmk.
522	Policy.OnlyTopDown = false;
523	Policy.OnlyBottomUp = false;
524	// Enabling or Disabling the latency heuristic is a close call: It seems to
525	// help nearly no benchmark on out-of-order architectures, on the other hand
526	// it regresses register pressure on a few benchmarking.
527	Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
528	}
529
530	void AArch64Subtarget::adjustSchedDependency(
531	SUnit Def, int* DefOpIdx, SUnit Use, int* UseOpIdx, SDep &Dep,
532	const TargetSchedModel SchedModel) const* {
533	if (!SchedModel \|\| Dep.getKind() != SDep::Kind::Data \|\| !Dep.getReg() \|\|
534	!Def->isInstr() \|\| !Use->isInstr() \|\|
535	(Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
536	Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
537	return;
538
539	// If the Def is a BUNDLE, find the last instruction in the bundle that defs
540	// the register.
541	const MachineInstr *DefMI = Def->getInstr();
542	if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
543	Register Reg = DefMI->getOperand(i: DefOpIdx).getReg();
544	for (const auto &Op : const_mi_bundle_ops(MI: *DefMI)) {
545	if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
546	DefMI = Op.getParent();
547	DefOpIdx = Op.getOperandNo();
548	}
549	}
550	}
551
552	// If the Use is a BUNDLE, find the first instruction that uses the Reg.
553	const MachineInstr *UseMI = Use->getInstr();
554	if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
555	Register Reg = UseMI->getOperand(i: UseOpIdx).getReg();
556	for (const auto &Op : const_mi_bundle_ops(MI: *UseMI)) {
557	if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
558	UseMI = Op.getParent();
559	UseOpIdx = Op.getOperandNo();
560	break;
561	}
562	}
563	}
564
565	Dep.setLatency(
566	SchedModel->computeOperandLatency(DefMI, DefOperIdx: DefOpIdx, UseMI, UseOperIdx: UseOpIdx));
567	}
568
569	bool AArch64Subtarget::enableEarlyIfConversion() const {
570	return EnableEarlyIfConvert;
571	}
572
573	bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
574	if (!UseAddressTopByteIgnored)
575	return false;
576
577	if (TargetTriple.isDriverKit())
578	return true;
579	if (TargetTriple.isiOS()) {
580	return TargetTriple.getiOSVersion() >= VersionTuple (`8`);
581	}
582
583	return false;
584	}
585
586	std::unique_ptr<PBQPRAConstraint>
587	AArch64Subtarget::getCustomPBQPConstraints() const {
588	return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
589	}
590
591	void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
592	// We usually compute max call frame size after ISel. Do the computation now
593	// if the .mir file didn't specify it. Note that this will probably give you
594	// bogus values after PEI has eliminated the callframe setup/destroy pseudo
595	// instructions, specify explicitly if you need it to be correct.
596	MachineFrameInfo &MFI = MF.getFrameInfo();
597	if (!MFI.isMaxCallFrameSizeComputed())
598	MFI.computeMaxCallFrameSize(MF);
599	}
600
601	bool AArch64Subtarget::useAA() const { return UseAA; }
602
603	bool AArch64Subtarget::useScalarIncVL() const {
604	// If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
605	// is not otherwise set, enable it by default.
606	if (UseScalarIncVL.getNumOccurrences())
607	return UseScalarIncVL;
608	return hasSVE2() \|\| hasSME();
609	}
610
611	// If return address signing is enabled, tail calls are emitted as follows:
612	//
613	// ```
614	// <authenticate LR>
615	// <check LR>
616	// TCRETURN ; the callee may sign and spill the LR in its prologue
617	// ```
618	//
619	// LR may require explicit checking because if FEAT_FPAC is not implemented
620	// and LR was tampered with, then `<authenticate LR>` will not generate an
621	// exception on its own. Later, if the callee spills the signed LR value and
622	// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
623	// the higher bits of LR thus hiding the authentication failure.
624	AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
625	const MachineFunction &MF) const {
626	// TODO: Check subtarget for the scheme. Present variant is a default for
627	// pauthtest ABI.
628	if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-returns") &&
629	MF.getFunction().hasFnAttribute(Kind: "ptrauth-auth-traps"))
630	return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
631	if (AuthenticatedLRCheckMethod.getNumOccurrences())
632	return AuthenticatedLRCheckMethod;
633
634	// At now, use None by default because checks may introduce an unexpected
635	// performance regression or incompatibility with execute-only mappings.
636	return AArch64PAuth::AuthCheckMethod::None;
637	}
638
639	std::optional<uint16_t>
640	AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
641	const Function &ParentFn) const {
642	if (!ParentFn.hasFnAttribute(Kind: "ptrauth-indirect-gotos"))
643	return std::nullopt;
644	// We currently have one simple mechanism for all targets.
645	// This isn't ABI, so we can always do better in the future.
646	return getPointerAuthStableSipHash(
647	S: (Twine (ParentFn.getName()) + " blockaddress").str());
648	}
649
650	bool AArch64Subtarget::isX16X17Safer() const {
651	// The Darwin kernel implements special protections for x16 and x17 so we
652	// should prefer to use those registers on that platform.
653	return isTargetDarwin();
654	}
655
656	bool AArch64Subtarget::enableMachinePipeliner() const {
657	return getSchedModel().hasInstrSchedModel();
658	}
659

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64Subtarget.cpp