AArch64Subtarget.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64Subtarget.cpp]

//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*--===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the AArch64 specific subclass of TargetSubtarget.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AArch64Subtarget.h"
14
15	#include "AArch64.h"
16	#include "AArch64InstrInfo.h"
17	#include "AArch64PBQPRegAlloc.h"
18	#include "AArch64TargetMachine.h"
19	#include "GISel/AArch64CallLowering.h"
20	#include "GISel/AArch64LegalizerInfo.h"
21	#include "GISel/AArch64RegisterBankInfo.h"
22	#include "MCTargetDesc/AArch64AddressingModes.h"
23	#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24	#include "llvm/CodeGen/MachineFrameInfo.h"
25	#include "llvm/CodeGen/MachineScheduler.h"
26	#include "llvm/IR/GlobalValue.h"
27	#include "llvm/Support/SipHash.h"
28	#include "llvm/TargetParser/AArch64TargetParser.h"
29
30	using namespace llvm;
31
32	#define DEBUG_TYPE "aarch64-subtarget"
33
34	#define GET_SUBTARGETINFO_CTOR
35	#define GET_SUBTARGETINFO_TARGET_DESC
36	#include "AArch64GenSubtargetInfo.inc"
37
38	static cl::opt<bool>
39	EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc ("Enable the early if "
40	"converter pass"), cl::init(Val: true), cl::Hidden);
41
42	// If OS supports TBI, use this flag to enable it.
43	static cl::opt<bool>
44	UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc ("Assume that top byte of "
45	"an address is ignored"), cl::init(Val: false), cl::Hidden);
46
47	static cl::opt<bool> MachOUseNonLazyBind(
48	"aarch64-macho-enable-nonlazybind",
49	cl::desc ("Call nonlazybind functions via direct GOT load for Mach-O"),
50	cl::Hidden);
51
52	static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(Val: true),
53	cl::desc ("Enable the use of AA during codegen."));
54
55	static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
56	"aarch64-insert-extract-base-cost",
57	cl::desc ("Base cost of vector insert/extract element"), cl::Hidden);
58
59	// Reserve a list of X# registers, so they are unavailable for register
60	// allocator, but can still be used as ABI requests, such as passing arguments
61	// to function call.
62	static cl::list<std::string>
63	ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc ("Reserve physical "
64	"registers, so they can't be used by register allocator. "
65	"Should only be used for testing register allocator."),
66	cl::CommaSeparated, cl::Hidden);
67
68	static cl::opt<AArch64PAuth::AuthCheckMethod>
69	AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
70	cl::Hidden,
71	cl::desc ("Override the variant of check applied "
72	"to authenticated LR during tail call"),
73	cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
74
75	static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
76	"aarch64-min-jump-table-entries", cl::init(Val: `10`), cl::Hidden,
77	cl::desc ("Set minimum number of entries to use a jump table on AArch64"));
78
79	static cl::opt<unsigned> AArch64StreamingHazardSize(
80	"aarch64-streaming-hazard-size",
81	cl::desc ("Hazard size for streaming mode memory accesses. 0 = disabled."),
82	cl::init(Val: `0`), cl::Hidden);
83
84	static cl::alias AArch64StreamingStackHazardSize(
85	"aarch64-stack-hazard-size",
86	cl::desc ("alias for -aarch64-streaming-hazard-size"),
87	cl::aliasopt (AArch64StreamingHazardSize));
88
89	static cl::opt<bool> EnableZPRPredicateSpills(
90	"aarch64-enable-zpr-predicate-spills", cl::init(Val: false), cl::Hidden,
91	cl::desc (
92	"Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));
93
94	// Subreg liveness tracking is disabled by default for now until all issues
95	// are ironed out. This option allows the feature to be used in tests.
96	static cl::opt<bool>
97	EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
98	cl::init(Val: false), cl::Hidden,
99	cl::desc ("Enable subreg liveness tracking"));
100
101	static cl::opt<bool>
102	UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(Val: false), cl::Hidden,
103	cl::desc ("Prefer add+cnt over addvl/inc/dec"));
104
105	unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
106	if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > `0`)
107	return OverrideVectorInsertExtractBaseCost;
108	return VectorInsertExtractBaseCost;
109	}
110
111	AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
112	StringRef FS, StringRef CPUString, StringRef TuneCPUString,
113	bool HasMinSize) {
114	// Determine default and user-specified characteristics
115
116	if (CPUString.empty())
117	CPUString = "generic";
118
119	if (TuneCPUString.empty())
120	TuneCPUString = CPUString;
121
122	ParseSubtargetFeatures(CPU: CPUString, TuneCPU: TuneCPUString, FS);
123	initializeProperties(HasMinSize);
124
125	return *this;
126	}
127
128	void AArch64Subtarget::initializeProperties(bool HasMinSize) {
129	// Initialize CPU specific properties. We should add a tablegen feature for
130	// this in the future so we can specify it together with the subtarget
131	// features.
132	switch (ARMProcFamily) {
133	case Generic:
134	// Using TuneCPU=generic we avoid ldapur instructions to line up with the
135	// cpus that use the AvoidLDAPUR feature. We don't want this to be on
136	// forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
137	if (hasV8_4aOps() && !hasV8_8aOps())
138	AvoidLDAPUR = true;
139	break;
140	case Carmel:
141	CacheLineSize = `64`;
142	break;
143	case CortexA35:
144	case CortexA53:
145	case CortexA55:
146	case CortexR82:
147	case CortexR82AE:
148	PrefFunctionAlignment = Align (`16`);
149	PrefLoopAlignment = Align (`16`);
150	MaxBytesForLoopAlignment = `8`;
151	break;
152	case CortexA57:
153	MaxInterleaveFactor = `4`;
154	PrefFunctionAlignment = Align (`16`);
155	PrefLoopAlignment = Align (`16`);
156	MaxBytesForLoopAlignment = `8`;
157	break;
158	case CortexA65:
159	PrefFunctionAlignment = Align (`8`);
160	break;
161	case CortexA72:
162	case CortexA73:
163	case CortexA75:
164	PrefFunctionAlignment = Align (`16`);
165	PrefLoopAlignment = Align (`16`);
166	MaxBytesForLoopAlignment = `8`;
167	break;
168	case CortexA76:
169	case CortexA77:
170	case CortexA78:
171	case CortexA78AE:
172	case CortexA78C:
173	case CortexX1:
174	PrefFunctionAlignment = Align (`16`);
175	PrefLoopAlignment = Align (`32`);
176	MaxBytesForLoopAlignment = `16`;
177	break;
178	case CortexA320:
179	case CortexA510:
180	case CortexA520:
181	PrefFunctionAlignment = Align (`16`);
182	VScaleForTuning = `1`;
183	PrefLoopAlignment = Align (`16`);
184	MaxBytesForLoopAlignment = `8`;
185	break;
186	case CortexA710:
187	case CortexA715:
188	case CortexA720:
189	case CortexA725:
190	case CortexX2:
191	case CortexX3:
192	case CortexX4:
193	case CortexX925:
194	PrefFunctionAlignment = Align (`16`);
195	VScaleForTuning = `1`;
196	PrefLoopAlignment = Align (`32`);
197	MaxBytesForLoopAlignment = `16`;
198	break;
199	case A64FX:
200	CacheLineSize = `256`;
201	PrefFunctionAlignment = Align (`8`);
202	PrefLoopAlignment = Align (`4`);
203	MaxInterleaveFactor = `4`;
204	PrefetchDistance = `128`;
205	MinPrefetchStride = `1024`;
206	MaxPrefetchIterationsAhead = `4`;
207	VScaleForTuning = `4`;
208	break;
209	case MONAKA:
210	VScaleForTuning = `2`;
211	break;
212	case AppleA7:
213	case AppleA10:
214	case AppleA11:
215	case AppleA12:
216	case AppleA13:
217	case AppleA14:
218	case AppleA15:
219	case AppleA16:
220	case AppleA17:
221	case AppleM4:
222	CacheLineSize = `64`;
223	PrefetchDistance = `280`;
224	MinPrefetchStride = `2048`;
225	MaxPrefetchIterationsAhead = `3`;
226	switch (ARMProcFamily) {
227	case AppleA14:
228	case AppleA15:
229	case AppleA16:
230	case AppleA17:
231	case AppleM4:
232	MaxInterleaveFactor = `4`;
233	break;
234	default:
235	break;
236	}
237	break;
238	case ExynosM3:
239	MaxInterleaveFactor = `4`;
240	MaxJumpTableSize = `20`;
241	PrefFunctionAlignment = Align (`32`);
242	PrefLoopAlignment = Align (`16`);
243	break;
244	case Falkor:
245	MaxInterleaveFactor = `4`;
246	// FIXME: remove this to enable 64-bit SLP if performance looks good.
247	MinVectorRegisterBitWidth = `128`;
248	CacheLineSize = `128`;
249	PrefetchDistance = `820`;
250	MinPrefetchStride = `2048`;
251	MaxPrefetchIterationsAhead = `8`;
252	break;
253	case Kryo:
254	MaxInterleaveFactor = `4`;
255	VectorInsertExtractBaseCost = `2`;
256	CacheLineSize = `128`;
257	PrefetchDistance = `740`;
258	MinPrefetchStride = `1024`;
259	MaxPrefetchIterationsAhead = `11`;
260	// FIXME: remove this to enable 64-bit SLP if performance looks good.
261	MinVectorRegisterBitWidth = `128`;
262	break;
263	case NeoverseE1:
264	PrefFunctionAlignment = Align (`8`);
265	break;
266	case NeoverseN1:
267	PrefFunctionAlignment = Align (`16`);
268	PrefLoopAlignment = Align (`32`);
269	MaxBytesForLoopAlignment = `16`;
270	break;
271	case NeoverseV2:
272	case NeoverseV3:
273	EpilogueVectorizationMinVF = `8`;
274	MaxInterleaveFactor = `4`;
275	ScatterOverhead = `13`;
276	LLVM_FALLTHROUGH;
277	case NeoverseN2:
278	case NeoverseN3:
279	PrefFunctionAlignment = Align (`16`);
280	PrefLoopAlignment = Align (`32`);
281	MaxBytesForLoopAlignment = `16`;
282	VScaleForTuning = `1`;
283	break;
284	case NeoverseV1:
285	PrefFunctionAlignment = Align (`16`);
286	PrefLoopAlignment = Align (`32`);
287	MaxBytesForLoopAlignment = `16`;
288	VScaleForTuning = `2`;
289	DefaultSVETFOpts = TailFoldingOpts::Simple;
290	break;
291	case Neoverse512TVB:
292	PrefFunctionAlignment = Align (`16`);
293	VScaleForTuning = `1`;
294	MaxInterleaveFactor = `4`;
295	break;
296	case Saphira:
297	MaxInterleaveFactor = `4`;
298	// FIXME: remove this to enable 64-bit SLP if performance looks good.
299	MinVectorRegisterBitWidth = `128`;
300	break;
301	case ThunderX2T99:
302	CacheLineSize = `64`;
303	PrefFunctionAlignment = Align (`8`);
304	PrefLoopAlignment = Align (`4`);
305	MaxInterleaveFactor = `4`;
306	PrefetchDistance = `128`;
307	MinPrefetchStride = `1024`;
308	MaxPrefetchIterationsAhead = `4`;
309	// FIXME: remove this to enable 64-bit SLP if performance looks good.
310	MinVectorRegisterBitWidth = `128`;
311	break;
312	case ThunderX:
313	case ThunderXT88:
314	case ThunderXT81:
315	case ThunderXT83:
316	CacheLineSize = `128`;
317	PrefFunctionAlignment = Align (`8`);
318	PrefLoopAlignment = Align (`4`);
319	// FIXME: remove this to enable 64-bit SLP if performance looks good.
320	MinVectorRegisterBitWidth = `128`;
321	break;
322	case TSV110:
323	CacheLineSize = `64`;
324	PrefFunctionAlignment = Align (`16`);
325	PrefLoopAlignment = Align (`4`);
326	break;
327	case ThunderX3T110:
328	CacheLineSize = `64`;
329	PrefFunctionAlignment = Align (`16`);
330	PrefLoopAlignment = Align (`4`);
331	MaxInterleaveFactor = `4`;
332	PrefetchDistance = `128`;
333	MinPrefetchStride = `1024`;
334	MaxPrefetchIterationsAhead = `4`;
335	// FIXME: remove this to enable 64-bit SLP if performance looks good.
336	MinVectorRegisterBitWidth = `128`;
337	break;
338	case Ampere1:
339	case Ampere1A:
340	case Ampere1B:
341	CacheLineSize = `64`;
342	PrefFunctionAlignment = Align (`64`);
343	PrefLoopAlignment = Align (`64`);
344	MaxInterleaveFactor = `4`;
345	break;
346	case Oryon:
347	CacheLineSize = `64`;
348	PrefFunctionAlignment = Align (`16`);
349	MaxInterleaveFactor = `4`;
350	PrefetchDistance = `128`;
351	MinPrefetchStride = `1024`;
352	break;
353	case Olympus:
354	EpilogueVectorizationMinVF = `8`;
355	MaxInterleaveFactor = `4`;
356	ScatterOverhead = `13`;
357	PrefFunctionAlignment = Align (`16`);
358	PrefLoopAlignment = Align (`32`);
359	MaxBytesForLoopAlignment = `16`;
360	VScaleForTuning = `1`;
361	break;
362	}
363
364	if (AArch64MinimumJumpTableEntries.getNumOccurrences() > `0` \|\| !HasMinSize)
365	MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
366	}
367
368	AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
369	StringRef TuneCPU, StringRef FS,
370	const TargetMachine &TM, bool LittleEndian,
371	unsigned MinSVEVectorSizeInBitsOverride,
372	unsigned MaxSVEVectorSizeInBitsOverride,
373	bool IsStreaming, bool IsStreamingCompatible,
374	bool HasMinSize)
375	: AArch64GenSubtargetInfo (TT, CPU, TuneCPU, FS),
376	ReserveXRegister (AArch64::GPR64commonRegClass.getNumRegs()),
377	ReserveXRegisterForRA (AArch64::GPR64commonRegClass.getNumRegs()),
378	CustomCallSavedXRegs (AArch64::GPR64commonRegClass.getNumRegs()),
379	IsLittle(LittleEndian), IsStreaming(IsStreaming),
380	IsStreamingCompatible(IsStreamingCompatible),
381	StreamingHazardSize(
382	AArch64StreamingHazardSize.getNumOccurrences() > `0`
383	? std::optional<unsigned>(AArch64StreamingHazardSize)
384	: std::nullopt),
385	MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
386	MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple (TT),
387	InstrInfo (initializeSubtargetDependencies(FS, CPUString: CPU, TuneCPUString: TuneCPU, HasMinSize)),
388	TLInfo (TM, *this) {
389	if (AArch64::isX18ReservedByDefault(TT))
390	ReserveXRegister.set(`18`);
391
392	CallLoweringInfo.reset(p: new AArch64CallLowering (*getTargetLowering()));
393	InlineAsmLoweringInfo.reset(p: new InlineAsmLowering (getTargetLowering()));
394	Legalizer.reset(p: new AArch64LegalizerInfo (*this));
395
396	auto RBI = new* AArch64RegisterBankInfo (*getRegisterInfo());
397
398	// FIXME: At this point, we can't rely on Subtarget having RBI.
399	// It's awkward to mix passing RBI and the Subtarget; should we pass
400	// TII/TRI as well?
401	InstSelector.reset(p: createAArch64InstructionSelector(
402	*static_cast<const AArch64TargetMachine >(&TM), this, *RBI));
403
404	RegBankInfo.reset(p: RBI);
405
406	auto TRI = getRegisterInfo();
407	StringSet<> ReservedRegNames(llvm::from_range, ReservedRegsForRA);
408	for (unsigned i = `0`; i < `29`; ++i) {
409	if (ReservedRegNames.count(Key: TRI->getName(RegNo: AArch64::X0 + i)))
410	ReserveXRegisterForRA.set(i);
411	}
412	// X30 is named LR, so we can't use TRI->getName to check X30.
413	if (ReservedRegNames.count(Key: "X30") \|\| ReservedRegNames.count(Key: "LR"))
414	ReserveXRegisterForRA.set(`30`);
415	// X29 is named FP, so we can't use TRI->getName to check X29.
416	if (ReservedRegNames.count(Key: "X29") \|\| ReservedRegNames.count(Key: "FP"))
417	ReserveXRegisterForRA.set(`29`);
418
419	EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
420	}
421
422	unsigned AArch64Subtarget::getHwModeSet() const {
423	AArch64HwModeBits Modes = AArch64HwModeBits::DefaultMode;
424
425	// Use a special hardware mode in streaming[-compatible] functions with
426	// aarch64-enable-zpr-predicate-spills. This changes the spill size (and
427	// alignment) for the predicate register class.
428	if (EnableZPRPredicateSpills.getValue() &&
429	(isStreaming() \|\| isStreamingCompatible())) {
430	Modes \|= AArch64HwModeBits::SMEWithZPRPredicateSpills;
431	}
432
433	return to_underlying(E: Modes);
434	}
435
436	const CallLowering AArch64Subtarget::getCallLowering() const* {
437	return CallLoweringInfo.get();
438	}
439
440	const InlineAsmLowering AArch64Subtarget::getInlineAsmLowering() const* {
441	return InlineAsmLoweringInfo.get();
442	}
443
444	InstructionSelector AArch64Subtarget::getInstructionSelector() const* {
445	return InstSelector.get();
446	}
447
448	const LegalizerInfo AArch64Subtarget::getLegalizerInfo() const* {
449	return Legalizer.get();
450	}
451
452	const RegisterBankInfo AArch64Subtarget::getRegBankInfo() const* {
453	return RegBankInfo.get();
454	}
455
456	/// Find the target operand flags that describe how a global value should be
457	/// referenced for the current subtarget.
458	unsigned
459	AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
460	const TargetMachine &TM) const {
461	// MachO large model always goes via a GOT, simply to get a single 8-byte
462	// absolute relocation on all global addresses.
463	if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
464	return AArch64II::MO_GOT;
465
466	// All globals dynamically protected by MTE must have their address tags
467	// synthesized. This is done by having the loader stash the tag in the GOT
468	// entry. Force all tagged globals (even ones with internal linkage) through
469	// the GOT.
470	if (GV->isTagged())
471	return AArch64II::MO_GOT;
472
473	if (!TM.shouldAssumeDSOLocal(GV)) {
474	if (GV->hasDLLImportStorageClass()) {
475	return AArch64II::MO_GOT \| AArch64II::MO_DLLIMPORT;
476	}
477	if (getTargetTriple().isOSWindows())
478	return AArch64II::MO_GOT \| AArch64II::MO_COFFSTUB;
479	return AArch64II::MO_GOT;
480	}
481
482	// The small code model's direct accesses use ADRP, which cannot
483	// necessarily produce the value 0 (if the code is above 4GB).
484	// Same for the tiny code model, where we have a pc relative LDR.
485	if ((useSmallAddressing() \|\| TM.getCodeModel() == CodeModel::Tiny) &&
486	GV->hasExternalWeakLinkage())
487	return AArch64II::MO_GOT;
488
489	// References to tagged globals are marked with MO_NC \| MO_TAGGED to indicate
490	// that their nominal addresses are tagged and outside of the code model. In
491	// AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
492	// tag if necessary based on MO_TAGGED.
493	if (AllowTaggedGlobals && !isa<FunctionType>(Val: GV->getValueType()))
494	return AArch64II::MO_NC \| AArch64II::MO_TAGGED;
495
496	return AArch64II::MO_NO_FLAG;
497	}
498
499	unsigned AArch64Subtarget::classifyGlobalFunctionReference(
500	const GlobalValue GV, const* TargetMachine &TM) const {
501	// MachO large model always goes via a GOT, because we don't have the
502	// relocations available to do anything else..
503	if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
504	!GV->hasInternalLinkage())
505	return AArch64II::MO_GOT;
506
507	// NonLazyBind goes via GOT unless we know it's available locally.
508	auto *F = dyn_cast<Function>(Val: GV);
509	if ((!isTargetMachO() \|\| MachOUseNonLazyBind) && F &&
510	F->hasFnAttribute(Kind: Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
511	return AArch64II::MO_GOT;
512
513	if (getTargetTriple().isOSWindows()) {
514	if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
515	if (GV->hasDLLImportStorageClass()) {
516	// On Arm64EC, if we're calling a symbol from the import table
517	// directly, use MO_ARM64EC_CALLMANGLE.
518	return AArch64II::MO_GOT \| AArch64II::MO_DLLIMPORT \|
519	AArch64II::MO_ARM64EC_CALLMANGLE;
520	}
521	if (GV->hasExternalLinkage()) {
522	// If we're calling a symbol directly, use the mangled form in the
523	// call instruction.
524	return AArch64II::MO_ARM64EC_CALLMANGLE;
525	}
526	}
527
528	// Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
529	return ClassifyGlobalReference(GV, TM);
530	}
531
532	return AArch64II::MO_NO_FLAG;
533	}
534
535	void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
536	unsigned NumRegionInstrs) const {
537	// LNT run (at least on Cyclone) showed reasonably significant gains for
538	// bi-directional scheduling. 253.perlbmk.
539	Policy.OnlyTopDown = false;
540	Policy.OnlyBottomUp = false;
541	// Enabling or Disabling the latency heuristic is a close call: It seems to
542	// help nearly no benchmark on out-of-order architectures, on the other hand
543	// it regresses register pressure on a few benchmarking.
544	Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
545	}
546
547	void AArch64Subtarget::adjustSchedDependency(
548	SUnit Def, int* DefOpIdx, SUnit Use, int* UseOpIdx, SDep &Dep,
549	const TargetSchedModel SchedModel) const* {
550	if (!SchedModel \|\| Dep.getKind() != SDep::Kind::Data \|\| !Dep.getReg() \|\|
551	!Def->isInstr() \|\| !Use->isInstr() \|\|
552	(Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
553	Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
554	return;
555
556	// If the Def is a BUNDLE, find the last instruction in the bundle that defs
557	// the register.
558	const MachineInstr *DefMI = Def->getInstr();
559	if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
560	Register Reg = DefMI->getOperand(i: DefOpIdx).getReg();
561	for (const auto &Op : const_mi_bundle_ops(MI: *DefMI)) {
562	if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
563	DefMI = Op.getParent();
564	DefOpIdx = Op.getOperandNo();
565	}
566	}
567	}
568
569	// If the Use is a BUNDLE, find the first instruction that uses the Reg.
570	const MachineInstr *UseMI = Use->getInstr();
571	if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
572	Register Reg = UseMI->getOperand(i: UseOpIdx).getReg();
573	for (const auto &Op : const_mi_bundle_ops(MI: *UseMI)) {
574	if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
575	UseMI = Op.getParent();
576	UseOpIdx = Op.getOperandNo();
577	break;
578	}
579	}
580	}
581
582	Dep.setLatency(
583	SchedModel->computeOperandLatency(DefMI, DefOperIdx: DefOpIdx, UseMI, UseOperIdx: UseOpIdx));
584	}
585
586	bool AArch64Subtarget::enableEarlyIfConversion() const {
587	return EnableEarlyIfConvert;
588	}
589
590	bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
591	if (!UseAddressTopByteIgnored)
592	return false;
593
594	if (TargetTriple.isDriverKit())
595	return true;
596	if (TargetTriple.isiOS()) {
597	return TargetTriple.getiOSVersion() >= VersionTuple (`8`);
598	}
599
600	return false;
601	}
602
603	std::unique_ptr<PBQPRAConstraint>
604	AArch64Subtarget::getCustomPBQPConstraints() const {
605	return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
606	}
607
608	void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
609	// We usually compute max call frame size after ISel. Do the computation now
610	// if the .mir file didn't specify it. Note that this will probably give you
611	// bogus values after PEI has eliminated the callframe setup/destroy pseudo
612	// instructions, specify explicitly if you need it to be correct.
613	MachineFrameInfo &MFI = MF.getFrameInfo();
614	if (!MFI.isMaxCallFrameSizeComputed())
615	MFI.computeMaxCallFrameSize(MF);
616	}
617
618	bool AArch64Subtarget::useAA() const { return UseAA; }
619
620	bool AArch64Subtarget::useScalarIncVL() const {
621	// If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
622	// is not otherwise set, enable it by default.
623	if (UseScalarIncVL.getNumOccurrences())
624	return UseScalarIncVL;
625	return hasSVE2() \|\| hasSME();
626	}
627
628	// If return address signing is enabled, tail calls are emitted as follows:
629	//
630	// ```
631	// <authenticate LR>
632	// <check LR>
633	// TCRETURN ; the callee may sign and spill the LR in its prologue
634	// ```
635	//
636	// LR may require explicit checking because if FEAT_FPAC is not implemented
637	// and LR was tampered with, then `<authenticate LR>` will not generate an
638	// exception on its own. Later, if the callee spills the signed LR value and
639	// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
640	// the higher bits of LR thus hiding the authentication failure.
641	AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
642	const MachineFunction &MF) const {
643	// TODO: Check subtarget for the scheme. Present variant is a default for
644	// pauthtest ABI.
645	if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-returns") &&
646	MF.getFunction().hasFnAttribute(Kind: "ptrauth-auth-traps"))
647	return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
648	if (AuthenticatedLRCheckMethod.getNumOccurrences())
649	return AuthenticatedLRCheckMethod;
650
651	// At now, use None by default because checks may introduce an unexpected
652	// performance regression or incompatibility with execute-only mappings.
653	return AArch64PAuth::AuthCheckMethod::None;
654	}
655
656	std::optional<uint16_t>
657	AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
658	const Function &ParentFn) const {
659	if (!ParentFn.hasFnAttribute(Kind: "ptrauth-indirect-gotos"))
660	return std::nullopt;
661	// We currently have one simple mechanism for all targets.
662	// This isn't ABI, so we can always do better in the future.
663	return getPointerAuthStableSipHash(
664	S: (Twine (ParentFn.getName()) + " blockaddress").str());
665	}
666
667	bool AArch64Subtarget::enableMachinePipeliner() const {
668	return getSchedModel().hasInstrSchedModel();
669	}
670

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64Subtarget.cpp