1//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#include "AArch64TargetMachine.h"
13#include "AArch64.h"
14#include "AArch64MachineFunctionInfo.h"
15#include "AArch64MachineScheduler.h"
16#include "AArch64MacroFusion.h"
17#include "AArch64Subtarget.h"
18#include "AArch64TargetObjectFile.h"
19#include "AArch64TargetTransformInfo.h"
20#include "MCTargetDesc/AArch64MCTargetDesc.h"
21#include "TargetInfo/AArch64TargetInfo.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Analysis/TargetTransformInfo.h"
24#include "llvm/Analysis/ValueTracking.h"
25#include "llvm/CodeGen/CSEConfigBase.h"
26#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
27#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
28#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
29#include "llvm/CodeGen/GlobalISel/Legalizer.h"
30#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
31#include "llvm/CodeGen/GlobalISel/Localizer.h"
32#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
33#include "llvm/CodeGen/MIRParser/MIParser.h"
34#include "llvm/CodeGen/MachineScheduler.h"
35#include "llvm/CodeGen/Passes.h"
36#include "llvm/CodeGen/TargetInstrInfo.h"
37#include "llvm/CodeGen/TargetPassConfig.h"
38#include "llvm/IR/Attributes.h"
39#include "llvm/IR/Function.h"
40#include "llvm/InitializePasses.h"
41#include "llvm/MC/MCAsmInfo.h"
42#include "llvm/MC/MCTargetOptions.h"
43#include "llvm/MC/TargetRegistry.h"
44#include "llvm/Pass.h"
45#include "llvm/Passes/PassBuilder.h"
46#include "llvm/Support/CodeGen.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Compiler.h"
49#include "llvm/Target/TargetLoweringObjectFile.h"
50#include "llvm/Target/TargetOptions.h"
51#include "llvm/TargetParser/Triple.h"
52#include "llvm/Transforms/CFGuard.h"
53#include "llvm/Transforms/Scalar.h"
54#include "llvm/Transforms/Utils/LowerIFunc.h"
55#include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
56#include <memory>
57
58using namespace llvm;
59
60static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
61 cl::desc("Enable the CCMP formation pass"),
62 cl::init(Val: true), cl::Hidden);
63
64static cl::opt<bool>
65 EnableCondBrTuning("aarch64-enable-cond-br-tune",
66 cl::desc("Enable the conditional branch tuning pass"),
67 cl::init(Val: true), cl::Hidden);
68
69static cl::opt<bool> EnableAArch64CopyPropagation(
70 "aarch64-enable-copy-propagation",
71 cl::desc("Enable the copy propagation with AArch64 copy instr"),
72 cl::init(Val: true), cl::Hidden);
73
74static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
75 cl::desc("Enable the machine combiner pass"),
76 cl::init(Val: true), cl::Hidden);
77
78static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
79 cl::desc("Suppress STP for AArch64"),
80 cl::init(Val: true), cl::Hidden);
81
82static cl::opt<bool> EnableAdvSIMDScalar(
83 "aarch64-enable-simd-scalar",
84 cl::desc("Enable use of AdvSIMD scalar integer instructions"),
85 cl::init(Val: false), cl::Hidden);
86
87static cl::opt<bool>
88 EnablePromoteConstant("aarch64-enable-promote-const",
89 cl::desc("Enable the promote constant pass"),
90 cl::init(Val: true), cl::Hidden);
91
92static cl::opt<bool> EnableCollectLOH(
93 "aarch64-enable-collect-loh",
94 cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
95 cl::init(Val: true), cl::Hidden);
96
97static cl::opt<bool>
98 EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
99 cl::desc("Enable the pass that removes dead"
100 " definitions and replaces stores to"
101 " them with stores to the zero"
102 " register"),
103 cl::init(Val: true));
104
105static cl::opt<bool> EnableRedundantCopyElimination(
106 "aarch64-enable-copyelim",
107 cl::desc("Enable the redundant copy elimination pass"), cl::init(Val: true),
108 cl::Hidden);
109
110static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
111 cl::desc("Enable the load/store pair"
112 " optimization pass"),
113 cl::init(Val: true), cl::Hidden);
114
115static cl::opt<bool> EnableAtomicTidy(
116 "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
117 cl::desc("Run SimplifyCFG after expanding atomic operations"
118 " to make use of cmpxchg flow-based information"),
119 cl::init(Val: true));
120
121static cl::opt<bool>
122EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
123 cl::desc("Run early if-conversion"),
124 cl::init(Val: true));
125
126static cl::opt<bool>
127 EnableCondOpt("aarch64-enable-condopt",
128 cl::desc("Enable the condition optimizer pass"),
129 cl::init(Val: true), cl::Hidden);
130
131static cl::opt<bool>
132 EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
133 cl::desc("Enable optimizations on complex GEPs"),
134 cl::init(Val: false));
135
136static cl::opt<bool>
137 EnableSelectOpt("aarch64-select-opt", cl::Hidden,
138 cl::desc("Enable select to branch optimizations"),
139 cl::init(Val: true));
140
141static cl::opt<bool>
142 BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(Val: true),
143 cl::desc("Relax out of range conditional branches"));
144
145static cl::opt<bool> EnableCompressJumpTables(
146 "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(Val: true),
147 cl::desc("Use smallest entry possible for jump tables"));
148
149// FIXME: Unify control over GlobalMerge.
150static cl::opt<cl::boolOrDefault>
151 EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
152 cl::desc("Enable the global merge pass"));
153
154static cl::opt<bool>
155 EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
156 cl::desc("Enable the loop data prefetch pass"),
157 cl::init(Val: true));
158
159static cl::opt<int> EnableGlobalISelAtO(
160 "aarch64-enable-global-isel-at-O", cl::Hidden,
161 cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
162 cl::init(Val: 0));
163
164static cl::opt<bool>
165 EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden,
166 cl::desc("Enable SVE intrinsic opts"),
167 cl::init(Val: true));
168
169static cl::opt<bool>
170 EnableSMEPeepholeOpt("enable-aarch64-sme-peephole-opt", cl::init(Val: true),
171 cl::Hidden,
172 cl::desc("Perform SME peephole optimization"));
173
174static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
175 cl::init(Val: true), cl::Hidden);
176
177static cl::opt<bool>
178 EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
179 cl::desc("Enable the AArch64 branch target pass"),
180 cl::init(Val: true));
181
182static cl::opt<unsigned> SVEVectorBitsMaxOpt(
183 "aarch64-sve-vector-bits-max",
184 cl::desc("Assume SVE vector registers are at most this big, "
185 "with zero meaning no maximum size is assumed."),
186 cl::init(Val: 0), cl::Hidden);
187
188static cl::opt<unsigned> SVEVectorBitsMinOpt(
189 "aarch64-sve-vector-bits-min",
190 cl::desc("Assume SVE vector registers are at least this big, "
191 "with zero meaning no minimum size is assumed."),
192 cl::init(Val: 0), cl::Hidden);
193
194static cl::opt<bool> ForceStreaming(
195 "force-streaming",
196 cl::desc("Force the use of streaming code for all functions"),
197 cl::init(Val: false), cl::Hidden);
198
199static cl::opt<bool> ForceStreamingCompatible(
200 "force-streaming-compatible",
201 cl::desc("Force the use of streaming-compatible code for all functions"),
202 cl::init(Val: false), cl::Hidden);
203
204extern cl::opt<bool> EnableHomogeneousPrologEpilog;
205
206static cl::opt<bool> EnableGISelLoadStoreOptPreLegal(
207 "aarch64-enable-gisel-ldst-prelegal",
208 cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"),
209 cl::init(Val: true), cl::Hidden);
210
211static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
212 "aarch64-enable-gisel-ldst-postlegal",
213 cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
214 cl::init(Val: false), cl::Hidden);
215
216static cl::opt<bool>
217 EnableSinkFold("aarch64-enable-sink-fold",
218 cl::desc("Enable sinking and folding of instruction copies"),
219 cl::init(Val: true), cl::Hidden);
220
221static cl::opt<bool>
222 EnableMachinePipeliner("aarch64-enable-pipeliner",
223 cl::desc("Enable Machine Pipeliner for AArch64"),
224 cl::init(Val: false), cl::Hidden);
225
226static cl::opt<bool> EnableSRLTSubregToRegMitigation(
227 "aarch64-srlt-mitigate-sr2r",
228 cl::desc("Enable SUBREG_TO_REG mitigation by adding 'implicit-def' for "
229 "super-regs when using Subreg Liveness Tracking"),
230 cl::init(Val: true), cl::Hidden);
231
232static cl::opt<bool> EnableSVEShuffleOpt(
233 "aarch64-enable-sve-shuffle-opts",
234 cl::desc("Enable pattern matching of shuffles that could make use of SVE "
235 "instructions like tbl or the bottom/top variants"),
236 cl::init(Val: true), cl::Hidden);
237
238extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
239LLVMInitializeAArch64Target() {
240 // Register the target.
241 RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
242 RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
243 RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
244 RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target());
245 RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
246 auto &PR = *PassRegistry::getPassRegistry();
247 initializeGlobalISel(PR);
248 initializeAArch64A53Fix835769LegacyPass(PR);
249 initializeAArch64A57FPLoadBalancingLegacyPass(PR);
250 initializeAArch64CodeLayoutOptPass(PR);
251 initializeAArch64AdvSIMDScalarLegacyPass(PR);
252 initializeAArch64AsmPrinterPass(PR);
253 initializeAArch64BranchTargetsLegacyPass(PR);
254 initializeAArch64CollectLOHLegacyPass(PR);
255 initializeAArch64CompressJumpTablesLegacyPass(PR);
256 initializeAArch64ConditionalComparesLegacyPass(PR);
257 initializeAArch64ConditionOptimizerLegacyPass(PR);
258 initializeAArch64DeadRegisterDefinitionsLegacyPass(PR);
259 initializeAArch64ExpandPseudoLegacyPass(PR);
260 initializeAArch64LoadStoreOptLegacyPass(PR);
261 initializeAArch64MIPeepholeOptLegacyPass(PR);
262 initializeAArch64SIMDInstrOptLegacyPass(PR);
263 initializeAArch64O0PreLegalizerCombinerLegacyPass(PR);
264 initializeAArch64PreLegalizerCombinerLegacyPass(PR);
265 initializeAArch64PointerAuthLegacyPass(PR);
266 initializeAArch64PostCoalescerLegacyPass(PR);
267 initializeAArch64PostLegalizerCombinerLegacyPass(PR);
268 initializeAArch64PostSelectOptimizeLegacyPass(PR);
269 initializeAArch64PostLegalizerLoweringLegacyPass(PR);
270 initializeAArch64PromoteConstantPass(PR);
271 initializeAArch64RedundantCopyEliminationLegacyPass(PR);
272 initializeAArch64RedundantCondBranchLegacyPass(PR);
273 initializeAArch64StorePairSuppressPass(PR);
274 initializeFalkorHWPFFixPass(PR);
275 initializeFalkorMarkStridedAccessesLegacyPass(PR);
276 initializeLDTLSCleanupPass(PR);
277 initializeMachineKCFILegacyPass(PR);
278 initializeMachineSMEABIPass(PR);
279 initializeAArch64SRLTDefineSuperRegsLegacyPass(PR);
280 initializeSMEPeepholeOptPass(PR);
281 initializeSVEIntrinsicOptsPass(PR);
282 initializeAArch64SpeculationHardeningPass(PR);
283 initializeAArch64SLSHardeningLegacyPass(PR);
284 initializeAArch64StackTaggingPass(PR);
285 initializeAArch64StackTaggingPreRALegacyPass(PR);
286 initializeAArch64LowerHomogeneousPrologEpilogLegacyPass(PR);
287 initializeAArch64DAGToDAGISelLegacyPass(PR);
288 initializeAArch64CondBrTuningPass(PR);
289 initializeAArch64Arm64ECCallLoweringPass(PR);
290 initializeSVEShuffleOptsPass(PR);
291}
292
293bool AArch64TargetMachine::isGlobalISelOptNone() const {
294 const bool GlobalISelFlag = getCGPassBuilderOption().EnableGlobalISelOption ==
295 cl::boolOrDefault::BOU_TRUE;
296
297 return getOptLevel() == CodeGenOptLevel::None ||
298 (static_cast<unsigned>(getOptLevel()) >
299 static_cast<unsigned>(EnableGlobalISelAtO) &&
300 !GlobalISelFlag);
301}
302
303void AArch64TargetMachine::reset() { SubtargetMap.clear(); }
304
305//===----------------------------------------------------------------------===//
306// AArch64 Lowering public interface.
307//===----------------------------------------------------------------------===//
308static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
309 if (TT.isOSBinFormatMachO())
310 return std::make_unique<AArch64_MachoTargetObjectFile>();
311 if (TT.isOSBinFormatCOFF())
312 return std::make_unique<AArch64_COFFTargetObjectFile>();
313
314 return std::make_unique<AArch64_ELFTargetObjectFile>();
315}
316
317static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
318 if (CPU.empty() && TT.isArm64e())
319 return "apple-a12";
320 return CPU;
321}
322
323static Reloc::Model getEffectiveRelocModel(const Triple &TT,
324 std::optional<Reloc::Model> RM) {
325 // AArch64 Darwin and Windows are always PIC.
326 if (TT.isOSDarwin() || TT.isOSWindows())
327 return Reloc::PIC_;
328 // On ELF platforms the default static relocation model has a smart enough
329 // linker to cope with referencing external symbols defined in a shared
330 // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
331 if (!RM || *RM == Reloc::DynamicNoPIC)
332 return Reloc::Static;
333 return *RM;
334}
335
336static CodeModel::Model
337getEffectiveAArch64CodeModel(const Triple &TT,
338 std::optional<CodeModel::Model> CM, bool JIT) {
339 if (CM) {
340 if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
341 *CM != CodeModel::Large) {
342 report_fatal_error(
343 reason: "Only small, tiny and large code models are allowed on AArch64");
344 } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF()) {
345 report_fatal_error(reason: "tiny code model is only supported on ELF");
346 }
347 return *CM;
348 }
349 // The default MCJIT memory managers make no guarantees about where they can
350 // find an executable page; JITed code needs to be able to refer to globals
351 // no matter how far away they are.
352 // We should set the CodeModel::Small for Windows ARM64 in JIT mode,
353 // since with large code model LLVM generating 4 MOV instructions, and
354 // Windows doesn't support relocating these long branch (4 MOVs).
355 if (JIT && !TT.isOSWindows())
356 return CodeModel::Large;
357 return CodeModel::Small;
358}
359
360/// Create an AArch64 architecture model.
361///
362AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
363 StringRef CPU, StringRef FS,
364 const TargetOptions &Options,
365 std::optional<Reloc::Model> RM,
366 std::optional<CodeModel::Model> CM,
367 CodeGenOptLevel OL, bool JIT,
368 bool LittleEndian)
369 : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT,
370 computeDefaultCPU(TT, CPU), FS, Options,
371 getEffectiveRelocModel(TT, RM),
372 getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
373 TLOF(createTLOF(TT: getTargetTriple())), isLittle(LittleEndian) {
374 initAsmInfo();
375
376 if (TT.isOSBinFormatMachO()) {
377 this->Options.TrapUnreachable = true;
378 this->Options.NoTrapAfterNoreturn = true;
379 }
380
381 if (getMCAsmInfo().usesWindowsCFI()) {
382 // Unwinding can get confused if the last instruction in an
383 // exception-handling region (function, funclet, try block, etc.)
384 // is a call.
385 //
386 // FIXME: We could elide the trap if the next instruction would be in
387 // the same region anyway.
388 this->Options.TrapUnreachable = true;
389 }
390
391 if (this->Options.TLSSize == 0) // default
392 this->Options.TLSSize = 24;
393 if ((getCodeModel() == CodeModel::Small ||
394 getCodeModel() == CodeModel::Kernel) &&
395 this->Options.TLSSize > 32)
396 // for the small (and kernel) code model, the maximum TLS size is 4GiB
397 this->Options.TLSSize = 32;
398 else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24)
399 // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB)
400 this->Options.TLSSize = 24;
401
402 const bool TargetSupportsGISel =
403 TT.getArch() != Triple::aarch64_32 &&
404 TT.getEnvironment() != Triple::GNUILP32 &&
405 !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO());
406
407 const bool GlobalISelFlag = getCGPassBuilderOption().EnableGlobalISelOption ==
408 cl::boolOrDefault::BOU_TRUE;
409
410 // Enable GlobalISel at or below EnableGlobalISelAt0, unless this is
411 // MachO/CodeModel::Large, which GlobalISel does not support.
412 if (TargetSupportsGISel && EnableGlobalISelAtO != -1 &&
413 (static_cast<int>(getOptLevel()) <= EnableGlobalISelAtO ||
414 (!GlobalISelFlag && !Options.EnableGlobalISel))) {
415 setGlobalISel(true);
416 setGlobalISelAbort(GlobalISelAbortMode::Disable);
417 }
418
419 LLT::setUseExtended(true);
420
421 // AArch64 supports the MachineOutliner.
422 setMachineOutliner(true);
423
424 // AArch64 supports default outlining behaviour.
425 setSupportsDefaultOutlining(true);
426
427 // AArch64 supports the debug entry values.
428 setSupportsDebugEntryValues(true);
429
430 // AArch64 supports fixing up the DWARF unwind information.
431 if (!getMCAsmInfo().usesWindowsCFI())
432 setCFIFixup(true);
433}
434
435unsigned AArch64TargetMachine::getEnableGlobalISelAtO() const {
436 return EnableGlobalISelAtO;
437}
438
439AArch64TargetMachine::~AArch64TargetMachine() = default;
440
441const AArch64Subtarget *
442AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
443 Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu");
444 Attribute TuneAttr = F.getFnAttribute(Kind: "tune-cpu");
445 Attribute FSAttr = F.getFnAttribute(Kind: "target-features");
446
447 StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : TargetCPU;
448 StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
449 StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
450 bool HasMinSize = F.hasMinSize();
451
452 bool IsStreaming = ForceStreaming ||
453 F.hasFnAttribute(Kind: "aarch64_pstate_sm_enabled") ||
454 F.hasFnAttribute(Kind: "aarch64_pstate_sm_body");
455 bool IsStreamingCompatible = ForceStreamingCompatible ||
456 F.hasFnAttribute(Kind: "aarch64_pstate_sm_compatible");
457
458 unsigned MinSVEVectorSize = 0;
459 unsigned MaxSVEVectorSize = 0;
460 if (F.hasFnAttribute(Kind: Attribute::VScaleRange)) {
461 ConstantRange CR = getVScaleRange(F: &F, BitWidth: 64);
462 MinSVEVectorSize = CR.getUnsignedMin().getZExtValue() * 128;
463 MaxSVEVectorSize = CR.getUnsignedMax().getZExtValue() * 128;
464 } else {
465 MinSVEVectorSize = SVEVectorBitsMinOpt;
466 MaxSVEVectorSize = SVEVectorBitsMaxOpt;
467 }
468
469 assert(MinSVEVectorSize % 128 == 0 &&
470 "SVE requires vector length in multiples of 128!");
471 assert(MaxSVEVectorSize % 128 == 0 &&
472 "SVE requires vector length in multiples of 128!");
473 assert((MaxSVEVectorSize >= MinSVEVectorSize || MaxSVEVectorSize == 0) &&
474 "Minimum SVE vector size should not be larger than its maximum!");
475
476 // Sanitize user input in case of no asserts
477 if (MaxSVEVectorSize != 0) {
478 MinSVEVectorSize = std::min(a: MinSVEVectorSize, b: MaxSVEVectorSize);
479 MaxSVEVectorSize = std::max(a: MinSVEVectorSize, b: MaxSVEVectorSize);
480 }
481
482 SmallString<512> Key;
483 // This lookup is hot during repeated TTI queries, so build the key directly
484 // instead of formatting through raw_svector_ostream.
485 Key += "SVEMin";
486 Key += utostr(X: MinSVEVectorSize);
487 Key += "SVEMax";
488 Key += utostr(X: MaxSVEVectorSize);
489 Key += "IsStreaming=";
490 Key += utostr(X: IsStreaming);
491 Key += "IsStreamingCompatible=";
492 Key += utostr(X: IsStreamingCompatible);
493 Key += CPU;
494 Key += TuneCPU;
495 Key += FS;
496 Key += "HasMinSize=";
497 Key += utostr(X: HasMinSize);
498
499 auto &I = SubtargetMap[Key];
500 if (!I) {
501 I = std::make_unique<AArch64Subtarget>(
502 args: TargetTriple, args&: CPU, args&: TuneCPU, args&: FS, args: *this, args: isLittle, args&: MinSVEVectorSize,
503 args&: MaxSVEVectorSize, args&: IsStreaming, args&: IsStreamingCompatible, args&: HasMinSize,
504 args&: EnableSRLTSubregToRegMitigation);
505 }
506
507 if (IsStreaming && !I->hasSME())
508 reportFatalUsageError(reason: "streaming SVE functions require SME");
509
510 return I.get();
511}
512
513ScheduleDAGInstrs *
514AArch64TargetMachine::createMachineScheduler(MachineSchedContext *C) const {
515 const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
516 ScheduleDAGMILive *DAG = createSchedLive(C);
517 DAG->addMutation(Mutation: createLoadClusterDAGMutation(TII: DAG->TII, TRI: DAG->TRI));
518 DAG->addMutation(Mutation: createStoreClusterDAGMutation(TII: DAG->TII, TRI: DAG->TRI));
519 if (ST.hasFusion())
520 DAG->addMutation(Mutation: createAArch64MacroFusionDAGMutation());
521 return DAG;
522}
523
524ScheduleDAGInstrs *
525AArch64TargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
526 const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
527 ScheduleDAGMI *DAG = createSchedPostRA<AArch64PostRASchedStrategy>(C);
528 if (ST.hasFusion()) {
529 // Run the Macro Fusion after RA again since literals are expanded from
530 // pseudos then (v. addPreSched2()).
531 DAG->addMutation(Mutation: createAArch64MacroFusionDAGMutation());
532 return DAG;
533 }
534
535 return DAG;
536}
537
538size_t AArch64TargetMachine::clearLinkerOptimizationHints(
539 const SmallPtrSetImpl<MachineInstr *> &MIs) const {
540 if (MIs.empty())
541 return 0;
542 auto *MI = *MIs.begin();
543 auto *FuncInfo = MI->getMF()->getInfo<AArch64FunctionInfo>();
544 return FuncInfo->clearLinkerOptimizationHints(MIs);
545}
546
547void AArch64leTargetMachine::anchor() { }
548
549AArch64leTargetMachine::AArch64leTargetMachine(
550 const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
551 const TargetOptions &Options, std::optional<Reloc::Model> RM,
552 std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
553 : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
554
555void AArch64beTargetMachine::anchor() { }
556
557AArch64beTargetMachine::AArch64beTargetMachine(
558 const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
559 const TargetOptions &Options, std::optional<Reloc::Model> RM,
560 std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
561 : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
562
563namespace {
564
565/// AArch64 Code Generator Pass Configuration Options.
566class AArch64PassConfig : public TargetPassConfig {
567public:
568 AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
569 : TargetPassConfig(TM, PM) {
570 if (TM.getOptLevel() != CodeGenOptLevel::None)
571 substitutePass(StandardID: &PostRASchedulerID, TargetID: &PostMachineSchedulerID);
572 setEnableSinkAndFold(EnableSinkFold);
573 }
574
575 AArch64TargetMachine &getAArch64TargetMachine() const {
576 return getTM<AArch64TargetMachine>();
577 }
578
579 void addIRPasses() override;
580 bool addPreISel() override;
581 void addCodeGenPrepare() override;
582 bool addInstSelector() override;
583 bool addIRTranslator() override;
584 void addPreLegalizeMachineIR() override;
585 bool addLegalizeMachineIR() override;
586 void addPreRegBankSelect() override;
587 bool addRegBankSelect() override;
588 bool addGlobalInstructionSelect() override;
589 void addMachineSSAOptimization() override;
590 bool addILPOpts() override;
591 void addPreRegAlloc() override;
592 void addPostRewrite() override;
593 void addPostRegAlloc() override;
594 void addPreSched2() override;
595 void addPreEmitPass() override;
596 void addPostBBSections() override;
597 void addPreEmitPass2() override;
598 bool addRegAssignAndRewriteOptimized() override;
599
600 std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
601};
602
603} // end anonymous namespace
604
605void AArch64TargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
606#define GET_PASS_REGISTRY "AArch64PassRegistry.def"
607#include "llvm/Passes/TargetPassRegistry.inc"
608
609 PB.registerLateLoopOptimizationsEPCallback(
610 C: [=](LoopPassManager &LPM, OptimizationLevel Level) {
611 if (Level != OptimizationLevel::O0)
612 LPM.addPass(Pass: LoopIdiomVectorizePass());
613 });
614 if (getTargetTriple().isOSWindows())
615 PB.registerPipelineEarlySimplificationEPCallback(
616 C: [](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
617 PM.addPass(Pass: LowerIFuncPass());
618 });
619}
620
621TargetTransformInfo
622AArch64TargetMachine::getTargetTransformInfo(const Function &F) const {
623 return TargetTransformInfo(std::make_unique<AArch64TTIImpl>(args: this, args: F));
624}
625
626TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
627 return new AArch64PassConfig(*this, PM);
628}
629
630std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
631 return getStandardCSEConfigForOpt(Level: TM->getOptLevel());
632}
633
634void AArch64PassConfig::addIRPasses() {
635 // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
636 // ourselves.
637 addPass(P: createAtomicExpandLegacyPass());
638
639 // Expand any SVE vector library calls that we can't code generate directly.
640 if (EnableSVEIntrinsicOpts &&
641 TM->getOptLevel() != CodeGenOptLevel::None)
642 addPass(P: createSVEIntrinsicOptsPass());
643
644 // Cmpxchg instructions are often used with a subsequent comparison to
645 // determine whether it succeeded. We can exploit existing control-flow in
646 // ldrex/strex loops to simplify this, but it needs tidying up.
647 if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAtomicTidy)
648 addPass(P: createCFGSimplificationPass(Options: SimplifyCFGOptions()
649 .forwardSwitchCondToPhi(B: true)
650 .convertSwitchRangeToICmp(B: true)
651 .convertSwitchToLookupTable(B: true)
652 .needCanonicalLoops(B: false)
653 .hoistCommonInsts(B: true)
654 .sinkCommonInsts(B: true)));
655
656 // Run LoopDataPrefetch
657 //
658 // Run this before LSR to remove the multiplies involved in computing the
659 // pointer values N iterations ahead.
660 if (TM->getOptLevel() != CodeGenOptLevel::None) {
661 if (EnableLoopDataPrefetch)
662 addPass(P: createLoopDataPrefetchPass());
663 if (EnableFalkorHWPFFix)
664 addPass(P: createFalkorMarkStridedAccessesPass());
665 }
666
667 if (EnableGEPOpt) {
668 // Call SeparateConstOffsetFromGEP pass to extract constants within indices
669 // and lower a GEP with multiple indices to either arithmetic operations or
670 // multiple GEPs with single index.
671 addPass(P: createSeparateConstOffsetFromGEPPass(LowerGEP: true));
672 // Call EarlyCSE pass to find and remove subexpressions in the lowered
673 // result.
674 addPass(P: createEarlyCSEPass());
675 // Do loop invariant code motion in case part of the lowered result is
676 // invariant.
677 addPass(P: createLICMPass());
678 }
679
680 TargetPassConfig::addIRPasses();
681
682 if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt)
683 addPass(P: createSelectOptimizePass());
684
685 addPass(P: createAArch64StackTaggingPass(
686 /*IsOptNone=*/TM->getOptLevel() == CodeGenOptLevel::None));
687
688 // Try to use tbl in place of other shuffling operations if doing so would
689 // reduce the total number of instructions. Shuffle masks for big endian may
690 // be different, so require a little endian target.
691 if (TM->createDataLayout().isLittleEndian() &&
692 getOptLevel() >= CodeGenOptLevel::Default && EnableSVEShuffleOpt)
693 addPass(P: createSVEShuffleOptsPass());
694
695 // Match complex arithmetic patterns
696 if (TM->getOptLevel() >= CodeGenOptLevel::Default)
697 addPass(P: createComplexDeinterleavingPass(TM));
698
699 // Match interleaved memory accesses to ldN/stN intrinsics.
700 if (TM->getOptLevel() != CodeGenOptLevel::None) {
701 addPass(P: createInterleavedLoadCombinePass());
702 addPass(P: createInterleavedAccessPass());
703 }
704
705 // Add Control Flow Guard checks.
706 if (TM->getTargetTriple().isOSWindows()) {
707 if (TM->getTargetTriple().isWindowsArm64EC())
708 addPass(P: createAArch64Arm64ECCallLoweringPass());
709 else
710 addPass(P: createCFGuardPass());
711 }
712
713 if (TM->Options.JMCInstrument)
714 addPass(P: createJMCInstrumenterPass());
715}
716
717// Pass Pipeline Configuration
718bool AArch64PassConfig::addPreISel() {
719 // Run promote constant before global merge, so that the promoted constants
720 // get a chance to be merged
721 if (TM->getOptLevel() != CodeGenOptLevel::None && EnablePromoteConstant)
722 addPass(P: createAArch64PromoteConstantPass());
723 // FIXME: On AArch64, this depends on the type.
724 // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
725 // and the offset has to be a multiple of the related size in bytes.
726 if ((TM->getOptLevel() != CodeGenOptLevel::None &&
727 EnableGlobalMerge == cl::boolOrDefault::BOU_UNSET) ||
728 EnableGlobalMerge == cl::boolOrDefault::BOU_TRUE) {
729 bool OnlyOptimizeForSize =
730 (TM->getOptLevel() < CodeGenOptLevel::Aggressive) &&
731 (EnableGlobalMerge == cl::boolOrDefault::BOU_UNSET);
732
733 // Merging of extern globals is enabled by default on non-Mach-O as we
734 // expect it to be generally either beneficial or harmless. On Mach-O it
735 // is disabled as we emit the .subsections_via_symbols directive which
736 // means that merging extern globals is not safe.
737 bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
738 addPass(P: createGlobalMergePass(TM, MaximalOffset: 4095, OnlyOptimizeForSize,
739 MergeExternalByDefault));
740 }
741
742 return false;
743}
744
745void AArch64PassConfig::addCodeGenPrepare() {
746 if (getOptLevel() != CodeGenOptLevel::None)
747 addPass(P: createTypePromotionLegacyPass());
748 TargetPassConfig::addCodeGenPrepare();
749}
750
751bool AArch64PassConfig::addInstSelector() {
752 addPass(P: createAArch64ISelDag(TM&: getAArch64TargetMachine(), OptLevel: getOptLevel()));
753
754 // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
755 // references to _TLS_MODULE_BASE_ as possible.
756 if (TM->getTargetTriple().isOSBinFormatELF() &&
757 getOptLevel() != CodeGenOptLevel::None)
758 addPass(P: createAArch64CleanupLocalDynamicTLSPass());
759
760 return false;
761}
762
763bool AArch64PassConfig::addIRTranslator() {
764 addPass(P: new IRTranslator(getOptLevel()));
765 return false;
766}
767
768void AArch64PassConfig::addPreLegalizeMachineIR() {
769 if (getAArch64TargetMachine().isGlobalISelOptNone()) {
770 addPass(P: createAArch64O0PreLegalizerCombiner());
771 addPass(P: new Localizer());
772 } else {
773 addPass(P: createAArch64PreLegalizerCombiner());
774 addPass(P: new Localizer());
775 if (EnableGISelLoadStoreOptPreLegal)
776 addPass(P: new LoadStoreOpt());
777 }
778}
779
780bool AArch64PassConfig::addLegalizeMachineIR() {
781 addPass(P: new Legalizer());
782 return false;
783}
784
785void AArch64PassConfig::addPreRegBankSelect() {
786 const bool IsGlobalISelOptNone =
787 getAArch64TargetMachine().isGlobalISelOptNone();
788 if (!IsGlobalISelOptNone) {
789 addPass(P: createAArch64PostLegalizerCombinerLegacy(IsOptNone: IsGlobalISelOptNone));
790 if (EnableGISelLoadStoreOptPostLegal)
791 addPass(P: new LoadStoreOpt());
792 }
793 addPass(P: createAArch64PostLegalizerLowering());
794}
795
796bool AArch64PassConfig::addRegBankSelect() {
797 addPass(P: new RegBankSelect());
798 return false;
799}
800
801bool AArch64PassConfig::addGlobalInstructionSelect() {
802 addPass(P: new InstructionSelect(getOptLevel()));
803 if (!getAArch64TargetMachine().isGlobalISelOptNone())
804 addPass(P: createAArch64PostSelectOptimize());
805 return false;
806}
807
808void AArch64PassConfig::addMachineSSAOptimization() {
809 if (TM->getOptLevel() != CodeGenOptLevel::None)
810 addPass(P: createMachineSMEABIPass(TM->getOptLevel()));
811
812 if (TM->getOptLevel() != CodeGenOptLevel::None && EnableSMEPeepholeOpt)
813 addPass(P: createSMEPeepholeOptPass());
814
815 // Run default MachineSSAOptimization first.
816 TargetPassConfig::addMachineSSAOptimization();
817
818 if (TM->getOptLevel() != CodeGenOptLevel::None)
819 addPass(P: createAArch64MIPeepholeOptLegacyPass());
820}
821
822bool AArch64PassConfig::addILPOpts() {
823 if (EnableCondOpt)
824 addPass(P: createAArch64ConditionOptimizerLegacyPass());
825 if (EnableCCMP)
826 addPass(P: createAArch64ConditionalCompares());
827 if (EnableMCR)
828 addPass(PassID: &MachineCombinerID);
829 if (EnableCondBrTuning)
830 addPass(P: createAArch64CondBrTuning());
831 if (EnableEarlyIfConversion)
832 addPass(PassID: &EarlyIfConverterLegacyID);
833 if (EnableStPairSuppress)
834 addPass(P: createAArch64StorePairSuppressPass());
835 addPass(P: createAArch64SIMDInstrOptPass());
836 if (TM->getOptLevel() != CodeGenOptLevel::None)
837 addPass(P: createAArch64StackTaggingPreRALegacyPass());
838 return true;
839}
840
841void AArch64PassConfig::addPreRegAlloc() {
842 if (TM->getOptLevel() == CodeGenOptLevel::None)
843 addPass(P: createMachineSMEABIPass(CodeGenOptLevel::None));
844
845 // Change dead register definitions to refer to the zero register.
846 if (TM->getOptLevel() != CodeGenOptLevel::None &&
847 EnableDeadRegisterElimination)
848 addPass(P: createAArch64DeadRegisterDefinitions());
849
850 // Use AdvSIMD scalar instructions whenever profitable.
851 if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAdvSIMDScalar) {
852 addPass(P: createAArch64AdvSIMDScalar());
853 // The AdvSIMD pass may produce copies that can be rewritten to
854 // be register coalescer friendly.
855 addPass(PassID: &PeepholeOptimizerLegacyID);
856 }
857 if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
858 addPass(PassID: &MachinePipelinerID);
859}
860
861void AArch64PassConfig::addPostRewrite() {
862 if (EnableSRLTSubregToRegMitigation)
863 addPass(P: createAArch64SRLTDefineSuperRegsLegacyPass());
864}
865
866void AArch64PassConfig::addPostRegAlloc() {
867 // Remove redundant copy instructions.
868 if (TM->getOptLevel() != CodeGenOptLevel::None &&
869 EnableRedundantCopyElimination)
870 addPass(P: createAArch64RedundantCopyEliminationPass());
871
872 if (TM->getOptLevel() != CodeGenOptLevel::None && usingDefaultRegAlloc())
873 // Improve performance for some FP/SIMD code for A57.
874 addPass(P: createAArch64A57FPLoadBalancingLegacyPass());
875}
876
877void AArch64PassConfig::addPreSched2() {
878 // Lower homogeneous frame instructions
879 if (EnableHomogeneousPrologEpilog)
880 addPass(P: createAArch64LowerHomogeneousPrologEpilogPass());
881 // Expand some pseudo instructions to allow proper scheduling.
882 addPass(P: createAArch64ExpandPseudoLegacyPass());
883 // Use load/store pair instructions when possible.
884 if (TM->getOptLevel() != CodeGenOptLevel::None) {
885 if (EnableLoadStoreOpt)
886 addPass(P: createAArch64LoadStoreOptLegacyPass());
887 }
888 // Emit KCFI checks for indirect calls.
889 addPass(P: createKCFIPass());
890
891 // The AArch64SpeculationHardeningPass destroys dominator tree and natural
892 // loop info, which is needed for the FalkorHWPFFixPass and also later on.
893 // Therefore, run the AArch64SpeculationHardeningPass before the
894 // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
895 // info.
896 addPass(P: createAArch64SpeculationHardeningPass());
897
898 if (TM->getOptLevel() != CodeGenOptLevel::None) {
899 if (EnableFalkorHWPFFix)
900 addPass(P: createFalkorHWPFFixPass());
901 }
902}
903
904void AArch64PassConfig::addPreEmitPass() {
905 // Machine Block Placement might have created new opportunities when run
906 // at O3, where the Tail Duplication Threshold is set to 4 instructions.
907 // Run the load/store optimizer once more.
908 if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive && EnableLoadStoreOpt)
909 addPass(P: createAArch64LoadStoreOptLegacyPass());
910
911 if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive &&
912 EnableAArch64CopyPropagation)
913 addPass(P: createMachineCopyPropagationPass(UseCopyInstr: true));
914 if (TM->getOptLevel() != CodeGenOptLevel::None)
915 addPass(P: createAArch64RedundantCondBranchPass());
916
917 addPass(P: createAArch64A53Fix835769LegacyPass());
918
919 if (TM->getTargetTriple().isOSWindows()) {
920 // Identify valid longjmp targets for Windows Control Flow Guard.
921 addPass(P: createCFGuardLongjmpPass());
922 // Identify valid eh continuation targets for Windows EHCont Guard.
923 addPass(P: createEHContGuardTargetsPass());
924 }
925
926 if (TM->getOptLevel() != CodeGenOptLevel::None && EnableCollectLOH &&
927 TM->getTargetTriple().isOSBinFormatMachO())
928 addPass(P: createAArch64CollectLOHPass());
929
930 // Apply code layout optimizations. Run late so detection reflects the
931 // final MI stream.
932 if (getOptLevel() != CodeGenOptLevel::None)
933 addPass(P: createAArch64CodeLayoutOptPass());
934}
935
936void AArch64PassConfig::addPostBBSections() {
937 addPass(P: createAArch64SLSHardeningLegacyPass());
938 addPass(P: createAArch64PointerAuthPass());
939 if (EnableBranchTargets)
940 addPass(P: createAArch64BranchTargetsPass());
941 // Relax conditional branch instructions if they're otherwise out of
942 // range of their destination.
943 if (BranchRelaxation)
944 addPass(PassID: &BranchRelaxationPassID);
945
946 if (TM->getOptLevel() != CodeGenOptLevel::None && EnableCompressJumpTables)
947 addPass(P: createAArch64CompressJumpTablesPass());
948}
949
950void AArch64PassConfig::addPreEmitPass2() {
951 // Insert pseudo probe annotation for callsite profiling
952 addPass(P: createPseudoProbeInserter());
953
954 // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo
955 // instructions are lowered to bundles as well.
956 addPass(P: createUnpackMachineBundlesLegacy(Ftor: nullptr));
957}
958
959bool AArch64PassConfig::addRegAssignAndRewriteOptimized() {
960 addPass(P: createAArch64PostCoalescerPass());
961 return TargetPassConfig::addRegAssignAndRewriteOptimized();
962}
963
964MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo(
965 BumpPtrAllocator &Allocator, const Function &F,
966 const TargetSubtargetInfo *STI) const {
967 return AArch64FunctionInfo::create<AArch64FunctionInfo>(
968 Allocator, F, STI: static_cast<const AArch64Subtarget *>(STI));
969}
970
971yaml::MachineFunctionInfo *
972AArch64TargetMachine::createDefaultFuncInfoYAML() const {
973 return new yaml::AArch64FunctionInfo();
974}
975
976yaml::MachineFunctionInfo *
977AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
978 const auto *MFI = MF.getInfo<AArch64FunctionInfo>();
979 return new yaml::AArch64FunctionInfo(*MFI);
980}
981
982bool AArch64TargetMachine::parseMachineFunctionInfo(
983 const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
984 SMDiagnostic &Error, SMRange &SourceRange) const {
985 const auto &YamlMFI = static_cast<const yaml::AArch64FunctionInfo &>(MFI);
986 MachineFunction &MF = PFS.MF;
987 MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
988 return false;
989}
990