1 | //===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // |
10 | //===----------------------------------------------------------------------===// |
11 | |
12 | #include "ARMTargetMachine.h" |
13 | #include "ARM.h" |
14 | #include "ARMMachineFunctionInfo.h" |
15 | #include "ARMMacroFusion.h" |
16 | #include "ARMSubtarget.h" |
17 | #include "ARMTargetObjectFile.h" |
18 | #include "ARMTargetTransformInfo.h" |
19 | #include "MCTargetDesc/ARMMCTargetDesc.h" |
20 | #include "TargetInfo/ARMTargetInfo.h" |
21 | #include "llvm/ADT/STLExtras.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/Analysis/TargetTransformInfo.h" |
24 | #include "llvm/CodeGen/ExecutionDomainFix.h" |
25 | #include "llvm/CodeGen/GlobalISel/CSEInfo.h" |
26 | #include "llvm/CodeGen/GlobalISel/CallLowering.h" |
27 | #include "llvm/CodeGen/GlobalISel/IRTranslator.h" |
28 | #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" |
29 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
30 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
31 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
32 | #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" |
33 | #include "llvm/CodeGen/MIRParser/MIParser.h" |
34 | #include "llvm/CodeGen/MachineFunction.h" |
35 | #include "llvm/CodeGen/MachineScheduler.h" |
36 | #include "llvm/CodeGen/Passes.h" |
37 | #include "llvm/CodeGen/RegisterBankInfo.h" |
38 | #include "llvm/CodeGen/TargetPassConfig.h" |
39 | #include "llvm/IR/Attributes.h" |
40 | #include "llvm/IR/DataLayout.h" |
41 | #include "llvm/IR/Function.h" |
42 | #include "llvm/MC/TargetRegistry.h" |
43 | #include "llvm/Pass.h" |
44 | #include "llvm/Support/CodeGen.h" |
45 | #include "llvm/Support/CommandLine.h" |
46 | #include "llvm/Support/ErrorHandling.h" |
47 | #include "llvm/Target/TargetLoweringObjectFile.h" |
48 | #include "llvm/Target/TargetOptions.h" |
49 | #include "llvm/TargetParser/ARMTargetParser.h" |
50 | #include "llvm/TargetParser/TargetParser.h" |
51 | #include "llvm/TargetParser/Triple.h" |
52 | #include "llvm/Transforms/CFGuard.h" |
53 | #include "llvm/Transforms/IPO.h" |
54 | #include "llvm/Transforms/Scalar.h" |
55 | #include <cassert> |
56 | #include <memory> |
57 | #include <optional> |
58 | #include <string> |
59 | |
60 | using namespace llvm; |
61 | |
62 | static cl::opt<bool> |
63 | DisableA15SDOptimization("disable-a15-sd-optimization" , cl::Hidden, |
64 | cl::desc("Inhibit optimization of S->D register accesses on A15" ), |
65 | cl::init(Val: false)); |
66 | |
67 | static cl::opt<bool> |
68 | EnableAtomicTidy("arm-atomic-cfg-tidy" , cl::Hidden, |
69 | cl::desc("Run SimplifyCFG after expanding atomic operations" |
70 | " to make use of cmpxchg flow-based information" ), |
71 | cl::init(Val: true)); |
72 | |
73 | static cl::opt<bool> |
74 | EnableARMLoadStoreOpt("arm-load-store-opt" , cl::Hidden, |
75 | cl::desc("Enable ARM load/store optimization pass" ), |
76 | cl::init(Val: true)); |
77 | |
78 | // FIXME: Unify control over GlobalMerge. |
79 | static cl::opt<cl::boolOrDefault> |
80 | EnableGlobalMerge("arm-global-merge" , cl::Hidden, |
81 | cl::desc("Enable the global merge pass" )); |
82 | |
// Forward declaration of the initializer for the ARMExecutionDomainFix pass
// defined later in this file, so that LLVMInitializeARMTarget() can call it.
namespace llvm {
void initializeARMExecutionDomainFixPass(PassRegistry&);
}
86 | |
87 | extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() { |
88 | // Register the target. |
89 | RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget()); |
90 | RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget()); |
91 | RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget()); |
92 | RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget()); |
93 | |
94 | PassRegistry &Registry = *PassRegistry::getPassRegistry(); |
95 | initializeGlobalISel(Registry); |
96 | initializeARMLoadStoreOptPass(Registry); |
97 | initializeARMPreAllocLoadStoreOptPass(Registry); |
98 | initializeARMParallelDSPPass(Registry); |
99 | initializeARMBranchTargetsPass(Registry); |
100 | initializeARMConstantIslandsPass(Registry); |
101 | initializeARMExecutionDomainFixPass(Registry); |
102 | initializeARMExpandPseudoPass(Registry); |
103 | initializeThumb2SizeReducePass(Registry); |
104 | initializeMVEVPTBlockPass(Registry); |
105 | initializeMVETPAndVPTOptimisationsPass(Registry); |
106 | initializeMVETailPredicationPass(Registry); |
107 | initializeARMLowOverheadLoopsPass(Registry); |
108 | initializeARMBlockPlacementPass(Registry); |
109 | initializeMVEGatherScatterLoweringPass(Registry); |
110 | initializeARMSLSHardeningPass(Registry); |
111 | initializeMVELaneInterleavingPass(Registry); |
112 | initializeARMFixCortexA57AES1742098Pass(Registry); |
113 | initializeARMDAGToDAGISelLegacyPass(Registry); |
114 | } |
115 | |
116 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
117 | if (TT.isOSBinFormatMachO()) |
118 | return std::make_unique<TargetLoweringObjectFileMachO>(); |
119 | if (TT.isOSWindows()) |
120 | return std::make_unique<TargetLoweringObjectFileCOFF>(); |
121 | return std::make_unique<ARMElfTargetObjectFile>(); |
122 | } |
123 | |
124 | static ARMBaseTargetMachine::ARMABI |
125 | computeTargetABI(const Triple &TT, StringRef CPU, |
126 | const TargetOptions &Options) { |
127 | StringRef ABIName = Options.MCOptions.getABIName(); |
128 | |
129 | if (ABIName.empty()) |
130 | ABIName = ARM::computeDefaultTargetABI(TT, CPU); |
131 | |
132 | if (ABIName == "aapcs16" ) |
133 | return ARMBaseTargetMachine::ARM_ABI_AAPCS16; |
134 | else if (ABIName.starts_with(Prefix: "aapcs" )) |
135 | return ARMBaseTargetMachine::ARM_ABI_AAPCS; |
136 | else if (ABIName.starts_with(Prefix: "apcs" )) |
137 | return ARMBaseTargetMachine::ARM_ABI_APCS; |
138 | |
139 | llvm_unreachable("Unhandled/unknown ABI Name!" ); |
140 | return ARMBaseTargetMachine::ARM_ABI_UNKNOWN; |
141 | } |
142 | |
143 | static std::string computeDataLayout(const Triple &TT, StringRef CPU, |
144 | const TargetOptions &Options, |
145 | bool isLittle) { |
146 | auto ABI = computeTargetABI(TT, CPU, Options); |
147 | std::string Ret; |
148 | |
149 | if (isLittle) |
150 | // Little endian. |
151 | Ret += "e" ; |
152 | else |
153 | // Big endian. |
154 | Ret += "E" ; |
155 | |
156 | Ret += DataLayout::getManglingComponent(T: TT); |
157 | |
158 | // Pointers are 32 bits and aligned to 32 bits. |
159 | Ret += "-p:32:32" ; |
160 | |
161 | // Function pointers are aligned to 8 bits (because the LSB stores the |
162 | // ARM/Thumb state). |
163 | Ret += "-Fi8" ; |
164 | |
165 | // ABIs other than APCS have 64 bit integers with natural alignment. |
166 | if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS) |
167 | Ret += "-i64:64" ; |
168 | |
169 | // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 |
170 | // bits, others to 64 bits. We always try to align to 64 bits. |
171 | if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) |
172 | Ret += "-f64:32:64" ; |
173 | |
174 | // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others |
175 | // to 64. We always ty to give them natural alignment. |
176 | if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) |
177 | Ret += "-v64:32:64-v128:32:128" ; |
178 | else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16) |
179 | Ret += "-v128:64:128" ; |
180 | |
181 | // Try to align aggregates to 32 bits (the default is 64 bits, which has no |
182 | // particular hardware support on 32-bit ARM). |
183 | Ret += "-a:0:32" ; |
184 | |
185 | // Integer registers are 32 bits. |
186 | Ret += "-n32" ; |
187 | |
188 | // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit |
189 | // aligned everywhere else. |
190 | if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16) |
191 | Ret += "-S128" ; |
192 | else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS) |
193 | Ret += "-S64" ; |
194 | else |
195 | Ret += "-S32" ; |
196 | |
197 | return Ret; |
198 | } |
199 | |
200 | static Reloc::Model getEffectiveRelocModel(const Triple &TT, |
201 | std::optional<Reloc::Model> RM) { |
202 | if (!RM) |
203 | // Default relocation model on Darwin is PIC. |
204 | return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static; |
205 | |
206 | if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI) |
207 | assert(TT.isOSBinFormatELF() && |
208 | "ROPI/RWPI currently only supported for ELF" ); |
209 | |
210 | // DynamicNoPIC is only used on darwin. |
211 | if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin()) |
212 | return Reloc::Static; |
213 | |
214 | return *RM; |
215 | } |
216 | |
217 | /// Create an ARM architecture model. |
218 | /// |
219 | ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, |
220 | StringRef CPU, StringRef FS, |
221 | const TargetOptions &Options, |
222 | std::optional<Reloc::Model> RM, |
223 | std::optional<CodeModel::Model> CM, |
224 | CodeGenOptLevel OL, bool isLittle) |
225 | : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, |
226 | CPU, FS, Options, getEffectiveRelocModel(TT, RM), |
227 | getEffectiveCodeModel(CM, Default: CodeModel::Small), OL), |
228 | TargetABI(computeTargetABI(TT, CPU, Options)), |
229 | TLOF(createTLOF(TT: getTargetTriple())), isLittle(isLittle) { |
230 | |
231 | // Default to triple-appropriate float ABI |
232 | if (Options.FloatABIType == FloatABI::Default) { |
233 | if (isTargetHardFloat()) |
234 | this->Options.FloatABIType = FloatABI::Hard; |
235 | else |
236 | this->Options.FloatABIType = FloatABI::Soft; |
237 | } |
238 | |
239 | // Default to triple-appropriate EABI |
240 | if (Options.EABIVersion == EABI::Default || |
241 | Options.EABIVersion == EABI::Unknown) { |
242 | // musl is compatible with glibc with regard to EABI version |
243 | if ((TargetTriple.getEnvironment() == Triple::GNUEABI || |
244 | TargetTriple.getEnvironment() == Triple::GNUEABIHF || |
245 | TargetTriple.getEnvironment() == Triple::MuslEABI || |
246 | TargetTriple.getEnvironment() == Triple::MuslEABIHF || |
247 | TargetTriple.getEnvironment() == Triple::OpenHOS) && |
248 | !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) |
249 | this->Options.EABIVersion = EABI::GNU; |
250 | else |
251 | this->Options.EABIVersion = EABI::EABI5; |
252 | } |
253 | |
254 | if (TT.isOSBinFormatMachO()) { |
255 | this->Options.TrapUnreachable = true; |
256 | this->Options.NoTrapAfterNoreturn = true; |
257 | } |
258 | |
259 | // ARM supports the debug entry values. |
260 | setSupportsDebugEntryValues(true); |
261 | |
262 | initAsmInfo(); |
263 | |
264 | // ARM supports the MachineOutliner. |
265 | setMachineOutliner(true); |
266 | setSupportsDefaultOutlining(true); |
267 | } |
268 | |
// Defaulted destructor, defined out of line in this translation unit.
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
270 | |
271 | MachineFunctionInfo *ARMBaseTargetMachine::createMachineFunctionInfo( |
272 | BumpPtrAllocator &Allocator, const Function &F, |
273 | const TargetSubtargetInfo *STI) const { |
274 | return ARMFunctionInfo::create<ARMFunctionInfo>( |
275 | Allocator, F, STI: static_cast<const ARMSubtarget *>(STI)); |
276 | } |
277 | |
278 | const ARMSubtarget * |
279 | ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { |
280 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
281 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
282 | |
283 | std::string CPU = |
284 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
285 | std::string FS = |
286 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
287 | |
288 | // FIXME: This is related to the code below to reset the target options, |
289 | // we need to know whether or not the soft float flag is set on the |
290 | // function before we can generate a subtarget. We also need to use |
291 | // it as a key for the subtarget since that can be the only difference |
292 | // between two functions. |
293 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
294 | // If the soft float attribute is set on the function turn on the soft float |
295 | // subtarget feature. |
296 | if (SoftFloat) |
297 | FS += FS.empty() ? "+soft-float" : ",+soft-float" ; |
298 | |
299 | // Use the optminsize to identify the subtarget, but don't use it in the |
300 | // feature string. |
301 | std::string Key = CPU + FS; |
302 | if (F.hasMinSize()) |
303 | Key += "+minsize" ; |
304 | |
305 | auto &I = SubtargetMap[Key]; |
306 | if (!I) { |
307 | // This needs to be done before we create a new subtarget since any |
308 | // creation will depend on the TM and the code generation flags on the |
309 | // function that reside in TargetOptions. |
310 | resetTargetOptions(F); |
311 | I = std::make_unique<ARMSubtarget>(args: TargetTriple, args&: CPU, args&: FS, args: *this, args: isLittle, |
312 | args: F.hasMinSize()); |
313 | |
314 | if (!I->isThumb() && !I->hasARMOps()) |
315 | F.getContext().emitError(ErrorStr: "Function '" + F.getName() + "' uses ARM " |
316 | "instructions, but the target does not support ARM mode execution." ); |
317 | } |
318 | |
319 | return I.get(); |
320 | } |
321 | |
322 | TargetTransformInfo |
323 | ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const { |
324 | return TargetTransformInfo(ARMTTIImpl(this, F)); |
325 | } |
326 | |
327 | ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, |
328 | StringRef CPU, StringRef FS, |
329 | const TargetOptions &Options, |
330 | std::optional<Reloc::Model> RM, |
331 | std::optional<CodeModel::Model> CM, |
332 | CodeGenOptLevel OL, bool JIT) |
333 | : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} |
334 | |
335 | ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, |
336 | StringRef CPU, StringRef FS, |
337 | const TargetOptions &Options, |
338 | std::optional<Reloc::Model> RM, |
339 | std::optional<CodeModel::Model> CM, |
340 | CodeGenOptLevel OL, bool JIT) |
341 | : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} |
342 | |
343 | namespace { |
344 | |
345 | /// ARM Code Generator Pass Configuration Options. |
346 | class ARMPassConfig : public TargetPassConfig { |
347 | public: |
348 | ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) |
349 | : TargetPassConfig(TM, PM) {} |
350 | |
351 | ARMBaseTargetMachine &getARMTargetMachine() const { |
352 | return getTM<ARMBaseTargetMachine>(); |
353 | } |
354 | |
355 | ScheduleDAGInstrs * |
356 | createMachineScheduler(MachineSchedContext *C) const override { |
357 | ScheduleDAGMILive *DAG = createGenericSchedLive(C); |
358 | // add DAG Mutations here. |
359 | const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); |
360 | if (ST.hasFusion()) |
361 | DAG->addMutation(Mutation: createARMMacroFusionDAGMutation()); |
362 | return DAG; |
363 | } |
364 | |
365 | ScheduleDAGInstrs * |
366 | createPostMachineScheduler(MachineSchedContext *C) const override { |
367 | ScheduleDAGMI *DAG = createGenericSchedPostRA(C); |
368 | // add DAG Mutations here. |
369 | const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); |
370 | if (ST.hasFusion()) |
371 | DAG->addMutation(Mutation: createARMMacroFusionDAGMutation()); |
372 | return DAG; |
373 | } |
374 | |
375 | void addIRPasses() override; |
376 | void addCodeGenPrepare() override; |
377 | bool addPreISel() override; |
378 | bool addInstSelector() override; |
379 | bool addIRTranslator() override; |
380 | bool addLegalizeMachineIR() override; |
381 | bool addRegBankSelect() override; |
382 | bool addGlobalInstructionSelect() override; |
383 | void addPreRegAlloc() override; |
384 | void addPreSched2() override; |
385 | void addPreEmitPass() override; |
386 | void addPreEmitPass2() override; |
387 | |
388 | std::unique_ptr<CSEConfigBase> getCSEConfig() const override; |
389 | }; |
390 | |
391 | class ARMExecutionDomainFix : public ExecutionDomainFix { |
392 | public: |
393 | static char ID; |
394 | ARMExecutionDomainFix() : ExecutionDomainFix(ID, ARM::DPRRegClass) {} |
395 | StringRef getPassName() const override { |
396 | return "ARM Execution Domain Fix" ; |
397 | } |
398 | }; |
399 | char ARMExecutionDomainFix::ID; |
400 | |
401 | } // end anonymous namespace |
402 | |
// Pass registration for ARMExecutionDomainFix under the argument
// "arm-execution-domain-fix", declaring ReachingDefAnalysis as a dependency.
INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix, "arm-execution-domain-fix",
  "ARM Execution Domain Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
INITIALIZE_PASS_END(ARMExecutionDomainFix, "arm-execution-domain-fix",
  "ARM Execution Domain Fix", false, false)
408 | |
409 | TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { |
410 | return new ARMPassConfig(*this, PM); |
411 | } |
412 | |
413 | std::unique_ptr<CSEConfigBase> ARMPassConfig::getCSEConfig() const { |
414 | return getStandardCSEConfigForOpt(Level: TM->getOptLevel()); |
415 | } |
416 | |
417 | void ARMPassConfig::addIRPasses() { |
418 | if (TM->Options.ThreadModel == ThreadModel::Single) |
419 | addPass(P: createLowerAtomicPass()); |
420 | else |
421 | addPass(P: createAtomicExpandLegacyPass()); |
422 | |
423 | // Cmpxchg instructions are often used with a subsequent comparison to |
424 | // determine whether it succeeded. We can exploit existing control-flow in |
425 | // ldrex/strex loops to simplify this, but it needs tidying up. |
426 | if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAtomicTidy) |
427 | addPass(P: createCFGSimplificationPass( |
428 | Options: SimplifyCFGOptions().hoistCommonInsts(B: true).sinkCommonInsts(B: true), |
429 | Ftor: [this](const Function &F) { |
430 | const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); |
431 | return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); |
432 | })); |
433 | |
434 | addPass(P: createMVEGatherScatterLoweringPass()); |
435 | addPass(P: createMVELaneInterleavingPass()); |
436 | |
437 | TargetPassConfig::addIRPasses(); |
438 | |
439 | // Run the parallel DSP pass. |
440 | if (getOptLevel() == CodeGenOptLevel::Aggressive) |
441 | addPass(P: createARMParallelDSPPass()); |
442 | |
443 | // Match complex arithmetic patterns |
444 | if (TM->getOptLevel() >= CodeGenOptLevel::Default) |
445 | addPass(P: createComplexDeinterleavingPass(TM)); |
446 | |
447 | // Match interleaved memory accesses to ldN/stN intrinsics. |
448 | if (TM->getOptLevel() != CodeGenOptLevel::None) |
449 | addPass(P: createInterleavedAccessPass()); |
450 | |
451 | // Add Control Flow Guard checks. |
452 | if (TM->getTargetTriple().isOSWindows()) |
453 | addPass(P: createCFGuardCheckPass()); |
454 | |
455 | if (TM->Options.JMCInstrument) |
456 | addPass(P: createJMCInstrumenterPass()); |
457 | } |
458 | |
459 | void ARMPassConfig::addCodeGenPrepare() { |
460 | if (getOptLevel() != CodeGenOptLevel::None) |
461 | addPass(P: createTypePromotionLegacyPass()); |
462 | TargetPassConfig::addCodeGenPrepare(); |
463 | } |
464 | |
465 | bool ARMPassConfig::addPreISel() { |
466 | if ((TM->getOptLevel() != CodeGenOptLevel::None && |
467 | EnableGlobalMerge == cl::BOU_UNSET) || |
468 | EnableGlobalMerge == cl::BOU_TRUE) { |
469 | // FIXME: This is using the thumb1 only constant value for |
470 | // maximal global offset for merging globals. We may want |
471 | // to look into using the old value for non-thumb1 code of |
472 | // 4095 based on the TargetMachine, but this starts to become |
473 | // tricky when doing code gen per function. |
474 | bool OnlyOptimizeForSize = |
475 | (TM->getOptLevel() < CodeGenOptLevel::Aggressive) && |
476 | (EnableGlobalMerge == cl::BOU_UNSET); |
477 | // Merging of extern globals is enabled by default on non-Mach-O as we |
478 | // expect it to be generally either beneficial or harmless. On Mach-O it |
479 | // is disabled as we emit the .subsections_via_symbols directive which |
480 | // means that merging extern globals is not safe. |
481 | bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); |
482 | addPass(P: createGlobalMergePass(TM, MaximalOffset: 127, OnlyOptimizeForSize, |
483 | MergeExternalByDefault)); |
484 | } |
485 | |
486 | if (TM->getOptLevel() != CodeGenOptLevel::None) { |
487 | addPass(P: createHardwareLoopsLegacyPass()); |
488 | addPass(P: createMVETailPredicationPass()); |
489 | // FIXME: IR passes can delete address-taken basic blocks, deleting |
490 | // corresponding blockaddresses. ARMConstantPoolConstant holds references to |
491 | // address-taken basic blocks which can be invalidated if the function |
492 | // containing the blockaddress has already been codegen'd and the basic |
493 | // block is removed. Work around this by forcing all IR passes to run before |
494 | // any ISel takes place. We should have a more principled way of handling |
495 | // this. See D99707 for more details. |
496 | addPass(P: createBarrierNoopPass()); |
497 | } |
498 | |
499 | return false; |
500 | } |
501 | |
502 | bool ARMPassConfig::addInstSelector() { |
503 | addPass(P: createARMISelDag(TM&: getARMTargetMachine(), OptLevel: getOptLevel())); |
504 | return false; |
505 | } |
506 | |
507 | bool ARMPassConfig::addIRTranslator() { |
508 | addPass(P: new IRTranslator(getOptLevel())); |
509 | return false; |
510 | } |
511 | |
512 | bool ARMPassConfig::addLegalizeMachineIR() { |
513 | addPass(P: new Legalizer()); |
514 | return false; |
515 | } |
516 | |
517 | bool ARMPassConfig::addRegBankSelect() { |
518 | addPass(P: new RegBankSelect()); |
519 | return false; |
520 | } |
521 | |
522 | bool ARMPassConfig::addGlobalInstructionSelect() { |
523 | addPass(P: new InstructionSelect(getOptLevel())); |
524 | return false; |
525 | } |
526 | |
527 | void ARMPassConfig::addPreRegAlloc() { |
528 | if (getOptLevel() != CodeGenOptLevel::None) { |
529 | if (getOptLevel() == CodeGenOptLevel::Aggressive) |
530 | addPass(PassID: &MachinePipelinerID); |
531 | |
532 | addPass(P: createMVETPAndVPTOptimisationsPass()); |
533 | |
534 | addPass(P: createMLxExpansionPass()); |
535 | |
536 | if (EnableARMLoadStoreOpt) |
537 | addPass(P: createARMLoadStoreOptimizationPass(/* pre-register alloc */ PreAlloc: true)); |
538 | |
539 | if (!DisableA15SDOptimization) |
540 | addPass(P: createA15SDOptimizerPass()); |
541 | } |
542 | } |
543 | |
544 | void ARMPassConfig::addPreSched2() { |
545 | if (getOptLevel() != CodeGenOptLevel::None) { |
546 | if (EnableARMLoadStoreOpt) |
547 | addPass(P: createARMLoadStoreOptimizationPass()); |
548 | |
549 | addPass(P: new ARMExecutionDomainFix()); |
550 | addPass(P: createBreakFalseDeps()); |
551 | } |
552 | |
553 | // Expand some pseudo instructions into multiple instructions to allow |
554 | // proper scheduling. |
555 | addPass(P: createARMExpandPseudoPass()); |
556 | |
557 | if (getOptLevel() != CodeGenOptLevel::None) { |
558 | // When optimising for size, always run the Thumb2SizeReduction pass before |
559 | // IfConversion. Otherwise, check whether IT blocks are restricted |
560 | // (e.g. in v8, IfConversion depends on Thumb instruction widths) |
561 | addPass(P: createThumb2SizeReductionPass(Ftor: [this](const Function &F) { |
562 | return this->TM->getSubtarget<ARMSubtarget>(F).hasMinSize() || |
563 | this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(); |
564 | })); |
565 | |
566 | addPass(P: createIfConverter(Ftor: [](const MachineFunction &MF) { |
567 | return !MF.getSubtarget<ARMSubtarget>().isThumb1Only(); |
568 | })); |
569 | } |
570 | addPass(P: createThumb2ITBlockPass()); |
571 | |
572 | // Add both scheduling passes to give the subtarget an opportunity to pick |
573 | // between them. |
574 | if (getOptLevel() != CodeGenOptLevel::None) { |
575 | addPass(PassID: &PostMachineSchedulerID); |
576 | addPass(PassID: &PostRASchedulerID); |
577 | } |
578 | |
579 | addPass(P: createMVEVPTBlockPass()); |
580 | addPass(P: createARMIndirectThunks()); |
581 | addPass(P: createARMSLSHardeningPass()); |
582 | } |
583 | |
584 | void ARMPassConfig::addPreEmitPass() { |
585 | addPass(P: createThumb2SizeReductionPass()); |
586 | |
587 | // Constant island pass work on unbundled instructions. |
588 | addPass(P: createUnpackMachineBundles(Ftor: [](const MachineFunction &MF) { |
589 | return MF.getSubtarget<ARMSubtarget>().isThumb2(); |
590 | })); |
591 | |
592 | // Don't optimize barriers or block placement at -O0. |
593 | if (getOptLevel() != CodeGenOptLevel::None) { |
594 | addPass(P: createARMBlockPlacementPass()); |
595 | addPass(P: createARMOptimizeBarriersPass()); |
596 | } |
597 | } |
598 | |
599 | void ARMPassConfig::addPreEmitPass2() { |
600 | // Inserts fixup instructions before unsafe AES operations. Instructions may |
601 | // be inserted at the start of blocks and at within blocks so this pass has to |
602 | // come before those below. |
603 | addPass(P: createARMFixCortexA57AES1742098Pass()); |
604 | // Inserts BTIs at the start of functions and indirectly-called basic blocks, |
605 | // so passes cannot add to the start of basic blocks once this has run. |
606 | addPass(P: createARMBranchTargetsPass()); |
607 | // Inserts Constant Islands. Block sizes cannot be increased after this point, |
608 | // as this may push the branch ranges and load offsets of accessing constant |
609 | // pools out of range.. |
610 | addPass(P: createARMConstantIslandPass()); |
611 | // Finalises Low-Overhead Loops. This replaces pseudo instructions with real |
612 | // instructions, but the pseudos all have conservative sizes so that block |
613 | // sizes will only be decreased by this pass. |
614 | addPass(P: createARMLowOverheadLoopsPass()); |
615 | |
616 | if (TM->getTargetTriple().isOSWindows()) { |
617 | // Identify valid longjmp targets for Windows Control Flow Guard. |
618 | addPass(P: createCFGuardLongjmpPass()); |
619 | // Identify valid eh continuation targets for Windows EHCont Guard. |
620 | addPass(P: createEHContGuardCatchretPass()); |
621 | } |
622 | } |
623 | |
624 | yaml::MachineFunctionInfo * |
625 | ARMBaseTargetMachine::createDefaultFuncInfoYAML() const { |
626 | return new yaml::ARMFunctionInfo(); |
627 | } |
628 | |
629 | yaml::MachineFunctionInfo * |
630 | ARMBaseTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { |
631 | const auto *MFI = MF.getInfo<ARMFunctionInfo>(); |
632 | return new yaml::ARMFunctionInfo(*MFI); |
633 | } |
634 | |
635 | bool ARMBaseTargetMachine::parseMachineFunctionInfo( |
636 | const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, |
637 | SMDiagnostic &Error, SMRange &SourceRange) const { |
638 | const auto &YamlMFI = static_cast<const yaml::ARMFunctionInfo &>(MFI); |
639 | MachineFunction &MF = PFS.MF; |
640 | MF.getInfo<ARMFunctionInfo>()->initializeBaseYamlFields(YamlMFI); |
641 | return false; |
642 | } |
643 | |