1 | //===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
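//
// This file defines the ARM target machine classes: target registration,
// data layout computation, per-function subtarget selection, and the
// configuration of the ARM code generation pass pipeline.
//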
10 | //===----------------------------------------------------------------------===// |
11 | |
12 | #include "ARMTargetMachine.h" |
13 | #include "ARM.h" |
14 | #include "ARMLatencyMutations.h" |
15 | #include "ARMMachineFunctionInfo.h" |
16 | #include "ARMMacroFusion.h" |
17 | #include "ARMSubtarget.h" |
18 | #include "ARMTargetObjectFile.h" |
19 | #include "ARMTargetTransformInfo.h" |
20 | #include "MCTargetDesc/ARMMCTargetDesc.h" |
21 | #include "TargetInfo/ARMTargetInfo.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/Analysis/TargetTransformInfo.h" |
24 | #include "llvm/CodeGen/ExecutionDomainFix.h" |
25 | #include "llvm/CodeGen/GlobalISel/CSEInfo.h" |
26 | #include "llvm/CodeGen/GlobalISel/CallLowering.h" |
27 | #include "llvm/CodeGen/GlobalISel/IRTranslator.h" |
28 | #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" |
29 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
30 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
31 | #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" |
32 | #include "llvm/CodeGen/MIRParser/MIParser.h" |
33 | #include "llvm/CodeGen/MachineFunction.h" |
34 | #include "llvm/CodeGen/MachineScheduler.h" |
35 | #include "llvm/CodeGen/Passes.h" |
36 | #include "llvm/CodeGen/TargetPassConfig.h" |
37 | #include "llvm/IR/Attributes.h" |
38 | #include "llvm/IR/DataLayout.h" |
39 | #include "llvm/IR/Function.h" |
40 | #include "llvm/MC/TargetRegistry.h" |
41 | #include "llvm/Pass.h" |
42 | #include "llvm/Support/CodeGen.h" |
43 | #include "llvm/Support/CommandLine.h" |
44 | #include "llvm/Support/Compiler.h" |
45 | #include "llvm/Support/ErrorHandling.h" |
46 | #include "llvm/Target/TargetLoweringObjectFile.h" |
47 | #include "llvm/Target/TargetOptions.h" |
48 | #include "llvm/TargetParser/ARMTargetParser.h" |
49 | #include "llvm/TargetParser/TargetParser.h" |
50 | #include "llvm/TargetParser/Triple.h" |
51 | #include "llvm/Transforms/CFGuard.h" |
52 | #include "llvm/Transforms/IPO.h" |
53 | #include "llvm/Transforms/Scalar.h" |
54 | #include <cassert> |
55 | #include <memory> |
56 | #include <optional> |
57 | #include <string> |
58 | |
59 | using namespace llvm; |
60 | |
61 | static cl::opt<bool> |
62 | DisableA15SDOptimization("disable-a15-sd-optimization" , cl::Hidden, |
63 | cl::desc("Inhibit optimization of S->D register accesses on A15" ), |
64 | cl::init(Val: false)); |
65 | |
66 | static cl::opt<bool> |
67 | EnableAtomicTidy("arm-atomic-cfg-tidy" , cl::Hidden, |
68 | cl::desc("Run SimplifyCFG after expanding atomic operations" |
69 | " to make use of cmpxchg flow-based information" ), |
70 | cl::init(Val: true)); |
71 | |
72 | static cl::opt<bool> |
73 | EnableARMLoadStoreOpt("arm-load-store-opt" , cl::Hidden, |
74 | cl::desc("Enable ARM load/store optimization pass" ), |
75 | cl::init(Val: true)); |
76 | |
77 | // FIXME: Unify control over GlobalMerge. |
78 | static cl::opt<cl::boolOrDefault> |
79 | EnableGlobalMerge("arm-global-merge" , cl::Hidden, |
80 | cl::desc("Enable the global merge pass" )); |
81 | |
// Forward declaration of the registry initializer for ARMExecutionDomainFix so
// that LLVMInitializeARMTarget() can call it. The definition is expected to
// come from the INITIALIZE_PASS_* macros further down in this file.
namespace llvm {
void initializeARMExecutionDomainFixPass(PassRegistry&);
}
85 | |
86 | extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() { |
87 | // Register the target. |
88 | RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget()); |
89 | RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget()); |
90 | RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget()); |
91 | RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget()); |
92 | |
93 | PassRegistry &Registry = *PassRegistry::getPassRegistry(); |
94 | initializeGlobalISel(Registry); |
95 | initializeARMAsmPrinterPass(Registry); |
96 | initializeARMLoadStoreOptPass(Registry); |
97 | initializeARMPreAllocLoadStoreOptPass(Registry); |
98 | initializeARMParallelDSPPass(Registry); |
99 | initializeARMBranchTargetsPass(Registry); |
100 | initializeARMConstantIslandsPass(Registry); |
101 | initializeARMExecutionDomainFixPass(Registry); |
102 | initializeARMExpandPseudoPass(Registry); |
103 | initializeThumb2SizeReducePass(Registry); |
104 | initializeMVEVPTBlockPass(Registry); |
105 | initializeMVETPAndVPTOptimisationsPass(Registry); |
106 | initializeMVETailPredicationPass(Registry); |
107 | initializeARMLowOverheadLoopsPass(Registry); |
108 | initializeARMBlockPlacementPass(Registry); |
109 | initializeMVEGatherScatterLoweringPass(Registry); |
110 | initializeARMSLSHardeningPass(Registry); |
111 | initializeMVELaneInterleavingPass(Registry); |
112 | initializeARMFixCortexA57AES1742098Pass(Registry); |
113 | initializeARMDAGToDAGISelLegacyPass(Registry); |
114 | } |
115 | |
116 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
117 | if (TT.isOSBinFormatMachO()) |
118 | return std::make_unique<TargetLoweringObjectFileMachO>(); |
119 | if (TT.isOSWindows()) |
120 | return std::make_unique<TargetLoweringObjectFileCOFF>(); |
121 | return std::make_unique<ARMElfTargetObjectFile>(); |
122 | } |
123 | |
124 | static std::string computeDataLayout(const Triple &TT, StringRef CPU, |
125 | const TargetOptions &Options, |
126 | bool isLittle) { |
127 | auto ABI = ARM::computeTargetABI(TT, CPU, ABIName: Options.MCOptions.ABIName); |
128 | std::string Ret; |
129 | |
130 | if (isLittle) |
131 | // Little endian. |
132 | Ret += "e" ; |
133 | else |
134 | // Big endian. |
135 | Ret += "E" ; |
136 | |
137 | Ret += DataLayout::getManglingComponent(T: TT); |
138 | |
139 | // Pointers are 32 bits and aligned to 32 bits. |
140 | Ret += "-p:32:32" ; |
141 | |
142 | // Function pointers are aligned to 8 bits (because the LSB stores the |
143 | // ARM/Thumb state). |
144 | Ret += "-Fi8" ; |
145 | |
146 | // ABIs other than APCS have 64 bit integers with natural alignment. |
147 | if (ABI != ARM::ARM_ABI_APCS) |
148 | Ret += "-i64:64" ; |
149 | |
150 | // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 |
151 | // bits, others to 64 bits. We always try to align to 64 bits. |
152 | if (ABI == ARM::ARM_ABI_APCS) |
153 | Ret += "-f64:32:64" ; |
154 | |
155 | // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others |
156 | // to 64. We always ty to give them natural alignment. |
157 | if (ABI == ARM::ARM_ABI_APCS) |
158 | Ret += "-v64:32:64-v128:32:128" ; |
159 | else if (ABI != ARM::ARM_ABI_AAPCS16) |
160 | Ret += "-v128:64:128" ; |
161 | |
162 | // Try to align aggregates to 32 bits (the default is 64 bits, which has no |
163 | // particular hardware support on 32-bit ARM). |
164 | Ret += "-a:0:32" ; |
165 | |
166 | // Integer registers are 32 bits. |
167 | Ret += "-n32" ; |
168 | |
169 | // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit |
170 | // aligned everywhere else. |
171 | if (TT.isOSNaCl() || ABI == ARM::ARM_ABI_AAPCS16) |
172 | Ret += "-S128" ; |
173 | else if (ABI == ARM::ARM_ABI_AAPCS) |
174 | Ret += "-S64" ; |
175 | else |
176 | Ret += "-S32" ; |
177 | |
178 | return Ret; |
179 | } |
180 | |
181 | static Reloc::Model getEffectiveRelocModel(const Triple &TT, |
182 | std::optional<Reloc::Model> RM) { |
183 | if (!RM) |
184 | // Default relocation model on Darwin is PIC. |
185 | return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static; |
186 | |
187 | if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI) |
188 | assert(TT.isOSBinFormatELF() && |
189 | "ROPI/RWPI currently only supported for ELF" ); |
190 | |
191 | // DynamicNoPIC is only used on darwin. |
192 | if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin()) |
193 | return Reloc::Static; |
194 | |
195 | return *RM; |
196 | } |
197 | |
198 | /// Create an ARM architecture model. |
199 | /// |
200 | ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, |
201 | StringRef CPU, StringRef FS, |
202 | const TargetOptions &Options, |
203 | std::optional<Reloc::Model> RM, |
204 | std::optional<CodeModel::Model> CM, |
205 | CodeGenOptLevel OL, bool isLittle) |
206 | : CodeGenTargetMachineImpl(T, computeDataLayout(TT, CPU, Options, isLittle), |
207 | TT, CPU, FS, Options, |
208 | getEffectiveRelocModel(TT, RM), |
209 | getEffectiveCodeModel(CM, Default: CodeModel::Small), OL), |
210 | TargetABI(ARM::computeTargetABI(TT, CPU, ABIName: Options.MCOptions.ABIName)), |
211 | TLOF(createTLOF(TT: getTargetTriple())), isLittle(isLittle) { |
212 | |
213 | // Default to triple-appropriate float ABI |
214 | if (Options.FloatABIType == FloatABI::Default) { |
215 | if (isTargetHardFloat()) |
216 | this->Options.FloatABIType = FloatABI::Hard; |
217 | else |
218 | this->Options.FloatABIType = FloatABI::Soft; |
219 | } |
220 | |
221 | // Default to triple-appropriate EABI |
222 | if (Options.EABIVersion == EABI::Default || |
223 | Options.EABIVersion == EABI::Unknown) { |
224 | // musl is compatible with glibc with regard to EABI version |
225 | if ((TargetTriple.getEnvironment() == Triple::GNUEABI || |
226 | TargetTriple.getEnvironment() == Triple::GNUEABIT64 || |
227 | TargetTriple.getEnvironment() == Triple::GNUEABIHF || |
228 | TargetTriple.getEnvironment() == Triple::GNUEABIHFT64 || |
229 | TargetTriple.getEnvironment() == Triple::MuslEABI || |
230 | TargetTriple.getEnvironment() == Triple::MuslEABIHF || |
231 | TargetTriple.getEnvironment() == Triple::OpenHOS) && |
232 | !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) |
233 | this->Options.EABIVersion = EABI::GNU; |
234 | else |
235 | this->Options.EABIVersion = EABI::EABI5; |
236 | } |
237 | |
238 | if (TT.isOSBinFormatMachO()) { |
239 | this->Options.TrapUnreachable = true; |
240 | this->Options.NoTrapAfterNoreturn = true; |
241 | } |
242 | |
243 | // ARM supports the debug entry values. |
244 | setSupportsDebugEntryValues(true); |
245 | |
246 | initAsmInfo(); |
247 | |
248 | // ARM supports the MachineOutliner. |
249 | setMachineOutliner(true); |
250 | setSupportsDefaultOutlining(true); |
251 | } |
252 | |
253 | ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; |
254 | |
255 | MachineFunctionInfo *ARMBaseTargetMachine::createMachineFunctionInfo( |
256 | BumpPtrAllocator &Allocator, const Function &F, |
257 | const TargetSubtargetInfo *STI) const { |
258 | return ARMFunctionInfo::create<ARMFunctionInfo>( |
259 | Allocator, F, STI: static_cast<const ARMSubtarget *>(STI)); |
260 | } |
261 | |
262 | const ARMSubtarget * |
263 | ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { |
264 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
265 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
266 | |
267 | std::string CPU = |
268 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
269 | std::string FS = |
270 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
271 | |
272 | // FIXME: This is related to the code below to reset the target options, |
273 | // we need to know whether or not the soft float flag is set on the |
274 | // function before we can generate a subtarget. We also need to use |
275 | // it as a key for the subtarget since that can be the only difference |
276 | // between two functions. |
277 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
278 | // If the soft float attribute is set on the function turn on the soft float |
279 | // subtarget feature. |
280 | if (SoftFloat) |
281 | FS += FS.empty() ? "+soft-float" : ",+soft-float" ; |
282 | |
283 | // Use the optminsize to identify the subtarget, but don't use it in the |
284 | // feature string. |
285 | std::string Key = CPU + FS; |
286 | if (F.hasMinSize()) |
287 | Key += "+minsize" ; |
288 | |
289 | auto &I = SubtargetMap[Key]; |
290 | if (!I) { |
291 | // This needs to be done before we create a new subtarget since any |
292 | // creation will depend on the TM and the code generation flags on the |
293 | // function that reside in TargetOptions. |
294 | resetTargetOptions(F); |
295 | I = std::make_unique<ARMSubtarget>(args: TargetTriple, args&: CPU, args&: FS, args: *this, args: isLittle, |
296 | args: F.hasMinSize()); |
297 | |
298 | if (!I->isThumb() && !I->hasARMOps()) |
299 | F.getContext().emitError(ErrorStr: "Function '" + F.getName() + "' uses ARM " |
300 | "instructions, but the target does not support ARM mode execution." ); |
301 | } |
302 | |
303 | return I.get(); |
304 | } |
305 | |
306 | TargetTransformInfo |
307 | ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const { |
308 | return TargetTransformInfo(std::make_unique<ARMTTIImpl>(args: this, args: F)); |
309 | } |
310 | |
311 | ScheduleDAGInstrs * |
312 | ARMBaseTargetMachine::createMachineScheduler(MachineSchedContext *C) const { |
313 | ScheduleDAGMILive *DAG = createSchedLive(C); |
314 | // add DAG Mutations here. |
315 | const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); |
316 | if (ST.hasFusion()) |
317 | DAG->addMutation(Mutation: createARMMacroFusionDAGMutation()); |
318 | return DAG; |
319 | } |
320 | |
321 | ScheduleDAGInstrs * |
322 | ARMBaseTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const { |
323 | ScheduleDAGMI *DAG = createSchedPostRA(C); |
324 | // add DAG Mutations here. |
325 | const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); |
326 | if (ST.hasFusion()) |
327 | DAG->addMutation(Mutation: createARMMacroFusionDAGMutation()); |
328 | if (auto Mutation = createARMLatencyMutations(ST, AA: C->AA)) |
329 | DAG->addMutation(Mutation: std::move(Mutation)); |
330 | return DAG; |
331 | } |
332 | |
/// Little-endian ARM/Thumb target machine: forwards everything to the base
/// class with isLittle = true. The JIT argument is not used in this
/// constructor.
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       std::optional<Reloc::Model> RM,
                                       std::optional<CodeModel::Model> CM,
                                       CodeGenOptLevel OL, bool JIT)
    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
340 | |
/// Big-endian ARM/Thumb target machine: forwards everything to the base class
/// with isLittle = false. The JIT argument is not used in this constructor.
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       std::optional<Reloc::Model> RM,
                                       std::optional<CodeModel::Model> CM,
                                       CodeGenOptLevel OL, bool JIT)
    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
348 | |
349 | namespace { |
350 | |
/// ARM Code Generator Pass Configuration Options.
///
/// Overrides the TargetPassConfig hooks that decide which passes run at each
/// stage of ARM code generation; the hook bodies are defined out of line
/// later in this file.
class ARMPassConfig : public TargetPassConfig {
public:
  ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  /// The target machine, typed as the ARM base target machine.
  ARMBaseTargetMachine &getARMTargetMachine() const {
    return getTM<ARMBaseTargetMachine>();
  }

  // IR-level hooks.
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  // SelectionDAG instruction selection.
  bool addInstSelector() override;
  // GlobalISel pipeline stages.
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
  // Machine-level hooks.
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
  void addPreEmitPass2() override;

  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
376 | |
/// ARM instantiation of the generic ExecutionDomainFix pass, constructed over
/// the ARM::DPRRegClass register class.
class ARMExecutionDomainFix : public ExecutionDomainFix {
public:
  static char ID;
  ARMExecutionDomainFix() : ExecutionDomainFix(ID, ARM::DPRRegClass) {}
  StringRef getPassName() const override {
    return "ARM Execution Domain Fix";
  }
};
// Pass identification object; its address is what is passed to the base class.
char ARMExecutionDomainFix::ID;
386 | |
387 | } // end anonymous namespace |
388 | |
// Pass-registration boilerplate for ARMExecutionDomainFix under the name
// "arm-execution-domain-fix", declaring ReachingDefAnalysis as a dependency.
INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix, "arm-execution-domain-fix",
  "ARM Execution Domain Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
INITIALIZE_PASS_END(ARMExecutionDomainFix, "arm-execution-domain-fix",
  "ARM Execution Domain Fix", false, false)
394 | |
395 | TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { |
396 | return new ARMPassConfig(*this, PM); |
397 | } |
398 | |
399 | std::unique_ptr<CSEConfigBase> ARMPassConfig::getCSEConfig() const { |
400 | return getStandardCSEConfigForOpt(Level: TM->getOptLevel()); |
401 | } |
402 | |
403 | void ARMPassConfig::addIRPasses() { |
404 | if (TM->Options.ThreadModel == ThreadModel::Single) |
405 | addPass(P: createLowerAtomicPass()); |
406 | else |
407 | addPass(P: createAtomicExpandLegacyPass()); |
408 | |
409 | // Cmpxchg instructions are often used with a subsequent comparison to |
410 | // determine whether it succeeded. We can exploit existing control-flow in |
411 | // ldrex/strex loops to simplify this, but it needs tidying up. |
412 | if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAtomicTidy) |
413 | addPass(P: createCFGSimplificationPass( |
414 | Options: SimplifyCFGOptions().hoistCommonInsts(B: true).sinkCommonInsts(B: true), |
415 | Ftor: [this](const Function &F) { |
416 | const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); |
417 | return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); |
418 | })); |
419 | |
420 | addPass(P: createMVEGatherScatterLoweringPass()); |
421 | addPass(P: createMVELaneInterleavingPass()); |
422 | |
423 | TargetPassConfig::addIRPasses(); |
424 | |
425 | // Run the parallel DSP pass. |
426 | if (getOptLevel() == CodeGenOptLevel::Aggressive) |
427 | addPass(P: createARMParallelDSPPass()); |
428 | |
429 | // Match complex arithmetic patterns |
430 | if (TM->getOptLevel() >= CodeGenOptLevel::Default) |
431 | addPass(P: createComplexDeinterleavingPass(TM)); |
432 | |
433 | // Match interleaved memory accesses to ldN/stN intrinsics. |
434 | if (TM->getOptLevel() != CodeGenOptLevel::None) |
435 | addPass(P: createInterleavedAccessPass()); |
436 | |
437 | // Add Control Flow Guard checks. |
438 | if (TM->getTargetTriple().isOSWindows()) |
439 | addPass(P: createCFGuardCheckPass()); |
440 | |
441 | if (TM->Options.JMCInstrument) |
442 | addPass(P: createJMCInstrumenterPass()); |
443 | } |
444 | |
445 | void ARMPassConfig::addCodeGenPrepare() { |
446 | if (getOptLevel() != CodeGenOptLevel::None) |
447 | addPass(P: createTypePromotionLegacyPass()); |
448 | TargetPassConfig::addCodeGenPrepare(); |
449 | } |
450 | |
451 | bool ARMPassConfig::addPreISel() { |
452 | if ((TM->getOptLevel() != CodeGenOptLevel::None && |
453 | EnableGlobalMerge == cl::BOU_UNSET) || |
454 | EnableGlobalMerge == cl::BOU_TRUE) { |
455 | // FIXME: This is using the thumb1 only constant value for |
456 | // maximal global offset for merging globals. We may want |
457 | // to look into using the old value for non-thumb1 code of |
458 | // 4095 based on the TargetMachine, but this starts to become |
459 | // tricky when doing code gen per function. |
460 | bool OnlyOptimizeForSize = |
461 | (TM->getOptLevel() < CodeGenOptLevel::Aggressive) && |
462 | (EnableGlobalMerge == cl::BOU_UNSET); |
463 | // Merging of extern globals is enabled by default on non-Mach-O as we |
464 | // expect it to be generally either beneficial or harmless. On Mach-O it |
465 | // is disabled as we emit the .subsections_via_symbols directive which |
466 | // means that merging extern globals is not safe. |
467 | bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); |
468 | addPass(P: createGlobalMergePass(TM, MaximalOffset: 127, OnlyOptimizeForSize, |
469 | MergeExternalByDefault)); |
470 | } |
471 | |
472 | if (TM->getOptLevel() != CodeGenOptLevel::None) { |
473 | addPass(P: createHardwareLoopsLegacyPass()); |
474 | addPass(P: createMVETailPredicationPass()); |
475 | // FIXME: IR passes can delete address-taken basic blocks, deleting |
476 | // corresponding blockaddresses. ARMConstantPoolConstant holds references to |
477 | // address-taken basic blocks which can be invalidated if the function |
478 | // containing the blockaddress has already been codegen'd and the basic |
479 | // block is removed. Work around this by forcing all IR passes to run before |
480 | // any ISel takes place. We should have a more principled way of handling |
481 | // this. See D99707 for more details. |
482 | addPass(P: createBarrierNoopPass()); |
483 | } |
484 | |
485 | return false; |
486 | } |
487 | |
488 | bool ARMPassConfig::addInstSelector() { |
489 | addPass(P: createARMISelDag(TM&: getARMTargetMachine(), OptLevel: getOptLevel())); |
490 | return false; |
491 | } |
492 | |
493 | bool ARMPassConfig::addIRTranslator() { |
494 | addPass(P: new IRTranslator(getOptLevel())); |
495 | return false; |
496 | } |
497 | |
498 | bool ARMPassConfig::addLegalizeMachineIR() { |
499 | addPass(P: new Legalizer()); |
500 | return false; |
501 | } |
502 | |
503 | bool ARMPassConfig::addRegBankSelect() { |
504 | addPass(P: new RegBankSelect()); |
505 | return false; |
506 | } |
507 | |
508 | bool ARMPassConfig::addGlobalInstructionSelect() { |
509 | addPass(P: new InstructionSelect(getOptLevel())); |
510 | return false; |
511 | } |
512 | |
513 | void ARMPassConfig::addPreRegAlloc() { |
514 | if (getOptLevel() != CodeGenOptLevel::None) { |
515 | if (getOptLevel() == CodeGenOptLevel::Aggressive) |
516 | addPass(PassID: &MachinePipelinerID); |
517 | |
518 | addPass(P: createMVETPAndVPTOptimisationsPass()); |
519 | |
520 | addPass(P: createMLxExpansionPass()); |
521 | |
522 | if (EnableARMLoadStoreOpt) |
523 | addPass(P: createARMLoadStoreOptimizationPass(/* pre-register alloc */ PreAlloc: true)); |
524 | |
525 | if (!DisableA15SDOptimization) |
526 | addPass(P: createA15SDOptimizerPass()); |
527 | } |
528 | } |
529 | |
530 | void ARMPassConfig::addPreSched2() { |
531 | if (getOptLevel() != CodeGenOptLevel::None) { |
532 | if (EnableARMLoadStoreOpt) |
533 | addPass(P: createARMLoadStoreOptimizationPass()); |
534 | |
535 | addPass(P: new ARMExecutionDomainFix()); |
536 | addPass(P: createBreakFalseDeps()); |
537 | } |
538 | |
539 | // Expand some pseudo instructions into multiple instructions to allow |
540 | // proper scheduling. |
541 | addPass(P: createARMExpandPseudoPass()); |
542 | |
543 | if (getOptLevel() != CodeGenOptLevel::None) { |
544 | // When optimising for size, always run the Thumb2SizeReduction pass before |
545 | // IfConversion. Otherwise, check whether IT blocks are restricted |
546 | // (e.g. in v8, IfConversion depends on Thumb instruction widths) |
547 | addPass(P: createThumb2SizeReductionPass(Ftor: [this](const Function &F) { |
548 | return this->TM->getSubtarget<ARMSubtarget>(F).hasMinSize() || |
549 | this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(); |
550 | })); |
551 | |
552 | addPass(P: createIfConverter(Ftor: [](const MachineFunction &MF) { |
553 | return !MF.getSubtarget<ARMSubtarget>().isThumb1Only(); |
554 | })); |
555 | } |
556 | addPass(P: createThumb2ITBlockPass()); |
557 | |
558 | // Add both scheduling passes to give the subtarget an opportunity to pick |
559 | // between them. |
560 | if (getOptLevel() != CodeGenOptLevel::None) { |
561 | addPass(PassID: &PostMachineSchedulerID); |
562 | addPass(PassID: &PostRASchedulerID); |
563 | } |
564 | |
565 | addPass(P: createMVEVPTBlockPass()); |
566 | addPass(P: createARMIndirectThunks()); |
567 | addPass(P: createARMSLSHardeningPass()); |
568 | } |
569 | |
570 | void ARMPassConfig::addPreEmitPass() { |
571 | addPass(P: createThumb2SizeReductionPass()); |
572 | |
573 | // Constant island pass work on unbundled instructions. |
574 | addPass(P: createUnpackMachineBundles(Ftor: [](const MachineFunction &MF) { |
575 | return MF.getSubtarget<ARMSubtarget>().isThumb2(); |
576 | })); |
577 | |
578 | // Don't optimize barriers or block placement at -O0. |
579 | if (getOptLevel() != CodeGenOptLevel::None) { |
580 | addPass(P: createARMBlockPlacementPass()); |
581 | addPass(P: createARMOptimizeBarriersPass()); |
582 | } |
583 | } |
584 | |
585 | void ARMPassConfig::addPreEmitPass2() { |
586 | // Inserts fixup instructions before unsafe AES operations. Instructions may |
587 | // be inserted at the start of blocks and at within blocks so this pass has to |
588 | // come before those below. |
589 | addPass(P: createARMFixCortexA57AES1742098Pass()); |
590 | // Inserts BTIs at the start of functions and indirectly-called basic blocks, |
591 | // so passes cannot add to the start of basic blocks once this has run. |
592 | addPass(P: createARMBranchTargetsPass()); |
593 | // Inserts Constant Islands. Block sizes cannot be increased after this point, |
594 | // as this may push the branch ranges and load offsets of accessing constant |
595 | // pools out of range.. |
596 | addPass(P: createARMConstantIslandPass()); |
597 | // Finalises Low-Overhead Loops. This replaces pseudo instructions with real |
598 | // instructions, but the pseudos all have conservative sizes so that block |
599 | // sizes will only be decreased by this pass. |
600 | addPass(P: createARMLowOverheadLoopsPass()); |
601 | |
602 | if (TM->getTargetTriple().isOSWindows()) { |
603 | // Identify valid longjmp targets for Windows Control Flow Guard. |
604 | addPass(P: createCFGuardLongjmpPass()); |
605 | // Identify valid eh continuation targets for Windows EHCont Guard. |
606 | addPass(P: createEHContGuardTargetsPass()); |
607 | } |
608 | } |
609 | |
610 | yaml::MachineFunctionInfo * |
611 | ARMBaseTargetMachine::createDefaultFuncInfoYAML() const { |
612 | return new yaml::ARMFunctionInfo(); |
613 | } |
614 | |
615 | yaml::MachineFunctionInfo * |
616 | ARMBaseTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { |
617 | const auto *MFI = MF.getInfo<ARMFunctionInfo>(); |
618 | return new yaml::ARMFunctionInfo(*MFI); |
619 | } |
620 | |
621 | bool ARMBaseTargetMachine::parseMachineFunctionInfo( |
622 | const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, |
623 | SMDiagnostic &Error, SMRange &SourceRange) const { |
624 | const auto &YamlMFI = static_cast<const yaml::ARMFunctionInfo &>(MFI); |
625 | MachineFunction &MF = PFS.MF; |
626 | MF.getInfo<ARMFunctionInfo>()->initializeBaseYamlFields(YamlMFI); |
627 | return false; |
628 | } |
629 | |
/// Drop every cached subtarget; getSubtargetImpl() creates a new one on the
/// next lookup.
void ARMBaseTargetMachine::reset() { SubtargetMap.clear(); }
631 | |