1 | //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Top-level implementation for the PowerPC target. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "PPCTargetMachine.h" |
14 | #include "MCTargetDesc/PPCMCTargetDesc.h" |
15 | #include "PPC.h" |
16 | #include "PPCMachineFunctionInfo.h" |
17 | #include "PPCMachineScheduler.h" |
18 | #include "PPCMacroFusion.h" |
19 | #include "PPCSubtarget.h" |
20 | #include "PPCTargetObjectFile.h" |
21 | #include "PPCTargetTransformInfo.h" |
22 | #include "TargetInfo/PowerPCTargetInfo.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/Analysis/TargetTransformInfo.h" |
25 | #include "llvm/CodeGen/GlobalISel/IRTranslator.h" |
26 | #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" |
27 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
28 | #include "llvm/CodeGen/GlobalISel/Localizer.h" |
29 | #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" |
30 | #include "llvm/CodeGen/MachineScheduler.h" |
31 | #include "llvm/CodeGen/Passes.h" |
32 | #include "llvm/CodeGen/TargetPassConfig.h" |
33 | #include "llvm/IR/Attributes.h" |
34 | #include "llvm/IR/DataLayout.h" |
35 | #include "llvm/IR/Function.h" |
36 | #include "llvm/InitializePasses.h" |
37 | #include "llvm/MC/TargetRegistry.h" |
38 | #include "llvm/Pass.h" |
39 | #include "llvm/Support/CodeGen.h" |
40 | #include "llvm/Support/CommandLine.h" |
41 | #include "llvm/Support/Compiler.h" |
42 | #include "llvm/Target/TargetLoweringObjectFile.h" |
43 | #include "llvm/Target/TargetOptions.h" |
44 | #include "llvm/TargetParser/Triple.h" |
45 | #include "llvm/Transforms/Scalar.h" |
46 | #include <cassert> |
47 | #include <memory> |
48 | #include <optional> |
49 | #include <string> |
50 | |
51 | using namespace llvm; |
52 | |
53 | |
54 | static cl::opt<bool> |
55 | EnableBranchCoalescing("enable-ppc-branch-coalesce" , cl::Hidden, |
56 | cl::desc("enable coalescing of duplicate branches for PPC" )); |
57 | static cl:: |
58 | opt<bool> DisableCTRLoops("disable-ppc-ctrloops" , cl::Hidden, |
59 | cl::desc("Disable CTR loops for PPC" )); |
60 | |
61 | static cl:: |
62 | opt<bool> DisableInstrFormPrep("disable-ppc-instr-form-prep" , cl::Hidden, |
63 | cl::desc("Disable PPC loop instr form prep" )); |
64 | |
65 | static cl::opt<bool> |
66 | VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early" , |
67 | cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early" )); |
68 | |
69 | static cl:: |
70 | opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal" , cl::Hidden, |
71 | cl::desc("Disable VSX Swap Removal for PPC" )); |
72 | |
73 | static cl:: |
74 | opt<bool> DisableMIPeephole("disable-ppc-peephole" , cl::Hidden, |
75 | cl::desc("Disable machine peepholes for PPC" )); |
76 | |
77 | static cl::opt<bool> |
78 | EnableGEPOpt("ppc-gep-opt" , cl::Hidden, |
79 | cl::desc("Enable optimizations on complex GEPs" ), |
80 | cl::init(Val: true)); |
81 | |
82 | static cl::opt<bool> |
83 | EnablePrefetch("enable-ppc-prefetching" , |
84 | cl::desc("enable software prefetching on PPC" ), |
85 | cl::init(Val: false), cl::Hidden); |
86 | |
87 | static cl::opt<bool> |
88 | ("enable-ppc-extra-toc-reg-deps" , |
89 | cl::desc("Add extra TOC register dependencies" ), |
90 | cl::init(Val: true), cl::Hidden); |
91 | |
92 | static cl::opt<bool> |
93 | EnableMachineCombinerPass("ppc-machine-combiner" , |
94 | cl::desc("Enable the machine combiner pass" ), |
95 | cl::init(Val: true), cl::Hidden); |
96 | |
97 | static cl::opt<bool> |
98 | ReduceCRLogical("ppc-reduce-cr-logicals" , |
99 | cl::desc("Expand eligible cr-logical binary ops to branches" ), |
100 | cl::init(Val: true), cl::Hidden); |
101 | |
102 | static cl::opt<bool> EnablePPCGenScalarMASSEntries( |
103 | "enable-ppc-gen-scalar-mass" , cl::init(Val: false), |
104 | cl::desc("Enable lowering math functions to their corresponding MASS " |
105 | "(scalar) entries" ), |
106 | cl::Hidden); |
107 | |
108 | static cl::opt<bool> |
109 | EnableGlobalMerge("ppc-global-merge" , cl::Hidden, cl::init(Val: false), |
110 | cl::desc("Enable the global merge pass" )); |
111 | |
112 | static cl::opt<unsigned> |
113 | GlobalMergeMaxOffset("ppc-global-merge-max-offset" , cl::Hidden, |
114 | cl::init(Val: 0x7fff), |
115 | cl::desc("Maximum global merge offset" )); |
116 | |
117 | extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void |
118 | LLVMInitializePowerPCTarget() { |
119 | // Register the targets |
120 | RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); |
121 | RegisterTargetMachine<PPCTargetMachine> B(getThePPC32LETarget()); |
122 | RegisterTargetMachine<PPCTargetMachine> C(getThePPC64Target()); |
123 | RegisterTargetMachine<PPCTargetMachine> D(getThePPC64LETarget()); |
124 | |
125 | PassRegistry &PR = *PassRegistry::getPassRegistry(); |
126 | #ifndef NDEBUG |
127 | initializePPCCTRLoopsVerifyPass(PR); |
128 | #endif |
129 | initializePPCLoopInstrFormPrepPass(PR); |
130 | initializePPCTOCRegDepsPass(PR); |
131 | initializePPCEarlyReturnPass(PR); |
132 | initializePPCVSXCopyPass(PR); |
133 | initializePPCVSXFMAMutatePass(PR); |
134 | initializePPCVSXSwapRemovalPass(PR); |
135 | initializePPCReduceCRLogicalsPass(PR); |
136 | initializePPCBSelPass(PR); |
137 | initializePPCBranchCoalescingPass(PR); |
138 | initializePPCBoolRetToIntPass(PR); |
139 | initializePPCPreEmitPeepholePass(PR); |
140 | initializePPCTLSDynamicCallPass(PR); |
141 | initializePPCMIPeepholePass(PR); |
142 | initializePPCLowerMASSVEntriesPass(PR); |
143 | initializePPCGenScalarMASSEntriesPass(PR); |
144 | initializePPCExpandAtomicPseudoPass(PR); |
145 | initializeGlobalISel(PR); |
146 | initializePPCCTRLoopsPass(PR); |
147 | initializePPCDAGToDAGISelLegacyPass(PR); |
148 | initializePPCLinuxAsmPrinterPass(PR); |
149 | initializePPCAIXAsmPrinterPass(PR); |
150 | } |
151 | |
152 | static bool isLittleEndianTriple(const Triple &T) { |
153 | return T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle; |
154 | } |
155 | |
156 | /// Return the datalayout string of a subtarget. |
157 | static std::string getDataLayoutString(const Triple &T) { |
158 | bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; |
159 | std::string Ret; |
160 | |
161 | // Most PPC* platforms are big endian, PPC(64)LE is little endian. |
162 | if (isLittleEndianTriple(T)) |
163 | Ret = "e" ; |
164 | else |
165 | Ret = "E" ; |
166 | |
167 | Ret += DataLayout::getManglingComponent(T); |
168 | |
169 | // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit |
170 | // pointers. |
171 | if (!is64Bit || T.getOS() == Triple::Lv2) |
172 | Ret += "-p:32:32" ; |
173 | |
174 | // If the target ABI uses function descriptors, then the alignment of function |
175 | // pointers depends on the alignment used to emit the descriptor. Otherwise, |
176 | // function pointers are aligned to 32 bits because the instructions must be. |
177 | if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) { |
178 | Ret += "-Fi64" ; |
179 | } else if (T.isOSAIX()) { |
180 | Ret += is64Bit ? "-Fi64" : "-Fi32" ; |
181 | } else { |
182 | Ret += "-Fn32" ; |
183 | } |
184 | |
185 | // Note, the alignment values for f64 and i64 on ppc64 in Darwin |
186 | // documentation are wrong; these are correct (i.e. "what gcc does"). |
187 | Ret += "-i64:64" ; |
188 | |
189 | // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. |
190 | if (is64Bit) |
191 | Ret += "-i128:128-n32:64" ; |
192 | else |
193 | Ret += "-n32" ; |
194 | |
195 | // Specify the vector alignment explicitly. For v256i1 and v512i1, the |
196 | // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), |
197 | // which is 256 and 512 bytes - way over aligned. |
198 | if (is64Bit && (T.isOSAIX() || T.isOSLinux())) |
199 | Ret += "-S128-v256:256:256-v512:512:512" ; |
200 | |
201 | return Ret; |
202 | } |
203 | |
204 | static std::string computeFSAdditions(StringRef FS, CodeGenOptLevel OL, |
205 | const Triple &TT) { |
206 | std::string FullFS = std::string(FS); |
207 | |
208 | // Make sure 64-bit features are available when CPUname is generic |
209 | if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { |
210 | if (!FullFS.empty()) |
211 | FullFS = "+64bit," + FullFS; |
212 | else |
213 | FullFS = "+64bit" ; |
214 | } |
215 | |
216 | if (OL >= CodeGenOptLevel::Default) { |
217 | if (!FullFS.empty()) |
218 | FullFS = "+crbits," + FullFS; |
219 | else |
220 | FullFS = "+crbits" ; |
221 | } |
222 | |
223 | if (OL != CodeGenOptLevel::None) { |
224 | if (!FullFS.empty()) |
225 | FullFS = "+invariant-function-descriptors," + FullFS; |
226 | else |
227 | FullFS = "+invariant-function-descriptors" ; |
228 | } |
229 | |
230 | if (TT.isOSAIX()) { |
231 | if (!FullFS.empty()) |
232 | FullFS = "+aix," + FullFS; |
233 | else |
234 | FullFS = "+aix" ; |
235 | } |
236 | |
237 | return FullFS; |
238 | } |
239 | |
240 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
241 | if (TT.isOSAIX()) |
242 | return std::make_unique<TargetLoweringObjectFileXCOFF>(); |
243 | |
244 | return std::make_unique<PPC64LinuxTargetObjectFile>(); |
245 | } |
246 | |
247 | static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, |
248 | const TargetOptions &Options) { |
249 | if (Options.MCOptions.getABIName().starts_with(Prefix: "elfv1" )) |
250 | return PPCTargetMachine::PPC_ABI_ELFv1; |
251 | else if (Options.MCOptions.getABIName().starts_with(Prefix: "elfv2" )) |
252 | return PPCTargetMachine::PPC_ABI_ELFv2; |
253 | |
254 | assert(Options.MCOptions.getABIName().empty() && |
255 | "Unknown target-abi option!" ); |
256 | |
257 | switch (TT.getArch()) { |
258 | case Triple::ppc64le: |
259 | return PPCTargetMachine::PPC_ABI_ELFv2; |
260 | case Triple::ppc64: |
261 | if (TT.isPPC64ELFv2ABI()) |
262 | return PPCTargetMachine::PPC_ABI_ELFv2; |
263 | else |
264 | return PPCTargetMachine::PPC_ABI_ELFv1; |
265 | default: |
266 | return PPCTargetMachine::PPC_ABI_UNKNOWN; |
267 | } |
268 | } |
269 | |
270 | static Reloc::Model getEffectiveRelocModel(const Triple &TT, |
271 | std::optional<Reloc::Model> RM) { |
272 | if (TT.isOSAIX() && RM && *RM != Reloc::PIC_) |
273 | report_fatal_error(reason: "invalid relocation model, AIX only supports PIC" , |
274 | gen_crash_diag: false); |
275 | |
276 | if (RM) |
277 | return *RM; |
278 | |
279 | // Big Endian PPC and AIX default to PIC. |
280 | if (TT.getArch() == Triple::ppc64 || TT.isOSAIX()) |
281 | return Reloc::PIC_; |
282 | |
283 | // Rest are static by default. |
284 | return Reloc::Static; |
285 | } |
286 | |
287 | static CodeModel::Model |
288 | getEffectivePPCCodeModel(const Triple &TT, std::optional<CodeModel::Model> CM, |
289 | bool JIT) { |
290 | if (CM) { |
291 | if (*CM == CodeModel::Tiny) |
292 | report_fatal_error(reason: "Target does not support the tiny CodeModel" , gen_crash_diag: false); |
293 | if (*CM == CodeModel::Kernel) |
294 | report_fatal_error(reason: "Target does not support the kernel CodeModel" , gen_crash_diag: false); |
295 | return *CM; |
296 | } |
297 | |
298 | if (JIT) |
299 | return CodeModel::Small; |
300 | if (TT.isOSAIX()) |
301 | return CodeModel::Small; |
302 | |
303 | assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based." ); |
304 | |
305 | if (TT.isArch32Bit()) |
306 | return CodeModel::Small; |
307 | |
308 | assert(TT.isArch64Bit() && "Unsupported PPC architecture." ); |
309 | return CodeModel::Medium; |
310 | } |
311 | |
312 | |
313 | static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { |
314 | const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); |
315 | ScheduleDAGMILive *DAG = ST.usePPCPreRASchedStrategy() |
316 | ? createSchedLive<PPCPreRASchedStrategy>(C) |
317 | : createSchedLive<GenericScheduler>(C); |
318 | // add DAG Mutations here. |
319 | if (ST.hasStoreFusion()) |
320 | DAG->addMutation(Mutation: createStoreClusterDAGMutation(TII: DAG->TII, TRI: DAG->TRI)); |
321 | if (ST.hasFusion()) |
322 | DAG->addMutation(Mutation: createPowerPCMacroFusionDAGMutation()); |
323 | |
324 | return DAG; |
325 | } |
326 | |
327 | static ScheduleDAGInstrs * |
328 | createPPCPostMachineScheduler(MachineSchedContext *C) { |
329 | const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); |
330 | ScheduleDAGMI *DAG = ST.usePPCPostRASchedStrategy() |
331 | ? createSchedPostRA<PPCPostRASchedStrategy>(C) |
332 | : createSchedPostRA<PostGenericScheduler>(C); |
333 | // add DAG Mutations here. |
334 | if (ST.hasStoreFusion()) |
335 | DAG->addMutation(Mutation: createStoreClusterDAGMutation(TII: DAG->TII, TRI: DAG->TRI)); |
336 | if (ST.hasFusion()) |
337 | DAG->addMutation(Mutation: createPowerPCMacroFusionDAGMutation()); |
338 | return DAG; |
339 | } |
340 | |
341 | // The FeatureString here is a little subtle. We are modifying the feature |
342 | // string with what are (currently) non-function specific overrides as it goes |
343 | // into the CodeGenTargetMachineImpl constructor and then using the stored value |
344 | // in the Subtarget constructor below it. |
345 | PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, |
346 | StringRef CPU, StringRef FS, |
347 | const TargetOptions &Options, |
348 | std::optional<Reloc::Model> RM, |
349 | std::optional<CodeModel::Model> CM, |
350 | CodeGenOptLevel OL, bool JIT) |
351 | : CodeGenTargetMachineImpl(T, getDataLayoutString(T: TT), TT, CPU, |
352 | computeFSAdditions(FS, OL, TT), Options, |
353 | getEffectiveRelocModel(TT, RM), |
354 | getEffectivePPCCodeModel(TT, CM, JIT), OL), |
355 | TLOF(createTLOF(TT: getTargetTriple())), |
356 | TargetABI(computeTargetABI(TT, Options)), |
357 | Endianness(isLittleEndianTriple(T: TT) ? Endian::LITTLE : Endian::BIG) { |
358 | initAsmInfo(); |
359 | } |
360 | |
361 | PPCTargetMachine::~PPCTargetMachine() = default; |
362 | |
363 | const PPCSubtarget * |
364 | PPCTargetMachine::getSubtargetImpl(const Function &F) const { |
365 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
366 | Attribute TuneAttr = F.getFnAttribute(Kind: "tune-cpu" ); |
367 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
368 | |
369 | std::string CPU = |
370 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
371 | std::string TuneCPU = |
372 | TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; |
373 | std::string FS = |
374 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
375 | |
376 | // FIXME: This is related to the code below to reset the target options, |
377 | // we need to know whether or not the soft float flag is set on the |
378 | // function before we can generate a subtarget. We also need to use |
379 | // it as a key for the subtarget since that can be the only difference |
380 | // between two functions. |
381 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
382 | // If the soft float attribute is set on the function turn on the soft float |
383 | // subtarget feature. |
384 | if (SoftFloat) |
385 | FS += FS.empty() ? "-hard-float" : ",-hard-float" ; |
386 | |
387 | auto &I = SubtargetMap[CPU + TuneCPU + FS]; |
388 | if (!I) { |
389 | // This needs to be done before we create a new subtarget since any |
390 | // creation will depend on the TM and the code generation flags on the |
391 | // function that reside in TargetOptions. |
392 | resetTargetOptions(F); |
393 | I = std::make_unique<PPCSubtarget>( |
394 | args: TargetTriple, args&: CPU, args&: TuneCPU, |
395 | // FIXME: It would be good to have the subtarget additions here |
396 | // not necessary. Anything that turns them on/off (overrides) ends |
397 | // up being put at the end of the feature string, but the defaults |
398 | // shouldn't require adding them. Fixing this means pulling Feature64Bit |
399 | // out of most of the target cpus in the .td file and making it set only |
400 | // as part of initialization via the TargetTriple. |
401 | args: computeFSAdditions(FS, OL: getOptLevel(), TT: getTargetTriple()), args: *this); |
402 | } |
403 | return I.get(); |
404 | } |
405 | |
406 | ScheduleDAGInstrs * |
407 | PPCTargetMachine::createMachineScheduler(MachineSchedContext *C) const { |
408 | return createPPCMachineScheduler(C); |
409 | } |
410 | |
411 | ScheduleDAGInstrs * |
412 | PPCTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const { |
413 | return createPPCPostMachineScheduler(C); |
414 | } |
415 | |
416 | //===----------------------------------------------------------------------===// |
417 | // Pass Pipeline Configuration |
418 | //===----------------------------------------------------------------------===// |
419 | |
420 | namespace { |
421 | |
422 | /// PPC Code Generator Pass Configuration Options. |
423 | class PPCPassConfig : public TargetPassConfig { |
424 | public: |
425 | PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM) |
426 | : TargetPassConfig(TM, PM) { |
427 | // At any optimization level above -O0 we use the Machine Scheduler and not |
428 | // the default Post RA List Scheduler. |
429 | if (TM.getOptLevel() != CodeGenOptLevel::None) |
430 | substitutePass(StandardID: &PostRASchedulerID, TargetID: &PostMachineSchedulerID); |
431 | } |
432 | |
433 | PPCTargetMachine &getPPCTargetMachine() const { |
434 | return getTM<PPCTargetMachine>(); |
435 | } |
436 | |
437 | void addIRPasses() override; |
438 | bool addPreISel() override; |
439 | bool addILPOpts() override; |
440 | bool addInstSelector() override; |
441 | void addMachineSSAOptimization() override; |
442 | void addPreRegAlloc() override; |
443 | void addPreSched2() override; |
444 | void addPreEmitPass() override; |
445 | void addPreEmitPass2() override; |
446 | // GlobalISEL |
447 | bool addIRTranslator() override; |
448 | bool addLegalizeMachineIR() override; |
449 | bool addRegBankSelect() override; |
450 | bool addGlobalInstructionSelect() override; |
451 | }; |
452 | |
453 | } // end anonymous namespace |
454 | |
455 | TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { |
456 | return new PPCPassConfig(*this, PM); |
457 | } |
458 | |
459 | void PPCPassConfig::addIRPasses() { |
460 | if (TM->getOptLevel() != CodeGenOptLevel::None) |
461 | addPass(P: createPPCBoolRetToIntPass()); |
462 | addPass(P: createAtomicExpandLegacyPass()); |
463 | |
464 | // Lower generic MASSV routines to PowerPC subtarget-specific entries. |
465 | addPass(P: createPPCLowerMASSVEntriesPass()); |
466 | |
467 | // Generate PowerPC target-specific entries for scalar math functions |
468 | // that are available in IBM MASS (scalar) library. |
469 | if (TM->getOptLevel() == CodeGenOptLevel::Aggressive && |
470 | EnablePPCGenScalarMASSEntries) { |
471 | TM->Options.PPCGenScalarMASSEntries = EnablePPCGenScalarMASSEntries; |
472 | addPass(P: createPPCGenScalarMASSEntriesPass()); |
473 | } |
474 | |
475 | // If explicitly requested, add explicit data prefetch intrinsics. |
476 | if (EnablePrefetch.getNumOccurrences() > 0) |
477 | addPass(P: createLoopDataPrefetchPass()); |
478 | |
479 | if (TM->getOptLevel() >= CodeGenOptLevel::Default && EnableGEPOpt) { |
480 | // Call SeparateConstOffsetFromGEP pass to extract constants within indices |
481 | // and lower a GEP with multiple indices to either arithmetic operations or |
482 | // multiple GEPs with single index. |
483 | addPass(P: createSeparateConstOffsetFromGEPPass(LowerGEP: true)); |
484 | // Call EarlyCSE pass to find and remove subexpressions in the lowered |
485 | // result. |
486 | addPass(P: createEarlyCSEPass()); |
487 | // Do loop invariant code motion in case part of the lowered result is |
488 | // invariant. |
489 | addPass(P: createLICMPass()); |
490 | } |
491 | |
492 | TargetPassConfig::addIRPasses(); |
493 | } |
494 | |
495 | bool PPCPassConfig::addPreISel() { |
496 | // The GlobalMerge pass is intended to be on by default on AIX. |
497 | // Specifying the command line option overrides the AIX default. |
498 | if ((EnableGlobalMerge.getNumOccurrences() > 0) |
499 | ? EnableGlobalMerge |
500 | : getOptLevel() != CodeGenOptLevel::None) |
501 | addPass(P: createGlobalMergePass(TM, MaximalOffset: GlobalMergeMaxOffset, OnlyOptimizeForSize: false, MergeExternalByDefault: false, MergeConstantByDefault: true, |
502 | MergeConstAggressiveByDefault: true)); |
503 | |
504 | if (!DisableInstrFormPrep && getOptLevel() != CodeGenOptLevel::None) |
505 | addPass(P: createPPCLoopInstrFormPrepPass(TM&: getPPCTargetMachine())); |
506 | |
507 | if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None) |
508 | addPass(P: createHardwareLoopsLegacyPass()); |
509 | |
510 | return false; |
511 | } |
512 | |
513 | bool PPCPassConfig::addILPOpts() { |
514 | addPass(PassID: &EarlyIfConverterLegacyID); |
515 | |
516 | if (EnableMachineCombinerPass) |
517 | addPass(PassID: &MachineCombinerID); |
518 | |
519 | return true; |
520 | } |
521 | |
522 | bool PPCPassConfig::addInstSelector() { |
523 | // Install an instruction selector. |
524 | addPass(P: createPPCISelDag(TM&: getPPCTargetMachine(), OL: getOptLevel())); |
525 | |
526 | #ifndef NDEBUG |
527 | if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None) |
528 | addPass(createPPCCTRLoopsVerify()); |
529 | #endif |
530 | |
531 | addPass(P: createPPCVSXCopyPass()); |
532 | return false; |
533 | } |
534 | |
535 | void PPCPassConfig::addMachineSSAOptimization() { |
536 | // Run CTR loops pass before any cfg modification pass to prevent the |
537 | // canonical form of hardware loop from being destroied. |
538 | if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None) |
539 | addPass(P: createPPCCTRLoopsPass()); |
540 | |
541 | // PPCBranchCoalescingPass need to be done before machine sinking |
542 | // since it merges empty blocks. |
543 | if (EnableBranchCoalescing && getOptLevel() != CodeGenOptLevel::None) |
544 | addPass(P: createPPCBranchCoalescingPass()); |
545 | TargetPassConfig::addMachineSSAOptimization(); |
546 | // For little endian, remove where possible the vector swap instructions |
547 | // introduced at code generation to normalize vector element order. |
548 | if (TM->getTargetTriple().getArch() == Triple::ppc64le && |
549 | !DisableVSXSwapRemoval) |
550 | addPass(P: createPPCVSXSwapRemovalPass()); |
551 | // Reduce the number of cr-logical ops. |
552 | if (ReduceCRLogical && getOptLevel() != CodeGenOptLevel::None) |
553 | addPass(P: createPPCReduceCRLogicalsPass()); |
554 | // Target-specific peephole cleanups performed after instruction |
555 | // selection. |
556 | if (!DisableMIPeephole) { |
557 | addPass(P: createPPCMIPeepholePass()); |
558 | addPass(PassID: &DeadMachineInstructionElimID); |
559 | } |
560 | } |
561 | |
562 | void PPCPassConfig::addPreRegAlloc() { |
563 | if (getOptLevel() != CodeGenOptLevel::None) { |
564 | insertPass(TargetPassID: VSXFMAMutateEarly ? &TwoAddressInstructionPassID |
565 | : &MachineSchedulerID, |
566 | InsertedPassID: &PPCVSXFMAMutateID); |
567 | } |
568 | |
569 | // FIXME: We probably don't need to run these for -fPIE. |
570 | if (getPPCTargetMachine().isPositionIndependent()) { |
571 | // FIXME: LiveVariables should not be necessary here! |
572 | // PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on |
573 | // LiveVariables. This (unnecessary) dependency has been removed now, |
574 | // however a stage-2 clang build fails without LiveVariables computed here. |
575 | addPass(PassID: &LiveVariablesID); |
576 | addPass(P: createPPCTLSDynamicCallPass()); |
577 | } |
578 | if (EnableExtraTOCRegDeps) |
579 | addPass(P: createPPCTOCRegDepsPass()); |
580 | |
581 | if (getOptLevel() != CodeGenOptLevel::None) |
582 | addPass(PassID: &MachinePipelinerID); |
583 | } |
584 | |
585 | void PPCPassConfig::addPreSched2() { |
586 | if (getOptLevel() != CodeGenOptLevel::None) |
587 | addPass(PassID: &IfConverterID); |
588 | } |
589 | |
590 | void PPCPassConfig::addPreEmitPass() { |
591 | addPass(P: createPPCPreEmitPeepholePass()); |
592 | |
593 | if (getOptLevel() != CodeGenOptLevel::None) |
594 | addPass(P: createPPCEarlyReturnPass()); |
595 | } |
596 | |
597 | void PPCPassConfig::addPreEmitPass2() { |
598 | // Schedule the expansion of AMOs at the last possible moment, avoiding the |
599 | // possibility for other passes to break the requirements for forward |
600 | // progress in the LL/SC block. |
601 | addPass(P: createPPCExpandAtomicPseudoPass()); |
602 | // Must run branch selection immediately preceding the asm printer. |
603 | addPass(P: createPPCBranchSelectionPass()); |
604 | } |
605 | |
606 | TargetTransformInfo |
607 | PPCTargetMachine::getTargetTransformInfo(const Function &F) const { |
608 | return TargetTransformInfo(std::make_unique<PPCTTIImpl>(args: this, args: F)); |
609 | } |
610 | |
611 | bool PPCTargetMachine::isLittleEndian() const { |
612 | assert(Endianness != Endian::NOT_DETECTED && |
613 | "Unable to determine endianness" ); |
614 | return Endianness == Endian::LITTLE; |
615 | } |
616 | |
617 | MachineFunctionInfo *PPCTargetMachine::createMachineFunctionInfo( |
618 | BumpPtrAllocator &Allocator, const Function &F, |
619 | const TargetSubtargetInfo *STI) const { |
620 | return PPCFunctionInfo::create<PPCFunctionInfo>(Allocator, F, STI); |
621 | } |
622 | |
623 | static MachineSchedRegistry |
624 | PPCPreRASchedRegistry("ppc-prera" , |
625 | "Run PowerPC PreRA specific scheduler" , |
626 | createPPCMachineScheduler); |
627 | |
628 | static MachineSchedRegistry |
629 | PPCPostRASchedRegistry("ppc-postra" , |
630 | "Run PowerPC PostRA specific scheduler" , |
631 | createPPCPostMachineScheduler); |
632 | |
633 | // Global ISEL |
634 | bool PPCPassConfig::addIRTranslator() { |
635 | addPass(P: new IRTranslator()); |
636 | return false; |
637 | } |
638 | |
639 | bool PPCPassConfig::addLegalizeMachineIR() { |
640 | addPass(P: new Legalizer()); |
641 | return false; |
642 | } |
643 | |
644 | bool PPCPassConfig::addRegBankSelect() { |
645 | addPass(P: new RegBankSelect()); |
646 | return false; |
647 | } |
648 | |
649 | bool PPCPassConfig::addGlobalInstructionSelect() { |
650 | addPass(P: new InstructionSelect(getOptLevel())); |
651 | return false; |
652 | } |
653 | |