1 | //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Top-level implementation for the PowerPC target. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "PPCTargetMachine.h" |
14 | #include "MCTargetDesc/PPCMCTargetDesc.h" |
15 | #include "PPC.h" |
16 | #include "PPCMachineFunctionInfo.h" |
17 | #include "PPCMachineScheduler.h" |
18 | #include "PPCMacroFusion.h" |
19 | #include "PPCSubtarget.h" |
20 | #include "PPCTargetObjectFile.h" |
21 | #include "PPCTargetTransformInfo.h" |
22 | #include "TargetInfo/PowerPCTargetInfo.h" |
23 | #include "llvm/ADT/STLExtras.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | #include "llvm/CodeGen/GlobalISel/IRTranslator.h" |
27 | #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" |
28 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
29 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
30 | #include "llvm/CodeGen/GlobalISel/Localizer.h" |
31 | #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" |
32 | #include "llvm/CodeGen/MachineScheduler.h" |
33 | #include "llvm/CodeGen/Passes.h" |
34 | #include "llvm/CodeGen/TargetPassConfig.h" |
35 | #include "llvm/IR/Attributes.h" |
36 | #include "llvm/IR/DataLayout.h" |
37 | #include "llvm/IR/Function.h" |
38 | #include "llvm/InitializePasses.h" |
39 | #include "llvm/MC/TargetRegistry.h" |
40 | #include "llvm/Pass.h" |
41 | #include "llvm/Support/CodeGen.h" |
42 | #include "llvm/Support/CommandLine.h" |
43 | #include "llvm/Target/TargetLoweringObjectFile.h" |
44 | #include "llvm/Target/TargetOptions.h" |
45 | #include "llvm/TargetParser/Triple.h" |
46 | #include "llvm/Transforms/Scalar.h" |
47 | #include <cassert> |
48 | #include <memory> |
49 | #include <optional> |
50 | #include <string> |
51 | |
52 | using namespace llvm; |
53 | |
54 | |
55 | static cl::opt<bool> |
56 | EnableBranchCoalescing("enable-ppc-branch-coalesce" , cl::Hidden, |
57 | cl::desc("enable coalescing of duplicate branches for PPC" )); |
58 | static cl:: |
59 | opt<bool> DisableCTRLoops("disable-ppc-ctrloops" , cl::Hidden, |
60 | cl::desc("Disable CTR loops for PPC" )); |
61 | |
62 | static cl:: |
63 | opt<bool> DisableInstrFormPrep("disable-ppc-instr-form-prep" , cl::Hidden, |
64 | cl::desc("Disable PPC loop instr form prep" )); |
65 | |
66 | static cl::opt<bool> |
67 | VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early" , |
68 | cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early" )); |
69 | |
70 | static cl:: |
71 | opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal" , cl::Hidden, |
72 | cl::desc("Disable VSX Swap Removal for PPC" )); |
73 | |
74 | static cl:: |
75 | opt<bool> DisableMIPeephole("disable-ppc-peephole" , cl::Hidden, |
76 | cl::desc("Disable machine peepholes for PPC" )); |
77 | |
78 | static cl::opt<bool> |
79 | EnableGEPOpt("ppc-gep-opt" , cl::Hidden, |
80 | cl::desc("Enable optimizations on complex GEPs" ), |
81 | cl::init(Val: true)); |
82 | |
83 | static cl::opt<bool> |
84 | EnablePrefetch("enable-ppc-prefetching" , |
85 | cl::desc("enable software prefetching on PPC" ), |
86 | cl::init(Val: false), cl::Hidden); |
87 | |
88 | static cl::opt<bool> |
89 | ("enable-ppc-extra-toc-reg-deps" , |
90 | cl::desc("Add extra TOC register dependencies" ), |
91 | cl::init(Val: true), cl::Hidden); |
92 | |
93 | static cl::opt<bool> |
94 | EnableMachineCombinerPass("ppc-machine-combiner" , |
95 | cl::desc("Enable the machine combiner pass" ), |
96 | cl::init(Val: true), cl::Hidden); |
97 | |
98 | static cl::opt<bool> |
99 | ReduceCRLogical("ppc-reduce-cr-logicals" , |
100 | cl::desc("Expand eligible cr-logical binary ops to branches" ), |
101 | cl::init(Val: true), cl::Hidden); |
102 | |
103 | static cl::opt<bool> MergeStringPool( |
104 | "ppc-merge-string-pool" , |
105 | cl::desc("Merge all of the strings in a module into one pool" ), |
106 | cl::init(Val: true), cl::Hidden); |
107 | |
108 | static cl::opt<bool> EnablePPCGenScalarMASSEntries( |
109 | "enable-ppc-gen-scalar-mass" , cl::init(Val: false), |
110 | cl::desc("Enable lowering math functions to their corresponding MASS " |
111 | "(scalar) entries" ), |
112 | cl::Hidden); |
113 | |
114 | extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { |
115 | // Register the targets |
116 | RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); |
117 | RegisterTargetMachine<PPCTargetMachine> B(getThePPC32LETarget()); |
118 | RegisterTargetMachine<PPCTargetMachine> C(getThePPC64Target()); |
119 | RegisterTargetMachine<PPCTargetMachine> D(getThePPC64LETarget()); |
120 | |
121 | PassRegistry &PR = *PassRegistry::getPassRegistry(); |
122 | #ifndef NDEBUG |
123 | initializePPCCTRLoopsVerifyPass(PR); |
124 | #endif |
125 | initializePPCLoopInstrFormPrepPass(PR); |
126 | initializePPCTOCRegDepsPass(PR); |
127 | initializePPCEarlyReturnPass(PR); |
128 | initializePPCVSXCopyPass(PR); |
129 | initializePPCVSXFMAMutatePass(PR); |
130 | initializePPCVSXSwapRemovalPass(PR); |
131 | initializePPCReduceCRLogicalsPass(PR); |
132 | initializePPCBSelPass(PR); |
133 | initializePPCBranchCoalescingPass(PR); |
134 | initializePPCBoolRetToIntPass(PR); |
135 | initializePPCExpandISELPass(PR); |
136 | initializePPCPreEmitPeepholePass(PR); |
137 | initializePPCTLSDynamicCallPass(PR); |
138 | initializePPCMIPeepholePass(PR); |
139 | initializePPCLowerMASSVEntriesPass(PR); |
140 | initializePPCGenScalarMASSEntriesPass(PR); |
141 | initializePPCExpandAtomicPseudoPass(PR); |
142 | initializeGlobalISel(PR); |
143 | initializePPCCTRLoopsPass(PR); |
144 | initializePPCDAGToDAGISelLegacyPass(PR); |
145 | initializePPCMergeStringPoolPass(PR); |
146 | } |
147 | |
148 | static bool isLittleEndianTriple(const Triple &T) { |
149 | return T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle; |
150 | } |
151 | |
152 | /// Return the datalayout string of a subtarget. |
153 | static std::string getDataLayoutString(const Triple &T) { |
154 | bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; |
155 | std::string Ret; |
156 | |
157 | // Most PPC* platforms are big endian, PPC(64)LE is little endian. |
158 | if (isLittleEndianTriple(T)) |
159 | Ret = "e" ; |
160 | else |
161 | Ret = "E" ; |
162 | |
163 | Ret += DataLayout::getManglingComponent(T); |
164 | |
165 | // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit |
166 | // pointers. |
167 | if (!is64Bit || T.getOS() == Triple::Lv2) |
168 | Ret += "-p:32:32" ; |
169 | |
170 | // If the target ABI uses function descriptors, then the alignment of function |
171 | // pointers depends on the alignment used to emit the descriptor. Otherwise, |
172 | // function pointers are aligned to 32 bits because the instructions must be. |
173 | if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) { |
174 | Ret += "-Fi64" ; |
175 | } else if (T.isOSAIX()) { |
176 | Ret += is64Bit ? "-Fi64" : "-Fi32" ; |
177 | } else { |
178 | Ret += "-Fn32" ; |
179 | } |
180 | |
181 | // Note, the alignment values for f64 and i64 on ppc64 in Darwin |
182 | // documentation are wrong; these are correct (i.e. "what gcc does"). |
183 | Ret += "-i64:64" ; |
184 | |
185 | // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. |
186 | if (is64Bit) |
187 | Ret += "-n32:64" ; |
188 | else |
189 | Ret += "-n32" ; |
190 | |
191 | // Specify the vector alignment explicitly. For v256i1 and v512i1, the |
192 | // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), |
193 | // which is 256 and 512 bytes - way over aligned. |
194 | if (is64Bit && (T.isOSAIX() || T.isOSLinux())) |
195 | Ret += "-S128-v256:256:256-v512:512:512" ; |
196 | |
197 | return Ret; |
198 | } |
199 | |
200 | static std::string computeFSAdditions(StringRef FS, CodeGenOptLevel OL, |
201 | const Triple &TT) { |
202 | std::string FullFS = std::string(FS); |
203 | |
204 | // Make sure 64-bit features are available when CPUname is generic |
205 | if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { |
206 | if (!FullFS.empty()) |
207 | FullFS = "+64bit," + FullFS; |
208 | else |
209 | FullFS = "+64bit" ; |
210 | } |
211 | |
212 | if (OL >= CodeGenOptLevel::Default) { |
213 | if (!FullFS.empty()) |
214 | FullFS = "+crbits," + FullFS; |
215 | else |
216 | FullFS = "+crbits" ; |
217 | } |
218 | |
219 | if (OL != CodeGenOptLevel::None) { |
220 | if (!FullFS.empty()) |
221 | FullFS = "+invariant-function-descriptors," + FullFS; |
222 | else |
223 | FullFS = "+invariant-function-descriptors" ; |
224 | } |
225 | |
226 | if (TT.isOSAIX()) { |
227 | if (!FullFS.empty()) |
228 | FullFS = "+aix," + FullFS; |
229 | else |
230 | FullFS = "+aix" ; |
231 | } |
232 | |
233 | return FullFS; |
234 | } |
235 | |
236 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
237 | if (TT.isOSAIX()) |
238 | return std::make_unique<TargetLoweringObjectFileXCOFF>(); |
239 | |
240 | return std::make_unique<PPC64LinuxTargetObjectFile>(); |
241 | } |
242 | |
243 | static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, |
244 | const TargetOptions &Options) { |
245 | if (Options.MCOptions.getABIName().starts_with(Prefix: "elfv1" )) |
246 | return PPCTargetMachine::PPC_ABI_ELFv1; |
247 | else if (Options.MCOptions.getABIName().starts_with(Prefix: "elfv2" )) |
248 | return PPCTargetMachine::PPC_ABI_ELFv2; |
249 | |
250 | assert(Options.MCOptions.getABIName().empty() && |
251 | "Unknown target-abi option!" ); |
252 | |
253 | switch (TT.getArch()) { |
254 | case Triple::ppc64le: |
255 | return PPCTargetMachine::PPC_ABI_ELFv2; |
256 | case Triple::ppc64: |
257 | if (TT.isPPC64ELFv2ABI()) |
258 | return PPCTargetMachine::PPC_ABI_ELFv2; |
259 | else |
260 | return PPCTargetMachine::PPC_ABI_ELFv1; |
261 | default: |
262 | return PPCTargetMachine::PPC_ABI_UNKNOWN; |
263 | } |
264 | } |
265 | |
266 | static Reloc::Model getEffectiveRelocModel(const Triple &TT, |
267 | std::optional<Reloc::Model> RM) { |
268 | if (TT.isOSAIX() && RM && *RM != Reloc::PIC_) |
269 | report_fatal_error(reason: "invalid relocation model, AIX only supports PIC" , |
270 | gen_crash_diag: false); |
271 | |
272 | if (RM) |
273 | return *RM; |
274 | |
275 | // Big Endian PPC and AIX default to PIC. |
276 | if (TT.getArch() == Triple::ppc64 || TT.isOSAIX()) |
277 | return Reloc::PIC_; |
278 | |
279 | // Rest are static by default. |
280 | return Reloc::Static; |
281 | } |
282 | |
283 | static CodeModel::Model |
284 | getEffectivePPCCodeModel(const Triple &TT, std::optional<CodeModel::Model> CM, |
285 | bool JIT) { |
286 | if (CM) { |
287 | if (*CM == CodeModel::Tiny) |
288 | report_fatal_error(reason: "Target does not support the tiny CodeModel" , gen_crash_diag: false); |
289 | if (*CM == CodeModel::Kernel) |
290 | report_fatal_error(reason: "Target does not support the kernel CodeModel" , gen_crash_diag: false); |
291 | return *CM; |
292 | } |
293 | |
294 | if (JIT) |
295 | return CodeModel::Small; |
296 | if (TT.isOSAIX()) |
297 | return CodeModel::Small; |
298 | |
299 | assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based." ); |
300 | |
301 | if (TT.isArch32Bit()) |
302 | return CodeModel::Small; |
303 | |
304 | assert(TT.isArch64Bit() && "Unsupported PPC architecture." ); |
305 | return CodeModel::Medium; |
306 | } |
307 | |
308 | |
309 | static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { |
310 | const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); |
311 | ScheduleDAGMILive *DAG = |
312 | new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ? |
313 | std::make_unique<PPCPreRASchedStrategy>(args&: C) : |
314 | std::make_unique<GenericScheduler>(args&: C)); |
315 | // add DAG Mutations here. |
316 | DAG->addMutation(Mutation: createCopyConstrainDAGMutation(TII: DAG->TII, TRI: DAG->TRI)); |
317 | if (ST.hasStoreFusion()) |
318 | DAG->addMutation(Mutation: createStoreClusterDAGMutation(TII: DAG->TII, TRI: DAG->TRI)); |
319 | if (ST.hasFusion()) |
320 | DAG->addMutation(Mutation: createPowerPCMacroFusionDAGMutation()); |
321 | |
322 | return DAG; |
323 | } |
324 | |
325 | static ScheduleDAGInstrs *createPPCPostMachineScheduler( |
326 | MachineSchedContext *C) { |
327 | const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); |
328 | ScheduleDAGMI *DAG = |
329 | new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ? |
330 | std::make_unique<PPCPostRASchedStrategy>(args&: C) : |
331 | std::make_unique<PostGenericScheduler>(args&: C), true); |
332 | // add DAG Mutations here. |
333 | if (ST.hasStoreFusion()) |
334 | DAG->addMutation(Mutation: createStoreClusterDAGMutation(TII: DAG->TII, TRI: DAG->TRI)); |
335 | if (ST.hasFusion()) |
336 | DAG->addMutation(Mutation: createPowerPCMacroFusionDAGMutation()); |
337 | return DAG; |
338 | } |
339 | |
340 | // The FeatureString here is a little subtle. We are modifying the feature |
341 | // string with what are (currently) non-function specific overrides as it goes |
342 | // into the LLVMTargetMachine constructor and then using the stored value in the |
343 | // Subtarget constructor below it. |
344 | PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, |
345 | StringRef CPU, StringRef FS, |
346 | const TargetOptions &Options, |
347 | std::optional<Reloc::Model> RM, |
348 | std::optional<CodeModel::Model> CM, |
349 | CodeGenOptLevel OL, bool JIT) |
350 | : LLVMTargetMachine(T, getDataLayoutString(T: TT), TT, CPU, |
351 | computeFSAdditions(FS, OL, TT), Options, |
352 | getEffectiveRelocModel(TT, RM), |
353 | getEffectivePPCCodeModel(TT, CM, JIT), OL), |
354 | TLOF(createTLOF(TT: getTargetTriple())), |
355 | TargetABI(computeTargetABI(TT, Options)), |
356 | Endianness(isLittleEndianTriple(T: TT) ? Endian::LITTLE : Endian::BIG) { |
357 | initAsmInfo(); |
358 | } |
359 | |
360 | PPCTargetMachine::~PPCTargetMachine() = default; |
361 | |
362 | const PPCSubtarget * |
363 | PPCTargetMachine::getSubtargetImpl(const Function &F) const { |
364 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
365 | Attribute TuneAttr = F.getFnAttribute(Kind: "tune-cpu" ); |
366 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
367 | |
368 | std::string CPU = |
369 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
370 | std::string TuneCPU = |
371 | TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; |
372 | std::string FS = |
373 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
374 | |
375 | // FIXME: This is related to the code below to reset the target options, |
376 | // we need to know whether or not the soft float flag is set on the |
377 | // function before we can generate a subtarget. We also need to use |
378 | // it as a key for the subtarget since that can be the only difference |
379 | // between two functions. |
380 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
381 | // If the soft float attribute is set on the function turn on the soft float |
382 | // subtarget feature. |
383 | if (SoftFloat) |
384 | FS += FS.empty() ? "-hard-float" : ",-hard-float" ; |
385 | |
386 | auto &I = SubtargetMap[CPU + TuneCPU + FS]; |
387 | if (!I) { |
388 | // This needs to be done before we create a new subtarget since any |
389 | // creation will depend on the TM and the code generation flags on the |
390 | // function that reside in TargetOptions. |
391 | resetTargetOptions(F); |
392 | I = std::make_unique<PPCSubtarget>( |
393 | args: TargetTriple, args&: CPU, args&: TuneCPU, |
394 | // FIXME: It would be good to have the subtarget additions here |
395 | // not necessary. Anything that turns them on/off (overrides) ends |
396 | // up being put at the end of the feature string, but the defaults |
397 | // shouldn't require adding them. Fixing this means pulling Feature64Bit |
398 | // out of most of the target cpus in the .td file and making it set only |
399 | // as part of initialization via the TargetTriple. |
400 | args: computeFSAdditions(FS, OL: getOptLevel(), TT: getTargetTriple()), args: *this); |
401 | } |
402 | return I.get(); |
403 | } |
404 | |
405 | //===----------------------------------------------------------------------===// |
406 | // Pass Pipeline Configuration |
407 | //===----------------------------------------------------------------------===// |
408 | |
409 | namespace { |
410 | |
411 | /// PPC Code Generator Pass Configuration Options. |
412 | class PPCPassConfig : public TargetPassConfig { |
413 | public: |
414 | PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM) |
415 | : TargetPassConfig(TM, PM) { |
416 | // At any optimization level above -O0 we use the Machine Scheduler and not |
417 | // the default Post RA List Scheduler. |
418 | if (TM.getOptLevel() != CodeGenOptLevel::None) |
419 | substitutePass(StandardID: &PostRASchedulerID, TargetID: &PostMachineSchedulerID); |
420 | } |
421 | |
422 | PPCTargetMachine &getPPCTargetMachine() const { |
423 | return getTM<PPCTargetMachine>(); |
424 | } |
425 | |
426 | void addIRPasses() override; |
427 | bool addPreISel() override; |
428 | bool addILPOpts() override; |
429 | bool addInstSelector() override; |
430 | void addMachineSSAOptimization() override; |
431 | void addPreRegAlloc() override; |
432 | void addPreSched2() override; |
433 | void addPreEmitPass() override; |
434 | void addPreEmitPass2() override; |
435 | // GlobalISEL |
436 | bool addIRTranslator() override; |
437 | bool addLegalizeMachineIR() override; |
438 | bool addRegBankSelect() override; |
439 | bool addGlobalInstructionSelect() override; |
440 | |
441 | ScheduleDAGInstrs * |
442 | createMachineScheduler(MachineSchedContext *C) const override { |
443 | return createPPCMachineScheduler(C); |
444 | } |
445 | ScheduleDAGInstrs * |
446 | createPostMachineScheduler(MachineSchedContext *C) const override { |
447 | return createPPCPostMachineScheduler(C); |
448 | } |
449 | }; |
450 | |
451 | } // end anonymous namespace |
452 | |
453 | TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { |
454 | return new PPCPassConfig(*this, PM); |
455 | } |
456 | |
457 | void PPCPassConfig::addIRPasses() { |
458 | if (TM->getOptLevel() != CodeGenOptLevel::None) |
459 | addPass(P: createPPCBoolRetToIntPass()); |
460 | addPass(P: createAtomicExpandLegacyPass()); |
461 | |
462 | // Lower generic MASSV routines to PowerPC subtarget-specific entries. |
463 | addPass(P: createPPCLowerMASSVEntriesPass()); |
464 | |
465 | // Generate PowerPC target-specific entries for scalar math functions |
466 | // that are available in IBM MASS (scalar) library. |
467 | if (TM->getOptLevel() == CodeGenOptLevel::Aggressive && |
468 | EnablePPCGenScalarMASSEntries) { |
469 | TM->Options.PPCGenScalarMASSEntries = EnablePPCGenScalarMASSEntries; |
470 | addPass(P: createPPCGenScalarMASSEntriesPass()); |
471 | } |
472 | |
473 | // If explicitly requested, add explicit data prefetch intrinsics. |
474 | if (EnablePrefetch.getNumOccurrences() > 0) |
475 | addPass(P: createLoopDataPrefetchPass()); |
476 | |
477 | if (TM->getOptLevel() >= CodeGenOptLevel::Default && EnableGEPOpt) { |
478 | // Call SeparateConstOffsetFromGEP pass to extract constants within indices |
479 | // and lower a GEP with multiple indices to either arithmetic operations or |
480 | // multiple GEPs with single index. |
481 | addPass(P: createSeparateConstOffsetFromGEPPass(LowerGEP: true)); |
482 | // Call EarlyCSE pass to find and remove subexpressions in the lowered |
483 | // result. |
484 | addPass(P: createEarlyCSEPass()); |
485 | // Do loop invariant code motion in case part of the lowered result is |
486 | // invariant. |
487 | addPass(P: createLICMPass()); |
488 | } |
489 | |
490 | TargetPassConfig::addIRPasses(); |
491 | } |
492 | |
493 | bool PPCPassConfig::addPreISel() { |
494 | if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None) |
495 | addPass(P: createPPCMergeStringPoolPass()); |
496 | |
497 | if (!DisableInstrFormPrep && getOptLevel() != CodeGenOptLevel::None) |
498 | addPass(P: createPPCLoopInstrFormPrepPass(TM&: getPPCTargetMachine())); |
499 | |
500 | if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None) |
501 | addPass(P: createHardwareLoopsLegacyPass()); |
502 | |
503 | return false; |
504 | } |
505 | |
506 | bool PPCPassConfig::addILPOpts() { |
507 | addPass(PassID: &EarlyIfConverterID); |
508 | |
509 | if (EnableMachineCombinerPass) |
510 | addPass(PassID: &MachineCombinerID); |
511 | |
512 | return true; |
513 | } |
514 | |
515 | bool PPCPassConfig::addInstSelector() { |
516 | // Install an instruction selector. |
517 | addPass(P: createPPCISelDag(TM&: getPPCTargetMachine(), OL: getOptLevel())); |
518 | |
519 | #ifndef NDEBUG |
520 | if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None) |
521 | addPass(createPPCCTRLoopsVerify()); |
522 | #endif |
523 | |
524 | addPass(P: createPPCVSXCopyPass()); |
525 | return false; |
526 | } |
527 | |
528 | void PPCPassConfig::addMachineSSAOptimization() { |
529 | // Run CTR loops pass before any cfg modification pass to prevent the |
530 | // canonical form of hardware loop from being destroied. |
531 | if (!DisableCTRLoops && getOptLevel() != CodeGenOptLevel::None) |
532 | addPass(P: createPPCCTRLoopsPass()); |
533 | |
534 | // PPCBranchCoalescingPass need to be done before machine sinking |
535 | // since it merges empty blocks. |
536 | if (EnableBranchCoalescing && getOptLevel() != CodeGenOptLevel::None) |
537 | addPass(P: createPPCBranchCoalescingPass()); |
538 | TargetPassConfig::addMachineSSAOptimization(); |
539 | // For little endian, remove where possible the vector swap instructions |
540 | // introduced at code generation to normalize vector element order. |
541 | if (TM->getTargetTriple().getArch() == Triple::ppc64le && |
542 | !DisableVSXSwapRemoval) |
543 | addPass(P: createPPCVSXSwapRemovalPass()); |
544 | // Reduce the number of cr-logical ops. |
545 | if (ReduceCRLogical && getOptLevel() != CodeGenOptLevel::None) |
546 | addPass(P: createPPCReduceCRLogicalsPass()); |
547 | // Target-specific peephole cleanups performed after instruction |
548 | // selection. |
549 | if (!DisableMIPeephole) { |
550 | addPass(P: createPPCMIPeepholePass()); |
551 | addPass(PassID: &DeadMachineInstructionElimID); |
552 | } |
553 | } |
554 | |
555 | void PPCPassConfig::addPreRegAlloc() { |
556 | if (getOptLevel() != CodeGenOptLevel::None) { |
557 | initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); |
558 | insertPass(TargetPassID: VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, |
559 | InsertedPassID: &PPCVSXFMAMutateID); |
560 | } |
561 | |
562 | // FIXME: We probably don't need to run these for -fPIE. |
563 | if (getPPCTargetMachine().isPositionIndependent()) { |
564 | // FIXME: LiveVariables should not be necessary here! |
565 | // PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on |
566 | // LiveVariables. This (unnecessary) dependency has been removed now, |
567 | // however a stage-2 clang build fails without LiveVariables computed here. |
568 | addPass(PassID: &LiveVariablesID); |
569 | addPass(P: createPPCTLSDynamicCallPass()); |
570 | } |
571 | if (EnableExtraTOCRegDeps) |
572 | addPass(P: createPPCTOCRegDepsPass()); |
573 | |
574 | if (getOptLevel() != CodeGenOptLevel::None) |
575 | addPass(PassID: &MachinePipelinerID); |
576 | } |
577 | |
578 | void PPCPassConfig::addPreSched2() { |
579 | if (getOptLevel() != CodeGenOptLevel::None) |
580 | addPass(PassID: &IfConverterID); |
581 | } |
582 | |
583 | void PPCPassConfig::addPreEmitPass() { |
584 | addPass(P: createPPCPreEmitPeepholePass()); |
585 | addPass(P: createPPCExpandISELPass()); |
586 | |
587 | if (getOptLevel() != CodeGenOptLevel::None) |
588 | addPass(P: createPPCEarlyReturnPass()); |
589 | } |
590 | |
591 | void PPCPassConfig::addPreEmitPass2() { |
592 | // Schedule the expansion of AMOs at the last possible moment, avoiding the |
593 | // possibility for other passes to break the requirements for forward |
594 | // progress in the LL/SC block. |
595 | addPass(P: createPPCExpandAtomicPseudoPass()); |
596 | // Must run branch selection immediately preceding the asm printer. |
597 | addPass(P: createPPCBranchSelectionPass()); |
598 | } |
599 | |
/// Returns a TargetTransformInfo wrapping a PPCTTIImpl built from this target
/// machine and the function \p F.
TargetTransformInfo
PPCTargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(PPCTTIImpl(this, F));
}
604 | |
605 | bool PPCTargetMachine::isLittleEndian() const { |
606 | assert(Endianness != Endian::NOT_DETECTED && |
607 | "Unable to determine endianness" ); |
608 | return Endianness == Endian::LITTLE; |
609 | } |
610 | |
611 | MachineFunctionInfo *PPCTargetMachine::createMachineFunctionInfo( |
612 | BumpPtrAllocator &Allocator, const Function &F, |
613 | const TargetSubtargetInfo *STI) const { |
614 | return PPCFunctionInfo::create<PPCFunctionInfo>(Allocator, F, STI); |
615 | } |
616 | |
617 | static MachineSchedRegistry |
618 | PPCPreRASchedRegistry("ppc-prera" , |
619 | "Run PowerPC PreRA specific scheduler" , |
620 | createPPCMachineScheduler); |
621 | |
622 | static MachineSchedRegistry |
623 | PPCPostRASchedRegistry("ppc-postra" , |
624 | "Run PowerPC PostRA specific scheduler" , |
625 | createPPCPostMachineScheduler); |
626 | |
627 | // Global ISEL |
628 | bool PPCPassConfig::addIRTranslator() { |
629 | addPass(P: new IRTranslator()); |
630 | return false; |
631 | } |
632 | |
633 | bool PPCPassConfig::addLegalizeMachineIR() { |
634 | addPass(P: new Legalizer()); |
635 | return false; |
636 | } |
637 | |
638 | bool PPCPassConfig::addRegBankSelect() { |
639 | addPass(P: new RegBankSelect()); |
640 | return false; |
641 | } |
642 | |
643 | bool PPCPassConfig::addGlobalInstructionSelect() { |
644 | addPass(P: new InstructionSelect(getOptLevel())); |
645 | return false; |
646 | } |
647 | |