1 | //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Top-level implementation for the NVPTX target. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "NVPTXTargetMachine.h" |
14 | #include "NVPTX.h" |
15 | #include "NVPTXAliasAnalysis.h" |
16 | #include "NVPTXAllocaHoisting.h" |
17 | #include "NVPTXAtomicLower.h" |
18 | #include "NVPTXCtorDtorLowering.h" |
19 | #include "NVPTXLowerAggrCopies.h" |
20 | #include "NVPTXMachineFunctionInfo.h" |
21 | #include "NVPTXTargetObjectFile.h" |
22 | #include "NVPTXTargetTransformInfo.h" |
23 | #include "TargetInfo/NVPTXTargetInfo.h" |
24 | #include "llvm/Analysis/KernelInfo.h" |
25 | #include "llvm/Analysis/TargetTransformInfo.h" |
26 | #include "llvm/CodeGen/Passes.h" |
27 | #include "llvm/CodeGen/TargetPassConfig.h" |
28 | #include "llvm/IR/IntrinsicsNVPTX.h" |
29 | #include "llvm/MC/TargetRegistry.h" |
30 | #include "llvm/Pass.h" |
31 | #include "llvm/Passes/PassBuilder.h" |
32 | #include "llvm/Support/CommandLine.h" |
33 | #include "llvm/Support/Compiler.h" |
34 | #include "llvm/Target/TargetMachine.h" |
35 | #include "llvm/Target/TargetOptions.h" |
36 | #include "llvm/TargetParser/Triple.h" |
37 | #include "llvm/Transforms/IPO/ExpandVariadics.h" |
38 | #include "llvm/Transforms/Scalar.h" |
39 | #include "llvm/Transforms/Scalar/GVN.h" |
40 | #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" |
41 | #include <cassert> |
42 | #include <optional> |
43 | #include <string> |
44 | |
45 | using namespace llvm; |
46 | |
// LSV is still relatively new; this switch lets us turn it off in case we
// encounter (or suspect) a bug.
static cl::opt<bool>
    DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
                               cl::desc("Disable load/store vectorizer"),
                               cl::init(Val: false), cl::Hidden);

// TODO: Remove this flag when we are confident with no regressions.
static cl::opt<bool> DisableRequireStructuredCFG(
    "disable-nvptx-require-structured-cfg",
    cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
             "structured CFG. The requirement should be disabled only when "
             "unexpected regressions happen."),
    cl::init(Val: false), cl::Hidden);

// When set, const/local/shared pointers are 32 bits wide even on 64-bit
// targets; this feeds into computeDataLayout() below.
static cl::opt<bool> UseShortPointersOpt(
    "nvptx-short-ptr",
    cl::desc(
        "Use 32-bit pointers for accessing const/local/shared address spaces."),
    cl::init(Val: false), cl::Hidden);

// byval arguments in NVPTX are special. We're only allowed to read from them
// using a special instruction, and if we ever need to write to them or take an
// address, we must make a local copy and use it, instead.
//
// The problem is that local copies are very expensive, and we create them very
// late in the compilation pipeline, so LLVM does not have much of a chance to
// eliminate them, if they turn out to be unnecessary.
//
// One way around that is to create such copies early on, and let them percolate
// through the optimizations. The copying itself will never trigger creation of
// another copy later on, as the reads are allowed. If LLVM can eliminate it,
// it's a win. If the full optimization pipeline can't remove the copy, that's
// as good as it gets in terms of the effort we could've done, and it's
// certainly a much better effort than what we do now.
//
// This early injection of the copies has potential to create undesirable
// side-effects, so it's disabled by default, for now, until it sees more
// testing.
static cl::opt<bool> EarlyByValArgsCopy(
    "nvptx-early-byval-copy",
    cl::desc("Create a copy of byval function arguments early."),
    cl::init(Val: false), cl::Hidden);
90 | |
/// Target-registration entry point called by LLVM's target initialization
/// machinery. Registers the 32-bit and 64-bit NVPTX target machines and
/// initializes all NVPTX-specific legacy passes with the global PassRegistry
/// so they are known to the legacy pass manager.
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
  // Register the target.
  RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
  RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());

  PassRegistry &PR = *PassRegistry::getPassRegistry();
  // FIXME: This pass is really intended to be invoked during IR optimization,
  // but it's very NVPTX-specific.
  initializeNVVMReflectLegacyPassPass(PR);
  initializeNVVMIntrRangePass(PR);
  initializeGenericToNVVMLegacyPassPass(PR);
  initializeNVPTXAllocaHoistingPass(PR);
  initializeNVPTXAsmPrinterPass(PR);
  initializeNVPTXAssignValidGlobalNamesPass(PR);
  initializeNVPTXAtomicLowerPass(PR);
  initializeNVPTXLowerArgsLegacyPassPass(PR);
  initializeNVPTXLowerAllocaPass(PR);
  initializeNVPTXLowerUnreachablePass(PR);
  initializeNVPTXCtorDtorLoweringLegacyPass(PR);
  initializeNVPTXLowerAggrCopiesPass(PR);
  initializeNVPTXProxyRegErasurePass(PR);
  initializeNVPTXForwardParamsPassPass(PR);
  initializeNVPTXDAGToDAGISelLegacyPass(PR);
  initializeNVPTXAAWrapperPassPass(PR);
  initializeNVPTXExternalAAWrapperPass(PR);
  initializeNVPTXPeepholePass(PR);
  initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
  initializeNVPTXPrologEpilogPassPass(PR);
}
120 | |
/// Build the LLVM data-layout string for NVPTX.
///
/// The layout is always little-endian with fixed integer/vector alignments;
/// only the pointer widths vary: on a 32-bit target every pointer is 32 bits,
/// while on a 64-bit target \p UseShortPointers shrinks the
/// const/local/shared (p3/p4/p5) pointers to 32 bits.
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
  // Tensor Memory (addrspace:6) is always 32-bits.
  // Distributed Shared Memory (addrspace:7) follows shared memory
  // (addrspace:3).
  std::string PtrPart;
  if (!is64Bit)
    PtrPart = "-p:32:32-p6:32:32-p7:32:32";
  else if (UseShortPointers)
    PtrPart = "-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32";
  else
    PtrPart = "-p6:32:32";

  // "e" = little-endian; the tail fixes i64/i128/vector alignment and the
  // native integer widths.
  return "e" + PtrPart + "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
}
138 | |
/// Construct an NVPTX target machine.
///
/// Note that the requested relocation model \p RM is deliberately ignored
/// (see comment below) and the code model defaults to Small when \p CM is
/// not provided.
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       std::optional<Reloc::Model> RM,
                                       std::optional<CodeModel::Model> CM,
                                       CodeGenOptLevel OL, bool is64bit)
    // The pic relocation model is used regardless of what the client has
    // specified, as it is the only relocation model currently supported.
    : CodeGenTargetMachineImpl(T,
                               computeDataLayout(is64Bit: is64bit, UseShortPointers: UseShortPointersOpt),
                               TT, CPU, FS, Options, Reloc::PIC_,
                               getEffectiveCodeModel(CM, Default: CodeModel::Small), OL),
      is64bit(is64bit), TLOF(std::make_unique<NVPTXTargetObjectFile>()),
      Subtarget(TT, std::string(CPU), std::string(FS), *this),
      StrPool(StrAlloc) {
  // The driver interface (NVCL vs. CUDA) is derived solely from the triple's
  // OS component.
  if (TT.getOS() == Triple::NVCL)
    drvInterface = NVPTX::NVCL;
  else
    drvInterface = NVPTX::CUDA;
  // NVPTX normally requires a structured CFG; the cl::opt above is a
  // transitional escape hatch only.
  if (!DisableRequireStructuredCFG)
    setRequiresStructuredCFG(true);
  initAsmInfo();
}
162 | |
// Out-of-line default destructor.
NVPTXTargetMachine::~NVPTXTargetMachine() = default;

// anchor() pins this class's vtable to this translation unit.
void NVPTXTargetMachine32::anchor() {}

/// 32-bit NVPTX target machine: delegates to the common constructor with
/// is64bit = false.
NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           std::optional<Reloc::Model> RM,
                                           std::optional<CodeModel::Model> CM,
                                           CodeGenOptLevel OL, bool JIT)
    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}

// anchor() pins this class's vtable to this translation unit.
void NVPTXTargetMachine64::anchor() {}

/// 64-bit NVPTX target machine: delegates to the common constructor with
/// is64bit = true.
NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           std::optional<Reloc::Model> RM,
                                           std::optional<CodeModel::Model> CM,
                                           CodeGenOptLevel OL, bool JIT)
    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
184 | |
namespace {

/// Legacy codegen pass-pipeline configuration for NVPTX. Overrides the
/// TargetPassConfig hooks to insert NVPTX-specific IR and machine passes,
/// and to suppress register allocation (PTX output keeps virtual registers).
class NVPTXPassConfig : public TargetPassConfig {
public:
  NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  /// Convenience accessor for the owning NVPTX target machine.
  NVPTXTargetMachine &getNVPTXTargetMachine() const {
    return getTM<NVPTXTargetMachine>();
  }

  void addIRPasses() override;
  bool addInstSelector() override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addMachineSSAOptimization() override;

  FunctionPass *createTargetRegisterAllocator(bool) override;
  void addFastRegAlloc() override;
  void addOptimizedRegAlloc() override;

  // Register assignment never runs for NVPTX (createTargetRegisterAllocator
  // returns null), so these hooks must be unreachable.
  bool addRegAssignAndRewriteFast() override {
    llvm_unreachable("should not be used");
  }

  bool addRegAssignAndRewriteOptimized() override {
    llvm_unreachable("should not be used");
  }

private:
  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
  // function is only called in opt mode.
  void addEarlyCSEOrGVNPass();

  // Add passes that propagate special memory spaces.
  void addAddressSpaceInferencePasses();

  // Add passes that perform straight-line scalar optimizations.
  void addStraightLineScalarOptimizationPasses();
};

} // end anonymous namespace
227 | |
228 | TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { |
229 | return new NVPTXPassConfig(*this, PM); |
230 | } |
231 | |
/// Allocate the NVPTX-specific MachineFunctionInfo for \p F out of
/// \p Allocator.
MachineFunctionInfo *NVPTXTargetMachine::createMachineFunctionInfo(
    BumpPtrAllocator &Allocator, const Function &F,
    const TargetSubtargetInfo *STI) const {
  return NVPTXMachineFunctionInfo::create<NVPTXMachineFunctionInfo>(Allocator,
                                                                    F, STI);
}
238 | |
/// Register NVPTX's own alias analysis so it participates ahead of the
/// default AA stack in the new pass manager.
void NVPTXTargetMachine::registerEarlyDefaultAliasAnalyses(AAManager &AAM) {
  AAM.registerFunctionAnalysis<NVPTXAA>();
}
242 | |
/// Hook NVPTX passes into the new pass manager: the generated registry file
/// makes NVPTX passes parseable by name (e.g. for -passes=), and the
/// extension-point callbacks below insert target passes into the standard
/// optimization pipelines.
void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
#define GET_PASS_REGISTRY "NVPTXPassRegistry.def"
#include "llvm/Passes/TargetPassRegistry.inc"

  PB.registerPipelineStartEPCallback(
      C: [this](ModulePassManager &PM, OptimizationLevel Level) {
        // We do not want to fold out calls to nvvm.reflect early if the user
        // has not provided a target architecture just yet.
        if (Subtarget.hasTargetName())
          PM.addPass(Pass: NVVMReflectPass(Subtarget.getSmVersion()));

        FunctionPassManager FPM;
        // Note: NVVMIntrRangePass was causing numerical discrepancies at one
        // point, if issues crop up, consider disabling.
        FPM.addPass(Pass: NVVMIntrRangePass());
        // Optional early copy of byval arguments; gated by the
        // nvptx-early-byval-copy flag (off by default).
        if (EarlyByValArgsCopy)
          FPM.addPass(Pass: NVPTXCopyByValArgsPass());
        PM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
      });

  if (!NoKernelInfoEndLTO) {
    // At the very end of the full-LTO pipeline, run KernelInfoPrinter over
    // each function.
    PB.registerFullLinkTimeOptimizationLastEPCallback(
        C: [this](ModulePassManager &PM, OptimizationLevel Level) {
          FunctionPassManager FPM;
          FPM.addPass(Pass: KernelInfoPrinter(this));
          PM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
        });
  }
}
272 | |
/// Return the NVPTX cost model (TTI implementation) for \p F.
TargetTransformInfo
NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(std::make_unique<NVPTXTTIImpl>(args: this, args: F));
}
277 | |
278 | std::pair<const Value *, unsigned> |
279 | NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const { |
280 | if (auto *II = dyn_cast<IntrinsicInst>(Val: V)) { |
281 | switch (II->getIntrinsicID()) { |
282 | case Intrinsic::nvvm_isspacep_const: |
283 | return std::make_pair(x: II->getArgOperand(i: 0), y: llvm::ADDRESS_SPACE_CONST); |
284 | case Intrinsic::nvvm_isspacep_global: |
285 | return std::make_pair(x: II->getArgOperand(i: 0), y: llvm::ADDRESS_SPACE_GLOBAL); |
286 | case Intrinsic::nvvm_isspacep_local: |
287 | return std::make_pair(x: II->getArgOperand(i: 0), y: llvm::ADDRESS_SPACE_LOCAL); |
288 | case Intrinsic::nvvm_isspacep_shared: |
289 | return std::make_pair(x: II->getArgOperand(i: 0), y: llvm::ADDRESS_SPACE_SHARED); |
290 | case Intrinsic::nvvm_isspacep_shared_cluster: |
291 | return std::make_pair(x: II->getArgOperand(i: 0), |
292 | y: llvm::ADDRESS_SPACE_SHARED_CLUSTER); |
293 | default: |
294 | break; |
295 | } |
296 | } |
297 | return std::make_pair(x: nullptr, y: -1); |
298 | } |
299 | |
300 | void NVPTXPassConfig::addEarlyCSEOrGVNPass() { |
301 | if (getOptLevel() == CodeGenOptLevel::Aggressive) |
302 | addPass(P: createGVNPass()); |
303 | else |
304 | addPass(P: createEarlyCSEPass()); |
305 | } |
306 | |
// Propagate NVPTX address spaces through the IR. Pass order matters here:
// SROA first so byval allocas disappear, then NVPTX alloca lowering, then
// the generic inference, then atomic lowering on the now-annotated pointers.
void NVPTXPassConfig::addAddressSpaceInferencePasses() {
  // NVPTXLowerArgs emits alloca for byval parameters which can often
  // be eliminated by SROA.
  addPass(P: createSROAPass());
  addPass(P: createNVPTXLowerAllocaPass());
  // TODO: Consider running InferAddressSpaces during opt, earlier in the
  // compilation flow.
  addPass(P: createInferAddressSpacesPass());
  addPass(P: createNVPTXAtomicLowerPass());
}
317 | |
// Straight-line scalar optimizations. The ordering below is deliberate:
// each pass exposes opportunities the next one cleans up.
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(P: createSeparateConstOffsetFromGEPPass());
  addPass(P: createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(P: createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
  // for some of our benchmarks.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(P: createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(P: createEarlyCSEPass());
}
334 | |
// IR-level portion of the NVPTX codegen pipeline. The sequence of passes
// below is order-sensitive: lowering passes must precede the inference and
// cleanup passes that consume their output.
void NVPTXPassConfig::addIRPasses() {
  // The following passes are known to not play well with virtual regs hanging
  // around after register allocation (which in our case, is *all* registers).
  // We explicitly disable them here. We do, however, need some functionality
  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
  disablePass(PassID: &PrologEpilogCodeInserterID);
  disablePass(PassID: &MachineLateInstrsCleanupID);
  disablePass(PassID: &MachineCopyPropagationID);
  disablePass(PassID: &TailDuplicateLegacyID);
  disablePass(PassID: &StackMapLivenessID);
  disablePass(PassID: &PostRAMachineSinkingID);
  disablePass(PassID: &PostRASchedulerID);
  disablePass(PassID: &FuncletLayoutID);
  disablePass(PassID: &PatchableFunctionID);
  disablePass(PassID: &ShrinkWrapID);
  disablePass(PassID: &RemoveLoadsIntoFakeUsesID);

  // Register NVPTX's alias analysis with the legacy pass manager.
  addPass(P: createNVPTXAAWrapperPass());
  addPass(P: createNVPTXExternalAAWrapperPass());

  // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
  // it here does nothing. But since we need it for correctness when lowering
  // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
  // call addEarlyAsPossiblePasses.
  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
  addPass(P: createNVVMReflectPass(SmVersion: ST.getSmVersion()));

  if (getOptLevel() != CodeGenOptLevel::None)
    addPass(P: createNVPTXImageOptimizerPass());
  addPass(P: createNVPTXAssignValidGlobalNamesPass());
  addPass(P: createGenericToNVVMLegacyPass());

  // NVPTXLowerArgs is required for correctness and should be run right
  // before the address space inference passes.
  addPass(P: createNVPTXLowerArgsPass());
  if (getOptLevel() != CodeGenOptLevel::None) {
    addAddressSpaceInferencePasses();
    addStraightLineScalarOptimizationPasses();
  }

  addPass(P: createAtomicExpandLegacyPass());
  addPass(P: createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
  addPass(P: createNVPTXCtorDtorLoweringLegacyPass());

  // === LSR and other generic IR passes ===
  TargetPassConfig::addIRPasses();
  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOptLevel::None) {
    addEarlyCSEOrGVNPass();
    if (!DisableLoadStoreVectorizer)
      addPass(P: createLoadStoreVectorizerPass());
    addPass(P: createSROAPass());
    addPass(P: createNVPTXTagInvariantLoadsPass());
  }

  if (ST.hasPTXASUnreachableBug()) {
    // Run LowerUnreachable to WAR a ptxas bug. See the commit description of
    // 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
    const auto &Options = getNVPTXTargetMachine().Options;
    addPass(P: createNVPTXLowerUnreachablePass(TrapUnreachable: Options.TrapUnreachable,
                                             NoTrapAfterNoreturn: Options.NoTrapAfterNoreturn));
  }
}
410 | |
// Set up instruction selection: lower aggregate copies and hoist allocas
// first, then run the NVPTX SelectionDAG ISel, then replace image handles.
// Returns false (= success, per the TargetPassConfig convention).
bool NVPTXPassConfig::addInstSelector() {
  addPass(P: createLowerAggrCopies());
  addPass(P: createAllocaHoisting());
  addPass(P: createNVPTXISelDag(TM&: getNVPTXTargetMachine(), OptLevel: getOptLevel()));
  addPass(P: createNVPTXReplaceImageHandlesPass());

  return false;
}
419 | |
// Machine passes that must run before the (suppressed) register allocation
// stage.
void NVPTXPassConfig::addPreRegAlloc() {
  addPass(P: createNVPTXForwardParamsPass());
  // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
  addPass(P: createNVPTXProxyRegErasurePass());
}
425 | |
// Machine passes that run where register allocation would normally have
// finished; NVPTX substitutes its own prolog/epilog handling here.
void NVPTXPassConfig::addPostRegAlloc() {
  addPass(P: createNVPTXPrologEpilogPass());
  if (getOptLevel() != CodeGenOptLevel::None) {
    // NVPTXPrologEpilogPass calculates frame object offset and replace frame
    // index with VRFrame register. NVPTXPeephole need to be run after that and
    // will replace VRFrame with VRFrameLocal when possible.
    addPass(P: createNVPTXPeephole());
  }
}
435 | |
// NVPTX emits PTX with virtual registers (see the comment in addIRPasses);
// returning null suppresses creation of a register allocator entirely.
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
  return nullptr; // No reg alloc
}
439 | |
// Fast (-O0) "register allocation" stage: no allocator runs, but we still
// need PHI elimination and two-address lowering to get out of machine SSA.
void NVPTXPassConfig::addFastRegAlloc() {
  addPass(PassID: &PHIEliminationID);
  addPass(PassID: &TwoAddressInstructionPassID);
}
444 | |
// Optimized "register allocation" stage: runs the usual pre-RA machine
// passes (SSA deconstruction, coalescing, scheduling, stack slot coloring)
// but, as with addFastRegAlloc, no actual allocator.
void NVPTXPassConfig::addOptimizedRegAlloc() {
  addPass(PassID: &ProcessImplicitDefsID);
  addPass(PassID: &LiveVariablesID);
  addPass(PassID: &MachineLoopInfoID);
  addPass(PassID: &PHIEliminationID);

  addPass(PassID: &TwoAddressInstructionPassID);
  addPass(PassID: &RegisterCoalescerID);

  // PreRA instruction scheduling.
  if (addPass(PassID: &MachineSchedulerID))
    printAndVerify(Banner: "After Machine Scheduling");

  addPass(PassID: &StackSlotColoringID);

  // FIXME: Needs physical registers
  // addPass(&MachineLICMID);

  printAndVerify(Banner: "After StackSlotColoring");
}
465 | |
// Machine-SSA optimization pipeline for NVPTX; the pass order below is
// deliberate (see the per-pass comments).
void NVPTXPassConfig::addMachineSSAOptimization() {
  // Pre-ra tail duplication.
  if (addPass(PassID: &EarlyTailDuplicateLegacyID))
    printAndVerify(Banner: "After Pre-RegAlloc TailDuplicate");

  // Optimize PHIs before DCE: removing dead PHI cycles may make more
  // instructions dead.
  addPass(PassID: &OptimizePHIsLegacyID);

  // This pass merges large allocas. StackSlotColoring is a different pass
  // which merges spill slots.
  addPass(PassID: &StackColoringLegacyID);

  // If the target requests it, assign local variables to stack slots relative
  // to one another and simplify frame index references where possible.
  addPass(PassID: &LocalStackSlotAllocationID);

  // With optimization, dead code should already be eliminated. However
  // there is one known exception: lowered code for arguments that are only
  // used by tail calls, where the tail calls reuse the incoming stack
  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
  addPass(PassID: &DeadMachineInstructionElimID);
  printAndVerify(Banner: "After codegen DCE pass");

  // Allow targets to insert passes that improve instruction level parallelism,
  // like if-conversion. Such passes will typically need dominator trees and
  // loop info, just like LICM and CSE below.
  if (addILPOpts())
    printAndVerify(Banner: "After ILP optimizations");

  addPass(PassID: &EarlyMachineLICMID);
  addPass(PassID: &MachineCSELegacyID);

  addPass(PassID: &MachineSinkingLegacyID);
  printAndVerify(Banner: "After Machine LICM, CSE and Sinking passes");

  addPass(PassID: &PeepholeOptimizerLegacyID);
  printAndVerify(Banner: "After codegen peephole optimization pass");
}
505 | |