1 | //===- Construction of pass pipelines -------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// This file provides the implementation of the PassBuilder based on our |
11 | /// static pass registry as well as related functionality. It also provides |
12 | /// helpers to aid in analyzing, debugging, and testing passes and pass |
13 | /// pipelines. |
14 | /// |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/Analysis/AliasAnalysis.h" |
19 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
20 | #include "llvm/Analysis/CGSCCPassManager.h" |
21 | #include "llvm/Analysis/GlobalsModRef.h" |
22 | #include "llvm/Analysis/InlineAdvisor.h" |
23 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
24 | #include "llvm/Analysis/ScopedNoAliasAA.h" |
25 | #include "llvm/Analysis/TypeBasedAliasAnalysis.h" |
26 | #include "llvm/IR/PassManager.h" |
27 | #include "llvm/Passes/OptimizationLevel.h" |
28 | #include "llvm/Passes/PassBuilder.h" |
29 | #include "llvm/Support/CommandLine.h" |
30 | #include "llvm/Support/ErrorHandling.h" |
31 | #include "llvm/Support/PGOOptions.h" |
32 | #include "llvm/Support/VirtualFileSystem.h" |
33 | #include "llvm/Target/TargetMachine.h" |
34 | #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" |
35 | #include "llvm/Transforms/Coroutines/CoroCleanup.h" |
36 | #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" |
37 | #include "llvm/Transforms/Coroutines/CoroEarly.h" |
38 | #include "llvm/Transforms/Coroutines/CoroElide.h" |
39 | #include "llvm/Transforms/Coroutines/CoroSplit.h" |
40 | #include "llvm/Transforms/HipStdPar/HipStdPar.h" |
41 | #include "llvm/Transforms/IPO/AlwaysInliner.h" |
42 | #include "llvm/Transforms/IPO/Annotation2Metadata.h" |
43 | #include "llvm/Transforms/IPO/ArgumentPromotion.h" |
44 | #include "llvm/Transforms/IPO/Attributor.h" |
45 | #include "llvm/Transforms/IPO/CalledValuePropagation.h" |
46 | #include "llvm/Transforms/IPO/ConstantMerge.h" |
47 | #include "llvm/Transforms/IPO/CrossDSOCFI.h" |
48 | #include "llvm/Transforms/IPO/DeadArgumentElimination.h" |
49 | #include "llvm/Transforms/IPO/ElimAvailExtern.h" |
50 | #include "llvm/Transforms/IPO/EmbedBitcodePass.h" |
51 | #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" |
52 | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
53 | #include "llvm/Transforms/IPO/GlobalDCE.h" |
54 | #include "llvm/Transforms/IPO/GlobalOpt.h" |
55 | #include "llvm/Transforms/IPO/GlobalSplit.h" |
56 | #include "llvm/Transforms/IPO/HotColdSplitting.h" |
57 | #include "llvm/Transforms/IPO/IROutliner.h" |
58 | #include "llvm/Transforms/IPO/InferFunctionAttrs.h" |
59 | #include "llvm/Transforms/IPO/Inliner.h" |
60 | #include "llvm/Transforms/IPO/LowerTypeTests.h" |
61 | #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" |
62 | #include "llvm/Transforms/IPO/MergeFunctions.h" |
63 | #include "llvm/Transforms/IPO/ModuleInliner.h" |
64 | #include "llvm/Transforms/IPO/OpenMPOpt.h" |
65 | #include "llvm/Transforms/IPO/PartialInlining.h" |
66 | #include "llvm/Transforms/IPO/SCCP.h" |
67 | #include "llvm/Transforms/IPO/SampleProfile.h" |
68 | #include "llvm/Transforms/IPO/SampleProfileProbe.h" |
69 | #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" |
70 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
71 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
72 | #include "llvm/Transforms/Instrumentation/CGProfile.h" |
73 | #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" |
74 | #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" |
75 | #include "llvm/Transforms/Instrumentation/InstrProfiling.h" |
76 | #include "llvm/Transforms/Instrumentation/MemProfiler.h" |
77 | #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" |
78 | #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" |
79 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
80 | #include "llvm/Transforms/Scalar/ADCE.h" |
81 | #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" |
82 | #include "llvm/Transforms/Scalar/AnnotationRemarks.h" |
83 | #include "llvm/Transforms/Scalar/BDCE.h" |
84 | #include "llvm/Transforms/Scalar/CallSiteSplitting.h" |
85 | #include "llvm/Transforms/Scalar/ConstraintElimination.h" |
86 | #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" |
87 | #include "llvm/Transforms/Scalar/DFAJumpThreading.h" |
88 | #include "llvm/Transforms/Scalar/DeadStoreElimination.h" |
89 | #include "llvm/Transforms/Scalar/DivRemPairs.h" |
90 | #include "llvm/Transforms/Scalar/EarlyCSE.h" |
91 | #include "llvm/Transforms/Scalar/Float2Int.h" |
92 | #include "llvm/Transforms/Scalar/GVN.h" |
93 | #include "llvm/Transforms/Scalar/IndVarSimplify.h" |
94 | #include "llvm/Transforms/Scalar/InferAlignment.h" |
95 | #include "llvm/Transforms/Scalar/InstSimplifyPass.h" |
96 | #include "llvm/Transforms/Scalar/JumpTableToSwitch.h" |
97 | #include "llvm/Transforms/Scalar/JumpThreading.h" |
98 | #include "llvm/Transforms/Scalar/LICM.h" |
99 | #include "llvm/Transforms/Scalar/LoopDeletion.h" |
100 | #include "llvm/Transforms/Scalar/LoopDistribute.h" |
101 | #include "llvm/Transforms/Scalar/LoopFlatten.h" |
102 | #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" |
103 | #include "llvm/Transforms/Scalar/LoopInstSimplify.h" |
104 | #include "llvm/Transforms/Scalar/LoopInterchange.h" |
105 | #include "llvm/Transforms/Scalar/LoopLoadElimination.h" |
106 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
107 | #include "llvm/Transforms/Scalar/LoopRotation.h" |
108 | #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" |
109 | #include "llvm/Transforms/Scalar/LoopSink.h" |
110 | #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" |
111 | #include "llvm/Transforms/Scalar/LoopUnrollPass.h" |
112 | #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" |
113 | #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" |
114 | #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" |
115 | #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" |
116 | #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" |
117 | #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" |
118 | #include "llvm/Transforms/Scalar/NewGVN.h" |
119 | #include "llvm/Transforms/Scalar/Reassociate.h" |
120 | #include "llvm/Transforms/Scalar/SCCP.h" |
121 | #include "llvm/Transforms/Scalar/SROA.h" |
122 | #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" |
123 | #include "llvm/Transforms/Scalar/SimplifyCFG.h" |
124 | #include "llvm/Transforms/Scalar/SpeculativeExecution.h" |
125 | #include "llvm/Transforms/Scalar/TailRecursionElimination.h" |
126 | #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" |
127 | #include "llvm/Transforms/Utils/AddDiscriminators.h" |
128 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
129 | #include "llvm/Transforms/Utils/CanonicalizeAliases.h" |
130 | #include "llvm/Transforms/Utils/CountVisits.h" |
131 | #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" |
132 | #include "llvm/Transforms/Utils/InjectTLIMappings.h" |
133 | #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" |
134 | #include "llvm/Transforms/Utils/Mem2Reg.h" |
135 | #include "llvm/Transforms/Utils/MoveAutoInit.h" |
136 | #include "llvm/Transforms/Utils/NameAnonGlobals.h" |
137 | #include "llvm/Transforms/Utils/RelLookupTableConverter.h" |
138 | #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" |
139 | #include "llvm/Transforms/Vectorize/LoopVectorize.h" |
140 | #include "llvm/Transforms/Vectorize/SLPVectorizer.h" |
141 | #include "llvm/Transforms/Vectorize/VectorCombine.h" |
142 | |
143 | using namespace llvm; |
144 | |
145 | static cl::opt<InliningAdvisorMode> UseInlineAdvisor( |
146 | "enable-ml-inliner" , cl::init(Val: InliningAdvisorMode::Default), cl::Hidden, |
147 | cl::desc("Enable ML policy for inliner. Currently trained for -Oz only" ), |
148 | cl::values(clEnumValN(InliningAdvisorMode::Default, "default" , |
149 | "Heuristics-based inliner version" ), |
150 | clEnumValN(InliningAdvisorMode::Development, "development" , |
151 | "Use development mode (runtime-loadable model)" ), |
152 | clEnumValN(InliningAdvisorMode::Release, "release" , |
153 | "Use release mode (AOT-compiled model)" ))); |
154 | |
155 | static cl::opt<bool> EnableSyntheticCounts( |
156 | "enable-npm-synthetic-counts" , cl::Hidden, |
157 | cl::desc("Run synthetic function entry count generation " |
158 | "pass" )); |
159 | |
160 | /// Flag to enable inline deferral during PGO. |
161 | static cl::opt<bool> |
162 | EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral" , cl::init(Val: true), |
163 | cl::Hidden, |
164 | cl::desc("Enable inline deferral during PGO" )); |
165 | |
166 | static cl::opt<bool> EnableModuleInliner("enable-module-inliner" , |
167 | cl::init(Val: false), cl::Hidden, |
168 | cl::desc("Enable module inliner" )); |
169 | |
170 | static cl::opt<bool> PerformMandatoryInliningsFirst( |
171 | "mandatory-inlining-first" , cl::init(Val: false), cl::Hidden, |
172 | cl::desc("Perform mandatory inlinings module-wide, before performing " |
173 | "inlining" )); |
174 | |
175 | static cl::opt<bool> EnableEagerlyInvalidateAnalyses( |
176 | "eagerly-invalidate-analyses" , cl::init(Val: true), cl::Hidden, |
177 | cl::desc("Eagerly invalidate more analyses in default pipelines" )); |
178 | |
179 | static cl::opt<bool> EnableMergeFunctions( |
180 | "enable-merge-functions" , cl::init(Val: false), cl::Hidden, |
181 | cl::desc("Enable function merging as part of the optimization pipeline" )); |
182 | |
183 | static cl::opt<bool> EnablePostPGOLoopRotation( |
184 | "enable-post-pgo-loop-rotation" , cl::init(Val: true), cl::Hidden, |
185 | cl::desc("Run the loop rotation transformation after PGO instrumentation" )); |
186 | |
187 | static cl::opt<bool> EnableGlobalAnalyses( |
188 | "enable-global-analyses" , cl::init(Val: true), cl::Hidden, |
189 | cl::desc("Enable inter-procedural analyses" )); |
190 | |
191 | static cl::opt<bool> |
192 | RunPartialInlining("enable-partial-inlining" , cl::init(Val: false), cl::Hidden, |
193 | cl::desc("Run Partial inlinining pass" )); |
194 | |
195 | static cl::opt<bool> ( |
196 | "extra-vectorizer-passes" , cl::init(Val: false), cl::Hidden, |
197 | cl::desc("Run cleanup optimization passes after vectorization" )); |
198 | |
199 | static cl::opt<bool> RunNewGVN("enable-newgvn" , cl::init(Val: false), cl::Hidden, |
200 | cl::desc("Run the NewGVN pass" )); |
201 | |
202 | static cl::opt<bool> EnableLoopInterchange( |
203 | "enable-loopinterchange" , cl::init(Val: false), cl::Hidden, |
204 | cl::desc("Enable the experimental LoopInterchange Pass" )); |
205 | |
206 | static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam" , |
207 | cl::init(Val: false), cl::Hidden, |
208 | cl::desc("Enable Unroll And Jam Pass" )); |
209 | |
210 | static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten" , cl::init(Val: false), |
211 | cl::Hidden, |
212 | cl::desc("Enable the LoopFlatten Pass" )); |
213 | |
214 | // Experimentally allow loop header duplication. This should allow for better |
215 | // optimization at Oz, since loop-idiom recognition can then recognize things |
216 | // like memcpy. If this ends up being useful for many targets, we should drop |
217 | // this flag and make a code generation option that can be controlled |
218 | // independent of the opt level and exposed through the frontend. |
219 | static cl::opt<bool> ( |
220 | "enable-loop-header-duplication" , cl::init(Val: false), cl::Hidden, |
221 | cl::desc("Enable loop header duplication at any optimization level" )); |
222 | |
223 | static cl::opt<bool> |
224 | EnableDFAJumpThreading("enable-dfa-jump-thread" , |
225 | cl::desc("Enable DFA jump threading" ), |
226 | cl::init(Val: false), cl::Hidden); |
227 | |
228 | // TODO: turn on and remove flag |
229 | static cl::opt<bool> EnablePGOForceFunctionAttrs( |
230 | "enable-pgo-force-function-attrs" , |
231 | cl::desc("Enable pass to set function attributes based on PGO profiles" ), |
232 | cl::init(Val: false)); |
233 | |
234 | static cl::opt<bool> |
235 | EnableHotColdSplit("hot-cold-split" , |
236 | cl::desc("Enable hot-cold splitting pass" )); |
237 | |
238 | static cl::opt<bool> EnableIROutliner("ir-outliner" , cl::init(Val: false), |
239 | cl::Hidden, |
240 | cl::desc("Enable ir outliner pass" )); |
241 | |
242 | static cl::opt<bool> |
243 | DisablePreInliner("disable-preinline" , cl::init(Val: false), cl::Hidden, |
244 | cl::desc("Disable pre-instrumentation inliner" )); |
245 | |
246 | static cl::opt<int> PreInlineThreshold( |
247 | "preinline-threshold" , cl::Hidden, cl::init(Val: 75), |
248 | cl::desc("Control the amount of inlining in pre-instrumentation inliner " |
249 | "(default = 75)" )); |
250 | |
251 | static cl::opt<bool> |
252 | EnableGVNHoist("enable-gvn-hoist" , |
253 | cl::desc("Enable the GVN hoisting pass (default = off)" )); |
254 | |
255 | static cl::opt<bool> |
256 | EnableGVNSink("enable-gvn-sink" , |
257 | cl::desc("Enable the GVN sinking pass (default = off)" )); |
258 | |
259 | static cl::opt<bool> EnableJumpTableToSwitch( |
260 | "enable-jump-table-to-switch" , |
261 | cl::desc("Enable JumpTableToSwitch pass (default = off)" )); |
262 | |
263 | // This option is used in simplifying testing SampleFDO optimizations for |
264 | // profile loading. |
265 | static cl::opt<bool> |
266 | EnableCHR("enable-chr" , cl::init(Val: true), cl::Hidden, |
267 | cl::desc("Enable control height reduction optimization (CHR)" )); |
268 | |
269 | static cl::opt<bool> FlattenedProfileUsed( |
270 | "flattened-profile-used" , cl::init(Val: false), cl::Hidden, |
271 | cl::desc("Indicate the sample profile being used is flattened, i.e., " |
272 | "no inline hierachy exists in the profile" )); |
273 | |
274 | static cl::opt<bool> EnableOrderFileInstrumentation( |
275 | "enable-order-file-instrumentation" , cl::init(Val: false), cl::Hidden, |
276 | cl::desc("Enable order file instrumentation (default = off)" )); |
277 | |
278 | static cl::opt<bool> |
279 | EnableMatrix("enable-matrix" , cl::init(Val: false), cl::Hidden, |
280 | cl::desc("Enable lowering of the matrix intrinsics" )); |
281 | |
282 | static cl::opt<bool> EnableConstraintElimination( |
283 | "enable-constraint-elimination" , cl::init(Val: true), cl::Hidden, |
284 | cl::desc( |
285 | "Enable pass to eliminate conditions based on linear constraints" )); |
286 | |
287 | static cl::opt<AttributorRunOption> AttributorRun( |
288 | "attributor-enable" , cl::Hidden, cl::init(Val: AttributorRunOption::NONE), |
289 | cl::desc("Enable the attributor inter-procedural deduction pass" ), |
290 | cl::values(clEnumValN(AttributorRunOption::ALL, "all" , |
291 | "enable all attributor runs" ), |
292 | clEnumValN(AttributorRunOption::MODULE, "module" , |
293 | "enable module-wide attributor runs" ), |
294 | clEnumValN(AttributorRunOption::CGSCC, "cgscc" , |
295 | "enable call graph SCC attributor runs" ), |
296 | clEnumValN(AttributorRunOption::NONE, "none" , |
297 | "disable attributor runs" ))); |
298 | |
299 | static cl::opt<bool> EnableSampledInstr( |
300 | "enable-sampled-instrumentation" , cl::init(Val: false), cl::Hidden, |
301 | cl::desc("Enable profile instrumentation sampling (default = off)" )); |
302 | static cl::opt<bool> UseLoopVersioningLICM( |
303 | "enable-loop-versioning-licm" , cl::init(Val: false), cl::Hidden, |
304 | cl::desc("Enable the experimental Loop Versioning LICM pass" )); |
305 | |
306 | namespace llvm { |
307 | extern cl::opt<bool> EnableMemProfContextDisambiguation; |
308 | |
309 | extern cl::opt<bool> EnableInferAlignmentPass; |
310 | } // namespace llvm |
311 | |
312 | PipelineTuningOptions::PipelineTuningOptions() { |
313 | LoopInterleaving = true; |
314 | LoopVectorization = true; |
315 | SLPVectorization = false; |
316 | LoopUnrolling = true; |
317 | ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; |
318 | LicmMssaOptCap = SetLicmMssaOptCap; |
319 | LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; |
320 | CallGraphProfile = true; |
321 | UnifiedLTO = false; |
322 | MergeFunctions = EnableMergeFunctions; |
323 | InlinerThreshold = -1; |
324 | EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; |
325 | } |
326 | |
327 | namespace llvm { |
328 | extern cl::opt<unsigned> MaxDevirtIterations; |
329 | } // namespace llvm |
330 | |
331 | void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, |
332 | OptimizationLevel Level) { |
333 | for (auto &C : PeepholeEPCallbacks) |
334 | C(FPM, Level); |
335 | } |
336 | void PassBuilder::invokeLateLoopOptimizationsEPCallbacks( |
337 | LoopPassManager &LPM, OptimizationLevel Level) { |
338 | for (auto &C : LateLoopOptimizationsEPCallbacks) |
339 | C(LPM, Level); |
340 | } |
341 | void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, |
342 | OptimizationLevel Level) { |
343 | for (auto &C : LoopOptimizerEndEPCallbacks) |
344 | C(LPM, Level); |
345 | } |
346 | void PassBuilder::invokeScalarOptimizerLateEPCallbacks( |
347 | FunctionPassManager &FPM, OptimizationLevel Level) { |
348 | for (auto &C : ScalarOptimizerLateEPCallbacks) |
349 | C(FPM, Level); |
350 | } |
351 | void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, |
352 | OptimizationLevel Level) { |
353 | for (auto &C : CGSCCOptimizerLateEPCallbacks) |
354 | C(CGPM, Level); |
355 | } |
356 | void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, |
357 | OptimizationLevel Level) { |
358 | for (auto &C : VectorizerStartEPCallbacks) |
359 | C(FPM, Level); |
360 | } |
361 | void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, |
362 | OptimizationLevel Level) { |
363 | for (auto &C : OptimizerEarlyEPCallbacks) |
364 | C(MPM, Level); |
365 | } |
366 | void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, |
367 | OptimizationLevel Level) { |
368 | for (auto &C : OptimizerLastEPCallbacks) |
369 | C(MPM, Level); |
370 | } |
371 | void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks( |
372 | ModulePassManager &MPM, OptimizationLevel Level) { |
373 | for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks) |
374 | C(MPM, Level); |
375 | } |
376 | void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks( |
377 | ModulePassManager &MPM, OptimizationLevel Level) { |
378 | for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) |
379 | C(MPM, Level); |
380 | } |
381 | void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM, |
382 | OptimizationLevel Level) { |
383 | for (auto &C : PipelineStartEPCallbacks) |
384 | C(MPM, Level); |
385 | } |
386 | void PassBuilder::invokePipelineEarlySimplificationEPCallbacks( |
387 | ModulePassManager &MPM, OptimizationLevel Level) { |
388 | for (auto &C : PipelineEarlySimplificationEPCallbacks) |
389 | C(MPM, Level); |
390 | } |
391 | |
392 | // Helper to add AnnotationRemarksPass. |
393 | static void (ModulePassManager &MPM) { |
394 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass())); |
395 | } |
396 | |
397 | // Helper to check if the current compilation phase is preparing for LTO |
398 | static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { |
399 | return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || |
400 | Phase == ThinOrFullLTOPhase::FullLTOPreLink; |
401 | } |
402 | |
403 | // TODO: Investigate the cost/benefit of tail call elimination on debugging. |
404 | FunctionPassManager |
405 | PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, |
406 | ThinOrFullLTOPhase Phase) { |
407 | |
408 | FunctionPassManager FPM; |
409 | |
410 | if (AreStatisticsEnabled()) |
411 | FPM.addPass(Pass: CountVisitsPass()); |
412 | |
413 | // Form SSA out of local memory accesses after breaking apart aggregates into |
414 | // scalars. |
415 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
416 | |
417 | // Catch trivial redundancies |
418 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
419 | |
420 | // Hoisting of scalars and load expressions. |
421 | FPM.addPass( |
422 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
423 | FPM.addPass(Pass: InstCombinePass()); |
424 | |
425 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
426 | |
427 | invokePeepholeEPCallbacks(FPM, Level); |
428 | |
429 | FPM.addPass( |
430 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
431 | |
432 | // Form canonically associated expression trees, and simplify the trees using |
433 | // basic mathematical properties. For example, this will form (nearly) |
434 | // minimal multiplication trees. |
435 | FPM.addPass(Pass: ReassociatePass()); |
436 | |
437 | // Add the primary loop simplification pipeline. |
438 | // FIXME: Currently this is split into two loop pass pipelines because we run |
439 | // some function passes in between them. These can and should be removed |
440 | // and/or replaced by scheduling the loop pass equivalents in the correct |
441 | // positions. But those equivalent passes aren't powerful enough yet. |
442 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
443 | // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to |
444 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
445 | // `LoopInstSimplify`. |
446 | LoopPassManager LPM1, LPM2; |
447 | |
448 | // Simplify the loop body. We do this initially to clean up after other loop |
449 | // passes run, either when iterating on a loop or on inner loops with |
450 | // implications on the outer loop. |
451 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
452 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
453 | |
454 | // Try to remove as much code from the loop header as possible, |
455 | // to reduce amount of IR that will have to be duplicated. However, |
456 | // do not perform speculative hoisting the first time as LICM |
457 | // will destroy metadata that may not need to be destroyed if run |
458 | // after loop rotation. |
459 | // TODO: Investigate promotion cap for O1. |
460 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
461 | /*AllowSpeculation=*/false)); |
462 | |
463 | LPM1.addPass(Pass: LoopRotatePass(/* Disable header duplication */ true, |
464 | isLTOPreLink(Phase))); |
465 | // TODO: Investigate promotion cap for O1. |
466 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
467 | /*AllowSpeculation=*/true)); |
468 | LPM1.addPass(Pass: SimpleLoopUnswitchPass()); |
469 | if (EnableLoopFlatten) |
470 | LPM1.addPass(Pass: LoopFlattenPass()); |
471 | |
472 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
473 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
474 | |
475 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
476 | |
477 | LPM2.addPass(Pass: LoopDeletionPass()); |
478 | |
479 | if (EnableLoopInterchange) |
480 | LPM2.addPass(Pass: LoopInterchangePass()); |
481 | |
482 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
483 | // because it changes IR to makes profile annotation in back compile |
484 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
485 | // attributes so we need to make sure and allow the full unroll pass to pay |
486 | // attention to it. |
487 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
488 | PGOOpt->Action != PGOOptions::SampleUse) |
489 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
490 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
491 | PTO.ForgetAllSCEVInLoopUnroll)); |
492 | |
493 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
494 | |
495 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
496 | /*UseMemorySSA=*/true, |
497 | /*UseBlockFrequencyInfo=*/true)); |
498 | FPM.addPass( |
499 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
500 | FPM.addPass(Pass: InstCombinePass()); |
501 | // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. |
502 | // *All* loop passes must preserve it, in order to be able to use it. |
503 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
504 | /*UseMemorySSA=*/false, |
505 | /*UseBlockFrequencyInfo=*/false)); |
506 | |
507 | // Delete small array after loop unroll. |
508 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
509 | |
510 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
511 | FPM.addPass(Pass: MemCpyOptPass()); |
512 | |
513 | // Sparse conditional constant propagation. |
514 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
515 | // before... |
516 | FPM.addPass(Pass: SCCPPass()); |
517 | |
518 | // Delete dead bit computations (instcombine runs after to fold away the dead |
519 | // computations, and then ADCE will run later to exploit any new DCE |
520 | // opportunities that creates). |
521 | FPM.addPass(Pass: BDCEPass()); |
522 | |
523 | // Run instcombine after redundancy and dead bit elimination to exploit |
524 | // opportunities opened up by them. |
525 | FPM.addPass(Pass: InstCombinePass()); |
526 | invokePeepholeEPCallbacks(FPM, Level); |
527 | |
528 | FPM.addPass(Pass: CoroElidePass()); |
529 | |
530 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
531 | |
532 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
533 | // the simplifications and basic cleanup after all the simplifications. |
534 | // TODO: Investigate if this is too expensive. |
535 | FPM.addPass(Pass: ADCEPass()); |
536 | FPM.addPass( |
537 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
538 | FPM.addPass(Pass: InstCombinePass()); |
539 | invokePeepholeEPCallbacks(FPM, Level); |
540 | |
541 | return FPM; |
542 | } |
543 | |
544 | FunctionPassManager |
545 | PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, |
546 | ThinOrFullLTOPhase Phase) { |
547 | assert(Level != OptimizationLevel::O0 && "Must request optimizations!" ); |
548 | |
549 | // The O1 pipeline has a separate pipeline creation function to simplify |
550 | // construction readability. |
551 | if (Level.getSpeedupLevel() == 1) |
552 | return buildO1FunctionSimplificationPipeline(Level, Phase); |
553 | |
554 | FunctionPassManager FPM; |
555 | |
556 | if (AreStatisticsEnabled()) |
557 | FPM.addPass(Pass: CountVisitsPass()); |
558 | |
559 | // Form SSA out of local memory accesses after breaking apart aggregates into |
560 | // scalars. |
561 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
562 | |
563 | // Catch trivial redundancies |
564 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
565 | if (EnableKnowledgeRetention) |
566 | FPM.addPass(Pass: AssumeSimplifyPass()); |
567 | |
568 | // Hoisting of scalars and load expressions. |
569 | if (EnableGVNHoist) |
570 | FPM.addPass(Pass: GVNHoistPass()); |
571 | |
572 | // Global value numbering based sinking. |
573 | if (EnableGVNSink) { |
574 | FPM.addPass(Pass: GVNSinkPass()); |
575 | FPM.addPass( |
576 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
577 | } |
578 | |
579 | // Speculative execution if the target has divergent branches; otherwise nop. |
580 | FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); |
581 | |
582 | // Optimize based on known information about branches, and cleanup afterward. |
583 | FPM.addPass(Pass: JumpThreadingPass()); |
584 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
585 | |
586 | // Jump table to switch conversion. |
587 | if (EnableJumpTableToSwitch) |
588 | FPM.addPass(Pass: JumpTableToSwitchPass()); |
589 | |
590 | FPM.addPass( |
591 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
592 | FPM.addPass(Pass: InstCombinePass()); |
593 | FPM.addPass(Pass: AggressiveInstCombinePass()); |
594 | |
595 | if (!Level.isOptimizingForSize()) |
596 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
597 | |
598 | invokePeepholeEPCallbacks(FPM, Level); |
599 | |
600 | // For PGO use pipeline, try to optimize memory intrinsics such as memcpy |
601 | // using the size value profile. Don't perform this when optimizing for size. |
602 | if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && |
603 | !Level.isOptimizingForSize()) |
604 | FPM.addPass(Pass: PGOMemOPSizeOpt()); |
605 | |
606 | FPM.addPass(Pass: TailCallElimPass()); |
607 | FPM.addPass( |
608 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
609 | |
610 | // Form canonically associated expression trees, and simplify the trees using |
611 | // basic mathematical properties. For example, this will form (nearly) |
612 | // minimal multiplication trees. |
613 | FPM.addPass(Pass: ReassociatePass()); |
614 | |
615 | if (EnableConstraintElimination) |
616 | FPM.addPass(Pass: ConstraintEliminationPass()); |
617 | |
618 | // Add the primary loop simplification pipeline. |
619 | // FIXME: Currently this is split into two loop pass pipelines because we run |
620 | // some function passes in between them. These can and should be removed |
621 | // and/or replaced by scheduling the loop pass equivalents in the correct |
622 | // positions. But those equivalent passes aren't powerful enough yet. |
623 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
624 | // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to |
625 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
626 | // `LoopInstSimplify`. |
627 | LoopPassManager LPM1, LPM2; |
628 | |
629 | // Simplify the loop body. We do this initially to clean up after other loop |
630 | // passes run, either when iterating on a loop or on inner loops with |
631 | // implications on the outer loop. |
632 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
633 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
634 | |
635 | // Try to remove as much code from the loop header as possible, |
636 | // to reduce amount of IR that will have to be duplicated. However, |
637 | // do not perform speculative hoisting the first time as LICM |
638 | // will destroy metadata that may not need to be destroyed if run |
639 | // after loop rotation. |
640 | // TODO: Investigate promotion cap for O1. |
641 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
642 | /*AllowSpeculation=*/false)); |
643 | |
644 | // Disable header duplication in loop rotation at -Oz. |
645 | LPM1.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
646 | Level != OptimizationLevel::Oz, |
647 | isLTOPreLink(Phase))); |
648 | // TODO: Investigate promotion cap for O1. |
649 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
650 | /*AllowSpeculation=*/true)); |
651 | LPM1.addPass( |
652 | Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); |
653 | if (EnableLoopFlatten) |
654 | LPM1.addPass(Pass: LoopFlattenPass()); |
655 | |
656 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
657 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
658 | |
659 | { |
660 | ExtraSimpleLoopUnswitchPassManager ; |
661 | ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
662 | OptimizationLevel::O3)); |
663 | LPM2.addPass(Pass: std::move(ExtraPasses)); |
664 | } |
665 | |
666 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
667 | |
668 | LPM2.addPass(Pass: LoopDeletionPass()); |
669 | |
670 | if (EnableLoopInterchange) |
671 | LPM2.addPass(Pass: LoopInterchangePass()); |
672 | |
673 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
674 | // because it changes the IR in a way that makes profile annotation in the backend compile |
675 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
676 | // attributes so we need to make sure and allow the full unroll pass to pay |
677 | // attention to it. |
678 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
679 | PGOOpt->Action != PGOOptions::SampleUse) |
680 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
681 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
682 | PTO.ForgetAllSCEVInLoopUnroll)); |
683 | |
684 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
685 | |
686 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
687 | /*UseMemorySSA=*/true, |
688 | /*UseBlockFrequencyInfo=*/true)); |
689 | FPM.addPass( |
690 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
691 | FPM.addPass(Pass: InstCombinePass()); |
692 | // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, |
693 | // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. |
694 | // *All* loop passes must preserve it, in order to be able to use it. |
695 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
696 | /*UseMemorySSA=*/false, |
697 | /*UseBlockFrequencyInfo=*/false)); |
698 | |
699 | // Delete small array after loop unroll. |
700 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
701 | |
702 | // Try vectorization/scalarization transforms that are both improvements |
703 | // themselves and can allow further folds with GVN and InstCombine. |
704 | FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); |
705 | |
706 | // Eliminate redundancies. |
707 | FPM.addPass(Pass: MergedLoadStoreMotionPass()); |
708 | if (RunNewGVN) |
709 | FPM.addPass(Pass: NewGVNPass()); |
710 | else |
711 | FPM.addPass(Pass: GVNPass()); |
712 | |
713 | // Sparse conditional constant propagation. |
714 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
715 | // before... |
716 | FPM.addPass(Pass: SCCPPass()); |
717 | |
718 | // Delete dead bit computations (instcombine runs after to fold away the dead |
719 | // computations, and then ADCE will run later to exploit any new DCE |
720 | // opportunities that creates). |
721 | FPM.addPass(Pass: BDCEPass()); |
722 | |
723 | // Run instcombine after redundancy and dead bit elimination to exploit |
724 | // opportunities opened up by them. |
725 | FPM.addPass(Pass: InstCombinePass()); |
726 | invokePeepholeEPCallbacks(FPM, Level); |
727 | |
728 | // Re-consider control flow based optimizations after redundancy elimination, |
729 | // redo DCE, etc. |
730 | if (EnableDFAJumpThreading) |
731 | FPM.addPass(Pass: DFAJumpThreadingPass()); |
732 | |
733 | FPM.addPass(Pass: JumpThreadingPass()); |
734 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
735 | |
736 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
737 | // the simplifications and basic cleanup after all the simplifications. |
738 | // TODO: Investigate if this is too expensive. |
739 | FPM.addPass(Pass: ADCEPass()); |
740 | |
741 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
742 | FPM.addPass(Pass: MemCpyOptPass()); |
743 | |
744 | FPM.addPass(Pass: DSEPass()); |
745 | FPM.addPass(Pass: MoveAutoInitPass()); |
746 | |
747 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
748 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
749 | /*AllowSpeculation=*/true), |
750 | /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); |
751 | |
752 | FPM.addPass(Pass: CoroElidePass()); |
753 | |
754 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
755 | |
756 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
757 | .convertSwitchRangeToICmp(B: true) |
758 | .hoistCommonInsts(B: true) |
759 | .sinkCommonInsts(B: true))); |
760 | FPM.addPass(Pass: InstCombinePass()); |
761 | invokePeepholeEPCallbacks(FPM, Level); |
762 | |
763 | return FPM; |
764 | } |
765 | |
/// Appends CanonicalizeAliasesPass and then NameAnonGlobalPass to \p MPM.
///
/// Going by the function name, this is the fixed set of module passes an LTO
/// pre-link pipeline needs; the callers are outside this part of the file.
///
/// \param MPM Module pass manager that receives the two passes, in that order.
766 | void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { |
767 | MPM.addPass(Pass: CanonicalizeAliasesPass()); |
768 | MPM.addPass(Pass: NameAnonGlobalPass()); |
769 | } |
770 | |
/// Adds an early inlining stage followed by a GlobalDCEPass to \p MPM.
///
/// Does nothing when the DisablePreInliner option is set. Otherwise it builds
/// a ModuleInlinerWrapperPass tagged InlinePass::EarlyInliner, gives its CGSCC
/// pipeline a small function cleanup sequence (SROA, EarlyCSE, SimplifyCFG,
/// InstCombine, then the peephole extension-point callbacks), appends the
/// wrapper to \p MPM and finally appends GlobalDCEPass.
///
/// \param MPM      Module pass manager to extend.
/// \param Level    Optimization level; must not be O0 (asserted).
/// \param LTOPhase LTO phase recorded in the inliner's InlineContext.
771 | void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM, |
772 | OptimizationLevel Level, |
773 | ThinOrFullLTOPhase LTOPhase) { |
774 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
// The DisablePreInliner option turns this whole stage into a no-op.
775 | if (DisablePreInliner) |
776 | return; |
777 | InlineParams IP; |
778 | |
779 | IP.DefaultThreshold = PreInlineThreshold; |
780 | |
781 | // FIXME: The hint threshold has the same value used by the regular inliner |
782 | // when not optimizing for size. This should probably be lowered after |
783 | // performance testing. |
784 | // FIXME: this comment is cargo culted from the old pass manager; revisit. |
// Size-optimizing levels reuse PreInlineThreshold as the hint threshold; all
// other levels use the fixed value 325.
785 | IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; |
786 | ModuleInlinerWrapperPass MIWP( |
787 | IP, /* MandatoryFirst */ true, |
788 | InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner}); |
789 | CGSCCPassManager &CGPipeline = MIWP.getPM(); |
790 | |
// Function-level cleanup that is nested into the wrapper's CGSCC pipeline
// below.
791 | FunctionPassManager FPM; |
792 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
793 | FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies. |
794 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( |
795 | B: true))); // Merge & remove basic blocks. |
796 | FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences. |
797 | invokePeepholeEPCallbacks(FPM, Level); |
798 | |
799 | CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
800 | Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
801 | |
802 | MPM.addPass(Pass: std::move(MIWP)); |
803 | |
804 | // Delete anything that is now dead to make sure that we don't instrument |
805 | // dead code. Instrumentation can end up keeping dead code around and |
806 | // dramatically increase code size. |
807 | MPM.addPass(Pass: GlobalDCEPass()); |
808 | } |
809 | |
/// Optionally appends a loop-rotation step to \p MPM.
///
/// Only acts when the EnablePostPGOLoopRotation option is set. In that case a
/// LoopRotatePass is wrapped in a function-to-loop adaptor (requesting neither
/// MemorySSA nor block frequency info) and then in a module-to-function
/// adaptor that honours PTO.EagerlyInvalidateAnalyses.
///
/// \param MPM   Module pass manager to extend.
/// \param Level Optimization level; the value handed to LoopRotatePass is
///              false only at Oz with EnableLoopHeaderDuplication unset.
810 | void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM, |
811 | OptimizationLevel Level) { |
812 | if (EnablePostPGOLoopRotation) { |
813 | // Disable header duplication in loop rotation at -Oz. |
814 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
815 | Pass: createFunctionToLoopPassAdaptor( |
816 | Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
817 | Level != OptimizationLevel::Oz), |
818 | /*UseMemorySSA=*/false, |
819 | /*UseBlockFrequencyInfo=*/false), |
820 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
821 | } |
822 | } |
823 | |
/// Adds the IR-level PGO passes for optimization levels above O0.
///
/// Profile use (\p RunProfileGen false): appends PGOInstrumentationUse and a
/// RequireAnalysisPass for ProfileSummaryAnalysis, then returns.
/// Profile generation (\p RunProfileGen true): appends PGOInstrumentationGen,
/// the optional post-PGO loop rotation, and InstrProfilingLoweringPass
/// configured through an InstrProfOptions object.
///
/// \param MPM                  Module pass manager to extend.
/// \param Level                Optimization level; must not be O0 (asserted).
/// \param RunProfileGen        Selects instrumentation (true) or profile use.
/// \param IsCS                 Forwarded to the PGO passes and used as
///                             Options.UseBFIInPromotion.
/// \param AtomicCounterUpdate  Stored in Options.Atomic.
/// \param ProfileFile          Profile to read in use mode (must be non-empty
///                             there, asserted); in generation mode, when
///                             non-empty, stored as Options.InstrProfileOutput.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
/// \param FS                   File system forwarded to PGOInstrumentationUse.
824 | void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, |
825 | OptimizationLevel Level, bool RunProfileGen, |
826 | bool IsCS, bool AtomicCounterUpdate, |
827 | std::string ProfileFile, |
828 | std::string ProfileRemappingFile, |
829 | IntrusiveRefCntPtr<vfs::FileSystem> FS) { |
830 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
831 | |
// Profile-use path: nothing below this block runs.
832 | if (!RunProfileGen) { |
833 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
834 | MPM.addPass( |
835 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
836 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
837 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
838 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
839 | return; |
840 | } |
841 | |
842 | // Perform PGO instrumentation. |
843 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS)); |
844 | |
845 | addPostPGOLoopRotation(MPM, Level); |
846 | // Add the profile lowering pass. |
847 | InstrProfOptions Options; |
848 | if (!ProfileFile.empty()) |
849 | Options.InstrProfileOutput = ProfileFile; |
850 | // Do counter promotion at Level greater than O0. |
851 | Options.DoCounterPromotion = true; |
852 | Options.UseBFIInPromotion = IsCS; |
// The EnableSampledInstr option switches on sampling and, as a consequence,
// switches counter promotion back off.
853 | if (EnableSampledInstr) { |
854 | Options.Sampling = true; |
855 | // With sampling, there is little benefit to enable counter promotion. |
856 | // But note that sampling does work with counter promotion. |
857 | Options.DoCounterPromotion = false; |
858 | } |
859 | Options.Atomic = AtomicCounterUpdate; |
860 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
861 | } |
862 | |
/// Adds the IR-level PGO passes for an O0 pipeline.
///
/// Same two paths as addPGOInstrPasses (profile use vs. profile generation),
/// with these differences visible in the generation path: no post-PGO loop
/// rotation is added, counter promotion is always disabled, and the
/// EnableSampledInstr option is not consulted.
///
/// \param MPM                  Module pass manager to extend.
/// \param RunProfileGen        Selects instrumentation (true) or profile use.
/// \param IsCS                 Forwarded to the PGO passes and used as
///                             Options.UseBFIInPromotion.
/// \param AtomicCounterUpdate  Stored in Options.Atomic.
/// \param ProfileFile          Profile to read in use mode (must be non-empty
///                             there, asserted); in generation mode, when
///                             non-empty, stored as Options.InstrProfileOutput.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
/// \param FS                   File system forwarded to PGOInstrumentationUse.
863 | void PassBuilder::addPGOInstrPassesForO0( |
864 | ModulePassManager &MPM, bool RunProfileGen, bool IsCS, |
865 | bool AtomicCounterUpdate, std::string ProfileFile, |
866 | std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) { |
// Profile-use path: nothing below this block runs.
867 | if (!RunProfileGen) { |
868 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
869 | MPM.addPass( |
870 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
871 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
872 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
873 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
874 | return; |
875 | } |
876 | |
877 | // Perform PGO instrumentation. |
878 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS)); |
879 | // Add the profile lowering pass. |
880 | InstrProfOptions Options; |
881 | if (!ProfileFile.empty()) |
882 | Options.InstrProfileOutput = ProfileFile; |
883 | // Do not do counter promotion at O0. |
884 | Options.DoCounterPromotion = false; |
885 | Options.UseBFIInPromotion = IsCS; |
886 | Options.Atomic = AtomicCounterUpdate; |
887 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
888 | } |
889 | |
/// Returns the InlineParams for \p Level by forwarding its speedup level and
/// size level to getInlineParams.
890 | static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { |
891 | return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel()); |
892 | } |
893 | |
/// Builds the CGSCC inliner stage as a ModuleInlinerWrapperPass.
///
/// Inline parameters come from \p Level unless PTO.InlinerThreshold differs
/// from -1, in which case that threshold is used. The wrapper then receives,
/// in order:
///  - module passes: a GlobalsAA requirement plus an AAManager invalidation
///    (only when EnableGlobalAnalyses is set), and a ProfileSummaryAnalysis
///    requirement;
///  - the main CGSCC pipeline: AttributorCGSCCPass (if selected by
///    AttributorRun), PostOrderFunctionAttrsPass restricted via
///    SkipNonRecursive, ArgumentPromotionPass at O3, OpenMPOptCGSCCPass at
///    O2/O3, the CGSCCOptimizerLate extension-point callbacks, the function
///    simplification pipeline, a second PostOrderFunctionAttrsPass, a
///    ShouldNotRunFunctionPassesAnalysis requirement and CoroSplitPass;
///  - a late module pass that invalidates ShouldNotRunFunctionPassesAnalysis.
///
/// \param Level Optimization level used for the inline parameters and for
///              gating the level-specific passes.
/// \param Phase LTO phase; recorded in the InlineContext and passed on to the
///              function simplification pipeline.
/// \returns The configured wrapper pass, by value.
894 | ModuleInlinerWrapperPass |
895 | PassBuilder::buildInlinerPipeline(OptimizationLevel Level, |
896 | ThinOrFullLTOPhase Phase) { |
897 | InlineParams IP; |
// -1 is the value this code treats as "no explicit inliner threshold".
898 | if (PTO.InlinerThreshold == -1) |
899 | IP = getInlineParamsFromOptLevel(Level); |
900 | else |
901 | IP = getInlineParams(Threshold: PTO.InlinerThreshold); |
902 | // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to |
903 | // disable hot callsite inline (as much as possible [1]) because it makes |
904 | // profile annotation in the backend inaccurate. |
905 | // |
906 | // [1] Note the cost of a function could be below zero due to erased |
907 | // prologue / epilogue. |
908 | if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && |
909 | PGOOpt->Action == PGOOptions::SampleUse) |
910 | IP.HotCallSiteThreshold = 0; |
911 | |
// Whenever PGO options are present, inline deferral follows the
// EnablePGOInlineDeferral option.
912 | if (PGOOpt) |
913 | IP.EnableDeferral = EnablePGOInlineDeferral; |
914 | |
915 | ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, |
916 | InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner}, |
917 | UseInlineAdvisor, MaxDevirtIterations); |
918 | |
919 | // Require the GlobalsAA analysis for the module so we can query it within |
920 | // the CGSCC pipeline. |
921 | if (EnableGlobalAnalyses) { |
922 | MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
923 | // Invalidate AAManager so it can be recreated and pick up the newly |
924 | // available GlobalsAA. |
925 | MIWP.addModulePass( |
926 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
927 | } |
928 | |
929 | // Require the ProfileSummaryAnalysis for the module so we can query it within |
930 | // the inliner pass. |
931 | MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
932 | |
933 | // Now begin the main postorder CGSCC pipeline. |
934 | // FIXME: The current CGSCC pipeline has its origins in the legacy pass |
935 | // manager and trying to emulate its precise behavior. Much of this doesn't |
936 | // make a lot of sense and we should revisit the core CGSCC structure. |
937 | CGSCCPassManager &MainCGPipeline = MIWP.getPM(); |
938 | |
939 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
940 | // generally clean up exception handling overhead. It isn't clear this is |
941 | // valuable as the inliner doesn't currently care whether it is inlining an |
942 | // invoke or a call. |
943 | |
944 | if (AttributorRun & AttributorRunOption::CGSCC) |
945 | MainCGPipeline.addPass(Pass: AttributorCGSCCPass()); |
946 | |
947 | // Deduce function attributes. We do another run of this after the function |
948 | // simplification pipeline, so this only needs to run when it could affect the |
949 | // function simplification pipeline, which is only the case with recursive |
950 | // functions. |
951 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)); |
952 | |
953 | // When at O3 add argument promotion to the pass pipeline. |
954 | // FIXME: It isn't at all clear why this should be limited to O3. |
955 | if (Level == OptimizationLevel::O3) |
956 | MainCGPipeline.addPass(Pass: ArgumentPromotionPass()); |
957 | |
958 | // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if |
959 | // there are no OpenMP runtime calls present in the module. |
960 | if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) |
961 | MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass()); |
962 | |
963 | invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level); |
964 | |
965 | // Add the core function simplification pipeline nested inside the |
966 | // CGSCC walk. |
967 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
968 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
969 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true)); |
970 | |
971 | // Finally, deduce any function attributes based on the fully simplified |
972 | // function. |
973 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass()); |
974 | |
975 | // Mark that the function is fully simplified and that it shouldn't be |
976 | // simplified again if we somehow revisit it due to CGSCC mutations unless |
977 | // it's been modified since. |
978 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
979 | Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>())); |
980 | |
// NOTE(review): the bool given to CoroSplitPass is true for every level
// except O0; its exact meaning is defined by the pass, not visible here.
981 | MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
982 | |
983 | // Make sure we don't affect potential future NoRerun CGSCC adaptors. |
984 | MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor( |
985 | Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); |
986 | |
987 | return MIWP; |
988 | } |
989 | |
/// Builds the inliner stage based on the module inliner, as an alternative to
/// the CGSCC-based buildInlinerPipeline.
///
/// The returned pipeline contains, in order: ModuleInlinerPass, the function
/// simplification pipeline run over the module's functions, and CoroSplitPass
/// run through a post-order CGSCC adaptor. Inline deferral is always disabled
/// for this pipeline.
///
/// \param Level Optimization level used to derive the inline parameters.
/// \param Phase LTO phase; passed to ModuleInlinerPass and to the function
///              simplification pipeline.
/// \returns The assembled module pass manager, by value.
990 | ModulePassManager |
991 | PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, |
992 | ThinOrFullLTOPhase Phase) { |
993 | ModulePassManager MPM; |
994 | |
995 | InlineParams IP = getInlineParamsFromOptLevel(Level); |
996 | // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to |
997 | // disable hot callsite inline (as much as possible [1]) because it makes |
998 | // profile annotation in the backend inaccurate. |
999 | // |
1000 | // [1] Note the cost of a function could be below zero due to erased |
1001 | // prologue / epilogue. |
1002 | if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && |
1003 | PGOOpt->Action == PGOOptions::SampleUse) |
1004 | IP.HotCallSiteThreshold = 0; |
1005 | |
// NOTE(review): this store has no lasting effect -- IP.EnableDeferral is
// unconditionally set to false a few lines below, before IP is used.
1006 | if (PGOOpt) |
1007 | IP.EnableDeferral = EnablePGOInlineDeferral; |
1008 | |
1009 | // The inline deferral logic is used to avoid losing some |
1010 | // inlining chance in future. It is helpful in SCC inliner, in which |
1011 | // inlining is processed in bottom-up order. |
1012 | // While in module inliner, the inlining order is a priority-based order |
1013 | // by default. The inline deferral is unnecessary there. So we disable the |
1014 | // inline deferral logic in module inliner. |
1015 | IP.EnableDeferral = false; |
1016 | |
1017 | MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); |
1018 | |
1019 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
1020 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
1021 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1022 | |
1023 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
1024 | Pass: CoroSplitPass(Level != OptimizationLevel::O0))); |
1025 | |
1026 | return MPM; |
1027 | } |
1028 | |
/// Builds the module-level simplification pipeline for levels above O0.
///
/// Must not be called with O0 or with ThinOrFullLTOPhase::FullLTOPostLink
/// (both asserted). Passes are appended in this order:
///  1. SampleProfileProbePass, when pseudo probes are requested and the phase
///     is not ThinLTO post-link;
///  2. early PGOIndirectCallPromotion, for ThinLTO post-link when no sample
///     profile is going to be loaded;
///  3. frontend cleanup (InferFunctionAttrs, CoroEarly and an early function
///     pipeline), skipped for ThinLTO post-link;
///  4. sample profile loading, a ProfileSummaryAnalysis requirement and, outside
///     LTO pre-link, indirect call promotion;
///  5. OpenMPOpt, optionally Attributor, LowerTypeTests for ThinLTO post-link,
///     then the PipelineEarlySimplification extension-point callbacks;
///  6. IPSCCP, CalledValuePropagation, GlobalOpt and a function cleanup
///     pipeline (Promote, InstCombine, peephole callbacks, SimplifyCFG);
///  7. the PGO block: pre-inliner, instrumentation generation/use or
///     contextual profiling, indirect call promotion, the context-sensitive
///     instrumentation variable and memprof use;
///  8. SyntheticCountsPropagation and PGOForceFunctionAttrs when enabled,
///     AlwaysInlinerPass, then either the module inliner or the CGSCC inliner
///     pipeline depending on EnableModuleInliner;
///  9. DeadArgumentElimination, CoroCleanup, GlobalOpt and GlobalDCE.
///
/// \param Level Optimization level; must not be O0.
/// \param Phase LTO phase; must not be FullLTOPostLink.
/// \returns The assembled module pass manager, by value.
1029 | ModulePassManager |
1030 | PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, |
1031 | ThinOrFullLTOPhase Phase) { |
1032 | assert(Level != OptimizationLevel::O0 && |
1033 | "Should not be used for O0 pipeline" ); |
1034 | |
1035 | assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink && |
1036 | "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!" ); |
1037 | |
1038 | ModulePassManager MPM; |
1039 | |
1040 | // Place pseudo probe instrumentation as the first pass of the pipeline to |
1041 | // minimize the impact of optimization changes. |
1042 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
1043 | Phase != ThinOrFullLTOPhase::ThinLTOPostLink) |
1044 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
1045 | |
1046 | bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); |
1047 | |
1048 | // In ThinLTO mode, when flattened profile is used, all the available |
1049 | // profile information will be annotated in PreLink phase so there is |
1050 | // no need to load the profile again in PostLink. |
1051 | bool LoadSampleProfile = |
1052 | HasSampleProfile && |
1053 | !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); |
1054 | |
1055 | // During the ThinLTO backend phase we perform early indirect call promotion |
1056 | // here, before globalopt. Otherwise imported available_externally functions |
1057 | // look unreferenced and are removed. If we are going to load the sample |
1058 | // profile then defer until later. |
1059 | // TODO: See if we can move later and consolidate with the location where |
1060 | // we perform ICP when we are loading a sample profile. |
1061 | // TODO: We pass HasSampleProfile (whether there was a sample profile file |
1062 | // passed to the compile) to the SamplePGO flag of ICP. This is used to |
1063 | // determine whether the new direct calls are annotated with prof metadata. |
1064 | // Ideally this should be determined from whether the IR is annotated with |
1065 | // sample profile, and not whether a sample profile was provided on the |
1066 | // command line. E.g. for flattened profiles where we will not be reloading |
1067 | // the sample profile in the ThinLTO backend, we ideally shouldn't have to |
1068 | // provide the sample profile file. |
1069 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) |
1070 | MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); |
1071 | |
1072 | // Create an early function pass manager to cleanup the output of the |
1073 | // frontend. Not necessary with LTO post link pipelines since the pre link |
1074 | // pipeline already cleaned up the frontend output. |
1075 | if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) { |
1076 | // Do basic inference of function attributes from known properties of system |
1077 | // libraries and other oracles. |
1078 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
1079 | MPM.addPass(Pass: CoroEarlyPass()); |
1080 | |
1081 | FunctionPassManager EarlyFPM; |
1082 | EarlyFPM.addPass(Pass: EntryExitInstrumenterPass(/*PostInlining=*/false)); |
1083 | // Lower llvm.expect to metadata before attempting transforms. |
1084 | // Compare/branch metadata may alter the behavior of passes like |
1085 | // SimplifyCFG. |
1086 | EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass()); |
1087 | EarlyFPM.addPass(Pass: SimplifyCFGPass()); |
1088 | EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
1089 | EarlyFPM.addPass(Pass: EarlyCSEPass()); |
1090 | if (Level == OptimizationLevel::O3) |
1091 | EarlyFPM.addPass(Pass: CallSiteSplittingPass()); |
1092 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
1093 | Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1094 | } |
1095 | |
1096 | if (LoadSampleProfile) { |
1097 | // Annotate sample profile right after early FPM to ensure freshness of |
1098 | // the debug info. |
1099 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
1100 | PGOOpt->ProfileRemappingFile, Phase)); |
1101 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
1102 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
1103 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
1104 | // Do not invoke ICP in the LTOPrelink phase as it makes it hard |
1105 | // for the profile annotation to be accurate in the LTO backend. |
1106 | if (!isLTOPreLink(Phase)) |
1107 | // We perform early indirect call promotion here, before globalopt. |
1108 | // This is important for the ThinLTO backend phase because otherwise |
1109 | // imported available_externally functions look unreferenced and are |
1110 | // removed. |
1111 | MPM.addPass( |
1112 | Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); |
1113 | } |
1114 | |
1115 | // Try to perform OpenMP specific optimizations on the module. This is a |
1116 | // (quick!) no-op if there are no OpenMP runtime calls present in the module. |
1117 | MPM.addPass(Pass: OpenMPOptPass()); |
1118 | |
1119 | if (AttributorRun & AttributorRunOption::MODULE) |
1120 | MPM.addPass(Pass: AttributorPass()); |
1121 | |
1122 | // Lower type metadata and the type.test intrinsic in the ThinLTO |
1123 | // post link pipeline after ICP. This is to enable usage of the type |
1124 | // tests in ICP sequences. |
// NOTE(review): the three positional arguments below are not named at this
// call site; confirm their meaning against the LowerTypeTestsPass constructor.
1125 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) |
1126 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1127 | |
1128 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level); |
1129 | |
1130 | // Interprocedural constant propagation now that basic cleanup has occurred |
1131 | // and prior to optimizing globals. |
1132 | // FIXME: This position in the pipeline hasn't been carefully considered in |
1133 | // years, it should be re-analyzed. |
// AllowFuncSpec is true only when the level is neither Os nor Oz and the
// phase is not an LTO pre-link phase.
1134 | MPM.addPass(Pass: IPSCCPPass( |
1135 | IPSCCPOptions(/*AllowFuncSpec=*/ |
1136 | Level != OptimizationLevel::Os && |
1137 | Level != OptimizationLevel::Oz && |
1138 | !isLTOPreLink(Phase)))); |
1139 | |
1140 | // Attach metadata to indirect call sites indicating the set of functions |
1141 | // they may target at run-time. This should follow IPSCCP. |
1142 | MPM.addPass(Pass: CalledValuePropagationPass()); |
1143 | |
1144 | // Optimize globals to try and fold them into constants. |
1145 | MPM.addPass(Pass: GlobalOptPass()); |
1146 | |
1147 | // Create a small function pass pipeline to cleanup after all the global |
1148 | // optimizations. |
1149 | FunctionPassManager GlobalCleanupPM; |
1150 | // FIXME: Should this instead be a run of SROA? |
1151 | GlobalCleanupPM.addPass(Pass: PromotePass()); |
1152 | GlobalCleanupPM.addPass(Pass: InstCombinePass()); |
1153 | invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level); |
1154 | GlobalCleanupPM.addPass( |
1155 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
1156 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM), |
1157 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1158 | |
1159 | // We already asserted this happens in non-FullLTOPostLink earlier. |
// Consequently every phase other than ThinLTOPostLink counts as pre-link for
// the PGO decisions below.
1160 | const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink; |
1161 | const bool IsPGOPreLink = PGOOpt && IsPreLink; |
1162 | const bool IsPGOInstrGen = |
1163 | IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr; |
1164 | const bool IsPGOInstrUse = |
1165 | IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse; |
1166 | const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty(); |
1167 | // We don't want to mix pgo ctx gen and pgo gen; we also don't currently |
1168 | // enable ctx profiling from the frontend. |
1169 | assert( |
1170 | !(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) && |
1171 | "Enabling both instrumented FDO and contextual instrumentation is not " |
1172 | "supported." ); |
1173 | // Enable contextual profiling instrumentation. |
1174 | const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink && |
1175 | PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); |
1176 | |
1177 | if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen) |
1178 | addPreInlinerPasses(MPM, Level, LTOPhase: Phase); |
1179 | |
1180 | // Add all the requested passes for instrumentation PGO, if requested. |
1181 | if (IsPGOInstrGen || IsPGOInstrUse) { |
1182 | addPGOInstrPasses(MPM, Level, |
1183 | /*RunProfileGen=*/IsPGOInstrGen, |
1184 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1185 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1186 | FS: PGOOpt->FS); |
1187 | } else if (IsCtxProfGen) { |
// The PGOInstrumentationGen argument is passed as IsCS at the other call
// sites in this file; here it is a literal false.
1188 | MPM.addPass(Pass: PGOInstrumentationGen(false)); |
1189 | addPostPGOLoopRotation(MPM, Level); |
1190 | MPM.addPass(Pass: PGOCtxProfLoweringPass()); |
1191 | } |
1192 | |
// Going by the annotated call sites earlier in this function, the two
// arguments are the in-LTO flag and the SamplePGO flag, both false here.
1193 | if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen) |
1194 | MPM.addPass(Pass: PGOIndirectCallPromotion(false, false)); |
1195 | |
1196 | if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) |
1197 | MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile, |
1198 | EnableSampledInstr)); |
1199 | |
1200 | if (IsMemprofUse) |
1201 | MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); |
1202 | |
1203 | // Synthesize function entry counts for non-PGO compilation. |
1204 | if (EnableSyntheticCounts && !PGOOpt) |
1205 | MPM.addPass(Pass: SyntheticCountsPropagation()); |
1206 | |
1207 | if (EnablePGOForceFunctionAttrs && PGOOpt) |
1208 | MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); |
1209 | |
1210 | MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); |
1211 | |
// The EnableModuleInliner option selects between the two inliner pipelines.
1212 | if (EnableModuleInliner) |
1213 | MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase)); |
1214 | else |
1215 | MPM.addPass(Pass: buildInlinerPipeline(Level, Phase)); |
1216 | |
1217 | // Remove any dead arguments exposed by cleanups, constant folding globals, |
1218 | // and argument promotion. |
1219 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
1220 | |
1221 | MPM.addPass(Pass: CoroCleanupPass()); |
1222 | |
1223 | // Optimize globals now that functions are fully simplified. |
1224 | MPM.addPass(Pass: GlobalOptPass()); |
1225 | MPM.addPass(Pass: GlobalDCEPass()); |
1226 | |
1227 | return MPM; |
1228 | } |
1229 | |
1230 | /// TODO: Should LTO cause any differences to this set of passes? |
1231 | void PassBuilder::addVectorPasses(OptimizationLevel Level, |
1232 | FunctionPassManager &FPM, bool IsFullLTO) { |
1233 | FPM.addPass(Pass: LoopVectorizePass( |
1234 | LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); |
1235 | |
1236 | if (EnableInferAlignmentPass) |
1237 | FPM.addPass(Pass: InferAlignmentPass()); |
1238 | if (IsFullLTO) { |
1239 | // The vectorizer may have significantly shortened a loop body; unroll |
1240 | // again. Unroll small loops to hide loop backedge latency and saturate any |
1241 | // parallel execution resources of an out-of-order processor. We also then |
1242 | // need to clean up redundancies and loop invariant code. |
1243 | // FIXME: It would be really good to use a loop-integrated instruction |
1244 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
1245 | // across the loop nests. |
1246 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
1247 | if (EnableUnrollAndJam && PTO.LoopUnrolling) |
1248 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1249 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
1250 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
1251 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
1252 | PTO.ForgetAllSCEVInLoopUnroll))); |
1253 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
1254 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
1255 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
1256 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
1257 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
1258 | // or SimplifyCFG passes scheduled after us, that would cleanup |
1259 | // the CFG mess this may create if allowed to modify CFG, so forbid that. |
1260 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
1261 | } |
1262 | |
1263 | if (!IsFullLTO) { |
1264 | // Eliminate loads by forwarding stores from the previous iteration to loads |
1265 | // of the current iteration. |
1266 | FPM.addPass(Pass: LoopLoadEliminationPass()); |
1267 | } |
1268 | // Cleanup after the loop optimization passes. |
1269 | FPM.addPass(Pass: InstCombinePass()); |
1270 | |
1271 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
1272 | ExtraVectorPassManager ; |
1273 | // At higher optimization levels, try to clean up any runtime overlap and |
1274 | // alignment checks inserted by the vectorizer. We want to track correlated |
1275 | // runtime checks for two inner loops in the same outer loop, fold any |
1276 | // common computations, hoist loop-invariant aspects out of any outer loop, |
1277 | // and unswitch the runtime checks if possible. Once hoisted, we may have |
1278 | // dead (or speculatable) control flows or more combining opportunities. |
1279 | ExtraPasses.addPass(Pass: EarlyCSEPass()); |
1280 | ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass()); |
1281 | ExtraPasses.addPass(Pass: InstCombinePass()); |
1282 | LoopPassManager LPM; |
1283 | LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1284 | /*AllowSpeculation=*/true)); |
1285 | LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
1286 | OptimizationLevel::O3)); |
1287 | ExtraPasses.addPass( |
1288 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true, |
1289 | /*UseBlockFrequencyInfo=*/true)); |
1290 | ExtraPasses.addPass( |
1291 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
1292 | ExtraPasses.addPass(Pass: InstCombinePass()); |
1293 | FPM.addPass(Pass: std::move(ExtraPasses)); |
1294 | } |
1295 | |
1296 | // Now that we've formed fast to execute loop structures, we do further |
1297 | // optimizations. These are run afterward as they might block doing complex |
1298 | // analyses and transforms such as what are needed for loop vectorization. |
1299 | |
1300 | // Cleanup after loop vectorization, etc. Simplification passes like CVP and |
1301 | // GVN, loop transforms, and others have already run, so it's now better to |
1302 | // convert to more optimized IR using more aggressive simplify CFG options. |
1303 | // The extra sinking transform can create larger basic blocks, so do this |
1304 | // before SLP vectorization. |
1305 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
1306 | .forwardSwitchCondToPhi(B: true) |
1307 | .convertSwitchRangeToICmp(B: true) |
1308 | .convertSwitchToLookupTable(B: true) |
1309 | .needCanonicalLoops(B: false) |
1310 | .hoistCommonInsts(B: true) |
1311 | .sinkCommonInsts(B: true))); |
1312 | |
1313 | if (IsFullLTO) { |
1314 | FPM.addPass(Pass: SCCPPass()); |
1315 | FPM.addPass(Pass: InstCombinePass()); |
1316 | FPM.addPass(Pass: BDCEPass()); |
1317 | } |
1318 | |
1319 | // Optimize parallel scalar instruction chains into SIMD instructions. |
1320 | if (PTO.SLPVectorization) { |
1321 | FPM.addPass(Pass: SLPVectorizerPass()); |
1322 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
1323 | FPM.addPass(Pass: EarlyCSEPass()); |
1324 | } |
1325 | } |
1326 | // Enhance/cleanup vector code. |
1327 | FPM.addPass(Pass: VectorCombinePass()); |
1328 | |
1329 | if (!IsFullLTO) { |
1330 | FPM.addPass(Pass: InstCombinePass()); |
1331 | // Unroll small loops to hide loop backedge latency and saturate any |
1332 | // parallel execution resources of an out-of-order processor. We also then |
1333 | // need to clean up redundancies and loop invariant code. |
1334 | // FIXME: It would be really good to use a loop-integrated instruction |
1335 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
1336 | // across the loop nests. |
1337 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
1338 | if (EnableUnrollAndJam && PTO.LoopUnrolling) { |
1339 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1340 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
1341 | } |
1342 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
1343 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
1344 | PTO.ForgetAllSCEVInLoopUnroll))); |
1345 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
1346 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
1347 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
1348 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
1349 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
1350 | // or SimplifyCFG passes scheduled after us, that would cleanup |
1351 | // the CFG mess this may created if allowed to modify CFG, so forbid that. |
1352 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
1353 | } |
1354 | |
1355 | if (EnableInferAlignmentPass) |
1356 | FPM.addPass(Pass: InferAlignmentPass()); |
1357 | FPM.addPass(Pass: InstCombinePass()); |
1358 | |
1359 | // This is needed for two reasons: |
1360 | // 1. It works around problems that instcombine introduces, such as sinking |
1361 | // expensive FP divides into loops containing multiplications using the |
1362 | // divide result. |
1363 | // 2. It helps to clean up some loop-invariant code created by the loop |
1364 | // unroll pass when IsFullLTO=false. |
1365 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1366 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1367 | /*AllowSpeculation=*/true), |
1368 | /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); |
1369 | |
1370 | // Now that we've vectorized and unrolled loops, we may have more refined |
1371 | // alignment information, try to re-derive it here. |
1372 | FPM.addPass(Pass: AlignmentFromAssumptionsPass()); |
1373 | } |
1374 | |
1375 | ModulePassManager |
1376 | PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, |
1377 | ThinOrFullLTOPhase LTOPhase) { |
1378 | const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase); |
1379 | ModulePassManager MPM; |
1380 | |
1381 | // Run partial inlining pass to partially inline functions that have |
1382 | // large bodies. |
1383 | if (RunPartialInlining) |
1384 | MPM.addPass(Pass: PartialInlinerPass()); |
1385 | |
1386 | // Remove avail extern fns and globals definitions since we aren't compiling |
1387 | // an object file for later LTO. For LTO we want to preserve these so they |
1388 | // are eligible for inlining at link-time. Note if they are unreferenced they |
1389 | // will be removed by GlobalDCE later, so this only impacts referenced |
1390 | // available externally globals. Eventually they will be suppressed during |
1391 | // codegen, but eliminating here enables more opportunity for GlobalDCE as it |
1392 | // may make globals referenced by available external functions dead and saves |
1393 | // running remaining passes on the eliminated functions. These should be |
1394 | // preserved during prelinking for link-time inlining decisions. |
1395 | if (!LTOPreLink) |
1396 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
1397 | |
1398 | if (EnableOrderFileInstrumentation) |
1399 | MPM.addPass(Pass: InstrOrderFilePass()); |
1400 | |
1401 | // Do RPO function attribute inference across the module to forward-propagate |
1402 | // attributes where applicable. |
1403 | // FIXME: Is this really an optimization rather than a canonicalization? |
1404 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
1405 | |
1406 | // Do a post inline PGO instrumentation and use pass. This is a context |
1407 | // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as |
1408 | // cross-module inline has not been done yet. The context sensitive |
1409 | // instrumentation is after all the inlines are done. |
1410 | if (!LTOPreLink && PGOOpt) { |
1411 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
1412 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
1413 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1414 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1415 | FS: PGOOpt->FS); |
1416 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
1417 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
1418 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1419 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1420 | FS: PGOOpt->FS); |
1421 | } |
1422 | |
1423 | // Re-compute GlobalsAA here prior to function passes. This is particularly |
1424 | // useful as the above will have inlined, DCE'ed, and function-attr |
1425 | // propagated everything. We should at this point have a reasonably minimal |
1426 | // and richly annotated call graph. By computing aliasing and mod/ref |
1427 | // information for all local globals here, the late loop passes and notably |
1428 | // the vectorizer will be able to use them to help recognize vectorizable |
1429 | // memory operations. |
1430 | if (EnableGlobalAnalyses) |
1431 | MPM.addPass(Pass: RecomputeGlobalsAAPass()); |
1432 | |
1433 | invokeOptimizerEarlyEPCallbacks(MPM, Level); |
1434 | |
1435 | FunctionPassManager OptimizePM; |
1436 | // Scheduling LoopVersioningLICM when inlining is over, because after that |
1437 | // we may see more accurate aliasing. Reason to run this late is that too |
1438 | // early versioning may prevent further inlining due to increase of code |
1439 | // size. Other optimizations which runs later might get benefit of no-alias |
1440 | // assumption in clone loop. |
1441 | if (UseLoopVersioningLICM) { |
1442 | OptimizePM.addPass( |
1443 | Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass())); |
1444 | // LoopVersioningLICM pass might increase new LICM opportunities. |
1445 | OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1446 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1447 | /*AllowSpeculation=*/true), |
1448 | /*USeMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false)); |
1449 | } |
1450 | |
1451 | OptimizePM.addPass(Pass: Float2IntPass()); |
1452 | OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass()); |
1453 | |
1454 | if (EnableMatrix) { |
1455 | OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass()); |
1456 | OptimizePM.addPass(Pass: EarlyCSEPass()); |
1457 | } |
1458 | |
1459 | // CHR pass should only be applied with the profile information. |
1460 | // The check is to check the profile summary information in CHR. |
1461 | if (EnableCHR && Level == OptimizationLevel::O3) |
1462 | OptimizePM.addPass(Pass: ControlHeightReductionPass()); |
1463 | |
1464 | // FIXME: We need to run some loop optimizations to re-rotate loops after |
1465 | // simplifycfg and others undo their rotation. |
1466 | |
1467 | // Optimize the loop execution. These passes operate on entire loop nests |
1468 | // rather than on each loop in an inside-out manner, and so they are actually |
1469 | // function passes. |
1470 | |
1471 | invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level); |
1472 | |
1473 | LoopPassManager LPM; |
1474 | // First rotate loops that may have been un-rotated by prior passes. |
1475 | // Disable header duplication at -Oz. |
1476 | LPM.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
1477 | Level != OptimizationLevel::Oz, |
1478 | LTOPreLink)); |
1479 | // Some loops may have become dead by now. Try to delete them. |
1480 | // FIXME: see discussion in https://reviews.llvm.org/D112851, |
1481 | // this may need to be revisited once we run GVN before loop deletion |
1482 | // in the simplification pipeline. |
1483 | LPM.addPass(Pass: LoopDeletionPass()); |
1484 | OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1485 | Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); |
1486 | |
1487 | // Distribute loops to allow partial vectorization. I.e. isolate dependences |
1488 | // into separate loop that would otherwise inhibit vectorization. This is |
1489 | // currently only performed for loops marked with the metadata |
1490 | // llvm.loop.distribute=true or when -enable-loop-distribute is specified. |
1491 | OptimizePM.addPass(Pass: LoopDistributePass()); |
1492 | |
1493 | // Populates the VFABI attribute with the scalar-to-vector mappings |
1494 | // from the TargetLibraryInfo. |
1495 | OptimizePM.addPass(Pass: InjectTLIMappings()); |
1496 | |
1497 | addVectorPasses(Level, FPM&: OptimizePM, /* IsFullLTO */ false); |
1498 | |
1499 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
1500 | // canonicalization pass that enables other optimizations. As a result, |
1501 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
1502 | // result too early. |
1503 | OptimizePM.addPass(Pass: LoopSinkPass()); |
1504 | |
1505 | // And finally clean up LCSSA form before generating code. |
1506 | OptimizePM.addPass(Pass: InstSimplifyPass()); |
1507 | |
1508 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
1509 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
1510 | // flattening of blocks. |
1511 | OptimizePM.addPass(Pass: DivRemPairsPass()); |
1512 | |
1513 | // Try to annotate calls that were created during optimization. |
1514 | OptimizePM.addPass(Pass: TailCallElimPass()); |
1515 | |
1516 | // LoopSink (and other loop passes since the last simplifyCFG) might have |
1517 | // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. |
1518 | OptimizePM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
1519 | .convertSwitchRangeToICmp(B: true) |
1520 | .speculateUnpredictables(B: true))); |
1521 | |
1522 | // Add the core optimizing pipeline. |
1523 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM), |
1524 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1525 | |
1526 | invokeOptimizerLastEPCallbacks(MPM, Level); |
1527 | |
1528 | // Split out cold code. Splitting is done late to avoid hiding context from |
1529 | // other optimizations and inadvertently regressing performance. The tradeoff |
1530 | // is that this has a higher code size cost than splitting early. |
1531 | if (EnableHotColdSplit && !LTOPreLink) |
1532 | MPM.addPass(Pass: HotColdSplittingPass()); |
1533 | |
1534 | // Search the code for similar regions of code. If enough similar regions can |
1535 | // be found where extracting the regions into their own function will decrease |
1536 | // the size of the program, we extract the regions, a deduplicate the |
1537 | // structurally similar regions. |
1538 | if (EnableIROutliner) |
1539 | MPM.addPass(Pass: IROutlinerPass()); |
1540 | |
1541 | // Now we need to do some global optimization transforms. |
1542 | // FIXME: It would seem like these should come first in the optimization |
1543 | // pipeline and maybe be the bottom of the canonicalization pipeline? Weird |
1544 | // ordering here. |
1545 | MPM.addPass(Pass: GlobalDCEPass()); |
1546 | MPM.addPass(Pass: ConstantMergePass()); |
1547 | |
1548 | // Merge functions if requested. It has a better chance to merge functions |
1549 | // after ConstantMerge folded jump tables. |
1550 | if (PTO.MergeFunctions) |
1551 | MPM.addPass(Pass: MergeFunctionsPass()); |
1552 | |
1553 | if (PTO.CallGraphProfile && !LTOPreLink) |
1554 | MPM.addPass(Pass: CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || |
1555 | LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink)); |
1556 | |
1557 | // TODO: Relative look table converter pass caused an issue when full lto is |
1558 | // enabled. See https://reviews.llvm.org/D94355 for more details. |
1559 | // Until the issue fixed, disable this pass during pre-linking phase. |
1560 | if (!LTOPreLink) |
1561 | MPM.addPass(Pass: RelLookupTableConverterPass()); |
1562 | |
1563 | return MPM; |
1564 | } |
1565 | |
1566 | ModulePassManager |
1567 | PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, |
1568 | bool LTOPreLink) { |
1569 | if (Level == OptimizationLevel::O0) |
1570 | return buildO0DefaultPipeline(Level, LTOPreLink); |
1571 | |
1572 | ModulePassManager MPM; |
1573 | |
1574 | // Convert @llvm.global.annotations to !annotation metadata. |
1575 | MPM.addPass(Pass: Annotation2MetadataPass()); |
1576 | |
1577 | // Force any function attributes we want the rest of the pipeline to observe. |
1578 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
1579 | |
1580 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
1581 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
1582 | |
1583 | // Apply module pipeline start EP callback. |
1584 | invokePipelineStartEPCallbacks(MPM, Level); |
1585 | |
1586 | const ThinOrFullLTOPhase LTOPhase = LTOPreLink |
1587 | ? ThinOrFullLTOPhase::FullLTOPreLink |
1588 | : ThinOrFullLTOPhase::None; |
1589 | // Add the core simplification pipeline. |
1590 | MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase: LTOPhase)); |
1591 | |
1592 | // Now add the optimization pipeline. |
1593 | MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase)); |
1594 | |
1595 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
1596 | PGOOpt->Action == PGOOptions::SampleUse) |
1597 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
1598 | |
1599 | // Emit annotation remarks. |
1600 | addAnnotationRemarksPass(MPM); |
1601 | |
1602 | if (LTOPreLink) |
1603 | addRequiredLTOPreLinkPasses(MPM); |
1604 | return MPM; |
1605 | } |
1606 | |
1607 | ModulePassManager |
1608 | PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, |
1609 | bool EmitSummary) { |
1610 | ModulePassManager MPM; |
1611 | if (ThinLTO) |
1612 | MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level)); |
1613 | else |
1614 | MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level)); |
1615 | MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary)); |
1616 | |
1617 | // Use the ThinLTO post-link pipeline with sample profiling |
1618 | if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
1619 | MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr)); |
1620 | else { |
1621 | // otherwise, just use module optimization |
1622 | MPM.addPass( |
1623 | Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None)); |
1624 | // Emit annotation remarks. |
1625 | addAnnotationRemarksPass(MPM); |
1626 | } |
1627 | return MPM; |
1628 | } |
1629 | |
1630 | ModulePassManager |
1631 | PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
1632 | if (Level == OptimizationLevel::O0) |
1633 | return buildO0DefaultPipeline(Level, /*LTOPreLink*/true); |
1634 | |
1635 | ModulePassManager MPM; |
1636 | |
1637 | // Convert @llvm.global.annotations to !annotation metadata. |
1638 | MPM.addPass(Pass: Annotation2MetadataPass()); |
1639 | |
1640 | // Force any function attributes we want the rest of the pipeline to observe. |
1641 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
1642 | |
1643 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
1644 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
1645 | |
1646 | // Apply module pipeline start EP callback. |
1647 | invokePipelineStartEPCallbacks(MPM, Level); |
1648 | |
1649 | // If we are planning to perform ThinLTO later, we don't bloat the code with |
1650 | // unrolling/vectorization/... now. Just simplify the module as much as we |
1651 | // can. |
1652 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
1653 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink)); |
1654 | |
1655 | // Run partial inlining pass to partially inline functions that have |
1656 | // large bodies. |
1657 | // FIXME: It isn't clear whether this is really the right place to run this |
1658 | // in ThinLTO. Because there is another canonicalization and simplification |
1659 | // phase that will run after the thin link, running this here ends up with |
1660 | // less information than will be available later and it may grow functions in |
1661 | // ways that aren't beneficial. |
1662 | if (RunPartialInlining) |
1663 | MPM.addPass(Pass: PartialInlinerPass()); |
1664 | |
1665 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
1666 | PGOOpt->Action == PGOOptions::SampleUse) |
1667 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
1668 | |
1669 | // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual |
1670 | // optimization is going to be done in PostLink stage, but clang can't add |
1671 | // callbacks there in case of in-process ThinLTO called by linker. |
1672 | invokeOptimizerEarlyEPCallbacks(MPM, Level); |
1673 | invokeOptimizerLastEPCallbacks(MPM, Level); |
1674 | |
1675 | // Emit annotation remarks. |
1676 | addAnnotationRemarksPass(MPM); |
1677 | |
1678 | addRequiredLTOPreLinkPasses(MPM); |
1679 | |
1680 | return MPM; |
1681 | } |
1682 | |
1683 | ModulePassManager PassBuilder::buildThinLTODefaultPipeline( |
1684 | OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { |
1685 | ModulePassManager MPM; |
1686 | |
1687 | if (ImportSummary) { |
1688 | // For ThinLTO we must apply the context disambiguation decisions early, to |
1689 | // ensure we can correctly match the callsites to summary data. |
1690 | if (EnableMemProfContextDisambiguation) |
1691 | MPM.addPass(Pass: MemProfContextDisambiguation(ImportSummary)); |
1692 | |
1693 | // These passes import type identifier resolutions for whole-program |
1694 | // devirtualization and CFI. They must run early because other passes may |
1695 | // disturb the specific instruction patterns that these passes look for, |
1696 | // creating dependencies on resolutions that may not appear in the summary. |
1697 | // |
1698 | // For example, GVN may transform the pattern assume(type.test) appearing in |
1699 | // two basic blocks into assume(phi(type.test, type.test)), which would |
1700 | // transform a dependency on a WPD resolution into a dependency on a type |
1701 | // identifier resolution for CFI. |
1702 | // |
1703 | // Also, WPD has access to more precise information than ICP and can |
1704 | // devirtualize more effectively, so it should operate on the IR first. |
1705 | // |
1706 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
1707 | // metadata and intrinsics. |
1708 | MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary)); |
1709 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary)); |
1710 | } |
1711 | |
1712 | if (Level == OptimizationLevel::O0) { |
1713 | // Run a second time to clean up any type tests left behind by WPD for use |
1714 | // in ICP. |
1715 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1716 | // Drop available_externally and unreferenced globals. This is necessary |
1717 | // with ThinLTO in order to avoid leaving undefined references to dead |
1718 | // globals in the object file. |
1719 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
1720 | MPM.addPass(Pass: GlobalDCEPass()); |
1721 | return MPM; |
1722 | } |
1723 | |
1724 | // Add the core simplification pipeline. |
1725 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
1726 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
1727 | |
1728 | // Now add the optimization pipeline. |
1729 | MPM.addPass(Pass: buildModuleOptimizationPipeline( |
1730 | Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
1731 | |
1732 | // Emit annotation remarks. |
1733 | addAnnotationRemarksPass(MPM); |
1734 | |
1735 | return MPM; |
1736 | } |
1737 | |
1738 | ModulePassManager |
1739 | PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
1740 | // FIXME: We should use a customized pre-link pipeline! |
1741 | return buildPerModuleDefaultPipeline(Level, |
1742 | /* LTOPreLink */ true); |
1743 | } |
1744 | |
1745 | ModulePassManager |
1746 | PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, |
1747 | ModuleSummaryIndex *ExportSummary) { |
1748 | ModulePassManager MPM; |
1749 | |
1750 | invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level); |
1751 | |
1752 | // Create a function that performs CFI checks for cross-DSO calls with targets |
1753 | // in the current module. |
1754 | MPM.addPass(Pass: CrossDSOCFIPass()); |
1755 | |
1756 | if (Level == OptimizationLevel::O0) { |
1757 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
1758 | // metadata and intrinsics. |
1759 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
1760 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
1761 | // Run a second time to clean up any type tests left behind by WPD for use |
1762 | // in ICP. |
1763 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1764 | |
1765 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
1766 | |
1767 | // Emit annotation remarks. |
1768 | addAnnotationRemarksPass(MPM); |
1769 | |
1770 | return MPM; |
1771 | } |
1772 | |
1773 | if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { |
1774 | // Load sample profile before running the LTO optimization pipeline. |
1775 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
1776 | PGOOpt->ProfileRemappingFile, |
1777 | ThinOrFullLTOPhase::FullLTOPostLink)); |
1778 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
1779 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
1780 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
1781 | } |
1782 | |
1783 | // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. |
1784 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
1785 | |
1786 | // Remove unused virtual tables to improve the quality of code generated by |
1787 | // whole-program devirtualization and bitset lowering. |
1788 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
1789 | |
1790 | // Do basic inference of function attributes from known properties of system |
1791 | // libraries and other oracles. |
1792 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
1793 | |
1794 | if (Level.getSpeedupLevel() > 1) { |
1795 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
1796 | Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1797 | |
1798 | // Indirect call promotion. This should promote all the targets that are |
1799 | // left by the earlier promotion pass that promotes intra-module targets. |
1800 | // This two-step promotion is to save the compile time. For LTO, it should |
1801 | // produce the same result as if we only do promotion here. |
1802 | MPM.addPass(Pass: PGOIndirectCallPromotion( |
1803 | true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
1804 | |
1805 | // Propagate constants at call sites into the functions they call. This |
1806 | // opens opportunities for globalopt (and inlining) by substituting function |
1807 | // pointers passed as arguments to direct uses of functions. |
1808 | MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/ |
1809 | Level != OptimizationLevel::Os && |
1810 | Level != OptimizationLevel::Oz))); |
1811 | |
1812 | // Attach metadata to indirect call sites indicating the set of functions |
1813 | // they may target at run-time. This should follow IPSCCP. |
1814 | MPM.addPass(Pass: CalledValuePropagationPass()); |
1815 | } |
1816 | |
1817 | // Now deduce any function attributes based in the current code. |
1818 | MPM.addPass( |
1819 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass())); |
1820 | |
1821 | // Do RPO function attribute inference across the module to forward-propagate |
1822 | // attributes where applicable. |
1823 | // FIXME: Is this really an optimization rather than a canonicalization? |
1824 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
1825 | |
1826 | // Use in-range annotations on GEP indices to split globals where beneficial. |
1827 | MPM.addPass(Pass: GlobalSplitPass()); |
1828 | |
1829 | // Run whole program optimization of virtual call when the list of callees |
1830 | // is fixed. |
1831 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
1832 | |
1833 | // Stop here at -O1. |
1834 | if (Level == OptimizationLevel::O1) { |
1835 | // The LowerTypeTestsPass needs to run to lower type metadata and the |
1836 | // type.test intrinsics. The pass does nothing if CFI is disabled. |
1837 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
1838 | // Run a second time to clean up any type tests left behind by WPD for use |
1839 | // in ICP (which is performed earlier than this in the regular LTO |
1840 | // pipeline). |
1841 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
1842 | |
1843 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
1844 | |
1845 | // Emit annotation remarks. |
1846 | addAnnotationRemarksPass(MPM); |
1847 | |
1848 | return MPM; |
1849 | } |
1850 | |
1851 | // Optimize globals to try and fold them into constants. |
1852 | MPM.addPass(Pass: GlobalOptPass()); |
1853 | |
1854 | // Promote any localized globals to SSA registers. |
1855 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass())); |
1856 | |
1857 | // Linking modules together can lead to duplicate global constant, only |
1858 | // keep one copy of each constant. |
1859 | MPM.addPass(Pass: ConstantMergePass()); |
1860 | |
1861 | // Remove unused arguments from functions. |
1862 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
1863 | |
1864 | // Reduce the code after globalopt and ipsccp. Both can open up significant |
1865 | // simplification opportunities, and both can propagate functions through |
1866 | // function pointers. When this happens, we often have to resolve varargs |
1867 | // calls, etc, so let instcombine do this. |
1868 | FunctionPassManager PeepholeFPM; |
1869 | PeepholeFPM.addPass(Pass: InstCombinePass()); |
1870 | if (Level.getSpeedupLevel() > 1) |
1871 | PeepholeFPM.addPass(Pass: AggressiveInstCombinePass()); |
1872 | invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level); |
1873 | |
1874 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM), |
1875 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1876 | |
1877 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
1878 | // generally clean up exception handling overhead. It isn't clear this is |
1879 | // valuable as the inliner doesn't currently care whether it is inlining an |
1880 | // invoke or a call. |
1881 | // Run the inliner now. |
1882 | if (EnableModuleInliner) { |
1883 | MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level), |
1884 | UseInlineAdvisor, |
1885 | ThinOrFullLTOPhase::FullLTOPostLink)); |
1886 | } else { |
1887 | MPM.addPass(Pass: ModuleInlinerWrapperPass( |
1888 | getInlineParamsFromOptLevel(Level), |
1889 | /* MandatoryFirst */ true, |
1890 | InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink, |
1891 | .Pass: InlinePass::CGSCCInliner})); |
1892 | } |
1893 | |
1894 | // Perform context disambiguation after inlining, since that would reduce the |
1895 | // amount of additional cloning required to distinguish the allocation |
1896 | // contexts. |
1897 | if (EnableMemProfContextDisambiguation) |
1898 | MPM.addPass(Pass: MemProfContextDisambiguation()); |
1899 | |
1900 | // Optimize globals again after we ran the inliner. |
1901 | MPM.addPass(Pass: GlobalOptPass()); |
1902 | |
1903 | // Run the OpenMPOpt pass again after global optimizations. |
1904 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
1905 | |
1906 | // Garbage collect dead functions. |
1907 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
1908 | |
1909 | // If we didn't decide to inline a function, check to see if we can |
1910 | // transform it to pass arguments by value instead of by reference. |
1911 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: ArgumentPromotionPass())); |
1912 | |
1913 | FunctionPassManager FPM; |
1914 | // The IPO Passes may leave cruft around. Clean up after them. |
1915 | FPM.addPass(Pass: InstCombinePass()); |
1916 | invokePeepholeEPCallbacks(FPM, Level); |
1917 | |
1918 | if (EnableConstraintElimination) |
1919 | FPM.addPass(Pass: ConstraintEliminationPass()); |
1920 | |
1921 | FPM.addPass(Pass: JumpThreadingPass()); |
1922 | |
1923 | // Do a post inline PGO instrumentation and use pass. This is a context |
1924 | // sensitive PGO pass. |
1925 | if (PGOOpt) { |
1926 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
1927 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
1928 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1929 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1930 | FS: PGOOpt->FS); |
1931 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
1932 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
1933 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
1934 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
1935 | FS: PGOOpt->FS); |
1936 | } |
1937 | |
1938 | // Break up allocas |
1939 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
1940 | |
1941 | // LTO provides additional opportunities for tailcall elimination due to |
1942 | // link-time inlining, and visibility of nocapture attribute. |
1943 | FPM.addPass(Pass: TailCallElimPass()); |
1944 | |
1945 | // Run a few AA driver optimizations here and now to cleanup the code. |
1946 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM), |
1947 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
1948 | |
1949 | MPM.addPass( |
1950 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass())); |
1951 | |
1952 | // Require the GlobalsAA analysis for the module so we can query it within |
1953 | // MainFPM. |
1954 | if (EnableGlobalAnalyses) { |
1955 | MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
1956 | // Invalidate AAManager so it can be recreated and pick up the newly |
1957 | // available GlobalsAA. |
1958 | MPM.addPass( |
1959 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
1960 | } |
1961 | |
1962 | FunctionPassManager MainFPM; |
1963 | MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1964 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
1965 | /*AllowSpeculation=*/true), |
1966 | /*USeMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false)); |
1967 | |
1968 | if (RunNewGVN) |
1969 | MainFPM.addPass(Pass: NewGVNPass()); |
1970 | else |
1971 | MainFPM.addPass(Pass: GVNPass()); |
1972 | |
1973 | // Remove dead memcpy()'s. |
1974 | MainFPM.addPass(Pass: MemCpyOptPass()); |
1975 | |
1976 | // Nuke dead stores. |
1977 | MainFPM.addPass(Pass: DSEPass()); |
1978 | MainFPM.addPass(Pass: MoveAutoInitPass()); |
1979 | MainFPM.addPass(Pass: MergedLoadStoreMotionPass()); |
1980 | |
1981 | LoopPassManager LPM; |
1982 | if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) |
1983 | LPM.addPass(Pass: LoopFlattenPass()); |
1984 | LPM.addPass(Pass: IndVarSimplifyPass()); |
1985 | LPM.addPass(Pass: LoopDeletionPass()); |
1986 | // FIXME: Add loop interchange. |
1987 | |
1988 | // Unroll small loops and perform peeling. |
1989 | LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
1990 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
1991 | PTO.ForgetAllSCEVInLoopUnroll)); |
1992 | // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. |
1993 | // *All* loop passes must preserve it, in order to be able to use it. |
1994 | MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
1995 | Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); |
1996 | |
1997 | MainFPM.addPass(Pass: LoopDistributePass()); |
1998 | |
1999 | addVectorPasses(Level, FPM&: MainFPM, /* IsFullLTO */ true); |
2000 | |
2001 | // Run the OpenMPOpt CGSCC pass again late. |
2002 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
2003 | Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink))); |
2004 | |
2005 | invokePeepholeEPCallbacks(FPM&: MainFPM, Level); |
2006 | MainFPM.addPass(Pass: JumpThreadingPass()); |
2007 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM), |
2008 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
2009 | |
2010 | // Lower type metadata and the type.test intrinsic. This pass supports |
2011 | // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs |
2012 | // to be run at link time if CFI is enabled. This pass does nothing if |
2013 | // CFI is disabled. |
2014 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
2015 | // Run a second time to clean up any type tests left behind by WPD for use |
2016 | // in ICP (which is performed earlier than this in the regular LTO pipeline). |
2017 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true)); |
2018 | |
2019 | // Enable splitting late in the FullLTO post-link pipeline. |
2020 | if (EnableHotColdSplit) |
2021 | MPM.addPass(Pass: HotColdSplittingPass()); |
2022 | |
2023 | // Add late LTO optimization passes. |
2024 | FunctionPassManager LateFPM; |
2025 | |
2026 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
2027 | // canonicalization pass that enables other optimizations. As a result, |
2028 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
2029 | // result too early. |
2030 | LateFPM.addPass(Pass: LoopSinkPass()); |
2031 | |
2032 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
2033 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
2034 | // flattening of blocks. |
2035 | LateFPM.addPass(Pass: DivRemPairsPass()); |
2036 | |
2037 | // Delete basic blocks, which optimization passes may have killed. |
2038 | LateFPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
2039 | .convertSwitchRangeToICmp(B: true) |
2040 | .hoistCommonInsts(B: true) |
2041 | .speculateUnpredictables(B: true))); |
2042 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM))); |
2043 | |
  // Drop bodies of available externally objects to improve GlobalDCE.
2045 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
2046 | |
2047 | // Now that we have optimized the program, discard unreachable functions. |
2048 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
2049 | |
2050 | if (PTO.MergeFunctions) |
2051 | MPM.addPass(Pass: MergeFunctionsPass()); |
2052 | |
2053 | if (PTO.CallGraphProfile) |
2054 | MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true)); |
2055 | |
2056 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
2057 | |
2058 | // Emit annotation remarks. |
2059 | addAnnotationRemarksPass(MPM); |
2060 | |
2061 | return MPM; |
2062 | } |
2063 | |
2064 | ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, |
2065 | bool LTOPreLink) { |
2066 | assert(Level == OptimizationLevel::O0 && |
2067 | "buildO0DefaultPipeline should only be used with O0" ); |
2068 | |
2069 | ModulePassManager MPM; |
2070 | |
2071 | // Perform pseudo probe instrumentation in O0 mode. This is for the |
2072 | // consistency between different build modes. For example, a LTO build can be |
2073 | // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in |
2074 | // the postlink will require pseudo probe instrumentation in the prelink. |
2075 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling) |
2076 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
2077 | |
2078 | if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || |
2079 | PGOOpt->Action == PGOOptions::IRUse)) |
2080 | addPGOInstrPassesForO0( |
2081 | MPM, |
2082 | /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr), |
2083 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile, |
2084 | ProfileRemappingFile: PGOOpt->ProfileRemappingFile, FS: PGOOpt->FS); |
2085 | |
2086 | // Instrument function entry and exit before all inlining. |
2087 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
2088 | Pass: EntryExitInstrumenterPass(/*PostInlining=*/false))); |
2089 | |
2090 | invokePipelineStartEPCallbacks(MPM, Level); |
2091 | |
2092 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
2093 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
2094 | |
2095 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level); |
2096 | |
2097 | // Build a minimal pipeline based on the semantics required by LLVM, |
2098 | // which is just that always inlining occurs. Further, disable generating |
2099 | // lifetime intrinsics to avoid enabling further optimizations during |
2100 | // code generation. |
2101 | MPM.addPass(Pass: AlwaysInlinerPass( |
2102 | /*InsertLifetimeIntrinsics=*/false)); |
2103 | |
2104 | if (PTO.MergeFunctions) |
2105 | MPM.addPass(Pass: MergeFunctionsPass()); |
2106 | |
2107 | if (EnableMatrix) |
2108 | MPM.addPass( |
2109 | Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true))); |
2110 | |
2111 | if (!CGSCCOptimizerLateEPCallbacks.empty()) { |
2112 | CGSCCPassManager CGPM; |
2113 | invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level); |
2114 | if (!CGPM.isEmpty()) |
2115 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
2116 | } |
2117 | if (!LateLoopOptimizationsEPCallbacks.empty()) { |
2118 | LoopPassManager LPM; |
2119 | invokeLateLoopOptimizationsEPCallbacks(LPM, Level); |
2120 | if (!LPM.isEmpty()) { |
2121 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
2122 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
2123 | } |
2124 | } |
2125 | if (!LoopOptimizerEndEPCallbacks.empty()) { |
2126 | LoopPassManager LPM; |
2127 | invokeLoopOptimizerEndEPCallbacks(LPM, Level); |
2128 | if (!LPM.isEmpty()) { |
2129 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
2130 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
2131 | } |
2132 | } |
2133 | if (!ScalarOptimizerLateEPCallbacks.empty()) { |
2134 | FunctionPassManager FPM; |
2135 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
2136 | if (!FPM.isEmpty()) |
2137 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
2138 | } |
2139 | |
2140 | invokeOptimizerEarlyEPCallbacks(MPM, Level); |
2141 | |
2142 | if (!VectorizerStartEPCallbacks.empty()) { |
2143 | FunctionPassManager FPM; |
2144 | invokeVectorizerStartEPCallbacks(FPM, Level); |
2145 | if (!FPM.isEmpty()) |
2146 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
2147 | } |
2148 | |
2149 | ModulePassManager CoroPM; |
2150 | CoroPM.addPass(Pass: CoroEarlyPass()); |
2151 | CGSCCPassManager CGPM; |
2152 | CGPM.addPass(Pass: CoroSplitPass()); |
2153 | CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
2154 | CoroPM.addPass(Pass: CoroCleanupPass()); |
2155 | CoroPM.addPass(Pass: GlobalDCEPass()); |
2156 | MPM.addPass(Pass: CoroConditionalWrapper(std::move(CoroPM))); |
2157 | |
2158 | invokeOptimizerLastEPCallbacks(MPM, Level); |
2159 | |
2160 | if (LTOPreLink) |
2161 | addRequiredLTOPreLinkPasses(MPM); |
2162 | |
2163 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass())); |
2164 | |
2165 | return MPM; |
2166 | } |
2167 | |
2168 | AAManager PassBuilder::buildDefaultAAPipeline() { |
2169 | AAManager AA; |
2170 | |
2171 | // The order in which these are registered determines their priority when |
2172 | // being queried. |
2173 | |
2174 | // First we register the basic alias analysis that provides the majority of |
2175 | // per-function local AA logic. This is a stateless, on-demand local set of |
2176 | // AA techniques. |
2177 | AA.registerFunctionAnalysis<BasicAA>(); |
2178 | |
2179 | // Next we query fast, specialized alias analyses that wrap IR-embedded |
2180 | // information about aliasing. |
2181 | AA.registerFunctionAnalysis<ScopedNoAliasAA>(); |
2182 | AA.registerFunctionAnalysis<TypeBasedAA>(); |
2183 | |
2184 | // Add support for querying global aliasing information when available. |
2185 | // Because the `AAManager` is a function analysis and `GlobalsAA` is a module |
2186 | // analysis, all that the `AAManager` can do is query for any *cached* |
2187 | // results from `GlobalsAA` through a readonly proxy. |
2188 | if (EnableGlobalAnalyses) |
2189 | AA.registerModuleAnalysis<GlobalsAA>(); |
2190 | |
2191 | // Add target-specific alias analyses. |
2192 | if (TM) |
2193 | TM->registerDefaultAliasAnalyses(AA); |
2194 | |
2195 | return AA; |
2196 | } |
2197 | |