1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BasicAliasAnalysis.h"
20#include "llvm/Analysis/CGSCCPassManager.h"
21#include "llvm/Analysis/CtxProfAnalysis.h"
22#include "llvm/Analysis/GlobalsModRef.h"
23#include "llvm/Analysis/InlineAdvisor.h"
24#include "llvm/Analysis/ProfileSummaryInfo.h"
25#include "llvm/Analysis/ScopedNoAliasAA.h"
26#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
29#include "llvm/Passes/OptimizationLevel.h"
30#include "llvm/Passes/PassBuilder.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/PGOOptions.h"
34#include "llvm/Support/VirtualFileSystem.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
37#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
38#include "llvm/Transforms/Coroutines/CoroCleanup.h"
39#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
40#include "llvm/Transforms/Coroutines/CoroEarly.h"
41#include "llvm/Transforms/Coroutines/CoroElide.h"
42#include "llvm/Transforms/Coroutines/CoroSplit.h"
43#include "llvm/Transforms/HipStdPar/HipStdPar.h"
44#include "llvm/Transforms/IPO/AlwaysInliner.h"
45#include "llvm/Transforms/IPO/Annotation2Metadata.h"
46#include "llvm/Transforms/IPO/ArgumentPromotion.h"
47#include "llvm/Transforms/IPO/Attributor.h"
48#include "llvm/Transforms/IPO/CalledValuePropagation.h"
49#include "llvm/Transforms/IPO/ConstantMerge.h"
50#include "llvm/Transforms/IPO/CrossDSOCFI.h"
51#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
52#include "llvm/Transforms/IPO/ElimAvailExtern.h"
53#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
54#include "llvm/Transforms/IPO/ExpandVariadics.h"
55#include "llvm/Transforms/IPO/FatLTOCleanup.h"
56#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
57#include "llvm/Transforms/IPO/FunctionAttrs.h"
58#include "llvm/Transforms/IPO/GlobalDCE.h"
59#include "llvm/Transforms/IPO/GlobalOpt.h"
60#include "llvm/Transforms/IPO/GlobalSplit.h"
61#include "llvm/Transforms/IPO/HotColdSplitting.h"
62#include "llvm/Transforms/IPO/IROutliner.h"
63#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
64#include "llvm/Transforms/IPO/Inliner.h"
65#include "llvm/Transforms/IPO/LowerTypeTests.h"
66#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
67#include "llvm/Transforms/IPO/MergeFunctions.h"
68#include "llvm/Transforms/IPO/ModuleInliner.h"
69#include "llvm/Transforms/IPO/OpenMPOpt.h"
70#include "llvm/Transforms/IPO/PartialInlining.h"
71#include "llvm/Transforms/IPO/SCCP.h"
72#include "llvm/Transforms/IPO/SampleProfile.h"
73#include "llvm/Transforms/IPO/SampleProfileProbe.h"
74#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
75#include "llvm/Transforms/InstCombine/InstCombine.h"
76#include "llvm/Transforms/Instrumentation/CGProfile.h"
77#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
78#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
79#include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h"
80#include "llvm/Transforms/Instrumentation/MemProfUse.h"
81#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
82#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
83#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
84#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
85#include "llvm/Transforms/Scalar/ADCE.h"
86#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
87#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
88#include "llvm/Transforms/Scalar/BDCE.h"
89#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
90#include "llvm/Transforms/Scalar/ConstraintElimination.h"
91#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
92#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
93#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
94#include "llvm/Transforms/Scalar/DivRemPairs.h"
95#include "llvm/Transforms/Scalar/EarlyCSE.h"
96#include "llvm/Transforms/Scalar/Float2Int.h"
97#include "llvm/Transforms/Scalar/GVN.h"
98#include "llvm/Transforms/Scalar/IndVarSimplify.h"
99#include "llvm/Transforms/Scalar/InferAlignment.h"
100#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
101#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
102#include "llvm/Transforms/Scalar/JumpThreading.h"
103#include "llvm/Transforms/Scalar/LICM.h"
104#include "llvm/Transforms/Scalar/LoopDeletion.h"
105#include "llvm/Transforms/Scalar/LoopDistribute.h"
106#include "llvm/Transforms/Scalar/LoopFlatten.h"
107#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
108#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
109#include "llvm/Transforms/Scalar/LoopInterchange.h"
110#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
111#include "llvm/Transforms/Scalar/LoopPassManager.h"
112#include "llvm/Transforms/Scalar/LoopRotation.h"
113#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
114#include "llvm/Transforms/Scalar/LoopSink.h"
115#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
116#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
117#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
118#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
119#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
120#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
121#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
122#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
123#include "llvm/Transforms/Scalar/NewGVN.h"
124#include "llvm/Transforms/Scalar/Reassociate.h"
125#include "llvm/Transforms/Scalar/SCCP.h"
126#include "llvm/Transforms/Scalar/SROA.h"
127#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
128#include "llvm/Transforms/Scalar/SimplifyCFG.h"
129#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
130#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
131#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
132#include "llvm/Transforms/Utils/AddDiscriminators.h"
133#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
134#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
135#include "llvm/Transforms/Utils/CountVisits.h"
136#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
137#include "llvm/Transforms/Utils/ExtraPassManager.h"
138#include "llvm/Transforms/Utils/InjectTLIMappings.h"
139#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
140#include "llvm/Transforms/Utils/Mem2Reg.h"
141#include "llvm/Transforms/Utils/MoveAutoInit.h"
142#include "llvm/Transforms/Utils/NameAnonGlobals.h"
143#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
144#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
145#include "llvm/Transforms/Vectorize/LoopVectorize.h"
146#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
147#include "llvm/Transforms/Vectorize/VectorCombine.h"
148
149using namespace llvm;
150
151static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
152 "enable-ml-inliner", cl::init(Val: InliningAdvisorMode::Default), cl::Hidden,
153 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
154 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
155 "Heuristics-based inliner version"),
156 clEnumValN(InliningAdvisorMode::Development, "development",
157 "Use development mode (runtime-loadable model)"),
158 clEnumValN(InliningAdvisorMode::Release, "release",
159 "Use release mode (AOT-compiled model)")));
160
161/// Flag to enable inline deferral during PGO.
162static cl::opt<bool>
163 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(Val: true),
164 cl::Hidden,
165 cl::desc("Enable inline deferral during PGO"));
166
167static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
168 cl::init(Val: false), cl::Hidden,
169 cl::desc("Enable module inliner"));
170
171static cl::opt<bool> PerformMandatoryInliningsFirst(
172 "mandatory-inlining-first", cl::init(Val: false), cl::Hidden,
173 cl::desc("Perform mandatory inlinings module-wide, before performing "
174 "inlining"));
175
176static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
177 "eagerly-invalidate-analyses", cl::init(Val: true), cl::Hidden,
178 cl::desc("Eagerly invalidate more analyses in default pipelines"));
179
180static cl::opt<bool> EnableMergeFunctions(
181 "enable-merge-functions", cl::init(Val: false), cl::Hidden,
182 cl::desc("Enable function merging as part of the optimization pipeline"));
183
184static cl::opt<bool> EnablePostPGOLoopRotation(
185 "enable-post-pgo-loop-rotation", cl::init(Val: true), cl::Hidden,
186 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
187
188static cl::opt<bool> EnableGlobalAnalyses(
189 "enable-global-analyses", cl::init(Val: true), cl::Hidden,
190 cl::desc("Enable inter-procedural analyses"));
191
192static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
193 cl::init(Val: false), cl::Hidden,
194 cl::desc("Run Partial inlining pass"));
195
196static cl::opt<bool> ExtraVectorizerPasses(
197 "extra-vectorizer-passes", cl::init(Val: false), cl::Hidden,
198 cl::desc("Run cleanup optimization passes after vectorization"));
199
200static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(Val: false), cl::Hidden,
201 cl::desc("Run the NewGVN pass"));
202
203static cl::opt<bool>
204 EnableLoopInterchange("enable-loopinterchange", cl::init(Val: false), cl::Hidden,
205 cl::desc("Enable the LoopInterchange Pass"));
206
207static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
208 cl::init(Val: false), cl::Hidden,
209 cl::desc("Enable Unroll And Jam Pass"));
210
211static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(Val: false),
212 cl::Hidden,
213 cl::desc("Enable the LoopFlatten Pass"));
214
215// Experimentally allow loop header duplication. This should allow for better
216// optimization at Oz, since loop-idiom recognition can then recognize things
217// like memcpy. If this ends up being useful for many targets, we should drop
218// this flag and make a code generation option that can be controlled
219// independent of the opt level and exposed through the frontend.
220static cl::opt<bool> EnableLoopHeaderDuplication(
221 "enable-loop-header-duplication", cl::init(Val: false), cl::Hidden,
222 cl::desc("Enable loop header duplication at any optimization level"));
223
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(Val: false), cl::Hidden);
228
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(Val: false),
234 cl::Hidden,
235 cl::desc("Enable ir outliner pass"));
236
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(Val: false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
241static cl::opt<int> PreInlineThreshold(
242 "preinline-threshold", cl::Hidden, cl::init(Val: 75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
254static cl::opt<bool> EnableJumpTableToSwitch(
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used in simplifying testing SampleFDO optimizations for
259// profile loading.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(Val: true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
264static cl::opt<bool> FlattenedProfileUsed(
265 "flattened-profile-used", cl::init(Val: false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267 "no inline hierarchy exists in the profile"));
268
269static cl::opt<bool>
270 EnableMatrix("enable-matrix", cl::init(Val: false), cl::Hidden,
271 cl::desc("Enable lowering of the matrix intrinsics"));
272
273static cl::opt<bool> EnableConstraintElimination(
274 "enable-constraint-elimination", cl::init(Val: true), cl::Hidden,
275 cl::desc(
276 "Enable pass to eliminate conditions based on linear constraints"));
277
278static cl::opt<AttributorRunOption> AttributorRun(
279 "attributor-enable", cl::Hidden, cl::init(Val: AttributorRunOption::NONE),
280 cl::desc("Enable the attributor inter-procedural deduction pass"),
281 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
282 "enable all attributor runs"),
283 clEnumValN(AttributorRunOption::MODULE, "module",
284 "enable module-wide attributor runs"),
285 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
286 "enable call graph SCC attributor runs"),
287 clEnumValN(AttributorRunOption::NONE, "none",
288 "disable attributor runs")));
289
290static cl::opt<bool> EnableSampledInstr(
291 "enable-sampled-instrumentation", cl::init(Val: false), cl::Hidden,
292 cl::desc("Enable profile instrumentation sampling (default = off)"));
293static cl::opt<bool> UseLoopVersioningLICM(
294 "enable-loop-versioning-licm", cl::init(Val: false), cl::Hidden,
295 cl::desc("Enable the experimental Loop Versioning LICM pass"));
296
297static cl::opt<std::string> InstrumentColdFuncOnlyPath(
298 "instrument-cold-function-only-path", cl::init(Val: ""),
299 cl::desc("File path for cold function only instrumentation(requires use "
300 "with --pgo-instrument-cold-function-only)"),
301 cl::Hidden);
302
303extern cl::opt<std::string> UseCtxProfile;
304extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
305
306namespace llvm {
307extern cl::opt<bool> EnableMemProfContextDisambiguation;
308} // namespace llvm
309
310PipelineTuningOptions::PipelineTuningOptions() {
311 LoopInterleaving = true;
312 LoopVectorization = true;
313 SLPVectorization = false;
314 LoopUnrolling = true;
315 LoopInterchange = EnableLoopInterchange;
316 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
317 LicmMssaOptCap = SetLicmMssaOptCap;
318 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
319 CallGraphProfile = true;
320 UnifiedLTO = false;
321 MergeFunctions = EnableMergeFunctions;
322 InlinerThreshold = -1;
323 EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
324}
325
326namespace llvm {
327extern cl::opt<unsigned> MaxDevirtIterations;
328} // namespace llvm
329
330void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
331 OptimizationLevel Level) {
332 for (auto &C : PeepholeEPCallbacks)
333 C(FPM, Level);
334}
335void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
336 LoopPassManager &LPM, OptimizationLevel Level) {
337 for (auto &C : LateLoopOptimizationsEPCallbacks)
338 C(LPM, Level);
339}
340void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
341 OptimizationLevel Level) {
342 for (auto &C : LoopOptimizerEndEPCallbacks)
343 C(LPM, Level);
344}
345void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
346 FunctionPassManager &FPM, OptimizationLevel Level) {
347 for (auto &C : ScalarOptimizerLateEPCallbacks)
348 C(FPM, Level);
349}
350void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
351 OptimizationLevel Level) {
352 for (auto &C : CGSCCOptimizerLateEPCallbacks)
353 C(CGPM, Level);
354}
355void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
356 OptimizationLevel Level) {
357 for (auto &C : VectorizerStartEPCallbacks)
358 C(FPM, Level);
359}
360void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
361 OptimizationLevel Level) {
362 for (auto &C : VectorizerEndEPCallbacks)
363 C(FPM, Level);
364}
365void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
366 OptimizationLevel Level,
367 ThinOrFullLTOPhase Phase) {
368 for (auto &C : OptimizerEarlyEPCallbacks)
369 C(MPM, Level, Phase);
370}
371void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
372 OptimizationLevel Level,
373 ThinOrFullLTOPhase Phase) {
374 for (auto &C : OptimizerLastEPCallbacks)
375 C(MPM, Level, Phase);
376}
377void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
378 ModulePassManager &MPM, OptimizationLevel Level) {
379 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
380 C(MPM, Level);
381}
382void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
383 ModulePassManager &MPM, OptimizationLevel Level) {
384 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
385 C(MPM, Level);
386}
387void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
388 OptimizationLevel Level) {
389 for (auto &C : PipelineStartEPCallbacks)
390 C(MPM, Level);
391}
392void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
393 ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
394 for (auto &C : PipelineEarlySimplificationEPCallbacks)
395 C(MPM, Level, Phase);
396}
397
398// Helper to add AnnotationRemarksPass.
399static void addAnnotationRemarksPass(ModulePassManager &MPM) {
400 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
401}
402
403// Helper to check if the current compilation phase is preparing for LTO
404static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
405 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
406 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
407}
408
409// Helper to check if the current compilation phase is LTO backend
410static bool isLTOPostLink(ThinOrFullLTOPhase Phase) {
411 return Phase == ThinOrFullLTOPhase::ThinLTOPostLink ||
412 Phase == ThinOrFullLTOPhase::FullLTOPostLink;
413}
414
415// Helper to wrap conditionally Coro passes.
416static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) {
417 // TODO: Skip passes according to Phase.
418 ModulePassManager CoroPM;
419 CoroPM.addPass(Pass: CoroEarlyPass());
420 CGSCCPassManager CGPM;
421 CGPM.addPass(Pass: CoroSplitPass());
422 CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
423 CoroPM.addPass(Pass: CoroCleanupPass());
424 CoroPM.addPass(Pass: GlobalDCEPass());
425 return CoroConditionalWrapper(std::move(CoroPM));
426}
427
428// TODO: Investigate the cost/benefit of tail call elimination on debugging.
429FunctionPassManager
430PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
431 ThinOrFullLTOPhase Phase) {
432
433 FunctionPassManager FPM;
434
435 if (AreStatisticsEnabled())
436 FPM.addPass(Pass: CountVisitsPass());
437
438 // Form SSA out of local memory accesses after breaking apart aggregates into
439 // scalars.
440 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
441
442 // Catch trivial redundancies
443 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
444
445 // Hoisting of scalars and load expressions.
446 FPM.addPass(
447 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
448 FPM.addPass(Pass: InstCombinePass());
449
450 FPM.addPass(Pass: LibCallsShrinkWrapPass());
451
452 invokePeepholeEPCallbacks(FPM, Level);
453
454 FPM.addPass(
455 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
456
457 // Form canonically associated expression trees, and simplify the trees using
458 // basic mathematical properties. For example, this will form (nearly)
459 // minimal multiplication trees.
460 FPM.addPass(Pass: ReassociatePass());
461
462 // Add the primary loop simplification pipeline.
463 // FIXME: Currently this is split into two loop pass pipelines because we run
464 // some function passes in between them. These can and should be removed
465 // and/or replaced by scheduling the loop pass equivalents in the correct
466 // positions. But those equivalent passes aren't powerful enough yet.
467 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
468 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
469 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
470 // `LoopInstSimplify`.
471 LoopPassManager LPM1, LPM2;
472
473 // Simplify the loop body. We do this initially to clean up after other loop
474 // passes run, either when iterating on a loop or on inner loops with
475 // implications on the outer loop.
476 LPM1.addPass(Pass: LoopInstSimplifyPass());
477 LPM1.addPass(Pass: LoopSimplifyCFGPass());
478
479 // Try to remove as much code from the loop header as possible,
480 // to reduce amount of IR that will have to be duplicated. However,
481 // do not perform speculative hoisting the first time as LICM
482 // will destroy metadata that may not need to be destroyed if run
483 // after loop rotation.
484 // TODO: Investigate promotion cap for O1.
485 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
486 /*AllowSpeculation=*/false));
487
488 LPM1.addPass(Pass: LoopRotatePass(/* Disable header duplication */ true,
489 isLTOPreLink(Phase)));
490 // TODO: Investigate promotion cap for O1.
491 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
492 /*AllowSpeculation=*/true));
493 LPM1.addPass(Pass: SimpleLoopUnswitchPass());
494 if (EnableLoopFlatten)
495 LPM1.addPass(Pass: LoopFlattenPass());
496
497 LPM2.addPass(Pass: LoopIdiomRecognizePass());
498 LPM2.addPass(Pass: IndVarSimplifyPass());
499
500 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
501
502 LPM2.addPass(Pass: LoopDeletionPass());
503
504 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
505 // because it changes IR to makes profile annotation in back compile
506 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
507 // attributes so we need to make sure and allow the full unroll pass to pay
508 // attention to it.
509 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
510 PGOOpt->Action != PGOOptions::SampleUse)
511 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
512 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
513 PTO.ForgetAllSCEVInLoopUnroll));
514
515 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
516
517 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
518 /*UseMemorySSA=*/true,
519 /*UseBlockFrequencyInfo=*/true));
520 FPM.addPass(
521 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
522 FPM.addPass(Pass: InstCombinePass());
523 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
524 // *All* loop passes must preserve it, in order to be able to use it.
525 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
526 /*UseMemorySSA=*/false,
527 /*UseBlockFrequencyInfo=*/false));
528
529 // Delete small array after loop unroll.
530 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
531
532 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
533 FPM.addPass(Pass: MemCpyOptPass());
534
535 // Sparse conditional constant propagation.
536 // FIXME: It isn't clear why we do this *after* loop passes rather than
537 // before...
538 FPM.addPass(Pass: SCCPPass());
539
540 // Delete dead bit computations (instcombine runs after to fold away the dead
541 // computations, and then ADCE will run later to exploit any new DCE
542 // opportunities that creates).
543 FPM.addPass(Pass: BDCEPass());
544
545 // Run instcombine after redundancy and dead bit elimination to exploit
546 // opportunities opened up by them.
547 FPM.addPass(Pass: InstCombinePass());
548 invokePeepholeEPCallbacks(FPM, Level);
549
550 FPM.addPass(Pass: CoroElidePass());
551
552 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
553
554 // Finally, do an expensive DCE pass to catch all the dead code exposed by
555 // the simplifications and basic cleanup after all the simplifications.
556 // TODO: Investigate if this is too expensive.
557 FPM.addPass(Pass: ADCEPass());
558 FPM.addPass(
559 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
560 FPM.addPass(Pass: InstCombinePass());
561 invokePeepholeEPCallbacks(FPM, Level);
562
563 return FPM;
564}
565
566FunctionPassManager
567PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
568 ThinOrFullLTOPhase Phase) {
569 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
570
571 // The O1 pipeline has a separate pipeline creation function to simplify
572 // construction readability.
573 if (Level.getSpeedupLevel() == 1)
574 return buildO1FunctionSimplificationPipeline(Level, Phase);
575
576 FunctionPassManager FPM;
577
578 if (AreStatisticsEnabled())
579 FPM.addPass(Pass: CountVisitsPass());
580
581 // Form SSA out of local memory accesses after breaking apart aggregates into
582 // scalars.
583 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
584
585 // Catch trivial redundancies
586 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
587 if (EnableKnowledgeRetention)
588 FPM.addPass(Pass: AssumeSimplifyPass());
589
590 // Hoisting of scalars and load expressions.
591 if (EnableGVNHoist)
592 FPM.addPass(Pass: GVNHoistPass());
593
594 // Global value numbering based sinking.
595 if (EnableGVNSink) {
596 FPM.addPass(Pass: GVNSinkPass());
597 FPM.addPass(
598 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
599 }
600
601 // Speculative execution if the target has divergent branches; otherwise nop.
602 FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
603
604 // Optimize based on known information about branches, and cleanup afterward.
605 FPM.addPass(Pass: JumpThreadingPass());
606 FPM.addPass(Pass: CorrelatedValuePropagationPass());
607
608 // Jump table to switch conversion.
609 if (EnableJumpTableToSwitch)
610 FPM.addPass(Pass: JumpTableToSwitchPass());
611
612 FPM.addPass(
613 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
614 FPM.addPass(Pass: InstCombinePass());
615 FPM.addPass(Pass: AggressiveInstCombinePass());
616
617 if (!Level.isOptimizingForSize())
618 FPM.addPass(Pass: LibCallsShrinkWrapPass());
619
620 invokePeepholeEPCallbacks(FPM, Level);
621
622 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
623 // using the size value profile. Don't perform this when optimizing for size.
624 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
625 !Level.isOptimizingForSize())
626 FPM.addPass(Pass: PGOMemOPSizeOpt());
627
628 FPM.addPass(Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/
629 isInstrumentedPGOUse()));
630 FPM.addPass(
631 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
632
633 // Form canonically associated expression trees, and simplify the trees using
634 // basic mathematical properties. For example, this will form (nearly)
635 // minimal multiplication trees.
636 FPM.addPass(Pass: ReassociatePass());
637
638 if (EnableConstraintElimination)
639 FPM.addPass(Pass: ConstraintEliminationPass());
640
641 // Add the primary loop simplification pipeline.
642 // FIXME: Currently this is split into two loop pass pipelines because we run
643 // some function passes in between them. These can and should be removed
644 // and/or replaced by scheduling the loop pass equivalents in the correct
645 // positions. But those equivalent passes aren't powerful enough yet.
646 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
647 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
648 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
649 // `LoopInstSimplify`.
650 LoopPassManager LPM1, LPM2;
651
652 // Simplify the loop body. We do this initially to clean up after other loop
653 // passes run, either when iterating on a loop or on inner loops with
654 // implications on the outer loop.
655 LPM1.addPass(Pass: LoopInstSimplifyPass());
656 LPM1.addPass(Pass: LoopSimplifyCFGPass());
657
658 // Try to remove as much code from the loop header as possible,
659 // to reduce amount of IR that will have to be duplicated. However,
660 // do not perform speculative hoisting the first time as LICM
661 // will destroy metadata that may not need to be destroyed if run
662 // after loop rotation.
663 // TODO: Investigate promotion cap for O1.
664 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
665 /*AllowSpeculation=*/false));
666
667 // Disable header duplication in loop rotation at -Oz.
668 LPM1.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
669 Level != OptimizationLevel::Oz,
670 isLTOPreLink(Phase)));
671 // TODO: Investigate promotion cap for O1.
672 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
673 /*AllowSpeculation=*/true));
674 LPM1.addPass(
675 Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
676 if (EnableLoopFlatten)
677 LPM1.addPass(Pass: LoopFlattenPass());
678
679 LPM2.addPass(Pass: LoopIdiomRecognizePass());
680 LPM2.addPass(Pass: IndVarSimplifyPass());
681
682 {
683 ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses;
684 ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
685 OptimizationLevel::O3));
686 LPM2.addPass(Pass: std::move(ExtraPasses));
687 }
688
689 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
690
691 LPM2.addPass(Pass: LoopDeletionPass());
692
693 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
694 // because it changes IR to makes profile annotation in back compile
695 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
696 // attributes so we need to make sure and allow the full unroll pass to pay
697 // attention to it.
698 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
699 PGOOpt->Action != PGOOptions::SampleUse)
700 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
701 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
702 PTO.ForgetAllSCEVInLoopUnroll));
703
704 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
705
706 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
707 /*UseMemorySSA=*/true,
708 /*UseBlockFrequencyInfo=*/true));
709 FPM.addPass(
710 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
711 FPM.addPass(Pass: InstCombinePass());
712 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
713 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
714 // *All* loop passes must preserve it, in order to be able to use it.
715 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
716 /*UseMemorySSA=*/false,
717 /*UseBlockFrequencyInfo=*/false));
718
719 // Delete small array after loop unroll.
720 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
721
722 // Try vectorization/scalarization transforms that are both improvements
723 // themselves and can allow further folds with GVN and InstCombine.
724 FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
725
726 // Eliminate redundancies.
727 FPM.addPass(Pass: MergedLoadStoreMotionPass());
728 if (RunNewGVN)
729 FPM.addPass(Pass: NewGVNPass());
730 else
731 FPM.addPass(Pass: GVNPass());
732
733 // Sparse conditional constant propagation.
734 // FIXME: It isn't clear why we do this *after* loop passes rather than
735 // before...
736 FPM.addPass(Pass: SCCPPass());
737
738 // Delete dead bit computations (instcombine runs after to fold away the dead
739 // computations, and then ADCE will run later to exploit any new DCE
740 // opportunities that creates).
741 FPM.addPass(Pass: BDCEPass());
742
743 // Run instcombine after redundancy and dead bit elimination to exploit
744 // opportunities opened up by them.
745 FPM.addPass(Pass: InstCombinePass());
746 invokePeepholeEPCallbacks(FPM, Level);
747
748 // Re-consider control flow based optimizations after redundancy elimination,
749 // redo DCE, etc.
750 if (EnableDFAJumpThreading)
751 FPM.addPass(Pass: DFAJumpThreadingPass());
752
753 FPM.addPass(Pass: JumpThreadingPass());
754 FPM.addPass(Pass: CorrelatedValuePropagationPass());
755
756 // Finally, do an expensive DCE pass to catch all the dead code exposed by
757 // the simplifications and basic cleanup after all the simplifications.
758 // TODO: Investigate if this is too expensive.
759 FPM.addPass(Pass: ADCEPass());
760
761 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
762 FPM.addPass(Pass: MemCpyOptPass());
763
764 FPM.addPass(Pass: DSEPass());
765 FPM.addPass(Pass: MoveAutoInitPass());
766
767 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
768 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
769 /*AllowSpeculation=*/true),
770 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
771
772 FPM.addPass(Pass: CoroElidePass());
773
774 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
775
776 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
777 .convertSwitchRangeToICmp(B: true)
778 .hoistCommonInsts(B: true)
779 .sinkCommonInsts(B: true)));
780 FPM.addPass(Pass: InstCombinePass());
781 invokePeepholeEPCallbacks(FPM, Level);
782
783 return FPM;
784}
785
786void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
787 MPM.addPass(Pass: CanonicalizeAliasesPass());
788 MPM.addPass(Pass: NameAnonGlobalPass());
789}
790
791void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
792 OptimizationLevel Level,
793 ThinOrFullLTOPhase LTOPhase) {
794 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
795 if (DisablePreInliner)
796 return;
797 InlineParams IP;
798
799 IP.DefaultThreshold = PreInlineThreshold;
800
801 // FIXME: The hint threshold has the same value used by the regular inliner
802 // when not optimzing for size. This should probably be lowered after
803 // performance testing.
804 // FIXME: this comment is cargo culted from the old pass manager, revisit).
805 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
806 ModuleInlinerWrapperPass MIWP(
807 IP, /* MandatoryFirst */ true,
808 InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner});
809 CGSCCPassManager &CGPipeline = MIWP.getPM();
810
811 FunctionPassManager FPM;
812 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
813 FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies.
814 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
815 B: true))); // Merge & remove basic blocks.
816 FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences.
817 invokePeepholeEPCallbacks(FPM, Level);
818
819 CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
820 Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
821
822 MPM.addPass(Pass: std::move(MIWP));
823
824 // Delete anything that is now dead to make sure that we don't instrument
825 // dead code. Instrumentation can end up keeping dead code around and
826 // dramatically increase code size.
827 MPM.addPass(Pass: GlobalDCEPass());
828}
829
830void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
831 OptimizationLevel Level) {
832 if (EnablePostPGOLoopRotation) {
833 // Disable header duplication in loop rotation at -Oz.
834 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
835 Pass: createFunctionToLoopPassAdaptor(
836 Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
837 Level != OptimizationLevel::Oz),
838 /*UseMemorySSA=*/false,
839 /*UseBlockFrequencyInfo=*/false),
840 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
841 }
842}
843
844void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
845 OptimizationLevel Level, bool RunProfileGen,
846 bool IsCS, bool AtomicCounterUpdate,
847 std::string ProfileFile,
848 std::string ProfileRemappingFile,
849 IntrusiveRefCntPtr<vfs::FileSystem> FS) {
850 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
851
852 if (!RunProfileGen) {
853 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
854 MPM.addPass(
855 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
856 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
857 // RequireAnalysisPass for PSI before subsequent non-module passes.
858 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
859 return;
860 }
861
862 // Perform PGO instrumentation.
863 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
864 : PGOInstrumentationType::FDO));
865
866 addPostPGOLoopRotation(MPM, Level);
867 // Add the profile lowering pass.
868 InstrProfOptions Options;
869 if (!ProfileFile.empty())
870 Options.InstrProfileOutput = ProfileFile;
871 // Do counter promotion at Level greater than O0.
872 Options.DoCounterPromotion = true;
873 Options.UseBFIInPromotion = IsCS;
874 if (EnableSampledInstr) {
875 Options.Sampling = true;
876 // With sampling, there is little beneifit to enable counter promotion.
877 // But note that sampling does work with counter promotion.
878 Options.DoCounterPromotion = false;
879 }
880 Options.Atomic = AtomicCounterUpdate;
881 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
882}
883
884void PassBuilder::addPGOInstrPassesForO0(
885 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
886 bool AtomicCounterUpdate, std::string ProfileFile,
887 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
888 if (!RunProfileGen) {
889 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
890 MPM.addPass(
891 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
892 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
893 // RequireAnalysisPass for PSI before subsequent non-module passes.
894 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
895 return;
896 }
897
898 // Perform PGO instrumentation.
899 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
900 : PGOInstrumentationType::FDO));
901 // Add the profile lowering pass.
902 InstrProfOptions Options;
903 if (!ProfileFile.empty())
904 Options.InstrProfileOutput = ProfileFile;
905 // Do not do counter promotion at O0.
906 Options.DoCounterPromotion = false;
907 Options.UseBFIInPromotion = IsCS;
908 Options.Atomic = AtomicCounterUpdate;
909 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
910}
911
912static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
913 return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel());
914}
915
916ModuleInlinerWrapperPass
917PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
918 ThinOrFullLTOPhase Phase) {
919 InlineParams IP;
920 if (PTO.InlinerThreshold == -1)
921 IP = getInlineParamsFromOptLevel(Level);
922 else
923 IP = getInlineParams(Threshold: PTO.InlinerThreshold);
924 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
925 // set hot-caller threshold to 0 to disable hot
926 // callsite inline (as much as possible [1]) because it makes
927 // profile annotation in the backend inaccurate.
928 //
929 // [1] Note the cost of a function could be below zero due to erased
930 // prologue / epilogue.
931 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
932 IP.HotCallSiteThreshold = 0;
933
934 if (PGOOpt)
935 IP.EnableDeferral = EnablePGOInlineDeferral;
936
937 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
938 InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner},
939 UseInlineAdvisor, MaxDevirtIterations);
940
941 // Require the GlobalsAA analysis for the module so we can query it within
942 // the CGSCC pipeline.
943 if (EnableGlobalAnalyses) {
944 MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
945 // Invalidate AAManager so it can be recreated and pick up the newly
946 // available GlobalsAA.
947 MIWP.addModulePass(
948 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
949 }
950
951 // Require the ProfileSummaryAnalysis for the module so we can query it within
952 // the inliner pass.
953 MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
954
955 // Now begin the main postorder CGSCC pipeline.
956 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
957 // manager and trying to emulate its precise behavior. Much of this doesn't
958 // make a lot of sense and we should revisit the core CGSCC structure.
959 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
960
961 // Note: historically, the PruneEH pass was run first to deduce nounwind and
962 // generally clean up exception handling overhead. It isn't clear this is
963 // valuable as the inliner doesn't currently care whether it is inlining an
964 // invoke or a call.
965
966 if (AttributorRun & AttributorRunOption::CGSCC)
967 MainCGPipeline.addPass(Pass: AttributorCGSCCPass());
968
969 // Deduce function attributes. We do another run of this after the function
970 // simplification pipeline, so this only needs to run when it could affect the
971 // function simplification pipeline, which is only the case with recursive
972 // functions.
973 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
974
975 // When at O3 add argument promotion to the pass pipeline.
976 // FIXME: It isn't at all clear why this should be limited to O3.
977 if (Level == OptimizationLevel::O3)
978 MainCGPipeline.addPass(Pass: ArgumentPromotionPass());
979
980 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
981 // there are no OpenMP runtime calls present in the module.
982 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
983 MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass(Phase));
984
985 invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level);
986
987 // Add the core function simplification pipeline nested inside the
988 // CGSCC walk.
989 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
990 Pass: buildFunctionSimplificationPipeline(Level, Phase),
991 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
992
993 // Finally, deduce any function attributes based on the fully simplified
994 // function.
995 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass());
996
997 // Mark that the function is fully simplified and that it shouldn't be
998 // simplified again if we somehow revisit it due to CGSCC mutations unless
999 // it's been modified since.
1000 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1001 Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
1002
1003 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1004 MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1005 MainCGPipeline.addPass(Pass: CoroAnnotationElidePass());
1006 }
1007
1008 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1009 MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor(
1010 Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
1011
1012 return MIWP;
1013}
1014
1015ModulePassManager
1016PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
1017 ThinOrFullLTOPhase Phase) {
1018 ModulePassManager MPM;
1019
1020 InlineParams IP = getInlineParamsFromOptLevel(Level);
1021 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1022 // set hot-caller threshold to 0 to disable hot
1023 // callsite inline (as much as possible [1]) because it makes
1024 // profile annotation in the backend inaccurate.
1025 //
1026 // [1] Note the cost of a function could be below zero due to erased
1027 // prologue / epilogue.
1028 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1029 IP.HotCallSiteThreshold = 0;
1030
1031 if (PGOOpt)
1032 IP.EnableDeferral = EnablePGOInlineDeferral;
1033
1034 // The inline deferral logic is used to avoid losing some
1035 // inlining chance in future. It is helpful in SCC inliner, in which
1036 // inlining is processed in bottom-up order.
1037 // While in module inliner, the inlining order is a priority-based order
1038 // by default. The inline deferral is unnecessary there. So we disable the
1039 // inline deferral logic in module inliner.
1040 IP.EnableDeferral = false;
1041
1042 MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1043 if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1044 MPM.addPass(Pass: GlobalOptPass());
1045 MPM.addPass(Pass: GlobalDCEPass());
1046 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1047 }
1048
1049 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1050 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1051 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1052
1053 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1054 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1055 Pass: CoroSplitPass(Level != OptimizationLevel::O0)));
1056 MPM.addPass(
1057 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: CoroAnnotationElidePass()));
1058 }
1059
1060 return MPM;
1061}
1062
1063ModulePassManager
1064PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1065 ThinOrFullLTOPhase Phase) {
1066 assert(Level != OptimizationLevel::O0 &&
1067 "Should not be used for O0 pipeline");
1068
1069 assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1070 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1071
1072 ModulePassManager MPM;
1073
1074 // Place pseudo probe instrumentation as the first pass of the pipeline to
1075 // minimize the impact of optimization changes.
1076 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1077 Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1078 MPM.addPass(Pass: SampleProfileProbePass(TM));
1079
1080 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1081
1082 // In ThinLTO mode, when flattened profile is used, all the available
1083 // profile information will be annotated in PreLink phase so there is
1084 // no need to load the profile again in PostLink.
1085 bool LoadSampleProfile =
1086 HasSampleProfile &&
1087 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1088
1089 // During the ThinLTO backend phase we perform early indirect call promotion
1090 // here, before globalopt. Otherwise imported available_externally functions
1091 // look unreferenced and are removed. If we are going to load the sample
1092 // profile then defer until later.
1093 // TODO: See if we can move later and consolidate with the location where
1094 // we perform ICP when we are loading a sample profile.
1095 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1096 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1097 // determine whether the new direct calls are annotated with prof metadata.
1098 // Ideally this should be determined from whether the IR is annotated with
1099 // sample profile, and not whether the a sample profile was provided on the
1100 // command line. E.g. for flattened profiles where we will not be reloading
1101 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1102 // provide the sample profile file.
1103 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1104 MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1105
1106 // Create an early function pass manager to cleanup the output of the
1107 // frontend. Not necessary with LTO post link pipelines since the pre link
1108 // pipeline already cleaned up the frontend output.
1109 if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1110 // Do basic inference of function attributes from known properties of system
1111 // libraries and other oracles.
1112 MPM.addPass(Pass: InferFunctionAttrsPass());
1113 MPM.addPass(Pass: CoroEarlyPass());
1114
1115 FunctionPassManager EarlyFPM;
1116 EarlyFPM.addPass(Pass: EntryExitInstrumenterPass(/*PostInlining=*/false));
1117 // Lower llvm.expect to metadata before attempting transforms.
1118 // Compare/branch metadata may alter the behavior of passes like
1119 // SimplifyCFG.
1120 EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass());
1121 EarlyFPM.addPass(Pass: SimplifyCFGPass());
1122 EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1123 EarlyFPM.addPass(Pass: EarlyCSEPass());
1124 if (Level == OptimizationLevel::O3)
1125 EarlyFPM.addPass(Pass: CallSiteSplittingPass());
1126 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1127 Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1128 }
1129
1130 if (LoadSampleProfile) {
1131 // Annotate sample profile right after early FPM to ensure freshness of
1132 // the debug info.
1133 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1134 PGOOpt->ProfileRemappingFile, Phase));
1135 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1136 // RequireAnalysisPass for PSI before subsequent non-module passes.
1137 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1138 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1139 // for the profile annotation to be accurate in the LTO backend.
1140 if (!isLTOPreLink(Phase))
1141 // We perform early indirect call promotion here, before globalopt.
1142 // This is important for the ThinLTO backend phase because otherwise
1143 // imported available_externally functions look unreferenced and are
1144 // removed.
1145 MPM.addPass(
1146 Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1147 }
1148
1149 // Try to perform OpenMP specific optimizations on the module. This is a
1150 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1151 MPM.addPass(Pass: OpenMPOptPass(Phase));
1152
1153 if (AttributorRun & AttributorRunOption::MODULE)
1154 MPM.addPass(Pass: AttributorPass());
1155
1156 // Lower type metadata and the type.test intrinsic in the ThinLTO
1157 // post link pipeline after ICP. This is to enable usage of the type
1158 // tests in ICP sequences.
1159 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1160 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1161 lowertypetests::DropTestKind::Assume));
1162
1163 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
1164
1165 // Interprocedural constant propagation now that basic cleanup has occurred
1166 // and prior to optimizing globals.
1167 // FIXME: This position in the pipeline hasn't been carefully considered in
1168 // years, it should be re-analyzed.
1169 MPM.addPass(Pass: IPSCCPPass(
1170 IPSCCPOptions(/*AllowFuncSpec=*/
1171 Level != OptimizationLevel::Os &&
1172 Level != OptimizationLevel::Oz &&
1173 !isLTOPreLink(Phase))));
1174
1175 // Attach metadata to indirect call sites indicating the set of functions
1176 // they may target at run-time. This should follow IPSCCP.
1177 MPM.addPass(Pass: CalledValuePropagationPass());
1178
1179 // Optimize globals to try and fold them into constants.
1180 MPM.addPass(Pass: GlobalOptPass());
1181
1182 // Create a small function pass pipeline to cleanup after all the global
1183 // optimizations.
1184 FunctionPassManager GlobalCleanupPM;
1185 // FIXME: Should this instead by a run of SROA?
1186 GlobalCleanupPM.addPass(Pass: PromotePass());
1187 GlobalCleanupPM.addPass(Pass: InstCombinePass());
1188 invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level);
1189 GlobalCleanupPM.addPass(
1190 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1191 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM),
1192 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1193
1194 // We already asserted this happens in non-FullLTOPostLink earlier.
1195 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1196 // Enable contextual profiling instrumentation.
1197 const bool IsCtxProfGen =
1198 IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
1199 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1200 const bool IsPGOInstrGen =
1201 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1202 const bool IsPGOInstrUse =
1203 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1204 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1205 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1206 // enable ctx profiling from the frontend.
1207 assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1208 "Enabling both instrumented PGO and contextual instrumentation is not "
1209 "supported.");
1210 const bool IsCtxProfUse =
1211 !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
1212
1213 assert(
1214 (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) &&
1215 "--instrument-cold-function-only-path is provided but "
1216 "--pgo-instrument-cold-function-only is not enabled");
1217 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1218 IsPGOPreLink &&
1219 !InstrumentColdFuncOnlyPath.empty();
1220
1221 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1222 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1223 addPreInlinerPasses(MPM, Level, LTOPhase: Phase);
1224
1225 // Add all the requested passes for instrumentation PGO, if requested.
1226 if (IsPGOInstrGen || IsPGOInstrUse) {
1227 addPGOInstrPasses(MPM, Level,
1228 /*RunProfileGen=*/IsPGOInstrGen,
1229 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1230 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1231 FS: PGOOpt->FS);
1232 } else if (IsCtxProfGen || IsCtxProfUse) {
1233 MPM.addPass(Pass: PGOInstrumentationGen(PGOInstrumentationType::CTXPROF));
1234 // In pre-link, we just want the instrumented IR. We use the contextual
1235 // profile in the post-thinlink phase.
1236 // The instrumentation will be removed in post-thinlink after IPO.
1237 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1238 // mechanism for GUIDs.
1239 MPM.addPass(Pass: AssignGUIDPass());
1240 if (IsCtxProfUse) {
1241 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1242 return MPM;
1243 }
1244 // Block further inlining in the instrumented ctxprof case. This avoids
1245 // confusingly collecting profiles for the same GUID corresponding to
1246 // different variants of the function. We could do like PGO and identify
1247 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1248 // thinlto to happen before performing any further optimizations, it's
1249 // unnecessary to collect profiles for non-prevailing copies.
1250 MPM.addPass(Pass: NoinlineNonPrevailing());
1251 addPostPGOLoopRotation(MPM, Level);
1252 MPM.addPass(Pass: PGOCtxProfLoweringPass());
1253 } else if (IsColdFuncOnlyInstrGen) {
1254 addPGOInstrPasses(
1255 MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1256 /* AtomicCounterUpdate */ false, ProfileFile: InstrumentColdFuncOnlyPath,
1257 /* ProfileRemappingFile */ "", FS: IntrusiveRefCntPtr<vfs::FileSystem>());
1258 }
1259
1260 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1261 MPM.addPass(Pass: PGOIndirectCallPromotion(false, false));
1262
1263 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1264 MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1265 EnableSampledInstr));
1266
1267 if (IsMemprofUse)
1268 MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1269
1270 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1271 PGOOpt->Action == PGOOptions::SampleUse))
1272 MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1273
1274 MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1275
1276 if (EnableModuleInliner)
1277 MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase));
1278 else
1279 MPM.addPass(Pass: buildInlinerPipeline(Level, Phase));
1280
1281 // Remove any dead arguments exposed by cleanups, constant folding globals,
1282 // and argument promotion.
1283 MPM.addPass(Pass: DeadArgumentEliminationPass());
1284
1285 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1286 MPM.addPass(Pass: SimplifyTypeTestsPass());
1287
1288 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
1289 MPM.addPass(Pass: CoroCleanupPass());
1290
1291 // Optimize globals now that functions are fully simplified.
1292 MPM.addPass(Pass: GlobalOptPass());
1293 MPM.addPass(Pass: GlobalDCEPass());
1294
1295 return MPM;
1296}
1297
1298/// TODO: Should LTO cause any differences to this set of passes?
1299void PassBuilder::addVectorPasses(OptimizationLevel Level,
1300 FunctionPassManager &FPM, bool IsFullLTO) {
1301 FPM.addPass(Pass: LoopVectorizePass(
1302 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1303
1304 FPM.addPass(Pass: InferAlignmentPass());
1305 if (IsFullLTO) {
1306 // The vectorizer may have significantly shortened a loop body; unroll
1307 // again. Unroll small loops to hide loop backedge latency and saturate any
1308 // parallel execution resources of an out-of-order processor. We also then
1309 // need to clean up redundancies and loop invariant code.
1310 // FIXME: It would be really good to use a loop-integrated instruction
1311 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1312 // across the loop nests.
1313 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1314 if (EnableUnrollAndJam && PTO.LoopUnrolling)
1315 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1316 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1317 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1318 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1319 PTO.ForgetAllSCEVInLoopUnroll)));
1320 FPM.addPass(Pass: WarnMissedTransformationsPass());
1321 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1322 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1323 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1324 // NOTE: we are very late in the pipeline, and we don't have any LICM
1325 // or SimplifyCFG passes scheduled after us, that would cleanup
1326 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1327 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1328 }
1329
1330 if (!IsFullLTO) {
1331 // Eliminate loads by forwarding stores from the previous iteration to loads
1332 // of the current iteration.
1333 FPM.addPass(Pass: LoopLoadEliminationPass());
1334 }
1335 // Cleanup after the loop optimization passes.
1336 FPM.addPass(Pass: InstCombinePass());
1337
1338 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1339 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1340 // At higher optimization levels, try to clean up any runtime overlap and
1341 // alignment checks inserted by the vectorizer. We want to track correlated
1342 // runtime checks for two inner loops in the same outer loop, fold any
1343 // common computations, hoist loop-invariant aspects out of any outer loop,
1344 // and unswitch the runtime checks if possible. Once hoisted, we may have
1345 // dead (or speculatable) control flows or more combining opportunities.
1346 ExtraPasses.addPass(Pass: EarlyCSEPass());
1347 ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass());
1348 ExtraPasses.addPass(Pass: InstCombinePass());
1349 LoopPassManager LPM;
1350 LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1351 /*AllowSpeculation=*/true));
1352 LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1353 OptimizationLevel::O3));
1354 ExtraPasses.addPass(
1355 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true,
1356 /*UseBlockFrequencyInfo=*/true));
1357 ExtraPasses.addPass(
1358 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1359 ExtraPasses.addPass(Pass: InstCombinePass());
1360 FPM.addPass(Pass: std::move(ExtraPasses));
1361 }
1362
1363 // Now that we've formed fast to execute loop structures, we do further
1364 // optimizations. These are run afterward as they might block doing complex
1365 // analyses and transforms such as what are needed for loop vectorization.
1366
1367 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1368 // GVN, loop transforms, and others have already run, so it's now better to
1369 // convert to more optimized IR using more aggressive simplify CFG options.
1370 // The extra sinking transform can create larger basic blocks, so do this
1371 // before SLP vectorization.
1372 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
1373 .forwardSwitchCondToPhi(B: true)
1374 .convertSwitchRangeToICmp(B: true)
1375 .convertSwitchToLookupTable(B: true)
1376 .needCanonicalLoops(B: false)
1377 .hoistCommonInsts(B: true)
1378 .sinkCommonInsts(B: true)));
1379
1380 if (IsFullLTO) {
1381 FPM.addPass(Pass: SCCPPass());
1382 FPM.addPass(Pass: InstCombinePass());
1383 FPM.addPass(Pass: BDCEPass());
1384 }
1385
1386 // Optimize parallel scalar instruction chains into SIMD instructions.
1387 if (PTO.SLPVectorization) {
1388 FPM.addPass(Pass: SLPVectorizerPass());
1389 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1390 FPM.addPass(Pass: EarlyCSEPass());
1391 }
1392 }
1393 // Enhance/cleanup vector code.
1394 FPM.addPass(Pass: VectorCombinePass());
1395
1396 if (!IsFullLTO) {
1397 FPM.addPass(Pass: InstCombinePass());
1398 // Unroll small loops to hide loop backedge latency and saturate any
1399 // parallel execution resources of an out-of-order processor. We also then
1400 // need to clean up redundancies and loop invariant code.
1401 // FIXME: It would be really good to use a loop-integrated instruction
1402 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1403 // across the loop nests.
1404 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1405 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1406 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1407 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1408 }
1409 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1410 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1411 PTO.ForgetAllSCEVInLoopUnroll)));
1412 FPM.addPass(Pass: WarnMissedTransformationsPass());
1413 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1414 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1415 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1416 // NOTE: we are very late in the pipeline, and we don't have any LICM
1417 // or SimplifyCFG passes scheduled after us, that would cleanup
1418 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1419 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1420 }
1421
1422 FPM.addPass(Pass: InferAlignmentPass());
1423 FPM.addPass(Pass: InstCombinePass());
1424
1425 // This is needed for two reasons:
1426 // 1. It works around problems that instcombine introduces, such as sinking
1427 // expensive FP divides into loops containing multiplications using the
1428 // divide result.
1429 // 2. It helps to clean up some loop-invariant code created by the loop
1430 // unroll pass when IsFullLTO=false.
1431 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1432 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1433 /*AllowSpeculation=*/true),
1434 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1435
1436 // Now that we've vectorized and unrolled loops, we may have more refined
1437 // alignment information, try to re-derive it here.
1438 FPM.addPass(Pass: AlignmentFromAssumptionsPass());
1439}
1440
1441ModulePassManager
1442PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1443 ThinOrFullLTOPhase LTOPhase) {
1444 const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase);
1445 ModulePassManager MPM;
1446
1447 // Run partial inlining pass to partially inline functions that have
1448 // large bodies.
1449 if (RunPartialInlining)
1450 MPM.addPass(Pass: PartialInlinerPass());
1451
1452 // Remove avail extern fns and globals definitions since we aren't compiling
1453 // an object file for later LTO. For LTO we want to preserve these so they
1454 // are eligible for inlining at link-time. Note if they are unreferenced they
1455 // will be removed by GlobalDCE later, so this only impacts referenced
1456 // available externally globals. Eventually they will be suppressed during
1457 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1458 // may make globals referenced by available external functions dead and saves
1459 // running remaining passes on the eliminated functions. These should be
1460 // preserved during prelinking for link-time inlining decisions.
1461 if (!LTOPreLink)
1462 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1463
1464 // Do RPO function attribute inference across the module to forward-propagate
1465 // attributes where applicable.
1466 // FIXME: Is this really an optimization rather than a canonicalization?
1467 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1468
1469 // Do a post inline PGO instrumentation and use pass. This is a context
1470 // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1471 // cross-module inline has not been done yet. The context sensitive
1472 // instrumentation is after all the inlines are done.
1473 if (!LTOPreLink && PGOOpt) {
1474 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1475 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1476 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1477 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1478 FS: PGOOpt->FS);
1479 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1480 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1481 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1482 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1483 FS: PGOOpt->FS);
1484 }
1485
1486 // Re-compute GlobalsAA here prior to function passes. This is particularly
1487 // useful as the above will have inlined, DCE'ed, and function-attr
1488 // propagated everything. We should at this point have a reasonably minimal
1489 // and richly annotated call graph. By computing aliasing and mod/ref
1490 // information for all local globals here, the late loop passes and notably
1491 // the vectorizer will be able to use them to help recognize vectorizable
1492 // memory operations.
1493 if (EnableGlobalAnalyses)
1494 MPM.addPass(Pass: RecomputeGlobalsAAPass());
1495
1496 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase: LTOPhase);
1497
1498 FunctionPassManager OptimizePM;
1499 // Scheduling LoopVersioningLICM when inlining is over, because after that
1500 // we may see more accurate aliasing. Reason to run this late is that too
1501 // early versioning may prevent further inlining due to increase of code
1502 // size. Other optimizations which runs later might get benefit of no-alias
1503 // assumption in clone loop.
1504 if (UseLoopVersioningLICM) {
1505 OptimizePM.addPass(
1506 Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass()));
1507 // LoopVersioningLICM pass might increase new LICM opportunities.
1508 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1509 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1510 /*AllowSpeculation=*/true),
1511 /*USeMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false));
1512 }
1513
1514 OptimizePM.addPass(Pass: Float2IntPass());
1515 OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass());
1516
1517 if (EnableMatrix) {
1518 OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass());
1519 OptimizePM.addPass(Pass: EarlyCSEPass());
1520 }
1521
1522 // CHR pass should only be applied with the profile information.
1523 // The check is to check the profile summary information in CHR.
1524 if (EnableCHR && Level == OptimizationLevel::O3)
1525 OptimizePM.addPass(Pass: ControlHeightReductionPass());
1526
1527 // FIXME: We need to run some loop optimizations to re-rotate loops after
1528 // simplifycfg and others undo their rotation.
1529
1530 // Optimize the loop execution. These passes operate on entire loop nests
1531 // rather than on each loop in an inside-out manner, and so they are actually
1532 // function passes.
1533
1534 invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level);
1535
1536 LoopPassManager LPM;
1537 // First rotate loops that may have been un-rotated by prior passes.
1538 // Disable header duplication at -Oz.
1539 LPM.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
1540 Level != OptimizationLevel::Oz,
1541 LTOPreLink));
1542 // Some loops may have become dead by now. Try to delete them.
1543 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1544 // this may need to be revisited once we run GVN before loop deletion
1545 // in the simplification pipeline.
1546 LPM.addPass(Pass: LoopDeletionPass());
1547
1548 if (PTO.LoopInterchange)
1549 LPM.addPass(Pass: LoopInterchangePass());
1550
1551 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1552 Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1553
1554 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1555 // into separate loop that would otherwise inhibit vectorization. This is
1556 // currently only performed for loops marked with the metadata
1557 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1558 OptimizePM.addPass(Pass: LoopDistributePass());
1559
1560 // Populates the VFABI attribute with the scalar-to-vector mappings
1561 // from the TargetLibraryInfo.
1562 OptimizePM.addPass(Pass: InjectTLIMappings());
1563
1564 addVectorPasses(Level, FPM&: OptimizePM, /* IsFullLTO */ false);
1565
1566 invokeVectorizerEndEPCallbacks(FPM&: OptimizePM, Level);
1567
1568 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1569 // canonicalization pass that enables other optimizations. As a result,
1570 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1571 // result too early.
1572 OptimizePM.addPass(Pass: LoopSinkPass());
1573
1574 // And finally clean up LCSSA form before generating code.
1575 OptimizePM.addPass(Pass: InstSimplifyPass());
1576
1577 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1578 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1579 // flattening of blocks.
1580 OptimizePM.addPass(Pass: DivRemPairsPass());
1581
1582 // Try to annotate calls that were created during optimization.
1583 OptimizePM.addPass(
1584 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1585
1586 // LoopSink (and other loop passes since the last simplifyCFG) might have
1587 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1588 OptimizePM.addPass(
1589 Pass: SimplifyCFGPass(SimplifyCFGOptions()
1590 .convertSwitchRangeToICmp(B: true)
1591 .speculateUnpredictables(B: true)
1592 .hoistLoadsStoresWithCondFaulting(B: true)));
1593
1594 // Add the core optimizing pipeline.
1595 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM),
1596 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1597
1598 invokeOptimizerLastEPCallbacks(MPM, Level, Phase: LTOPhase);
1599
1600 // Split out cold code. Splitting is done late to avoid hiding context from
1601 // other optimizations and inadvertently regressing performance. The tradeoff
1602 // is that this has a higher code size cost than splitting early.
1603 if (EnableHotColdSplit && !LTOPreLink)
1604 MPM.addPass(Pass: HotColdSplittingPass());
1605
1606 // Search the code for similar regions of code. If enough similar regions can
1607 // be found where extracting the regions into their own function will decrease
1608 // the size of the program, we extract the regions, a deduplicate the
1609 // structurally similar regions.
1610 if (EnableIROutliner)
1611 MPM.addPass(Pass: IROutlinerPass());
1612
1613 // Now we need to do some global optimization transforms.
1614 // FIXME: It would seem like these should come first in the optimization
1615 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1616 // ordering here.
1617 MPM.addPass(Pass: GlobalDCEPass());
1618 MPM.addPass(Pass: ConstantMergePass());
1619
1620 // Merge functions if requested. It has a better chance to merge functions
1621 // after ConstantMerge folded jump tables.
1622 if (PTO.MergeFunctions)
1623 MPM.addPass(Pass: MergeFunctionsPass());
1624
1625 if (PTO.CallGraphProfile && !LTOPreLink)
1626 MPM.addPass(Pass: CGProfilePass(isLTOPostLink(Phase: LTOPhase)));
1627
1628 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1629 if (!LTOPreLink)
1630 MPM.addPass(Pass: RelLookupTableConverterPass());
1631
1632 return MPM;
1633}
1634
1635ModulePassManager
1636PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1637 ThinOrFullLTOPhase Phase) {
1638 if (Level == OptimizationLevel::O0)
1639 return buildO0DefaultPipeline(Level, Phase);
1640
1641 ModulePassManager MPM;
1642
1643 // Convert @llvm.global.annotations to !annotation metadata.
1644 MPM.addPass(Pass: Annotation2MetadataPass());
1645
1646 // Force any function attributes we want the rest of the pipeline to observe.
1647 MPM.addPass(Pass: ForceFunctionAttrsPass());
1648
1649 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1650 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1651
1652 // Apply module pipeline start EP callback.
1653 invokePipelineStartEPCallbacks(MPM, Level);
1654
1655 // Add the core simplification pipeline.
1656 MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase));
1657
1658 // Now add the optimization pipeline.
1659 MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase: Phase));
1660
1661 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1662 PGOOpt->Action == PGOOptions::SampleUse)
1663 MPM.addPass(Pass: PseudoProbeUpdatePass());
1664
1665 // Emit annotation remarks.
1666 addAnnotationRemarksPass(MPM);
1667
1668 if (isLTOPreLink(Phase))
1669 addRequiredLTOPreLinkPasses(MPM);
1670 return MPM;
1671}
1672
1673ModulePassManager
1674PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1675 bool EmitSummary) {
1676 ModulePassManager MPM;
1677 if (ThinLTO)
1678 MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level));
1679 else
1680 MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level));
1681 MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary));
1682
1683 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1684 // like removing CFI/WPD related instructions. Note, we reuse
1685 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1686 // in FatLtoCleanup.
1687 MPM.addPass(Pass: FatLtoCleanup());
1688
1689 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1690 // object code, only in the bitcode section, so drop it before we run
1691 // module optimization and generate machine code. If llvm.type.test() isn't in
1692 // the IR, this won't do anything.
1693 MPM.addPass(
1694 Pass: LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All));
1695
1696 // Use the ThinLTO post-link pipeline with sample profiling
1697 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1698 MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1699 else {
1700 // ModuleSimplification does not run the coroutine passes for
1701 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1702 // builds, otherwise they will miscompile.
1703 if (ThinLTO) {
1704 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1705 // consideration.
1706 CGSCCPassManager CGPM;
1707 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1708 CGPM.addPass(Pass: CoroAnnotationElidePass());
1709 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
1710 MPM.addPass(Pass: CoroCleanupPass());
1711 }
1712
1713 // otherwise, just use module optimization
1714 MPM.addPass(
1715 Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None));
1716 // Emit annotation remarks.
1717 addAnnotationRemarksPass(MPM);
1718 }
1719 return MPM;
1720}
1721
1722ModulePassManager
1723PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1724 if (Level == OptimizationLevel::O0)
1725 return buildO0DefaultPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink);
1726
1727 ModulePassManager MPM;
1728
1729 // Convert @llvm.global.annotations to !annotation metadata.
1730 MPM.addPass(Pass: Annotation2MetadataPass());
1731
1732 // Force any function attributes we want the rest of the pipeline to observe.
1733 MPM.addPass(Pass: ForceFunctionAttrsPass());
1734
1735 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1736 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1737
1738 // Apply module pipeline start EP callback.
1739 invokePipelineStartEPCallbacks(MPM, Level);
1740
1741 // If we are planning to perform ThinLTO later, we don't bloat the code with
1742 // unrolling/vectorization/... now. Just simplify the module as much as we
1743 // can.
1744 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1745 Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink));
1746 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1747 // thinlto use the contextual info to perform imports; then use the contextual
1748 // profile in the post-thinlink phase.
1749 if (!UseCtxProfile.empty()) {
1750 addRequiredLTOPreLinkPasses(MPM);
1751 return MPM;
1752 }
1753
1754 // Run partial inlining pass to partially inline functions that have
1755 // large bodies.
1756 // FIXME: It isn't clear whether this is really the right place to run this
1757 // in ThinLTO. Because there is another canonicalization and simplification
1758 // phase that will run after the thin link, running this here ends up with
1759 // less information than will be available later and it may grow functions in
1760 // ways that aren't beneficial.
1761 if (RunPartialInlining)
1762 MPM.addPass(Pass: PartialInlinerPass());
1763
1764 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1765 PGOOpt->Action == PGOOptions::SampleUse)
1766 MPM.addPass(Pass: PseudoProbeUpdatePass());
1767
1768 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1769 // optimization is going to be done in PostLink stage, but clang can't add
1770 // callbacks there in case of in-process ThinLTO called by linker.
1771 invokeOptimizerEarlyEPCallbacks(MPM, Level,
1772 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1773 invokeOptimizerLastEPCallbacks(MPM, Level,
1774 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1775
1776 // Emit annotation remarks.
1777 addAnnotationRemarksPass(MPM);
1778
1779 addRequiredLTOPreLinkPasses(MPM);
1780
1781 return MPM;
1782}
1783
1784ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1785 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1786 ModulePassManager MPM;
1787
1788 if (ImportSummary) {
1789 // For ThinLTO we must apply the context disambiguation decisions early, to
1790 // ensure we can correctly match the callsites to summary data.
1791 if (EnableMemProfContextDisambiguation)
1792 MPM.addPass(Pass: MemProfContextDisambiguation(
1793 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1794
1795 // These passes import type identifier resolutions for whole-program
1796 // devirtualization and CFI. They must run early because other passes may
1797 // disturb the specific instruction patterns that these passes look for,
1798 // creating dependencies on resolutions that may not appear in the summary.
1799 //
1800 // For example, GVN may transform the pattern assume(type.test) appearing in
1801 // two basic blocks into assume(phi(type.test, type.test)), which would
1802 // transform a dependency on a WPD resolution into a dependency on a type
1803 // identifier resolution for CFI.
1804 //
1805 // Also, WPD has access to more precise information than ICP and can
1806 // devirtualize more effectively, so it should operate on the IR first.
1807 //
1808 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1809 // metadata and intrinsics.
1810 MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary));
1811 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary));
1812 }
1813
1814 if (Level == OptimizationLevel::O0) {
1815 // Run a second time to clean up any type tests left behind by WPD for use
1816 // in ICP.
1817 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1818 lowertypetests::DropTestKind::Assume));
1819 // Drop available_externally and unreferenced globals. This is necessary
1820 // with ThinLTO in order to avoid leaving undefined references to dead
1821 // globals in the object file.
1822 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1823 MPM.addPass(Pass: GlobalDCEPass());
1824 return MPM;
1825 }
1826 if (!UseCtxProfile.empty()) {
1827 MPM.addPass(
1828 Pass: buildModuleInlinerPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1829 } else {
1830 // Add the core simplification pipeline.
1831 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1832 Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1833 }
1834 // Now add the optimization pipeline.
1835 MPM.addPass(Pass: buildModuleOptimizationPipeline(
1836 Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink));
1837
1838 // Emit annotation remarks.
1839 addAnnotationRemarksPass(MPM);
1840
1841 return MPM;
1842}
1843
1844ModulePassManager
1845PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1846 // FIXME: We should use a customized pre-link pipeline!
1847 return buildPerModuleDefaultPipeline(Level,
1848 Phase: ThinOrFullLTOPhase::FullLTOPreLink);
1849}
1850
1851ModulePassManager
1852PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1853 ModuleSummaryIndex *ExportSummary) {
1854 ModulePassManager MPM;
1855
1856 invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1857
1858 // Create a function that performs CFI checks for cross-DSO calls with targets
1859 // in the current module.
1860 MPM.addPass(Pass: CrossDSOCFIPass());
1861
1862 if (Level == OptimizationLevel::O0) {
1863 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1864 // metadata and intrinsics.
1865 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1866 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1867 // Run a second time to clean up any type tests left behind by WPD for use
1868 // in ICP.
1869 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1870 lowertypetests::DropTestKind::Assume));
1871
1872 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
1873
1874 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1875
1876 // Emit annotation remarks.
1877 addAnnotationRemarksPass(MPM);
1878
1879 return MPM;
1880 }
1881
1882 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1883 // Load sample profile before running the LTO optimization pipeline.
1884 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1885 PGOOpt->ProfileRemappingFile,
1886 ThinOrFullLTOPhase::FullLTOPostLink));
1887 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1888 // RequireAnalysisPass for PSI before subsequent non-module passes.
1889 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1890 }
1891
1892 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1893 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1894
1895 // Remove unused virtual tables to improve the quality of code generated by
1896 // whole-program devirtualization and bitset lowering.
1897 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
1898
1899 // Do basic inference of function attributes from known properties of system
1900 // libraries and other oracles.
1901 MPM.addPass(Pass: InferFunctionAttrsPass());
1902
1903 if (Level.getSpeedupLevel() > 1) {
1904 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1905 Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1906
1907 // Indirect call promotion. This should promote all the targets that are
1908 // left by the earlier promotion pass that promotes intra-module targets.
1909 // This two-step promotion is to save the compile time. For LTO, it should
1910 // produce the same result as if we only do promotion here.
1911 MPM.addPass(Pass: PGOIndirectCallPromotion(
1912 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1913
1914 // Promoting by-reference arguments to by-value exposes more constants to
1915 // IPSCCP.
1916 CGSCCPassManager CGPM;
1917 CGPM.addPass(Pass: PostOrderFunctionAttrsPass());
1918 CGPM.addPass(Pass: ArgumentPromotionPass());
1919 CGPM.addPass(
1920 Pass: createCGSCCToFunctionPassAdaptor(Pass: SROAPass(SROAOptions::ModifyCFG)));
1921 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
1922
1923 // Propagate constants at call sites into the functions they call. This
1924 // opens opportunities for globalopt (and inlining) by substituting function
1925 // pointers passed as arguments to direct uses of functions.
1926 MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1927 Level != OptimizationLevel::Os &&
1928 Level != OptimizationLevel::Oz)));
1929
1930 // Attach metadata to indirect call sites indicating the set of functions
1931 // they may target at run-time. This should follow IPSCCP.
1932 MPM.addPass(Pass: CalledValuePropagationPass());
1933 }
1934
1935 // Do RPO function attribute inference across the module to forward-propagate
1936 // attributes where applicable.
1937 // FIXME: Is this really an optimization rather than a canonicalization?
1938 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1939
1940 // Use in-range annotations on GEP indices to split globals where beneficial.
1941 MPM.addPass(Pass: GlobalSplitPass());
1942
1943 // Run whole program optimization of virtual call when the list of callees
1944 // is fixed.
1945 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1946
1947 // Stop here at -O1.
1948 if (Level == OptimizationLevel::O1) {
1949 // The LowerTypeTestsPass needs to run to lower type metadata and the
1950 // type.test intrinsics. The pass does nothing if CFI is disabled.
1951 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1952 // Run a second time to clean up any type tests left behind by WPD for use
1953 // in ICP (which is performed earlier than this in the regular LTO
1954 // pipeline).
1955 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1956 lowertypetests::DropTestKind::Assume));
1957
1958 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
1959
1960 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1961
1962 // Emit annotation remarks.
1963 addAnnotationRemarksPass(MPM);
1964
1965 return MPM;
1966 }
1967
1968 // TODO: Skip to match buildCoroWrapper.
1969 MPM.addPass(Pass: CoroEarlyPass());
1970
1971 // Optimize globals to try and fold them into constants.
1972 MPM.addPass(Pass: GlobalOptPass());
1973
1974 // Promote any localized globals to SSA registers.
1975 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass()));
1976
1977 // Linking modules together can lead to duplicate global constant, only
1978 // keep one copy of each constant.
1979 MPM.addPass(Pass: ConstantMergePass());
1980
1981 // Remove unused arguments from functions.
1982 MPM.addPass(Pass: DeadArgumentEliminationPass());
1983
1984 // Reduce the code after globalopt and ipsccp. Both can open up significant
1985 // simplification opportunities, and both can propagate functions through
1986 // function pointers. When this happens, we often have to resolve varargs
1987 // calls, etc, so let instcombine do this.
1988 FunctionPassManager PeepholeFPM;
1989 PeepholeFPM.addPass(Pass: InstCombinePass());
1990 if (Level.getSpeedupLevel() > 1)
1991 PeepholeFPM.addPass(Pass: AggressiveInstCombinePass());
1992 invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level);
1993
1994 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM),
1995 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1996
1997 // Lower variadic functions for supported targets prior to inlining.
1998 MPM.addPass(Pass: ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
1999
2000 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2001 // generally clean up exception handling overhead. It isn't clear this is
2002 // valuable as the inliner doesn't currently care whether it is inlining an
2003 // invoke or a call.
2004 // Run the inliner now.
2005 if (EnableModuleInliner) {
2006 MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
2007 UseInlineAdvisor,
2008 ThinOrFullLTOPhase::FullLTOPostLink));
2009 } else {
2010 MPM.addPass(Pass: ModuleInlinerWrapperPass(
2011 getInlineParamsFromOptLevel(Level),
2012 /* MandatoryFirst */ true,
2013 InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink,
2014 .Pass: InlinePass::CGSCCInliner}));
2015 }
2016
2017 // Perform context disambiguation after inlining, since that would reduce the
2018 // amount of additional cloning required to distinguish the allocation
2019 // contexts.
2020 if (EnableMemProfContextDisambiguation)
2021 MPM.addPass(Pass: MemProfContextDisambiguation(
2022 /*Summary=*/nullptr,
2023 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2024
2025 // Optimize globals again after we ran the inliner.
2026 MPM.addPass(Pass: GlobalOptPass());
2027
2028 // Run the OpenMPOpt pass again after global optimizations.
2029 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
2030
2031 // Garbage collect dead functions.
2032 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2033
2034 // If we didn't decide to inline a function, check to see if we can
2035 // transform it to pass arguments by value instead of by reference.
2036 CGSCCPassManager CGPM;
2037 CGPM.addPass(Pass: ArgumentPromotionPass());
2038 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
2039 CGPM.addPass(Pass: CoroAnnotationElidePass());
2040 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2041
2042 FunctionPassManager FPM;
2043 // The IPO Passes may leave cruft around. Clean up after them.
2044 FPM.addPass(Pass: InstCombinePass());
2045 invokePeepholeEPCallbacks(FPM, Level);
2046
2047 if (EnableConstraintElimination)
2048 FPM.addPass(Pass: ConstraintEliminationPass());
2049
2050 FPM.addPass(Pass: JumpThreadingPass());
2051
2052 // Do a post inline PGO instrumentation and use pass. This is a context
2053 // sensitive PGO pass.
2054 if (PGOOpt) {
2055 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2056 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2057 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2058 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
2059 FS: PGOOpt->FS);
2060 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2061 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2062 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2063 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
2064 FS: PGOOpt->FS);
2065 }
2066
2067 // Break up allocas
2068 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
2069
2070 // LTO provides additional opportunities for tailcall elimination due to
2071 // link-time inlining, and visibility of nocapture attribute.
2072 FPM.addPass(
2073 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2074
2075 // Run a few AA driver optimizations here and now to cleanup the code.
2076 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM),
2077 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2078
2079 MPM.addPass(
2080 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
2081
2082 // Require the GlobalsAA analysis for the module so we can query it within
2083 // MainFPM.
2084 if (EnableGlobalAnalyses) {
2085 MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
2086 // Invalidate AAManager so it can be recreated and pick up the newly
2087 // available GlobalsAA.
2088 MPM.addPass(
2089 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
2090 }
2091
2092 FunctionPassManager MainFPM;
2093 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
2094 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2095 /*AllowSpeculation=*/true),
2096 /*USeMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false));
2097
2098 if (RunNewGVN)
2099 MainFPM.addPass(Pass: NewGVNPass());
2100 else
2101 MainFPM.addPass(Pass: GVNPass());
2102
2103 // Remove dead memcpy()'s.
2104 MainFPM.addPass(Pass: MemCpyOptPass());
2105
2106 // Nuke dead stores.
2107 MainFPM.addPass(Pass: DSEPass());
2108 MainFPM.addPass(Pass: MoveAutoInitPass());
2109 MainFPM.addPass(Pass: MergedLoadStoreMotionPass());
2110
2111 invokeVectorizerStartEPCallbacks(FPM&: MainFPM, Level);
2112
2113 LoopPassManager LPM;
2114 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2115 LPM.addPass(Pass: LoopFlattenPass());
2116 LPM.addPass(Pass: IndVarSimplifyPass());
2117 LPM.addPass(Pass: LoopDeletionPass());
2118 // FIXME: Add loop interchange.
2119
2120 // Unroll small loops and perform peeling.
2121 LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
2122 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2123 PTO.ForgetAllSCEVInLoopUnroll));
2124 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2125 // *All* loop passes must preserve it, in order to be able to use it.
2126 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
2127 Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2128
2129 MainFPM.addPass(Pass: LoopDistributePass());
2130
2131 addVectorPasses(Level, FPM&: MainFPM, /* IsFullLTO */ true);
2132
2133 invokeVectorizerEndEPCallbacks(FPM&: MainFPM, Level);
2134
2135 // Run the OpenMPOpt CGSCC pass again late.
2136 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
2137 Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
2138
2139 invokePeepholeEPCallbacks(FPM&: MainFPM, Level);
2140 MainFPM.addPass(Pass: JumpThreadingPass());
2141 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM),
2142 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2143
2144 // Lower type metadata and the type.test intrinsic. This pass supports
2145 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2146 // to be run at link time if CFI is enabled. This pass does nothing if
2147 // CFI is disabled.
2148 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2149 // Run a second time to clean up any type tests left behind by WPD for use
2150 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2151 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
2152 lowertypetests::DropTestKind::Assume));
2153
2154 // Enable splitting late in the FullLTO post-link pipeline.
2155 if (EnableHotColdSplit)
2156 MPM.addPass(Pass: HotColdSplittingPass());
2157
2158 // Add late LTO optimization passes.
2159 FunctionPassManager LateFPM;
2160
2161 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2162 // canonicalization pass that enables other optimizations. As a result,
2163 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2164 // result too early.
2165 LateFPM.addPass(Pass: LoopSinkPass());
2166
2167 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2168 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2169 // flattening of blocks.
2170 LateFPM.addPass(Pass: DivRemPairsPass());
2171
2172 // Delete basic blocks, which optimization passes may have killed.
2173 LateFPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
2174 .convertSwitchRangeToICmp(B: true)
2175 .hoistCommonInsts(B: true)
2176 .speculateUnpredictables(B: true)));
2177 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM)));
2178
2179 // Drop bodies of available externally objects to improve GlobalDCE.
2180 MPM.addPass(Pass: EliminateAvailableExternallyPass());
2181
2182 // Now that we have optimized the program, discard unreachable functions.
2183 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2184
2185 if (PTO.MergeFunctions)
2186 MPM.addPass(Pass: MergeFunctionsPass());
2187
2188 MPM.addPass(Pass: RelLookupTableConverterPass());
2189
2190 if (PTO.CallGraphProfile)
2191 MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true));
2192
2193 MPM.addPass(Pass: CoroCleanupPass());
2194
2195 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2196
2197 // Emit annotation remarks.
2198 addAnnotationRemarksPass(MPM);
2199
2200 return MPM;
2201}
2202
2203ModulePassManager
2204PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2205 ThinOrFullLTOPhase Phase) {
2206 assert(Level == OptimizationLevel::O0 &&
2207 "buildO0DefaultPipeline should only be used with O0");
2208
2209 ModulePassManager MPM;
2210
2211 // Perform pseudo probe instrumentation in O0 mode. This is for the
2212 // consistency between different build modes. For example, a LTO build can be
2213 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2214 // the postlink will require pseudo probe instrumentation in the prelink.
2215 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2216 MPM.addPass(Pass: SampleProfileProbePass(TM));
2217
2218 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2219 PGOOpt->Action == PGOOptions::IRUse))
2220 addPGOInstrPassesForO0(
2221 MPM,
2222 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2223 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile,
2224 ProfileRemappingFile: PGOOpt->ProfileRemappingFile, FS: PGOOpt->FS);
2225
2226 // Instrument function entry and exit before all inlining.
2227 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2228 Pass: EntryExitInstrumenterPass(/*PostInlining=*/false)));
2229
2230 invokePipelineStartEPCallbacks(MPM, Level);
2231
2232 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2233 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
2234
2235 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2236 // Explicitly disable sample loader inlining and use flattened profile in O0
2237 // pipeline.
2238 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
2239 PGOOpt->ProfileRemappingFile,
2240 ThinOrFullLTOPhase::None, nullptr,
2241 /*DisableSampleProfileInlining=*/true,
2242 /*UseFlattenedProfile=*/true));
2243 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2244 // RequireAnalysisPass for PSI before subsequent non-module passes.
2245 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2246 }
2247
2248 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
2249
2250 // Build a minimal pipeline based on the semantics required by LLVM,
2251 // which is just that always inlining occurs. Further, disable generating
2252 // lifetime intrinsics to avoid enabling further optimizations during
2253 // code generation.
2254 MPM.addPass(Pass: AlwaysInlinerPass(
2255 /*InsertLifetimeIntrinsics=*/false));
2256
2257 if (PTO.MergeFunctions)
2258 MPM.addPass(Pass: MergeFunctionsPass());
2259
2260 if (EnableMatrix)
2261 MPM.addPass(
2262 Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true)));
2263
2264 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2265 CGSCCPassManager CGPM;
2266 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2267 if (!CGPM.isEmpty())
2268 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2269 }
2270 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2271 LoopPassManager LPM;
2272 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2273 if (!LPM.isEmpty()) {
2274 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2275 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2276 }
2277 }
2278 if (!LoopOptimizerEndEPCallbacks.empty()) {
2279 LoopPassManager LPM;
2280 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2281 if (!LPM.isEmpty()) {
2282 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2283 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2284 }
2285 }
2286 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2287 FunctionPassManager FPM;
2288 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2289 if (!FPM.isEmpty())
2290 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2291 }
2292
2293 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
2294
2295 if (!VectorizerStartEPCallbacks.empty()) {
2296 FunctionPassManager FPM;
2297 invokeVectorizerStartEPCallbacks(FPM, Level);
2298 if (!FPM.isEmpty())
2299 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2300 }
2301
2302 if (!VectorizerEndEPCallbacks.empty()) {
2303 FunctionPassManager FPM;
2304 invokeVectorizerEndEPCallbacks(FPM, Level);
2305 if (!FPM.isEmpty())
2306 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2307 }
2308
2309 MPM.addPass(Pass: buildCoroWrapper(Phase));
2310
2311 invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
2312
2313 if (isLTOPreLink(Phase))
2314 addRequiredLTOPreLinkPasses(MPM);
2315
2316 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
2317
2318 return MPM;
2319}
2320
2321AAManager PassBuilder::buildDefaultAAPipeline() {
2322 AAManager AA;
2323
2324 // The order in which these are registered determines their priority when
2325 // being queried.
2326
2327 // Add any target-specific alias analyses that should be run early.
2328 if (TM)
2329 TM->registerEarlyDefaultAliasAnalyses(AA);
2330
2331 // First we register the basic alias analysis that provides the majority of
2332 // per-function local AA logic. This is a stateless, on-demand local set of
2333 // AA techniques.
2334 AA.registerFunctionAnalysis<BasicAA>();
2335
2336 // Next we query fast, specialized alias analyses that wrap IR-embedded
2337 // information about aliasing.
2338 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2339 AA.registerFunctionAnalysis<TypeBasedAA>();
2340
2341 // Add support for querying global aliasing information when available.
2342 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2343 // analysis, all that the `AAManager` can do is query for any *cached*
2344 // results from `GlobalsAA` through a readonly proxy.
2345 if (EnableGlobalAnalyses)
2346 AA.registerModuleAnalysis<GlobalsAA>();
2347
2348 // Add target-specific alias analyses.
2349 if (TM)
2350 TM->registerDefaultAliasAnalyses(AA);
2351
2352 return AA;
2353}
2354
2355bool PassBuilder::isInstrumentedPGOUse() const {
2356 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2357 !UseCtxProfile.empty();
2358}