1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BasicAliasAnalysis.h"
20#include "llvm/Analysis/CGSCCPassManager.h"
21#include "llvm/Analysis/CtxProfAnalysis.h"
22#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
23#include "llvm/Analysis/GlobalsModRef.h"
24#include "llvm/Analysis/InlineAdvisor.h"
25#include "llvm/Analysis/InstCount.h"
26#include "llvm/Analysis/ProfileSummaryInfo.h"
27#include "llvm/Analysis/ScopedNoAliasAA.h"
28#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
31#include "llvm/Passes/OptimizationLevel.h"
32#include "llvm/Passes/PassBuilder.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/PGOOptions.h"
36#include "llvm/Support/VirtualFileSystem.h"
37#include "llvm/Target/TargetMachine.h"
38#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
39#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
40#include "llvm/Transforms/Coroutines/CoroCleanup.h"
41#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
42#include "llvm/Transforms/Coroutines/CoroEarly.h"
43#include "llvm/Transforms/Coroutines/CoroElide.h"
44#include "llvm/Transforms/Coroutines/CoroSplit.h"
45#include "llvm/Transforms/HipStdPar/HipStdPar.h"
46#include "llvm/Transforms/IPO/AlwaysInliner.h"
47#include "llvm/Transforms/IPO/Annotation2Metadata.h"
48#include "llvm/Transforms/IPO/ArgumentPromotion.h"
49#include "llvm/Transforms/IPO/Attributor.h"
50#include "llvm/Transforms/IPO/CalledValuePropagation.h"
51#include "llvm/Transforms/IPO/ConstantMerge.h"
52#include "llvm/Transforms/IPO/CrossDSOCFI.h"
53#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
54#include "llvm/Transforms/IPO/ElimAvailExtern.h"
55#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
56#include "llvm/Transforms/IPO/ExpandVariadics.h"
57#include "llvm/Transforms/IPO/FatLTOCleanup.h"
58#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
59#include "llvm/Transforms/IPO/FunctionAttrs.h"
60#include "llvm/Transforms/IPO/GlobalDCE.h"
61#include "llvm/Transforms/IPO/GlobalOpt.h"
62#include "llvm/Transforms/IPO/GlobalSplit.h"
63#include "llvm/Transforms/IPO/HotColdSplitting.h"
64#include "llvm/Transforms/IPO/IROutliner.h"
65#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
66#include "llvm/Transforms/IPO/Inliner.h"
67#include "llvm/Transforms/IPO/LowerTypeTests.h"
68#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
69#include "llvm/Transforms/IPO/MergeFunctions.h"
70#include "llvm/Transforms/IPO/ModuleInliner.h"
71#include "llvm/Transforms/IPO/OpenMPOpt.h"
72#include "llvm/Transforms/IPO/PartialInlining.h"
73#include "llvm/Transforms/IPO/SCCP.h"
74#include "llvm/Transforms/IPO/SampleProfile.h"
75#include "llvm/Transforms/IPO/SampleProfileProbe.h"
76#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
77#include "llvm/Transforms/InstCombine/InstCombine.h"
78#include "llvm/Transforms/Instrumentation/AllocToken.h"
79#include "llvm/Transforms/Instrumentation/CGProfile.h"
80#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
81#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
82#include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h"
83#include "llvm/Transforms/Instrumentation/MemProfUse.h"
84#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
85#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
86#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
87#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
88#include "llvm/Transforms/Scalar/ADCE.h"
89#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
90#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
91#include "llvm/Transforms/Scalar/BDCE.h"
92#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
93#include "llvm/Transforms/Scalar/ConstraintElimination.h"
94#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
95#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
96#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
97#include "llvm/Transforms/Scalar/DivRemPairs.h"
98#include "llvm/Transforms/Scalar/DropUnnecessaryAssumes.h"
99#include "llvm/Transforms/Scalar/EarlyCSE.h"
100#include "llvm/Transforms/Scalar/Float2Int.h"
101#include "llvm/Transforms/Scalar/GVN.h"
102#include "llvm/Transforms/Scalar/IndVarSimplify.h"
103#include "llvm/Transforms/Scalar/InferAlignment.h"
104#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
105#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
106#include "llvm/Transforms/Scalar/JumpThreading.h"
107#include "llvm/Transforms/Scalar/LICM.h"
108#include "llvm/Transforms/Scalar/LoopDeletion.h"
109#include "llvm/Transforms/Scalar/LoopDistribute.h"
110#include "llvm/Transforms/Scalar/LoopFlatten.h"
111#include "llvm/Transforms/Scalar/LoopFuse.h"
112#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
113#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
114#include "llvm/Transforms/Scalar/LoopInterchange.h"
115#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
116#include "llvm/Transforms/Scalar/LoopPassManager.h"
117#include "llvm/Transforms/Scalar/LoopRotation.h"
118#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
119#include "llvm/Transforms/Scalar/LoopSink.h"
120#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
121#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
122#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
123#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
124#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
125#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
126#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
127#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
128#include "llvm/Transforms/Scalar/NewGVN.h"
129#include "llvm/Transforms/Scalar/Reassociate.h"
130#include "llvm/Transforms/Scalar/SCCP.h"
131#include "llvm/Transforms/Scalar/SROA.h"
132#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
133#include "llvm/Transforms/Scalar/SimplifyCFG.h"
134#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
135#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
136#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
137#include "llvm/Transforms/Utils/AddDiscriminators.h"
138#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
139#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
140#include "llvm/Transforms/Utils/CountVisits.h"
141#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
142#include "llvm/Transforms/Utils/ExtraPassManager.h"
143#include "llvm/Transforms/Utils/InjectTLIMappings.h"
144#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
145#include "llvm/Transforms/Utils/Mem2Reg.h"
146#include "llvm/Transforms/Utils/MoveAutoInit.h"
147#include "llvm/Transforms/Utils/NameAnonGlobals.h"
148#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
149#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
150#include "llvm/Transforms/Vectorize/LoopVectorize.h"
151#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
152#include "llvm/Transforms/Vectorize/VectorCombine.h"
153
154using namespace llvm;
155
156namespace llvm {
157
158static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
159 "enable-ml-inliner", cl::init(Val: InliningAdvisorMode::Default), cl::Hidden,
160 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
161 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
162 "Heuristics-based inliner version"),
163 clEnumValN(InliningAdvisorMode::Development, "development",
164 "Use development mode (runtime-loadable model)"),
165 clEnumValN(InliningAdvisorMode::Release, "release",
166 "Use release mode (AOT-compiled model)")));
167
168/// Flag to enable inline deferral during PGO.
169static cl::opt<bool>
170 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(Val: true),
171 cl::Hidden,
172 cl::desc("Enable inline deferral during PGO"));
173
174static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
175 cl::init(Val: false), cl::Hidden,
176 cl::desc("Enable module inliner"));
177
178static cl::opt<bool> PerformMandatoryInliningsFirst(
179 "mandatory-inlining-first", cl::init(Val: false), cl::Hidden,
180 cl::desc("Perform mandatory inlinings module-wide, before performing "
181 "inlining"));
182
183static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
184 "eagerly-invalidate-analyses", cl::init(Val: true), cl::Hidden,
185 cl::desc("Eagerly invalidate more analyses in default pipelines"));
186
187static cl::opt<bool> EnableMergeFunctions(
188 "enable-merge-functions", cl::init(Val: false), cl::Hidden,
189 cl::desc("Enable function merging as part of the optimization pipeline"));
190
191static cl::opt<bool> EnablePostPGOLoopRotation(
192 "enable-post-pgo-loop-rotation", cl::init(Val: true), cl::Hidden,
193 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
194
195static cl::opt<bool> EnableGlobalAnalyses(
196 "enable-global-analyses", cl::init(Val: true), cl::Hidden,
197 cl::desc("Enable inter-procedural analyses"));
198
199static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
200 cl::init(Val: false), cl::Hidden,
201 cl::desc("Run Partial inlining pass"));
202
203static cl::opt<bool> ExtraVectorizerPasses(
204 "extra-vectorizer-passes", cl::init(Val: false), cl::Hidden,
205 cl::desc("Run cleanup optimization passes after vectorization"));
206
207static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(Val: false), cl::Hidden,
208 cl::desc("Run the NewGVN pass"));
209
210static cl::opt<bool>
211 EnableLoopInterchange("enable-loopinterchange", cl::init(Val: false), cl::Hidden,
212 cl::desc("Enable the LoopInterchange Pass"));
213
214static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
215 cl::init(Val: false), cl::Hidden,
216 cl::desc("Enable Unroll And Jam Pass"));
217
218static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(Val: false),
219 cl::Hidden,
220 cl::desc("Enable the LoopFlatten Pass"));
221
222// Experimentally allow loop header duplication. This should allow for better
223// optimization at Oz, since loop-idiom recognition can then recognize things
224// like memcpy. If this ends up being useful for many targets, we should drop
225// this flag and make a code generation option that can be controlled
226// independent of the opt level and exposed through the frontend.
227static cl::opt<bool> EnableLoopHeaderDuplication(
228 "enable-loop-header-duplication", cl::init(Val: false), cl::Hidden,
229 cl::desc("Enable loop header duplication at any optimization level"));
230
231static cl::opt<bool>
232 EnableDFAJumpThreading("enable-dfa-jump-thread",
233 cl::desc("Enable DFA jump threading"),
234 cl::init(Val: false), cl::Hidden);
235
236static cl::opt<bool>
237 EnableHotColdSplit("hot-cold-split",
238 cl::desc("Enable hot-cold splitting pass"));
239
240static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(Val: false),
241 cl::Hidden,
242 cl::desc("Enable ir outliner pass"));
243
244static cl::opt<bool>
245 DisablePreInliner("disable-preinline", cl::init(Val: false), cl::Hidden,
246 cl::desc("Disable pre-instrumentation inliner"));
247
248static cl::opt<int> PreInlineThreshold(
249 "preinline-threshold", cl::Hidden, cl::init(Val: 75),
250 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
251 "(default = 75)"));
252
253static cl::opt<bool>
254 EnableGVNHoist("enable-gvn-hoist",
255 cl::desc("Enable the GVN hoisting pass (default = off)"));
256
257static cl::opt<bool>
258 EnableGVNSink("enable-gvn-sink",
259 cl::desc("Enable the GVN sinking pass (default = off)"));
260
261static cl::opt<bool> EnableJumpTableToSwitch(
262 "enable-jump-table-to-switch",
263 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
264
265// This option is used in simplifying testing SampleFDO optimizations for
266// profile loading.
267static cl::opt<bool>
268 EnableCHR("enable-chr", cl::init(Val: true), cl::Hidden,
269 cl::desc("Enable control height reduction optimization (CHR)"));
270
271static cl::opt<bool> FlattenedProfileUsed(
272 "flattened-profile-used", cl::init(Val: false), cl::Hidden,
273 cl::desc("Indicate the sample profile being used is flattened, i.e., "
274 "no inline hierarchy exists in the profile"));
275
276static cl::opt<bool>
277 EnableMatrix("enable-matrix", cl::init(Val: false), cl::Hidden,
278 cl::desc("Enable lowering of the matrix intrinsics"));
279
280static cl::opt<bool> EnableConstraintElimination(
281 "enable-constraint-elimination", cl::init(Val: true), cl::Hidden,
282 cl::desc(
283 "Enable pass to eliminate conditions based on linear constraints"));
284
285static cl::opt<AttributorRunOption> AttributorRun(
286 "attributor-enable", cl::Hidden, cl::init(Val: AttributorRunOption::NONE),
287 cl::desc("Enable the attributor inter-procedural deduction pass"),
288 cl::values(clEnumValN(AttributorRunOption::FULL, "full",
289 "enable all full attributor runs"),
290 clEnumValN(AttributorRunOption::LIGHT, "light",
291 "enable all attributor-light runs"),
292 clEnumValN(AttributorRunOption::MODULE, "module",
293 "enable module-wide attributor runs"),
294 clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light",
295 "enable module-wide attributor-light runs"),
296 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
297 "enable call graph SCC attributor runs"),
298 clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light",
299 "enable call graph SCC attributor-light runs"),
300 clEnumValN(AttributorRunOption::NONE, "none",
301 "disable attributor runs")));
302
303static cl::opt<bool> EnableSampledInstr(
304 "enable-sampled-instrumentation", cl::init(Val: false), cl::Hidden,
305 cl::desc("Enable profile instrumentation sampling (default = off)"));
306static cl::opt<bool> UseLoopVersioningLICM(
307 "enable-loop-versioning-licm", cl::init(Val: false), cl::Hidden,
308 cl::desc("Enable the experimental Loop Versioning LICM pass"));
309
310static cl::opt<std::string> InstrumentColdFuncOnlyPath(
311 "instrument-cold-function-only-path", cl::init(Val: ""),
312 cl::desc("File path for cold function only instrumentation(requires use "
313 "with --pgo-instrument-cold-function-only)"),
314 cl::Hidden);
315
316// TODO: There is a similar flag in WPD pass, we should consolidate them by
317// parsing the option only once in PassBuilder and share it across both places.
318static cl::opt<bool> EnableDevirtualizeSpeculatively(
319 "enable-devirtualize-speculatively",
320 cl::desc("Enable speculative devirtualization optimization"),
321 cl::init(Val: false));
322
323extern cl::opt<std::string> UseCtxProfile;
324extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
325
326extern cl::opt<bool> EnableMemProfContextDisambiguation;
327} // namespace llvm
328
329PipelineTuningOptions::PipelineTuningOptions() {
330 LoopInterleaving = true;
331 LoopVectorization = true;
332 SLPVectorization = false;
333 LoopUnrolling = true;
334 LoopInterchange = EnableLoopInterchange;
335 LoopFusion = false;
336 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
337 LicmMssaOptCap = SetLicmMssaOptCap;
338 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
339 CallGraphProfile = true;
340 UnifiedLTO = false;
341 MergeFunctions = EnableMergeFunctions;
342 InlinerThreshold = -1;
343 EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
344 DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively;
345}
346
347namespace llvm {
348extern cl::opt<unsigned> MaxDevirtIterations;
349} // namespace llvm
350
351void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
352 OptimizationLevel Level) {
353 for (auto &C : PeepholeEPCallbacks)
354 C(FPM, Level);
355}
356void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
357 LoopPassManager &LPM, OptimizationLevel Level) {
358 for (auto &C : LateLoopOptimizationsEPCallbacks)
359 C(LPM, Level);
360}
361void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
362 OptimizationLevel Level) {
363 for (auto &C : LoopOptimizerEndEPCallbacks)
364 C(LPM, Level);
365}
366void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
367 FunctionPassManager &FPM, OptimizationLevel Level) {
368 for (auto &C : ScalarOptimizerLateEPCallbacks)
369 C(FPM, Level);
370}
371void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
372 OptimizationLevel Level) {
373 for (auto &C : CGSCCOptimizerLateEPCallbacks)
374 C(CGPM, Level);
375}
376void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
377 OptimizationLevel Level) {
378 for (auto &C : VectorizerStartEPCallbacks)
379 C(FPM, Level);
380}
381void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
382 OptimizationLevel Level) {
383 for (auto &C : VectorizerEndEPCallbacks)
384 C(FPM, Level);
385}
386void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
387 OptimizationLevel Level,
388 ThinOrFullLTOPhase Phase) {
389 for (auto &C : OptimizerEarlyEPCallbacks)
390 C(MPM, Level, Phase);
391}
392void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
393 OptimizationLevel Level,
394 ThinOrFullLTOPhase Phase) {
395 for (auto &C : OptimizerLastEPCallbacks)
396 C(MPM, Level, Phase);
397}
398void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
399 ModulePassManager &MPM, OptimizationLevel Level) {
400 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
401 C(MPM, Level);
402}
403void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
404 ModulePassManager &MPM, OptimizationLevel Level) {
405 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
406 C(MPM, Level);
407}
408void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
409 OptimizationLevel Level) {
410 for (auto &C : PipelineStartEPCallbacks)
411 C(MPM, Level);
412}
413void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
414 ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
415 for (auto &C : PipelineEarlySimplificationEPCallbacks)
416 C(MPM, Level, Phase);
417}
418
419// Helper to add AnnotationRemarksPass.
420static void addAnnotationRemarksPass(ModulePassManager &MPM) {
421 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
422 // Count the stats for InstCount and FunctionPropertiesAnalysis
423 if (AreStatisticsEnabled()) {
424 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: InstCountPass()));
425 MPM.addPass(
426 Pass: createModuleToFunctionPassAdaptor(Pass: FunctionPropertiesStatisticsPass()));
427 }
428}
429
430// Helper to check if the current compilation phase is preparing for LTO
431static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
432 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
433 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
434}
435
436// Helper to check if the current compilation phase is LTO backend
437static bool isLTOPostLink(ThinOrFullLTOPhase Phase) {
438 return Phase == ThinOrFullLTOPhase::ThinLTOPostLink ||
439 Phase == ThinOrFullLTOPhase::FullLTOPostLink;
440}
441
442// Helper to wrap conditionally Coro passes.
443static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) {
444 // TODO: Skip passes according to Phase.
445 ModulePassManager CoroPM;
446 CoroPM.addPass(Pass: CoroEarlyPass());
447 CGSCCPassManager CGPM;
448 CGPM.addPass(Pass: CoroSplitPass());
449 CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
450 CoroPM.addPass(Pass: CoroCleanupPass());
451 CoroPM.addPass(Pass: GlobalDCEPass());
452 return CoroConditionalWrapper(std::move(CoroPM));
453}
454
455// TODO: Investigate the cost/benefit of tail call elimination on debugging.
456FunctionPassManager
457PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
458 ThinOrFullLTOPhase Phase) {
459
460 FunctionPassManager FPM;
461
462 if (AreStatisticsEnabled())
463 FPM.addPass(Pass: CountVisitsPass());
464
465 // Form SSA out of local memory accesses after breaking apart aggregates into
466 // scalars.
467 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
468
469 // Catch trivial redundancies
470 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
471
472 // Hoisting of scalars and load expressions.
473 FPM.addPass(
474 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
475 FPM.addPass(Pass: InstCombinePass());
476
477 FPM.addPass(Pass: LibCallsShrinkWrapPass());
478
479 invokePeepholeEPCallbacks(FPM, Level);
480
481 FPM.addPass(
482 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
483
484 // Form canonically associated expression trees, and simplify the trees using
485 // basic mathematical properties. For example, this will form (nearly)
486 // minimal multiplication trees.
487 FPM.addPass(Pass: ReassociatePass());
488
489 // Add the primary loop simplification pipeline.
490 // FIXME: Currently this is split into two loop pass pipelines because we run
491 // some function passes in between them. These can and should be removed
492 // and/or replaced by scheduling the loop pass equivalents in the correct
493 // positions. But those equivalent passes aren't powerful enough yet.
494 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
495 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
496 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
497 // `LoopInstSimplify`.
498 LoopPassManager LPM1, LPM2;
499
500 // Simplify the loop body. We do this initially to clean up after other loop
501 // passes run, either when iterating on a loop or on inner loops with
502 // implications on the outer loop.
503 LPM1.addPass(Pass: LoopInstSimplifyPass());
504 LPM1.addPass(Pass: LoopSimplifyCFGPass());
505
506 // Try to remove as much code from the loop header as possible,
507 // to reduce amount of IR that will have to be duplicated. However,
508 // do not perform speculative hoisting the first time as LICM
509 // will destroy metadata that may not need to be destroyed if run
510 // after loop rotation.
511 // TODO: Investigate promotion cap for O1.
512 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
513 /*AllowSpeculation=*/false));
514
515 LPM1.addPass(Pass: LoopRotatePass(/* Disable header duplication */ true,
516 isLTOPreLink(Phase)));
517 // TODO: Investigate promotion cap for O1.
518 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
519 /*AllowSpeculation=*/true));
520 LPM1.addPass(Pass: SimpleLoopUnswitchPass());
521 if (EnableLoopFlatten)
522 LPM1.addPass(Pass: LoopFlattenPass());
523
524 LPM2.addPass(Pass: LoopIdiomRecognizePass());
525 LPM2.addPass(Pass: IndVarSimplifyPass());
526
527 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
528
529 LPM2.addPass(Pass: LoopDeletionPass());
530
531 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
532 // because it changes IR to makes profile annotation in back compile
533 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
534 // attributes so we need to make sure and allow the full unroll pass to pay
535 // attention to it.
536 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
537 PGOOpt->Action != PGOOptions::SampleUse)
538 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
539 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
540 PTO.ForgetAllSCEVInLoopUnroll));
541
542 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
543
544 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
545 /*UseMemorySSA=*/true));
546 FPM.addPass(
547 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
548 FPM.addPass(Pass: InstCombinePass());
549 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
550 // *All* loop passes must preserve it, in order to be able to use it.
551 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
552 /*UseMemorySSA=*/false));
553
554 // Delete small array after loop unroll.
555 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
556
557 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
558 FPM.addPass(Pass: MemCpyOptPass());
559
560 // Sparse conditional constant propagation.
561 // FIXME: It isn't clear why we do this *after* loop passes rather than
562 // before...
563 FPM.addPass(Pass: SCCPPass());
564
565 // Delete dead bit computations (instcombine runs after to fold away the dead
566 // computations, and then ADCE will run later to exploit any new DCE
567 // opportunities that creates).
568 FPM.addPass(Pass: BDCEPass());
569
570 // Run instcombine after redundancy and dead bit elimination to exploit
571 // opportunities opened up by them.
572 FPM.addPass(Pass: InstCombinePass());
573 invokePeepholeEPCallbacks(FPM, Level);
574
575 FPM.addPass(Pass: CoroElidePass());
576
577 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
578
579 // Finally, do an expensive DCE pass to catch all the dead code exposed by
580 // the simplifications and basic cleanup after all the simplifications.
581 // TODO: Investigate if this is too expensive.
582 FPM.addPass(Pass: ADCEPass());
583 FPM.addPass(
584 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
585 FPM.addPass(Pass: InstCombinePass());
586 invokePeepholeEPCallbacks(FPM, Level);
587
588 return FPM;
589}
590
591FunctionPassManager
592PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
593 ThinOrFullLTOPhase Phase) {
594 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
595
596 // The O1 pipeline has a separate pipeline creation function to simplify
597 // construction readability.
598 if (Level.getSpeedupLevel() == 1)
599 return buildO1FunctionSimplificationPipeline(Level, Phase);
600
601 FunctionPassManager FPM;
602
603 if (AreStatisticsEnabled())
604 FPM.addPass(Pass: CountVisitsPass());
605
606 // Form SSA out of local memory accesses after breaking apart aggregates into
607 // scalars.
608 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
609
610 // Catch trivial redundancies
611 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
612 if (EnableKnowledgeRetention)
613 FPM.addPass(Pass: AssumeSimplifyPass());
614
615 // Hoisting of scalars and load expressions.
616 if (EnableGVNHoist)
617 FPM.addPass(Pass: GVNHoistPass());
618
619 // Global value numbering based sinking.
620 if (EnableGVNSink) {
621 FPM.addPass(Pass: GVNSinkPass());
622 FPM.addPass(
623 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
624 }
625
626 // Speculative execution if the target has divergent branches; otherwise nop.
627 FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
628
629 // Optimize based on known information about branches, and cleanup afterward.
630 FPM.addPass(Pass: JumpThreadingPass());
631 FPM.addPass(Pass: CorrelatedValuePropagationPass());
632
633 // Jump table to switch conversion.
634 if (EnableJumpTableToSwitch)
635 FPM.addPass(Pass: JumpTableToSwitchPass(
636 /*InLTO=*/Phase == ThinOrFullLTOPhase::ThinLTOPostLink ||
637 Phase == ThinOrFullLTOPhase::FullLTOPostLink));
638
639 FPM.addPass(
640 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
641 FPM.addPass(Pass: InstCombinePass());
642 FPM.addPass(Pass: AggressiveInstCombinePass());
643
644 if (!Level.isOptimizingForSize())
645 FPM.addPass(Pass: LibCallsShrinkWrapPass());
646
647 invokePeepholeEPCallbacks(FPM, Level);
648
649 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
650 // using the size value profile. Don't perform this when optimizing for size.
651 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
652 !Level.isOptimizingForSize())
653 FPM.addPass(Pass: PGOMemOPSizeOpt());
654
655 FPM.addPass(Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/
656 isInstrumentedPGOUse()));
657 FPM.addPass(
658 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
659
660 // Form canonically associated expression trees, and simplify the trees using
661 // basic mathematical properties. For example, this will form (nearly)
662 // minimal multiplication trees.
663 FPM.addPass(Pass: ReassociatePass());
664
665 if (EnableConstraintElimination)
666 FPM.addPass(Pass: ConstraintEliminationPass());
667
668 // Add the primary loop simplification pipeline.
669 // FIXME: Currently this is split into two loop pass pipelines because we run
670 // some function passes in between them. These can and should be removed
671 // and/or replaced by scheduling the loop pass equivalents in the correct
672 // positions. But those equivalent passes aren't powerful enough yet.
 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
 // `LoopInstSimplify`.
677 LoopPassManager LPM1, LPM2;
678
679 // Simplify the loop body. We do this initially to clean up after other loop
680 // passes run, either when iterating on a loop or on inner loops with
681 // implications on the outer loop.
682 LPM1.addPass(Pass: LoopInstSimplifyPass());
683 LPM1.addPass(Pass: LoopSimplifyCFGPass());
684
685 // Try to remove as much code from the loop header as possible,
686 // to reduce amount of IR that will have to be duplicated. However,
687 // do not perform speculative hoisting the first time as LICM
688 // will destroy metadata that may not need to be destroyed if run
689 // after loop rotation.
690 // TODO: Investigate promotion cap for O1.
691 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
692 /*AllowSpeculation=*/false));
693
694 // Disable header duplication in loop rotation at -Oz.
695 LPM1.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
696 Level != OptimizationLevel::Oz,
697 isLTOPreLink(Phase)));
698 // TODO: Investigate promotion cap for O1.
699 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
700 /*AllowSpeculation=*/true));
701 LPM1.addPass(
702 Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
703 if (EnableLoopFlatten)
704 LPM1.addPass(Pass: LoopFlattenPass());
705
706 LPM2.addPass(Pass: LoopIdiomRecognizePass());
707 LPM2.addPass(Pass: IndVarSimplifyPass());
708
709 {
710 ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses;
711 ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
712 OptimizationLevel::O3));
713 LPM2.addPass(Pass: std::move(ExtraPasses));
714 }
715
716 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
717
718 LPM2.addPass(Pass: LoopDeletionPass());
719
 // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
 // because it changes the IR in a way that makes profile annotation in the
 // backend compile inaccurate. The normal unroller doesn't pay attention to
 // forced full unroll attributes, so we need to make sure to allow the full
 // unroll pass to pay attention to them.
725 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
726 PGOOpt->Action != PGOOptions::SampleUse)
727 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
728 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
729 PTO.ForgetAllSCEVInLoopUnroll));
730
731 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
732
733 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
734 /*UseMemorySSA=*/true));
735 FPM.addPass(
736 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
737 FPM.addPass(Pass: InstCombinePass());
738 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
739 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
740 // *All* loop passes must preserve it, in order to be able to use it.
741 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
742 /*UseMemorySSA=*/false));
743
744 // Delete small array after loop unroll.
745 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
746
747 // Try vectorization/scalarization transforms that are both improvements
748 // themselves and can allow further folds with GVN and InstCombine.
749 FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
750
751 // Eliminate redundancies.
752 FPM.addPass(Pass: MergedLoadStoreMotionPass());
753 if (RunNewGVN)
754 FPM.addPass(Pass: NewGVNPass());
755 else
756 FPM.addPass(Pass: GVNPass());
757
758 // Sparse conditional constant propagation.
759 // FIXME: It isn't clear why we do this *after* loop passes rather than
760 // before...
761 FPM.addPass(Pass: SCCPPass());
762
763 // Delete dead bit computations (instcombine runs after to fold away the dead
764 // computations, and then ADCE will run later to exploit any new DCE
765 // opportunities that creates).
766 FPM.addPass(Pass: BDCEPass());
767
768 // Run instcombine after redundancy and dead bit elimination to exploit
769 // opportunities opened up by them.
770 FPM.addPass(Pass: InstCombinePass());
771 invokePeepholeEPCallbacks(FPM, Level);
772
773 // Re-consider control flow based optimizations after redundancy elimination,
774 // redo DCE, etc.
775 if (EnableDFAJumpThreading)
776 FPM.addPass(Pass: DFAJumpThreadingPass());
777
778 FPM.addPass(Pass: JumpThreadingPass());
779 FPM.addPass(Pass: CorrelatedValuePropagationPass());
780
781 // Finally, do an expensive DCE pass to catch all the dead code exposed by
782 // the simplifications and basic cleanup after all the simplifications.
783 // TODO: Investigate if this is too expensive.
784 FPM.addPass(Pass: ADCEPass());
785
786 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
787 FPM.addPass(Pass: MemCpyOptPass());
788
789 FPM.addPass(Pass: DSEPass());
790 FPM.addPass(Pass: MoveAutoInitPass());
791
792 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
793 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
794 /*AllowSpeculation=*/true),
795 /*UseMemorySSA=*/true));
796
797 FPM.addPass(Pass: CoroElidePass());
798
799 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
800
801 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
802 .convertSwitchRangeToICmp(B: true)
803 .convertSwitchToArithmetic(B: true)
804 .hoistCommonInsts(B: true)
805 .sinkCommonInsts(B: true)));
806 FPM.addPass(Pass: InstCombinePass());
807 invokePeepholeEPCallbacks(FPM, Level);
808
809 return FPM;
810}
811
812void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
813 MPM.addPass(Pass: CanonicalizeAliasesPass());
814 MPM.addPass(Pass: NameAnonGlobalPass());
815}
816
817void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
818 OptimizationLevel Level,
819 ThinOrFullLTOPhase LTOPhase) {
820 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
821 if (DisablePreInliner)
822 return;
823 InlineParams IP;
824
825 IP.DefaultThreshold = PreInlineThreshold;
826
827 // FIXME: The hint threshold has the same value used by the regular inliner
828 // when not optimzing for size. This should probably be lowered after
829 // performance testing.
830 // FIXME: this comment is cargo culted from the old pass manager, revisit).
831 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
832 ModuleInlinerWrapperPass MIWP(
833 IP, /* MandatoryFirst */ true,
834 InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner});
835 CGSCCPassManager &CGPipeline = MIWP.getPM();
836
837 FunctionPassManager FPM;
838 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
839 FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies.
840 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
841 B: true))); // Merge & remove basic blocks.
842 FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences.
843 invokePeepholeEPCallbacks(FPM, Level);
844
845 CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
846 Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
847
848 MPM.addPass(Pass: std::move(MIWP));
849
850 // Delete anything that is now dead to make sure that we don't instrument
851 // dead code. Instrumentation can end up keeping dead code around and
852 // dramatically increase code size.
853 MPM.addPass(Pass: GlobalDCEPass());
854}
855
856void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
857 OptimizationLevel Level) {
858 if (EnablePostPGOLoopRotation) {
859 // Disable header duplication in loop rotation at -Oz.
860 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
861 Pass: createFunctionToLoopPassAdaptor(
862 Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
863 Level != OptimizationLevel::Oz),
864 /*UseMemorySSA=*/false),
865 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
866 }
867}
868
869void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
870 OptimizationLevel Level, bool RunProfileGen,
871 bool IsCS, bool AtomicCounterUpdate,
872 std::string ProfileFile,
873 std::string ProfileRemappingFile) {
874 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
875
876 if (!RunProfileGen) {
877 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
878 MPM.addPass(
879 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
880 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
881 // RequireAnalysisPass for PSI before subsequent non-module passes.
882 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
883 return;
884 }
885
886 // Perform PGO instrumentation.
887 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
888 : PGOInstrumentationType::FDO));
889
890 addPostPGOLoopRotation(MPM, Level);
891 // Add the profile lowering pass.
892 InstrProfOptions Options;
893 if (!ProfileFile.empty())
894 Options.InstrProfileOutput = ProfileFile;
895 // Do counter promotion at Level greater than O0.
896 Options.DoCounterPromotion = true;
897 Options.UseBFIInPromotion = IsCS;
898 if (EnableSampledInstr) {
899 Options.Sampling = true;
900 // With sampling, there is little beneifit to enable counter promotion.
901 // But note that sampling does work with counter promotion.
902 Options.DoCounterPromotion = false;
903 }
904 Options.Atomic = AtomicCounterUpdate;
905 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
906}
907
908void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
909 bool RunProfileGen, bool IsCS,
910 bool AtomicCounterUpdate,
911 std::string ProfileFile,
912 std::string ProfileRemappingFile) {
913 if (!RunProfileGen) {
914 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
915 MPM.addPass(
916 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
917 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
918 // RequireAnalysisPass for PSI before subsequent non-module passes.
919 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
920 return;
921 }
922
923 // Perform PGO instrumentation.
924 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
925 : PGOInstrumentationType::FDO));
926 // Add the profile lowering pass.
927 InstrProfOptions Options;
928 if (!ProfileFile.empty())
929 Options.InstrProfileOutput = ProfileFile;
930 // Do not do counter promotion at O0.
931 Options.DoCounterPromotion = false;
932 Options.UseBFIInPromotion = IsCS;
933 Options.Atomic = AtomicCounterUpdate;
934 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
935}
936
937static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
938 return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel());
939}
940
941ModuleInlinerWrapperPass
942PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
943 ThinOrFullLTOPhase Phase) {
944 InlineParams IP;
945 if (PTO.InlinerThreshold == -1)
946 IP = getInlineParamsFromOptLevel(Level);
947 else
948 IP = getInlineParams(Threshold: PTO.InlinerThreshold);
949 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
950 // set hot-caller threshold to 0 to disable hot
951 // callsite inline (as much as possible [1]) because it makes
952 // profile annotation in the backend inaccurate.
953 //
954 // [1] Note the cost of a function could be below zero due to erased
955 // prologue / epilogue.
956 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
957 IP.HotCallSiteThreshold = 0;
958
959 if (PGOOpt)
960 IP.EnableDeferral = EnablePGOInlineDeferral;
961
962 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
963 InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner},
964 UseInlineAdvisor, MaxDevirtIterations);
965
966 // Require the GlobalsAA analysis for the module so we can query it within
967 // the CGSCC pipeline.
968 if (EnableGlobalAnalyses) {
969 MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
970 // Invalidate AAManager so it can be recreated and pick up the newly
971 // available GlobalsAA.
972 MIWP.addModulePass(
973 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
974 }
975
976 // Require the ProfileSummaryAnalysis for the module so we can query it within
977 // the inliner pass.
978 MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
979
980 // Now begin the main postorder CGSCC pipeline.
981 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
982 // manager and trying to emulate its precise behavior. Much of this doesn't
983 // make a lot of sense and we should revisit the core CGSCC structure.
984 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
985
986 // Note: historically, the PruneEH pass was run first to deduce nounwind and
987 // generally clean up exception handling overhead. It isn't clear this is
988 // valuable as the inliner doesn't currently care whether it is inlining an
989 // invoke or a call.
990
991 if (AttributorRun & AttributorRunOption::CGSCC)
992 MainCGPipeline.addPass(Pass: AttributorCGSCCPass());
993 else if (AttributorRun & AttributorRunOption::CGSCC_LIGHT)
994 MainCGPipeline.addPass(Pass: AttributorLightCGSCCPass());
995
996 // Deduce function attributes. We do another run of this after the function
997 // simplification pipeline, so this only needs to run when it could affect the
998 // function simplification pipeline, which is only the case with recursive
999 // functions.
1000 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
1001
1002 // When at O3 add argument promotion to the pass pipeline.
1003 // FIXME: It isn't at all clear why this should be limited to O3.
1004 if (Level == OptimizationLevel::O3)
1005 MainCGPipeline.addPass(Pass: ArgumentPromotionPass());
1006
1007 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1008 // there are no OpenMP runtime calls present in the module.
1009 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1010 MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass(Phase));
1011
1012 invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level);
1013
1014 // Add the core function simplification pipeline nested inside the
1015 // CGSCC walk.
1016 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1017 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1018 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1019
1020 // Finally, deduce any function attributes based on the fully simplified
1021 // function.
1022 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass());
1023
1024 // Mark that the function is fully simplified and that it shouldn't be
1025 // simplified again if we somehow revisit it due to CGSCC mutations unless
1026 // it's been modified since.
1027 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1028 Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
1029
1030 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1031 MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1032 MainCGPipeline.addPass(Pass: CoroAnnotationElidePass());
1033 }
1034
1035 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1036 MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor(
1037 Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
1038
1039 return MIWP;
1040}
1041
1042ModulePassManager
1043PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
1044 ThinOrFullLTOPhase Phase) {
1045 ModulePassManager MPM;
1046
1047 InlineParams IP = getInlineParamsFromOptLevel(Level);
1048 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1049 // set hot-caller threshold to 0 to disable hot
1050 // callsite inline (as much as possible [1]) because it makes
1051 // profile annotation in the backend inaccurate.
1052 //
1053 // [1] Note the cost of a function could be below zero due to erased
1054 // prologue / epilogue.
1055 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1056 IP.HotCallSiteThreshold = 0;
1057
1058 if (PGOOpt)
1059 IP.EnableDeferral = EnablePGOInlineDeferral;
1060
1061 // The inline deferral logic is used to avoid losing some
1062 // inlining chance in future. It is helpful in SCC inliner, in which
1063 // inlining is processed in bottom-up order.
1064 // While in module inliner, the inlining order is a priority-based order
1065 // by default. The inline deferral is unnecessary there. So we disable the
1066 // inline deferral logic in module inliner.
1067 IP.EnableDeferral = false;
1068
1069 MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1070 if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1071 MPM.addPass(Pass: GlobalOptPass());
1072 MPM.addPass(Pass: GlobalDCEPass());
1073 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1074 }
1075
1076 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1077 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1078 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1079
1080 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1081 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1082 Pass: CoroSplitPass(Level != OptimizationLevel::O0)));
1083 MPM.addPass(
1084 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: CoroAnnotationElidePass()));
1085 }
1086
1087 return MPM;
1088}
1089
1090ModulePassManager
1091PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1092 ThinOrFullLTOPhase Phase) {
1093 assert(Level != OptimizationLevel::O0 &&
1094 "Should not be used for O0 pipeline");
1095
1096 assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1097 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1098
1099 ModulePassManager MPM;
1100
1101 // Place pseudo probe instrumentation as the first pass of the pipeline to
1102 // minimize the impact of optimization changes.
1103 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1104 Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1105 MPM.addPass(Pass: SampleProfileProbePass(TM));
1106
1107 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1108
1109 // In ThinLTO mode, when flattened profile is used, all the available
1110 // profile information will be annotated in PreLink phase so there is
1111 // no need to load the profile again in PostLink.
1112 bool LoadSampleProfile =
1113 HasSampleProfile &&
1114 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1115
1116 // During the ThinLTO backend phase we perform early indirect call promotion
1117 // here, before globalopt. Otherwise imported available_externally functions
1118 // look unreferenced and are removed. If we are going to load the sample
1119 // profile then defer until later.
1120 // TODO: See if we can move later and consolidate with the location where
1121 // we perform ICP when we are loading a sample profile.
1122 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1123 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1124 // determine whether the new direct calls are annotated with prof metadata.
1125 // Ideally this should be determined from whether the IR is annotated with
1126 // sample profile, and not whether the a sample profile was provided on the
1127 // command line. E.g. for flattened profiles where we will not be reloading
1128 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1129 // provide the sample profile file.
1130 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1131 MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1132
1133 // Create an early function pass manager to cleanup the output of the
1134 // frontend. Not necessary with LTO post link pipelines since the pre link
1135 // pipeline already cleaned up the frontend output.
1136 if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1137 // Do basic inference of function attributes from known properties of system
1138 // libraries and other oracles.
1139 MPM.addPass(Pass: InferFunctionAttrsPass());
1140 MPM.addPass(Pass: CoroEarlyPass());
1141
1142 FunctionPassManager EarlyFPM;
1143 EarlyFPM.addPass(Pass: EntryExitInstrumenterPass(/*PostInlining=*/false));
1144 // Lower llvm.expect to metadata before attempting transforms.
1145 // Compare/branch metadata may alter the behavior of passes like
1146 // SimplifyCFG.
1147 EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass());
1148 EarlyFPM.addPass(Pass: SimplifyCFGPass());
1149 EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1150 EarlyFPM.addPass(Pass: EarlyCSEPass());
1151 if (Level == OptimizationLevel::O3)
1152 EarlyFPM.addPass(Pass: CallSiteSplittingPass());
1153 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1154 Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1155 }
1156
1157 if (LoadSampleProfile) {
1158 // Annotate sample profile right after early FPM to ensure freshness of
1159 // the debug info.
1160 MPM.addPass(Pass: SampleProfileLoaderPass(
1161 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1162 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1163 // RequireAnalysisPass for PSI before subsequent non-module passes.
1164 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1165 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1166 // for the profile annotation to be accurate in the LTO backend.
1167 if (!isLTOPreLink(Phase))
1168 // We perform early indirect call promotion here, before globalopt.
1169 // This is important for the ThinLTO backend phase because otherwise
1170 // imported available_externally functions look unreferenced and are
1171 // removed.
1172 MPM.addPass(
1173 Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1174 }
1175
1176 // Try to perform OpenMP specific optimizations on the module. This is a
1177 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1178 MPM.addPass(Pass: OpenMPOptPass(Phase));
1179
1180 if (AttributorRun & AttributorRunOption::MODULE)
1181 MPM.addPass(Pass: AttributorPass());
1182 else if (AttributorRun & AttributorRunOption::MODULE_LIGHT)
1183 MPM.addPass(Pass: AttributorLightPass());
1184
1185 // Lower type metadata and the type.test intrinsic in the ThinLTO
1186 // post link pipeline after ICP. This is to enable usage of the type
1187 // tests in ICP sequences.
1188 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1189 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1190 lowertypetests::DropTestKind::Assume));
1191
1192 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
1193
1194 // Interprocedural constant propagation now that basic cleanup has occurred
1195 // and prior to optimizing globals.
1196 // FIXME: This position in the pipeline hasn't been carefully considered in
1197 // years, it should be re-analyzed.
1198 MPM.addPass(Pass: IPSCCPPass(
1199 IPSCCPOptions(/*AllowFuncSpec=*/
1200 Level != OptimizationLevel::Os &&
1201 Level != OptimizationLevel::Oz &&
1202 !isLTOPreLink(Phase))));
1203
1204 // Attach metadata to indirect call sites indicating the set of functions
1205 // they may target at run-time. This should follow IPSCCP.
1206 MPM.addPass(Pass: CalledValuePropagationPass());
1207
1208 // Optimize globals to try and fold them into constants.
1209 MPM.addPass(Pass: GlobalOptPass());
1210
1211 // Create a small function pass pipeline to cleanup after all the global
1212 // optimizations.
1213 FunctionPassManager GlobalCleanupPM;
1214 // FIXME: Should this instead by a run of SROA?
1215 GlobalCleanupPM.addPass(Pass: PromotePass());
1216 GlobalCleanupPM.addPass(Pass: InstCombinePass());
1217 invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level);
1218 GlobalCleanupPM.addPass(
1219 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1220 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM),
1221 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1222
1223 // We already asserted this happens in non-FullLTOPostLink earlier.
1224 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1225 // Enable contextual profiling instrumentation.
1226 const bool IsCtxProfGen =
1227 IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
1228 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1229 const bool IsPGOInstrGen =
1230 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1231 const bool IsPGOInstrUse =
1232 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1233 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1234 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1235 // enable ctx profiling from the frontend.
1236 assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1237 "Enabling both instrumented PGO and contextual instrumentation is not "
1238 "supported.");
1239 const bool IsCtxProfUse =
1240 !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
1241
1242 assert(
1243 (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) &&
1244 "--instrument-cold-function-only-path is provided but "
1245 "--pgo-instrument-cold-function-only is not enabled");
1246 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1247 IsPGOPreLink &&
1248 !InstrumentColdFuncOnlyPath.empty();
1249
1250 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1251 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1252 addPreInlinerPasses(MPM, Level, LTOPhase: Phase);
1253
1254 // Add all the requested passes for instrumentation PGO, if requested.
1255 if (IsPGOInstrGen || IsPGOInstrUse) {
1256 addPGOInstrPasses(MPM, Level,
1257 /*RunProfileGen=*/IsPGOInstrGen,
1258 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1259 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1260 } else if (IsCtxProfGen || IsCtxProfUse) {
1261 MPM.addPass(Pass: PGOInstrumentationGen(PGOInstrumentationType::CTXPROF));
1262 // In pre-link, we just want the instrumented IR. We use the contextual
1263 // profile in the post-thinlink phase.
1264 // The instrumentation will be removed in post-thinlink after IPO.
1265 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1266 // mechanism for GUIDs.
1267 MPM.addPass(Pass: AssignGUIDPass());
1268 if (IsCtxProfUse) {
1269 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1270 return MPM;
1271 }
1272 // Block further inlining in the instrumented ctxprof case. This avoids
1273 // confusingly collecting profiles for the same GUID corresponding to
1274 // different variants of the function. We could do like PGO and identify
1275 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1276 // thinlto to happen before performing any further optimizations, it's
1277 // unnecessary to collect profiles for non-prevailing copies.
1278 MPM.addPass(Pass: NoinlineNonPrevailing());
1279 addPostPGOLoopRotation(MPM, Level);
1280 MPM.addPass(Pass: PGOCtxProfLoweringPass());
1281 } else if (IsColdFuncOnlyInstrGen) {
1282 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1283 /* AtomicCounterUpdate */ false,
1284 ProfileFile: InstrumentColdFuncOnlyPath,
1285 /* ProfileRemappingFile */ "");
1286 }
1287
1288 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1289 MPM.addPass(Pass: PGOIndirectCallPromotion(false, false));
1290
1291 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1292 MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1293 EnableSampledInstr));
1294
1295 if (IsMemprofUse)
1296 MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, FS));
1297
1298 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1299 PGOOpt->Action == PGOOptions::SampleUse))
1300 MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1301
1302 MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1303
1304 if (EnableModuleInliner)
1305 MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase));
1306 else
1307 MPM.addPass(Pass: buildInlinerPipeline(Level, Phase));
1308
1309 // Remove any dead arguments exposed by cleanups, constant folding globals,
1310 // and argument promotion.
1311 MPM.addPass(Pass: DeadArgumentEliminationPass());
1312
1313 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1314 MPM.addPass(Pass: SimplifyTypeTestsPass());
1315
1316 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
1317 MPM.addPass(Pass: CoroCleanupPass());
1318
1319 // Optimize globals now that functions are fully simplified.
1320 MPM.addPass(Pass: GlobalOptPass());
1321 MPM.addPass(Pass: GlobalDCEPass());
1322
1323 return MPM;
1324}
1325
1326/// TODO: Should LTO cause any differences to this set of passes?
1327void PassBuilder::addVectorPasses(OptimizationLevel Level,
1328 FunctionPassManager &FPM,
1329 ThinOrFullLTOPhase LTOPhase) {
1330 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1331
1332 FPM.addPass(Pass: LoopVectorizePass(
1333 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1334
1335 // Drop dereferenceable assumes after vectorization, as they are no longer
1336 // needed and can inhibit further optimization.
1337 if (!isLTOPreLink(Phase: LTOPhase))
1338 FPM.addPass(Pass: DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1339
1340 FPM.addPass(Pass: InferAlignmentPass());
1341 if (IsFullLTO) {
1342 // The vectorizer may have significantly shortened a loop body; unroll
1343 // again. Unroll small loops to hide loop backedge latency and saturate any
1344 // parallel execution resources of an out-of-order processor. We also then
1345 // need to clean up redundancies and loop invariant code.
1346 // FIXME: It would be really good to use a loop-integrated instruction
1347 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1348 // across the loop nests.
1349 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1350 if (EnableUnrollAndJam && PTO.LoopUnrolling)
1351 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1352 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1353 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1354 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1355 PTO.ForgetAllSCEVInLoopUnroll)));
1356 FPM.addPass(Pass: WarnMissedTransformationsPass());
1357 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1358 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1359 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1360 // NOTE: we are very late in the pipeline, and we don't have any LICM
1361 // or SimplifyCFG passes scheduled after us that would clean up
1362 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1363 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1364 }
1365
1366 if (!IsFullLTO) {
1367 // Eliminate loads by forwarding stores from the previous iteration to loads
1368 // of the current iteration.
1369 FPM.addPass(Pass: LoopLoadEliminationPass());
1370 }
1371 // Cleanup after the loop optimization passes.
1372 FPM.addPass(Pass: InstCombinePass());
1373
1374 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1375 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1376 // At higher optimization levels, try to clean up any runtime overlap and
1377 // alignment checks inserted by the vectorizer. We want to track correlated
1378 // runtime checks for two inner loops in the same outer loop, fold any
1379 // common computations, hoist loop-invariant aspects out of any outer loop,
1380 // and unswitch the runtime checks if possible. Once hoisted, we may have
1381 // dead (or speculatable) control flows or more combining opportunities.
1382 ExtraPasses.addPass(Pass: EarlyCSEPass());
1383 ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass());
1384 ExtraPasses.addPass(Pass: InstCombinePass());
1385 LoopPassManager LPM;
1386 LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1387 /*AllowSpeculation=*/true));
1388 LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1389 OptimizationLevel::O3));
1390 ExtraPasses.addPass(
1391 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true));
1392 ExtraPasses.addPass(
1393 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1394 ExtraPasses.addPass(Pass: InstCombinePass());
1395 FPM.addPass(Pass: std::move(ExtraPasses));
1396 }
1397
1398 // Now that we've formed fast to execute loop structures, we do further
1399 // optimizations. These are run afterward as they might block doing complex
1400 // analyses and transforms such as what are needed for loop vectorization.
1401
1402 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1403 // GVN, loop transforms, and others have already run, so it's now better to
1404 // convert to more optimized IR using more aggressive simplify CFG options.
1405 // The extra sinking transform can create larger basic blocks, so do this
1406 // before SLP vectorization.
1407 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
1408 .forwardSwitchCondToPhi(B: true)
1409 .convertSwitchRangeToICmp(B: true)
1410 .convertSwitchToArithmetic(B: true)
1411 .convertSwitchToLookupTable(B: true)
1412 .needCanonicalLoops(B: false)
1413 .hoistCommonInsts(B: true)
1414 .sinkCommonInsts(B: true)));
1415
1416 if (IsFullLTO) {
1417 FPM.addPass(Pass: SCCPPass());
1418 FPM.addPass(Pass: InstCombinePass());
1419 FPM.addPass(Pass: BDCEPass());
1420 }
1421
1422 // Optimize parallel scalar instruction chains into SIMD instructions.
1423 if (PTO.SLPVectorization) {
1424 FPM.addPass(Pass: SLPVectorizerPass());
1425 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1426 FPM.addPass(Pass: EarlyCSEPass());
1427 }
1428 }
1429 // Enhance/cleanup vector code.
1430 FPM.addPass(Pass: VectorCombinePass());
1431
1432 if (!IsFullLTO) {
1433 FPM.addPass(Pass: InstCombinePass());
1434 // Unroll small loops to hide loop backedge latency and saturate any
1435 // parallel execution resources of an out-of-order processor. We also then
1436 // need to clean up redundancies and loop invariant code.
1437 // FIXME: It would be really good to use a loop-integrated instruction
1438 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1439 // across the loop nests.
1440 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1441 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1442 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1443 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1444 }
1445 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1446 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1447 PTO.ForgetAllSCEVInLoopUnroll)));
1448 FPM.addPass(Pass: WarnMissedTransformationsPass());
1449 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1450 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1451 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1452 // NOTE: we are very late in the pipeline, and we don't have any LICM
1453 // or SimplifyCFG passes scheduled after us, that would cleanup
1454 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1455 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1456 }
1457
1458 FPM.addPass(Pass: InferAlignmentPass());
1459 FPM.addPass(Pass: InstCombinePass());
1460
1461 // This is needed for two reasons:
1462 // 1. It works around problems that instcombine introduces, such as sinking
1463 // expensive FP divides into loops containing multiplications using the
1464 // divide result.
1465 // 2. It helps to clean up some loop-invariant code created by the loop
1466 // unroll pass when IsFullLTO=false.
1467 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1468 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1469 /*AllowSpeculation=*/true),
1470 /*UseMemorySSA=*/true));
1471
1472 // Now that we've vectorized and unrolled loops, we may have more refined
1473 // alignment information, try to re-derive it here.
1474 FPM.addPass(Pass: AlignmentFromAssumptionsPass());
1475}
1476
/// Build the module-level optimization pipeline for \p Level in \p LTOPhase.
///
/// Module passes are appended directly to the returned manager. Function
/// passes are collected in a local FunctionPassManager (OptimizePM) and added
/// through a single module-to-function adaptor. Several passes are skipped
/// when \p LTOPhase is a pre-link phase (see the LTOPreLink checks below).
///
/// \returns the populated ModulePassManager.
1477ModulePassManager
1478PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1479 ThinOrFullLTOPhase LTOPhase) {
1480 const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase);
1481 ModulePassManager MPM;
1482
1483 // Run partial inlining pass to partially inline functions that have
1484 // large bodies.
1485 if (RunPartialInlining)
1486 MPM.addPass(Pass: PartialInlinerPass());
1487
1488 // Remove avail extern fns and globals definitions since we aren't compiling
1489 // an object file for later LTO. For LTO we want to preserve these so they
1490 // are eligible for inlining at link-time. Note if they are unreferenced they
1491 // will be removed by GlobalDCE later, so this only impacts referenced
1492 // available externally globals. Eventually they will be suppressed during
1493 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1494 // may make globals referenced by available external functions dead and saves
1495 // running remaining passes on the eliminated functions. These should be
1496 // preserved during prelinking for link-time inlining decisions.
1497 if (!LTOPreLink)
1498 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1499
1500 // Do RPO function attribute inference across the module to forward-propagate
1501 // attributes where applicable.
1502 // FIXME: Is this really an optimization rather than a canonicalization?
1503 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1504
1505 // Do a post inline PGO instrumentation and use pass. This is a context
1506 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1507 // cross-module inline has not been done yet. The context sensitive
1508 // instrumentation is after all the inlines are done.
1509 if (!LTOPreLink && PGOOpt) {
1510 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1511 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1512 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1513 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1514 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1515 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1516 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1517 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1518 }
1519
1520 // Re-compute GlobalsAA here prior to function passes. This is particularly
1521 // useful as the above will have inlined, DCE'ed, and function-attr
1522 // propagated everything. We should at this point have a reasonably minimal
1523 // and richly annotated call graph. By computing aliasing and mod/ref
1524 // information for all local globals here, the late loop passes and notably
1525 // the vectorizer will be able to use them to help recognize vectorizable
1526 // memory operations.
1527 if (EnableGlobalAnalyses)
1528 MPM.addPass(Pass: RecomputeGlobalsAAPass());
1529
1530 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase: LTOPhase);
1531
     // Function passes below are accumulated here and added to MPM in one
     // module-to-function adaptor once the function pipeline is complete.
1532 FunctionPassManager OptimizePM;
1533
1534 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1535 // additional uses of the affected value may be introduced through inlining
1536 // and CSE.
     // NOTE: this makes the same isLTOPreLink(LTOPhase) call whose result is
     // already held in the LTOPreLink local above.
1537 if (!isLTOPreLink(Phase: LTOPhase))
1538 OptimizePM.addPass(Pass: DropUnnecessaryAssumesPass());
1539
1540 // Scheduling LoopVersioningLICM when inlining is over, because after that
1541 // we may see more accurate aliasing. Reason to run this late is that too
1542 // early versioning may prevent further inlining due to increase of code
1543 // size. Other optimizations which run later might benefit from the no-alias
1544 // assumption in the cloned loop.
1545 if (UseLoopVersioningLICM) {
1546 OptimizePM.addPass(
1547 Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass()));
1548 // LoopVersioningLICM pass might increase new LICM opportunities.
1549 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1550 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1551 /*AllowSpeculation=*/true),
1552 /*UseMemorySSA=*/UseMemorySSA: true));
1553 }
1554
1555 OptimizePM.addPass(Pass: Float2IntPass());
1556 OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass());
1557
1558 if (EnableMatrix) {
1559 OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass());
1560 OptimizePM.addPass(Pass: EarlyCSEPass());
1561 }
1562
1563 // CHR pass should only be applied with the profile information.
1564 // The check against the profile summary information is done in CHR itself.
1565 if (EnableCHR && Level == OptimizationLevel::O3)
1566 OptimizePM.addPass(Pass: ControlHeightReductionPass());
1567
1568 // FIXME: We need to run some loop optimizations to re-rotate loops after
1569 // simplifycfg and others undo their rotation.
1570
1571 // Optimize the loop execution. These passes operate on entire loop nests
1572 // rather than on each loop in an inside-out manner, and so they are actually
1573 // function passes.
1574
1575 invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level);
1576
1577 LoopPassManager LPM;
1578 // First rotate loops that may have been un-rotated by prior passes.
1579 // Disable header duplication at -Oz.
1580 LPM.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
1581 Level != OptimizationLevel::Oz,
1582 LTOPreLink));
1583 // Some loops may have become dead by now. Try to delete them.
1584 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1585 // this may need to be revisited once we run GVN before loop deletion
1586 // in the simplification pipeline.
1587 LPM.addPass(Pass: LoopDeletionPass());
1588
1589 if (PTO.LoopInterchange)
1590 LPM.addPass(Pass: LoopInterchangePass());
1591
1592 OptimizePM.addPass(
1593 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false));
1594
1595 // FIXME: This may not be the right place in the pipeline.
1596 // We need to have the data to support the right place.
1597 if (PTO.LoopFusion)
1598 OptimizePM.addPass(Pass: LoopFusePass());
1599
1600 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1601 // into separate loop that would otherwise inhibit vectorization. This is
1602 // currently only performed for loops marked with the metadata
1603 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1604 OptimizePM.addPass(Pass: LoopDistributePass());
1605
1606 // Populates the VFABI attribute with the scalar-to-vector mappings
1607 // from the TargetLibraryInfo.
1608 OptimizePM.addPass(Pass: InjectTLIMappings());
1609
1610 addVectorPasses(Level, FPM&: OptimizePM, LTOPhase);
1611
1612 invokeVectorizerEndEPCallbacks(FPM&: OptimizePM, Level);
1613
1614 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1615 // canonicalization pass that enables other optimizations. As a result,
1616 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1617 // result too early.
1618 OptimizePM.addPass(Pass: LoopSinkPass());
1619
1620 // And finally clean up LCSSA form before generating code.
1621 OptimizePM.addPass(Pass: InstSimplifyPass());
1622
1623 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1624 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1625 // flattening of blocks.
1626 OptimizePM.addPass(Pass: DivRemPairsPass());
1627
1628 // Try to annotate calls that were created during optimization.
1629 OptimizePM.addPass(
1630 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1631
1632 // LoopSink (and other loop passes since the last simplifyCFG) might have
1633 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1634 OptimizePM.addPass(
1635 Pass: SimplifyCFGPass(SimplifyCFGOptions()
1636 .convertSwitchRangeToICmp(B: true)
1637 .convertSwitchToArithmetic(B: true)
1638 .speculateUnpredictables(B: true)
1639 .hoistLoadsStoresWithCondFaulting(B: true)));
1640
1641 // Add the core optimizing pipeline.
1642 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM),
1643 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1644
1645 // AllocToken transforms heap allocation calls; this needs to run late after
1646 // other allocation call transformations (such as those in InstCombine).
1647 if (!LTOPreLink)
1648 MPM.addPass(Pass: AllocTokenPass());
1649
1650 invokeOptimizerLastEPCallbacks(MPM, Level, Phase: LTOPhase);
1651
1652 // Split out cold code. Splitting is done late to avoid hiding context from
1653 // other optimizations and inadvertently regressing performance. The tradeoff
1654 // is that this has a higher code size cost than splitting early.
1655 if (EnableHotColdSplit && !LTOPreLink)
1656 MPM.addPass(Pass: HotColdSplittingPass());
1657
1658 // Search the code for similar regions of code. If enough similar regions can
1659 // be found where extracting the regions into their own function will decrease
1660 // the size of the program, we extract the regions, and deduplicate the
1661 // structurally similar regions.
1662 if (EnableIROutliner)
1663 MPM.addPass(Pass: IROutlinerPass());
1664
1665 // Now we need to do some global optimization transforms.
1666 // FIXME: It would seem like these should come first in the optimization
1667 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1668 // ordering here.
1669 MPM.addPass(Pass: GlobalDCEPass());
1670 MPM.addPass(Pass: ConstantMergePass());
1671
1672 // Merge functions if requested. It has a better chance to merge functions
1673 // after ConstantMerge folded jump tables.
1674 if (PTO.MergeFunctions)
1675 MPM.addPass(Pass: MergeFunctionsPass());
1676
1677 if (PTO.CallGraphProfile && !LTOPreLink)
1678 MPM.addPass(Pass: CGProfilePass(isLTOPostLink(Phase: LTOPhase)));
1679
1680 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1681 if (!LTOPreLink)
1682 MPM.addPass(Pass: RelLookupTableConverterPass());
1683
1684 // Add devirtualization pass only when LTO is not enabled, as otherwise
1685 // the pass is already enabled in the LTO pipeline.
1686 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1687 // TODO: explore a better pipeline configuration that can improve
1688 // compilation time overhead.
     // PTO.DevirtualizeSpeculatively is known to be true here because it is
     // part of the enclosing condition.
1689 MPM.addPass(Pass: WholeProgramDevirtPass(
1690 /*ExportSummary*/ nullptr,
1691 /*ImportSummary*/ nullptr,
1692 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1693 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1694 lowertypetests::DropTestKind::Assume));
1695 // Given that the devirtualization creates more opportunities for inlining,
1696 // we run the Inliner again here to maximize the optimization gain we
1697 // get from devirtualization.
1698 // Also, we can't run devirtualization before inlining because the
1699 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1700 // and those passes are only effective after inlining.
1701 if (EnableModuleInliner) {
1702 MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
1703 UseInlineAdvisor,
1704 ThinOrFullLTOPhase::None));
1705 } else {
1706 MPM.addPass(Pass: ModuleInlinerWrapperPass(
1707 getInlineParamsFromOptLevel(Level),
1708 /* MandatoryFirst */ true,
1709 InlineContext{.LTOPhase: ThinOrFullLTOPhase::None, .Pass: InlinePass::CGSCCInliner}));
1710 }
1711 }
1712 return MPM;
1713}
1714
/// Build the default per-module pipeline for \p Level.
///
/// At O0 this returns buildO0DefaultPipeline(Level, Phase) directly.
/// Otherwise it adds a few preparatory module passes, then the module
/// simplification pipeline followed by the module optimization pipeline, and
/// finishes with annotation remarks and, for pre-link phases, the required
/// LTO pre-link passes.
///
/// \p Phase is asserted to be an LTO pre-link phase or
/// ThinOrFullLTOPhase::None (the assert sits after the O0 early return).
1715ModulePassManager
1716PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1717 ThinOrFullLTOPhase Phase) {
1718 if (Level == OptimizationLevel::O0)
1719 return buildO0DefaultPipeline(Level, Phase);
1720
1721 ModulePassManager MPM;
1722
1723 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1724 // are not running LTO. If that changes the below checks may need updating.
1725 assert(isLTOPreLink(Phase) || Phase == ThinOrFullLTOPhase::None);
1726
1727 // If we are invoking this in non-LTO mode, remove any MemProf related
1728 // attributes and metadata, as we don't know whether we are linking with
1729 // a library containing the necessary interfaces.
1730 if (Phase == ThinOrFullLTOPhase::None)
1731 MPM.addPass(Pass: MemProfRemoveInfo());
1732
1733 // Convert @llvm.global.annotations to !annotation metadata.
1734 MPM.addPass(Pass: Annotation2MetadataPass());
1735
1736 // Force any function attributes we want the rest of the pipeline to observe.
1737 MPM.addPass(Pass: ForceFunctionAttrsPass());
1738
     // Discriminators are only added when the PGO options request debug info
     // for profiling.
1739 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1740 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1741
1742 // Apply module pipeline start EP callback.
1743 invokePipelineStartEPCallbacks(MPM, Level);
1744
1745 // Add the core simplification pipeline.
1746 MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase));
1747
1748 // Now add the optimization pipeline.
1749 MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase: Phase));
1750
     // Pseudo-probe update only applies to sample-profile use with pseudo
     // probes enabled.
1751 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1752 PGOOpt->Action == PGOOptions::SampleUse)
1753 MPM.addPass(Pass: PseudoProbeUpdatePass());
1754
1755 // Emit annotation remarks.
1756 addAnnotationRemarksPass(MPM);
1757
1758 if (isLTOPreLink(Phase))
1759 addRequiredLTOPreLinkPasses(MPM);
1760 return MPM;
1761}
1762
/// Build the FatLTO pipeline for \p Level.
///
/// The pipeline first runs the ThinLTO or full-LTO pre-link pipeline (chosen
/// by \p ThinLTO) and embeds bitcode via EmbedBitcodePass, to which
/// \p ThinLTO and \p EmitSummary are forwarded. It then cleans up the IR and
/// continues with either the ThinLTO default pipeline (ThinLTO with
/// sample-profile use) or the module optimization pipeline with
/// ThinOrFullLTOPhase::None.
1763ModulePassManager
1764PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1765 bool EmitSummary) {
1766 ModulePassManager MPM;
1767 if (ThinLTO)
1768 MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level));
1769 else
1770 MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level));
1771 MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary));
1772
1773 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1774 // like removing CFI/WPD related instructions. Note, we reuse
1775 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1776 // in FatLtoCleanup.
1777 MPM.addPass(Pass: FatLtoCleanup());
1778
1779 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1780 // object code, only in the bitcode section, so drop it before we run
1781 // module optimization and generate machine code. If llvm.type.test() isn't in
1782 // the IR, this won't do anything.
1783 MPM.addPass(
1784 Pass: LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All));
1785
1786 // Use the ThinLTO post-link pipeline with sample profiling
1787 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1788 MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1789 else {
1790 // ModuleSimplification does not run the coroutine passes for
1791 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1792 // builds, otherwise they will miscompile.
1793 if (ThinLTO) {
1794 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1795 // consideration.
1796 CGSCCPassManager CGPM;
1797 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1798 CGPM.addPass(Pass: CoroAnnotationElidePass());
1799 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
1800 MPM.addPass(Pass: CoroCleanupPass());
1801 }
1802
1803 // otherwise, just use module optimization
1804 MPM.addPass(
1805 Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None));
1806 // Emit annotation remarks.
1807 addAnnotationRemarksPass(MPM);
1808 }
1809 return MPM;
1810}
1811
/// Build the ThinLTO pre-link pipeline for \p Level.
///
/// At O0 this returns buildO0DefaultPipeline with the ThinLTOPreLink phase.
/// Otherwise it runs the module simplification pipeline for ThinLTOPreLink
/// but does not add the module optimization pipeline. When UseCtxProfile is
/// non-empty it returns right after simplification, adding only the required
/// LTO pre-link passes.
1812ModulePassManager
1813PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1814 if (Level == OptimizationLevel::O0)
1815 return buildO0DefaultPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink);
1816
1817 ModulePassManager MPM;
1818
1819 // Convert @llvm.global.annotations to !annotation metadata.
1820 MPM.addPass(Pass: Annotation2MetadataPass());
1821
1822 // Force any function attributes we want the rest of the pipeline to observe.
1823 MPM.addPass(Pass: ForceFunctionAttrsPass());
1824
1825 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1826 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1827
1828 // Apply module pipeline start EP callback.
1829 invokePipelineStartEPCallbacks(MPM, Level);
1830
1831 // If we are planning to perform ThinLTO later, we don't bloat the code with
1832 // unrolling/vectorization/... now. Just simplify the module as much as we
1833 // can.
1834 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1835 Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink));
1836 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1837 // thinlto use the contextual info to perform imports; then use the contextual
1838 // profile in the post-thinlink phase.
1839 if (!UseCtxProfile.empty()) {
1840 addRequiredLTOPreLinkPasses(MPM);
1841 return MPM;
1842 }
1843
1844 // Run partial inlining pass to partially inline functions that have
1845 // large bodies.
1846 // FIXME: It isn't clear whether this is really the right place to run this
1847 // in ThinLTO. Because there is another canonicalization and simplification
1848 // phase that will run after the thin link, running this here ends up with
1849 // less information than will be available later and it may grow functions in
1850 // ways that aren't beneficial.
1851 if (RunPartialInlining)
1852 MPM.addPass(Pass: PartialInlinerPass());
1853
1854 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1855 PGOOpt->Action == PGOOptions::SampleUse)
1856 MPM.addPass(Pass: PseudoProbeUpdatePass());
1857
1858 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1859 // optimization is going to be done in PostLink stage, but clang can't add
1860 // callbacks there in case of in-process ThinLTO called by linker.
1861 invokeOptimizerEarlyEPCallbacks(MPM, Level,
1862 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1863 invokeOptimizerLastEPCallbacks(MPM, Level,
1864 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1865
1866 // Emit annotation remarks.
1867 addAnnotationRemarksPass(MPM);
1868
1869 addRequiredLTOPreLinkPasses(MPM);
1870
1871 return MPM;
1872}
1873
/// Build the ThinLTO post-link pipeline for \p Level.
///
/// \p ImportSummary may be null. When it is non-null, the summary-driven
/// passes (optional MemProf context disambiguation, WholeProgramDevirt,
/// LowerTypeTests) are scheduled first. At O0 only a short lowering/cleanup
/// sequence is added. Otherwise the pipeline runs either the module inliner
/// pipeline (when UseCtxProfile is non-empty) or the module simplification
/// pipeline, followed by the module optimization pipeline, all in the
/// ThinLTOPostLink phase.
1874ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1875 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1876 ModulePassManager MPM;
1877
1878 // If we are invoking this without a summary index noting that we are linking
1879 // with a library containing the necessary APIs, remove any MemProf related
1880 // attributes and metadata.
1881 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1882 MPM.addPass(Pass: MemProfRemoveInfo());
1883
1884 if (ImportSummary) {
1885 // For ThinLTO we must apply the context disambiguation decisions early, to
1886 // ensure we can correctly match the callsites to summary data.
1887 if (EnableMemProfContextDisambiguation)
1888 MPM.addPass(Pass: MemProfContextDisambiguation(
1889 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1890
1891 // These passes import type identifier resolutions for whole-program
1892 // devirtualization and CFI. They must run early because other passes may
1893 // disturb the specific instruction patterns that these passes look for,
1894 // creating dependencies on resolutions that may not appear in the summary.
1895 //
1896 // For example, GVN may transform the pattern assume(type.test) appearing in
1897 // two basic blocks into assume(phi(type.test, type.test)), which would
1898 // transform a dependency on a WPD resolution into a dependency on a type
1899 // identifier resolution for CFI.
1900 //
1901 // Also, WPD has access to more precise information than ICP and can
1902 // devirtualize more effectively, so it should operate on the IR first.
1903 //
1904 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1905 // metadata and intrinsics.
1906 MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary));
1907 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary));
1908 }
1909
1910 if (Level == OptimizationLevel::O0) {
1911 // Run a second time to clean up any type tests left behind by WPD for use
1912 // in ICP.
1913 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1914 lowertypetests::DropTestKind::Assume));
1915 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1916
1917 // AllocToken transforms heap allocation calls; this needs to run late after
1918 // other allocation call transformations (such as those in InstCombine).
1919 MPM.addPass(Pass: AllocTokenPass());
1920
1921 // Drop available_externally and unreferenced globals. This is necessary
1922 // with ThinLTO in order to avoid leaving undefined references to dead
1923 // globals in the object file.
1924 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1925 MPM.addPass(Pass: GlobalDCEPass());
1926 return MPM;
1927 }
     // With a contextual profile in use, run the module inliner pipeline in
     // place of the full simplification pipeline.
1928 if (!UseCtxProfile.empty()) {
1929 MPM.addPass(
1930 Pass: buildModuleInlinerPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1931 } else {
1932 // Add the core simplification pipeline.
1933 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1934 Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1935 }
1936 // Now add the optimization pipeline.
1937 MPM.addPass(Pass: buildModuleOptimizationPipeline(
1938 Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink));
1939
1940 // Emit annotation remarks.
1941 addAnnotationRemarksPass(MPM);
1942
1943 return MPM;
1944}
1945
/// Build the full-LTO pre-link pipeline for \p Level.
///
/// This currently just forwards to buildPerModuleDefaultPipeline with the
/// FullLTOPreLink phase.
1946ModulePassManager
1947PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1948 // FIXME: We should use a customized pre-link pipeline!
1949 return buildPerModuleDefaultPipeline(Level,
1950 Phase: ThinOrFullLTOPhase::FullLTOPreLink);
1951}
1952
1953ModulePassManager
1954PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1955 ModuleSummaryIndex *ExportSummary) {
1956 ModulePassManager MPM;
1957
1958 invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1959
1960 // If we are invoking this without a summary index noting that we are linking
1961 // with a library containing the necessary APIs, remove any MemProf related
1962 // attributes and metadata.
1963 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1964 MPM.addPass(Pass: MemProfRemoveInfo());
1965
1966 // Create a function that performs CFI checks for cross-DSO calls with targets
1967 // in the current module.
1968 MPM.addPass(Pass: CrossDSOCFIPass());
1969
1970 if (Level == OptimizationLevel::O0) {
1971 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1972 // metadata and intrinsics.
1973 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1974 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1975 // Run a second time to clean up any type tests left behind by WPD for use
1976 // in ICP.
1977 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1978 lowertypetests::DropTestKind::Assume));
1979
1980 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
1981
1982 // AllocToken transforms heap allocation calls; this needs to run late after
1983 // other allocation call transformations (such as those in InstCombine).
1984 MPM.addPass(Pass: AllocTokenPass());
1985
1986 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1987
1988 // Emit annotation remarks.
1989 addAnnotationRemarksPass(MPM);
1990
1991 return MPM;
1992 }
1993
1994 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1995 // Load sample profile before running the LTO optimization pipeline.
1996 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1997 PGOOpt->ProfileRemappingFile,
1998 ThinOrFullLTOPhase::FullLTOPostLink));
1999 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2000 // RequireAnalysisPass for PSI before subsequent non-module passes.
2001 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2002 }
2003
2004 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2005 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
2006
2007 // Remove unused virtual tables to improve the quality of code generated by
2008 // whole-program devirtualization and bitset lowering.
2009 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2010
2011 // Do basic inference of function attributes from known properties of system
2012 // libraries and other oracles.
2013 MPM.addPass(Pass: InferFunctionAttrsPass());
2014
2015 if (Level.getSpeedupLevel() > 1) {
2016 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2017 Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2018
2019 // Indirect call promotion. This should promote all the targets that are
2020 // left by the earlier promotion pass that promotes intra-module targets.
2021 // This two-step promotion is to save the compile time. For LTO, it should
2022 // produce the same result as if we only do promotion here.
2023 MPM.addPass(Pass: PGOIndirectCallPromotion(
2024 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2025
2026 // Promoting by-reference arguments to by-value exposes more constants to
2027 // IPSCCP.
2028 CGSCCPassManager CGPM;
2029 CGPM.addPass(Pass: PostOrderFunctionAttrsPass());
2030 CGPM.addPass(Pass: ArgumentPromotionPass());
2031 CGPM.addPass(
2032 Pass: createCGSCCToFunctionPassAdaptor(Pass: SROAPass(SROAOptions::ModifyCFG)));
2033 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2034
2035 // Propagate constants at call sites into the functions they call. This
2036 // opens opportunities for globalopt (and inlining) by substituting function
2037 // pointers passed as arguments to direct uses of functions.
2038 MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
2039 Level != OptimizationLevel::Os &&
2040 Level != OptimizationLevel::Oz)));
2041
2042 // Attach metadata to indirect call sites indicating the set of functions
2043 // they may target at run-time. This should follow IPSCCP.
2044 MPM.addPass(Pass: CalledValuePropagationPass());
2045 }
2046
2047 // Do RPO function attribute inference across the module to forward-propagate
2048 // attributes where applicable.
2049 // FIXME: Is this really an optimization rather than a canonicalization?
2050 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
2051
2052 // Use in-range annotations on GEP indices to split globals where beneficial.
2053 MPM.addPass(Pass: GlobalSplitPass());
2054
2055 // Run whole program optimization of virtual call when the list of callees
2056 // is fixed.
2057 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
2058
2059 MPM.addPass(Pass: NoRecurseLTOInferencePass());
2060 // Stop here at -O1.
2061 if (Level == OptimizationLevel::O1) {
2062 // The LowerTypeTestsPass needs to run to lower type metadata and the
2063 // type.test intrinsics. The pass does nothing if CFI is disabled.
2064 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2065 // Run a second time to clean up any type tests left behind by WPD for use
2066 // in ICP (which is performed earlier than this in the regular LTO
2067 // pipeline).
2068 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
2069 lowertypetests::DropTestKind::Assume));
2070
2071 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
2072
2073 // AllocToken transforms heap allocation calls; this needs to run late after
2074 // other allocation call transformations (such as those in InstCombine).
2075 MPM.addPass(Pass: AllocTokenPass());
2076
2077 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2078
2079 // Emit annotation remarks.
2080 addAnnotationRemarksPass(MPM);
2081
2082 return MPM;
2083 }
2084
2085 // TODO: Skip to match buildCoroWrapper.
2086 MPM.addPass(Pass: CoroEarlyPass());
2087
2088 // Optimize globals to try and fold them into constants.
2089 MPM.addPass(Pass: GlobalOptPass());
2090
2091 // Promote any localized globals to SSA registers.
2092 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass()));
2093
2094 // Linking modules together can lead to duplicate global constant, only
2095 // keep one copy of each constant.
2096 MPM.addPass(Pass: ConstantMergePass());
2097
2098 // Remove unused arguments from functions.
2099 MPM.addPass(Pass: DeadArgumentEliminationPass());
2100
2101 // Reduce the code after globalopt and ipsccp. Both can open up significant
2102 // simplification opportunities, and both can propagate functions through
2103 // function pointers. When this happens, we often have to resolve varargs
2104 // calls, etc, so let instcombine do this.
2105 FunctionPassManager PeepholeFPM;
2106 PeepholeFPM.addPass(Pass: InstCombinePass());
2107 if (Level.getSpeedupLevel() > 1)
2108 PeepholeFPM.addPass(Pass: AggressiveInstCombinePass());
2109 invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level);
2110
2111 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM),
2112 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2113
2114 // Lower variadic functions for supported targets prior to inlining.
2115 MPM.addPass(Pass: ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
2116
2117 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2118 // generally clean up exception handling overhead. It isn't clear this is
2119 // valuable as the inliner doesn't currently care whether it is inlining an
2120 // invoke or a call.
2121 // Run the inliner now.
2122 if (EnableModuleInliner) {
2123 MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
2124 UseInlineAdvisor,
2125 ThinOrFullLTOPhase::FullLTOPostLink));
2126 } else {
2127 MPM.addPass(Pass: ModuleInlinerWrapperPass(
2128 getInlineParamsFromOptLevel(Level),
2129 /* MandatoryFirst */ true,
2130 InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink,
2131 .Pass: InlinePass::CGSCCInliner}));
2132 }
2133
2134 // Perform context disambiguation after inlining, since that would reduce the
2135 // amount of additional cloning required to distinguish the allocation
2136 // contexts.
2137 if (EnableMemProfContextDisambiguation)
2138 MPM.addPass(Pass: MemProfContextDisambiguation(
2139 /*Summary=*/nullptr,
2140 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2141
2142 // Optimize globals again after we ran the inliner.
2143 MPM.addPass(Pass: GlobalOptPass());
2144
2145 // Run the OpenMPOpt pass again after global optimizations.
2146 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
2147
2148 // Garbage collect dead functions.
2149 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2150
2151 // If we didn't decide to inline a function, check to see if we can
2152 // transform it to pass arguments by value instead of by reference.
2153 CGSCCPassManager CGPM;
2154 CGPM.addPass(Pass: ArgumentPromotionPass());
2155 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
2156 CGPM.addPass(Pass: CoroAnnotationElidePass());
2157 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2158
2159 FunctionPassManager FPM;
2160 // The IPO Passes may leave cruft around. Clean up after them.
2161 FPM.addPass(Pass: InstCombinePass());
2162 invokePeepholeEPCallbacks(FPM, Level);
2163
2164 if (EnableConstraintElimination)
2165 FPM.addPass(Pass: ConstraintEliminationPass());
2166
2167 FPM.addPass(Pass: JumpThreadingPass());
2168
2169 // Do a post inline PGO instrumentation and use pass. This is a context
2170 // sensitive PGO pass.
2171 if (PGOOpt) {
2172 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2173 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2174 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2175 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2176 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2177 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2178 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2179 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2180 }
2181
2182 // Break up allocas
2183 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
2184
2185 // LTO provides additional opportunities for tailcall elimination due to
2186 // link-time inlining, and visibility of nocapture attribute.
2187 FPM.addPass(
2188 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2189
2190 // Run a few AA driver optimizations here and now to cleanup the code.
2191 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM),
2192 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2193
2194 MPM.addPass(
2195 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
2196
2197 // Require the GlobalsAA analysis for the module so we can query it within
2198 // MainFPM.
2199 if (EnableGlobalAnalyses) {
2200 MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
2201 // Invalidate AAManager so it can be recreated and pick up the newly
2202 // available GlobalsAA.
2203 MPM.addPass(
2204 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
2205 }
2206
2207 FunctionPassManager MainFPM;
2208 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
2209 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2210 /*AllowSpeculation=*/true),
2211 /*USeMemorySSA=*/UseMemorySSA: true));
2212
2213 if (RunNewGVN)
2214 MainFPM.addPass(Pass: NewGVNPass());
2215 else
2216 MainFPM.addPass(Pass: GVNPass());
2217
2218 // Remove dead memcpy()'s.
2219 MainFPM.addPass(Pass: MemCpyOptPass());
2220
2221 // Nuke dead stores.
2222 MainFPM.addPass(Pass: DSEPass());
2223 MainFPM.addPass(Pass: MoveAutoInitPass());
2224 MainFPM.addPass(Pass: MergedLoadStoreMotionPass());
2225
2226 invokeVectorizerStartEPCallbacks(FPM&: MainFPM, Level);
2227
2228 LoopPassManager LPM;
2229 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2230 LPM.addPass(Pass: LoopFlattenPass());
2231 LPM.addPass(Pass: IndVarSimplifyPass());
2232 LPM.addPass(Pass: LoopDeletionPass());
2233 // FIXME: Add loop interchange.
2234
2235 // Unroll small loops and perform peeling.
2236 LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
2237 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2238 PTO.ForgetAllSCEVInLoopUnroll));
2239 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2240 // *All* loop passes must preserve it, in order to be able to use it.
2241 MainFPM.addPass(
2242 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false));
2243
2244 MainFPM.addPass(Pass: LoopDistributePass());
2245
2246 addVectorPasses(Level, FPM&: MainFPM, LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink);
2247
2248 invokeVectorizerEndEPCallbacks(FPM&: MainFPM, Level);
2249
2250 // Run the OpenMPOpt CGSCC pass again late.
2251 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
2252 Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
2253
2254 invokePeepholeEPCallbacks(FPM&: MainFPM, Level);
2255 MainFPM.addPass(Pass: JumpThreadingPass());
2256 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM),
2257 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2258
2259 // Lower type metadata and the type.test intrinsic. This pass supports
2260 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2261 // to be run at link time if CFI is enabled. This pass does nothing if
2262 // CFI is disabled.
2263 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2264 // Run a second time to clean up any type tests left behind by WPD for use
2265 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2266 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
2267 lowertypetests::DropTestKind::Assume));
2268
2269 // Enable splitting late in the FullLTO post-link pipeline.
2270 if (EnableHotColdSplit)
2271 MPM.addPass(Pass: HotColdSplittingPass());
2272
2273 // Add late LTO optimization passes.
2274 FunctionPassManager LateFPM;
2275
2276 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2277 // canonicalization pass that enables other optimizations. As a result,
2278 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2279 // result too early.
2280 LateFPM.addPass(Pass: LoopSinkPass());
2281
2282 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2283 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2284 // flattening of blocks.
2285 LateFPM.addPass(Pass: DivRemPairsPass());
2286
2287 // Delete basic blocks, which optimization passes may have killed.
2288 LateFPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
2289 .convertSwitchRangeToICmp(B: true)
2290 .convertSwitchToArithmetic(B: true)
2291 .hoistCommonInsts(B: true)
2292 .speculateUnpredictables(B: true)));
2293 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM)));
2294
2295 // Drop bodies of available externally objects to improve GlobalDCE.
2296 MPM.addPass(Pass: EliminateAvailableExternallyPass());
2297
2298 // Now that we have optimized the program, discard unreachable functions.
2299 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2300
2301 if (PTO.MergeFunctions)
2302 MPM.addPass(Pass: MergeFunctionsPass());
2303
2304 MPM.addPass(Pass: RelLookupTableConverterPass());
2305
2306 if (PTO.CallGraphProfile)
2307 MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true));
2308
2309 MPM.addPass(Pass: CoroCleanupPass());
2310
2311 // AllocToken transforms heap allocation calls; this needs to run late after
2312 // other allocation call transformations (such as those in InstCombine).
2313 MPM.addPass(Pass: AllocTokenPass());
2314
2315 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2316
2317 // Emit annotation remarks.
2318 addAnnotationRemarksPass(MPM);
2319
2320 return MPM;
2321}
2322
2323ModulePassManager
2324PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2325 ThinOrFullLTOPhase Phase) {
2326 assert(Level == OptimizationLevel::O0 &&
2327 "buildO0DefaultPipeline should only be used with O0");
2328
2329 ModulePassManager MPM;
2330
2331 // Perform pseudo probe instrumentation in O0 mode. This is for the
2332 // consistency between different build modes. For example, a LTO build can be
2333 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2334 // the postlink will require pseudo probe instrumentation in the prelink.
2335 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2336 MPM.addPass(Pass: SampleProfileProbePass(TM));
2337
2338 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2339 PGOOpt->Action == PGOOptions::IRUse))
2340 addPGOInstrPassesForO0(
2341 MPM,
2342 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2343 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile,
2344 ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2345
2346 // Instrument function entry and exit before all inlining.
2347 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2348 Pass: EntryExitInstrumenterPass(/*PostInlining=*/false)));
2349
2350 invokePipelineStartEPCallbacks(MPM, Level);
2351
2352 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2353 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
2354
2355 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2356 // Explicitly disable sample loader inlining and use flattened profile in O0
2357 // pipeline.
2358 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
2359 PGOOpt->ProfileRemappingFile,
2360 ThinOrFullLTOPhase::None, FS,
2361 /*DisableSampleProfileInlining=*/true,
2362 /*UseFlattenedProfile=*/true));
2363 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2364 // RequireAnalysisPass for PSI before subsequent non-module passes.
2365 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2366 }
2367
2368 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
2369
2370 // Build a minimal pipeline based on the semantics required by LLVM,
2371 // which is just that always inlining occurs. Further, disable generating
2372 // lifetime intrinsics to avoid enabling further optimizations during
2373 // code generation.
2374 MPM.addPass(Pass: AlwaysInlinerPass(
2375 /*InsertLifetimeIntrinsics=*/false));
2376
2377 if (PTO.MergeFunctions)
2378 MPM.addPass(Pass: MergeFunctionsPass());
2379
2380 if (EnableMatrix)
2381 MPM.addPass(
2382 Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true)));
2383
2384 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2385 CGSCCPassManager CGPM;
2386 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2387 if (!CGPM.isEmpty())
2388 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2389 }
2390 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2391 LoopPassManager LPM;
2392 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2393 if (!LPM.isEmpty()) {
2394 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2395 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2396 }
2397 }
2398 if (!LoopOptimizerEndEPCallbacks.empty()) {
2399 LoopPassManager LPM;
2400 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2401 if (!LPM.isEmpty()) {
2402 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2403 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2404 }
2405 }
2406 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2407 FunctionPassManager FPM;
2408 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2409 if (!FPM.isEmpty())
2410 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2411 }
2412
2413 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
2414
2415 if (!VectorizerStartEPCallbacks.empty()) {
2416 FunctionPassManager FPM;
2417 invokeVectorizerStartEPCallbacks(FPM, Level);
2418 if (!FPM.isEmpty())
2419 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2420 }
2421
2422 if (!VectorizerEndEPCallbacks.empty()) {
2423 FunctionPassManager FPM;
2424 invokeVectorizerEndEPCallbacks(FPM, Level);
2425 if (!FPM.isEmpty())
2426 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2427 }
2428
2429 MPM.addPass(Pass: buildCoroWrapper(Phase));
2430
2431 // AllocToken transforms heap allocation calls; this needs to run late after
2432 // other allocation call transformations (such as those in InstCombine).
2433 if (!isLTOPreLink(Phase))
2434 MPM.addPass(Pass: AllocTokenPass());
2435
2436 invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
2437
2438 if (isLTOPreLink(Phase))
2439 addRequiredLTOPreLinkPasses(MPM);
2440
2441 // Emit annotation remarks.
2442 addAnnotationRemarksPass(MPM);
2443
2444 return MPM;
2445}
2446
2447AAManager PassBuilder::buildDefaultAAPipeline() {
2448 AAManager AA;
2449
2450 // The order in which these are registered determines their priority when
2451 // being queried.
2452
2453 // Add any target-specific alias analyses that should be run early.
2454 if (TM)
2455 TM->registerEarlyDefaultAliasAnalyses(AA);
2456
2457 // First we register the basic alias analysis that provides the majority of
2458 // per-function local AA logic. This is a stateless, on-demand local set of
2459 // AA techniques.
2460 AA.registerFunctionAnalysis<BasicAA>();
2461
2462 // Next we query fast, specialized alias analyses that wrap IR-embedded
2463 // information about aliasing.
2464 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2465 AA.registerFunctionAnalysis<TypeBasedAA>();
2466
2467 // Add support for querying global aliasing information when available.
2468 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2469 // analysis, all that the `AAManager` can do is query for any *cached*
2470 // results from `GlobalsAA` through a readonly proxy.
2471 if (EnableGlobalAnalyses)
2472 AA.registerModuleAnalysis<GlobalsAA>();
2473
2474 // Add target-specific alias analyses.
2475 if (TM)
2476 TM->registerDefaultAliasAnalyses(AA);
2477
2478 return AA;
2479}
2480
2481bool PassBuilder::isInstrumentedPGOUse() const {
2482 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2483 !UseCtxProfile.empty();
2484}
2485