1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BasicAliasAnalysis.h"
20#include "llvm/Analysis/CGSCCPassManager.h"
21#include "llvm/Analysis/CtxProfAnalysis.h"
22#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
23#include "llvm/Analysis/GlobalsModRef.h"
24#include "llvm/Analysis/InlineAdvisor.h"
25#include "llvm/Analysis/InstCount.h"
26#include "llvm/Analysis/ProfileSummaryInfo.h"
27#include "llvm/Analysis/ScopedNoAliasAA.h"
28#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
31#include "llvm/Passes/OptimizationLevel.h"
32#include "llvm/Passes/PassBuilder.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/PGOOptions.h"
36#include "llvm/Support/VirtualFileSystem.h"
37#include "llvm/Target/TargetMachine.h"
38#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
39#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
40#include "llvm/Transforms/Coroutines/CoroCleanup.h"
41#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
42#include "llvm/Transforms/Coroutines/CoroEarly.h"
43#include "llvm/Transforms/Coroutines/CoroElide.h"
44#include "llvm/Transforms/Coroutines/CoroSplit.h"
45#include "llvm/Transforms/HipStdPar/HipStdPar.h"
46#include "llvm/Transforms/IPO/AlwaysInliner.h"
47#include "llvm/Transforms/IPO/Annotation2Metadata.h"
48#include "llvm/Transforms/IPO/ArgumentPromotion.h"
49#include "llvm/Transforms/IPO/Attributor.h"
50#include "llvm/Transforms/IPO/CalledValuePropagation.h"
51#include "llvm/Transforms/IPO/ConstantMerge.h"
52#include "llvm/Transforms/IPO/CrossDSOCFI.h"
53#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
54#include "llvm/Transforms/IPO/ElimAvailExtern.h"
55#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
56#include "llvm/Transforms/IPO/ExpandVariadics.h"
57#include "llvm/Transforms/IPO/FatLTOCleanup.h"
58#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
59#include "llvm/Transforms/IPO/FunctionAttrs.h"
60#include "llvm/Transforms/IPO/GlobalDCE.h"
61#include "llvm/Transforms/IPO/GlobalOpt.h"
62#include "llvm/Transforms/IPO/GlobalSplit.h"
63#include "llvm/Transforms/IPO/HotColdSplitting.h"
64#include "llvm/Transforms/IPO/IROutliner.h"
65#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
66#include "llvm/Transforms/IPO/Inliner.h"
67#include "llvm/Transforms/IPO/LowerTypeTests.h"
68#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
69#include "llvm/Transforms/IPO/MergeFunctions.h"
70#include "llvm/Transforms/IPO/ModuleInliner.h"
71#include "llvm/Transforms/IPO/OpenMPOpt.h"
72#include "llvm/Transforms/IPO/PartialInlining.h"
73#include "llvm/Transforms/IPO/SCCP.h"
74#include "llvm/Transforms/IPO/SampleProfile.h"
75#include "llvm/Transforms/IPO/SampleProfileProbe.h"
76#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
77#include "llvm/Transforms/InstCombine/InstCombine.h"
78#include "llvm/Transforms/Instrumentation/AllocToken.h"
79#include "llvm/Transforms/Instrumentation/CGProfile.h"
80#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
81#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
82#include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h"
83#include "llvm/Transforms/Instrumentation/MemProfUse.h"
84#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
85#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
86#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
87#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
88#include "llvm/Transforms/Scalar/ADCE.h"
89#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
90#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
91#include "llvm/Transforms/Scalar/BDCE.h"
92#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
93#include "llvm/Transforms/Scalar/ConstraintElimination.h"
94#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
95#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
96#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
97#include "llvm/Transforms/Scalar/DivRemPairs.h"
98#include "llvm/Transforms/Scalar/DropUnnecessaryAssumes.h"
99#include "llvm/Transforms/Scalar/EarlyCSE.h"
100#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
101#include "llvm/Transforms/Scalar/Float2Int.h"
102#include "llvm/Transforms/Scalar/GVN.h"
103#include "llvm/Transforms/Scalar/IndVarSimplify.h"
104#include "llvm/Transforms/Scalar/InferAlignment.h"
105#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
106#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
107#include "llvm/Transforms/Scalar/JumpThreading.h"
108#include "llvm/Transforms/Scalar/LICM.h"
109#include "llvm/Transforms/Scalar/LoopDeletion.h"
110#include "llvm/Transforms/Scalar/LoopDistribute.h"
111#include "llvm/Transforms/Scalar/LoopFlatten.h"
112#include "llvm/Transforms/Scalar/LoopFuse.h"
113#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
114#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
115#include "llvm/Transforms/Scalar/LoopInterchange.h"
116#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
117#include "llvm/Transforms/Scalar/LoopPassManager.h"
118#include "llvm/Transforms/Scalar/LoopRotation.h"
119#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
120#include "llvm/Transforms/Scalar/LoopSink.h"
121#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
122#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
123#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
124#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
125#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
126#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
127#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
128#include "llvm/Transforms/Scalar/MergeICmps.h"
129#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
130#include "llvm/Transforms/Scalar/NewGVN.h"
131#include "llvm/Transforms/Scalar/Reassociate.h"
132#include "llvm/Transforms/Scalar/SCCP.h"
133#include "llvm/Transforms/Scalar/SROA.h"
134#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
135#include "llvm/Transforms/Scalar/SimplifyCFG.h"
136#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
137#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
138#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
139#include "llvm/Transforms/Utils/AddDiscriminators.h"
140#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
141#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
142#include "llvm/Transforms/Utils/CountVisits.h"
143#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
144#include "llvm/Transforms/Utils/ExtraPassManager.h"
145#include "llvm/Transforms/Utils/InjectTLIMappings.h"
146#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
147#include "llvm/Transforms/Utils/Mem2Reg.h"
148#include "llvm/Transforms/Utils/MoveAutoInit.h"
149#include "llvm/Transforms/Utils/NameAnonGlobals.h"
150#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
151#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
152#include "llvm/Transforms/Vectorize/LoopVectorize.h"
153#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
154#include "llvm/Transforms/Vectorize/VectorCombine.h"
155
156using namespace llvm;
157
158namespace llvm {
159
160static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
161 "enable-ml-inliner", cl::init(Val: InliningAdvisorMode::Default), cl::Hidden,
162 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
163 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
164 "Heuristics-based inliner version"),
165 clEnumValN(InliningAdvisorMode::Development, "development",
166 "Use development mode (runtime-loadable model)"),
167 clEnumValN(InliningAdvisorMode::Release, "release",
168 "Use release mode (AOT-compiled model)")));
169
170/// Flag to enable inline deferral during PGO.
171static cl::opt<bool>
172 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(Val: true),
173 cl::Hidden,
174 cl::desc("Enable inline deferral during PGO"));
175
176static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
177 cl::init(Val: false), cl::Hidden,
178 cl::desc("Enable module inliner"));
179
180static cl::opt<bool> PerformMandatoryInliningsFirst(
181 "mandatory-inlining-first", cl::init(Val: false), cl::Hidden,
182 cl::desc("Perform mandatory inlinings module-wide, before performing "
183 "inlining"));
184
185static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
186 "eagerly-invalidate-analyses", cl::init(Val: true), cl::Hidden,
187 cl::desc("Eagerly invalidate more analyses in default pipelines"));
188
189static cl::opt<bool> EnableMergeFunctions(
190 "enable-merge-functions", cl::init(Val: false), cl::Hidden,
191 cl::desc("Enable function merging as part of the optimization pipeline"));
192
193static cl::opt<bool> EnablePostPGOLoopRotation(
194 "enable-post-pgo-loop-rotation", cl::init(Val: true), cl::Hidden,
195 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
196
197static cl::opt<bool> EnableGlobalAnalyses(
198 "enable-global-analyses", cl::init(Val: true), cl::Hidden,
199 cl::desc("Enable inter-procedural analyses"));
200
201static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
202 cl::init(Val: false), cl::Hidden,
203 cl::desc("Run Partial inlining pass"));
204
205static cl::opt<bool> ExtraVectorizerPasses(
206 "extra-vectorizer-passes", cl::init(Val: false), cl::Hidden,
207 cl::desc("Run cleanup optimization passes after vectorization"));
208
209static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(Val: false), cl::Hidden,
210 cl::desc("Run the NewGVN pass"));
211
212static cl::opt<bool>
213 EnableLoopInterchange("enable-loopinterchange", cl::init(Val: false), cl::Hidden,
214 cl::desc("Enable the LoopInterchange Pass"));
215
216static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
217 cl::init(Val: false), cl::Hidden,
218 cl::desc("Enable Unroll And Jam Pass"));
219
220static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(Val: false),
221 cl::Hidden,
222 cl::desc("Enable the LoopFlatten Pass"));
223
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(Val: false), cl::Hidden);
228
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(Val: false),
234 cl::Hidden,
235 cl::desc("Enable ir outliner pass"));
236
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(Val: false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
241static cl::opt<int> PreInlineThreshold(
242 "preinline-threshold", cl::Hidden, cl::init(Val: 75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
254static cl::opt<bool> EnableJumpTableToSwitch(
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used in simplifying testing SampleFDO optimizations for
259// profile loading.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(Val: true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
264static cl::opt<bool> FlattenedProfileUsed(
265 "flattened-profile-used", cl::init(Val: false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267 "no inline hierarchy exists in the profile"));
268
269static cl::opt<bool>
270 EnableMatrix("enable-matrix", cl::init(Val: false), cl::Hidden,
271 cl::desc("Enable lowering of the matrix intrinsics"));
272
273static cl::opt<bool> EnableMergeICmps(
274 "enable-mergeicmps", cl::init(Val: true), cl::Hidden,
275 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
276
277static cl::opt<bool> EnableConstraintElimination(
278 "enable-constraint-elimination", cl::init(Val: true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
282static cl::opt<AttributorRunOption> AttributorRun(
283 "attributor-enable", cl::Hidden, cl::init(Val: AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
285 cl::values(clEnumValN(AttributorRunOption::FULL, "full",
286 "enable all full attributor runs"),
287 clEnumValN(AttributorRunOption::LIGHT, "light",
288 "enable all attributor-light runs"),
289 clEnumValN(AttributorRunOption::MODULE, "module",
290 "enable module-wide attributor runs"),
291 clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light",
292 "enable module-wide attributor-light runs"),
293 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
294 "enable call graph SCC attributor runs"),
295 clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light",
296 "enable call graph SCC attributor-light runs"),
297 clEnumValN(AttributorRunOption::NONE, "none",
298 "disable attributor runs")));
299
300static cl::opt<bool> EnableSampledInstr(
301 "enable-sampled-instrumentation", cl::init(Val: false), cl::Hidden,
302 cl::desc("Enable profile instrumentation sampling (default = off)"));
303static cl::opt<bool> UseLoopVersioningLICM(
304 "enable-loop-versioning-licm", cl::init(Val: false), cl::Hidden,
305 cl::desc("Enable the experimental Loop Versioning LICM pass"));
306
307static cl::opt<std::string> InstrumentColdFuncOnlyPath(
308 "instrument-cold-function-only-path", cl::init(Val: ""),
309 cl::desc("File path for cold function only instrumentation(requires use "
310 "with --pgo-instrument-cold-function-only)"),
311 cl::Hidden);
312
313// TODO: There is a similar flag in WPD pass, we should consolidate them by
314// parsing the option only once in PassBuilder and share it across both places.
315static cl::opt<bool> EnableDevirtualizeSpeculatively(
316 "enable-devirtualize-speculatively",
317 cl::desc("Enable speculative devirtualization optimization"),
318 cl::init(Val: false));
319
320extern cl::opt<std::string> UseCtxProfile;
321extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
322
323extern cl::opt<bool> EnableMemProfContextDisambiguation;
324} // namespace llvm
325
326PipelineTuningOptions::PipelineTuningOptions() {
327 LoopInterleaving = true;
328 LoopVectorization = true;
329 SLPVectorization = false;
330 LoopUnrolling = true;
331 LoopInterchange = EnableLoopInterchange;
332 LoopFusion = false;
333 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
334 LicmMssaOptCap = SetLicmMssaOptCap;
335 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
336 CallGraphProfile = true;
337 UnifiedLTO = false;
338 MergeFunctions = EnableMergeFunctions;
339 InlinerThreshold = -1;
340 EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
341 DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively;
342}
343
344namespace llvm {
345extern cl::opt<unsigned> MaxDevirtIterations;
346} // namespace llvm
347
348void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
349 OptimizationLevel Level) {
350 for (auto &C : PeepholeEPCallbacks)
351 C(FPM, Level);
352}
353void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
354 LoopPassManager &LPM, OptimizationLevel Level) {
355 for (auto &C : LateLoopOptimizationsEPCallbacks)
356 C(LPM, Level);
357}
358void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
359 OptimizationLevel Level) {
360 for (auto &C : LoopOptimizerEndEPCallbacks)
361 C(LPM, Level);
362}
363void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
364 FunctionPassManager &FPM, OptimizationLevel Level) {
365 for (auto &C : ScalarOptimizerLateEPCallbacks)
366 C(FPM, Level);
367}
368void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
369 OptimizationLevel Level) {
370 for (auto &C : CGSCCOptimizerLateEPCallbacks)
371 C(CGPM, Level);
372}
373void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
374 OptimizationLevel Level) {
375 for (auto &C : VectorizerStartEPCallbacks)
376 C(FPM, Level);
377}
378void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
379 OptimizationLevel Level) {
380 for (auto &C : VectorizerEndEPCallbacks)
381 C(FPM, Level);
382}
383void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
384 OptimizationLevel Level,
385 ThinOrFullLTOPhase Phase) {
386 for (auto &C : OptimizerEarlyEPCallbacks)
387 C(MPM, Level, Phase);
388}
389void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
390 OptimizationLevel Level,
391 ThinOrFullLTOPhase Phase) {
392 for (auto &C : OptimizerLastEPCallbacks)
393 C(MPM, Level, Phase);
394}
395void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
396 ModulePassManager &MPM, OptimizationLevel Level) {
397 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
398 C(MPM, Level);
399}
400void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
401 ModulePassManager &MPM, OptimizationLevel Level) {
402 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
403 C(MPM, Level);
404}
405void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
406 OptimizationLevel Level) {
407 for (auto &C : PipelineStartEPCallbacks)
408 C(MPM, Level);
409}
410void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
411 ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
412 for (auto &C : PipelineEarlySimplificationEPCallbacks)
413 C(MPM, Level, Phase);
414}
415
416// Helper to add AnnotationRemarksPass.
417static void addAnnotationRemarksPass(ModulePassManager &MPM) {
418 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
419 // Count the stats for InstCount and FunctionPropertiesAnalysis
420 if (AreStatisticsEnabled()) {
421 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: InstCountPass()));
422 MPM.addPass(
423 Pass: createModuleToFunctionPassAdaptor(Pass: FunctionPropertiesStatisticsPass()));
424 }
425}
426
427// Helper to check if the current compilation phase is preparing for LTO
428static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
429 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
430 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
431}
432
433// Helper to check if the current compilation phase is LTO backend
434static bool isLTOPostLink(ThinOrFullLTOPhase Phase) {
435 return Phase == ThinOrFullLTOPhase::ThinLTOPostLink ||
436 Phase == ThinOrFullLTOPhase::FullLTOPostLink;
437}
438
439// Helper to wrap conditionally Coro passes.
440static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) {
441 // TODO: Skip passes according to Phase.
442 ModulePassManager CoroPM;
443 CoroPM.addPass(Pass: CoroEarlyPass());
444 CGSCCPassManager CGPM;
445 CGPM.addPass(Pass: CoroSplitPass());
446 CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
447 CoroPM.addPass(Pass: CoroCleanupPass());
448 CoroPM.addPass(Pass: GlobalDCEPass());
449 return CoroConditionalWrapper(std::move(CoroPM));
450}
451
452// TODO: Investigate the cost/benefit of tail call elimination on debugging.
453FunctionPassManager
454PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
455 ThinOrFullLTOPhase Phase) {
456
457 FunctionPassManager FPM;
458
459 if (AreStatisticsEnabled())
460 FPM.addPass(Pass: CountVisitsPass());
461
462 // Form SSA out of local memory accesses after breaking apart aggregates into
463 // scalars.
464 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
465
466 // Catch trivial redundancies
467 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
468
469 // Hoisting of scalars and load expressions.
470 FPM.addPass(
471 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
472 FPM.addPass(Pass: InstCombinePass());
473
474 FPM.addPass(Pass: LibCallsShrinkWrapPass());
475
476 invokePeepholeEPCallbacks(FPM, Level);
477
478 FPM.addPass(
479 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
480
481 // Form canonically associated expression trees, and simplify the trees using
482 // basic mathematical properties. For example, this will form (nearly)
483 // minimal multiplication trees.
484 FPM.addPass(Pass: ReassociatePass());
485
486 // Add the primary loop simplification pipeline.
487 // FIXME: Currently this is split into two loop pass pipelines because we run
488 // some function passes in between them. These can and should be removed
489 // and/or replaced by scheduling the loop pass equivalents in the correct
490 // positions. But those equivalent passes aren't powerful enough yet.
491 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
492 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
493 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
494 // `LoopInstSimplify`.
495 LoopPassManager LPM1, LPM2;
496
497 // Simplify the loop body. We do this initially to clean up after other loop
498 // passes run, either when iterating on a loop or on inner loops with
499 // implications on the outer loop.
500 LPM1.addPass(Pass: LoopInstSimplifyPass());
501 LPM1.addPass(Pass: LoopSimplifyCFGPass());
502
503 // Try to remove as much code from the loop header as possible,
504 // to reduce amount of IR that will have to be duplicated. However,
505 // do not perform speculative hoisting the first time as LICM
506 // will destroy metadata that may not need to be destroyed if run
507 // after loop rotation.
508 // TODO: Investigate promotion cap for O1.
509 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
510 /*AllowSpeculation=*/false));
511
512 LPM1.addPass(
513 Pass: LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
514 // TODO: Investigate promotion cap for O1.
515 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
516 /*AllowSpeculation=*/true));
517 LPM1.addPass(Pass: SimpleLoopUnswitchPass());
518 if (EnableLoopFlatten)
519 LPM1.addPass(Pass: LoopFlattenPass());
520
521 LPM2.addPass(Pass: LoopIdiomRecognizePass());
522 LPM2.addPass(Pass: IndVarSimplifyPass());
523
524 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
525
526 LPM2.addPass(Pass: LoopDeletionPass());
527
528 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
529 // because it changes IR to makes profile annotation in back compile
530 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
531 // attributes so we need to make sure and allow the full unroll pass to pay
532 // attention to it.
533 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
534 PGOOpt->Action != PGOOptions::SampleUse)
535 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
536 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
537 PTO.ForgetAllSCEVInLoopUnroll));
538
539 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
540
541 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
542 /*UseMemorySSA=*/true));
543 FPM.addPass(
544 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
545 FPM.addPass(Pass: InstCombinePass());
546 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
547 // *All* loop passes must preserve it, in order to be able to use it.
548 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
549 /*UseMemorySSA=*/false));
550
551 // Delete small array after loop unroll.
552 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
553
554 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
555 FPM.addPass(Pass: MemCpyOptPass());
556
557 // Sparse conditional constant propagation.
558 // FIXME: It isn't clear why we do this *after* loop passes rather than
559 // before...
560 FPM.addPass(Pass: SCCPPass());
561
562 // Delete dead bit computations (instcombine runs after to fold away the dead
563 // computations, and then ADCE will run later to exploit any new DCE
564 // opportunities that creates).
565 FPM.addPass(Pass: BDCEPass());
566
567 // Run instcombine after redundancy and dead bit elimination to exploit
568 // opportunities opened up by them.
569 FPM.addPass(Pass: InstCombinePass());
570 invokePeepholeEPCallbacks(FPM, Level);
571
572 FPM.addPass(Pass: CoroElidePass());
573
574 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
575
576 // Finally, do an expensive DCE pass to catch all the dead code exposed by
577 // the simplifications and basic cleanup after all the simplifications.
578 // TODO: Investigate if this is too expensive.
579 FPM.addPass(Pass: ADCEPass());
580 FPM.addPass(
581 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
582 FPM.addPass(Pass: InstCombinePass());
583 invokePeepholeEPCallbacks(FPM, Level);
584
585 return FPM;
586}
587
588FunctionPassManager
589PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
590 ThinOrFullLTOPhase Phase) {
591 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
592
593 // The O1 pipeline has a separate pipeline creation function to simplify
594 // construction readability.
595 if (Level.getSpeedupLevel() == 1)
596 return buildO1FunctionSimplificationPipeline(Level, Phase);
597
598 FunctionPassManager FPM;
599
600 if (AreStatisticsEnabled())
601 FPM.addPass(Pass: CountVisitsPass());
602
603 // Form SSA out of local memory accesses after breaking apart aggregates into
604 // scalars.
605 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
606
607 // Catch trivial redundancies
608 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
609 if (EnableKnowledgeRetention)
610 FPM.addPass(Pass: AssumeSimplifyPass());
611
612 // Hoisting of scalars and load expressions.
613 if (EnableGVNHoist)
614 FPM.addPass(Pass: GVNHoistPass());
615
616 // Global value numbering based sinking.
617 if (EnableGVNSink) {
618 FPM.addPass(Pass: GVNSinkPass());
619 FPM.addPass(
620 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
621 }
622
623 // Speculative execution if the target has divergent branches; otherwise nop.
624 FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
625
626 // Optimize based on known information about branches, and cleanup afterward.
627 FPM.addPass(Pass: JumpThreadingPass());
628 FPM.addPass(Pass: CorrelatedValuePropagationPass());
629
630 // Jump table to switch conversion.
631 if (EnableJumpTableToSwitch)
632 FPM.addPass(Pass: JumpTableToSwitchPass(
633 /*InLTO=*/Phase == ThinOrFullLTOPhase::ThinLTOPostLink ||
634 Phase == ThinOrFullLTOPhase::FullLTOPostLink));
635
636 FPM.addPass(
637 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
638 FPM.addPass(Pass: InstCombinePass());
639 FPM.addPass(Pass: AggressiveInstCombinePass());
640 FPM.addPass(Pass: LibCallsShrinkWrapPass());
641
642 invokePeepholeEPCallbacks(FPM, Level);
643
644 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
645 // using the size value profile. Don't perform this when optimizing for size.
646 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
647 FPM.addPass(Pass: PGOMemOPSizeOpt());
648
649 FPM.addPass(Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/
650 isInstrumentedPGOUse()));
651 FPM.addPass(
652 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
653
654 // Form canonically associated expression trees, and simplify the trees using
655 // basic mathematical properties. For example, this will form (nearly)
656 // minimal multiplication trees.
657 FPM.addPass(Pass: ReassociatePass());
658
659 if (EnableConstraintElimination)
660 FPM.addPass(Pass: ConstraintEliminationPass());
661
662 // Add the primary loop simplification pipeline.
663 // FIXME: Currently this is split into two loop pass pipelines because we run
664 // some function passes in between them. These can and should be removed
665 // and/or replaced by scheduling the loop pass equivalents in the correct
666 // positions. But those equivalent passes aren't powerful enough yet.
667 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
668 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
669 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
670 // `LoopInstSimplify`.
671 LoopPassManager LPM1, LPM2;
672
673 // Simplify the loop body. We do this initially to clean up after other loop
674 // passes run, either when iterating on a loop or on inner loops with
675 // implications on the outer loop.
676 LPM1.addPass(Pass: LoopInstSimplifyPass());
677 LPM1.addPass(Pass: LoopSimplifyCFGPass());
678
679 // Try to remove as much code from the loop header as possible,
680 // to reduce amount of IR that will have to be duplicated. However,
681 // do not perform speculative hoisting the first time as LICM
682 // will destroy metadata that may not need to be destroyed if run
683 // after loop rotation.
684 // TODO: Investigate promotion cap for O1.
685 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
686 /*AllowSpeculation=*/false));
687
688 LPM1.addPass(
689 Pass: LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
690 // TODO: Investigate promotion cap for O1.
691 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
692 /*AllowSpeculation=*/true));
693 LPM1.addPass(
694 Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
695 if (EnableLoopFlatten)
696 LPM1.addPass(Pass: LoopFlattenPass());
697
698 LPM2.addPass(Pass: LoopIdiomRecognizePass());
699 LPM2.addPass(Pass: IndVarSimplifyPass());
700
701 {
702 ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses;
703 ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
704 OptimizationLevel::O3));
705 LPM2.addPass(Pass: std::move(ExtraPasses));
706 }
707
708 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
709
710 LPM2.addPass(Pass: LoopDeletionPass());
711
712 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
713 // because it changes IR to makes profile annotation in back compile
714 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
715 // attributes so we need to make sure and allow the full unroll pass to pay
716 // attention to it.
717 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
718 PGOOpt->Action != PGOOptions::SampleUse)
719 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
720 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
721 PTO.ForgetAllSCEVInLoopUnroll));
722
723 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
724
725 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
726 /*UseMemorySSA=*/true));
727 FPM.addPass(
728 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
729 FPM.addPass(Pass: InstCombinePass());
730 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
731 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
732 // *All* loop passes must preserve it, in order to be able to use it.
733 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
734 /*UseMemorySSA=*/false));
735
736 // Delete small array after loop unroll.
737 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
738
739 // Try vectorization/scalarization transforms that are both improvements
740 // themselves and can allow further folds with GVN and InstCombine.
741 FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
742
743 // Eliminate redundancies.
744 FPM.addPass(Pass: MergedLoadStoreMotionPass());
745 if (RunNewGVN)
746 FPM.addPass(Pass: NewGVNPass());
747 else
748 FPM.addPass(Pass: GVNPass());
749
750 // Sparse conditional constant propagation.
751 // FIXME: It isn't clear why we do this *after* loop passes rather than
752 // before...
753 FPM.addPass(Pass: SCCPPass());
754
755 // Delete dead bit computations (instcombine runs after to fold away the dead
756 // computations, and then ADCE will run later to exploit any new DCE
757 // opportunities that creates).
758 FPM.addPass(Pass: BDCEPass());
759
760 // Run instcombine after redundancy and dead bit elimination to exploit
761 // opportunities opened up by them.
762 FPM.addPass(Pass: InstCombinePass());
763 invokePeepholeEPCallbacks(FPM, Level);
764
765 // Re-consider control flow based optimizations after redundancy elimination,
766 // redo DCE, etc.
767 if (EnableDFAJumpThreading)
768 FPM.addPass(Pass: DFAJumpThreadingPass());
769
770 FPM.addPass(Pass: JumpThreadingPass());
771 FPM.addPass(Pass: CorrelatedValuePropagationPass());
772
773 // Finally, do an expensive DCE pass to catch all the dead code exposed by
774 // the simplifications and basic cleanup after all the simplifications.
775 // TODO: Investigate if this is too expensive.
776 FPM.addPass(Pass: ADCEPass());
777
778 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
779 FPM.addPass(Pass: MemCpyOptPass());
780
781 FPM.addPass(Pass: DSEPass());
782 FPM.addPass(Pass: MoveAutoInitPass());
783
784 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
785 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
786 /*AllowSpeculation=*/true),
787 /*UseMemorySSA=*/true));
788
789 FPM.addPass(Pass: CoroElidePass());
790
791 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
792
793 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
794 .convertSwitchRangeToICmp(B: true)
795 .convertSwitchToArithmetic(B: true)
796 .hoistCommonInsts(B: true)
797 .sinkCommonInsts(B: true)));
798 FPM.addPass(Pass: InstCombinePass());
799 invokePeepholeEPCallbacks(FPM, Level);
800
801 return FPM;
802}
803
804void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
805 MPM.addPass(Pass: CanonicalizeAliasesPass());
806 MPM.addPass(Pass: NameAnonGlobalPass());
807}
808
809void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
810 OptimizationLevel Level,
811 ThinOrFullLTOPhase LTOPhase) {
812 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
813 if (DisablePreInliner)
814 return;
815 InlineParams IP;
816
817 IP.DefaultThreshold = PreInlineThreshold;
818
819 // FIXME: The hint threshold has the same value used by the regular inliner
820 // when not optimzing for size. This should probably be lowered after
821 // performance testing.
822 // FIXME: this comment is cargo culted from the old pass manager, revisit).
823 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
824 ModuleInlinerWrapperPass MIWP(
825 IP, /* MandatoryFirst */ true,
826 InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner});
827 CGSCCPassManager &CGPipeline = MIWP.getPM();
828
829 FunctionPassManager FPM;
830 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
831 FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies.
832 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
833 B: true))); // Merge & remove basic blocks.
834 FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences.
835 invokePeepholeEPCallbacks(FPM, Level);
836
837 CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
838 Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
839
840 MPM.addPass(Pass: std::move(MIWP));
841
842 // Delete anything that is now dead to make sure that we don't instrument
843 // dead code. Instrumentation can end up keeping dead code around and
844 // dramatically increase code size.
845 MPM.addPass(Pass: GlobalDCEPass());
846}
847
848void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
849 OptimizationLevel Level) {
850 if (EnablePostPGOLoopRotation) {
851 // Disable header duplication in loop rotation at -Oz.
852 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
853 Pass: createFunctionToLoopPassAdaptor(Pass: LoopRotatePass(),
854 /*UseMemorySSA=*/false),
855 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
856 }
857}
858
859void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
860 OptimizationLevel Level, bool RunProfileGen,
861 bool IsCS, bool AtomicCounterUpdate,
862 std::string ProfileFile,
863 std::string ProfileRemappingFile) {
864 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
865
866 if (!RunProfileGen) {
867 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
868 MPM.addPass(
869 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
870 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
871 // RequireAnalysisPass for PSI before subsequent non-module passes.
872 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
873 return;
874 }
875
876 // Perform PGO instrumentation.
877 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
878 : PGOInstrumentationType::FDO));
879
880 addPostPGOLoopRotation(MPM, Level);
881 // Add the profile lowering pass.
882 InstrProfOptions Options;
883 if (!ProfileFile.empty())
884 Options.InstrProfileOutput = ProfileFile;
885 // Do counter promotion at Level greater than O0.
886 Options.DoCounterPromotion = true;
887 Options.UseBFIInPromotion = IsCS;
888 if (EnableSampledInstr) {
889 Options.Sampling = true;
890 // With sampling, there is little beneifit to enable counter promotion.
891 // But note that sampling does work with counter promotion.
892 Options.DoCounterPromotion = false;
893 }
894 Options.Atomic = AtomicCounterUpdate;
895 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
896}
897
898void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
899 bool RunProfileGen, bool IsCS,
900 bool AtomicCounterUpdate,
901 std::string ProfileFile,
902 std::string ProfileRemappingFile) {
903 if (!RunProfileGen) {
904 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
905 MPM.addPass(
906 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
907 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
908 // RequireAnalysisPass for PSI before subsequent non-module passes.
909 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
910 return;
911 }
912
913 // Perform PGO instrumentation.
914 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
915 : PGOInstrumentationType::FDO));
916 // Add the profile lowering pass.
917 InstrProfOptions Options;
918 if (!ProfileFile.empty())
919 Options.InstrProfileOutput = ProfileFile;
920 // Do not do counter promotion at O0.
921 Options.DoCounterPromotion = false;
922 Options.UseBFIInPromotion = IsCS;
923 Options.Atomic = AtomicCounterUpdate;
924 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
925}
926
927static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
928 return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel());
929}
930
931ModuleInlinerWrapperPass
932PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
933 ThinOrFullLTOPhase Phase) {
934 InlineParams IP;
935 if (PTO.InlinerThreshold == -1)
936 IP = getInlineParamsFromOptLevel(Level);
937 else
938 IP = getInlineParams(Threshold: PTO.InlinerThreshold);
939 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
940 // set hot-caller threshold to 0 to disable hot
941 // callsite inline (as much as possible [1]) because it makes
942 // profile annotation in the backend inaccurate.
943 //
944 // [1] Note the cost of a function could be below zero due to erased
945 // prologue / epilogue.
946 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
947 IP.HotCallSiteThreshold = 0;
948
949 if (PGOOpt)
950 IP.EnableDeferral = EnablePGOInlineDeferral;
951
952 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
953 InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner},
954 UseInlineAdvisor, MaxDevirtIterations);
955
956 // Require the GlobalsAA analysis for the module so we can query it within
957 // the CGSCC pipeline.
958 if (EnableGlobalAnalyses) {
959 MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
960 // Invalidate AAManager so it can be recreated and pick up the newly
961 // available GlobalsAA.
962 MIWP.addModulePass(
963 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
964 }
965
966 // Require the ProfileSummaryAnalysis for the module so we can query it within
967 // the inliner pass.
968 MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
969
970 // Now begin the main postorder CGSCC pipeline.
971 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
972 // manager and trying to emulate its precise behavior. Much of this doesn't
973 // make a lot of sense and we should revisit the core CGSCC structure.
974 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
975
976 // Note: historically, the PruneEH pass was run first to deduce nounwind and
977 // generally clean up exception handling overhead. It isn't clear this is
978 // valuable as the inliner doesn't currently care whether it is inlining an
979 // invoke or a call.
980
981 if (AttributorRun & AttributorRunOption::CGSCC)
982 MainCGPipeline.addPass(Pass: AttributorCGSCCPass());
983 else if (AttributorRun & AttributorRunOption::CGSCC_LIGHT)
984 MainCGPipeline.addPass(Pass: AttributorLightCGSCCPass());
985
986 // Deduce function attributes. We do another run of this after the function
987 // simplification pipeline, so this only needs to run when it could affect the
988 // function simplification pipeline, which is only the case with recursive
989 // functions.
990 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
991
992 // When at O3 add argument promotion to the pass pipeline.
993 // FIXME: It isn't at all clear why this should be limited to O3.
994 if (Level == OptimizationLevel::O3)
995 MainCGPipeline.addPass(Pass: ArgumentPromotionPass());
996
997 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
998 // there are no OpenMP runtime calls present in the module.
999 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1000 MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass(Phase));
1001
1002 invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level);
1003
1004 // Add the core function simplification pipeline nested inside the
1005 // CGSCC walk.
1006 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1007 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1008 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1009
1010 // Finally, deduce any function attributes based on the fully simplified
1011 // function.
1012 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass());
1013
1014 // Mark that the function is fully simplified and that it shouldn't be
1015 // simplified again if we somehow revisit it due to CGSCC mutations unless
1016 // it's been modified since.
1017 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1018 Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
1019
1020 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1021 MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1022 MainCGPipeline.addPass(Pass: CoroAnnotationElidePass());
1023 }
1024
1025 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1026 MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor(
1027 Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
1028
1029 return MIWP;
1030}
1031
1032ModulePassManager
1033PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
1034 ThinOrFullLTOPhase Phase) {
1035 ModulePassManager MPM;
1036
1037 InlineParams IP = getInlineParamsFromOptLevel(Level);
1038 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1039 // set hot-caller threshold to 0 to disable hot
1040 // callsite inline (as much as possible [1]) because it makes
1041 // profile annotation in the backend inaccurate.
1042 //
1043 // [1] Note the cost of a function could be below zero due to erased
1044 // prologue / epilogue.
1045 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1046 IP.HotCallSiteThreshold = 0;
1047
1048 if (PGOOpt)
1049 IP.EnableDeferral = EnablePGOInlineDeferral;
1050
1051 // The inline deferral logic is used to avoid losing some
1052 // inlining chance in future. It is helpful in SCC inliner, in which
1053 // inlining is processed in bottom-up order.
1054 // While in module inliner, the inlining order is a priority-based order
1055 // by default. The inline deferral is unnecessary there. So we disable the
1056 // inline deferral logic in module inliner.
1057 IP.EnableDeferral = false;
1058
1059 MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1060 if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1061 MPM.addPass(Pass: GlobalOptPass());
1062 MPM.addPass(Pass: GlobalDCEPass());
1063 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1064 }
1065
1066 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1067 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1068 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1069
1070 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1071 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1072 Pass: CoroSplitPass(Level != OptimizationLevel::O0)));
1073 MPM.addPass(
1074 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: CoroAnnotationElidePass()));
1075 }
1076
1077 return MPM;
1078}
1079
1080ModulePassManager
1081PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1082 ThinOrFullLTOPhase Phase) {
1083 assert(Level != OptimizationLevel::O0 &&
1084 "Should not be used for O0 pipeline");
1085
1086 assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1087 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1088
1089 ModulePassManager MPM;
1090
1091 // Place pseudo probe instrumentation as the first pass of the pipeline to
1092 // minimize the impact of optimization changes.
1093 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1094 Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1095 MPM.addPass(Pass: SampleProfileProbePass(TM));
1096
1097 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1098
1099 // In ThinLTO mode, when flattened profile is used, all the available
1100 // profile information will be annotated in PreLink phase so there is
1101 // no need to load the profile again in PostLink.
1102 bool LoadSampleProfile =
1103 HasSampleProfile &&
1104 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1105
1106 // During the ThinLTO backend phase we perform early indirect call promotion
1107 // here, before globalopt. Otherwise imported available_externally functions
1108 // look unreferenced and are removed. If we are going to load the sample
1109 // profile then defer until later.
1110 // TODO: See if we can move later and consolidate with the location where
1111 // we perform ICP when we are loading a sample profile.
1112 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1113 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1114 // determine whether the new direct calls are annotated with prof metadata.
1115 // Ideally this should be determined from whether the IR is annotated with
1116 // sample profile, and not whether the a sample profile was provided on the
1117 // command line. E.g. for flattened profiles where we will not be reloading
1118 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1119 // provide the sample profile file.
1120 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1121 MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1122
1123 // Create an early function pass manager to cleanup the output of the
1124 // frontend. Not necessary with LTO post link pipelines since the pre link
1125 // pipeline already cleaned up the frontend output.
1126 if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1127 // Do basic inference of function attributes from known properties of system
1128 // libraries and other oracles.
1129 MPM.addPass(Pass: InferFunctionAttrsPass());
1130 MPM.addPass(Pass: CoroEarlyPass());
1131
1132 FunctionPassManager EarlyFPM;
1133 EarlyFPM.addPass(Pass: EntryExitInstrumenterPass(/*PostInlining=*/false));
1134 // Lower llvm.expect to metadata before attempting transforms.
1135 // Compare/branch metadata may alter the behavior of passes like
1136 // SimplifyCFG.
1137 EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass());
1138 EarlyFPM.addPass(Pass: SimplifyCFGPass());
1139 EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1140 EarlyFPM.addPass(Pass: EarlyCSEPass());
1141 if (Level == OptimizationLevel::O3)
1142 EarlyFPM.addPass(Pass: CallSiteSplittingPass());
1143 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1144 Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1145 }
1146
1147 if (LoadSampleProfile) {
1148 // Annotate sample profile right after early FPM to ensure freshness of
1149 // the debug info.
1150 MPM.addPass(Pass: SampleProfileLoaderPass(
1151 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1152 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1153 // RequireAnalysisPass for PSI before subsequent non-module passes.
1154 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1155 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1156 // for the profile annotation to be accurate in the LTO backend.
1157 if (!isLTOPreLink(Phase))
1158 // We perform early indirect call promotion here, before globalopt.
1159 // This is important for the ThinLTO backend phase because otherwise
1160 // imported available_externally functions look unreferenced and are
1161 // removed.
1162 MPM.addPass(
1163 Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1164 }
1165
1166 // Try to perform OpenMP specific optimizations on the module. This is a
1167 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1168 MPM.addPass(Pass: OpenMPOptPass(Phase));
1169
1170 if (AttributorRun & AttributorRunOption::MODULE)
1171 MPM.addPass(Pass: AttributorPass());
1172 else if (AttributorRun & AttributorRunOption::MODULE_LIGHT)
1173 MPM.addPass(Pass: AttributorLightPass());
1174
1175 // Lower type metadata and the type.test intrinsic in the ThinLTO
1176 // post link pipeline after ICP. This is to enable usage of the type
1177 // tests in ICP sequences.
1178 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1179 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1180 lowertypetests::DropTestKind::Assume));
1181
1182 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
1183
1184 // Interprocedural constant propagation now that basic cleanup has occurred
1185 // and prior to optimizing globals.
1186 // FIXME: This position in the pipeline hasn't been carefully considered in
1187 // years, it should be re-analyzed.
1188 MPM.addPass(
1189 Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1190
1191 // Attach metadata to indirect call sites indicating the set of functions
1192 // they may target at run-time. This should follow IPSCCP.
1193 MPM.addPass(Pass: CalledValuePropagationPass());
1194
1195 // Optimize globals to try and fold them into constants.
1196 MPM.addPass(Pass: GlobalOptPass());
1197
1198 // Create a small function pass pipeline to cleanup after all the global
1199 // optimizations.
1200 FunctionPassManager GlobalCleanupPM;
1201 // FIXME: Should this instead by a run of SROA?
1202 GlobalCleanupPM.addPass(Pass: PromotePass());
1203 GlobalCleanupPM.addPass(Pass: InstCombinePass());
1204 invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level);
1205 GlobalCleanupPM.addPass(
1206 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1207 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM),
1208 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1209
1210 // We already asserted this happens in non-FullLTOPostLink earlier.
1211 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1212 // Enable contextual profiling instrumentation.
1213 const bool IsCtxProfGen =
1214 IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
1215 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1216 const bool IsPGOInstrGen =
1217 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1218 const bool IsPGOInstrUse =
1219 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1220 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1221 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1222 // enable ctx profiling from the frontend.
1223 assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1224 "Enabling both instrumented PGO and contextual instrumentation is not "
1225 "supported.");
1226 const bool IsCtxProfUse =
1227 !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
1228
1229 assert(
1230 (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) &&
1231 "--instrument-cold-function-only-path is provided but "
1232 "--pgo-instrument-cold-function-only is not enabled");
1233 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1234 IsPGOPreLink &&
1235 !InstrumentColdFuncOnlyPath.empty();
1236
1237 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1238 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1239 addPreInlinerPasses(MPM, Level, LTOPhase: Phase);
1240
1241 // Add all the requested passes for instrumentation PGO, if requested.
1242 if (IsPGOInstrGen || IsPGOInstrUse) {
1243 addPGOInstrPasses(MPM, Level,
1244 /*RunProfileGen=*/IsPGOInstrGen,
1245 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1246 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1247 } else if (IsCtxProfGen || IsCtxProfUse) {
1248 MPM.addPass(Pass: PGOInstrumentationGen(PGOInstrumentationType::CTXPROF));
1249 // In pre-link, we just want the instrumented IR. We use the contextual
1250 // profile in the post-thinlink phase.
1251 // The instrumentation will be removed in post-thinlink after IPO.
1252 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1253 // mechanism for GUIDs.
1254 MPM.addPass(Pass: AssignGUIDPass());
1255 if (IsCtxProfUse) {
1256 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1257 return MPM;
1258 }
1259 // Block further inlining in the instrumented ctxprof case. This avoids
1260 // confusingly collecting profiles for the same GUID corresponding to
1261 // different variants of the function. We could do like PGO and identify
1262 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1263 // thinlto to happen before performing any further optimizations, it's
1264 // unnecessary to collect profiles for non-prevailing copies.
1265 MPM.addPass(Pass: NoinlineNonPrevailing());
1266 addPostPGOLoopRotation(MPM, Level);
1267 MPM.addPass(Pass: PGOCtxProfLoweringPass());
1268 } else if (IsColdFuncOnlyInstrGen) {
1269 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1270 /* AtomicCounterUpdate */ false,
1271 ProfileFile: InstrumentColdFuncOnlyPath,
1272 /* ProfileRemappingFile */ "");
1273 }
1274
1275 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1276 MPM.addPass(Pass: PGOIndirectCallPromotion(false, false));
1277
1278 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1279 MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1280 EnableSampledInstr));
1281
1282 if (IsMemprofUse)
1283 MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, FS));
1284
1285 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1286 PGOOpt->Action == PGOOptions::SampleUse))
1287 MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1288
1289 MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1290
1291 if (EnableModuleInliner)
1292 MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase));
1293 else
1294 MPM.addPass(Pass: buildInlinerPipeline(Level, Phase));
1295
1296 // Remove any dead arguments exposed by cleanups, constant folding globals,
1297 // and argument promotion.
1298 MPM.addPass(Pass: DeadArgumentEliminationPass());
1299
1300 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1301 MPM.addPass(Pass: SimplifyTypeTestsPass());
1302
1303 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
1304 MPM.addPass(Pass: CoroCleanupPass());
1305
1306 // Optimize globals now that functions are fully simplified.
1307 MPM.addPass(Pass: GlobalOptPass());
1308 MPM.addPass(Pass: GlobalDCEPass());
1309
1310 return MPM;
1311}
1312
1313/// TODO: Should LTO cause any differences to this set of passes?
1314void PassBuilder::addVectorPasses(OptimizationLevel Level,
1315 FunctionPassManager &FPM,
1316 ThinOrFullLTOPhase LTOPhase) {
1317 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1318
1319 FPM.addPass(Pass: LoopVectorizePass(
1320 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1321
1322 // Drop dereferenceable assumes after vectorization, as they are no longer
1323 // needed and can inhibit further optimization.
1324 if (!isLTOPreLink(Phase: LTOPhase))
1325 FPM.addPass(Pass: DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1326
1327 FPM.addPass(Pass: InferAlignmentPass());
1328 if (IsFullLTO) {
1329 // The vectorizer may have significantly shortened a loop body; unroll
1330 // again. Unroll small loops to hide loop backedge latency and saturate any
1331 // parallel execution resources of an out-of-order processor. We also then
1332 // need to clean up redundancies and loop invariant code.
1333 // FIXME: It would be really good to use a loop-integrated instruction
1334 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1335 // across the loop nests.
1336 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1337 if (EnableUnrollAndJam && PTO.LoopUnrolling)
1338 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1339 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1340 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1341 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1342 PTO.ForgetAllSCEVInLoopUnroll)));
1343 FPM.addPass(Pass: WarnMissedTransformationsPass());
1344 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1345 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1346 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1347 // NOTE: we are very late in the pipeline, and we don't have any LICM
1348 // or SimplifyCFG passes scheduled after us, that would cleanup
1349 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1350 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1351 }
1352
1353 if (!IsFullLTO) {
1354 // Eliminate loads by forwarding stores from the previous iteration to loads
1355 // of the current iteration.
1356 FPM.addPass(Pass: LoopLoadEliminationPass());
1357 }
1358 // Cleanup after the loop optimization passes.
1359 FPM.addPass(Pass: InstCombinePass());
1360
1361 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1362 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1363 // At higher optimization levels, try to clean up any runtime overlap and
1364 // alignment checks inserted by the vectorizer. We want to track correlated
1365 // runtime checks for two inner loops in the same outer loop, fold any
1366 // common computations, hoist loop-invariant aspects out of any outer loop,
1367 // and unswitch the runtime checks if possible. Once hoisted, we may have
1368 // dead (or speculatable) control flows or more combining opportunities.
1369 ExtraPasses.addPass(Pass: EarlyCSEPass());
1370 ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass());
1371 ExtraPasses.addPass(Pass: InstCombinePass());
1372 LoopPassManager LPM;
1373 LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1374 /*AllowSpeculation=*/true));
1375 LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1376 OptimizationLevel::O3));
1377 ExtraPasses.addPass(
1378 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true));
1379 ExtraPasses.addPass(
1380 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1381 ExtraPasses.addPass(Pass: InstCombinePass());
1382 FPM.addPass(Pass: std::move(ExtraPasses));
1383 }
1384
1385 // Now that we've formed fast to execute loop structures, we do further
1386 // optimizations. These are run afterward as they might block doing complex
1387 // analyses and transforms such as what are needed for loop vectorization.
1388
1389 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1390 // GVN, loop transforms, and others have already run, so it's now better to
1391 // convert to more optimized IR using more aggressive simplify CFG options.
1392 // The extra sinking transform can create larger basic blocks, so do this
1393 // before SLP vectorization.
1394 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
1395 .forwardSwitchCondToPhi(B: true)
1396 .convertSwitchRangeToICmp(B: true)
1397 .convertSwitchToArithmetic(B: true)
1398 .convertSwitchToLookupTable(B: true)
1399 .needCanonicalLoops(B: false)
1400 .hoistCommonInsts(B: true)
1401 .sinkCommonInsts(B: true)));
1402
1403 if (IsFullLTO) {
1404 FPM.addPass(Pass: SCCPPass());
1405 FPM.addPass(Pass: InstCombinePass());
1406 FPM.addPass(Pass: BDCEPass());
1407 }
1408
1409 // Optimize parallel scalar instruction chains into SIMD instructions.
1410 if (PTO.SLPVectorization) {
1411 FPM.addPass(Pass: SLPVectorizerPass());
1412 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1413 FPM.addPass(Pass: EarlyCSEPass());
1414 }
1415 }
1416 // Enhance/cleanup vector code.
1417 FPM.addPass(Pass: VectorCombinePass());
1418
1419 if (!IsFullLTO) {
1420 FPM.addPass(Pass: InstCombinePass());
1421 // Unroll small loops to hide loop backedge latency and saturate any
1422 // parallel execution resources of an out-of-order processor. We also then
1423 // need to clean up redundancies and loop invariant code.
1424 // FIXME: It would be really good to use a loop-integrated instruction
1425 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1426 // across the loop nests.
1427 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1428 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1429 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1430 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1431 }
1432 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1433 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1434 PTO.ForgetAllSCEVInLoopUnroll)));
1435 FPM.addPass(Pass: WarnMissedTransformationsPass());
1436 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1437 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1438 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1439 // NOTE: we are very late in the pipeline, and we don't have any LICM
1440 // or SimplifyCFG passes scheduled after us, that would cleanup
1441 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1442 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1443 }
1444
1445 FPM.addPass(Pass: InferAlignmentPass());
1446 FPM.addPass(Pass: InstCombinePass());
1447
1448 // This is needed for two reasons:
1449 // 1. It works around problems that instcombine introduces, such as sinking
1450 // expensive FP divides into loops containing multiplications using the
1451 // divide result.
1452 // 2. It helps to clean up some loop-invariant code created by the loop
1453 // unroll pass when IsFullLTO=false.
1454 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1455 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1456 /*AllowSpeculation=*/true),
1457 /*UseMemorySSA=*/true));
1458
1459 // Now that we've vectorized and unrolled loops, we may have more refined
1460 // alignment information, try to re-derive it here.
1461 FPM.addPass(Pass: AlignmentFromAssumptionsPass());
1462}
1463
1464ModulePassManager
1465PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1466 ThinOrFullLTOPhase LTOPhase) {
1467 const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase);
1468 ModulePassManager MPM;
1469
1470 // Run partial inlining pass to partially inline functions that have
1471 // large bodies.
1472 if (RunPartialInlining)
1473 MPM.addPass(Pass: PartialInlinerPass());
1474
1475 // Remove avail extern fns and globals definitions since we aren't compiling
1476 // an object file for later LTO. For LTO we want to preserve these so they
1477 // are eligible for inlining at link-time. Note if they are unreferenced they
1478 // will be removed by GlobalDCE later, so this only impacts referenced
1479 // available externally globals. Eventually they will be suppressed during
1480 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1481 // may make globals referenced by available external functions dead and saves
1482 // running remaining passes on the eliminated functions. These should be
1483 // preserved during prelinking for link-time inlining decisions.
1484 if (!LTOPreLink)
1485 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1486
1487 // Do RPO function attribute inference across the module to forward-propagate
1488 // attributes where applicable.
1489 // FIXME: Is this really an optimization rather than a canonicalization?
1490 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1491
1492 // Do a post inline PGO instrumentation and use pass. This is a context
1493 // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1494 // cross-module inline has not been done yet. The context sensitive
1495 // instrumentation is after all the inlines are done.
1496 if (!LTOPreLink && PGOOpt) {
1497 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1498 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1499 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1500 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1501 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1502 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1503 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1504 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1505 }
1506
1507 // Re-compute GlobalsAA here prior to function passes. This is particularly
1508 // useful as the above will have inlined, DCE'ed, and function-attr
1509 // propagated everything. We should at this point have a reasonably minimal
1510 // and richly annotated call graph. By computing aliasing and mod/ref
1511 // information for all local globals here, the late loop passes and notably
1512 // the vectorizer will be able to use them to help recognize vectorizable
1513 // memory operations.
1514 if (EnableGlobalAnalyses)
1515 MPM.addPass(Pass: RecomputeGlobalsAAPass());
1516
1517 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase: LTOPhase);
1518
1519 FunctionPassManager OptimizePM;
1520
1521 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1522 // additional uses of the affected value may be introduced through inlining
1523 // and CSE.
1524 if (!isLTOPreLink(Phase: LTOPhase))
1525 OptimizePM.addPass(Pass: DropUnnecessaryAssumesPass());
1526
1527 // Scheduling LoopVersioningLICM when inlining is over, because after that
1528 // we may see more accurate aliasing. Reason to run this late is that too
1529 // early versioning may prevent further inlining due to increase of code
1530 // size. Other optimizations which runs later might get benefit of no-alias
1531 // assumption in clone loop.
1532 if (UseLoopVersioningLICM) {
1533 OptimizePM.addPass(
1534 Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass()));
1535 // LoopVersioningLICM pass might increase new LICM opportunities.
1536 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1537 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1538 /*AllowSpeculation=*/true),
1539 /*USeMemorySSA=*/UseMemorySSA: true));
1540 }
1541
1542 OptimizePM.addPass(Pass: Float2IntPass());
1543 OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass());
1544
1545 if (EnableMatrix) {
1546 OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass());
1547 OptimizePM.addPass(Pass: EarlyCSEPass());
1548 }
1549
1550 // CHR pass should only be applied with the profile information.
1551 // The check is to check the profile summary information in CHR.
1552 if (EnableCHR && Level == OptimizationLevel::O3)
1553 OptimizePM.addPass(Pass: ControlHeightReductionPass());
1554
1555 // FIXME: We need to run some loop optimizations to re-rotate loops after
1556 // simplifycfg and others undo their rotation.
1557
1558 // Optimize the loop execution. These passes operate on entire loop nests
1559 // rather than on each loop in an inside-out manner, and so they are actually
1560 // function passes.
1561
1562 invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level);
1563
1564 LoopPassManager LPM;
1565 // First rotate loops that may have been un-rotated by prior passes.
1566 // Disable header duplication at -Oz.
1567 LPM.addPass(Pass: LoopRotatePass(/*EnableLoopHeaderDuplication=*/true, LTOPreLink,
1568 /*CheckExitCount=*/true));
1569 // Some loops may have become dead by now. Try to delete them.
1570 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1571 // this may need to be revisited once we run GVN before loop deletion
1572 // in the simplification pipeline.
1573 LPM.addPass(Pass: LoopDeletionPass());
1574
1575 if (PTO.LoopInterchange)
1576 LPM.addPass(Pass: LoopInterchangePass());
1577
1578 OptimizePM.addPass(
1579 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false));
1580
1581 // FIXME: This may not be the right place in the pipeline.
1582 // We need to have the data to support the right place.
1583 if (PTO.LoopFusion)
1584 OptimizePM.addPass(Pass: LoopFusePass());
1585
1586 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1587 // into separate loop that would otherwise inhibit vectorization. This is
1588 // currently only performed for loops marked with the metadata
1589 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1590 OptimizePM.addPass(Pass: LoopDistributePass());
1591
1592 // Populates the VFABI attribute with the scalar-to-vector mappings
1593 // from the TargetLibraryInfo.
1594 OptimizePM.addPass(Pass: InjectTLIMappings());
1595
1596 addVectorPasses(Level, FPM&: OptimizePM, LTOPhase);
1597
1598 invokeVectorizerEndEPCallbacks(FPM&: OptimizePM, Level);
1599
1600 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1601 // canonicalization pass that enables other optimizations. As a result,
1602 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1603 // result too early.
1604 OptimizePM.addPass(Pass: LoopSinkPass());
1605
1606 // And finally clean up LCSSA form before generating code.
1607 OptimizePM.addPass(Pass: InstSimplifyPass());
1608
1609 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1610 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1611 // flattening of blocks.
1612 OptimizePM.addPass(Pass: DivRemPairsPass());
1613
1614 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1615 // TODO: move this furter up so that it can be optimized by GVN, etc.
1616 if (EnableMergeICmps)
1617 OptimizePM.addPass(Pass: MergeICmpsPass());
1618 OptimizePM.addPass(Pass: ExpandMemCmpPass());
1619
1620 // Try to annotate calls that were created during optimization.
1621 OptimizePM.addPass(
1622 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1623
1624 // LoopSink (and other loop passes since the last simplifyCFG) might have
1625 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1626 OptimizePM.addPass(
1627 Pass: SimplifyCFGPass(SimplifyCFGOptions()
1628 .convertSwitchRangeToICmp(B: true)
1629 .convertSwitchToArithmetic(B: true)
1630 .speculateUnpredictables(B: true)
1631 .hoistLoadsStoresWithCondFaulting(B: true)));
1632
1633 // Add the core optimizing pipeline.
1634 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM),
1635 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1636
1637 // AllocToken transforms heap allocation calls; this needs to run late after
1638 // other allocation call transformations (such as those in InstCombine).
1639 if (!LTOPreLink)
1640 MPM.addPass(Pass: AllocTokenPass());
1641
1642 invokeOptimizerLastEPCallbacks(MPM, Level, Phase: LTOPhase);
1643
1644 // Split out cold code. Splitting is done late to avoid hiding context from
1645 // other optimizations and inadvertently regressing performance. The tradeoff
1646 // is that this has a higher code size cost than splitting early.
1647 if (EnableHotColdSplit && !LTOPreLink)
1648 MPM.addPass(Pass: HotColdSplittingPass());
1649
1650 // Search the code for similar regions of code. If enough similar regions can
1651 // be found where extracting the regions into their own function will decrease
1652 // the size of the program, we extract the regions, a deduplicate the
1653 // structurally similar regions.
1654 if (EnableIROutliner)
1655 MPM.addPass(Pass: IROutlinerPass());
1656
1657 // Now we need to do some global optimization transforms.
1658 // FIXME: It would seem like these should come first in the optimization
1659 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1660 // ordering here.
1661 MPM.addPass(Pass: GlobalDCEPass());
1662 MPM.addPass(Pass: ConstantMergePass());
1663
1664 // Merge functions if requested. It has a better chance to merge functions
1665 // after ConstantMerge folded jump tables.
1666 if (PTO.MergeFunctions)
1667 MPM.addPass(Pass: MergeFunctionsPass());
1668
1669 if (PTO.CallGraphProfile && !LTOPreLink)
1670 MPM.addPass(Pass: CGProfilePass(isLTOPostLink(Phase: LTOPhase)));
1671
1672 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1673 if (!LTOPreLink)
1674 MPM.addPass(Pass: RelLookupTableConverterPass());
1675
1676 // Add devirtualization pass only when LTO is not enabled, as otherwise
1677 // the pass is already enabled in the LTO pipeline.
1678 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1679 // TODO: explore a better pipeline configuration that can improve
1680 // compilation time overhead.
1681 MPM.addPass(Pass: WholeProgramDevirtPass(
1682 /*ExportSummary*/ nullptr,
1683 /*ImportSummary*/ nullptr,
1684 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1685 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1686 lowertypetests::DropTestKind::Assume));
1687 // Given that the devirtualization creates more opportunities for inlining,
1688 // we run the Inliner again here to maximize the optimization gain we
1689 // get from devirtualization.
1690 // Also, we can't run devirtualization before inlining because the
1691 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1692 // and those passes are only effective after inlining.
1693 if (EnableModuleInliner) {
1694 MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
1695 UseInlineAdvisor,
1696 ThinOrFullLTOPhase::None));
1697 } else {
1698 MPM.addPass(Pass: ModuleInlinerWrapperPass(
1699 getInlineParamsFromOptLevel(Level),
1700 /* MandatoryFirst */ true,
1701 InlineContext{.LTOPhase: ThinOrFullLTOPhase::None, .Pass: InlinePass::CGSCCInliner}));
1702 }
1703 }
1704 return MPM;
1705}
1706
1707ModulePassManager
1708PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1709 ThinOrFullLTOPhase Phase) {
1710 if (Level == OptimizationLevel::O0)
1711 return buildO0DefaultPipeline(Level, Phase);
1712
1713 ModulePassManager MPM;
1714
1715 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1716 // are not running LTO. If that changes the below checks may need updating.
1717 assert(isLTOPreLink(Phase) || Phase == ThinOrFullLTOPhase::None);
1718
1719 // If we are invoking this in non-LTO mode, remove any MemProf related
1720 // attributes and metadata, as we don't know whether we are linking with
1721 // a library containing the necessary interfaces.
1722 if (Phase == ThinOrFullLTOPhase::None)
1723 MPM.addPass(Pass: MemProfRemoveInfo());
1724
1725 // Convert @llvm.global.annotations to !annotation metadata.
1726 MPM.addPass(Pass: Annotation2MetadataPass());
1727
1728 // Force any function attributes we want the rest of the pipeline to observe.
1729 MPM.addPass(Pass: ForceFunctionAttrsPass());
1730
1731 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1732 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1733
1734 // Apply module pipeline start EP callback.
1735 invokePipelineStartEPCallbacks(MPM, Level);
1736
1737 // Add the core simplification pipeline.
1738 MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase));
1739
1740 // Now add the optimization pipeline.
1741 MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase: Phase));
1742
1743 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1744 PGOOpt->Action == PGOOptions::SampleUse)
1745 MPM.addPass(Pass: PseudoProbeUpdatePass());
1746
1747 // Emit annotation remarks.
1748 addAnnotationRemarksPass(MPM);
1749
1750 if (isLTOPreLink(Phase))
1751 addRequiredLTOPreLinkPasses(MPM);
1752 return MPM;
1753}
1754
1755ModulePassManager
1756PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1757 bool EmitSummary) {
1758 ModulePassManager MPM;
1759 if (ThinLTO)
1760 MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level));
1761 else
1762 MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level));
1763 MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary));
1764
1765 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1766 // like removing CFI/WPD related instructions. Note, we reuse
1767 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1768 // in FatLtoCleanup.
1769 MPM.addPass(Pass: FatLtoCleanup());
1770
1771 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1772 // object code, only in the bitcode section, so drop it before we run
1773 // module optimization and generate machine code. If llvm.type.test() isn't in
1774 // the IR, this won't do anything.
1775 MPM.addPass(
1776 Pass: LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All));
1777
1778 // Use the ThinLTO post-link pipeline with sample profiling
1779 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1780 MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1781 else {
1782 // ModuleSimplification does not run the coroutine passes for
1783 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1784 // builds, otherwise they will miscompile.
1785 if (ThinLTO) {
1786 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1787 // consideration.
1788 CGSCCPassManager CGPM;
1789 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1790 CGPM.addPass(Pass: CoroAnnotationElidePass());
1791 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
1792 MPM.addPass(Pass: CoroCleanupPass());
1793 }
1794
1795 // otherwise, just use module optimization
1796 MPM.addPass(
1797 Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None));
1798 // Emit annotation remarks.
1799 addAnnotationRemarksPass(MPM);
1800 }
1801 return MPM;
1802}
1803
1804ModulePassManager
1805PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1806 if (Level == OptimizationLevel::O0)
1807 return buildO0DefaultPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink);
1808
1809 ModulePassManager MPM;
1810
1811 // Convert @llvm.global.annotations to !annotation metadata.
1812 MPM.addPass(Pass: Annotation2MetadataPass());
1813
1814 // Force any function attributes we want the rest of the pipeline to observe.
1815 MPM.addPass(Pass: ForceFunctionAttrsPass());
1816
1817 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1818 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1819
1820 // Apply module pipeline start EP callback.
1821 invokePipelineStartEPCallbacks(MPM, Level);
1822
1823 // If we are planning to perform ThinLTO later, we don't bloat the code with
1824 // unrolling/vectorization/... now. Just simplify the module as much as we
1825 // can.
1826 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1827 Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink));
1828 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1829 // thinlto use the contextual info to perform imports; then use the contextual
1830 // profile in the post-thinlink phase.
1831 if (!UseCtxProfile.empty()) {
1832 addRequiredLTOPreLinkPasses(MPM);
1833 return MPM;
1834 }
1835
1836 // Run partial inlining pass to partially inline functions that have
1837 // large bodies.
1838 // FIXME: It isn't clear whether this is really the right place to run this
1839 // in ThinLTO. Because there is another canonicalization and simplification
1840 // phase that will run after the thin link, running this here ends up with
1841 // less information than will be available later and it may grow functions in
1842 // ways that aren't beneficial.
1843 if (RunPartialInlining)
1844 MPM.addPass(Pass: PartialInlinerPass());
1845
1846 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1847 PGOOpt->Action == PGOOptions::SampleUse)
1848 MPM.addPass(Pass: PseudoProbeUpdatePass());
1849
1850 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1851 // optimization is going to be done in PostLink stage, but clang can't add
1852 // callbacks there in case of in-process ThinLTO called by linker.
1853 invokeOptimizerEarlyEPCallbacks(MPM, Level,
1854 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1855 invokeOptimizerLastEPCallbacks(MPM, Level,
1856 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1857
1858 // Emit annotation remarks.
1859 addAnnotationRemarksPass(MPM);
1860
1861 addRequiredLTOPreLinkPasses(MPM);
1862
1863 return MPM;
1864}
1865
1866ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1867 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1868 ModulePassManager MPM;
1869
1870 // If we are invoking this without a summary index noting that we are linking
1871 // with a library containing the necessary APIs, remove any MemProf related
1872 // attributes and metadata.
1873 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1874 MPM.addPass(Pass: MemProfRemoveInfo());
1875
1876 if (ImportSummary) {
1877 // For ThinLTO we must apply the context disambiguation decisions early, to
1878 // ensure we can correctly match the callsites to summary data.
1879 if (EnableMemProfContextDisambiguation)
1880 MPM.addPass(Pass: MemProfContextDisambiguation(
1881 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1882
1883 // These passes import type identifier resolutions for whole-program
1884 // devirtualization and CFI. They must run early because other passes may
1885 // disturb the specific instruction patterns that these passes look for,
1886 // creating dependencies on resolutions that may not appear in the summary.
1887 //
1888 // For example, GVN may transform the pattern assume(type.test) appearing in
1889 // two basic blocks into assume(phi(type.test, type.test)), which would
1890 // transform a dependency on a WPD resolution into a dependency on a type
1891 // identifier resolution for CFI.
1892 //
1893 // Also, WPD has access to more precise information than ICP and can
1894 // devirtualize more effectively, so it should operate on the IR first.
1895 //
1896 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1897 // metadata and intrinsics.
1898 MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary));
1899 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary));
1900 }
1901
1902 if (Level == OptimizationLevel::O0) {
1903 // Run a second time to clean up any type tests left behind by WPD for use
1904 // in ICP.
1905 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1906 lowertypetests::DropTestKind::Assume));
1907 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1908
1909 // AllocToken transforms heap allocation calls; this needs to run late after
1910 // other allocation call transformations (such as those in InstCombine).
1911 MPM.addPass(Pass: AllocTokenPass());
1912
1913 // Drop available_externally and unreferenced globals. This is necessary
1914 // with ThinLTO in order to avoid leaving undefined references to dead
1915 // globals in the object file.
1916 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1917 MPM.addPass(Pass: GlobalDCEPass());
1918 return MPM;
1919 }
1920 if (!UseCtxProfile.empty()) {
1921 MPM.addPass(
1922 Pass: buildModuleInlinerPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1923 } else {
1924 // Add the core simplification pipeline.
1925 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1926 Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1927 }
1928 // Now add the optimization pipeline.
1929 MPM.addPass(Pass: buildModuleOptimizationPipeline(
1930 Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink));
1931
1932 // Emit annotation remarks.
1933 addAnnotationRemarksPass(MPM);
1934
1935 return MPM;
1936}
1937
1938ModulePassManager
1939PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1940 // FIXME: We should use a customized pre-link pipeline!
1941 return buildPerModuleDefaultPipeline(Level,
1942 Phase: ThinOrFullLTOPhase::FullLTOPreLink);
1943}
1944
1945ModulePassManager
1946PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1947 ModuleSummaryIndex *ExportSummary) {
1948 ModulePassManager MPM;
1949
1950 invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1951
1952 // If we are invoking this without a summary index noting that we are linking
1953 // with a library containing the necessary APIs, remove any MemProf related
1954 // attributes and metadata.
1955 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1956 MPM.addPass(Pass: MemProfRemoveInfo());
1957
1958 // Create a function that performs CFI checks for cross-DSO calls with targets
1959 // in the current module.
1960 MPM.addPass(Pass: CrossDSOCFIPass());
1961
1962 if (Level == OptimizationLevel::O0) {
1963 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1964 // metadata and intrinsics.
1965 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1966 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1967 // Run a second time to clean up any type tests left behind by WPD for use
1968 // in ICP.
1969 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
1970 lowertypetests::DropTestKind::Assume));
1971
1972 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
1973
1974 // AllocToken transforms heap allocation calls; this needs to run late after
1975 // other allocation call transformations (such as those in InstCombine).
1976 MPM.addPass(Pass: AllocTokenPass());
1977
1978 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1979
1980 // Emit annotation remarks.
1981 addAnnotationRemarksPass(MPM);
1982
1983 return MPM;
1984 }
1985
1986 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1987 // Load sample profile before running the LTO optimization pipeline.
1988 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1989 PGOOpt->ProfileRemappingFile,
1990 ThinOrFullLTOPhase::FullLTOPostLink));
1991 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1992 // RequireAnalysisPass for PSI before subsequent non-module passes.
1993 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1994 }
1995
1996 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1997 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1998
1999 // Remove unused virtual tables to improve the quality of code generated by
2000 // whole-program devirtualization and bitset lowering.
2001 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2002
2003 // Do basic inference of function attributes from known properties of system
2004 // libraries and other oracles.
2005 MPM.addPass(Pass: InferFunctionAttrsPass());
2006
2007 if (Level.getSpeedupLevel() > 1) {
2008 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2009 Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2010
2011 // Indirect call promotion. This should promote all the targets that are
2012 // left by the earlier promotion pass that promotes intra-module targets.
2013 // This two-step promotion is to save the compile time. For LTO, it should
2014 // produce the same result as if we only do promotion here.
2015 MPM.addPass(Pass: PGOIndirectCallPromotion(
2016 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2017
2018 // Promoting by-reference arguments to by-value exposes more constants to
2019 // IPSCCP.
2020 CGSCCPassManager CGPM;
2021 CGPM.addPass(Pass: PostOrderFunctionAttrsPass());
2022 CGPM.addPass(Pass: ArgumentPromotionPass());
2023 CGPM.addPass(
2024 Pass: createCGSCCToFunctionPassAdaptor(Pass: SROAPass(SROAOptions::ModifyCFG)));
2025 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2026
2027 // Propagate constants at call sites into the functions they call. This
2028 // opens opportunities for globalopt (and inlining) by substituting function
2029 // pointers passed as arguments to direct uses of functions.
2030 MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2031
2032 // Attach metadata to indirect call sites indicating the set of functions
2033 // they may target at run-time. This should follow IPSCCP.
2034 MPM.addPass(Pass: CalledValuePropagationPass());
2035 }
2036
2037 // Do RPO function attribute inference across the module to forward-propagate
2038 // attributes where applicable.
2039 // FIXME: Is this really an optimization rather than a canonicalization?
2040 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
2041
2042 // Use in-range annotations on GEP indices to split globals where beneficial.
2043 MPM.addPass(Pass: GlobalSplitPass());
2044
2045 // Run whole program optimization of virtual call when the list of callees
2046 // is fixed.
2047 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
2048
2049 MPM.addPass(Pass: NoRecurseLTOInferencePass());
2050 // Stop here at -O1.
2051 if (Level == OptimizationLevel::O1) {
2052 // The LowerTypeTestsPass needs to run to lower type metadata and the
2053 // type.test intrinsics. The pass does nothing if CFI is disabled.
2054 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2055 // Run a second time to clean up any type tests left behind by WPD for use
2056 // in ICP (which is performed earlier than this in the regular LTO
2057 // pipeline).
2058 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
2059 lowertypetests::DropTestKind::Assume));
2060
2061 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
2062
2063 // AllocToken transforms heap allocation calls; this needs to run late after
2064 // other allocation call transformations (such as those in InstCombine).
2065 MPM.addPass(Pass: AllocTokenPass());
2066
2067 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2068
2069 // Emit annotation remarks.
2070 addAnnotationRemarksPass(MPM);
2071
2072 return MPM;
2073 }
2074
2075 // TODO: Skip to match buildCoroWrapper.
2076 MPM.addPass(Pass: CoroEarlyPass());
2077
2078 // Optimize globals to try and fold them into constants.
2079 MPM.addPass(Pass: GlobalOptPass());
2080
2081 // Promote any localized globals to SSA registers.
2082 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass()));
2083
2084 // Linking modules together can lead to duplicate global constant, only
2085 // keep one copy of each constant.
2086 MPM.addPass(Pass: ConstantMergePass());
2087
2088 // Remove unused arguments from functions.
2089 MPM.addPass(Pass: DeadArgumentEliminationPass());
2090
2091 // Reduce the code after globalopt and ipsccp. Both can open up significant
2092 // simplification opportunities, and both can propagate functions through
2093 // function pointers. When this happens, we often have to resolve varargs
2094 // calls, etc, so let instcombine do this.
2095 FunctionPassManager PeepholeFPM;
2096 PeepholeFPM.addPass(Pass: InstCombinePass());
2097 if (Level.getSpeedupLevel() > 1)
2098 PeepholeFPM.addPass(Pass: AggressiveInstCombinePass());
2099 invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level);
2100
2101 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM),
2102 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2103
2104 // Lower variadic functions for supported targets prior to inlining.
2105 MPM.addPass(Pass: ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
2106
2107 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2108 // generally clean up exception handling overhead. It isn't clear this is
2109 // valuable as the inliner doesn't currently care whether it is inlining an
2110 // invoke or a call.
2111 // Run the inliner now.
2112 if (EnableModuleInliner) {
2113 MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
2114 UseInlineAdvisor,
2115 ThinOrFullLTOPhase::FullLTOPostLink));
2116 } else {
2117 MPM.addPass(Pass: ModuleInlinerWrapperPass(
2118 getInlineParamsFromOptLevel(Level),
2119 /* MandatoryFirst */ true,
2120 InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink,
2121 .Pass: InlinePass::CGSCCInliner}));
2122 }
2123
2124 // Perform context disambiguation after inlining, since that would reduce the
2125 // amount of additional cloning required to distinguish the allocation
2126 // contexts.
2127 if (EnableMemProfContextDisambiguation)
2128 MPM.addPass(Pass: MemProfContextDisambiguation(
2129 /*Summary=*/nullptr,
2130 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2131
2132 // Optimize globals again after we ran the inliner.
2133 MPM.addPass(Pass: GlobalOptPass());
2134
2135 // Run the OpenMPOpt pass again after global optimizations.
2136 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
2137
2138 // Garbage collect dead functions.
2139 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2140
2141 // If we didn't decide to inline a function, check to see if we can
2142 // transform it to pass arguments by value instead of by reference.
2143 CGSCCPassManager CGPM;
2144 CGPM.addPass(Pass: ArgumentPromotionPass());
2145 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
2146 CGPM.addPass(Pass: CoroAnnotationElidePass());
2147 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2148
2149 FunctionPassManager FPM;
2150 // The IPO Passes may leave cruft around. Clean up after them.
2151 FPM.addPass(Pass: InstCombinePass());
2152 invokePeepholeEPCallbacks(FPM, Level);
2153
2154 if (EnableConstraintElimination)
2155 FPM.addPass(Pass: ConstraintEliminationPass());
2156
2157 FPM.addPass(Pass: JumpThreadingPass());
2158
2159 // Do a post inline PGO instrumentation and use pass. This is a context
2160 // sensitive PGO pass.
2161 if (PGOOpt) {
2162 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2163 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2164 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2165 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2166 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2167 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2168 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2169 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2170 }
2171
2172 // Break up allocas
2173 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
2174
2175 // LTO provides additional opportunities for tailcall elimination due to
2176 // link-time inlining, and visibility of nocapture attribute.
2177 FPM.addPass(
2178 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2179
2180 // Run a few AA driver optimizations here and now to cleanup the code.
2181 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM),
2182 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2183
2184 MPM.addPass(
2185 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
2186
2187 // Require the GlobalsAA analysis for the module so we can query it within
2188 // MainFPM.
2189 if (EnableGlobalAnalyses) {
2190 MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
2191 // Invalidate AAManager so it can be recreated and pick up the newly
2192 // available GlobalsAA.
2193 MPM.addPass(
2194 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
2195 }
2196
2197 FunctionPassManager MainFPM;
2198 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
2199 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2200 /*AllowSpeculation=*/true),
2201 /*USeMemorySSA=*/UseMemorySSA: true));
2202
2203 if (RunNewGVN)
2204 MainFPM.addPass(Pass: NewGVNPass());
2205 else
2206 MainFPM.addPass(Pass: GVNPass());
2207
2208 // Remove dead memcpy()'s.
2209 MainFPM.addPass(Pass: MemCpyOptPass());
2210
2211 // Nuke dead stores.
2212 MainFPM.addPass(Pass: DSEPass());
2213 MainFPM.addPass(Pass: MoveAutoInitPass());
2214 MainFPM.addPass(Pass: MergedLoadStoreMotionPass());
2215
2216 invokeVectorizerStartEPCallbacks(FPM&: MainFPM, Level);
2217
2218 LoopPassManager LPM;
2219 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2220 LPM.addPass(Pass: LoopFlattenPass());
2221 LPM.addPass(Pass: IndVarSimplifyPass());
2222 LPM.addPass(Pass: LoopDeletionPass());
2223 // FIXME: Add loop interchange.
2224
2225 // Unroll small loops and perform peeling.
2226 LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
2227 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2228 PTO.ForgetAllSCEVInLoopUnroll));
2229 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2230 // *All* loop passes must preserve it, in order to be able to use it.
2231 MainFPM.addPass(
2232 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false));
2233
2234 MainFPM.addPass(Pass: LoopDistributePass());
2235
2236 addVectorPasses(Level, FPM&: MainFPM, LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink);
2237
2238 invokeVectorizerEndEPCallbacks(FPM&: MainFPM, Level);
2239
2240 // Run the OpenMPOpt CGSCC pass again late.
2241 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
2242 Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
2243
2244 invokePeepholeEPCallbacks(FPM&: MainFPM, Level);
2245 MainFPM.addPass(Pass: JumpThreadingPass());
2246 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM),
2247 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2248
2249 // Lower type metadata and the type.test intrinsic. This pass supports
2250 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2251 // to be run at link time if CFI is enabled. This pass does nothing if
2252 // CFI is disabled.
2253 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2254 // Run a second time to clean up any type tests left behind by WPD for use
2255 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2256 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr,
2257 lowertypetests::DropTestKind::Assume));
2258
2259 // Enable splitting late in the FullLTO post-link pipeline.
2260 if (EnableHotColdSplit)
2261 MPM.addPass(Pass: HotColdSplittingPass());
2262
2263 // Add late LTO optimization passes.
2264 FunctionPassManager LateFPM;
2265
2266 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2267 // canonicalization pass that enables other optimizations. As a result,
2268 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2269 // result too early.
2270 LateFPM.addPass(Pass: LoopSinkPass());
2271
2272 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2273 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2274 // flattening of blocks.
2275 LateFPM.addPass(Pass: DivRemPairsPass());
2276
2277 // Delete basic blocks, which optimization passes may have killed.
2278 LateFPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
2279 .convertSwitchRangeToICmp(B: true)
2280 .convertSwitchToArithmetic(B: true)
2281 .hoistCommonInsts(B: true)
2282 .speculateUnpredictables(B: true)));
2283 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM)));
2284
  // Drop bodies of available externally objects to improve GlobalDCE.
2286 MPM.addPass(Pass: EliminateAvailableExternallyPass());
2287
2288 // Now that we have optimized the program, discard unreachable functions.
2289 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2290
2291 if (PTO.MergeFunctions)
2292 MPM.addPass(Pass: MergeFunctionsPass());
2293
2294 MPM.addPass(Pass: RelLookupTableConverterPass());
2295
2296 if (PTO.CallGraphProfile)
2297 MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true));
2298
2299 MPM.addPass(Pass: CoroCleanupPass());
2300
2301 // AllocToken transforms heap allocation calls; this needs to run late after
2302 // other allocation call transformations (such as those in InstCombine).
2303 MPM.addPass(Pass: AllocTokenPass());
2304
2305 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2306
2307 // Emit annotation remarks.
2308 addAnnotationRemarksPass(MPM);
2309
2310 return MPM;
2311}
2312
2313ModulePassManager
2314PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2315 ThinOrFullLTOPhase Phase) {
2316 assert(Level == OptimizationLevel::O0 &&
2317 "buildO0DefaultPipeline should only be used with O0");
2318
2319 ModulePassManager MPM;
2320
2321 // Perform pseudo probe instrumentation in O0 mode. This is for the
2322 // consistency between different build modes. For example, a LTO build can be
2323 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2324 // the postlink will require pseudo probe instrumentation in the prelink.
2325 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2326 MPM.addPass(Pass: SampleProfileProbePass(TM));
2327
2328 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2329 PGOOpt->Action == PGOOptions::IRUse))
2330 addPGOInstrPassesForO0(
2331 MPM,
2332 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2333 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile,
2334 ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2335
2336 // Instrument function entry and exit before all inlining.
2337 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2338 Pass: EntryExitInstrumenterPass(/*PostInlining=*/false)));
2339
2340 invokePipelineStartEPCallbacks(MPM, Level);
2341
2342 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2343 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
2344
2345 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2346 // Explicitly disable sample loader inlining and use flattened profile in O0
2347 // pipeline.
2348 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
2349 PGOOpt->ProfileRemappingFile,
2350 ThinOrFullLTOPhase::None, FS,
2351 /*DisableSampleProfileInlining=*/true,
2352 /*UseFlattenedProfile=*/true));
2353 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2354 // RequireAnalysisPass for PSI before subsequent non-module passes.
2355 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2356 }
2357
2358 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
2359
2360 // Build a minimal pipeline based on the semantics required by LLVM,
2361 // which is just that always inlining occurs. Further, disable generating
2362 // lifetime intrinsics to avoid enabling further optimizations during
2363 // code generation.
2364 MPM.addPass(Pass: AlwaysInlinerPass(
2365 /*InsertLifetimeIntrinsics=*/false));
2366
2367 if (PTO.MergeFunctions)
2368 MPM.addPass(Pass: MergeFunctionsPass());
2369
2370 if (EnableMatrix)
2371 MPM.addPass(
2372 Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true)));
2373
2374 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2375 CGSCCPassManager CGPM;
2376 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2377 if (!CGPM.isEmpty())
2378 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2379 }
2380 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2381 LoopPassManager LPM;
2382 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2383 if (!LPM.isEmpty()) {
2384 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2385 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2386 }
2387 }
2388 if (!LoopOptimizerEndEPCallbacks.empty()) {
2389 LoopPassManager LPM;
2390 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2391 if (!LPM.isEmpty()) {
2392 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2393 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2394 }
2395 }
2396 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2397 FunctionPassManager FPM;
2398 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2399 if (!FPM.isEmpty())
2400 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2401 }
2402
2403 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
2404
2405 if (!VectorizerStartEPCallbacks.empty()) {
2406 FunctionPassManager FPM;
2407 invokeVectorizerStartEPCallbacks(FPM, Level);
2408 if (!FPM.isEmpty())
2409 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2410 }
2411
2412 if (!VectorizerEndEPCallbacks.empty()) {
2413 FunctionPassManager FPM;
2414 invokeVectorizerEndEPCallbacks(FPM, Level);
2415 if (!FPM.isEmpty())
2416 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2417 }
2418
2419 MPM.addPass(Pass: buildCoroWrapper(Phase));
2420
2421 // AllocToken transforms heap allocation calls; this needs to run late after
2422 // other allocation call transformations (such as those in InstCombine).
2423 if (!isLTOPreLink(Phase))
2424 MPM.addPass(Pass: AllocTokenPass());
2425
2426 invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
2427
2428 if (isLTOPreLink(Phase))
2429 addRequiredLTOPreLinkPasses(MPM);
2430
2431 // Emit annotation remarks.
2432 addAnnotationRemarksPass(MPM);
2433
2434 return MPM;
2435}
2436
2437AAManager PassBuilder::buildDefaultAAPipeline() {
2438 AAManager AA;
2439
2440 // The order in which these are registered determines their priority when
2441 // being queried.
2442
2443 // Add any target-specific alias analyses that should be run early.
2444 if (TM)
2445 TM->registerEarlyDefaultAliasAnalyses(AA);
2446
2447 // First we register the basic alias analysis that provides the majority of
2448 // per-function local AA logic. This is a stateless, on-demand local set of
2449 // AA techniques.
2450 AA.registerFunctionAnalysis<BasicAA>();
2451
2452 // Next we query fast, specialized alias analyses that wrap IR-embedded
2453 // information about aliasing.
2454 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2455 AA.registerFunctionAnalysis<TypeBasedAA>();
2456
2457 // Add support for querying global aliasing information when available.
2458 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2459 // analysis, all that the `AAManager` can do is query for any *cached*
2460 // results from `GlobalsAA` through a readonly proxy.
2461 if (EnableGlobalAnalyses)
2462 AA.registerModuleAnalysis<GlobalsAA>();
2463
2464 // Add target-specific alias analyses.
2465 if (TM)
2466 TM->registerDefaultAliasAnalyses(AA);
2467
2468 return AA;
2469}
2470
2471bool PassBuilder::isInstrumentedPGOUse() const {
2472 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2473 !UseCtxProfile.empty();
2474}
2475