1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BasicAliasAnalysis.h"
20#include "llvm/Analysis/CGSCCPassManager.h"
21#include "llvm/Analysis/CtxProfAnalysis.h"
22#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
23#include "llvm/Analysis/GlobalsModRef.h"
24#include "llvm/Analysis/InlineAdvisor.h"
25#include "llvm/Analysis/InstCount.h"
26#include "llvm/Analysis/ProfileSummaryInfo.h"
27#include "llvm/Analysis/ScopedNoAliasAA.h"
28#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
31#include "llvm/Passes/OptimizationLevel.h"
32#include "llvm/Passes/PassBuilder.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/PGOOptions.h"
36#include "llvm/Support/VirtualFileSystem.h"
37#include "llvm/Target/TargetMachine.h"
38#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
39#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
40#include "llvm/Transforms/Coroutines/CoroCleanup.h"
41#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
42#include "llvm/Transforms/Coroutines/CoroEarly.h"
43#include "llvm/Transforms/Coroutines/CoroElide.h"
44#include "llvm/Transforms/Coroutines/CoroSplit.h"
45#include "llvm/Transforms/HipStdPar/HipStdPar.h"
46#include "llvm/Transforms/IPO/AlwaysInliner.h"
47#include "llvm/Transforms/IPO/Annotation2Metadata.h"
48#include "llvm/Transforms/IPO/ArgumentPromotion.h"
49#include "llvm/Transforms/IPO/Attributor.h"
50#include "llvm/Transforms/IPO/CalledValuePropagation.h"
51#include "llvm/Transforms/IPO/ConstantMerge.h"
52#include "llvm/Transforms/IPO/CrossDSOCFI.h"
53#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
54#include "llvm/Transforms/IPO/ElimAvailExtern.h"
55#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
56#include "llvm/Transforms/IPO/ExpandVariadics.h"
57#include "llvm/Transforms/IPO/FatLTOCleanup.h"
58#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
59#include "llvm/Transforms/IPO/FunctionAttrs.h"
60#include "llvm/Transforms/IPO/GlobalDCE.h"
61#include "llvm/Transforms/IPO/GlobalOpt.h"
62#include "llvm/Transforms/IPO/GlobalSplit.h"
63#include "llvm/Transforms/IPO/HotColdSplitting.h"
64#include "llvm/Transforms/IPO/IROutliner.h"
65#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
66#include "llvm/Transforms/IPO/Inliner.h"
67#include "llvm/Transforms/IPO/Instrumentor.h"
68#include "llvm/Transforms/IPO/LowerTypeTests.h"
69#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
70#include "llvm/Transforms/IPO/MergeFunctions.h"
71#include "llvm/Transforms/IPO/ModuleInliner.h"
72#include "llvm/Transforms/IPO/OpenMPOpt.h"
73#include "llvm/Transforms/IPO/PartialInlining.h"
74#include "llvm/Transforms/IPO/SCCP.h"
75#include "llvm/Transforms/IPO/SampleProfile.h"
76#include "llvm/Transforms/IPO/SampleProfileProbe.h"
77#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
78#include "llvm/Transforms/InstCombine/InstCombine.h"
79#include "llvm/Transforms/Instrumentation/AllocToken.h"
80#include "llvm/Transforms/Instrumentation/CGProfile.h"
81#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
82#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
83#include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h"
84#include "llvm/Transforms/Instrumentation/MemProfUse.h"
85#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
86#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
87#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
88#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
89#include "llvm/Transforms/Scalar/ADCE.h"
90#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
91#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
92#include "llvm/Transforms/Scalar/BDCE.h"
93#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
94#include "llvm/Transforms/Scalar/ConstraintElimination.h"
95#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
96#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
97#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
98#include "llvm/Transforms/Scalar/DivRemPairs.h"
99#include "llvm/Transforms/Scalar/DropUnnecessaryAssumes.h"
100#include "llvm/Transforms/Scalar/EarlyCSE.h"
101#include "llvm/Transforms/Scalar/ExpandMemCmp.h"
102#include "llvm/Transforms/Scalar/Float2Int.h"
103#include "llvm/Transforms/Scalar/GVN.h"
104#include "llvm/Transforms/Scalar/IndVarSimplify.h"
105#include "llvm/Transforms/Scalar/InferAlignment.h"
106#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
107#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
108#include "llvm/Transforms/Scalar/JumpThreading.h"
109#include "llvm/Transforms/Scalar/LICM.h"
110#include "llvm/Transforms/Scalar/LoopDeletion.h"
111#include "llvm/Transforms/Scalar/LoopDistribute.h"
112#include "llvm/Transforms/Scalar/LoopFlatten.h"
113#include "llvm/Transforms/Scalar/LoopFuse.h"
114#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
115#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
116#include "llvm/Transforms/Scalar/LoopInterchange.h"
117#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
118#include "llvm/Transforms/Scalar/LoopPassManager.h"
119#include "llvm/Transforms/Scalar/LoopRotation.h"
120#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
121#include "llvm/Transforms/Scalar/LoopSink.h"
122#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
123#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
124#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
125#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
126#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
127#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
128#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
129#include "llvm/Transforms/Scalar/MergeICmps.h"
130#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
131#include "llvm/Transforms/Scalar/NewGVN.h"
132#include "llvm/Transforms/Scalar/Reassociate.h"
133#include "llvm/Transforms/Scalar/SCCP.h"
134#include "llvm/Transforms/Scalar/SROA.h"
135#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
136#include "llvm/Transforms/Scalar/SimplifyCFG.h"
137#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
138#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
139#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
140#include "llvm/Transforms/Utils/AddDiscriminators.h"
141#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
142#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
143#include "llvm/Transforms/Utils/CountVisits.h"
144#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
145#include "llvm/Transforms/Utils/ExtraPassManager.h"
146#include "llvm/Transforms/Utils/InjectTLIMappings.h"
147#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
148#include "llvm/Transforms/Utils/LowerCommentStringPass.h"
149#include "llvm/Transforms/Utils/Mem2Reg.h"
150#include "llvm/Transforms/Utils/MoveAutoInit.h"
151#include "llvm/Transforms/Utils/NameAnonGlobals.h"
152#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
153#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
154#include "llvm/Transforms/Utils/TriggerCrashPass.h"
155#include "llvm/Transforms/Vectorize/LoopVectorize.h"
156#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
157#include "llvm/Transforms/Vectorize/VectorCombine.h"
158
159using namespace llvm;
160
161namespace llvm {
162
163static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
164 "enable-ml-inliner", cl::init(Val: InliningAdvisorMode::Default), cl::Hidden,
165 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
166 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
167 "Heuristics-based inliner version"),
168 clEnumValN(InliningAdvisorMode::Development, "development",
169 "Use development mode (runtime-loadable model)"),
170 clEnumValN(InliningAdvisorMode::Release, "release",
171 "Use release mode (AOT-compiled model)")));
172
173/// Flag to enable inline deferral during PGO.
174static cl::opt<bool>
175 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(Val: true),
176 cl::Hidden,
177 cl::desc("Enable inline deferral during PGO"));
178
179static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
180 cl::init(Val: false), cl::Hidden,
181 cl::desc("Enable module inliner"));
182
183static cl::opt<bool> PerformMandatoryInliningsFirst(
184 "mandatory-inlining-first", cl::init(Val: false), cl::Hidden,
185 cl::desc("Perform mandatory inlinings module-wide, before performing "
186 "inlining"));
187
188static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
189 "eagerly-invalidate-analyses", cl::init(Val: true), cl::Hidden,
190 cl::desc("Eagerly invalidate more analyses in default pipelines"));
191
192static cl::opt<bool> EnableMergeFunctions(
193 "enable-merge-functions", cl::init(Val: false), cl::Hidden,
194 cl::desc("Enable function merging as part of the optimization pipeline"));
195
196static cl::opt<bool> EnablePostPGOLoopRotation(
197 "enable-post-pgo-loop-rotation", cl::init(Val: true), cl::Hidden,
198 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
199
200static cl::opt<bool>
201 TriggerCrash("opt-pipeline-trigger-crash", cl::init(Val: false), cl::Hidden,
202 cl::desc("Trigger crash in optimization pipeline"));
203
204static cl::opt<bool> EnableGlobalAnalyses(
205 "enable-global-analyses", cl::init(Val: true), cl::Hidden,
206 cl::desc("Enable inter-procedural analyses"));
207
208static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
209 cl::init(Val: false), cl::Hidden,
210 cl::desc("Run Partial inlining pass"));
211
212static cl::opt<bool> ExtraVectorizerPasses(
213 "extra-vectorizer-passes", cl::init(Val: false), cl::Hidden,
214 cl::desc("Run cleanup optimization passes after vectorization"));
215
216static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(Val: false), cl::Hidden,
217 cl::desc("Run the NewGVN pass"));
218
219static cl::opt<bool>
220 EnableLoopInterchange("enable-loopinterchange", cl::init(Val: true), cl::Hidden,
221 cl::desc("Enable the LoopInterchange Pass"));
222
223static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
224 cl::init(Val: false), cl::Hidden,
225 cl::desc("Enable Unroll And Jam Pass"));
226
227static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(Val: false),
228 cl::Hidden,
229 cl::desc("Enable the LoopFlatten Pass"));
230
231static cl::opt<bool>
232 EnableInstrumentor("enable-instrumentor", cl::init(Val: false), cl::Hidden,
233 cl::desc("Enable the Instrumentor Pass"));
234
235static cl::opt<bool>
236 EnableDFAJumpThreading("enable-dfa-jump-thread",
237 cl::desc("Enable DFA jump threading"),
238 cl::init(Val: true), cl::Hidden);
239
240static cl::opt<bool>
241 EnableHotColdSplit("hot-cold-split",
242 cl::desc("Enable hot-cold splitting pass"));
243
244static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(Val: false),
245 cl::Hidden,
246 cl::desc("Enable ir outliner pass"));
247
248static cl::opt<bool>
249 DisablePreInliner("disable-preinline", cl::init(Val: false), cl::Hidden,
250 cl::desc("Disable pre-instrumentation inliner"));
251
252static cl::opt<int> PreInlineThreshold(
253 "preinline-threshold", cl::Hidden, cl::init(Val: 75),
254 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
255 "(default = 75)"));
256
257static cl::opt<bool>
258 EnableGVNHoist("enable-gvn-hoist",
259 cl::desc("Enable the GVN hoisting pass (default = off)"));
260
261static cl::opt<bool>
262 EnableGVNSink("enable-gvn-sink",
263 cl::desc("Enable the GVN sinking pass (default = off)"));
264
265static cl::opt<bool> EnableJumpTableToSwitch(
266 "enable-jump-table-to-switch", cl::init(Val: true),
267 cl::desc("Enable JumpTableToSwitch pass (default = true)"));
268
269// This option is used in simplifying testing SampleFDO optimizations for
270// profile loading.
271static cl::opt<bool>
272 EnableCHR("enable-chr", cl::init(Val: true), cl::Hidden,
273 cl::desc("Enable control height reduction optimization (CHR)"));
274
275static cl::opt<bool> FlattenedProfileUsed(
276 "flattened-profile-used", cl::init(Val: false), cl::Hidden,
277 cl::desc("Indicate the sample profile being used is flattened, i.e., "
278 "no inline hierarchy exists in the profile"));
279
280static cl::opt<bool>
281 EnableMatrix("enable-matrix", cl::init(Val: false), cl::Hidden,
282 cl::desc("Enable lowering of the matrix intrinsics"));
283
284static cl::opt<bool> EnableMergeICmps(
285 "enable-mergeicmps", cl::init(Val: true), cl::Hidden,
286 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
287
288static cl::opt<bool> EnableConstraintElimination(
289 "enable-constraint-elimination", cl::init(Val: true), cl::Hidden,
290 cl::desc(
291 "Enable pass to eliminate conditions based on linear constraints"));
292
293static cl::opt<AttributorRunOption> AttributorRun(
294 "attributor-enable", cl::Hidden, cl::init(Val: AttributorRunOption::NONE),
295 cl::desc("Enable the attributor inter-procedural deduction pass"),
296 cl::values(clEnumValN(AttributorRunOption::FULL, "full",
297 "enable all full attributor runs"),
298 clEnumValN(AttributorRunOption::LIGHT, "light",
299 "enable all attributor-light runs"),
300 clEnumValN(AttributorRunOption::MODULE, "module",
301 "enable module-wide attributor runs"),
302 clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light",
303 "enable module-wide attributor-light runs"),
304 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
305 "enable call graph SCC attributor runs"),
306 clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light",
307 "enable call graph SCC attributor-light runs"),
308 clEnumValN(AttributorRunOption::NONE, "none",
309 "disable attributor runs")));
310
311static cl::opt<bool> EnableSampledInstr(
312 "enable-sampled-instrumentation", cl::init(Val: false), cl::Hidden,
313 cl::desc("Enable profile instrumentation sampling (default = off)"));
314static cl::opt<bool> UseLoopVersioningLICM(
315 "enable-loop-versioning-licm", cl::init(Val: false), cl::Hidden,
316 cl::desc("Enable the experimental Loop Versioning LICM pass"));
317
318static cl::opt<std::string> InstrumentColdFuncOnlyPath(
319 "instrument-cold-function-only-path", cl::init(Val: ""),
320 cl::desc("File path for cold function only instrumentation(requires use "
321 "with --pgo-instrument-cold-function-only)"),
322 cl::Hidden);
323
324// TODO: There is a similar flag in WPD pass, we should consolidate them by
325// parsing the option only once in PassBuilder and share it across both places.
326static cl::opt<bool> EnableDevirtualizeSpeculatively(
327 "enable-devirtualize-speculatively",
328 cl::desc("Enable speculative devirtualization optimization"),
329 cl::init(Val: false));
330
331extern cl::opt<std::string> UseCtxProfile;
332extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
333
334extern cl::opt<bool> EnableMemProfContextDisambiguation;
335} // namespace llvm
336
337PipelineTuningOptions::PipelineTuningOptions() {
338 LoopInterleaving = true;
339 LoopVectorization = true;
340 SLPVectorization = false;
341 LoopUnrolling = true;
342 LoopInterchange = EnableLoopInterchange;
343 LoopFusion = false;
344 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
345 LicmMssaOptCap = SetLicmMssaOptCap;
346 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
347 CallGraphProfile = true;
348 UnifiedLTO = false;
349 MergeFunctions = EnableMergeFunctions;
350 InlinerThreshold = -1;
351 EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
352 DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively;
353}
354
355namespace llvm {
356extern cl::opt<unsigned> MaxDevirtIterations;
357} // namespace llvm
358
359void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
360 OptimizationLevel Level) {
361 for (auto &C : PeepholeEPCallbacks)
362 C(FPM, Level);
363}
364void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
365 LoopPassManager &LPM, OptimizationLevel Level) {
366 for (auto &C : LateLoopOptimizationsEPCallbacks)
367 C(LPM, Level);
368}
369void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
370 OptimizationLevel Level) {
371 for (auto &C : LoopOptimizerEndEPCallbacks)
372 C(LPM, Level);
373}
374void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
375 FunctionPassManager &FPM, OptimizationLevel Level) {
376 for (auto &C : ScalarOptimizerLateEPCallbacks)
377 C(FPM, Level);
378}
379void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
380 OptimizationLevel Level) {
381 for (auto &C : CGSCCOptimizerLateEPCallbacks)
382 C(CGPM, Level);
383}
384void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
385 OptimizationLevel Level) {
386 for (auto &C : VectorizerStartEPCallbacks)
387 C(FPM, Level);
388}
389void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
390 OptimizationLevel Level) {
391 for (auto &C : VectorizerEndEPCallbacks)
392 C(FPM, Level);
393}
394void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
395 OptimizationLevel Level,
396 ThinOrFullLTOPhase Phase) {
397 for (auto &C : OptimizerEarlyEPCallbacks)
398 C(MPM, Level, Phase);
399}
400void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
401 OptimizationLevel Level,
402 ThinOrFullLTOPhase Phase) {
403 for (auto &C : OptimizerLastEPCallbacks)
404 C(MPM, Level, Phase);
405}
406void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
407 ModulePassManager &MPM, OptimizationLevel Level) {
408 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
409 C(MPM, Level);
410}
411void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
412 ModulePassManager &MPM, OptimizationLevel Level) {
413 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
414 C(MPM, Level);
415}
416void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
417 OptimizationLevel Level) {
418 for (auto &C : PipelineStartEPCallbacks)
419 C(MPM, Level);
420}
421void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
422 ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
423 for (auto &C : PipelineEarlySimplificationEPCallbacks)
424 C(MPM, Level, Phase);
425}
426
427// Get IR stats with InstCount before/after the optimization pipeline
428static void instructionCountersPass(ModulePassManager &MPM,
429 bool IsPreOptimization) {
430 if (AreStatisticsEnabled()) {
431 MPM.addPass(
432 Pass: createModuleToFunctionPassAdaptor(Pass: InstCountPass(IsPreOptimization)));
433 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
434 Pass: FunctionPropertiesStatisticsPass(IsPreOptimization)));
435 }
436}
437
438// Helper to add AnnotationRemarksPass.
439static void addAnnotationRemarksPass(ModulePassManager &MPM) {
440 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
441}
442
443// Helper to check if the current compilation phase is preparing for LTO
444static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
445 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
446 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
447}
448
449// Helper to check if the current compilation phase is preparing for FullLTO
450[[maybe_unused]] static bool isFullLTOPreLink(ThinOrFullLTOPhase Phase) {
451 return Phase == ThinOrFullLTOPhase::FullLTOPreLink;
452}
453
454// Helper to check if the current compilation phase is preparing for ThinLTO
455static bool isThinLTOPreLink(ThinOrFullLTOPhase Phase) {
456 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
457}
458
459// Helper to check if the current compilation phase is LTO backend
460static bool isLTOPostLink(ThinOrFullLTOPhase Phase) {
461 return Phase == ThinOrFullLTOPhase::ThinLTOPostLink ||
462 Phase == ThinOrFullLTOPhase::FullLTOPostLink;
463}
464
465// Helper to check if the current compilation phase is FullLTO backend
466static bool isFullLTOPostLink(ThinOrFullLTOPhase Phase) {
467 return Phase == ThinOrFullLTOPhase::FullLTOPostLink;
468}
469
470// Helper to check if the current compilation phase is ThinLTO backend
471static bool isThinLTOPostLink(ThinOrFullLTOPhase Phase) {
472 return Phase == ThinOrFullLTOPhase::ThinLTOPostLink;
473}
474
475// Helper to wrap conditionally Coro passes.
476static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) {
477 // TODO: Skip passes according to Phase.
478 ModulePassManager CoroPM;
479 CoroPM.addPass(Pass: CoroEarlyPass());
480 CGSCCPassManager CGPM;
481 CGPM.addPass(Pass: CoroSplitPass());
482 CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
483 CoroPM.addPass(Pass: CoroCleanupPass());
484 CoroPM.addPass(Pass: GlobalDCEPass());
485 return CoroConditionalWrapper(std::move(CoroPM));
486}
487
488// TODO: Investigate the cost/benefit of tail call elimination on debugging.
489FunctionPassManager
490PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
491 ThinOrFullLTOPhase Phase) {
492
493 FunctionPassManager FPM;
494
495 if (AreStatisticsEnabled())
496 FPM.addPass(Pass: CountVisitsPass());
497
498 // Form SSA out of local memory accesses after breaking apart aggregates into
499 // scalars.
500 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
501
502 // Catch trivial redundancies
503 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
504
505 // Hoisting of scalars and load expressions.
506 FPM.addPass(
507 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
508 FPM.addPass(Pass: InstCombinePass());
509
510 FPM.addPass(Pass: LibCallsShrinkWrapPass());
511
512 invokePeepholeEPCallbacks(FPM, Level);
513
514 FPM.addPass(
515 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
516
517 // Form canonically associated expression trees, and simplify the trees using
518 // basic mathematical properties. For example, this will form (nearly)
519 // minimal multiplication trees.
520 FPM.addPass(Pass: ReassociatePass());
521
522 // Add the primary loop simplification pipeline.
523 // FIXME: Currently this is split into two loop pass pipelines because we run
524 // some function passes in between them. These can and should be removed
525 // and/or replaced by scheduling the loop pass equivalents in the correct
526 // positions. But those equivalent passes aren't powerful enough yet.
527 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
528 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
529 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
530 // `LoopInstSimplify`.
531 LoopPassManager LPM1, LPM2;
532
533 // Simplify the loop body. We do this initially to clean up after other loop
534 // passes run, either when iterating on a loop or on inner loops with
535 // implications on the outer loop.
536 LPM1.addPass(Pass: LoopInstSimplifyPass());
537 LPM1.addPass(Pass: LoopSimplifyCFGPass());
538
539 // Try to remove as much code from the loop header as possible,
540 // to reduce amount of IR that will have to be duplicated. However,
541 // do not perform speculative hoisting the first time as LICM
542 // will destroy metadata that may not need to be destroyed if run
543 // after loop rotation.
544 // TODO: Investigate promotion cap for O1.
545 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
546 /*AllowSpeculation=*/false));
547
548 LPM1.addPass(
549 Pass: LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
550 // TODO: Investigate promotion cap for O1.
551 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
552 /*AllowSpeculation=*/true));
553 LPM1.addPass(Pass: SimpleLoopUnswitchPass());
554 if (EnableLoopFlatten)
555 LPM1.addPass(Pass: LoopFlattenPass());
556
557 LPM2.addPass(Pass: LoopIdiomRecognizePass());
558 LPM2.addPass(Pass: IndVarSimplifyPass());
559
560 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
561
562 LPM2.addPass(Pass: LoopDeletionPass());
563
564 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
565 // because it changes IR to makes profile annotation in back compile
566 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
567 // attributes so we need to make sure and allow the full unroll pass to pay
568 // attention to it.
569 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
570 PGOOpt->Action != PGOOptions::SampleUse)
571 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
572 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
573 PTO.ForgetAllSCEVInLoopUnroll));
574
575 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
576
577 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
578 /*UseMemorySSA=*/true));
579 FPM.addPass(
580 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
581 FPM.addPass(Pass: InstCombinePass());
582 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
583 // *All* loop passes must preserve it, in order to be able to use it.
584 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
585 /*UseMemorySSA=*/false));
586
587 // Delete small array after loop unroll.
588 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
589
590 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
591 FPM.addPass(Pass: MemCpyOptPass());
592
593 // Sparse conditional constant propagation.
594 // FIXME: It isn't clear why we do this *after* loop passes rather than
595 // before...
596 FPM.addPass(Pass: SCCPPass());
597
598 // Delete dead bit computations (instcombine runs after to fold away the dead
599 // computations, and then ADCE will run later to exploit any new DCE
600 // opportunities that creates).
601 FPM.addPass(Pass: BDCEPass());
602
603 // Run instcombine after redundancy and dead bit elimination to exploit
604 // opportunities opened up by them.
605 FPM.addPass(Pass: InstCombinePass());
606 invokePeepholeEPCallbacks(FPM, Level);
607
608 FPM.addPass(Pass: CoroElidePass());
609
610 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
611
612 // Finally, do an expensive DCE pass to catch all the dead code exposed by
613 // the simplifications and basic cleanup after all the simplifications.
614 // TODO: Investigate if this is too expensive.
615 FPM.addPass(Pass: ADCEPass());
616 FPM.addPass(
617 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
618 FPM.addPass(Pass: InstCombinePass());
619 invokePeepholeEPCallbacks(FPM, Level);
620
621 return FPM;
622}
623
624FunctionPassManager
625PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
626 ThinOrFullLTOPhase Phase) {
627 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
628
629 // The O1 pipeline has a separate pipeline creation function to simplify
630 // construction readability.
631 if (Level.getSpeedupLevel() == 1)
632 return buildO1FunctionSimplificationPipeline(Level, Phase);
633
634 FunctionPassManager FPM;
635
636 if (AreStatisticsEnabled())
637 FPM.addPass(Pass: CountVisitsPass());
638
639 // Form SSA out of local memory accesses after breaking apart aggregates into
640 // scalars.
641 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
642
643 // Catch trivial redundancies
644 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
645 if (EnableKnowledgeRetention)
646 FPM.addPass(Pass: AssumeSimplifyPass());
647
648 // Hoisting of scalars and load expressions.
649 if (EnableGVNHoist)
650 FPM.addPass(Pass: GVNHoistPass());
651
652 // Global value numbering based sinking.
653 if (EnableGVNSink) {
654 FPM.addPass(Pass: GVNSinkPass());
655 FPM.addPass(
656 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
657 }
658
659 // Speculative execution if the target has divergent branches; otherwise nop.
660 FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
661
662 // Optimize based on known information about branches, and cleanup afterward.
663 FPM.addPass(Pass: JumpThreadingPass());
664 FPM.addPass(Pass: CorrelatedValuePropagationPass());
665
666 // Jump table to switch conversion.
667 if (EnableJumpTableToSwitch)
668 FPM.addPass(Pass: JumpTableToSwitchPass(/*InLTO=*/isLTOPostLink(Phase)));
669
670 FPM.addPass(
671 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
672 FPM.addPass(Pass: InstCombinePass());
673 FPM.addPass(Pass: AggressiveInstCombinePass());
674 FPM.addPass(Pass: LibCallsShrinkWrapPass());
675
676 invokePeepholeEPCallbacks(FPM, Level);
677
678 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
679 // using the size value profile. Don't perform this when optimizing for size.
680 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
681 FPM.addPass(Pass: PGOMemOPSizeOpt());
682
683 FPM.addPass(Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/
684 isInstrumentedPGOUse()));
685 FPM.addPass(
686 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
687
688 // Form canonically associated expression trees, and simplify the trees using
689 // basic mathematical properties. For example, this will form (nearly)
690 // minimal multiplication trees.
691 FPM.addPass(Pass: ReassociatePass());
692
693 if (EnableConstraintElimination)
694 FPM.addPass(Pass: ConstraintEliminationPass());
695
696 // Add the primary loop simplification pipeline.
697 // FIXME: Currently this is split into two loop pass pipelines because we run
698 // some function passes in between them. These can and should be removed
699 // and/or replaced by scheduling the loop pass equivalents in the correct
700 // positions. But those equivalent passes aren't powerful enough yet.
701 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
702 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
703 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
704 // `LoopInstSimplify`.
705 LoopPassManager LPM1, LPM2;
706
707 // Simplify the loop body. We do this initially to clean up after other loop
708 // passes run, either when iterating on a loop or on inner loops with
709 // implications on the outer loop.
710 LPM1.addPass(Pass: LoopInstSimplifyPass());
711 LPM1.addPass(Pass: LoopSimplifyCFGPass());
712
713 // Try to remove as much code from the loop header as possible,
714 // to reduce amount of IR that will have to be duplicated. However,
715 // do not perform speculative hoisting the first time as LICM
716 // will destroy metadata that may not need to be destroyed if run
717 // after loop rotation.
718 // TODO: Investigate promotion cap for O1.
719 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
720 /*AllowSpeculation=*/false));
721
722 LPM1.addPass(
723 Pass: LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
724 // TODO: Investigate promotion cap for O1.
725 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
726 /*AllowSpeculation=*/true));
727 LPM1.addPass(
728 Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
729 if (EnableLoopFlatten)
730 LPM1.addPass(Pass: LoopFlattenPass());
731
732 LPM2.addPass(Pass: LoopIdiomRecognizePass());
733 LPM2.addPass(Pass: IndVarSimplifyPass());
734
735 {
736 ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses;
737 ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
738 OptimizationLevel::O3));
739 LPM2.addPass(Pass: std::move(ExtraPasses));
740 }
741
742 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
743
744 LPM2.addPass(Pass: LoopDeletionPass());
745
  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
  // because it changes the IR in ways that make profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full unroll attributes, so we need to make sure to allow the full
  // unroll pass to pay attention to them.
751 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
752 PGOOpt->Action != PGOOptions::SampleUse)
753 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
754 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
755 PTO.ForgetAllSCEVInLoopUnroll));
756
757 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
758
759 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
760 /*UseMemorySSA=*/true));
761 FPM.addPass(
762 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
763 FPM.addPass(Pass: InstCombinePass());
764 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
765 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
766 // *All* loop passes must preserve it, in order to be able to use it.
767 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
768 /*UseMemorySSA=*/false));
769
770 // Delete small array after loop unroll.
771 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
772
773 // Try vectorization/scalarization transforms that are both improvements
774 // themselves and can allow further folds with GVN and InstCombine.
775 FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
776
777 // Eliminate redundancies.
778 FPM.addPass(Pass: MergedLoadStoreMotionPass());
779 if (RunNewGVN)
780 FPM.addPass(Pass: NewGVNPass());
781 else
782 FPM.addPass(Pass: GVNPass());
783
784 // Sparse conditional constant propagation.
785 // FIXME: It isn't clear why we do this *after* loop passes rather than
786 // before...
787 FPM.addPass(Pass: SCCPPass());
788
789 // Delete dead bit computations (instcombine runs after to fold away the dead
790 // computations, and then ADCE will run later to exploit any new DCE
791 // opportunities that creates).
792 FPM.addPass(Pass: BDCEPass());
793
794 // Run instcombine after redundancy and dead bit elimination to exploit
795 // opportunities opened up by them.
796 FPM.addPass(Pass: InstCombinePass());
797 invokePeepholeEPCallbacks(FPM, Level);
798
799 // Re-consider control flow based optimizations after redundancy elimination,
800 // redo DCE, etc.
801 if (EnableDFAJumpThreading)
802 FPM.addPass(Pass: DFAJumpThreadingPass());
803
804 FPM.addPass(Pass: JumpThreadingPass());
805 FPM.addPass(Pass: CorrelatedValuePropagationPass());
806
807 // Finally, do an expensive DCE pass to catch all the dead code exposed by
808 // the simplifications and basic cleanup after all the simplifications.
809 // TODO: Investigate if this is too expensive.
810 FPM.addPass(Pass: ADCEPass());
811
812 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
813 FPM.addPass(Pass: MemCpyOptPass());
814
815 FPM.addPass(Pass: DSEPass());
816 FPM.addPass(Pass: MoveAutoInitPass());
817
818 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
819 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
820 /*AllowSpeculation=*/true),
821 /*UseMemorySSA=*/true));
822
823 FPM.addPass(Pass: CoroElidePass());
824
825 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
826
827 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
828 .convertSwitchRangeToICmp(B: true)
829 .convertSwitchToArithmetic(B: true)
830 .hoistCommonInsts(B: true)
831 .sinkCommonInsts(B: true)));
832 FPM.addPass(Pass: InstCombinePass());
833 invokePeepholeEPCallbacks(FPM, Level);
834
835 return FPM;
836}
837
838void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
839 MPM.addPass(Pass: CanonicalizeAliasesPass());
840 MPM.addPass(Pass: NameAnonGlobalPass());
841}
842
843void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
844 OptimizationLevel Level,
845 ThinOrFullLTOPhase LTOPhase) {
846 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
847 if (DisablePreInliner)
848 return;
849 InlineParams IP;
850
851 IP.DefaultThreshold = PreInlineThreshold;
852
853 // FIXME: The hint threshold has the same value used by the regular inliner
854 // when not optimzing for size. This should probably be lowered after
855 // performance testing.
856 // FIXME: this comment is cargo culted from the old pass manager, revisit).
857 IP.HintThreshold = 325;
858 IP.OptSizeHintThreshold = PreInlineThreshold;
859 ModuleInlinerWrapperPass MIWP(
860 IP, /* MandatoryFirst */ true,
861 InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner});
862 CGSCCPassManager &CGPipeline = MIWP.getPM();
863
864 FunctionPassManager FPM;
865 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
866 FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies.
867 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
868 B: true))); // Merge & remove basic blocks.
869 FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences.
870 invokePeepholeEPCallbacks(FPM, Level);
871
872 CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
873 Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
874
875 MPM.addPass(Pass: std::move(MIWP));
876
877 // Delete anything that is now dead to make sure that we don't instrument
878 // dead code. Instrumentation can end up keeping dead code around and
879 // dramatically increase code size.
880 MPM.addPass(Pass: GlobalDCEPass());
881}
882
883void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
884 OptimizationLevel Level) {
885 if (EnablePostPGOLoopRotation) {
886 // Disable header duplication in loop rotation at -Oz.
887 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
888 Pass: createFunctionToLoopPassAdaptor(Pass: LoopRotatePass(),
889 /*UseMemorySSA=*/false),
890 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
891 }
892}
893
894void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
895 OptimizationLevel Level, bool RunProfileGen,
896 bool IsCS, bool AtomicCounterUpdate,
897 std::string ProfileFile,
898 std::string ProfileRemappingFile) {
899 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
900
901 if (!RunProfileGen) {
902 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
903 MPM.addPass(
904 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
905 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
906 // RequireAnalysisPass for PSI before subsequent non-module passes.
907 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
908 return;
909 }
910
911 // Perform PGO instrumentation.
912 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
913 : PGOInstrumentationType::FDO));
914
915 addPostPGOLoopRotation(MPM, Level);
916 // Add the profile lowering pass.
917 InstrProfOptions Options;
918 if (!ProfileFile.empty())
919 Options.InstrProfileOutput = ProfileFile;
920 // Do counter promotion at Level greater than O0.
921 Options.DoCounterPromotion = true;
922 Options.UseBFIInPromotion = IsCS;
923 if (EnableSampledInstr) {
924 Options.Sampling = true;
925 // With sampling, there is little beneifit to enable counter promotion.
926 // But note that sampling does work with counter promotion.
927 Options.DoCounterPromotion = false;
928 }
929 Options.Atomic = AtomicCounterUpdate;
930 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
931}
932
933void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
934 bool RunProfileGen, bool IsCS,
935 bool AtomicCounterUpdate,
936 std::string ProfileFile,
937 std::string ProfileRemappingFile) {
938 if (!RunProfileGen) {
939 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
940 MPM.addPass(
941 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
942 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
943 // RequireAnalysisPass for PSI before subsequent non-module passes.
944 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
945 return;
946 }
947
948 // Perform PGO instrumentation.
949 MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
950 : PGOInstrumentationType::FDO));
951 // Add the profile lowering pass.
952 InstrProfOptions Options;
953 if (!ProfileFile.empty())
954 Options.InstrProfileOutput = ProfileFile;
955 // Do not do counter promotion at O0.
956 Options.DoCounterPromotion = false;
957 Options.UseBFIInPromotion = IsCS;
958 Options.Atomic = AtomicCounterUpdate;
959 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
960}
961
962static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
963 return getInlineParamsFromOptLevel(OptLevel: Level.getSpeedupLevel());
964}
965
966ModuleInlinerWrapperPass
967PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
968 ThinOrFullLTOPhase Phase) {
969 InlineParams IP;
970 if (PTO.InlinerThreshold == -1)
971 IP = ::getInlineParamsFromOptLevel(Level);
972 else
973 IP = getInlineParams(Threshold: PTO.InlinerThreshold);
974 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
975 // set hot-caller threshold to 0 to disable hot
976 // callsite inline (as much as possible [1]) because it makes
977 // profile annotation in the backend inaccurate.
978 //
979 // [1] Note the cost of a function could be below zero due to erased
980 // prologue / epilogue.
981 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
982 IP.HotCallSiteThreshold = 0;
983
984 if (PGOOpt)
985 IP.EnableDeferral = EnablePGOInlineDeferral;
986
987 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
988 InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner},
989 UseInlineAdvisor, MaxDevirtIterations);
990
991 // Require the GlobalsAA analysis for the module so we can query it within
992 // the CGSCC pipeline.
993 if (EnableGlobalAnalyses) {
994 MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
995 // Invalidate AAManager so it can be recreated and pick up the newly
996 // available GlobalsAA.
997 MIWP.addModulePass(
998 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
999 }
1000
1001 // Require the ProfileSummaryAnalysis for the module so we can query it within
1002 // the inliner pass.
1003 MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1004
1005 // Now begin the main postorder CGSCC pipeline.
1006 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
1007 // manager and trying to emulate its precise behavior. Much of this doesn't
1008 // make a lot of sense and we should revisit the core CGSCC structure.
1009 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
1010
1011 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1012 // generally clean up exception handling overhead. It isn't clear this is
1013 // valuable as the inliner doesn't currently care whether it is inlining an
1014 // invoke or a call.
1015
1016 if (AttributorRun & AttributorRunOption::CGSCC)
1017 MainCGPipeline.addPass(Pass: AttributorCGSCCPass());
1018 else if (AttributorRun & AttributorRunOption::CGSCC_LIGHT)
1019 MainCGPipeline.addPass(Pass: AttributorLightCGSCCPass());
1020
1021 // Deduce function attributes. We do another run of this after the function
1022 // simplification pipeline, so this only needs to run when it could affect the
1023 // function simplification pipeline, which is only the case with recursive
1024 // functions.
1025 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
1026
1027 // When at O3 add argument promotion to the pass pipeline.
1028 // FIXME: It isn't at all clear why this should be limited to O3.
1029 if (Level == OptimizationLevel::O3)
1030 MainCGPipeline.addPass(Pass: ArgumentPromotionPass());
1031
1032 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1033 // there are no OpenMP runtime calls present in the module.
1034 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1035 MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass(Phase));
1036
1037 invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level);
1038
1039 // Add the core function simplification pipeline nested inside the
1040 // CGSCC walk.
1041 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1042 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1043 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1044
1045 // Finally, deduce any function attributes based on the fully simplified
1046 // function.
1047 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass());
1048
1049 // Mark that the function is fully simplified and that it shouldn't be
1050 // simplified again if we somehow revisit it due to CGSCC mutations unless
1051 // it's been modified since.
1052 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1053 Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
1054
1055 if (!isThinLTOPreLink(Phase)) {
1056 MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1057 MainCGPipeline.addPass(Pass: CoroAnnotationElidePass());
1058 }
1059
1060 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1061 MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor(
1062 Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
1063
1064 return MIWP;
1065}
1066
1067ModulePassManager
1068PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
1069 ThinOrFullLTOPhase Phase) {
1070 ModulePassManager MPM;
1071
1072 InlineParams IP = ::getInlineParamsFromOptLevel(Level);
1073 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1074 // set hot-caller threshold to 0 to disable hot
1075 // callsite inline (as much as possible [1]) because it makes
1076 // profile annotation in the backend inaccurate.
1077 //
1078 // [1] Note the cost of a function could be below zero due to erased
1079 // prologue / epilogue.
1080 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1081 IP.HotCallSiteThreshold = 0;
1082
1083 if (PGOOpt)
1084 IP.EnableDeferral = EnablePGOInlineDeferral;
1085
1086 // The inline deferral logic is used to avoid losing some
1087 // inlining chance in future. It is helpful in SCC inliner, in which
1088 // inlining is processed in bottom-up order.
1089 // While in module inliner, the inlining order is a priority-based order
1090 // by default. The inline deferral is unnecessary there. So we disable the
1091 // inline deferral logic in module inliner.
1092 IP.EnableDeferral = false;
1093
1094 MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1095 if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1096 MPM.addPass(Pass: GlobalOptPass());
1097 MPM.addPass(Pass: GlobalDCEPass());
1098 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1099 }
1100
1101 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1102 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1103 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1104
1105 if (!isThinLTOPreLink(Phase)) {
1106 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1107 Pass: CoroSplitPass(Level != OptimizationLevel::O0)));
1108 MPM.addPass(
1109 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: CoroAnnotationElidePass()));
1110 }
1111
1112 return MPM;
1113}
1114
1115ModulePassManager
1116PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1117 ThinOrFullLTOPhase Phase) {
1118 assert(Level != OptimizationLevel::O0 &&
1119 "Should not be used for O0 pipeline");
1120
1121 assert(!isFullLTOPostLink(Phase) &&
1122 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1123
1124 ModulePassManager MPM;
1125
1126 // Place pseudo probe instrumentation as the first pass of the pipeline to
1127 // minimize the impact of optimization changes.
1128 if (PGOOpt && PGOOpt->PseudoProbeForProfiling && !isThinLTOPostLink(Phase))
1129 MPM.addPass(Pass: SampleProfileProbePass(TM));
1130
1131 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1132
1133 // In ThinLTO mode, when flattened profile is used, all the available
1134 // profile information will be annotated in PreLink phase so there is
1135 // no need to load the profile again in PostLink.
1136 bool LoadSampleProfile =
1137 HasSampleProfile && !(FlattenedProfileUsed && isThinLTOPostLink(Phase));
1138
1139 // During the ThinLTO backend phase we perform early indirect call promotion
1140 // here, before globalopt. Otherwise imported available_externally functions
1141 // look unreferenced and are removed. If we are going to load the sample
1142 // profile then defer until later.
1143 // TODO: See if we can move later and consolidate with the location where
1144 // we perform ICP when we are loading a sample profile.
1145 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1146 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1147 // determine whether the new direct calls are annotated with prof metadata.
1148 // Ideally this should be determined from whether the IR is annotated with
1149 // sample profile, and not whether the a sample profile was provided on the
1150 // command line. E.g. for flattened profiles where we will not be reloading
1151 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1152 // provide the sample profile file.
1153 if (isThinLTOPostLink(Phase) && !LoadSampleProfile)
1154 MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1155
1156 // Create an early function pass manager to cleanup the output of the
1157 // frontend. Not necessary with LTO post link pipelines since the pre link
1158 // pipeline already cleaned up the frontend output.
1159 if (!isThinLTOPostLink(Phase)) {
1160 // Do basic inference of function attributes from known properties of system
1161 // libraries and other oracles.
1162 MPM.addPass(Pass: InferFunctionAttrsPass());
1163 MPM.addPass(Pass: CoroEarlyPass());
1164
1165 FunctionPassManager EarlyFPM;
1166 EarlyFPM.addPass(Pass: EntryExitInstrumenterPass(/*PostInlining=*/false));
1167 // Lower llvm.expect to metadata before attempting transforms.
1168 // Compare/branch metadata may alter the behavior of passes like
1169 // SimplifyCFG.
1170 EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass());
1171 EarlyFPM.addPass(Pass: SimplifyCFGPass());
1172 EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1173 EarlyFPM.addPass(Pass: EarlyCSEPass());
1174 if (Level == OptimizationLevel::O3)
1175 EarlyFPM.addPass(Pass: CallSiteSplittingPass());
1176 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1177 Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1178 }
1179
1180 if (LoadSampleProfile) {
1181 // Annotate sample profile right after early FPM to ensure freshness of
1182 // the debug info.
1183 MPM.addPass(Pass: SampleProfileLoaderPass(
1184 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1185 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1186 // RequireAnalysisPass for PSI before subsequent non-module passes.
1187 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1188 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1189 // for the profile annotation to be accurate in the LTO backend.
1190 if (!isLTOPreLink(Phase))
1191 // We perform early indirect call promotion here, before globalopt.
1192 // This is important for the ThinLTO backend phase because otherwise
1193 // imported available_externally functions look unreferenced and are
1194 // removed.
1195 MPM.addPass(
1196 Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1197 }
1198
1199 // Try to perform OpenMP specific optimizations on the module. This is a
1200 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1201 MPM.addPass(Pass: OpenMPOptPass(Phase));
1202
1203 if (AttributorRun & AttributorRunOption::MODULE)
1204 MPM.addPass(Pass: AttributorPass());
1205 else if (AttributorRun & AttributorRunOption::MODULE_LIGHT)
1206 MPM.addPass(Pass: AttributorLightPass());
1207
1208 // Lower type metadata and the type.test intrinsic in the ThinLTO
1209 // post link pipeline after ICP. This is to enable usage of the type
1210 // tests in ICP sequences.
1211 if (isThinLTOPostLink(Phase))
1212 MPM.addPass(Pass: DropTypeTestsPass());
1213
1214 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
1215
1216 // Interprocedural constant propagation now that basic cleanup has occurred
1217 // and prior to optimizing globals.
1218 // FIXME: This position in the pipeline hasn't been carefully considered in
1219 // years, it should be re-analyzed.
1220 MPM.addPass(
1221 Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1222
1223 // Attach metadata to indirect call sites indicating the set of functions
1224 // they may target at run-time. This should follow IPSCCP.
1225 MPM.addPass(Pass: CalledValuePropagationPass());
1226
1227 // Optimize globals to try and fold them into constants.
1228 MPM.addPass(Pass: GlobalOptPass());
1229
1230 // Create a small function pass pipeline to cleanup after all the global
1231 // optimizations.
1232 FunctionPassManager GlobalCleanupPM;
1233 // FIXME: Should this instead by a run of SROA?
1234 GlobalCleanupPM.addPass(Pass: PromotePass());
1235 GlobalCleanupPM.addPass(Pass: InstCombinePass());
1236 invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level);
1237 GlobalCleanupPM.addPass(
1238 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1239 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM),
1240 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1241
1242 // We already asserted this happens in non-FullLTOPostLink earlier.
1243 const bool IsPreLink = !isThinLTOPostLink(Phase);
1244 // Enable contextual profiling instrumentation.
1245 const bool IsCtxProfGen =
1246 IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
1247 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1248 const bool IsPGOInstrGen =
1249 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1250 const bool IsPGOInstrUse =
1251 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1252 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1253 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1254 // enable ctx profiling from the frontend.
1255 assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1256 "Enabling both instrumented PGO and contextual instrumentation is not "
1257 "supported.");
1258 const bool IsCtxProfUse = !UseCtxProfile.empty() && isThinLTOPreLink(Phase);
1259
1260 assert(
1261 (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) &&
1262 "--instrument-cold-function-only-path is provided but "
1263 "--pgo-instrument-cold-function-only is not enabled");
1264 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1265 IsPGOPreLink &&
1266 !InstrumentColdFuncOnlyPath.empty();
1267
1268 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1269 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1270 addPreInlinerPasses(MPM, Level, LTOPhase: Phase);
1271
1272 // Add all the requested passes for instrumentation PGO, if requested.
1273 if (IsPGOInstrGen || IsPGOInstrUse) {
1274 addPGOInstrPasses(MPM, Level,
1275 /*RunProfileGen=*/IsPGOInstrGen,
1276 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1277 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1278 } else if (IsCtxProfGen || IsCtxProfUse) {
1279 MPM.addPass(Pass: PGOInstrumentationGen(PGOInstrumentationType::CTXPROF));
1280 // In pre-link, we just want the instrumented IR. We use the contextual
1281 // profile in the post-thinlink phase.
1282 // The instrumentation will be removed in post-thinlink after IPO.
1283 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1284 // mechanism for GUIDs.
1285 MPM.addPass(Pass: AssignGUIDPass());
1286 if (IsCtxProfUse) {
1287 MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1288 return MPM;
1289 }
1290 // Block further inlining in the instrumented ctxprof case. This avoids
1291 // confusingly collecting profiles for the same GUID corresponding to
1292 // different variants of the function. We could do like PGO and identify
1293 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1294 // thinlto to happen before performing any further optimizations, it's
1295 // unnecessary to collect profiles for non-prevailing copies.
1296 MPM.addPass(Pass: NoinlineNonPrevailing());
1297 addPostPGOLoopRotation(MPM, Level);
1298 MPM.addPass(Pass: PGOCtxProfLoweringPass());
1299 } else if (IsColdFuncOnlyInstrGen) {
1300 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1301 /* AtomicCounterUpdate */ false,
1302 ProfileFile: InstrumentColdFuncOnlyPath,
1303 /* ProfileRemappingFile */ "");
1304 }
1305
1306 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1307 MPM.addPass(Pass: PGOIndirectCallPromotion(false, false));
1308
1309 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1310 MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1311 EnableSampledInstr));
1312
1313 if (IsMemprofUse)
1314 MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, FS));
1315
1316 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1317 PGOOpt->Action == PGOOptions::SampleUse))
1318 MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1319
1320 MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1321
1322 if (EnableModuleInliner)
1323 MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase));
1324 else
1325 MPM.addPass(Pass: buildInlinerPipeline(Level, Phase));
1326
1327 // Remove any dead arguments exposed by cleanups, constant folding globals,
1328 // and argument promotion.
1329 MPM.addPass(Pass: DeadArgumentEliminationPass());
1330
1331 if (isThinLTOPostLink(Phase))
1332 MPM.addPass(Pass: SimplifyTypeTestsPass());
1333
1334 if (!isThinLTOPreLink(Phase))
1335 MPM.addPass(Pass: CoroCleanupPass());
1336
1337 // Optimize globals now that functions are fully simplified.
1338 MPM.addPass(Pass: GlobalOptPass());
1339 MPM.addPass(Pass: GlobalDCEPass());
1340
1341 return MPM;
1342}
1343
1344/// TODO: Should LTO cause any differences to this set of passes?
1345void PassBuilder::addVectorPasses(OptimizationLevel Level,
1346 FunctionPassManager &FPM,
1347 ThinOrFullLTOPhase LTOPhase) {
1348 FPM.addPass(Pass: LoopVectorizePass(
1349 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1350
1351 // Drop dereferenceable assumes after vectorization, as they are no longer
1352 // needed and can inhibit further optimization.
1353 if (!isLTOPreLink(Phase: LTOPhase))
1354 FPM.addPass(Pass: DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1355
1356 FPM.addPass(Pass: InferAlignmentPass());
1357 if (isFullLTOPostLink(Phase: LTOPhase)) {
1358 // The vectorizer may have significantly shortened a loop body; unroll
1359 // again. Unroll small loops to hide loop backedge latency and saturate any
1360 // parallel execution resources of an out-of-order processor. We also then
1361 // need to clean up redundancies and loop invariant code.
1362 // FIXME: It would be really good to use a loop-integrated instruction
1363 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1364 // across the loop nests.
1365 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1366 if (EnableUnrollAndJam && PTO.LoopUnrolling)
1367 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1368 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1369 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1370 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1371 PTO.ForgetAllSCEVInLoopUnroll)));
1372 FPM.addPass(Pass: WarnMissedTransformationsPass());
1373 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1374 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1375 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
    // NOTE: we are very late in the pipeline, and we don't have any LICM
    // or SimplifyCFG passes scheduled after us that would clean up
    // the CFG mess this may create if allowed to modify the CFG, so forbid
    // that.
1379
1380 // We also turn on struct to vector canonicalization here, which allows
1381 // converting allocas of homogeneous structs into vector allocas when the
1382 // allocas' users are all memory intrinsics. This allows promotion in some
1383 // cases because structs cannot promote to SSA values, but vectors can. We
1384 // only turn this on after memcpyopt runs because this might hinder
1385 // memcpyopt's optimizations if done before. Look at the documentation for
1386 // `tryCanonicalizeStructToVector` in SROA.cpp to see why.
1387 FPM.addPass(Pass: SROAPass(SROAOptions(SROAOptions::PreserveCFG,
1388 /*AggregateToVector=*/true)));
1389 }
1390
1391 if (!isFullLTOPostLink(Phase: LTOPhase)) {
1392 // Eliminate loads by forwarding stores from the previous iteration to loads
1393 // of the current iteration.
1394 FPM.addPass(Pass: LoopLoadEliminationPass());
1395 }
1396 // Cleanup after the loop optimization passes.
1397 FPM.addPass(Pass: InstCombinePass());
1398
1399 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1400 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1401 // At higher optimization levels, try to clean up any runtime overlap and
1402 // alignment checks inserted by the vectorizer. We want to track correlated
1403 // runtime checks for two inner loops in the same outer loop, fold any
1404 // common computations, hoist loop-invariant aspects out of any outer loop,
1405 // and unswitch the runtime checks if possible. Once hoisted, we may have
1406 // dead (or speculatable) control flows or more combining opportunities.
1407 ExtraPasses.addPass(Pass: EarlyCSEPass());
1408 ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass());
1409 ExtraPasses.addPass(Pass: InstCombinePass());
1410 LoopPassManager LPM;
1411 LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1412 /*AllowSpeculation=*/true));
1413 LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1414 OptimizationLevel::O3));
1415 ExtraPasses.addPass(
1416 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true));
1417 ExtraPasses.addPass(
1418 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1419 ExtraPasses.addPass(Pass: InstCombinePass());
1420 FPM.addPass(Pass: std::move(ExtraPasses));
1421 }
1422
1423 // Now that we've formed fast to execute loop structures, we do further
1424 // optimizations. These are run afterward as they might block doing complex
1425 // analyses and transforms such as what are needed for loop vectorization.
1426
1427 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1428 // GVN, loop transforms, and others have already run, so it's now better to
1429 // convert to more optimized IR using more aggressive simplify CFG options.
1430 // The extra sinking transform can create larger basic blocks, so do this
1431 // before SLP vectorization.
1432 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
1433 .forwardSwitchCondToPhi(B: true)
1434 .convertSwitchRangeToICmp(B: true)
1435 .convertSwitchToArithmetic(B: true)
1436 .convertSwitchToLookupTable(B: true)
1437 .needCanonicalLoops(B: false)
1438 .hoistCommonInsts(B: true)
1439 .sinkCommonInsts(B: true)));
1440
1441 if (isFullLTOPostLink(Phase: LTOPhase)) {
1442 FPM.addPass(Pass: SCCPPass());
1443 FPM.addPass(Pass: InstCombinePass());
1444 FPM.addPass(Pass: BDCEPass());
1445 }
1446
1447 // Optimize parallel scalar instruction chains into SIMD instructions.
1448 if (PTO.SLPVectorization) {
1449 FPM.addPass(Pass: SLPVectorizerPass());
1450 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1451 FPM.addPass(Pass: EarlyCSEPass());
1452 }
1453 }
1454 // Enhance/cleanup vector code.
1455 FPM.addPass(Pass: VectorCombinePass());
1456
1457 if (!isFullLTOPostLink(Phase: LTOPhase)) {
1458 FPM.addPass(Pass: InstCombinePass());
1459 // Unroll small loops to hide loop backedge latency and saturate any
1460 // parallel execution resources of an out-of-order processor. We also then
1461 // need to clean up redundancies and loop invariant code.
1462 // FIXME: It would be really good to use a loop-integrated instruction
1463 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1464 // across the loop nests.
1465 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1466 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1467 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1468 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1469 }
1470 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1471 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1472 PTO.ForgetAllSCEVInLoopUnroll)));
1473 FPM.addPass(Pass: WarnMissedTransformationsPass());
1474 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1475 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1476 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1477 // NOTE: we are very late in the pipeline, and we don't have any LICM
1478 // or SimplifyCFG passes scheduled after us, that would cleanup
1479 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1480
1481 // We also turn on struct to vector canonicalization here, which allows
1482 // converting allocas of homogeneous structs into vector allocas when the
1483 // allocas' users are all memory intrinsics. This allows promotion in some
1484 // cases because structs cannot promote to SSA values, but vectors can. We
1485 // only turn this on after memcpyopt runs because this might hinder
1486 // memcpyopt's optimizations if done before. Look at the documentation for
1487 // `tryCanonicalizeStructToVector` in SROA.cpp to see why.
1488 FPM.addPass(Pass: SROAPass(SROAOptions(SROAOptions::PreserveCFG,
1489 /*AggregateToVector=*/true)));
1490 }
1491
1492 FPM.addPass(Pass: InferAlignmentPass());
1493 FPM.addPass(Pass: InstCombinePass());
1494
1495 // This is needed for two reasons:
1496 // 1. It works around problems that instcombine introduces, such as sinking
1497 // expensive FP divides into loops containing multiplications using the
1498 // divide result.
1499 // 2. It helps to clean up some loop-invariant code created by the loop
1500 // unroll pass when IsFullLTO=false.
1501 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1502 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1503 /*AllowSpeculation=*/true),
1504 /*UseMemorySSA=*/true));
1505
1506 // Now that we've vectorized and unrolled loops, we may have more refined
1507 // alignment information, try to re-derive it here.
1508 FPM.addPass(Pass: AlignmentFromAssumptionsPass());
1509}
1510
1511ModulePassManager
1512PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1513 ThinOrFullLTOPhase LTOPhase) {
1514 ModulePassManager MPM;
1515
1516 // Run partial inlining pass to partially inline functions that have
1517 // large bodies.
1518 if (RunPartialInlining)
1519 MPM.addPass(Pass: PartialInlinerPass());
1520
1521 // Remove avail extern fns and globals definitions since we aren't compiling
1522 // an object file for later LTO. For LTO we want to preserve these so they
1523 // are eligible for inlining at link-time. Note if they are unreferenced they
1524 // will be removed by GlobalDCE later, so this only impacts referenced
1525 // available externally globals. Eventually they will be suppressed during
1526 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1527 // may make globals referenced by available external functions dead and saves
1528 // running remaining passes on the eliminated functions. These should be
1529 // preserved during prelinking for link-time inlining decisions.
1530 if (!isLTOPreLink(Phase: LTOPhase))
1531 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1532
1533 // Do RPO function attribute inference across the module to forward-propagate
1534 // attributes where applicable.
1535 // FIXME: Is this really an optimization rather than a canonicalization?
1536 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1537
1538 // Do a post inline PGO instrumentation and use pass. This is a context
1539 // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1540 // cross-module inline has not been done yet. The context sensitive
1541 // instrumentation is after all the inlines are done.
1542 if (!isLTOPreLink(Phase: LTOPhase) && PGOOpt) {
1543 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1544 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1545 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1546 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1547 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1548 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1549 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1550 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
1551 }
1552
1553 // Re-compute GlobalsAA here prior to function passes. This is particularly
1554 // useful as the above will have inlined, DCE'ed, and function-attr
1555 // propagated everything. We should at this point have a reasonably minimal
1556 // and richly annotated call graph. By computing aliasing and mod/ref
1557 // information for all local globals here, the late loop passes and notably
1558 // the vectorizer will be able to use them to help recognize vectorizable
1559 // memory operations.
1560 if (EnableGlobalAnalyses)
1561 MPM.addPass(Pass: RecomputeGlobalsAAPass());
1562
1563 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase: LTOPhase);
1564
1565 FunctionPassManager OptimizePM;
1566
1567 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1568 // additional uses of the affected value may be introduced through inlining
1569 // and CSE.
1570 if (!isLTOPreLink(Phase: LTOPhase))
1571 OptimizePM.addPass(Pass: DropUnnecessaryAssumesPass());
1572
1573 // Scheduling LoopVersioningLICM when inlining is over, because after that
1574 // we may see more accurate aliasing. Reason to run this late is that too
1575 // early versioning may prevent further inlining due to increase of code
1576 // size. Other optimizations which runs later might get benefit of no-alias
1577 // assumption in clone loop.
1578 if (UseLoopVersioningLICM) {
1579 OptimizePM.addPass(
1580 Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass()));
1581 // LoopVersioningLICM pass might increase new LICM opportunities.
1582 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1583 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1584 /*AllowSpeculation=*/true),
1585 /*USeMemorySSA=*/UseMemorySSA: true));
1586 }
1587
1588 OptimizePM.addPass(Pass: Float2IntPass());
1589 OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass());
1590
1591 if (EnableMatrix) {
1592 OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass());
1593 OptimizePM.addPass(Pass: EarlyCSEPass());
1594 }
1595
1596 // CHR pass should only be applied with the profile information.
1597 // The check is to check the profile summary information in CHR.
1598 if (EnableCHR && Level == OptimizationLevel::O3)
1599 OptimizePM.addPass(Pass: ControlHeightReductionPass());
1600
1601 // FIXME: We need to run some loop optimizations to re-rotate loops after
1602 // simplifycfg and others undo their rotation.
1603
1604 // Optimize the loop execution. These passes operate on entire loop nests
1605 // rather than on each loop in an inside-out manner, and so they are actually
1606 // function passes.
1607
1608 invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level);
1609
1610 LoopPassManager LPM;
1611 // First rotate loops that may have been un-rotated by prior passes.
1612 // Disable header duplication at -Oz.
1613 LPM.addPass(Pass: LoopRotatePass(/*EnableLoopHeaderDuplication=*/true,
1614 isLTOPreLink(Phase: LTOPhase),
1615 /*CheckExitCount=*/true));
1616 // Some loops may have become dead by now. Try to delete them.
1617 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1618 // this may need to be revisited once we run GVN before loop deletion
1619 // in the simplification pipeline.
1620 LPM.addPass(Pass: LoopDeletionPass());
1621
1622 if (PTO.LoopInterchange)
1623 LPM.addPass(Pass: LoopInterchangePass());
1624
1625 OptimizePM.addPass(
1626 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false));
1627
1628 // FIXME: This may not be the right place in the pipeline.
1629 // We need to have the data to support the right place.
1630 if (PTO.LoopFusion)
1631 OptimizePM.addPass(Pass: LoopFusePass());
1632
1633 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1634 // into separate loop that would otherwise inhibit vectorization. This is
1635 // currently only performed for loops marked with the metadata
1636 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1637 OptimizePM.addPass(Pass: LoopDistributePass());
1638
1639 // Populates the VFABI attribute with the scalar-to-vector mappings
1640 // from the TargetLibraryInfo.
1641 OptimizePM.addPass(Pass: InjectTLIMappings());
1642
1643 addVectorPasses(Level, FPM&: OptimizePM, LTOPhase);
1644
1645 invokeVectorizerEndEPCallbacks(FPM&: OptimizePM, Level);
1646
1647 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1648 // canonicalization pass that enables other optimizations. As a result,
1649 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1650 // result too early.
1651 OptimizePM.addPass(Pass: LoopSinkPass());
1652
1653 // And finally clean up LCSSA form before generating code.
1654 OptimizePM.addPass(Pass: InstSimplifyPass());
1655
1656 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1657 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1658 // flattening of blocks.
1659 OptimizePM.addPass(Pass: DivRemPairsPass());
1660
1661 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1662 // TODO: move this furter up so that it can be optimized by GVN, etc.
1663 if (EnableMergeICmps)
1664 OptimizePM.addPass(Pass: MergeICmpsPass());
1665 OptimizePM.addPass(Pass: ExpandMemCmpPass());
1666
1667 // Try to annotate calls that were created during optimization.
1668 OptimizePM.addPass(
1669 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1670
1671 // LoopSink (and other loop passes since the last simplifyCFG) might have
1672 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1673 OptimizePM.addPass(
1674 Pass: SimplifyCFGPass(SimplifyCFGOptions()
1675 .convertSwitchRangeToICmp(B: true)
1676 .convertSwitchToArithmetic(B: true)
1677 .speculateUnpredictables(B: true)
1678 .hoistLoadsStoresWithCondFaulting(B: true)));
1679
1680 // Add the core optimizing pipeline.
1681 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM),
1682 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1683
1684 // AllocToken transforms heap allocation calls; this needs to run late after
1685 // other allocation call transformations (such as those in InstCombine).
1686 if (!isLTOPreLink(Phase: LTOPhase))
1687 MPM.addPass(Pass: AllocTokenPass());
1688
1689 invokeOptimizerLastEPCallbacks(MPM, Level, Phase: LTOPhase);
1690
1691 // Run the Instrumentor pass late.
1692 if (EnableInstrumentor)
1693 MPM.addPass(Pass: InstrumentorPass(FS));
1694
1695 // Split out cold code. Splitting is done late to avoid hiding context from
1696 // other optimizations and inadvertently regressing performance. The tradeoff
1697 // is that this has a higher code size cost than splitting early.
1698 if (EnableHotColdSplit && !isLTOPreLink(Phase: LTOPhase))
1699 MPM.addPass(Pass: HotColdSplittingPass());
1700
1701 // Search the code for similar regions of code. If enough similar regions can
1702 // be found where extracting the regions into their own function will decrease
1703 // the size of the program, we extract the regions, a deduplicate the
1704 // structurally similar regions.
1705 if (EnableIROutliner)
1706 MPM.addPass(Pass: IROutlinerPass());
1707
1708 // Now we need to do some global optimization transforms.
1709 // FIXME: It would seem like these should come first in the optimization
1710 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1711 // ordering here.
1712 MPM.addPass(Pass: GlobalDCEPass());
1713 MPM.addPass(Pass: ConstantMergePass());
1714
1715 // Merge functions if requested. It has a better chance to merge functions
1716 // after ConstantMerge folded jump tables.
1717 if (PTO.MergeFunctions)
1718 MPM.addPass(Pass: MergeFunctionsPass());
1719
1720 if (PTO.CallGraphProfile && !isLTOPreLink(Phase: LTOPhase))
1721 MPM.addPass(Pass: CGProfilePass(isLTOPostLink(Phase: LTOPhase)));
1722
1723 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1724 if (!isLTOPreLink(Phase: LTOPhase))
1725 MPM.addPass(Pass: RelLookupTableConverterPass());
1726
1727 // Add devirtualization pass only when LTO is not enabled, as otherwise
1728 // the pass is already enabled in the LTO pipeline.
1729 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1730 // TODO: explore a better pipeline configuration that can improve
1731 // compilation time overhead.
1732 MPM.addPass(Pass: WholeProgramDevirtPass(
1733 /*ExportSummary*/ nullptr,
1734 /*ImportSummary*/ nullptr,
1735 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1736 MPM.addPass(Pass: DropTypeTestsPass());
1737 // Given that the devirtualization creates more opportunities for inlining,
1738 // we run the Inliner again here to maximize the optimization gain we
1739 // get from devirtualization.
1740 // Also, we can't run devirtualization before inlining because the
1741 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1742 // and those passes are only effective after inlining.
1743 if (EnableModuleInliner) {
1744 MPM.addPass(Pass: ModuleInlinerPass(::getInlineParamsFromOptLevel(Level),
1745 UseInlineAdvisor,
1746 ThinOrFullLTOPhase::None));
1747 } else {
1748 MPM.addPass(Pass: ModuleInlinerWrapperPass(
1749 ::getInlineParamsFromOptLevel(Level),
1750 /* MandatoryFirst */ true,
1751 InlineContext{.LTOPhase: ThinOrFullLTOPhase::None, .Pass: InlinePass::CGSCCInliner}));
1752 }
1753 }
1754
1755 // Attach !implicit.ref metadata from all functions to copyright strings.
1756 MPM.addPass(Pass: LowerCommentStringPass());
1757
1758 return MPM;
1759}
1760
1761ModulePassManager
1762PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1763 ThinOrFullLTOPhase Phase) {
1764 if (Level == OptimizationLevel::O0)
1765 return buildO0DefaultPipeline(Level, Phase);
1766
1767 ModulePassManager MPM;
1768 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1769 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1770 // are not running LTO. If that changes the below checks may need updating.
1771 assert(isLTOPreLink(Phase) || Phase == ThinOrFullLTOPhase::None);
1772
1773 // If we are invoking this in non-LTO mode, remove any MemProf related
1774 // attributes and metadata, as we don't know whether we are linking with
1775 // a library containing the necessary interfaces.
1776 if (Phase == ThinOrFullLTOPhase::None)
1777 MPM.addPass(Pass: MemProfRemoveInfo());
1778
1779 // Convert @llvm.global.annotations to !annotation metadata.
1780 MPM.addPass(Pass: Annotation2MetadataPass());
1781
1782 // Force any function attributes we want the rest of the pipeline to observe.
1783 MPM.addPass(Pass: ForceFunctionAttrsPass());
1784
1785 if (TriggerCrash)
1786 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: TriggerCrashFunctionPass()));
1787
1788 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1789 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1790
1791 // Apply module pipeline start EP callback.
1792 invokePipelineStartEPCallbacks(MPM, Level);
1793
1794 // Add the core simplification pipeline.
1795 MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase));
1796
1797 // Now add the optimization pipeline.
1798 MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase: Phase));
1799
1800 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1801 PGOOpt->Action == PGOOptions::SampleUse)
1802 MPM.addPass(Pass: PseudoProbeUpdatePass());
1803
1804 // Emit annotation remarks.
1805 addAnnotationRemarksPass(MPM);
1806
1807 if (isLTOPreLink(Phase))
1808 addRequiredLTOPreLinkPasses(MPM);
1809
1810 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1811 return MPM;
1812}
1813
1814ModulePassManager
1815PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1816 bool EmitSummary) {
1817 ModulePassManager MPM;
1818
1819 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1820
1821 if (ThinLTO)
1822 MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level));
1823 else
1824 MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level));
1825 MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary));
1826
1827 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1828 // like removing CFI/WPD related instructions. Note, we reuse
1829 // DropTypeTestsPass to clean up type tests rather than duplicate that logic
1830 // in FatLtoCleanup.
1831 MPM.addPass(Pass: FatLtoCleanup());
1832
1833 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1834 // object code, only in the bitcode section, so drop it before we run
1835 // module optimization and generate machine code. If llvm.type.test() isn't in
1836 // the IR, this won't do anything.
1837 MPM.addPass(Pass: DropTypeTestsPass(lowertypetests::DropTestKind::All));
1838
1839 // Use the ThinLTO post-link pipeline with sample profiling
1840 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1841 MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1842 else {
1843 // ModuleSimplification does not run the coroutine passes for
1844 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1845 // builds, otherwise they will miscompile.
1846 if (ThinLTO) {
1847 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1848 // consideration.
1849 CGSCCPassManager CGPM;
1850 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
1851 CGPM.addPass(Pass: CoroAnnotationElidePass());
1852 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
1853 MPM.addPass(Pass: CoroCleanupPass());
1854 }
1855
1856 // otherwise, just use module optimization
1857 MPM.addPass(
1858 Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None));
1859 // Emit annotation remarks.
1860 addAnnotationRemarksPass(MPM);
1861 }
1862
1863 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1864
1865 return MPM;
1866}
1867
1868ModulePassManager
1869PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1870 if (Level == OptimizationLevel::O0)
1871 return buildO0DefaultPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink);
1872
1873 ModulePassManager MPM;
1874
1875 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1876
1877 // Convert @llvm.global.annotations to !annotation metadata.
1878 MPM.addPass(Pass: Annotation2MetadataPass());
1879
1880 // Force any function attributes we want the rest of the pipeline to observe.
1881 MPM.addPass(Pass: ForceFunctionAttrsPass());
1882
1883 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1884 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1885
1886 // Apply module pipeline start EP callback.
1887 invokePipelineStartEPCallbacks(MPM, Level);
1888
1889 // If we are planning to perform ThinLTO later, we don't bloat the code with
1890 // unrolling/vectorization/... now. Just simplify the module as much as we
1891 // can.
1892 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1893 Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink));
1894 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1895 // thinlto use the contextual info to perform imports; then use the contextual
1896 // profile in the post-thinlink phase.
1897 if (!UseCtxProfile.empty()) {
1898 addRequiredLTOPreLinkPasses(MPM);
1899 return MPM;
1900 }
1901
1902 // Run partial inlining pass to partially inline functions that have
1903 // large bodies.
1904 // FIXME: It isn't clear whether this is really the right place to run this
1905 // in ThinLTO. Because there is another canonicalization and simplification
1906 // phase that will run after the thin link, running this here ends up with
1907 // less information than will be available later and it may grow functions in
1908 // ways that aren't beneficial.
1909 if (RunPartialInlining)
1910 MPM.addPass(Pass: PartialInlinerPass());
1911
1912 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1913 PGOOpt->Action == PGOOptions::SampleUse)
1914 MPM.addPass(Pass: PseudoProbeUpdatePass());
1915
1916 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1917 // optimization is going to be done in PostLink stage, but clang can't add
1918 // callbacks there in case of in-process ThinLTO called by linker.
1919 invokeOptimizerEarlyEPCallbacks(MPM, Level,
1920 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1921 invokeOptimizerLastEPCallbacks(MPM, Level,
1922 /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1923
1924 // Emit annotation remarks.
1925 addAnnotationRemarksPass(MPM);
1926
1927 // Attach !implicit.ref metadata from all functions to copyright strings.
1928 MPM.addPass(Pass: LowerCommentStringPass());
1929
1930 addRequiredLTOPreLinkPasses(MPM);
1931
1932 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1933
1934 return MPM;
1935}
1936
1937ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1938 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1939 ModulePassManager MPM;
1940
1941 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1942
1943 // If we are invoking this without a summary index noting that we are linking
1944 // with a library containing the necessary APIs, remove any MemProf related
1945 // attributes and metadata.
1946 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1947 MPM.addPass(Pass: MemProfRemoveInfo());
1948
1949 if (ImportSummary) {
1950 // For ThinLTO we must apply the context disambiguation decisions early, to
1951 // ensure we can correctly match the callsites to summary data.
1952 if (EnableMemProfContextDisambiguation)
1953 MPM.addPass(Pass: MemProfContextDisambiguation(
1954 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1955
1956 // These passes import type identifier resolutions for whole-program
1957 // devirtualization and CFI. They must run early because other passes may
1958 // disturb the specific instruction patterns that these passes look for,
1959 // creating dependencies on resolutions that may not appear in the summary.
1960 //
1961 // For example, GVN may transform the pattern assume(type.test) appearing in
1962 // two basic blocks into assume(phi(type.test, type.test)), which would
1963 // transform a dependency on a WPD resolution into a dependency on a type
1964 // identifier resolution for CFI.
1965 //
1966 // Also, WPD has access to more precise information than ICP and can
1967 // devirtualize more effectively, so it should operate on the IR first.
1968 //
1969 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1970 // metadata and intrinsics.
1971 MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary));
1972 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary));
1973 }
1974
1975 if (Level == OptimizationLevel::O0) {
1976 // Run a second time to clean up any type tests left behind by WPD for use
1977 // in ICP.
1978 MPM.addPass(Pass: DropTypeTestsPass());
1979 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1980
1981 // AllocToken transforms heap allocation calls; this needs to run late after
1982 // other allocation call transformations (such as those in InstCombine).
1983 MPM.addPass(Pass: AllocTokenPass());
1984
1985 // Drop available_externally and unreferenced globals. This is necessary
1986 // with ThinLTO in order to avoid leaving undefined references to dead
1987 // globals in the object file.
1988 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1989 MPM.addPass(Pass: GlobalDCEPass());
1990 return MPM;
1991 }
1992 if (!UseCtxProfile.empty()) {
1993 MPM.addPass(
1994 Pass: buildModuleInlinerPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1995 } else {
1996 // Add the core simplification pipeline.
1997 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1998 Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1999 }
2000 // Now add the optimization pipeline.
2001 MPM.addPass(Pass: buildModuleOptimizationPipeline(
2002 Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink));
2003
2004 // Emit annotation remarks.
2005 addAnnotationRemarksPass(MPM);
2006
2007 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2008
2009 return MPM;
2010}
2011
2012ModulePassManager
2013PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
2014 // FIXME: We should use a customized pre-link pipeline!
2015 return buildPerModuleDefaultPipeline(Level,
2016 Phase: ThinOrFullLTOPhase::FullLTOPreLink);
2017}
2018
2019ModulePassManager
2020PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
2021 ModuleSummaryIndex *ExportSummary) {
2022 ModulePassManager MPM;
2023
2024 instructionCountersPass(MPM, /* IsPreOptimization */ true);
2025
2026 invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
2027
2028 // If we are invoking this without a summary index noting that we are linking
2029 // with a library containing the necessary APIs, remove any MemProf related
2030 // attributes and metadata.
2031 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
2032 MPM.addPass(Pass: MemProfRemoveInfo());
2033
2034 // Create a function that performs CFI checks for cross-DSO calls with targets
2035 // in the current module.
2036 MPM.addPass(Pass: CrossDSOCFIPass());
2037
2038 if (Level == OptimizationLevel::O0) {
2039 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
2040 // metadata and intrinsics.
2041 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
2042 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2043 // Run a second time to clean up any type tests left behind by WPD for use
2044 // in ICP.
2045 MPM.addPass(Pass: DropTypeTestsPass());
2046
2047 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
2048
2049 // AllocToken transforms heap allocation calls; this needs to run late after
2050 // other allocation call transformations (such as those in InstCombine).
2051 MPM.addPass(Pass: AllocTokenPass());
2052
2053 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2054
2055 // Emit annotation remarks.
2056 addAnnotationRemarksPass(MPM);
2057
2058 return MPM;
2059 }
2060
2061 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2062 // Load sample profile before running the LTO optimization pipeline.
2063 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
2064 PGOOpt->ProfileRemappingFile,
2065 ThinOrFullLTOPhase::FullLTOPostLink));
2066 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2067 // RequireAnalysisPass for PSI before subsequent non-module passes.
2068 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2069 }
2070
2071 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2072 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
2073
2074 // Remove unused virtual tables to improve the quality of code generated by
2075 // whole-program devirtualization and bitset lowering.
2076 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2077
2078 // Do basic inference of function attributes from known properties of system
2079 // libraries and other oracles.
2080 MPM.addPass(Pass: InferFunctionAttrsPass());
2081
2082 if (Level.getSpeedupLevel() > 1) {
2083 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2084 Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2085
2086 // Indirect call promotion. This should promote all the targets that are
2087 // left by the earlier promotion pass that promotes intra-module targets.
2088 // This two-step promotion is to save the compile time. For LTO, it should
2089 // produce the same result as if we only do promotion here.
2090 MPM.addPass(Pass: PGOIndirectCallPromotion(
2091 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2092
2093 // Promoting by-reference arguments to by-value exposes more constants to
2094 // IPSCCP.
2095 CGSCCPassManager CGPM;
2096 CGPM.addPass(Pass: PostOrderFunctionAttrsPass());
2097 CGPM.addPass(Pass: ArgumentPromotionPass());
2098 CGPM.addPass(
2099 Pass: createCGSCCToFunctionPassAdaptor(Pass: SROAPass(SROAOptions::ModifyCFG)));
2100 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2101
2102 // Propagate constants at call sites into the functions they call. This
2103 // opens opportunities for globalopt (and inlining) by substituting function
2104 // pointers passed as arguments to direct uses of functions.
2105 MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2106
2107 // Attach metadata to indirect call sites indicating the set of functions
2108 // they may target at run-time. This should follow IPSCCP.
2109 MPM.addPass(Pass: CalledValuePropagationPass());
2110 }
2111
2112 // Do RPO function attribute inference across the module to forward-propagate
2113 // attributes where applicable.
2114 // FIXME: Is this really an optimization rather than a canonicalization?
2115 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
2116
2117 // Use in-range annotations on GEP indices to split globals where beneficial.
2118 MPM.addPass(Pass: GlobalSplitPass());
2119
2120 // Run whole program optimization of virtual call when the list of callees
2121 // is fixed.
2122 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
2123
2124 MPM.addPass(Pass: NoRecurseLTOInferencePass());
2125 // Stop here at -O1.
2126 if (Level == OptimizationLevel::O1) {
2127 // The LowerTypeTestsPass needs to run to lower type metadata and the
2128 // type.test intrinsics. The pass does nothing if CFI is disabled.
2129 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2130 // Run a second time to clean up any type tests left behind by WPD for use
2131 // in ICP (which is performed earlier than this in the regular LTO
2132 // pipeline).
2133 MPM.addPass(Pass: DropTypeTestsPass());
2134
2135 MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
2136
2137 // AllocToken transforms heap allocation calls; this needs to run late after
2138 // other allocation call transformations (such as those in InstCombine).
2139 MPM.addPass(Pass: AllocTokenPass());
2140
2141 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2142
2143 // Emit annotation remarks.
2144 addAnnotationRemarksPass(MPM);
2145
2146 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2147
2148 return MPM;
2149 }
2150
2151 // TODO: Skip to match buildCoroWrapper.
2152 MPM.addPass(Pass: CoroEarlyPass());
2153
2154 // Optimize globals to try and fold them into constants.
2155 MPM.addPass(Pass: GlobalOptPass());
2156
2157 // Promote any localized globals to SSA registers.
2158 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass()));
2159
2160 // Linking modules together can lead to duplicate global constant, only
2161 // keep one copy of each constant.
2162 MPM.addPass(Pass: ConstantMergePass());
2163
2164 // Remove unused arguments from functions.
2165 MPM.addPass(Pass: DeadArgumentEliminationPass());
2166
2167 // Reduce the code after globalopt and ipsccp. Both can open up significant
2168 // simplification opportunities, and both can propagate functions through
2169 // function pointers. When this happens, we often have to resolve varargs
2170 // calls, etc, so let instcombine do this.
2171 FunctionPassManager PeepholeFPM;
2172 PeepholeFPM.addPass(Pass: InstCombinePass());
2173 if (Level.getSpeedupLevel() > 1)
2174 PeepholeFPM.addPass(Pass: AggressiveInstCombinePass());
2175 invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level);
2176
2177 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM),
2178 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2179
2180 // Lower variadic functions for supported targets prior to inlining.
2181 MPM.addPass(Pass: ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
2182
2183 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2184 // generally clean up exception handling overhead. It isn't clear this is
2185 // valuable as the inliner doesn't currently care whether it is inlining an
2186 // invoke or a call.
2187 // Run the inliner now.
2188 if (EnableModuleInliner) {
2189 MPM.addPass(Pass: ModuleInlinerPass(::getInlineParamsFromOptLevel(Level),
2190 UseInlineAdvisor,
2191 ThinOrFullLTOPhase::FullLTOPostLink));
2192 } else {
2193 MPM.addPass(Pass: ModuleInlinerWrapperPass(
2194 ::getInlineParamsFromOptLevel(Level),
2195 /* MandatoryFirst */ true,
2196 InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink,
2197 .Pass: InlinePass::CGSCCInliner}));
2198 }
2199
2200 // Perform context disambiguation after inlining, since that would reduce the
2201 // amount of additional cloning required to distinguish the allocation
2202 // contexts.
2203 if (EnableMemProfContextDisambiguation)
2204 MPM.addPass(Pass: MemProfContextDisambiguation(
2205 /*Summary=*/nullptr,
2206 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2207
2208 // Optimize globals again after we ran the inliner.
2209 MPM.addPass(Pass: GlobalOptPass());
2210
2211 // Run the OpenMPOpt pass again after global optimizations.
2212 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
2213
2214 // Garbage collect dead functions.
2215 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2216
2217 // If we didn't decide to inline a function, check to see if we can
2218 // transform it to pass arguments by value instead of by reference.
2219 CGSCCPassManager CGPM;
2220 CGPM.addPass(Pass: ArgumentPromotionPass());
2221 CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
2222 CGPM.addPass(Pass: CoroAnnotationElidePass());
2223 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2224 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2225
2226 FunctionPassManager FPM;
2227 // The IPO Passes may leave cruft around. Clean up after them.
2228 FPM.addPass(Pass: InstCombinePass());
2229 invokePeepholeEPCallbacks(FPM, Level);
2230
2231 if (EnableConstraintElimination)
2232 FPM.addPass(Pass: ConstraintEliminationPass());
2233
2234 FPM.addPass(Pass: JumpThreadingPass());
2235
2236 // Do a post inline PGO instrumentation and use pass. This is a context
2237 // sensitive PGO pass.
2238 if (PGOOpt) {
2239 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2240 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2241 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2242 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2243 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2244 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2245 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
2246 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2247 }
2248
2249 // Break up allocas
2250 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
2251
2252 // LTO provides additional opportunities for tailcall elimination due to
2253 // link-time inlining, and visibility of nocapture attribute.
2254 FPM.addPass(
2255 Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2256
2257 // Run a few AA driver optimizations here and now to cleanup the code.
2258 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM),
2259 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2260
2261 MPM.addPass(
2262 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
2263
2264 // Require the GlobalsAA analysis for the module so we can query it within
2265 // MainFPM.
2266 if (EnableGlobalAnalyses) {
2267 MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
2268 // Invalidate AAManager so it can be recreated and pick up the newly
2269 // available GlobalsAA.
2270 MPM.addPass(
2271 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
2272 }
2273
2274 FunctionPassManager MainFPM;
2275 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
2276 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2277 /*AllowSpeculation=*/true),
2278 /*USeMemorySSA=*/UseMemorySSA: true));
2279
2280 if (RunNewGVN)
2281 MainFPM.addPass(Pass: NewGVNPass());
2282 else
2283 MainFPM.addPass(Pass: GVNPass());
2284
2285 // Remove dead memcpy()'s.
2286 MainFPM.addPass(Pass: MemCpyOptPass());
2287
2288 // Nuke dead stores.
2289 MainFPM.addPass(Pass: DSEPass());
2290 MainFPM.addPass(Pass: MoveAutoInitPass());
2291 MainFPM.addPass(Pass: MergedLoadStoreMotionPass());
2292
2293 invokeVectorizerStartEPCallbacks(FPM&: MainFPM, Level);
2294
2295 LoopPassManager LPM;
2296 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2297 LPM.addPass(Pass: LoopFlattenPass());
2298 LPM.addPass(Pass: IndVarSimplifyPass());
2299 LPM.addPass(Pass: LoopDeletionPass());
2300 // FIXME: Add loop interchange.
2301
2302 // Unroll small loops and perform peeling.
2303 LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
2304 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2305 PTO.ForgetAllSCEVInLoopUnroll));
2306 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2307 // *All* loop passes must preserve it, in order to be able to use it.
2308 MainFPM.addPass(
2309 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false));
2310
2311 MainFPM.addPass(Pass: LoopDistributePass());
2312
2313 addVectorPasses(Level, FPM&: MainFPM, LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink);
2314
2315 invokeVectorizerEndEPCallbacks(FPM&: MainFPM, Level);
2316
2317 // Run the OpenMPOpt CGSCC pass again late.
2318 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
2319 Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
2320
2321 invokePeepholeEPCallbacks(FPM&: MainFPM, Level);
2322 MainFPM.addPass(Pass: JumpThreadingPass());
2323 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM),
2324 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2325
2326 // Lower type metadata and the type.test intrinsic. This pass supports
2327 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2328 // to be run at link time if CFI is enabled. This pass does nothing if
2329 // CFI is disabled.
2330 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
2331 // Run a second time to clean up any type tests left behind by WPD for use
2332 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2333 MPM.addPass(Pass: DropTypeTestsPass());
2334
2335 // Enable splitting late in the FullLTO post-link pipeline.
2336 if (EnableHotColdSplit)
2337 MPM.addPass(Pass: HotColdSplittingPass());
2338
2339 // Add late LTO optimization passes.
2340 FunctionPassManager LateFPM;
2341
2342 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2343 // canonicalization pass that enables other optimizations. As a result,
2344 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2345 // result too early.
2346 LateFPM.addPass(Pass: LoopSinkPass());
2347
2348 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2349 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2350 // flattening of blocks.
2351 LateFPM.addPass(Pass: DivRemPairsPass());
2352
2353 // Delete basic blocks, which optimization passes may have killed.
2354 LateFPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
2355 .convertSwitchRangeToICmp(B: true)
2356 .convertSwitchToArithmetic(B: true)
2357 .hoistCommonInsts(B: true)
2358 .speculateUnpredictables(B: true)));
2359 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM)));
2360
2361 // Drop bodies of available_externally objects to improve GlobalDCE.
2362 MPM.addPass(Pass: EliminateAvailableExternallyPass());
2363
2364 // Now that we have optimized the program, discard unreachable functions.
2365 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2366
2367 if (PTO.MergeFunctions)
2368 MPM.addPass(Pass: MergeFunctionsPass());
2369
2370 MPM.addPass(Pass: RelLookupTableConverterPass());
2371
2372 if (PTO.CallGraphProfile)
2373 MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true));
2374
2375 MPM.addPass(Pass: CoroCleanupPass());
2376
2377 // AllocToken transforms heap allocation calls; this needs to run late after
2378 // other allocation call transformations (such as those in InstCombine).
2379 MPM.addPass(Pass: AllocTokenPass());
2380
2381 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2382
2383 // Emit annotation remarks.
2384 addAnnotationRemarksPass(MPM);
2385
2386 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2387
2388 return MPM;
2389}
2390
2391ModulePassManager
2392PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2393 ThinOrFullLTOPhase Phase) {
2394 assert(Level == OptimizationLevel::O0 &&
2395 "buildO0DefaultPipeline should only be used with O0");
2396
2397 ModulePassManager MPM;
2398
2399 instructionCountersPass(MPM, /* IsPreOptimization */ true);
2400
2401 // Perform pseudo probe instrumentation in O0 mode. This is for the
2402 // consistency between different build modes. For example, a LTO build can be
2403 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2404 // the postlink will require pseudo probe instrumentation in the prelink.
2405 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2406 MPM.addPass(Pass: SampleProfileProbePass(TM));
2407
2408 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2409 PGOOpt->Action == PGOOptions::IRUse))
2410 addPGOInstrPassesForO0(
2411 MPM,
2412 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2413 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile,
2414 ProfileRemappingFile: PGOOpt->ProfileRemappingFile);
2415
2416 // Instrument function entry and exit before all inlining.
2417 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2418 Pass: EntryExitInstrumenterPass(/*PostInlining=*/false)));
2419
2420 invokePipelineStartEPCallbacks(MPM, Level);
2421
2422 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2423 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
2424
2425 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2426 // Explicitly disable sample loader inlining and use flattened profile in O0
2427 // pipeline.
2428 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
2429 PGOOpt->ProfileRemappingFile,
2430 ThinOrFullLTOPhase::None, FS,
2431 /*DisableSampleProfileInlining=*/true,
2432 /*UseFlattenedProfile=*/true));
2433 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2434 // RequireAnalysisPass for PSI before subsequent non-module passes.
2435 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2436 }
2437
2438 invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
2439
2440 // Build a minimal pipeline based on the semantics required by LLVM,
2441 // which is just that always inlining occurs. Further, disable generating
2442 // lifetime intrinsics to avoid enabling further optimizations during
2443 // code generation.
2444 MPM.addPass(Pass: AlwaysInlinerPass(
2445 /*InsertLifetimeIntrinsics=*/false));
2446
2447 if (PTO.MergeFunctions)
2448 MPM.addPass(Pass: MergeFunctionsPass());
2449
2450 if (EnableMatrix)
2451 MPM.addPass(
2452 Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true)));
2453
2454 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2455 CGSCCPassManager CGPM;
2456 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2457 if (!CGPM.isEmpty())
2458 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2459 }
2460 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2461 LoopPassManager LPM;
2462 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2463 if (!LPM.isEmpty()) {
2464 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2465 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2466 }
2467 }
2468 if (!LoopOptimizerEndEPCallbacks.empty()) {
2469 LoopPassManager LPM;
2470 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2471 if (!LPM.isEmpty()) {
2472 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2473 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2474 }
2475 }
2476 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2477 FunctionPassManager FPM;
2478 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2479 if (!FPM.isEmpty())
2480 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2481 }
2482
2483 invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
2484
2485 if (!VectorizerStartEPCallbacks.empty()) {
2486 FunctionPassManager FPM;
2487 invokeVectorizerStartEPCallbacks(FPM, Level);
2488 if (!FPM.isEmpty())
2489 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2490 }
2491
2492 if (!VectorizerEndEPCallbacks.empty()) {
2493 FunctionPassManager FPM;
2494 invokeVectorizerEndEPCallbacks(FPM, Level);
2495 if (!FPM.isEmpty())
2496 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2497 }
2498
2499 MPM.addPass(Pass: buildCoroWrapper(Phase));
2500
2501 // AllocToken transforms heap allocation calls; this needs to run late after
2502 // other allocation call transformations (such as those in InstCombine).
2503 if (!isLTOPreLink(Phase))
2504 MPM.addPass(Pass: AllocTokenPass());
2505
2506 invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
2507
2508 if (EnableInstrumentor)
2509 MPM.addPass(Pass: InstrumentorPass(FS));
2510
2511 // Attach !implicit.ref metadata from all functions to copyright strings.
2512 MPM.addPass(Pass: LowerCommentStringPass());
2513
2514 if (isLTOPreLink(Phase))
2515 addRequiredLTOPreLinkPasses(MPM);
2516
2517 // Emit annotation remarks.
2518 addAnnotationRemarksPass(MPM);
2519
2520 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2521
2522 return MPM;
2523}
2524
2525AAManager PassBuilder::buildDefaultAAPipeline() {
2526 AAManager AA;
2527
2528 // The order in which these are registered determines their priority when
2529 // being queried.
2530
2531 // Add any target-specific alias analyses that should be run early.
2532 if (TM)
2533 TM->registerEarlyDefaultAliasAnalyses(AA);
2534
2535 // First we register the basic alias analysis that provides the majority of
2536 // per-function local AA logic. This is a stateless, on-demand local set of
2537 // AA techniques.
2538 AA.registerFunctionAnalysis<BasicAA>();
2539
2540 // Next we query fast, specialized alias analyses that wrap IR-embedded
2541 // information about aliasing.
2542 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2543 AA.registerFunctionAnalysis<TypeBasedAA>();
2544
2545 // Add support for querying global aliasing information when available.
2546 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2547 // analysis, all that the `AAManager` can do is query for any *cached*
2548 // results from `GlobalsAA` through a readonly proxy.
2549 if (EnableGlobalAnalyses)
2550 AA.registerModuleAnalysis<GlobalsAA>();
2551
2552 // Add target-specific alias analyses.
2553 if (TM)
2554 TM->registerDefaultAliasAnalyses(AA);
2555
2556 return AA;
2557}
2558
2559bool PassBuilder::isInstrumentedPGOUse() const {
2560 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2561 !UseCtxProfile.empty();
2562}
2563