PassBuilderPipelines.cpp source code [llvm_projects/llvm/lib/Passes/PassBuilderPipelines.cpp]

1	//===- Construction of pass pipelines -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	///
10	/// This file provides the implementation of the PassBuilder based on our
11	/// static pass registry as well as related functionality. It also provides
12	/// helpers to aid in analyzing, debugging, and testing passes and pass
13	/// pipelines.
14	///
15	//===----------------------------------------------------------------------===//
16
17	#include "llvm/ADT/Statistic.h"
18	#include "llvm/Analysis/AliasAnalysis.h"
19	#include "llvm/Analysis/BasicAliasAnalysis.h"
20	#include "llvm/Analysis/CGSCCPassManager.h"
21	#include "llvm/Analysis/CtxProfAnalysis.h"
22	#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
23	#include "llvm/Analysis/GlobalsModRef.h"
24	#include "llvm/Analysis/InlineAdvisor.h"
25	#include "llvm/Analysis/InstCount.h"
26	#include "llvm/Analysis/ProfileSummaryInfo.h"
27	#include "llvm/Analysis/ScopedNoAliasAA.h"
28	#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
29	#include "llvm/IR/PassManager.h"
30	#include "llvm/Pass.h"
31	#include "llvm/Passes/OptimizationLevel.h"
32	#include "llvm/Passes/PassBuilder.h"
33	#include "llvm/Support/CommandLine.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/PGOOptions.h"
36	#include "llvm/Support/VirtualFileSystem.h"
37	#include "llvm/Target/TargetMachine.h"
38	#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
39	#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
40	#include "llvm/Transforms/Coroutines/CoroCleanup.h"
41	#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
42	#include "llvm/Transforms/Coroutines/CoroEarly.h"
43	#include "llvm/Transforms/Coroutines/CoroElide.h"
44	#include "llvm/Transforms/Coroutines/CoroSplit.h"
45	#include "llvm/Transforms/HipStdPar/HipStdPar.h"
46	#include "llvm/Transforms/IPO/AlwaysInliner.h"
47	#include "llvm/Transforms/IPO/Annotation2Metadata.h"
48	#include "llvm/Transforms/IPO/ArgumentPromotion.h"
49	#include "llvm/Transforms/IPO/Attributor.h"
50	#include "llvm/Transforms/IPO/CalledValuePropagation.h"
51	#include "llvm/Transforms/IPO/ConstantMerge.h"
52	#include "llvm/Transforms/IPO/CrossDSOCFI.h"
53	#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
54	#include "llvm/Transforms/IPO/ElimAvailExtern.h"
55	#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
56	#include "llvm/Transforms/IPO/ExpandVariadics.h"
57	#include "llvm/Transforms/IPO/FatLTOCleanup.h"
58	#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
59	#include "llvm/Transforms/IPO/FunctionAttrs.h"
60	#include "llvm/Transforms/IPO/GlobalDCE.h"
61	#include "llvm/Transforms/IPO/GlobalOpt.h"
62	#include "llvm/Transforms/IPO/GlobalSplit.h"
63	#include "llvm/Transforms/IPO/HotColdSplitting.h"
64	#include "llvm/Transforms/IPO/IROutliner.h"
65	#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
66	#include "llvm/Transforms/IPO/Inliner.h"
67	#include "llvm/Transforms/IPO/LowerTypeTests.h"
68	#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
69	#include "llvm/Transforms/IPO/MergeFunctions.h"
70	#include "llvm/Transforms/IPO/ModuleInliner.h"
71	#include "llvm/Transforms/IPO/OpenMPOpt.h"
72	#include "llvm/Transforms/IPO/PartialInlining.h"
73	#include "llvm/Transforms/IPO/SCCP.h"
74	#include "llvm/Transforms/IPO/SampleProfile.h"
75	#include "llvm/Transforms/IPO/SampleProfileProbe.h"
76	#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
77	#include "llvm/Transforms/InstCombine/InstCombine.h"
78	#include "llvm/Transforms/Instrumentation/AllocToken.h"
79	#include "llvm/Transforms/Instrumentation/CGProfile.h"
80	#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
81	#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
82	#include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h"
83	#include "llvm/Transforms/Instrumentation/MemProfUse.h"
84	#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
85	#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
86	#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
87	#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
88	#include "llvm/Transforms/Scalar/ADCE.h"
89	#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
90	#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
91	#include "llvm/Transforms/Scalar/BDCE.h"
92	#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
93	#include "llvm/Transforms/Scalar/ConstraintElimination.h"
94	#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
95	#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
96	#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
97	#include "llvm/Transforms/Scalar/DivRemPairs.h"
98	#include "llvm/Transforms/Scalar/DropUnnecessaryAssumes.h"
99	#include "llvm/Transforms/Scalar/EarlyCSE.h"
100	#include "llvm/Transforms/Scalar/Float2Int.h"
101	#include "llvm/Transforms/Scalar/GVN.h"
102	#include "llvm/Transforms/Scalar/IndVarSimplify.h"
103	#include "llvm/Transforms/Scalar/InferAlignment.h"
104	#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
105	#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
106	#include "llvm/Transforms/Scalar/JumpThreading.h"
107	#include "llvm/Transforms/Scalar/LICM.h"
108	#include "llvm/Transforms/Scalar/LoopDeletion.h"
109	#include "llvm/Transforms/Scalar/LoopDistribute.h"
110	#include "llvm/Transforms/Scalar/LoopFlatten.h"
111	#include "llvm/Transforms/Scalar/LoopFuse.h"
112	#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
113	#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
114	#include "llvm/Transforms/Scalar/LoopInterchange.h"
115	#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
116	#include "llvm/Transforms/Scalar/LoopPassManager.h"
117	#include "llvm/Transforms/Scalar/LoopRotation.h"
118	#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
119	#include "llvm/Transforms/Scalar/LoopSink.h"
120	#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
121	#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
122	#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
123	#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
124	#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
125	#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
126	#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
127	#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
128	#include "llvm/Transforms/Scalar/NewGVN.h"
129	#include "llvm/Transforms/Scalar/Reassociate.h"
130	#include "llvm/Transforms/Scalar/SCCP.h"
131	#include "llvm/Transforms/Scalar/SROA.h"
132	#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
133	#include "llvm/Transforms/Scalar/SimplifyCFG.h"
134	#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
135	#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
136	#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
137	#include "llvm/Transforms/Utils/AddDiscriminators.h"
138	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
139	#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
140	#include "llvm/Transforms/Utils/CountVisits.h"
141	#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
142	#include "llvm/Transforms/Utils/ExtraPassManager.h"
143	#include "llvm/Transforms/Utils/InjectTLIMappings.h"
144	#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
145	#include "llvm/Transforms/Utils/Mem2Reg.h"
146	#include "llvm/Transforms/Utils/MoveAutoInit.h"
147	#include "llvm/Transforms/Utils/NameAnonGlobals.h"
148	#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
149	#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
150	#include "llvm/Transforms/Vectorize/LoopVectorize.h"
151	#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
152	#include "llvm/Transforms/Vectorize/VectorCombine.h"
153
154	using namespace llvm;
155
156	namespace llvm {
157
158	static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
159	"enable-ml-inliner", cl::init(Val: InliningAdvisorMode::Default), cl::Hidden,
160	cl::desc ("Enable ML policy for inliner. Currently trained for -Oz only"),
161	cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
162	"Heuristics-based inliner version"),
163	clEnumValN(InliningAdvisorMode::Development, "development",
164	"Use development mode (runtime-loadable model)"),
165	clEnumValN(InliningAdvisorMode::Release, "release",
166	"Use release mode (AOT-compiled model)")));
167
168	/// Flag to enable inline deferral during PGO.
169	static cl::opt<bool>
170	EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(Val: true),
171	cl::Hidden,
172	cl::desc ("Enable inline deferral during PGO"));
173
174	static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
175	cl::init(Val: false), cl::Hidden,
176	cl::desc ("Enable module inliner"));
177
178	static cl::opt<bool> PerformMandatoryInliningsFirst(
179	"mandatory-inlining-first", cl::init(Val: false), cl::Hidden,
180	cl::desc ("Perform mandatory inlinings module-wide, before performing "
181	"inlining"));
182
183	static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
184	"eagerly-invalidate-analyses", cl::init(Val: true), cl::Hidden,
185	cl::desc ("Eagerly invalidate more analyses in default pipelines"));
186
187	static cl::opt<bool> EnableMergeFunctions(
188	"enable-merge-functions", cl::init(Val: false), cl::Hidden,
189	cl::desc ("Enable function merging as part of the optimization pipeline"));
190
191	static cl::opt<bool> EnablePostPGOLoopRotation(
192	"enable-post-pgo-loop-rotation", cl::init(Val: true), cl::Hidden,
193	cl::desc ("Run the loop rotation transformation after PGO instrumentation"));
194
195	static cl::opt<bool> EnableGlobalAnalyses(
196	"enable-global-analyses", cl::init(Val: true), cl::Hidden,
197	cl::desc ("Enable inter-procedural analyses"));
198
199	static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
200	cl::init(Val: false), cl::Hidden,
201	cl::desc ("Run Partial inlining pass"));
202
203	static cl::opt<bool> ExtraVectorizerPasses(
204	"extra-vectorizer-passes", cl::init(Val: false), cl::Hidden,
205	cl::desc ("Run cleanup optimization passes after vectorization"));
206
207	static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(Val: false), cl::Hidden,
208	cl::desc ("Run the NewGVN pass"));
209
210	static cl::opt<bool>
211	EnableLoopInterchange("enable-loopinterchange", cl::init(Val: false), cl::Hidden,
212	cl::desc ("Enable the LoopInterchange Pass"));
213
214	static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
215	cl::init(Val: false), cl::Hidden,
216	cl::desc ("Enable Unroll And Jam Pass"));
217
218	static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(Val: false),
219	cl::Hidden,
220	cl::desc ("Enable the LoopFlatten Pass"));
221
222	// Experimentally allow loop header duplication. This should allow for better
223	// optimization at Oz, since loop-idiom recognition can then recognize things
224	// like memcpy. If this ends up being useful for many targets, we should drop
225	// this flag and make a code generation option that can be controlled
226	// independent of the opt level and exposed through the frontend.
227	static cl::opt<bool> EnableLoopHeaderDuplication(
228	"enable-loop-header-duplication", cl::init(Val: false), cl::Hidden,
229	cl::desc ("Enable loop header duplication at any optimization level"));
230
231	static cl::opt<bool>
232	EnableDFAJumpThreading("enable-dfa-jump-thread",
233	cl::desc ("Enable DFA jump threading"),
234	cl::init(Val: false), cl::Hidden);
235
236	static cl::opt<bool>
237	EnableHotColdSplit("hot-cold-split",
238	cl::desc ("Enable hot-cold splitting pass"));
239
240	static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(Val: false),
241	cl::Hidden,
242	cl::desc ("Enable ir outliner pass"));
243
244	static cl::opt<bool>
245	DisablePreInliner("disable-preinline", cl::init(Val: false), cl::Hidden,
246	cl::desc ("Disable pre-instrumentation inliner"));
247
248	static cl::opt<int> PreInlineThreshold(
249	"preinline-threshold", cl::Hidden, cl::init(Val: `75`),
250	cl::desc ("Control the amount of inlining in pre-instrumentation inliner "
251	"(default = 75)"));
252
253	static cl::opt<bool>
254	EnableGVNHoist("enable-gvn-hoist",
255	cl::desc ("Enable the GVN hoisting pass (default = off)"));
256
257	static cl::opt<bool>
258	EnableGVNSink("enable-gvn-sink",
259	cl::desc ("Enable the GVN sinking pass (default = off)"));
260
261	static cl::opt<bool> EnableJumpTableToSwitch(
262	"enable-jump-table-to-switch",
263	cl::desc ("Enable JumpTableToSwitch pass (default = off)"));
264
265	// This option is used in simplifying testing SampleFDO optimizations for
266	// profile loading.
267	static cl::opt<bool>
268	EnableCHR("enable-chr", cl::init(Val: true), cl::Hidden,
269	cl::desc ("Enable control height reduction optimization (CHR)"));
270
271	static cl::opt<bool> FlattenedProfileUsed(
272	"flattened-profile-used", cl::init(Val: false), cl::Hidden,
273	cl::desc ("Indicate the sample profile being used is flattened, i.e., "
274	"no inline hierarchy exists in the profile"));
275
276	static cl::opt<bool>
277	EnableMatrix("enable-matrix", cl::init(Val: false), cl::Hidden,
278	cl::desc ("Enable lowering of the matrix intrinsics"));
279
280	static cl::opt<bool> EnableConstraintElimination(
281	"enable-constraint-elimination", cl::init(Val: true), cl::Hidden,
282	cl::desc (
283	"Enable pass to eliminate conditions based on linear constraints"));
284
285	static cl::opt<AttributorRunOption> AttributorRun(
286	"attributor-enable", cl::Hidden, cl::init(Val: AttributorRunOption::NONE),
287	cl::desc ("Enable the attributor inter-procedural deduction pass"),
288	cl::values(clEnumValN(AttributorRunOption::FULL, "full",
289	"enable all full attributor runs"),
290	clEnumValN(AttributorRunOption::LIGHT, "light",
291	"enable all attributor-light runs"),
292	clEnumValN(AttributorRunOption::MODULE, "module",
293	"enable module-wide attributor runs"),
294	clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light",
295	"enable module-wide attributor-light runs"),
296	clEnumValN(AttributorRunOption::CGSCC, "cgscc",
297	"enable call graph SCC attributor runs"),
298	clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light",
299	"enable call graph SCC attributor-light runs"),
300	clEnumValN(AttributorRunOption::NONE, "none",
301	"disable attributor runs")));
302
303	static cl::opt<bool> EnableSampledInstr(
304	"enable-sampled-instrumentation", cl::init(Val: false), cl::Hidden,
305	cl::desc ("Enable profile instrumentation sampling (default = off)"));
306	static cl::opt<bool> UseLoopVersioningLICM(
307	"enable-loop-versioning-licm", cl::init(Val: false), cl::Hidden,
308	cl::desc ("Enable the experimental Loop Versioning LICM pass"));
309
310	static cl::opt<std::string> InstrumentColdFuncOnlyPath(
311	"instrument-cold-function-only-path", cl::init(Val: ""),
312	cl::desc ("File path for cold function only instrumentation(requires use "
313	"with --pgo-instrument-cold-function-only)"),
314	cl::Hidden);
315
316	// TODO: There is a similar flag in WPD pass, we should consolidate them by
317	// parsing the option only once in PassBuilder and share it across both places.
318	static cl::opt<bool> EnableDevirtualizeSpeculatively(
319	"enable-devirtualize-speculatively",
320	cl::desc ("Enable speculative devirtualization optimization"),
321	cl::init(Val: false));
322
323	extern cl::opt<std::string> UseCtxProfile;
324	extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
325
326	extern cl::opt<bool> EnableMemProfContextDisambiguation;
327	} // namespace llvm
328
329	PipelineTuningOptions::PipelineTuningOptions() {
330	LoopInterleaving = true;
331	LoopVectorization = true;
332	SLPVectorization = false;
333	LoopUnrolling = true;
334	LoopInterchange = EnableLoopInterchange;
335	LoopFusion = false;
336	ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
337	LicmMssaOptCap = SetLicmMssaOptCap;
338	LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
339	CallGraphProfile = true;
340	UnifiedLTO = false;
341	MergeFunctions = EnableMergeFunctions;
342	InlinerThreshold = -`1`;
343	EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
344	DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively;
345	}
346
347	namespace llvm {
348	extern cl::opt<unsigned> MaxDevirtIterations;
349	} // namespace llvm
350
351	void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
352	OptimizationLevel Level) {
353	for (auto &C : PeepholeEPCallbacks)
354	C (FPM, Level);
355	}
356	void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
357	LoopPassManager &LPM, OptimizationLevel Level) {
358	for (auto &C : LateLoopOptimizationsEPCallbacks)
359	C (LPM, Level);
360	}
361	void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
362	OptimizationLevel Level) {
363	for (auto &C : LoopOptimizerEndEPCallbacks)
364	C (LPM, Level);
365	}
366	void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
367	FunctionPassManager &FPM, OptimizationLevel Level) {
368	for (auto &C : ScalarOptimizerLateEPCallbacks)
369	C (FPM, Level);
370	}
371	void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
372	OptimizationLevel Level) {
373	for (auto &C : CGSCCOptimizerLateEPCallbacks)
374	C (CGPM, Level);
375	}
376	void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
377	OptimizationLevel Level) {
378	for (auto &C : VectorizerStartEPCallbacks)
379	C (FPM, Level);
380	}
381	void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
382	OptimizationLevel Level) {
383	for (auto &C : VectorizerEndEPCallbacks)
384	C (FPM, Level);
385	}
386	void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
387	OptimizationLevel Level,
388	ThinOrFullLTOPhase Phase) {
389	for (auto &C : OptimizerEarlyEPCallbacks)
390	C (MPM, Level, Phase);
391	}
392	void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
393	OptimizationLevel Level,
394	ThinOrFullLTOPhase Phase) {
395	for (auto &C : OptimizerLastEPCallbacks)
396	C (MPM, Level, Phase);
397	}
398	void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
399	ModulePassManager &MPM, OptimizationLevel Level) {
400	for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
401	C (MPM, Level);
402	}
403	void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
404	ModulePassManager &MPM, OptimizationLevel Level) {
405	for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
406	C (MPM, Level);
407	}
408	void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
409	OptimizationLevel Level) {
410	for (auto &C : PipelineStartEPCallbacks)
411	C (MPM, Level);
412	}
413	void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
414	ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
415	for (auto &C : PipelineEarlySimplificationEPCallbacks)
416	C (MPM, Level, Phase);
417	}
418
419	// Helper to add AnnotationRemarksPass.
420	static void addAnnotationRemarksPass(ModulePassManager &MPM) {
421	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass ()));
422	// Count the stats for InstCount and FunctionPropertiesAnalysis
423	if (AreStatisticsEnabled()) {
424	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: InstCountPass ()));
425	MPM.addPass(
426	Pass: createModuleToFunctionPassAdaptor(Pass: FunctionPropertiesStatisticsPass ()));
427	}
428	}
429
430	// Helper to check if the current compilation phase is preparing for LTO
431	static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
432	return Phase == ThinOrFullLTOPhase::ThinLTOPreLink \|\|
433	Phase == ThinOrFullLTOPhase::FullLTOPreLink;
434	}
435
436	// Helper to check if the current compilation phase is LTO backend
437	static bool isLTOPostLink(ThinOrFullLTOPhase Phase) {
438	return Phase == ThinOrFullLTOPhase::ThinLTOPostLink \|\|
439	Phase == ThinOrFullLTOPhase::FullLTOPostLink;
440	}
441
442	// Helper to wrap conditionally Coro passes.
443	static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) {
444	// TODO: Skip passes according to Phase.
445	ModulePassManager CoroPM;
446	CoroPM.addPass(Pass: CoroEarlyPass ());
447	CGSCCPassManager CGPM;
448	CGPM.addPass(Pass: CoroSplitPass ());
449	CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
450	CoroPM.addPass(Pass: CoroCleanupPass ());
451	CoroPM.addPass(Pass: GlobalDCEPass ());
452	return CoroConditionalWrapper (std::move(CoroPM));
453	}
454
455	// TODO: Investigate the cost/benefit of tail call elimination on debugging.
456	FunctionPassManager
457	PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
458	ThinOrFullLTOPhase Phase) {
459
460	FunctionPassManager FPM;
461
462	if (AreStatisticsEnabled())
463	FPM.addPass(Pass: CountVisitsPass ());
464
465	// Form SSA out of local memory accesses after breaking apart aggregates into
466	// scalars.
467	FPM.addPass(Pass: SROAPass (SROAOptions::ModifyCFG));
468
469	// Catch trivial redundancies
470	FPM.addPass(Pass: EarlyCSEPass (true / Enable mem-ssa. /));
471
472	// Hoisting of scalars and load expressions.
473	FPM.addPass(
474	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
475	FPM.addPass(Pass: InstCombinePass ());
476
477	FPM.addPass(Pass: LibCallsShrinkWrapPass ());
478
479	invokePeepholeEPCallbacks(FPM, Level);
480
481	FPM.addPass(
482	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
483
484	// Form canonically associated expression trees, and simplify the trees using
485	// basic mathematical properties. For example, this will form (nearly)
486	// minimal multiplication trees.
487	FPM.addPass(Pass: ReassociatePass ());
488
489	// Add the primary loop simplification pipeline.
490	// FIXME: Currently this is split into two loop pass pipelines because we run
491	// some function passes in between them. These can and should be removed
492	// and/or replaced by scheduling the loop pass equivalents in the correct
493	// positions. But those equivalent passes aren't powerful enough yet.
494	// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
495	// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
496	// fully replace `SimplifyCFGPass`, and the closest to the other we have is
497	// `LoopInstSimplify`.
498	LoopPassManager LPM1, LPM2;
499
500	// Simplify the loop body. We do this initially to clean up after other loop
501	// passes run, either when iterating on a loop or on inner loops with
502	// implications on the outer loop.
503	LPM1.addPass(Pass: LoopInstSimplifyPass ());
504	LPM1.addPass(Pass: LoopSimplifyCFGPass ());
505
506	// Try to remove as much code from the loop header as possible,
507	// to reduce amount of IR that will have to be duplicated. However,
508	// do not perform speculative hoisting the first time as LICM
509	// will destroy metadata that may not need to be destroyed if run
510	// after loop rotation.
511	// TODO: Investigate promotion cap for O1.
512	LPM1.addPass(Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
513	/AllowSpeculation=/false));
514
515	LPM1.addPass(
516	Pass: LoopRotatePass (/EnableHeaderDuplication=/true, isLTOPreLink(Phase)));
517	// TODO: Investigate promotion cap for O1.
518	LPM1.addPass(Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
519	/AllowSpeculation=/true));
520	LPM1.addPass(Pass: SimpleLoopUnswitchPass ());
521	if (EnableLoopFlatten)
522	LPM1.addPass(Pass: LoopFlattenPass ());
523
524	LPM2.addPass(Pass: LoopIdiomRecognizePass ());
525	LPM2.addPass(Pass: IndVarSimplifyPass ());
526
527	invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
528
529	LPM2.addPass(Pass: LoopDeletionPass ());
530
531	// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
532	// because it changes IR to makes profile annotation in back compile
533	// inaccurate. The normal unroller doesn't pay attention to forced full unroll
534	// attributes so we need to make sure and allow the full unroll pass to pay
535	// attention to it.
536	if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink \|\| !PGOOpt \|\|
537	PGOOpt ->Action != PGOOptions::SampleUse)
538	LPM2.addPass(Pass: LoopFullUnrollPass (Level.getSpeedupLevel(),
539	/ OnlyWhenForced= / !PTO.LoopUnrolling,
540	PTO.ForgetAllSCEVInLoopUnroll));
541
542	invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
543
544	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
545	/UseMemorySSA=/true));
546	FPM.addPass(
547	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
548	FPM.addPass(Pass: InstCombinePass ());
549	// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
550	// All* loop passes must preserve it, in order to be able to use it.*
551	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
552	/UseMemorySSA=/false));
553
554	// Delete small array after loop unroll.
555	FPM.addPass(Pass: SROAPass (SROAOptions::ModifyCFG));
556
557	// Specially optimize memory movement as it doesn't look like dataflow in SSA.
558	FPM.addPass(Pass: MemCpyOptPass ());
559
560	// Sparse conditional constant propagation.
561	// FIXME: It isn't clear why we do this after* loop passes rather than*
562	// before...
563	FPM.addPass(Pass: SCCPPass ());
564
565	// Delete dead bit computations (instcombine runs after to fold away the dead
566	// computations, and then ADCE will run later to exploit any new DCE
567	// opportunities that creates).
568	FPM.addPass(Pass: BDCEPass ());
569
570	// Run instcombine after redundancy and dead bit elimination to exploit
571	// opportunities opened up by them.
572	FPM.addPass(Pass: InstCombinePass ());
573	invokePeepholeEPCallbacks(FPM, Level);
574
575	FPM.addPass(Pass: CoroElidePass ());
576
577	invokeScalarOptimizerLateEPCallbacks(FPM, Level);
578
579	// Finally, do an expensive DCE pass to catch all the dead code exposed by
580	// the simplifications and basic cleanup after all the simplifications.
581	// TODO: Investigate if this is too expensive.
582	FPM.addPass(Pass: ADCEPass ());
583	FPM.addPass(
584	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
585	FPM.addPass(Pass: InstCombinePass ());
586	invokePeepholeEPCallbacks(FPM, Level);
587
588	return FPM;
589	}
590
591	FunctionPassManager
592	PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
593	ThinOrFullLTOPhase Phase) {
594	assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
595
596	// The O1 pipeline has a separate pipeline creation function to simplify
597	// construction readability.
598	if (Level.getSpeedupLevel() == `1`)
599	return buildO1FunctionSimplificationPipeline(Level, Phase);
600
601	FunctionPassManager FPM;
602
603	if (AreStatisticsEnabled())
604	FPM.addPass(Pass: CountVisitsPass ());
605
606	// Form SSA out of local memory accesses after breaking apart aggregates into
607	// scalars.
608	FPM.addPass(Pass: SROAPass (SROAOptions::ModifyCFG));
609
610	// Catch trivial redundancies
611	FPM.addPass(Pass: EarlyCSEPass (true / Enable mem-ssa. /));
612	if (EnableKnowledgeRetention)
613	FPM.addPass(Pass: AssumeSimplifyPass ());
614
615	// Hoisting of scalars and load expressions.
616	if (EnableGVNHoist)
617	FPM.addPass(Pass: GVNHoistPass ());
618
619	// Global value numbering based sinking.
620	if (EnableGVNSink) {
621	FPM.addPass(Pass: GVNSinkPass ());
622	FPM.addPass(
623	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
624	}
625
626	// Speculative execution if the target has divergent branches; otherwise nop.
627	FPM.addPass(Pass: SpeculativeExecutionPass (/ OnlyIfDivergentTarget =/true));
628
629	// Optimize based on known information about branches, and cleanup afterward.
630	FPM.addPass(Pass: JumpThreadingPass ());
631	FPM.addPass(Pass: CorrelatedValuePropagationPass ());
632
633	// Jump table to switch conversion.
634	if (EnableJumpTableToSwitch)
635	FPM.addPass(Pass: JumpTableToSwitchPass (
636	/InLTO=/Phase == ThinOrFullLTOPhase::ThinLTOPostLink \|\|
637	Phase == ThinOrFullLTOPhase::FullLTOPostLink));
638
639	FPM.addPass(
640	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
641	FPM.addPass(Pass: InstCombinePass ());
642	FPM.addPass(Pass: AggressiveInstCombinePass ());
643
644	if (!Level.isOptimizingForSize())
645	FPM.addPass(Pass: LibCallsShrinkWrapPass ());
646
647	invokePeepholeEPCallbacks(FPM, Level);
648
649	// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
650	// using the size value profile. Don't perform this when optimizing for size.
651	if (PGOOpt && PGOOpt ->Action == PGOOptions::IRUse &&
652	!Level.isOptimizingForSize())
653	FPM.addPass(Pass: PGOMemOPSizeOpt ());
654
655	FPM.addPass(Pass: TailCallElimPass (/UpdateFunctionEntryCount=/
656	isInstrumentedPGOUse()));
657	FPM.addPass(
658	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
659
660	// Form canonically associated expression trees, and simplify the trees using
661	// basic mathematical properties. For example, this will form (nearly)
662	// minimal multiplication trees.
663	FPM.addPass(Pass: ReassociatePass ());
664
665	if (EnableConstraintElimination)
666	FPM.addPass(Pass: ConstraintEliminationPass ());
667
668	// Add the primary loop simplification pipeline.
669	// FIXME: Currently this is split into two loop pass pipelines because we run
670	// some function passes in between them. These can and should be removed
671	// and/or replaced by scheduling the loop pass equivalents in the correct
672	// positions. But those equivalent passes aren't powerful enough yet.
673	// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
674	// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
675	// fully replace `SimplifyCFGPass`, and the closest to the other we have is
676	// `LoopInstSimplify`.
677	LoopPassManager LPM1, LPM2;
678
679	// Simplify the loop body. We do this initially to clean up after other loop
680	// passes run, either when iterating on a loop or on inner loops with
681	// implications on the outer loop.
682	LPM1.addPass(Pass: LoopInstSimplifyPass ());
683	LPM1.addPass(Pass: LoopSimplifyCFGPass ());
684
685	// Try to remove as much code from the loop header as possible,
686	// to reduce amount of IR that will have to be duplicated. However,
687	// do not perform speculative hoisting the first time as LICM
688	// will destroy metadata that may not need to be destroyed if run
689	// after loop rotation.
690	// TODO: Investigate promotion cap for O1.
691	LPM1.addPass(Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
692	/AllowSpeculation=/false));
693
694	// Disable header duplication in loop rotation at -Oz.
695	LPM1.addPass(Pass: LoopRotatePass (EnableLoopHeaderDuplication \|\|
696	Level != OptimizationLevel::Oz,
697	isLTOPreLink(Phase)));
698	// TODO: Investigate promotion cap for O1.
699	LPM1.addPass(Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
700	/AllowSpeculation=/true));
701	LPM1.addPass(
702	Pass: SimpleLoopUnswitchPass (/ NonTrivial / Level == OptimizationLevel::O3));
703	if (EnableLoopFlatten)
704	LPM1.addPass(Pass: LoopFlattenPass ());
705
706	LPM2.addPass(Pass: LoopIdiomRecognizePass ());
707	LPM2.addPass(Pass: IndVarSimplifyPass ());
708
709	{
710	ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses;
711	ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass (/ NonTrivial / Level ==
712	OptimizationLevel::O3));
713	LPM2.addPass(Pass: std::move(ExtraPasses));
714	}
715
716	invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
717
718	LPM2.addPass(Pass: LoopDeletionPass ());
719
720	// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
721	// because it changes IR to makes profile annotation in back compile
722	// inaccurate. The normal unroller doesn't pay attention to forced full unroll
723	// attributes so we need to make sure and allow the full unroll pass to pay
724	// attention to it.
725	if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink \|\| !PGOOpt \|\|
726	PGOOpt ->Action != PGOOptions::SampleUse)
727	LPM2.addPass(Pass: LoopFullUnrollPass (Level.getSpeedupLevel(),
728	/ OnlyWhenForced= / !PTO.LoopUnrolling,
729	PTO.ForgetAllSCEVInLoopUnroll));
730
731	invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
732
733	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
734	/UseMemorySSA=/true));
735	FPM.addPass(
736	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
737	FPM.addPass(Pass: InstCombinePass ());
738	// The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
739	// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
740	// All* loop passes must preserve it, in order to be able to use it.*
741	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
742	/UseMemorySSA=/false));
743
744	// Delete small array after loop unroll.
745	FPM.addPass(Pass: SROAPass (SROAOptions::ModifyCFG));
746
747	// Try vectorization/scalarization transforms that are both improvements
748	// themselves and can allow further folds with GVN and InstCombine.
749	FPM.addPass(Pass: VectorCombinePass (/TryEarlyFoldsOnly=/true));
750
751	// Eliminate redundancies.
752	FPM.addPass(Pass: MergedLoadStoreMotionPass ());
753	if (RunNewGVN)
754	FPM.addPass(Pass: NewGVNPass ());
755	else
756	FPM.addPass(Pass: GVNPass ());
757
758	// Sparse conditional constant propagation.
759	// FIXME: It isn't clear why we do this after* loop passes rather than*
760	// before...
761	FPM.addPass(Pass: SCCPPass ());
762
763	// Delete dead bit computations (instcombine runs after to fold away the dead
764	// computations, and then ADCE will run later to exploit any new DCE
765	// opportunities that creates).
766	FPM.addPass(Pass: BDCEPass ());
767
768	// Run instcombine after redundancy and dead bit elimination to exploit
769	// opportunities opened up by them.
770	FPM.addPass(Pass: InstCombinePass ());
771	invokePeepholeEPCallbacks(FPM, Level);
772
773	// Re-consider control flow based optimizations after redundancy elimination,
774	// redo DCE, etc.
775	if (EnableDFAJumpThreading)
776	FPM.addPass(Pass: DFAJumpThreadingPass ());
777
778	FPM.addPass(Pass: JumpThreadingPass ());
779	FPM.addPass(Pass: CorrelatedValuePropagationPass ());
780
781	// Finally, do an expensive DCE pass to catch all the dead code exposed by
782	// the simplifications and basic cleanup after all the simplifications.
783	// TODO: Investigate if this is too expensive.
784	FPM.addPass(Pass: ADCEPass ());
785
786	// Specially optimize memory movement as it doesn't look like dataflow in SSA.
787	FPM.addPass(Pass: MemCpyOptPass ());
788
789	FPM.addPass(Pass: DSEPass ());
790	FPM.addPass(Pass: MoveAutoInitPass ());
791
792	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
793	Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
794	/AllowSpeculation=/true),
795	/UseMemorySSA=/true));
796
797	FPM.addPass(Pass: CoroElidePass ());
798
799	invokeScalarOptimizerLateEPCallbacks(FPM, Level);
800
801	FPM.addPass(Pass: SimplifyCFGPass (SimplifyCFGOptions ()
802	.convertSwitchRangeToICmp(B: true)
803	.convertSwitchToArithmetic(B: true)
804	.hoistCommonInsts(B: true)
805	.sinkCommonInsts(B: true)));
806	FPM.addPass(Pass: InstCombinePass ());
807	invokePeepholeEPCallbacks(FPM, Level);
808
809	return FPM;
810	}
811
812	void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
813	MPM.addPass(Pass: CanonicalizeAliasesPass ());
814	MPM.addPass(Pass: NameAnonGlobalPass ());
815	}
816
817	void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
818	OptimizationLevel Level,
819	ThinOrFullLTOPhase LTOPhase) {
820	assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
821	if (DisablePreInliner)
822	return;
823	InlineParams IP;
824
825	IP.DefaultThreshold = PreInlineThreshold;
826
827	// FIXME: The hint threshold has the same value used by the regular inliner
828	// when not optimzing for size. This should probably be lowered after
829	// performance testing.
830	// FIXME: this comment is cargo culted from the old pass manager, revisit).
831	IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : `325`;
832	ModuleInlinerWrapperPass MIWP(
833	IP, / MandatoryFirst / true,
834	InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner});
835	CGSCCPassManager &CGPipeline = MIWP.getPM();
836
837	FunctionPassManager FPM;
838	FPM.addPass(Pass: SROAPass (SROAOptions::ModifyCFG));
839	FPM.addPass(Pass: EarlyCSEPass ()); // Catch trivial redundancies.
840	FPM.addPass(Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(
841	B: true))); // Merge & remove basic blocks.
842	FPM.addPass(Pass: InstCombinePass ()); // Combine silly sequences.
843	invokePeepholeEPCallbacks(FPM, Level);
844
845	CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
846	Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
847
848	MPM.addPass(Pass: std::move(MIWP));
849
850	// Delete anything that is now dead to make sure that we don't instrument
851	// dead code. Instrumentation can end up keeping dead code around and
852	// dramatically increase code size.
853	MPM.addPass(Pass: GlobalDCEPass ());
854	}
855
856	void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
857	OptimizationLevel Level) {
858	if (EnablePostPGOLoopRotation) {
859	// Disable header duplication in loop rotation at -Oz.
860	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
861	Pass: createFunctionToLoopPassAdaptor(
862	Pass: LoopRotatePass (EnableLoopHeaderDuplication \|\|
863	Level != OptimizationLevel::Oz),
864	/UseMemorySSA=/false),
865	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
866	}
867	}
868
869	void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
870	OptimizationLevel Level, bool RunProfileGen,
871	bool IsCS, bool AtomicCounterUpdate,
872	std::string ProfileFile,
873	std::string ProfileRemappingFile) {
874	assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
875
876	if (!RunProfileGen) {
877	assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
878	MPM.addPass(
879	Pass: PGOInstrumentationUse (ProfileFile, ProfileRemappingFile, IsCS, FS));
880	// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
881	// RequireAnalysisPass for PSI before subsequent non-module passes.
882	MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
883	return;
884	}
885
886	// Perform PGO instrumentation.
887	MPM.addPass(Pass: PGOInstrumentationGen (IsCS ? PGOInstrumentationType::CSFDO
888	: PGOInstrumentationType::FDO));
889
890	addPostPGOLoopRotation(MPM, Level);
891	// Add the profile lowering pass.
892	InstrProfOptions Options;
893	if (!ProfileFile.empty())
894	Options.InstrProfileOutput = ProfileFile;
895	// Do counter promotion at Level greater than O0.
896	Options.DoCounterPromotion = true;
897	Options.UseBFIInPromotion = IsCS;
898	if (EnableSampledInstr) {
899	Options.Sampling = true;
900	// With sampling, there is little beneifit to enable counter promotion.
901	// But note that sampling does work with counter promotion.
902	Options.DoCounterPromotion = false;
903	}
904	Options.Atomic = AtomicCounterUpdate;
905	MPM.addPass(Pass: InstrProfilingLoweringPass (Options, IsCS));
906	}
907
908	void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
909	bool RunProfileGen, bool IsCS,
910	bool AtomicCounterUpdate,
911	std::string ProfileFile,
912	std::string ProfileRemappingFile) {
913	if (!RunProfileGen) {
914	assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
915	MPM.addPass(
916	Pass: PGOInstrumentationUse (ProfileFile, ProfileRemappingFile, IsCS, FS));
917	// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
918	// RequireAnalysisPass for PSI before subsequent non-module passes.
919	MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
920	return;
921	}
922
923	// Perform PGO instrumentation.
924	MPM.addPass(Pass: PGOInstrumentationGen (IsCS ? PGOInstrumentationType::CSFDO
925	: PGOInstrumentationType::FDO));
926	// Add the profile lowering pass.
927	InstrProfOptions Options;
928	if (!ProfileFile.empty())
929	Options.InstrProfileOutput = ProfileFile;
930	// Do not do counter promotion at O0.
931	Options.DoCounterPromotion = false;
932	Options.UseBFIInPromotion = IsCS;
933	Options.Atomic = AtomicCounterUpdate;
934	MPM.addPass(Pass: InstrProfilingLoweringPass (Options, IsCS));
935	}
936
937	static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
938	return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel());
939	}
940
941	ModuleInlinerWrapperPass
942	PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
943	ThinOrFullLTOPhase Phase) {
944	InlineParams IP;
945	if (PTO.InlinerThreshold == -`1`)
946	IP = getInlineParamsFromOptLevel(Level);
947	else
948	IP = getInlineParams(Threshold: PTO.InlinerThreshold);
949	// For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
950	// set hot-caller threshold to 0 to disable hot
951	// callsite inline (as much as possible [1]) because it makes
952	// profile annotation in the backend inaccurate.
953	//
954	// [1] Note the cost of a function could be below zero due to erased
955	// prologue / epilogue.
956	if (isLTOPreLink(Phase) && PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse)
957	IP.HotCallSiteThreshold = `0`;
958
959	if (PGOOpt)
960	IP.EnableDeferral = EnablePGOInlineDeferral;
961
962	ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
963	InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner},
964	UseInlineAdvisor, MaxDevirtIterations);
965
966	// Require the GlobalsAA analysis for the module so we can query it within
967	// the CGSCC pipeline.
968	if (EnableGlobalAnalyses) {
969	MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
970	// Invalidate AAManager so it can be recreated and pick up the newly
971	// available GlobalsAA.
972	MIWP.addModulePass(
973	Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
974	}
975
976	// Require the ProfileSummaryAnalysis for the module so we can query it within
977	// the inliner pass.
978	MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
979
980	// Now begin the main postorder CGSCC pipeline.
981	// FIXME: The current CGSCC pipeline has its origins in the legacy pass
982	// manager and trying to emulate its precise behavior. Much of this doesn't
983	// make a lot of sense and we should revisit the core CGSCC structure.
984	CGSCCPassManager &MainCGPipeline = MIWP.getPM();
985
986	// Note: historically, the PruneEH pass was run first to deduce nounwind and
987	// generally clean up exception handling overhead. It isn't clear this is
988	// valuable as the inliner doesn't currently care whether it is inlining an
989	// invoke or a call.
990
991	if (AttributorRun & AttributorRunOption::CGSCC)
992	MainCGPipeline.addPass(Pass: AttributorCGSCCPass ());
993	else if (AttributorRun & AttributorRunOption::CGSCC_LIGHT)
994	MainCGPipeline.addPass(Pass: AttributorLightCGSCCPass ());
995
996	// Deduce function attributes. We do another run of this after the function
997	// simplification pipeline, so this only needs to run when it could affect the
998	// function simplification pipeline, which is only the case with recursive
999	// functions.
1000	MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass (/SkipNonRecursive/ true));
1001
1002	// When at O3 add argument promotion to the pass pipeline.
1003	// FIXME: It isn't at all clear why this should be limited to O3.
1004	if (Level == OptimizationLevel::O3)
1005	MainCGPipeline.addPass(Pass: ArgumentPromotionPass ());
1006
1007	// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1008	// there are no OpenMP runtime calls present in the module.
1009	if (Level == OptimizationLevel::O2 \|\| Level == OptimizationLevel::O3)
1010	MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass (Phase));
1011
1012	invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level);
1013
1014	// Add the core function simplification pipeline nested inside the
1015	// CGSCC walk.
1016	MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1017	Pass: buildFunctionSimplificationPipeline(Level, Phase),
1018	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /NoRerun=/true));
1019
1020	// Finally, deduce any function attributes based on the fully simplified
1021	// function.
1022	MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass ());
1023
1024	// Mark that the function is fully simplified and that it shouldn't be
1025	// simplified again if we somehow revisit it due to CGSCC mutations unless
1026	// it's been modified since.
1027	MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
1028	Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
1029
1030	if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1031	MainCGPipeline.addPass(Pass: CoroSplitPass (Level != OptimizationLevel::O0));
1032	MainCGPipeline.addPass(Pass: CoroAnnotationElidePass ());
1033	}
1034
1035	// Make sure we don't affect potential future NoRerun CGSCC adaptors.
1036	MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor(
1037	Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
1038
1039	return MIWP;
1040	}
1041
1042	ModulePassManager
1043	PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
1044	ThinOrFullLTOPhase Phase) {
1045	ModulePassManager MPM;
1046
1047	InlineParams IP = getInlineParamsFromOptLevel(Level);
1048	// For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1049	// set hot-caller threshold to 0 to disable hot
1050	// callsite inline (as much as possible [1]) because it makes
1051	// profile annotation in the backend inaccurate.
1052	//
1053	// [1] Note the cost of a function could be below zero due to erased
1054	// prologue / epilogue.
1055	if (isLTOPreLink(Phase) && PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse)
1056	IP.HotCallSiteThreshold = `0`;
1057
1058	if (PGOOpt)
1059	IP.EnableDeferral = EnablePGOInlineDeferral;
1060
1061	// The inline deferral logic is used to avoid losing some
1062	// inlining chance in future. It is helpful in SCC inliner, in which
1063	// inlining is processed in bottom-up order.
1064	// While in module inliner, the inlining order is a priority-based order
1065	// by default. The inline deferral is unnecessary there. So we disable the
1066	// inline deferral logic in module inliner.
1067	IP.EnableDeferral = false;
1068
1069	MPM.addPass(Pass: ModuleInlinerPass (IP, UseInlineAdvisor, Phase));
1070	if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1071	MPM.addPass(Pass: GlobalOptPass ());
1072	MPM.addPass(Pass: GlobalDCEPass ());
1073	MPM.addPass(Pass: PGOCtxProfFlatteningPass (/IsPreThinlink=/false));
1074	}
1075
1076	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1077	Pass: buildFunctionSimplificationPipeline(Level, Phase),
1078	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1079
1080	if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1081	MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1082	Pass: CoroSplitPass (Level != OptimizationLevel::O0)));
1083	MPM.addPass(
1084	Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: CoroAnnotationElidePass ()));
1085	}
1086
1087	return MPM;
1088	}
1089
1090	ModulePassManager
1091	PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1092	ThinOrFullLTOPhase Phase) {
1093	assert(Level != OptimizationLevel::O0 &&
1094	"Should not be used for O0 pipeline");
1095
1096	assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1097	"FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1098
1099	ModulePassManager MPM;
1100
1101	// Place pseudo probe instrumentation as the first pass of the pipeline to
1102	// minimize the impact of optimization changes.
1103	if (PGOOpt && PGOOpt ->PseudoProbeForProfiling &&
1104	Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1105	MPM.addPass(Pass: SampleProfileProbePass (TM));
1106
1107	bool HasSampleProfile = PGOOpt && (PGOOpt ->Action == PGOOptions::SampleUse);
1108
1109	// In ThinLTO mode, when flattened profile is used, all the available
1110	// profile information will be annotated in PreLink phase so there is
1111	// no need to load the profile again in PostLink.
1112	bool LoadSampleProfile =
1113	HasSampleProfile &&
1114	!(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1115
1116	// During the ThinLTO backend phase we perform early indirect call promotion
1117	// here, before globalopt. Otherwise imported available_externally functions
1118	// look unreferenced and are removed. If we are going to load the sample
1119	// profile then defer until later.
1120	// TODO: See if we can move later and consolidate with the location where
1121	// we perform ICP when we are loading a sample profile.
1122	// TODO: We pass HasSampleProfile (whether there was a sample profile file
1123	// passed to the compile) to the SamplePGO flag of ICP. This is used to
1124	// determine whether the new direct calls are annotated with prof metadata.
1125	// Ideally this should be determined from whether the IR is annotated with
1126	// sample profile, and not whether the a sample profile was provided on the
1127	// command line. E.g. for flattened profiles where we will not be reloading
1128	// the sample profile in the ThinLTO backend, we ideally shouldn't have to
1129	// provide the sample profile file.
1130	if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1131	MPM.addPass(Pass: PGOIndirectCallPromotion (true / InLTO /, HasSampleProfile));
1132
1133	// Create an early function pass manager to cleanup the output of the
1134	// frontend. Not necessary with LTO post link pipelines since the pre link
1135	// pipeline already cleaned up the frontend output.
1136	if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1137	// Do basic inference of function attributes from known properties of system
1138	// libraries and other oracles.
1139	MPM.addPass(Pass: InferFunctionAttrsPass ());
1140	MPM.addPass(Pass: CoroEarlyPass ());
1141
1142	FunctionPassManager EarlyFPM;
1143	EarlyFPM.addPass(Pass: EntryExitInstrumenterPass (/PostInlining=/false));
1144	// Lower llvm.expect to metadata before attempting transforms.
1145	// Compare/branch metadata may alter the behavior of passes like
1146	// SimplifyCFG.
1147	EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass ());
1148	EarlyFPM.addPass(Pass: SimplifyCFGPass ());
1149	EarlyFPM.addPass(Pass: SROAPass (SROAOptions::ModifyCFG));
1150	EarlyFPM.addPass(Pass: EarlyCSEPass ());
1151	if (Level == OptimizationLevel::O3)
1152	EarlyFPM.addPass(Pass: CallSiteSplittingPass ());
1153	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1154	Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1155	}
1156
1157	if (LoadSampleProfile) {
1158	// Annotate sample profile right after early FPM to ensure freshness of
1159	// the debug info.
1160	MPM.addPass(Pass: SampleProfileLoaderPass (
1161	PGOOpt ->ProfileFile, PGOOpt ->ProfileRemappingFile, Phase, FS));
1162	// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1163	// RequireAnalysisPass for PSI before subsequent non-module passes.
1164	MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1165	// Do not invoke ICP in the LTOPrelink phase as it makes it hard
1166	// for the profile annotation to be accurate in the LTO backend.
1167	if (!isLTOPreLink(Phase))
1168	// We perform early indirect call promotion here, before globalopt.
1169	// This is important for the ThinLTO backend phase because otherwise
1170	// imported available_externally functions look unreferenced and are
1171	// removed.
1172	MPM.addPass(
1173	Pass: PGOIndirectCallPromotion (true / IsInLTO /, true / SamplePGO /));
1174	}
1175
1176	// Try to perform OpenMP specific optimizations on the module. This is a
1177	// (quick!) no-op if there are no OpenMP runtime calls present in the module.
1178	MPM.addPass(Pass: OpenMPOptPass (Phase));
1179
1180	if (AttributorRun & AttributorRunOption::MODULE)
1181	MPM.addPass(Pass: AttributorPass ());
1182	else if (AttributorRun & AttributorRunOption::MODULE_LIGHT)
1183	MPM.addPass(Pass: AttributorLightPass ());
1184
1185	// Lower type metadata and the type.test intrinsic in the ThinLTO
1186	// post link pipeline after ICP. This is to enable usage of the type
1187	// tests in ICP sequences.
1188	if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1189	MPM.addPass(Pass: LowerTypeTestsPass (nullptr, nullptr,
1190	lowertypetests::DropTestKind::Assume));
1191
1192	invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
1193
1194	// Interprocedural constant propagation now that basic cleanup has occurred
1195	// and prior to optimizing globals.
1196	// FIXME: This position in the pipeline hasn't been carefully considered in
1197	// years, it should be re-analyzed.
1198	MPM.addPass(Pass: IPSCCPPass (
1199	IPSCCPOptions (/AllowFuncSpec=/
1200	Level != OptimizationLevel::Os &&
1201	Level != OptimizationLevel::Oz &&
1202	!isLTOPreLink(Phase))));
1203
1204	// Attach metadata to indirect call sites indicating the set of functions
1205	// they may target at run-time. This should follow IPSCCP.
1206	MPM.addPass(Pass: CalledValuePropagationPass ());
1207
1208	// Optimize globals to try and fold them into constants.
1209	MPM.addPass(Pass: GlobalOptPass ());
1210
1211	// Create a small function pass pipeline to cleanup after all the global
1212	// optimizations.
1213	FunctionPassManager GlobalCleanupPM;
1214	// FIXME: Should this instead by a run of SROA?
1215	GlobalCleanupPM.addPass(Pass: PromotePass ());
1216	GlobalCleanupPM.addPass(Pass: InstCombinePass ());
1217	invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level);
1218	GlobalCleanupPM.addPass(
1219	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
1220	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM),
1221	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1222
1223	// We already asserted this happens in non-FullLTOPostLink earlier.
1224	const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1225	// Enable contextual profiling instrumentation.
1226	const bool IsCtxProfGen =
1227	IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
1228	const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1229	const bool IsPGOInstrGen =
1230	IsPGOPreLink && PGOOpt ->Action == PGOOptions::IRInstr;
1231	const bool IsPGOInstrUse =
1232	IsPGOPreLink && PGOOpt ->Action == PGOOptions::IRUse;
1233	const bool IsMemprofUse = IsPGOPreLink && !PGOOpt ->MemoryProfile.empty();
1234	// We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1235	// enable ctx profiling from the frontend.
1236	assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1237	"Enabling both instrumented PGO and contextual instrumentation is not "
1238	"supported.");
1239	const bool IsCtxProfUse =
1240	!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
1241
1242	assert(
1243	(InstrumentColdFuncOnlyPath.empty() \|\| PGOInstrumentColdFunctionOnly) &&
1244	"--instrument-cold-function-only-path is provided but "
1245	"--pgo-instrument-cold-function-only is not enabled");
1246	const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1247	IsPGOPreLink &&
1248	!InstrumentColdFuncOnlyPath.empty();
1249
1250	if (IsPGOInstrGen \|\| IsPGOInstrUse \|\| IsMemprofUse \|\| IsCtxProfGen \|\|
1251	IsCtxProfUse \|\| IsColdFuncOnlyInstrGen)
1252	addPreInlinerPasses(MPM, Level, LTOPhase: Phase);
1253
1254	// Add all the requested passes for instrumentation PGO, if requested.
1255	if (IsPGOInstrGen \|\| IsPGOInstrUse) {
1256	addPGOInstrPasses(MPM, Level,
1257	/RunProfileGen=/IsPGOInstrGen,
1258	/IsCS=/false, AtomicCounterUpdate: PGOOpt ->AtomicCounterUpdate,
1259	ProfileFile: PGOOpt ->ProfileFile, ProfileRemappingFile: PGOOpt ->ProfileRemappingFile);
1260	} else if (IsCtxProfGen \|\| IsCtxProfUse) {
1261	MPM.addPass(Pass: PGOInstrumentationGen (PGOInstrumentationType::CTXPROF));
1262	// In pre-link, we just want the instrumented IR. We use the contextual
1263	// profile in the post-thinlink phase.
1264	// The instrumentation will be removed in post-thinlink after IPO.
1265	// FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1266	// mechanism for GUIDs.
1267	MPM.addPass(Pass: AssignGUIDPass ());
1268	if (IsCtxProfUse) {
1269	MPM.addPass(Pass: PGOCtxProfFlatteningPass (/IsPreThinlink=/true));
1270	return MPM;
1271	}
1272	// Block further inlining in the instrumented ctxprof case. This avoids
1273	// confusingly collecting profiles for the same GUID corresponding to
1274	// different variants of the function. We could do like PGO and identify
1275	// functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1276	// thinlto to happen before performing any further optimizations, it's
1277	// unnecessary to collect profiles for non-prevailing copies.
1278	MPM.addPass(Pass: NoinlineNonPrevailing ());
1279	addPostPGOLoopRotation(MPM, Level);
1280	MPM.addPass(Pass: PGOCtxProfLoweringPass ());
1281	} else if (IsColdFuncOnlyInstrGen) {
1282	addPGOInstrPasses(MPM, Level, / RunProfileGen / true, / IsCS / false,
1283	/ AtomicCounterUpdate / false,
1284	ProfileFile: InstrumentColdFuncOnlyPath,
1285	/ ProfileRemappingFile / "");
1286	}
1287
1288	if (IsPGOInstrGen \|\| IsPGOInstrUse \|\| IsCtxProfGen)
1289	MPM.addPass(Pass: PGOIndirectCallPromotion (false, false));
1290
1291	if (IsPGOPreLink && PGOOpt ->CSAction == PGOOptions::CSIRInstr)
1292	MPM.addPass(Pass: PGOInstrumentationGenCreateVar (PGOOpt ->CSProfileGenFile,
1293	EnableSampledInstr));
1294
1295	if (IsMemprofUse)
1296	MPM.addPass(Pass: MemProfUsePass (PGOOpt ->MemoryProfile, FS));
1297
1298	if (PGOOpt && (PGOOpt ->Action == PGOOptions::IRUse \|\|
1299	PGOOpt ->Action == PGOOptions::SampleUse))
1300	MPM.addPass(Pass: PGOForceFunctionAttrsPass (PGOOpt ->ColdOptType));
1301
1302	MPM.addPass(Pass: AlwaysInlinerPass (/InsertLifetimeIntrinsics=/true));
1303
1304	if (EnableModuleInliner)
1305	MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase));
1306	else
1307	MPM.addPass(Pass: buildInlinerPipeline(Level, Phase));
1308
1309	// Remove any dead arguments exposed by cleanups, constant folding globals,
1310	// and argument promotion.
1311	MPM.addPass(Pass: DeadArgumentEliminationPass ());
1312
1313	if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1314	MPM.addPass(Pass: SimplifyTypeTestsPass ());
1315
1316	if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
1317	MPM.addPass(Pass: CoroCleanupPass ());
1318
1319	// Optimize globals now that functions are fully simplified.
1320	MPM.addPass(Pass: GlobalOptPass ());
1321	MPM.addPass(Pass: GlobalDCEPass ());
1322
1323	return MPM;
1324	}
1325
1326	/// TODO: Should LTO cause any differences to this set of passes?
1327	void PassBuilder::addVectorPasses(OptimizationLevel Level,
1328	FunctionPassManager &FPM,
1329	ThinOrFullLTOPhase LTOPhase) {
1330	const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1331
1332	FPM.addPass(Pass: LoopVectorizePass (
1333	LoopVectorizeOptions (!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1334
1335	// Drop dereferenceable assumes after vectorization, as they are no longer
1336	// needed and can inhibit further optimization.
1337	if (!isLTOPreLink(Phase: LTOPhase))
1338	FPM.addPass(Pass: DropUnnecessaryAssumesPass (/DropDereferenceable=/true));
1339
1340	FPM.addPass(Pass: InferAlignmentPass ());
1341	if (IsFullLTO) {
1342	// The vectorizer may have significantly shortened a loop body; unroll
1343	// again. Unroll small loops to hide loop backedge latency and saturate any
1344	// parallel execution resources of an out-of-order processor. We also then
1345	// need to clean up redundancies and loop invariant code.
1346	// FIXME: It would be really good to use a loop-integrated instruction
1347	// combiner for cleanup here so that the unrolling and LICM can be pipelined
1348	// across the loop nests.
1349	// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1350	if (EnableUnrollAndJam && PTO.LoopUnrolling)
1351	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1352	Pass: LoopUnrollAndJamPass (Level.getSpeedupLevel())));
1353	FPM.addPass(Pass: LoopUnrollPass (LoopUnrollOptions (
1354	Level.getSpeedupLevel(), /OnlyWhenForced=/!PTO.LoopUnrolling,
1355	PTO.ForgetAllSCEVInLoopUnroll)));
1356	FPM.addPass(Pass: WarnMissedTransformationsPass ());
1357	// Now that we are done with loop unrolling, be it either by LoopVectorizer,
1358	// or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1359	// become constant-offset, thus enabling SROA and alloca promotion. Do so.
1360	// NOTE: we are very late in the pipeline, and we don't have any LICM
1361	// or SimplifyCFG passes scheduled after us, that would cleanup
1362	// the CFG mess this may created if allowed to modify CFG, so forbid that.
1363	FPM.addPass(Pass: SROAPass (SROAOptions::PreserveCFG));
1364	}
1365
1366	if (!IsFullLTO) {
1367	// Eliminate loads by forwarding stores from the previous iteration to loads
1368	// of the current iteration.
1369	FPM.addPass(Pass: LoopLoadEliminationPass ());
1370	}
1371	// Cleanup after the loop optimization passes.
1372	FPM.addPass(Pass: InstCombinePass ());
1373
1374	if (Level.getSpeedupLevel() > `1` && ExtraVectorizerPasses) {
1375	ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1376	// At higher optimization levels, try to clean up any runtime overlap and
1377	// alignment checks inserted by the vectorizer. We want to track correlated
1378	// runtime checks for two inner loops in the same outer loop, fold any
1379	// common computations, hoist loop-invariant aspects out of any outer loop,
1380	// and unswitch the runtime checks if possible. Once hoisted, we may have
1381	// dead (or speculatable) control flows or more combining opportunities.
1382	ExtraPasses.addPass(Pass: EarlyCSEPass ());
1383	ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass ());
1384	ExtraPasses.addPass(Pass: InstCombinePass ());
1385	LoopPassManager LPM;
1386	LPM.addPass(Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1387	/AllowSpeculation=/true));
1388	LPM.addPass(Pass: SimpleLoopUnswitchPass (/ NonTrivial / Level ==
1389	OptimizationLevel::O3));
1390	ExtraPasses.addPass(
1391	Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /UseMemorySSA=/true));
1392	ExtraPasses.addPass(
1393	Pass: SimplifyCFGPass (SimplifyCFGOptions ().convertSwitchRangeToICmp(B: true)));
1394	ExtraPasses.addPass(Pass: InstCombinePass ());
1395	FPM.addPass(Pass: std::move(ExtraPasses));
1396	}
1397
1398	// Now that we've formed fast to execute loop structures, we do further
1399	// optimizations. These are run afterward as they might block doing complex
1400	// analyses and transforms such as what are needed for loop vectorization.
1401
1402	// Cleanup after loop vectorization, etc. Simplification passes like CVP and
1403	// GVN, loop transforms, and others have already run, so it's now better to
1404	// convert to more optimized IR using more aggressive simplify CFG options.
1405	// The extra sinking transform can create larger basic blocks, so do this
1406	// before SLP vectorization.
1407	FPM.addPass(Pass: SimplifyCFGPass (SimplifyCFGOptions ()
1408	.forwardSwitchCondToPhi(B: true)
1409	.convertSwitchRangeToICmp(B: true)
1410	.convertSwitchToArithmetic(B: true)
1411	.convertSwitchToLookupTable(B: true)
1412	.needCanonicalLoops(B: false)
1413	.hoistCommonInsts(B: true)
1414	.sinkCommonInsts(B: true)));
1415
1416	if (IsFullLTO) {
1417	FPM.addPass(Pass: SCCPPass ());
1418	FPM.addPass(Pass: InstCombinePass ());
1419	FPM.addPass(Pass: BDCEPass ());
1420	}
1421
1422	// Optimize parallel scalar instruction chains into SIMD instructions.
1423	if (PTO.SLPVectorization) {
1424	FPM.addPass(Pass: SLPVectorizerPass ());
1425	if (Level.getSpeedupLevel() > `1` && ExtraVectorizerPasses) {
1426	FPM.addPass(Pass: EarlyCSEPass ());
1427	}
1428	}
1429	// Enhance/cleanup vector code.
1430	FPM.addPass(Pass: VectorCombinePass ());
1431
1432	if (!IsFullLTO) {
1433	FPM.addPass(Pass: InstCombinePass ());
1434	// Unroll small loops to hide loop backedge latency and saturate any
1435	// parallel execution resources of an out-of-order processor. We also then
1436	// need to clean up redundancies and loop invariant code.
1437	// FIXME: It would be really good to use a loop-integrated instruction
1438	// combiner for cleanup here so that the unrolling and LICM can be pipelined
1439	// across the loop nests.
1440	// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1441	if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1442	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1443	Pass: LoopUnrollAndJamPass (Level.getSpeedupLevel())));
1444	}
1445	FPM.addPass(Pass: LoopUnrollPass (LoopUnrollOptions (
1446	Level.getSpeedupLevel(), /OnlyWhenForced=/!PTO.LoopUnrolling,
1447	PTO.ForgetAllSCEVInLoopUnroll)));
1448	FPM.addPass(Pass: WarnMissedTransformationsPass ());
1449	// Now that we are done with loop unrolling, be it either by LoopVectorizer,
1450	// or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1451	// become constant-offset, thus enabling SROA and alloca promotion. Do so.
1452	// NOTE: we are very late in the pipeline, and we don't have any LICM
1453	// or SimplifyCFG passes scheduled after us, that would cleanup
1454	// the CFG mess this may created if allowed to modify CFG, so forbid that.
1455	FPM.addPass(Pass: SROAPass (SROAOptions::PreserveCFG));
1456	}
1457
1458	FPM.addPass(Pass: InferAlignmentPass ());
1459	FPM.addPass(Pass: InstCombinePass ());
1460
1461	// This is needed for two reasons:
1462	// 1. It works around problems that instcombine introduces, such as sinking
1463	// expensive FP divides into loops containing multiplications using the
1464	// divide result.
1465	// 2. It helps to clean up some loop-invariant code created by the loop
1466	// unroll pass when IsFullLTO=false.
1467	FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1468	Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1469	/AllowSpeculation=/true),
1470	/UseMemorySSA=/true));
1471
1472	// Now that we've vectorized and unrolled loops, we may have more refined
1473	// alignment information, try to re-derive it here.
1474	FPM.addPass(Pass: AlignmentFromAssumptionsPass ());
1475	}
1476
1477	ModulePassManager
1478	PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1479	ThinOrFullLTOPhase LTOPhase) {
1480	const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase);
1481	ModulePassManager MPM;
1482
1483	// Run partial inlining pass to partially inline functions that have
1484	// large bodies.
1485	if (RunPartialInlining)
1486	MPM.addPass(Pass: PartialInlinerPass ());
1487
1488	// Remove avail extern fns and globals definitions since we aren't compiling
1489	// an object file for later LTO. For LTO we want to preserve these so they
1490	// are eligible for inlining at link-time. Note if they are unreferenced they
1491	// will be removed by GlobalDCE later, so this only impacts referenced
1492	// available externally globals. Eventually they will be suppressed during
1493	// codegen, but eliminating here enables more opportunity for GlobalDCE as it
1494	// may make globals referenced by available external functions dead and saves
1495	// running remaining passes on the eliminated functions. These should be
1496	// preserved during prelinking for link-time inlining decisions.
1497	if (!LTOPreLink)
1498	MPM.addPass(Pass: EliminateAvailableExternallyPass ());
1499
1500	// Do RPO function attribute inference across the module to forward-propagate
1501	// attributes where applicable.
1502	// FIXME: Is this really an optimization rather than a canonicalization?
1503	MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass ());
1504
1505	// Do a post inline PGO instrumentation and use pass. This is a context
1506	// sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1507	// cross-module inline has not been done yet. The context sensitive
1508	// instrumentation is after all the inlines are done.
1509	if (!LTOPreLink && PGOOpt) {
1510	if (PGOOpt ->CSAction == PGOOptions::CSIRInstr)
1511	addPGOInstrPasses(MPM, Level, /RunProfileGen=/true,
1512	/IsCS=/true, AtomicCounterUpdate: PGOOpt ->AtomicCounterUpdate,
1513	ProfileFile: PGOOpt ->CSProfileGenFile, ProfileRemappingFile: PGOOpt ->ProfileRemappingFile);
1514	else if (PGOOpt ->CSAction == PGOOptions::CSIRUse)
1515	addPGOInstrPasses(MPM, Level, /RunProfileGen=/false,
1516	/IsCS=/true, AtomicCounterUpdate: PGOOpt ->AtomicCounterUpdate,
1517	ProfileFile: PGOOpt ->ProfileFile, ProfileRemappingFile: PGOOpt ->ProfileRemappingFile);
1518	}
1519
1520	// Re-compute GlobalsAA here prior to function passes. This is particularly
1521	// useful as the above will have inlined, DCE'ed, and function-attr
1522	// propagated everything. We should at this point have a reasonably minimal
1523	// and richly annotated call graph. By computing aliasing and mod/ref
1524	// information for all local globals here, the late loop passes and notably
1525	// the vectorizer will be able to use them to help recognize vectorizable
1526	// memory operations.
1527	if (EnableGlobalAnalyses)
1528	MPM.addPass(Pass: RecomputeGlobalsAAPass ());
1529
1530	invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase: LTOPhase);
1531
1532	FunctionPassManager OptimizePM;
1533
1534	// Only drop unnecessary assumes post-inline and post-link, as otherwise
1535	// additional uses of the affected value may be introduced through inlining
1536	// and CSE.
1537	if (!isLTOPreLink(Phase: LTOPhase))
1538	OptimizePM.addPass(Pass: DropUnnecessaryAssumesPass ());
1539
1540	// Scheduling LoopVersioningLICM when inlining is over, because after that
1541	// we may see more accurate aliasing. Reason to run this late is that too
1542	// early versioning may prevent further inlining due to increase of code
1543	// size. Other optimizations which runs later might get benefit of no-alias
1544	// assumption in clone loop.
1545	if (UseLoopVersioningLICM) {
1546	OptimizePM.addPass(
1547	Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass ()));
1548	// LoopVersioningLICM pass might increase new LICM opportunities.
1549	OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1550	Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1551	/AllowSpeculation=/true),
1552	/USeMemorySSA=/UseMemorySSA: true));
1553	}
1554
1555	OptimizePM.addPass(Pass: Float2IntPass ());
1556	OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass ());
1557
1558	if (EnableMatrix) {
1559	OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass ());
1560	OptimizePM.addPass(Pass: EarlyCSEPass ());
1561	}
1562
1563	// CHR pass should only be applied with the profile information.
1564	// The check is to check the profile summary information in CHR.
1565	if (EnableCHR && Level == OptimizationLevel::O3)
1566	OptimizePM.addPass(Pass: ControlHeightReductionPass ());
1567
1568	// FIXME: We need to run some loop optimizations to re-rotate loops after
1569	// simplifycfg and others undo their rotation.
1570
1571	// Optimize the loop execution. These passes operate on entire loop nests
1572	// rather than on each loop in an inside-out manner, and so they are actually
1573	// function passes.
1574
1575	invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level);
1576
1577	LoopPassManager LPM;
1578	// First rotate loops that may have been un-rotated by prior passes.
1579	// Disable header duplication at -Oz.
1580	LPM.addPass(Pass: LoopRotatePass (EnableLoopHeaderDuplication \|\|
1581	Level != OptimizationLevel::Oz,
1582	LTOPreLink));
1583	// Some loops may have become dead by now. Try to delete them.
1584	// FIXME: see discussion in https://reviews.llvm.org/D112851,
1585	// this may need to be revisited once we run GVN before loop deletion
1586	// in the simplification pipeline.
1587	LPM.addPass(Pass: LoopDeletionPass ());
1588
1589	if (PTO.LoopInterchange)
1590	LPM.addPass(Pass: LoopInterchangePass ());
1591
1592	OptimizePM.addPass(
1593	Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /UseMemorySSA=/false));
1594
1595	// FIXME: This may not be the right place in the pipeline.
1596	// We need to have the data to support the right place.
1597	if (PTO.LoopFusion)
1598	OptimizePM.addPass(Pass: LoopFusePass ());
1599
1600	// Distribute loops to allow partial vectorization. I.e. isolate dependences
1601	// into separate loop that would otherwise inhibit vectorization. This is
1602	// currently only performed for loops marked with the metadata
1603	// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1604	OptimizePM.addPass(Pass: LoopDistributePass ());
1605
1606	// Populates the VFABI attribute with the scalar-to-vector mappings
1607	// from the TargetLibraryInfo.
1608	OptimizePM.addPass(Pass: InjectTLIMappings ());
1609
1610	addVectorPasses(Level, FPM&: OptimizePM, LTOPhase);
1611
1612	invokeVectorizerEndEPCallbacks(FPM&: OptimizePM, Level);
1613
1614	// LoopSink pass sinks instructions hoisted by LICM, which serves as a
1615	// canonicalization pass that enables other optimizations. As a result,
1616	// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1617	// result too early.
1618	OptimizePM.addPass(Pass: LoopSinkPass ());
1619
1620	// And finally clean up LCSSA form before generating code.
1621	OptimizePM.addPass(Pass: InstSimplifyPass ());
1622
1623	// This hoists/decomposes div/rem ops. It should run after other sink/hoist
1624	// passes to avoid re-sinking, but before SimplifyCFG because it can allow
1625	// flattening of blocks.
1626	OptimizePM.addPass(Pass: DivRemPairsPass ());
1627
1628	// Try to annotate calls that were created during optimization.
1629	OptimizePM.addPass(
1630	Pass: TailCallElimPass (/UpdateFunctionEntryCount=/isInstrumentedPGOUse()));
1631
1632	// LoopSink (and other loop passes since the last simplifyCFG) might have
1633	// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1634	OptimizePM.addPass(
1635	Pass: SimplifyCFGPass (SimplifyCFGOptions ()
1636	.convertSwitchRangeToICmp(B: true)
1637	.convertSwitchToArithmetic(B: true)
1638	.speculateUnpredictables(B: true)
1639	.hoistLoadsStoresWithCondFaulting(B: true)));
1640
1641	// Add the core optimizing pipeline.
1642	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM),
1643	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1644
1645	// AllocToken transforms heap allocation calls; this needs to run late after
1646	// other allocation call transformations (such as those in InstCombine).
1647	if (!LTOPreLink)
1648	MPM.addPass(Pass: AllocTokenPass ());
1649
1650	invokeOptimizerLastEPCallbacks(MPM, Level, Phase: LTOPhase);
1651
1652	// Split out cold code. Splitting is done late to avoid hiding context from
1653	// other optimizations and inadvertently regressing performance. The tradeoff
1654	// is that this has a higher code size cost than splitting early.
1655	if (EnableHotColdSplit && !LTOPreLink)
1656	MPM.addPass(Pass: HotColdSplittingPass ());
1657
1658	// Search the code for similar regions of code. If enough similar regions can
1659	// be found where extracting the regions into their own function will decrease
1660	// the size of the program, we extract the regions, a deduplicate the
1661	// structurally similar regions.
1662	if (EnableIROutliner)
1663	MPM.addPass(Pass: IROutlinerPass ());
1664
1665	// Now we need to do some global optimization transforms.
1666	// FIXME: It would seem like these should come first in the optimization
1667	// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1668	// ordering here.
1669	MPM.addPass(Pass: GlobalDCEPass ());
1670	MPM.addPass(Pass: ConstantMergePass ());
1671
1672	// Merge functions if requested. It has a better chance to merge functions
1673	// after ConstantMerge folded jump tables.
1674	if (PTO.MergeFunctions)
1675	MPM.addPass(Pass: MergeFunctionsPass ());
1676
1677	if (PTO.CallGraphProfile && !LTOPreLink)
1678	MPM.addPass(Pass: CGProfilePass (isLTOPostLink(Phase: LTOPhase)));
1679
1680	// RelLookupTableConverterPass runs later in LTO post-link pipeline.
1681	if (!LTOPreLink)
1682	MPM.addPass(Pass: RelLookupTableConverterPass ());
1683
1684	// Add devirtualization pass only when LTO is not enabled, as otherwise
1685	// the pass is already enabled in the LTO pipeline.
1686	if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1687	// TODO: explore a better pipeline configuration that can improve
1688	// compilation time overhead.
1689	MPM.addPass(Pass: WholeProgramDevirtPass (
1690	/ExportSummary/ nullptr,
1691	/ImportSummary/ nullptr,
1692	/DevirtSpeculatively/ PTO.DevirtualizeSpeculatively));
1693	MPM.addPass(Pass: LowerTypeTestsPass (nullptr, nullptr,
1694	lowertypetests::DropTestKind::Assume));
1695	// Given that the devirtualization creates more opportunities for inlining,
1696	// we run the Inliner again here to maximize the optimization gain we
1697	// get from devirtualization.
1698	// Also, we can't run devirtualization before inlining because the
1699	// devirtualization depends on the passes optimizing/eliminating vtable GVs
1700	// and those passes are only effective after inlining.
1701	if (EnableModuleInliner) {
1702	MPM.addPass(Pass: ModuleInlinerPass (getInlineParamsFromOptLevel(Level),
1703	UseInlineAdvisor,
1704	ThinOrFullLTOPhase::None));
1705	} else {
1706	MPM.addPass(Pass: ModuleInlinerWrapperPass (
1707	getInlineParamsFromOptLevel(Level),
1708	/ MandatoryFirst / true,
1709	InlineContext{.LTOPhase: ThinOrFullLTOPhase::None, .Pass: InlinePass::CGSCCInliner}));
1710	}
1711	}
1712	return MPM;
1713	}
1714
1715	ModulePassManager
1716	PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1717	ThinOrFullLTOPhase Phase) {
1718	if (Level == OptimizationLevel::O0)
1719	return buildO0DefaultPipeline(Level, Phase);
1720
1721	ModulePassManager MPM;
1722
1723	// Currently this pipeline is only invoked in an LTO pre link pass or when we
1724	// are not running LTO. If that changes the below checks may need updating.
1725	assert(isLTOPreLink(Phase) \|\| Phase == ThinOrFullLTOPhase::None);
1726
1727	// If we are invoking this in non-LTO mode, remove any MemProf related
1728	// attributes and metadata, as we don't know whether we are linking with
1729	// a library containing the necessary interfaces.
1730	if (Phase == ThinOrFullLTOPhase::None)
1731	MPM.addPass(Pass: MemProfRemoveInfo ());
1732
1733	// Convert @llvm.global.annotations to !annotation metadata.
1734	MPM.addPass(Pass: Annotation2MetadataPass ());
1735
1736	// Force any function attributes we want the rest of the pipeline to observe.
1737	MPM.addPass(Pass: ForceFunctionAttrsPass ());
1738
1739	if (PGOOpt && PGOOpt ->DebugInfoForProfiling)
1740	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass ()));
1741
1742	// Apply module pipeline start EP callback.
1743	invokePipelineStartEPCallbacks(MPM, Level);
1744
1745	// Add the core simplification pipeline.
1746	MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase));
1747
1748	// Now add the optimization pipeline.
1749	MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase: Phase));
1750
1751	if (PGOOpt && PGOOpt ->PseudoProbeForProfiling &&
1752	PGOOpt ->Action == PGOOptions::SampleUse)
1753	MPM.addPass(Pass: PseudoProbeUpdatePass ());
1754
1755	// Emit annotation remarks.
1756	addAnnotationRemarksPass(MPM);
1757
1758	if (isLTOPreLink(Phase))
1759	addRequiredLTOPreLinkPasses(MPM);
1760	return MPM;
1761	}
1762
1763	ModulePassManager
1764	PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1765	bool EmitSummary) {
1766	ModulePassManager MPM;
1767	if (ThinLTO)
1768	MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level));
1769	else
1770	MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level));
1771	MPM.addPass(Pass: EmbedBitcodePass (ThinLTO, EmitSummary));
1772
1773	// Perform any cleanups to the IR that aren't suitable for per TU compilation,
1774	// like removing CFI/WPD related instructions. Note, we reuse
1775	// LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1776	// in FatLtoCleanup.
1777	MPM.addPass(Pass: FatLtoCleanup ());
1778
1779	// If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1780	// object code, only in the bitcode section, so drop it before we run
1781	// module optimization and generate machine code. If llvm.type.test() isn't in
1782	// the IR, this won't do anything.
1783	MPM.addPass(
1784	Pass: LowerTypeTestsPass (nullptr, nullptr, lowertypetests::DropTestKind::All));
1785
1786	// Use the ThinLTO post-link pipeline with sample profiling
1787	if (ThinLTO && PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse)
1788	MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /ImportSummary=/nullptr));
1789	else {
1790	// ModuleSimplification does not run the coroutine passes for
1791	// ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1792	// builds, otherwise they will miscompile.
1793	if (ThinLTO) {
1794	// TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1795	// consideration.
1796	CGSCCPassManager CGPM;
1797	CGPM.addPass(Pass: CoroSplitPass (Level != OptimizationLevel::O0));
1798	CGPM.addPass(Pass: CoroAnnotationElidePass ());
1799	MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
1800	MPM.addPass(Pass: CoroCleanupPass ());
1801	}
1802
1803	// otherwise, just use module optimization
1804	MPM.addPass(
1805	Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None));
1806	// Emit annotation remarks.
1807	addAnnotationRemarksPass(MPM);
1808	}
1809	return MPM;
1810	}
1811
1812	ModulePassManager
1813	PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1814	if (Level == OptimizationLevel::O0)
1815	return buildO0DefaultPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink);
1816
1817	ModulePassManager MPM;
1818
1819	// Convert @llvm.global.annotations to !annotation metadata.
1820	MPM.addPass(Pass: Annotation2MetadataPass ());
1821
1822	// Force any function attributes we want the rest of the pipeline to observe.
1823	MPM.addPass(Pass: ForceFunctionAttrsPass ());
1824
1825	if (PGOOpt && PGOOpt ->DebugInfoForProfiling)
1826	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass ()));
1827
1828	// Apply module pipeline start EP callback.
1829	invokePipelineStartEPCallbacks(MPM, Level);
1830
1831	// If we are planning to perform ThinLTO later, we don't bloat the code with
1832	// unrolling/vectorization/... now. Just simplify the module as much as we
1833	// can.
1834	MPM.addPass(Pass: buildModuleSimplificationPipeline(
1835	Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink));
1836	// In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1837	// thinlto use the contextual info to perform imports; then use the contextual
1838	// profile in the post-thinlink phase.
1839	if (!UseCtxProfile.empty()) {
1840	addRequiredLTOPreLinkPasses(MPM);
1841	return MPM;
1842	}
1843
1844	// Run partial inlining pass to partially inline functions that have
1845	// large bodies.
1846	// FIXME: It isn't clear whether this is really the right place to run this
1847	// in ThinLTO. Because there is another canonicalization and simplification
1848	// phase that will run after the thin link, running this here ends up with
1849	// less information than will be available later and it may grow functions in
1850	// ways that aren't beneficial.
1851	if (RunPartialInlining)
1852	MPM.addPass(Pass: PartialInlinerPass ());
1853
1854	if (PGOOpt && PGOOpt ->PseudoProbeForProfiling &&
1855	PGOOpt ->Action == PGOOptions::SampleUse)
1856	MPM.addPass(Pass: PseudoProbeUpdatePass ());
1857
1858	// Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1859	// optimization is going to be done in PostLink stage, but clang can't add
1860	// callbacks there in case of in-process ThinLTO called by linker.
1861	invokeOptimizerEarlyEPCallbacks(MPM, Level,
1862	/Phase=/ThinOrFullLTOPhase::ThinLTOPreLink);
1863	invokeOptimizerLastEPCallbacks(MPM, Level,
1864	/Phase=/ThinOrFullLTOPhase::ThinLTOPreLink);
1865
1866	// Emit annotation remarks.
1867	addAnnotationRemarksPass(MPM);
1868
1869	addRequiredLTOPreLinkPasses(MPM);
1870
1871	return MPM;
1872	}
1873
1874	ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1875	OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1876	ModulePassManager MPM;
1877
1878	// If we are invoking this without a summary index noting that we are linking
1879	// with a library containing the necessary APIs, remove any MemProf related
1880	// attributes and metadata.
1881	if (!ImportSummary \|\| !ImportSummary->withSupportsHotColdNew())
1882	MPM.addPass(Pass: MemProfRemoveInfo ());
1883
1884	if (ImportSummary) {
1885	// For ThinLTO we must apply the context disambiguation decisions early, to
1886	// ensure we can correctly match the callsites to summary data.
1887	if (EnableMemProfContextDisambiguation)
1888	MPM.addPass(Pass: MemProfContextDisambiguation (
1889	ImportSummary, PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse));
1890
1891	// These passes import type identifier resolutions for whole-program
1892	// devirtualization and CFI. They must run early because other passes may
1893	// disturb the specific instruction patterns that these passes look for,
1894	// creating dependencies on resolutions that may not appear in the summary.
1895	//
1896	// For example, GVN may transform the pattern assume(type.test) appearing in
1897	// two basic blocks into assume(phi(type.test, type.test)), which would
1898	// transform a dependency on a WPD resolution into a dependency on a type
1899	// identifier resolution for CFI.
1900	//
1901	// Also, WPD has access to more precise information than ICP and can
1902	// devirtualize more effectively, so it should operate on the IR first.
1903	//
1904	// The WPD and LowerTypeTest passes need to run at -O0 to lower type
1905	// metadata and intrinsics.
1906	MPM.addPass(Pass: WholeProgramDevirtPass (nullptr, ImportSummary));
1907	MPM.addPass(Pass: LowerTypeTestsPass (nullptr, ImportSummary));
1908	}
1909
1910	if (Level == OptimizationLevel::O0) {
1911	// Run a second time to clean up any type tests left behind by WPD for use
1912	// in ICP.
1913	MPM.addPass(Pass: LowerTypeTestsPass (nullptr, nullptr,
1914	lowertypetests::DropTestKind::Assume));
1915	MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1916
1917	// AllocToken transforms heap allocation calls; this needs to run late after
1918	// other allocation call transformations (such as those in InstCombine).
1919	MPM.addPass(Pass: AllocTokenPass ());
1920
1921	// Drop available_externally and unreferenced globals. This is necessary
1922	// with ThinLTO in order to avoid leaving undefined references to dead
1923	// globals in the object file.
1924	MPM.addPass(Pass: EliminateAvailableExternallyPass ());
1925	MPM.addPass(Pass: GlobalDCEPass ());
1926	return MPM;
1927	}
1928	if (!UseCtxProfile.empty()) {
1929	MPM.addPass(
1930	Pass: buildModuleInlinerPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1931	} else {
1932	// Add the core simplification pipeline.
1933	MPM.addPass(Pass: buildModuleSimplificationPipeline(
1934	Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1935	}
1936	// Now add the optimization pipeline.
1937	MPM.addPass(Pass: buildModuleOptimizationPipeline(
1938	Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink));
1939
1940	// Emit annotation remarks.
1941	addAnnotationRemarksPass(MPM);
1942
1943	return MPM;
1944	}
1945
1946	ModulePassManager
1947	PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1948	// FIXME: We should use a customized pre-link pipeline!
1949	return buildPerModuleDefaultPipeline(Level,
1950	Phase: ThinOrFullLTOPhase::FullLTOPreLink);
1951	}
1952
1953	ModulePassManager
1954	PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1955	ModuleSummaryIndex *ExportSummary) {
1956	ModulePassManager MPM;
1957
1958	invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1959
1960	// If we are invoking this without a summary index noting that we are linking
1961	// with a library containing the necessary APIs, remove any MemProf related
1962	// attributes and metadata.
1963	if (!ExportSummary \|\| !ExportSummary->withSupportsHotColdNew())
1964	MPM.addPass(Pass: MemProfRemoveInfo ());
1965
1966	// Create a function that performs CFI checks for cross-DSO calls with targets
1967	// in the current module.
1968	MPM.addPass(Pass: CrossDSOCFIPass ());
1969
1970	if (Level == OptimizationLevel::O0) {
1971	// The WPD and LowerTypeTest passes need to run at -O0 to lower type
1972	// metadata and intrinsics.
1973	MPM.addPass(Pass: WholeProgramDevirtPass (ExportSummary, nullptr));
1974	MPM.addPass(Pass: LowerTypeTestsPass (ExportSummary, nullptr));
1975	// Run a second time to clean up any type tests left behind by WPD for use
1976	// in ICP.
1977	MPM.addPass(Pass: LowerTypeTestsPass (nullptr, nullptr,
1978	lowertypetests::DropTestKind::Assume));
1979
1980	MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
1981
1982	// AllocToken transforms heap allocation calls; this needs to run late after
1983	// other allocation call transformations (such as those in InstCombine).
1984	MPM.addPass(Pass: AllocTokenPass ());
1985
1986	invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1987
1988	// Emit annotation remarks.
1989	addAnnotationRemarksPass(MPM);
1990
1991	return MPM;
1992	}
1993
1994	if (PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse) {
1995	// Load sample profile before running the LTO optimization pipeline.
1996	MPM.addPass(Pass: SampleProfileLoaderPass (PGOOpt ->ProfileFile,
1997	PGOOpt ->ProfileRemappingFile,
1998	ThinOrFullLTOPhase::FullLTOPostLink));
1999	// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2000	// RequireAnalysisPass for PSI before subsequent non-module passes.
2001	MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2002	}
2003
2004	// Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2005	MPM.addPass(Pass: OpenMPOptPass (ThinOrFullLTOPhase::FullLTOPostLink));
2006
2007	// Remove unused virtual tables to improve the quality of code generated by
2008	// whole-program devirtualization and bitset lowering.
2009	MPM.addPass(Pass: GlobalDCEPass (/InLTOPostLink=/true));
2010
2011	// Do basic inference of function attributes from known properties of system
2012	// libraries and other oracles.
2013	MPM.addPass(Pass: InferFunctionAttrsPass ());
2014
2015	if (Level.getSpeedupLevel() > `1`) {
2016	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2017	Pass: CallSiteSplittingPass (), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2018
2019	// Indirect call promotion. This should promote all the targets that are
2020	// left by the earlier promotion pass that promotes intra-module targets.
2021	// This two-step promotion is to save the compile time. For LTO, it should
2022	// produce the same result as if we only do promotion here.
2023	MPM.addPass(Pass: PGOIndirectCallPromotion (
2024	true / InLTO /, PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse));
2025
2026	// Promoting by-reference arguments to by-value exposes more constants to
2027	// IPSCCP.
2028	CGSCCPassManager CGPM;
2029	CGPM.addPass(Pass: PostOrderFunctionAttrsPass ());
2030	CGPM.addPass(Pass: ArgumentPromotionPass ());
2031	CGPM.addPass(
2032	Pass: createCGSCCToFunctionPassAdaptor(Pass: SROAPass (SROAOptions::ModifyCFG)));
2033	MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2034
2035	// Propagate constants at call sites into the functions they call. This
2036	// opens opportunities for globalopt (and inlining) by substituting function
2037	// pointers passed as arguments to direct uses of functions.
2038	MPM.addPass(Pass: IPSCCPPass (IPSCCPOptions (/AllowFuncSpec=/
2039	Level != OptimizationLevel::Os &&
2040	Level != OptimizationLevel::Oz)));
2041
2042	// Attach metadata to indirect call sites indicating the set of functions
2043	// they may target at run-time. This should follow IPSCCP.
2044	MPM.addPass(Pass: CalledValuePropagationPass ());
2045	}
2046
2047	// Do RPO function attribute inference across the module to forward-propagate
2048	// attributes where applicable.
2049	// FIXME: Is this really an optimization rather than a canonicalization?
2050	MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass ());
2051
2052	// Use in-range annotations on GEP indices to split globals where beneficial.
2053	MPM.addPass(Pass: GlobalSplitPass ());
2054
2055	// Run whole program optimization of virtual call when the list of callees
2056	// is fixed.
2057	MPM.addPass(Pass: WholeProgramDevirtPass (ExportSummary, nullptr));
2058
2059	MPM.addPass(Pass: NoRecurseLTOInferencePass ());
2060	// Stop here at -O1.
2061	if (Level == OptimizationLevel::O1) {
2062	// The LowerTypeTestsPass needs to run to lower type metadata and the
2063	// type.test intrinsics. The pass does nothing if CFI is disabled.
2064	MPM.addPass(Pass: LowerTypeTestsPass (ExportSummary, nullptr));
2065	// Run a second time to clean up any type tests left behind by WPD for use
2066	// in ICP (which is performed earlier than this in the regular LTO
2067	// pipeline).
2068	MPM.addPass(Pass: LowerTypeTestsPass (nullptr, nullptr,
2069	lowertypetests::DropTestKind::Assume));
2070
2071	MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink));
2072
2073	// AllocToken transforms heap allocation calls; this needs to run late after
2074	// other allocation call transformations (such as those in InstCombine).
2075	MPM.addPass(Pass: AllocTokenPass ());
2076
2077	invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2078
2079	// Emit annotation remarks.
2080	addAnnotationRemarksPass(MPM);
2081
2082	return MPM;
2083	}
2084
2085	// TODO: Skip to match buildCoroWrapper.
2086	MPM.addPass(Pass: CoroEarlyPass ());
2087
2088	// Optimize globals to try and fold them into constants.
2089	MPM.addPass(Pass: GlobalOptPass ());
2090
2091	// Promote any localized globals to SSA registers.
2092	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass ()));
2093
2094	// Linking modules together can lead to duplicate global constant, only
2095	// keep one copy of each constant.
2096	MPM.addPass(Pass: ConstantMergePass ());
2097
2098	// Remove unused arguments from functions.
2099	MPM.addPass(Pass: DeadArgumentEliminationPass ());
2100
2101	// Reduce the code after globalopt and ipsccp. Both can open up significant
2102	// simplification opportunities, and both can propagate functions through
2103	// function pointers. When this happens, we often have to resolve varargs
2104	// calls, etc, so let instcombine do this.
2105	FunctionPassManager PeepholeFPM;
2106	PeepholeFPM.addPass(Pass: InstCombinePass ());
2107	if (Level.getSpeedupLevel() > `1`)
2108	PeepholeFPM.addPass(Pass: AggressiveInstCombinePass ());
2109	invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level);
2110
2111	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM),
2112	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2113
2114	// Lower variadic functions for supported targets prior to inlining.
2115	MPM.addPass(Pass: ExpandVariadicsPass (ExpandVariadicsMode::Optimize));
2116
2117	// Note: historically, the PruneEH pass was run first to deduce nounwind and
2118	// generally clean up exception handling overhead. It isn't clear this is
2119	// valuable as the inliner doesn't currently care whether it is inlining an
2120	// invoke or a call.
2121	// Run the inliner now.
2122	if (EnableModuleInliner) {
2123	MPM.addPass(Pass: ModuleInlinerPass (getInlineParamsFromOptLevel(Level),
2124	UseInlineAdvisor,
2125	ThinOrFullLTOPhase::FullLTOPostLink));
2126	} else {
2127	MPM.addPass(Pass: ModuleInlinerWrapperPass (
2128	getInlineParamsFromOptLevel(Level),
2129	/ MandatoryFirst / true,
2130	InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink,
2131	.Pass: InlinePass::CGSCCInliner}));
2132	}
2133
2134	// Perform context disambiguation after inlining, since that would reduce the
2135	// amount of additional cloning required to distinguish the allocation
2136	// contexts.
2137	if (EnableMemProfContextDisambiguation)
2138	MPM.addPass(Pass: MemProfContextDisambiguation (
2139	/Summary=/nullptr,
2140	PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse));
2141
2142	// Optimize globals again after we ran the inliner.
2143	MPM.addPass(Pass: GlobalOptPass ());
2144
2145	// Run the OpenMPOpt pass again after global optimizations.
2146	MPM.addPass(Pass: OpenMPOptPass (ThinOrFullLTOPhase::FullLTOPostLink));
2147
2148	// Garbage collect dead functions.
2149	MPM.addPass(Pass: GlobalDCEPass (/InLTOPostLink=/true));
2150
2151	// If we didn't decide to inline a function, check to see if we can
2152	// transform it to pass arguments by value instead of by reference.
2153	CGSCCPassManager CGPM;
2154	CGPM.addPass(Pass: ArgumentPromotionPass ());
2155	CGPM.addPass(Pass: CoroSplitPass (Level != OptimizationLevel::O0));
2156	CGPM.addPass(Pass: CoroAnnotationElidePass ());
2157	MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2158
2159	FunctionPassManager FPM;
2160	// The IPO Passes may leave cruft around. Clean up after them.
2161	FPM.addPass(Pass: InstCombinePass ());
2162	invokePeepholeEPCallbacks(FPM, Level);
2163
2164	if (EnableConstraintElimination)
2165	FPM.addPass(Pass: ConstraintEliminationPass ());
2166
2167	FPM.addPass(Pass: JumpThreadingPass ());
2168
2169	// Do a post inline PGO instrumentation and use pass. This is a context
2170	// sensitive PGO pass.
2171	if (PGOOpt) {
2172	if (PGOOpt ->CSAction == PGOOptions::CSIRInstr)
2173	addPGOInstrPasses(MPM, Level, /RunProfileGen=/true,
2174	/IsCS=/true, AtomicCounterUpdate: PGOOpt ->AtomicCounterUpdate,
2175	ProfileFile: PGOOpt ->CSProfileGenFile, ProfileRemappingFile: PGOOpt ->ProfileRemappingFile);
2176	else if (PGOOpt ->CSAction == PGOOptions::CSIRUse)
2177	addPGOInstrPasses(MPM, Level, /RunProfileGen=/false,
2178	/IsCS=/true, AtomicCounterUpdate: PGOOpt ->AtomicCounterUpdate,
2179	ProfileFile: PGOOpt ->ProfileFile, ProfileRemappingFile: PGOOpt ->ProfileRemappingFile);
2180	}
2181
2182	// Break up allocas
2183	FPM.addPass(Pass: SROAPass (SROAOptions::ModifyCFG));
2184
2185	// LTO provides additional opportunities for tailcall elimination due to
2186	// link-time inlining, and visibility of nocapture attribute.
2187	FPM.addPass(
2188	Pass: TailCallElimPass (/UpdateFunctionEntryCount=/isInstrumentedPGOUse()));
2189
2190	// Run a few AA driver optimizations here and now to cleanup the code.
2191	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM),
2192	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2193
2194	MPM.addPass(
2195	Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass ()));
2196
2197	// Require the GlobalsAA analysis for the module so we can query it within
2198	// MainFPM.
2199	if (EnableGlobalAnalyses) {
2200	MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
2201	// Invalidate AAManager so it can be recreated and pick up the newly
2202	// available GlobalsAA.
2203	MPM.addPass(
2204	Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
2205	}
2206
2207	FunctionPassManager MainFPM;
2208	MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
2209	Pass: LICMPass (PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2210	/AllowSpeculation=/true),
2211	/USeMemorySSA=/UseMemorySSA: true));
2212
2213	if (RunNewGVN)
2214	MainFPM.addPass(Pass: NewGVNPass ());
2215	else
2216	MainFPM.addPass(Pass: GVNPass ());
2217
2218	// Remove dead memcpy()'s.
2219	MainFPM.addPass(Pass: MemCpyOptPass ());
2220
2221	// Nuke dead stores.
2222	MainFPM.addPass(Pass: DSEPass ());
2223	MainFPM.addPass(Pass: MoveAutoInitPass ());
2224	MainFPM.addPass(Pass: MergedLoadStoreMotionPass ());
2225
2226	invokeVectorizerStartEPCallbacks(FPM&: MainFPM, Level);
2227
2228	LoopPassManager LPM;
2229	if (EnableLoopFlatten && Level.getSpeedupLevel() > `1`)
2230	LPM.addPass(Pass: LoopFlattenPass ());
2231	LPM.addPass(Pass: IndVarSimplifyPass ());
2232	LPM.addPass(Pass: LoopDeletionPass ());
2233	// FIXME: Add loop interchange.
2234
2235	// Unroll small loops and perform peeling.
2236	LPM.addPass(Pass: LoopFullUnrollPass (Level.getSpeedupLevel(),
2237	/ OnlyWhenForced= / !PTO.LoopUnrolling,
2238	PTO.ForgetAllSCEVInLoopUnroll));
2239	// The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2240	// *All* loop passes must preserve it, in order to be able to use it.
2241	MainFPM.addPass(
2242	Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /UseMemorySSA=/false));
2243
2244	MainFPM.addPass(Pass: LoopDistributePass ());
2245
2246	addVectorPasses(Level, FPM&: MainFPM, LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink);
2247
2248	invokeVectorizerEndEPCallbacks(FPM&: MainFPM, Level);
2249
2250	// Run the OpenMPOpt CGSCC pass again late.
2251	MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
2252	Pass: OpenMPOptCGSCCPass (ThinOrFullLTOPhase::FullLTOPostLink)));
2253
2254	invokePeepholeEPCallbacks(FPM&: MainFPM, Level);
2255	MainFPM.addPass(Pass: JumpThreadingPass ());
2256	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM),
2257	EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
2258
2259	// Lower type metadata and the type.test intrinsic. This pass supports
2260	// clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2261	// to be run at link time if CFI is enabled. This pass does nothing if
2262	// CFI is disabled.
2263	MPM.addPass(Pass: LowerTypeTestsPass (ExportSummary, nullptr));
2264	// Run a second time to clean up any type tests left behind by WPD for use
2265	// in ICP (which is performed earlier than this in the regular LTO pipeline).
2266	MPM.addPass(Pass: LowerTypeTestsPass (nullptr, nullptr,
2267	lowertypetests::DropTestKind::Assume));
2268
2269	// Enable splitting late in the FullLTO post-link pipeline.
2270	if (EnableHotColdSplit)
2271	MPM.addPass(Pass: HotColdSplittingPass ());
2272
2273	// Add late LTO optimization passes.
2274	FunctionPassManager LateFPM;
2275
2276	// LoopSink pass sinks instructions hoisted by LICM, which serves as a
2277	// canonicalization pass that enables other optimizations. As a result,
2278	// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2279	// result too early.
2280	LateFPM.addPass(Pass: LoopSinkPass ());
2281
2282	// This hoists/decomposes div/rem ops. It should run after other sink/hoist
2283	// passes to avoid re-sinking, but before SimplifyCFG because it can allow
2284	// flattening of blocks.
2285	LateFPM.addPass(Pass: DivRemPairsPass ());
2286
2287	// Delete basic blocks, which optimization passes may have killed.
2288	LateFPM.addPass(Pass: SimplifyCFGPass (SimplifyCFGOptions ()
2289	.convertSwitchRangeToICmp(B: true)
2290	.convertSwitchToArithmetic(B: true)
2291	.hoistCommonInsts(B: true)
2292	.speculateUnpredictables(B: true)));
2293	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM)));
2294
2295	// Drop bodies of available externally objects to improve GlobalDCE.
2296	MPM.addPass(Pass: EliminateAvailableExternallyPass ());
2297
2298	// Now that we have optimized the program, discard unreachable functions.
2299	MPM.addPass(Pass: GlobalDCEPass (/InLTOPostLink=/true));
2300
2301	if (PTO.MergeFunctions)
2302	MPM.addPass(Pass: MergeFunctionsPass ());
2303
2304	MPM.addPass(Pass: RelLookupTableConverterPass ());
2305
2306	if (PTO.CallGraphProfile)
2307	MPM.addPass(Pass: CGProfilePass (/InLTOPostLink=/true));
2308
2309	MPM.addPass(Pass: CoroCleanupPass ());
2310
2311	// AllocToken transforms heap allocation calls; this needs to run late after
2312	// other allocation call transformations (such as those in InstCombine).
2313	MPM.addPass(Pass: AllocTokenPass ());
2314
2315	invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2316
2317	// Emit annotation remarks.
2318	addAnnotationRemarksPass(MPM);
2319
2320	return MPM;
2321	}
2322
2323	ModulePassManager
2324	PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2325	ThinOrFullLTOPhase Phase) {
2326	assert(Level == OptimizationLevel::O0 &&
2327	"buildO0DefaultPipeline should only be used with O0");
2328
2329	ModulePassManager MPM;
2330
2331	// Perform pseudo probe instrumentation in O0 mode. This is for the
2332	// consistency between different build modes. For example, a LTO build can be
2333	// mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2334	// the postlink will require pseudo probe instrumentation in the prelink.
2335	if (PGOOpt && PGOOpt ->PseudoProbeForProfiling)
2336	MPM.addPass(Pass: SampleProfileProbePass (TM));
2337
2338	if (PGOOpt && (PGOOpt ->Action == PGOOptions::IRInstr \|\|
2339	PGOOpt ->Action == PGOOptions::IRUse))
2340	addPGOInstrPassesForO0(
2341	MPM,
2342	/RunProfileGen=/(PGOOpt ->Action == PGOOptions::IRInstr),
2343	/IsCS=/false, AtomicCounterUpdate: PGOOpt ->AtomicCounterUpdate, ProfileFile: PGOOpt ->ProfileFile,
2344	ProfileRemappingFile: PGOOpt ->ProfileRemappingFile);
2345
2346	// Instrument function entry and exit before all inlining.
2347	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2348	Pass: EntryExitInstrumenterPass (/PostInlining=/false)));
2349
2350	invokePipelineStartEPCallbacks(MPM, Level);
2351
2352	if (PGOOpt && PGOOpt ->DebugInfoForProfiling)
2353	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass ()));
2354
2355	if (PGOOpt && PGOOpt ->Action == PGOOptions::SampleUse) {
2356	// Explicitly disable sample loader inlining and use flattened profile in O0
2357	// pipeline.
2358	MPM.addPass(Pass: SampleProfileLoaderPass (PGOOpt ->ProfileFile,
2359	PGOOpt ->ProfileRemappingFile,
2360	ThinOrFullLTOPhase::None, FS,
2361	/DisableSampleProfileInlining=/true,
2362	/UseFlattenedProfile=/true));
2363	// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2364	// RequireAnalysisPass for PSI before subsequent non-module passes.
2365	MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2366	}
2367
2368	invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
2369
2370	// Build a minimal pipeline based on the semantics required by LLVM,
2371	// which is just that always inlining occurs. Further, disable generating
2372	// lifetime intrinsics to avoid enabling further optimizations during
2373	// code generation.
2374	MPM.addPass(Pass: AlwaysInlinerPass (
2375	/InsertLifetimeIntrinsics=/false));
2376
2377	if (PTO.MergeFunctions)
2378	MPM.addPass(Pass: MergeFunctionsPass ());
2379
2380	if (EnableMatrix)
2381	MPM.addPass(
2382	Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass (true)));
2383
2384	if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2385	CGSCCPassManager CGPM;
2386	invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2387	if (!CGPM.isEmpty())
2388	MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2389	}
2390	if (!LateLoopOptimizationsEPCallbacks.empty()) {
2391	LoopPassManager LPM;
2392	invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2393	if (!LPM.isEmpty()) {
2394	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2395	Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2396	}
2397	}
2398	if (!LoopOptimizerEndEPCallbacks.empty()) {
2399	LoopPassManager LPM;
2400	invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2401	if (!LPM.isEmpty()) {
2402	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2403	Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2404	}
2405	}
2406	if (!ScalarOptimizerLateEPCallbacks.empty()) {
2407	FunctionPassManager FPM;
2408	invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2409	if (!FPM.isEmpty())
2410	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2411	}
2412
2413	invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
2414
2415	if (!VectorizerStartEPCallbacks.empty()) {
2416	FunctionPassManager FPM;
2417	invokeVectorizerStartEPCallbacks(FPM, Level);
2418	if (!FPM.isEmpty())
2419	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2420	}
2421
2422	if (!VectorizerEndEPCallbacks.empty()) {
2423	FunctionPassManager FPM;
2424	invokeVectorizerEndEPCallbacks(FPM, Level);
2425	if (!FPM.isEmpty())
2426	MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2427	}
2428
2429	MPM.addPass(Pass: buildCoroWrapper(Phase));
2430
2431	// AllocToken transforms heap allocation calls; this needs to run late after
2432	// other allocation call transformations (such as those in InstCombine).
2433	if (!isLTOPreLink(Phase))
2434	MPM.addPass(Pass: AllocTokenPass ());
2435
2436	invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
2437
2438	if (isLTOPreLink(Phase))
2439	addRequiredLTOPreLinkPasses(MPM);
2440
2441	// Emit annotation remarks.
2442	addAnnotationRemarksPass(MPM);
2443
2444	return MPM;
2445	}
2446
2447	AAManager PassBuilder::buildDefaultAAPipeline() {
2448	AAManager AA;
2449
2450	// The order in which these are registered determines their priority when
2451	// being queried.
2452
2453	// Add any target-specific alias analyses that should be run early.
2454	if (TM)
2455	TM->registerEarlyDefaultAliasAnalyses(AA);
2456
2457	// First we register the basic alias analysis that provides the majority of
2458	// per-function local AA logic. This is a stateless, on-demand local set of
2459	// AA techniques.
2460	AA.registerFunctionAnalysis<BasicAA>();
2461
2462	// Next we query fast, specialized alias analyses that wrap IR-embedded
2463	// information about aliasing.
2464	AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2465	AA.registerFunctionAnalysis<TypeBasedAA>();
2466
2467	// Add support for querying global aliasing information when available.
2468	// Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2469	// analysis, all that the `AAManager` can do is query for any cached
2470	// results from `GlobalsAA` through a readonly proxy.
2471	if (EnableGlobalAnalyses)
2472	AA.registerModuleAnalysis<GlobalsAA>();
2473
2474	// Add target-specific alias analyses.
2475	if (TM)
2476	TM->registerDefaultAliasAnalyses(AA);
2477
2478	return AA;
2479	}
2480
2481	bool PassBuilder::isInstrumentedPGOUse() const {
2482	return (PGOOpt && PGOOpt ->Action == PGOOptions::IRUse) \|\|
2483	!UseCtxProfile.empty();
2484	}
2485

Browse the source code of llvm_projects/llvm/lib/Passes/PassBuilderPipelines.cpp