| 1 | //===- Construction of pass pipelines -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// |
| 10 | /// This file provides the implementation of the PassBuilder based on our |
| 11 | /// static pass registry as well as related functionality. It also provides |
| 12 | /// helpers to aid in analyzing, debugging, and testing passes and pass |
| 13 | /// pipelines. |
| 14 | /// |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include "llvm/ADT/Statistic.h" |
| 18 | #include "llvm/Analysis/AliasAnalysis.h" |
| 19 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
| 20 | #include "llvm/Analysis/CGSCCPassManager.h" |
| 21 | #include "llvm/Analysis/CtxProfAnalysis.h" |
| 22 | #include "llvm/Analysis/GlobalsModRef.h" |
| 23 | #include "llvm/Analysis/InlineAdvisor.h" |
| 24 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 25 | #include "llvm/Analysis/ScopedNoAliasAA.h" |
| 26 | #include "llvm/Analysis/TypeBasedAliasAnalysis.h" |
| 27 | #include "llvm/IR/PassManager.h" |
| 28 | #include "llvm/Pass.h" |
| 29 | #include "llvm/Passes/OptimizationLevel.h" |
| 30 | #include "llvm/Passes/PassBuilder.h" |
| 31 | #include "llvm/Support/CommandLine.h" |
| 32 | #include "llvm/Support/ErrorHandling.h" |
| 33 | #include "llvm/Support/PGOOptions.h" |
| 34 | #include "llvm/Support/VirtualFileSystem.h" |
| 35 | #include "llvm/Target/TargetMachine.h" |
| 36 | #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" |
| 37 | #include "llvm/Transforms/Coroutines/CoroAnnotationElide.h" |
| 38 | #include "llvm/Transforms/Coroutines/CoroCleanup.h" |
| 39 | #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" |
| 40 | #include "llvm/Transforms/Coroutines/CoroEarly.h" |
| 41 | #include "llvm/Transforms/Coroutines/CoroElide.h" |
| 42 | #include "llvm/Transforms/Coroutines/CoroSplit.h" |
| 43 | #include "llvm/Transforms/HipStdPar/HipStdPar.h" |
| 44 | #include "llvm/Transforms/IPO/AlwaysInliner.h" |
| 45 | #include "llvm/Transforms/IPO/Annotation2Metadata.h" |
| 46 | #include "llvm/Transforms/IPO/ArgumentPromotion.h" |
| 47 | #include "llvm/Transforms/IPO/Attributor.h" |
| 48 | #include "llvm/Transforms/IPO/CalledValuePropagation.h" |
| 49 | #include "llvm/Transforms/IPO/ConstantMerge.h" |
| 50 | #include "llvm/Transforms/IPO/CrossDSOCFI.h" |
| 51 | #include "llvm/Transforms/IPO/DeadArgumentElimination.h" |
| 52 | #include "llvm/Transforms/IPO/ElimAvailExtern.h" |
| 53 | #include "llvm/Transforms/IPO/EmbedBitcodePass.h" |
| 54 | #include "llvm/Transforms/IPO/ExpandVariadics.h" |
| 55 | #include "llvm/Transforms/IPO/FatLTOCleanup.h" |
| 56 | #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" |
| 57 | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
| 58 | #include "llvm/Transforms/IPO/GlobalDCE.h" |
| 59 | #include "llvm/Transforms/IPO/GlobalOpt.h" |
| 60 | #include "llvm/Transforms/IPO/GlobalSplit.h" |
| 61 | #include "llvm/Transforms/IPO/HotColdSplitting.h" |
| 62 | #include "llvm/Transforms/IPO/IROutliner.h" |
| 63 | #include "llvm/Transforms/IPO/InferFunctionAttrs.h" |
| 64 | #include "llvm/Transforms/IPO/Inliner.h" |
| 65 | #include "llvm/Transforms/IPO/LowerTypeTests.h" |
| 66 | #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" |
| 67 | #include "llvm/Transforms/IPO/MergeFunctions.h" |
| 68 | #include "llvm/Transforms/IPO/ModuleInliner.h" |
| 69 | #include "llvm/Transforms/IPO/OpenMPOpt.h" |
| 70 | #include "llvm/Transforms/IPO/PartialInlining.h" |
| 71 | #include "llvm/Transforms/IPO/SCCP.h" |
| 72 | #include "llvm/Transforms/IPO/SampleProfile.h" |
| 73 | #include "llvm/Transforms/IPO/SampleProfileProbe.h" |
| 74 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
| 75 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
| 76 | #include "llvm/Transforms/Instrumentation/CGProfile.h" |
| 77 | #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" |
| 78 | #include "llvm/Transforms/Instrumentation/InstrProfiling.h" |
| 79 | #include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h" |
| 80 | #include "llvm/Transforms/Instrumentation/MemProfUse.h" |
| 81 | #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" |
| 82 | #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" |
| 83 | #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" |
| 84 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
| 85 | #include "llvm/Transforms/Scalar/ADCE.h" |
| 86 | #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" |
| 87 | #include "llvm/Transforms/Scalar/AnnotationRemarks.h" |
| 88 | #include "llvm/Transforms/Scalar/BDCE.h" |
| 89 | #include "llvm/Transforms/Scalar/CallSiteSplitting.h" |
| 90 | #include "llvm/Transforms/Scalar/ConstraintElimination.h" |
| 91 | #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" |
| 92 | #include "llvm/Transforms/Scalar/DFAJumpThreading.h" |
| 93 | #include "llvm/Transforms/Scalar/DeadStoreElimination.h" |
| 94 | #include "llvm/Transforms/Scalar/DivRemPairs.h" |
| 95 | #include "llvm/Transforms/Scalar/EarlyCSE.h" |
| 96 | #include "llvm/Transforms/Scalar/Float2Int.h" |
| 97 | #include "llvm/Transforms/Scalar/GVN.h" |
| 98 | #include "llvm/Transforms/Scalar/IndVarSimplify.h" |
| 99 | #include "llvm/Transforms/Scalar/InferAlignment.h" |
| 100 | #include "llvm/Transforms/Scalar/InstSimplifyPass.h" |
| 101 | #include "llvm/Transforms/Scalar/JumpTableToSwitch.h" |
| 102 | #include "llvm/Transforms/Scalar/JumpThreading.h" |
| 103 | #include "llvm/Transforms/Scalar/LICM.h" |
| 104 | #include "llvm/Transforms/Scalar/LoopDeletion.h" |
| 105 | #include "llvm/Transforms/Scalar/LoopDistribute.h" |
| 106 | #include "llvm/Transforms/Scalar/LoopFlatten.h" |
| 107 | #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" |
| 108 | #include "llvm/Transforms/Scalar/LoopInstSimplify.h" |
| 109 | #include "llvm/Transforms/Scalar/LoopInterchange.h" |
| 110 | #include "llvm/Transforms/Scalar/LoopLoadElimination.h" |
| 111 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
| 112 | #include "llvm/Transforms/Scalar/LoopRotation.h" |
| 113 | #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" |
| 114 | #include "llvm/Transforms/Scalar/LoopSink.h" |
| 115 | #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" |
| 116 | #include "llvm/Transforms/Scalar/LoopUnrollPass.h" |
| 117 | #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" |
| 118 | #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" |
| 119 | #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" |
| 120 | #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" |
| 121 | #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" |
| 122 | #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" |
| 123 | #include "llvm/Transforms/Scalar/NewGVN.h" |
| 124 | #include "llvm/Transforms/Scalar/Reassociate.h" |
| 125 | #include "llvm/Transforms/Scalar/SCCP.h" |
| 126 | #include "llvm/Transforms/Scalar/SROA.h" |
| 127 | #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" |
| 128 | #include "llvm/Transforms/Scalar/SimplifyCFG.h" |
| 129 | #include "llvm/Transforms/Scalar/SpeculativeExecution.h" |
| 130 | #include "llvm/Transforms/Scalar/TailRecursionElimination.h" |
| 131 | #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" |
| 132 | #include "llvm/Transforms/Utils/AddDiscriminators.h" |
| 133 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
| 134 | #include "llvm/Transforms/Utils/CanonicalizeAliases.h" |
| 135 | #include "llvm/Transforms/Utils/CountVisits.h" |
| 136 | #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" |
| 137 | #include "llvm/Transforms/Utils/ExtraPassManager.h" |
| 138 | #include "llvm/Transforms/Utils/InjectTLIMappings.h" |
| 139 | #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" |
| 140 | #include "llvm/Transforms/Utils/Mem2Reg.h" |
| 141 | #include "llvm/Transforms/Utils/MoveAutoInit.h" |
| 142 | #include "llvm/Transforms/Utils/NameAnonGlobals.h" |
| 143 | #include "llvm/Transforms/Utils/RelLookupTableConverter.h" |
| 144 | #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" |
| 145 | #include "llvm/Transforms/Vectorize/LoopVectorize.h" |
| 146 | #include "llvm/Transforms/Vectorize/SLPVectorizer.h" |
| 147 | #include "llvm/Transforms/Vectorize/VectorCombine.h" |
| 148 | |
| 149 | using namespace llvm; |
| 150 | |
| 151 | static cl::opt<InliningAdvisorMode> UseInlineAdvisor( |
| 152 | "enable-ml-inliner" , cl::init(Val: InliningAdvisorMode::Default), cl::Hidden, |
| 153 | cl::desc("Enable ML policy for inliner. Currently trained for -Oz only" ), |
| 154 | cl::values(clEnumValN(InliningAdvisorMode::Default, "default" , |
| 155 | "Heuristics-based inliner version" ), |
| 156 | clEnumValN(InliningAdvisorMode::Development, "development" , |
| 157 | "Use development mode (runtime-loadable model)" ), |
| 158 | clEnumValN(InliningAdvisorMode::Release, "release" , |
| 159 | "Use release mode (AOT-compiled model)" ))); |
| 160 | |
| 161 | /// Flag to enable inline deferral during PGO. |
| 162 | static cl::opt<bool> |
| 163 | EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral" , cl::init(Val: true), |
| 164 | cl::Hidden, |
| 165 | cl::desc("Enable inline deferral during PGO" )); |
| 166 | |
| 167 | static cl::opt<bool> EnableModuleInliner("enable-module-inliner" , |
| 168 | cl::init(Val: false), cl::Hidden, |
| 169 | cl::desc("Enable module inliner" )); |
| 170 | |
| 171 | static cl::opt<bool> PerformMandatoryInliningsFirst( |
| 172 | "mandatory-inlining-first" , cl::init(Val: false), cl::Hidden, |
| 173 | cl::desc("Perform mandatory inlinings module-wide, before performing " |
| 174 | "inlining" )); |
| 175 | |
| 176 | static cl::opt<bool> EnableEagerlyInvalidateAnalyses( |
| 177 | "eagerly-invalidate-analyses" , cl::init(Val: true), cl::Hidden, |
| 178 | cl::desc("Eagerly invalidate more analyses in default pipelines" )); |
| 179 | |
| 180 | static cl::opt<bool> EnableMergeFunctions( |
| 181 | "enable-merge-functions" , cl::init(Val: false), cl::Hidden, |
| 182 | cl::desc("Enable function merging as part of the optimization pipeline" )); |
| 183 | |
| 184 | static cl::opt<bool> EnablePostPGOLoopRotation( |
| 185 | "enable-post-pgo-loop-rotation" , cl::init(Val: true), cl::Hidden, |
| 186 | cl::desc("Run the loop rotation transformation after PGO instrumentation" )); |
| 187 | |
| 188 | static cl::opt<bool> EnableGlobalAnalyses( |
| 189 | "enable-global-analyses" , cl::init(Val: true), cl::Hidden, |
| 190 | cl::desc("Enable inter-procedural analyses" )); |
| 191 | |
| 192 | static cl::opt<bool> RunPartialInlining("enable-partial-inlining" , |
| 193 | cl::init(Val: false), cl::Hidden, |
| 194 | cl::desc("Run Partial inlining pass" )); |
| 195 | |
| 196 | static cl::opt<bool> ( |
| 197 | "extra-vectorizer-passes" , cl::init(Val: false), cl::Hidden, |
| 198 | cl::desc("Run cleanup optimization passes after vectorization" )); |
| 199 | |
| 200 | static cl::opt<bool> RunNewGVN("enable-newgvn" , cl::init(Val: false), cl::Hidden, |
| 201 | cl::desc("Run the NewGVN pass" )); |
| 202 | |
| 203 | static cl::opt<bool> |
| 204 | EnableLoopInterchange("enable-loopinterchange" , cl::init(Val: false), cl::Hidden, |
| 205 | cl::desc("Enable the LoopInterchange Pass" )); |
| 206 | |
| 207 | static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam" , |
| 208 | cl::init(Val: false), cl::Hidden, |
| 209 | cl::desc("Enable Unroll And Jam Pass" )); |
| 210 | |
| 211 | static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten" , cl::init(Val: false), |
| 212 | cl::Hidden, |
| 213 | cl::desc("Enable the LoopFlatten Pass" )); |
| 214 | |
| 215 | // Experimentally allow loop header duplication. This should allow for better |
| 216 | // optimization at Oz, since loop-idiom recognition can then recognize things |
| 217 | // like memcpy. If this ends up being useful for many targets, we should drop |
| 218 | // this flag and make a code generation option that can be controlled |
| 219 | // independent of the opt level and exposed through the frontend. |
| 220 | static cl::opt<bool> ( |
| 221 | "enable-loop-header-duplication" , cl::init(Val: false), cl::Hidden, |
| 222 | cl::desc("Enable loop header duplication at any optimization level" )); |
| 223 | |
| 224 | static cl::opt<bool> |
| 225 | EnableDFAJumpThreading("enable-dfa-jump-thread" , |
| 226 | cl::desc("Enable DFA jump threading" ), |
| 227 | cl::init(Val: false), cl::Hidden); |
| 228 | |
| 229 | static cl::opt<bool> |
| 230 | EnableHotColdSplit("hot-cold-split" , |
| 231 | cl::desc("Enable hot-cold splitting pass" )); |
| 232 | |
| 233 | static cl::opt<bool> EnableIROutliner("ir-outliner" , cl::init(Val: false), |
| 234 | cl::Hidden, |
| 235 | cl::desc("Enable ir outliner pass" )); |
| 236 | |
| 237 | static cl::opt<bool> |
| 238 | DisablePreInliner("disable-preinline" , cl::init(Val: false), cl::Hidden, |
| 239 | cl::desc("Disable pre-instrumentation inliner" )); |
| 240 | |
| 241 | static cl::opt<int> PreInlineThreshold( |
| 242 | "preinline-threshold" , cl::Hidden, cl::init(Val: 75), |
| 243 | cl::desc("Control the amount of inlining in pre-instrumentation inliner " |
| 244 | "(default = 75)" )); |
| 245 | |
| 246 | static cl::opt<bool> |
| 247 | EnableGVNHoist("enable-gvn-hoist" , |
| 248 | cl::desc("Enable the GVN hoisting pass (default = off)" )); |
| 249 | |
| 250 | static cl::opt<bool> |
| 251 | EnableGVNSink("enable-gvn-sink" , |
| 252 | cl::desc("Enable the GVN sinking pass (default = off)" )); |
| 253 | |
| 254 | static cl::opt<bool> EnableJumpTableToSwitch( |
| 255 | "enable-jump-table-to-switch" , |
| 256 | cl::desc("Enable JumpTableToSwitch pass (default = off)" )); |
| 257 | |
| 258 | // This option is used in simplifying testing SampleFDO optimizations for |
| 259 | // profile loading. |
| 260 | static cl::opt<bool> |
| 261 | EnableCHR("enable-chr" , cl::init(Val: true), cl::Hidden, |
| 262 | cl::desc("Enable control height reduction optimization (CHR)" )); |
| 263 | |
| 264 | static cl::opt<bool> FlattenedProfileUsed( |
| 265 | "flattened-profile-used" , cl::init(Val: false), cl::Hidden, |
| 266 | cl::desc("Indicate the sample profile being used is flattened, i.e., " |
| 267 | "no inline hierarchy exists in the profile" )); |
| 268 | |
| 269 | static cl::opt<bool> |
| 270 | EnableMatrix("enable-matrix" , cl::init(Val: false), cl::Hidden, |
| 271 | cl::desc("Enable lowering of the matrix intrinsics" )); |
| 272 | |
| 273 | static cl::opt<bool> EnableConstraintElimination( |
| 274 | "enable-constraint-elimination" , cl::init(Val: true), cl::Hidden, |
| 275 | cl::desc( |
| 276 | "Enable pass to eliminate conditions based on linear constraints" )); |
| 277 | |
| 278 | static cl::opt<AttributorRunOption> AttributorRun( |
| 279 | "attributor-enable" , cl::Hidden, cl::init(Val: AttributorRunOption::NONE), |
| 280 | cl::desc("Enable the attributor inter-procedural deduction pass" ), |
| 281 | cl::values(clEnumValN(AttributorRunOption::ALL, "all" , |
| 282 | "enable all attributor runs" ), |
| 283 | clEnumValN(AttributorRunOption::MODULE, "module" , |
| 284 | "enable module-wide attributor runs" ), |
| 285 | clEnumValN(AttributorRunOption::CGSCC, "cgscc" , |
| 286 | "enable call graph SCC attributor runs" ), |
| 287 | clEnumValN(AttributorRunOption::NONE, "none" , |
| 288 | "disable attributor runs" ))); |
| 289 | |
| 290 | static cl::opt<bool> EnableSampledInstr( |
| 291 | "enable-sampled-instrumentation" , cl::init(Val: false), cl::Hidden, |
| 292 | cl::desc("Enable profile instrumentation sampling (default = off)" )); |
| 293 | static cl::opt<bool> UseLoopVersioningLICM( |
| 294 | "enable-loop-versioning-licm" , cl::init(Val: false), cl::Hidden, |
| 295 | cl::desc("Enable the experimental Loop Versioning LICM pass" )); |
| 296 | |
| 297 | static cl::opt<std::string> InstrumentColdFuncOnlyPath( |
| 298 | "instrument-cold-function-only-path" , cl::init(Val: "" ), |
| 299 | cl::desc("File path for cold function only instrumentation(requires use " |
| 300 | "with --pgo-instrument-cold-function-only)" ), |
| 301 | cl::Hidden); |
| 302 | |
| 303 | extern cl::opt<std::string> UseCtxProfile; |
| 304 | extern cl::opt<bool> PGOInstrumentColdFunctionOnly; |
| 305 | |
| 306 | namespace llvm { |
| 307 | extern cl::opt<bool> EnableMemProfContextDisambiguation; |
| 308 | } // namespace llvm |
| 309 | |
| 310 | PipelineTuningOptions::PipelineTuningOptions() { |
| 311 | LoopInterleaving = true; |
| 312 | LoopVectorization = true; |
| 313 | SLPVectorization = false; |
| 314 | LoopUnrolling = true; |
| 315 | LoopInterchange = EnableLoopInterchange; |
| 316 | ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; |
| 317 | LicmMssaOptCap = SetLicmMssaOptCap; |
| 318 | LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; |
| 319 | CallGraphProfile = true; |
| 320 | UnifiedLTO = false; |
| 321 | MergeFunctions = EnableMergeFunctions; |
| 322 | InlinerThreshold = -1; |
| 323 | EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; |
| 324 | } |
| 325 | |
| 326 | namespace llvm { |
| 327 | extern cl::opt<unsigned> MaxDevirtIterations; |
| 328 | } // namespace llvm |
| 329 | |
| 330 | void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, |
| 331 | OptimizationLevel Level) { |
| 332 | for (auto &C : PeepholeEPCallbacks) |
| 333 | C(FPM, Level); |
| 334 | } |
| 335 | void PassBuilder::invokeLateLoopOptimizationsEPCallbacks( |
| 336 | LoopPassManager &LPM, OptimizationLevel Level) { |
| 337 | for (auto &C : LateLoopOptimizationsEPCallbacks) |
| 338 | C(LPM, Level); |
| 339 | } |
| 340 | void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, |
| 341 | OptimizationLevel Level) { |
| 342 | for (auto &C : LoopOptimizerEndEPCallbacks) |
| 343 | C(LPM, Level); |
| 344 | } |
| 345 | void PassBuilder::invokeScalarOptimizerLateEPCallbacks( |
| 346 | FunctionPassManager &FPM, OptimizationLevel Level) { |
| 347 | for (auto &C : ScalarOptimizerLateEPCallbacks) |
| 348 | C(FPM, Level); |
| 349 | } |
| 350 | void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, |
| 351 | OptimizationLevel Level) { |
| 352 | for (auto &C : CGSCCOptimizerLateEPCallbacks) |
| 353 | C(CGPM, Level); |
| 354 | } |
| 355 | void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, |
| 356 | OptimizationLevel Level) { |
| 357 | for (auto &C : VectorizerStartEPCallbacks) |
| 358 | C(FPM, Level); |
| 359 | } |
| 360 | void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, |
| 361 | OptimizationLevel Level) { |
| 362 | for (auto &C : VectorizerEndEPCallbacks) |
| 363 | C(FPM, Level); |
| 364 | } |
| 365 | void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, |
| 366 | OptimizationLevel Level, |
| 367 | ThinOrFullLTOPhase Phase) { |
| 368 | for (auto &C : OptimizerEarlyEPCallbacks) |
| 369 | C(MPM, Level, Phase); |
| 370 | } |
| 371 | void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, |
| 372 | OptimizationLevel Level, |
| 373 | ThinOrFullLTOPhase Phase) { |
| 374 | for (auto &C : OptimizerLastEPCallbacks) |
| 375 | C(MPM, Level, Phase); |
| 376 | } |
| 377 | void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks( |
| 378 | ModulePassManager &MPM, OptimizationLevel Level) { |
| 379 | for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks) |
| 380 | C(MPM, Level); |
| 381 | } |
| 382 | void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks( |
| 383 | ModulePassManager &MPM, OptimizationLevel Level) { |
| 384 | for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) |
| 385 | C(MPM, Level); |
| 386 | } |
| 387 | void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM, |
| 388 | OptimizationLevel Level) { |
| 389 | for (auto &C : PipelineStartEPCallbacks) |
| 390 | C(MPM, Level); |
| 391 | } |
| 392 | void PassBuilder::invokePipelineEarlySimplificationEPCallbacks( |
| 393 | ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) { |
| 394 | for (auto &C : PipelineEarlySimplificationEPCallbacks) |
| 395 | C(MPM, Level, Phase); |
| 396 | } |
| 397 | |
| 398 | // Helper to add AnnotationRemarksPass. |
| 399 | static void (ModulePassManager &MPM) { |
| 400 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass())); |
| 401 | } |
| 402 | |
| 403 | // Helper to check if the current compilation phase is preparing for LTO |
| 404 | static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { |
| 405 | return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || |
| 406 | Phase == ThinOrFullLTOPhase::FullLTOPreLink; |
| 407 | } |
| 408 | |
| 409 | // Helper to check if the current compilation phase is LTO backend |
| 410 | static bool isLTOPostLink(ThinOrFullLTOPhase Phase) { |
| 411 | return Phase == ThinOrFullLTOPhase::ThinLTOPostLink || |
| 412 | Phase == ThinOrFullLTOPhase::FullLTOPostLink; |
| 413 | } |
| 414 | |
| 415 | // Helper to wrap conditionally Coro passes. |
| 416 | static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) { |
| 417 | // TODO: Skip passes according to Phase. |
| 418 | ModulePassManager CoroPM; |
| 419 | CoroPM.addPass(Pass: CoroEarlyPass()); |
| 420 | CGSCCPassManager CGPM; |
| 421 | CGPM.addPass(Pass: CoroSplitPass()); |
| 422 | CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 423 | CoroPM.addPass(Pass: CoroCleanupPass()); |
| 424 | CoroPM.addPass(Pass: GlobalDCEPass()); |
| 425 | return CoroConditionalWrapper(std::move(CoroPM)); |
| 426 | } |
| 427 | |
| 428 | // TODO: Investigate the cost/benefit of tail call elimination on debugging. |
| 429 | FunctionPassManager |
| 430 | PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, |
| 431 | ThinOrFullLTOPhase Phase) { |
| 432 | |
| 433 | FunctionPassManager FPM; |
| 434 | |
| 435 | if (AreStatisticsEnabled()) |
| 436 | FPM.addPass(Pass: CountVisitsPass()); |
| 437 | |
| 438 | // Form SSA out of local memory accesses after breaking apart aggregates into |
| 439 | // scalars. |
| 440 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 441 | |
| 442 | // Catch trivial redundancies |
| 443 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
| 444 | |
| 445 | // Hoisting of scalars and load expressions. |
| 446 | FPM.addPass( |
| 447 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 448 | FPM.addPass(Pass: InstCombinePass()); |
| 449 | |
| 450 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
| 451 | |
| 452 | invokePeepholeEPCallbacks(FPM, Level); |
| 453 | |
| 454 | FPM.addPass( |
| 455 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 456 | |
| 457 | // Form canonically associated expression trees, and simplify the trees using |
| 458 | // basic mathematical properties. For example, this will form (nearly) |
| 459 | // minimal multiplication trees. |
| 460 | FPM.addPass(Pass: ReassociatePass()); |
| 461 | |
| 462 | // Add the primary loop simplification pipeline. |
| 463 | // FIXME: Currently this is split into two loop pass pipelines because we run |
| 464 | // some function passes in between them. These can and should be removed |
| 465 | // and/or replaced by scheduling the loop pass equivalents in the correct |
| 466 | // positions. But those equivalent passes aren't powerful enough yet. |
| 467 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
| 468 | // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to |
| 469 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
| 470 | // `LoopInstSimplify`. |
| 471 | LoopPassManager LPM1, LPM2; |
| 472 | |
| 473 | // Simplify the loop body. We do this initially to clean up after other loop |
| 474 | // passes run, either when iterating on a loop or on inner loops with |
| 475 | // implications on the outer loop. |
| 476 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
| 477 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
| 478 | |
| 479 | // Try to remove as much code from the loop header as possible, |
| 480 | // to reduce amount of IR that will have to be duplicated. However, |
| 481 | // do not perform speculative hoisting the first time as LICM |
| 482 | // will destroy metadata that may not need to be destroyed if run |
| 483 | // after loop rotation. |
| 484 | // TODO: Investigate promotion cap for O1. |
| 485 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 486 | /*AllowSpeculation=*/false)); |
| 487 | |
| 488 | LPM1.addPass(Pass: LoopRotatePass(/* Disable header duplication */ true, |
| 489 | isLTOPreLink(Phase))); |
| 490 | // TODO: Investigate promotion cap for O1. |
| 491 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 492 | /*AllowSpeculation=*/true)); |
| 493 | LPM1.addPass(Pass: SimpleLoopUnswitchPass()); |
| 494 | if (EnableLoopFlatten) |
| 495 | LPM1.addPass(Pass: LoopFlattenPass()); |
| 496 | |
| 497 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
| 498 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
| 499 | |
| 500 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
| 501 | |
| 502 | LPM2.addPass(Pass: LoopDeletionPass()); |
| 503 | |
| 504 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
| 505 | // because it changes IR to makes profile annotation in back compile |
| 506 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
| 507 | // attributes so we need to make sure and allow the full unroll pass to pay |
| 508 | // attention to it. |
| 509 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
| 510 | PGOOpt->Action != PGOOptions::SampleUse) |
| 511 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
| 512 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
| 513 | PTO.ForgetAllSCEVInLoopUnroll)); |
| 514 | |
| 515 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
| 516 | |
| 517 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
| 518 | /*UseMemorySSA=*/true, |
| 519 | /*UseBlockFrequencyInfo=*/true)); |
| 520 | FPM.addPass( |
| 521 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 522 | FPM.addPass(Pass: InstCombinePass()); |
| 523 | // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. |
| 524 | // *All* loop passes must preserve it, in order to be able to use it. |
| 525 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
| 526 | /*UseMemorySSA=*/false, |
| 527 | /*UseBlockFrequencyInfo=*/false)); |
| 528 | |
| 529 | // Delete small array after loop unroll. |
| 530 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 531 | |
| 532 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
| 533 | FPM.addPass(Pass: MemCpyOptPass()); |
| 534 | |
| 535 | // Sparse conditional constant propagation. |
| 536 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
| 537 | // before... |
| 538 | FPM.addPass(Pass: SCCPPass()); |
| 539 | |
| 540 | // Delete dead bit computations (instcombine runs after to fold away the dead |
| 541 | // computations, and then ADCE will run later to exploit any new DCE |
| 542 | // opportunities that creates). |
| 543 | FPM.addPass(Pass: BDCEPass()); |
| 544 | |
| 545 | // Run instcombine after redundancy and dead bit elimination to exploit |
| 546 | // opportunities opened up by them. |
| 547 | FPM.addPass(Pass: InstCombinePass()); |
| 548 | invokePeepholeEPCallbacks(FPM, Level); |
| 549 | |
| 550 | FPM.addPass(Pass: CoroElidePass()); |
| 551 | |
| 552 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
| 553 | |
| 554 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
| 555 | // the simplifications and basic cleanup after all the simplifications. |
| 556 | // TODO: Investigate if this is too expensive. |
| 557 | FPM.addPass(Pass: ADCEPass()); |
| 558 | FPM.addPass( |
| 559 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 560 | FPM.addPass(Pass: InstCombinePass()); |
| 561 | invokePeepholeEPCallbacks(FPM, Level); |
| 562 | |
| 563 | return FPM; |
| 564 | } |
| 565 | |
| 566 | FunctionPassManager |
| 567 | PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, |
| 568 | ThinOrFullLTOPhase Phase) { |
| 569 | assert(Level != OptimizationLevel::O0 && "Must request optimizations!" ); |
| 570 | |
| 571 | // The O1 pipeline has a separate pipeline creation function to simplify |
| 572 | // construction readability. |
| 573 | if (Level.getSpeedupLevel() == 1) |
| 574 | return buildO1FunctionSimplificationPipeline(Level, Phase); |
| 575 | |
| 576 | FunctionPassManager FPM; |
| 577 | |
| 578 | if (AreStatisticsEnabled()) |
| 579 | FPM.addPass(Pass: CountVisitsPass()); |
| 580 | |
| 581 | // Form SSA out of local memory accesses after breaking apart aggregates into |
| 582 | // scalars. |
| 583 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 584 | |
| 585 | // Catch trivial redundancies |
| 586 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
| 587 | if (EnableKnowledgeRetention) |
| 588 | FPM.addPass(Pass: AssumeSimplifyPass()); |
| 589 | |
| 590 | // Hoisting of scalars and load expressions. |
| 591 | if (EnableGVNHoist) |
| 592 | FPM.addPass(Pass: GVNHoistPass()); |
| 593 | |
| 594 | // Global value numbering based sinking. |
| 595 | if (EnableGVNSink) { |
| 596 | FPM.addPass(Pass: GVNSinkPass()); |
| 597 | FPM.addPass( |
| 598 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 599 | } |
| 600 | |
| 601 | // Speculative execution if the target has divergent branches; otherwise nop. |
| 602 | FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); |
| 603 | |
| 604 | // Optimize based on known information about branches, and cleanup afterward. |
| 605 | FPM.addPass(Pass: JumpThreadingPass()); |
| 606 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
| 607 | |
| 608 | // Jump table to switch conversion. |
| 609 | if (EnableJumpTableToSwitch) |
| 610 | FPM.addPass(Pass: JumpTableToSwitchPass()); |
| 611 | |
| 612 | FPM.addPass( |
| 613 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 614 | FPM.addPass(Pass: InstCombinePass()); |
| 615 | FPM.addPass(Pass: AggressiveInstCombinePass()); |
| 616 | |
| 617 | if (!Level.isOptimizingForSize()) |
| 618 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
| 619 | |
| 620 | invokePeepholeEPCallbacks(FPM, Level); |
| 621 | |
| 622 | // For PGO use pipeline, try to optimize memory intrinsics such as memcpy |
| 623 | // using the size value profile. Don't perform this when optimizing for size. |
| 624 | if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && |
| 625 | !Level.isOptimizingForSize()) |
| 626 | FPM.addPass(Pass: PGOMemOPSizeOpt()); |
| 627 | |
| 628 | FPM.addPass(Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/ |
| 629 | isInstrumentedPGOUse())); |
| 630 | FPM.addPass( |
| 631 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 632 | |
| 633 | // Form canonically associated expression trees, and simplify the trees using |
| 634 | // basic mathematical properties. For example, this will form (nearly) |
| 635 | // minimal multiplication trees. |
| 636 | FPM.addPass(Pass: ReassociatePass()); |
| 637 | |
| 638 | if (EnableConstraintElimination) |
| 639 | FPM.addPass(Pass: ConstraintEliminationPass()); |
| 640 | |
| 641 | // Add the primary loop simplification pipeline. |
| 642 | // FIXME: Currently this is split into two loop pass pipelines because we run |
| 643 | // some function passes in between them. These can and should be removed |
| 644 | // and/or replaced by scheduling the loop pass equivalents in the correct |
| 645 | // positions. But those equivalent passes aren't powerful enough yet. |
| 646 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
| 647 | // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to |
| 648 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
| 649 | // `LoopInstSimplify`. |
| 650 | LoopPassManager LPM1, LPM2; |
| 651 | |
| 652 | // Simplify the loop body. We do this initially to clean up after other loop |
| 653 | // passes run, either when iterating on a loop or on inner loops with |
| 654 | // implications on the outer loop. |
| 655 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
| 656 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
| 657 | |
| 658 | // Try to remove as much code from the loop header as possible, |
| 659 | // to reduce amount of IR that will have to be duplicated. However, |
| 660 | // do not perform speculative hoisting the first time as LICM |
| 661 | // will destroy metadata that may not need to be destroyed if run |
| 662 | // after loop rotation. |
| 663 | // TODO: Investigate promotion cap for O1. |
| 664 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 665 | /*AllowSpeculation=*/false)); |
| 666 | |
| 667 | // Disable header duplication in loop rotation at -Oz. |
| 668 | LPM1.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
| 669 | Level != OptimizationLevel::Oz, |
| 670 | isLTOPreLink(Phase))); |
| 671 | // TODO: Investigate promotion cap for O1. |
| 672 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 673 | /*AllowSpeculation=*/true)); |
| 674 | LPM1.addPass( |
| 675 | Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); |
| 676 | if (EnableLoopFlatten) |
| 677 | LPM1.addPass(Pass: LoopFlattenPass()); |
| 678 | |
| 679 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
| 680 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
| 681 | |
| 682 | { |
| 683 | ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ; |
| 684 | ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
| 685 | OptimizationLevel::O3)); |
| 686 | LPM2.addPass(Pass: std::move(ExtraPasses)); |
| 687 | } |
| 688 | |
| 689 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
| 690 | |
| 691 | LPM2.addPass(Pass: LoopDeletionPass()); |
| 692 | |
| 693 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
| 694 | // because it changes IR to makes profile annotation in back compile |
| 695 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
| 696 | // attributes so we need to make sure and allow the full unroll pass to pay |
| 697 | // attention to it. |
| 698 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
| 699 | PGOOpt->Action != PGOOptions::SampleUse) |
| 700 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
| 701 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
| 702 | PTO.ForgetAllSCEVInLoopUnroll)); |
| 703 | |
| 704 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
| 705 | |
| 706 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
| 707 | /*UseMemorySSA=*/true, |
| 708 | /*UseBlockFrequencyInfo=*/true)); |
| 709 | FPM.addPass( |
| 710 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 711 | FPM.addPass(Pass: InstCombinePass()); |
| 712 | // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, |
| 713 | // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. |
| 714 | // *All* loop passes must preserve it, in order to be able to use it. |
| 715 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
| 716 | /*UseMemorySSA=*/false, |
| 717 | /*UseBlockFrequencyInfo=*/false)); |
| 718 | |
| 719 | // Delete small array after loop unroll. |
| 720 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 721 | |
| 722 | // Try vectorization/scalarization transforms that are both improvements |
| 723 | // themselves and can allow further folds with GVN and InstCombine. |
| 724 | FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); |
| 725 | |
| 726 | // Eliminate redundancies. |
| 727 | FPM.addPass(Pass: MergedLoadStoreMotionPass()); |
| 728 | if (RunNewGVN) |
| 729 | FPM.addPass(Pass: NewGVNPass()); |
| 730 | else |
| 731 | FPM.addPass(Pass: GVNPass()); |
| 732 | |
| 733 | // Sparse conditional constant propagation. |
| 734 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
| 735 | // before... |
| 736 | FPM.addPass(Pass: SCCPPass()); |
| 737 | |
| 738 | // Delete dead bit computations (instcombine runs after to fold away the dead |
| 739 | // computations, and then ADCE will run later to exploit any new DCE |
| 740 | // opportunities that creates). |
| 741 | FPM.addPass(Pass: BDCEPass()); |
| 742 | |
| 743 | // Run instcombine after redundancy and dead bit elimination to exploit |
| 744 | // opportunities opened up by them. |
| 745 | FPM.addPass(Pass: InstCombinePass()); |
| 746 | invokePeepholeEPCallbacks(FPM, Level); |
| 747 | |
| 748 | // Re-consider control flow based optimizations after redundancy elimination, |
| 749 | // redo DCE, etc. |
| 750 | if (EnableDFAJumpThreading) |
| 751 | FPM.addPass(Pass: DFAJumpThreadingPass()); |
| 752 | |
| 753 | FPM.addPass(Pass: JumpThreadingPass()); |
| 754 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
| 755 | |
| 756 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
| 757 | // the simplifications and basic cleanup after all the simplifications. |
| 758 | // TODO: Investigate if this is too expensive. |
| 759 | FPM.addPass(Pass: ADCEPass()); |
| 760 | |
| 761 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
| 762 | FPM.addPass(Pass: MemCpyOptPass()); |
| 763 | |
| 764 | FPM.addPass(Pass: DSEPass()); |
| 765 | FPM.addPass(Pass: MoveAutoInitPass()); |
| 766 | |
| 767 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 768 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 769 | /*AllowSpeculation=*/true), |
| 770 | /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); |
| 771 | |
| 772 | FPM.addPass(Pass: CoroElidePass()); |
| 773 | |
| 774 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
| 775 | |
| 776 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 777 | .convertSwitchRangeToICmp(B: true) |
| 778 | .hoistCommonInsts(B: true) |
| 779 | .sinkCommonInsts(B: true))); |
| 780 | FPM.addPass(Pass: InstCombinePass()); |
| 781 | invokePeepholeEPCallbacks(FPM, Level); |
| 782 | |
| 783 | return FPM; |
| 784 | } |
| 785 | |
| 786 | void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { |
| 787 | MPM.addPass(Pass: CanonicalizeAliasesPass()); |
| 788 | MPM.addPass(Pass: NameAnonGlobalPass()); |
| 789 | } |
| 790 | |
| 791 | void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM, |
| 792 | OptimizationLevel Level, |
| 793 | ThinOrFullLTOPhase LTOPhase) { |
| 794 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
| 795 | if (DisablePreInliner) |
| 796 | return; |
| 797 | InlineParams IP; |
| 798 | |
| 799 | IP.DefaultThreshold = PreInlineThreshold; |
| 800 | |
| 801 | // FIXME: The hint threshold has the same value used by the regular inliner |
| 802 | // when not optimzing for size. This should probably be lowered after |
| 803 | // performance testing. |
| 804 | // FIXME: this comment is cargo culted from the old pass manager, revisit). |
| 805 | IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; |
| 806 | ModuleInlinerWrapperPass MIWP( |
| 807 | IP, /* MandatoryFirst */ true, |
| 808 | InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner}); |
| 809 | CGSCCPassManager &CGPipeline = MIWP.getPM(); |
| 810 | |
| 811 | FunctionPassManager FPM; |
| 812 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 813 | FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies. |
| 814 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( |
| 815 | B: true))); // Merge & remove basic blocks. |
| 816 | FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences. |
| 817 | invokePeepholeEPCallbacks(FPM, Level); |
| 818 | |
| 819 | CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
| 820 | Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 821 | |
| 822 | MPM.addPass(Pass: std::move(MIWP)); |
| 823 | |
| 824 | // Delete anything that is now dead to make sure that we don't instrument |
| 825 | // dead code. Instrumentation can end up keeping dead code around and |
| 826 | // dramatically increase code size. |
| 827 | MPM.addPass(Pass: GlobalDCEPass()); |
| 828 | } |
| 829 | |
| 830 | void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM, |
| 831 | OptimizationLevel Level) { |
| 832 | if (EnablePostPGOLoopRotation) { |
| 833 | // Disable header duplication in loop rotation at -Oz. |
| 834 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 835 | Pass: createFunctionToLoopPassAdaptor( |
| 836 | Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
| 837 | Level != OptimizationLevel::Oz), |
| 838 | /*UseMemorySSA=*/false, |
| 839 | /*UseBlockFrequencyInfo=*/false), |
| 840 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 841 | } |
| 842 | } |
| 843 | |
| 844 | void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, |
| 845 | OptimizationLevel Level, bool RunProfileGen, |
| 846 | bool IsCS, bool AtomicCounterUpdate, |
| 847 | std::string ProfileFile, |
| 848 | std::string ProfileRemappingFile, |
| 849 | IntrusiveRefCntPtr<vfs::FileSystem> FS) { |
| 850 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
| 851 | |
| 852 | if (!RunProfileGen) { |
| 853 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
| 854 | MPM.addPass( |
| 855 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
| 856 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 857 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 858 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 859 | return; |
| 860 | } |
| 861 | |
| 862 | // Perform PGO instrumentation. |
| 863 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO |
| 864 | : PGOInstrumentationType::FDO)); |
| 865 | |
| 866 | addPostPGOLoopRotation(MPM, Level); |
| 867 | // Add the profile lowering pass. |
| 868 | InstrProfOptions Options; |
| 869 | if (!ProfileFile.empty()) |
| 870 | Options.InstrProfileOutput = ProfileFile; |
| 871 | // Do counter promotion at Level greater than O0. |
| 872 | Options.DoCounterPromotion = true; |
| 873 | Options.UseBFIInPromotion = IsCS; |
| 874 | if (EnableSampledInstr) { |
| 875 | Options.Sampling = true; |
| 876 | // With sampling, there is little beneifit to enable counter promotion. |
| 877 | // But note that sampling does work with counter promotion. |
| 878 | Options.DoCounterPromotion = false; |
| 879 | } |
| 880 | Options.Atomic = AtomicCounterUpdate; |
| 881 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
| 882 | } |
| 883 | |
| 884 | void PassBuilder::addPGOInstrPassesForO0( |
| 885 | ModulePassManager &MPM, bool RunProfileGen, bool IsCS, |
| 886 | bool AtomicCounterUpdate, std::string ProfileFile, |
| 887 | std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) { |
| 888 | if (!RunProfileGen) { |
| 889 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
| 890 | MPM.addPass( |
| 891 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
| 892 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 893 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 894 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 895 | return; |
| 896 | } |
| 897 | |
| 898 | // Perform PGO instrumentation. |
| 899 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO |
| 900 | : PGOInstrumentationType::FDO)); |
| 901 | // Add the profile lowering pass. |
| 902 | InstrProfOptions Options; |
| 903 | if (!ProfileFile.empty()) |
| 904 | Options.InstrProfileOutput = ProfileFile; |
| 905 | // Do not do counter promotion at O0. |
| 906 | Options.DoCounterPromotion = false; |
| 907 | Options.UseBFIInPromotion = IsCS; |
| 908 | Options.Atomic = AtomicCounterUpdate; |
| 909 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
| 910 | } |
| 911 | |
| 912 | static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { |
| 913 | return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel()); |
| 914 | } |
| 915 | |
| 916 | ModuleInlinerWrapperPass |
| 917 | PassBuilder::buildInlinerPipeline(OptimizationLevel Level, |
| 918 | ThinOrFullLTOPhase Phase) { |
| 919 | InlineParams IP; |
| 920 | if (PTO.InlinerThreshold == -1) |
| 921 | IP = getInlineParamsFromOptLevel(Level); |
| 922 | else |
| 923 | IP = getInlineParams(Threshold: PTO.InlinerThreshold); |
| 924 | // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO, |
| 925 | // set hot-caller threshold to 0 to disable hot |
| 926 | // callsite inline (as much as possible [1]) because it makes |
| 927 | // profile annotation in the backend inaccurate. |
| 928 | // |
| 929 | // [1] Note the cost of a function could be below zero due to erased |
| 930 | // prologue / epilogue. |
| 931 | if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
| 932 | IP.HotCallSiteThreshold = 0; |
| 933 | |
| 934 | if (PGOOpt) |
| 935 | IP.EnableDeferral = EnablePGOInlineDeferral; |
| 936 | |
| 937 | ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, |
| 938 | InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner}, |
| 939 | UseInlineAdvisor, MaxDevirtIterations); |
| 940 | |
| 941 | // Require the GlobalsAA analysis for the module so we can query it within |
| 942 | // the CGSCC pipeline. |
| 943 | if (EnableGlobalAnalyses) { |
| 944 | MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
| 945 | // Invalidate AAManager so it can be recreated and pick up the newly |
| 946 | // available GlobalsAA. |
| 947 | MIWP.addModulePass( |
| 948 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
| 949 | } |
| 950 | |
| 951 | // Require the ProfileSummaryAnalysis for the module so we can query it within |
| 952 | // the inliner pass. |
| 953 | MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 954 | |
| 955 | // Now begin the main postorder CGSCC pipeline. |
| 956 | // FIXME: The current CGSCC pipeline has its origins in the legacy pass |
| 957 | // manager and trying to emulate its precise behavior. Much of this doesn't |
| 958 | // make a lot of sense and we should revisit the core CGSCC structure. |
| 959 | CGSCCPassManager &MainCGPipeline = MIWP.getPM(); |
| 960 | |
| 961 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
| 962 | // generally clean up exception handling overhead. It isn't clear this is |
| 963 | // valuable as the inliner doesn't currently care whether it is inlining an |
| 964 | // invoke or a call. |
| 965 | |
| 966 | if (AttributorRun & AttributorRunOption::CGSCC) |
| 967 | MainCGPipeline.addPass(Pass: AttributorCGSCCPass()); |
| 968 | |
| 969 | // Deduce function attributes. We do another run of this after the function |
| 970 | // simplification pipeline, so this only needs to run when it could affect the |
| 971 | // function simplification pipeline, which is only the case with recursive |
| 972 | // functions. |
| 973 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)); |
| 974 | |
| 975 | // When at O3 add argument promotion to the pass pipeline. |
| 976 | // FIXME: It isn't at all clear why this should be limited to O3. |
| 977 | if (Level == OptimizationLevel::O3) |
| 978 | MainCGPipeline.addPass(Pass: ArgumentPromotionPass()); |
| 979 | |
| 980 | // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if |
| 981 | // there are no OpenMP runtime calls present in the module. |
| 982 | if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) |
| 983 | MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass(Phase)); |
| 984 | |
| 985 | invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level); |
| 986 | |
| 987 | // Add the core function simplification pipeline nested inside the |
| 988 | // CGSCC walk. |
| 989 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
| 990 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
| 991 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true)); |
| 992 | |
| 993 | // Finally, deduce any function attributes based on the fully simplified |
| 994 | // function. |
| 995 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass()); |
| 996 | |
| 997 | // Mark that the function is fully simplified and that it shouldn't be |
| 998 | // simplified again if we somehow revisit it due to CGSCC mutations unless |
| 999 | // it's been modified since. |
| 1000 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
| 1001 | Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>())); |
| 1002 | |
| 1003 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { |
| 1004 | MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
| 1005 | MainCGPipeline.addPass(Pass: CoroAnnotationElidePass()); |
| 1006 | } |
| 1007 | |
| 1008 | // Make sure we don't affect potential future NoRerun CGSCC adaptors. |
| 1009 | MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor( |
| 1010 | Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); |
| 1011 | |
| 1012 | return MIWP; |
| 1013 | } |
| 1014 | |
| 1015 | ModulePassManager |
| 1016 | PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, |
| 1017 | ThinOrFullLTOPhase Phase) { |
| 1018 | ModulePassManager MPM; |
| 1019 | |
| 1020 | InlineParams IP = getInlineParamsFromOptLevel(Level); |
| 1021 | // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO, |
| 1022 | // set hot-caller threshold to 0 to disable hot |
| 1023 | // callsite inline (as much as possible [1]) because it makes |
| 1024 | // profile annotation in the backend inaccurate. |
| 1025 | // |
| 1026 | // [1] Note the cost of a function could be below zero due to erased |
| 1027 | // prologue / epilogue. |
| 1028 | if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
| 1029 | IP.HotCallSiteThreshold = 0; |
| 1030 | |
| 1031 | if (PGOOpt) |
| 1032 | IP.EnableDeferral = EnablePGOInlineDeferral; |
| 1033 | |
| 1034 | // The inline deferral logic is used to avoid losing some |
| 1035 | // inlining chance in future. It is helpful in SCC inliner, in which |
| 1036 | // inlining is processed in bottom-up order. |
| 1037 | // While in module inliner, the inlining order is a priority-based order |
| 1038 | // by default. The inline deferral is unnecessary there. So we disable the |
| 1039 | // inline deferral logic in module inliner. |
| 1040 | IP.EnableDeferral = false; |
| 1041 | |
| 1042 | MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); |
| 1043 | if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) { |
| 1044 | MPM.addPass(Pass: GlobalOptPass()); |
| 1045 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1046 | MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false)); |
| 1047 | } |
| 1048 | |
| 1049 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 1050 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
| 1051 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1052 | |
| 1053 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { |
| 1054 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
| 1055 | Pass: CoroSplitPass(Level != OptimizationLevel::O0))); |
| 1056 | MPM.addPass( |
| 1057 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: CoroAnnotationElidePass())); |
| 1058 | } |
| 1059 | |
| 1060 | return MPM; |
| 1061 | } |
| 1062 | |
| 1063 | ModulePassManager |
| 1064 | PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, |
| 1065 | ThinOrFullLTOPhase Phase) { |
| 1066 | assert(Level != OptimizationLevel::O0 && |
| 1067 | "Should not be used for O0 pipeline" ); |
| 1068 | |
| 1069 | assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink && |
| 1070 | "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!" ); |
| 1071 | |
| 1072 | ModulePassManager MPM; |
| 1073 | |
| 1074 | // Place pseudo probe instrumentation as the first pass of the pipeline to |
| 1075 | // minimize the impact of optimization changes. |
| 1076 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
| 1077 | Phase != ThinOrFullLTOPhase::ThinLTOPostLink) |
| 1078 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
| 1079 | |
| 1080 | bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); |
| 1081 | |
| 1082 | // In ThinLTO mode, when flattened profile is used, all the available |
| 1083 | // profile information will be annotated in PreLink phase so there is |
| 1084 | // no need to load the profile again in PostLink. |
| 1085 | bool LoadSampleProfile = |
| 1086 | HasSampleProfile && |
| 1087 | !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); |
| 1088 | |
| 1089 | // During the ThinLTO backend phase we perform early indirect call promotion |
| 1090 | // here, before globalopt. Otherwise imported available_externally functions |
| 1091 | // look unreferenced and are removed. If we are going to load the sample |
| 1092 | // profile then defer until later. |
| 1093 | // TODO: See if we can move later and consolidate with the location where |
| 1094 | // we perform ICP when we are loading a sample profile. |
| 1095 | // TODO: We pass HasSampleProfile (whether there was a sample profile file |
| 1096 | // passed to the compile) to the SamplePGO flag of ICP. This is used to |
| 1097 | // determine whether the new direct calls are annotated with prof metadata. |
| 1098 | // Ideally this should be determined from whether the IR is annotated with |
| 1099 | // sample profile, and not whether the a sample profile was provided on the |
| 1100 | // command line. E.g. for flattened profiles where we will not be reloading |
| 1101 | // the sample profile in the ThinLTO backend, we ideally shouldn't have to |
| 1102 | // provide the sample profile file. |
| 1103 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) |
| 1104 | MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); |
| 1105 | |
| 1106 | // Create an early function pass manager to cleanup the output of the |
| 1107 | // frontend. Not necessary with LTO post link pipelines since the pre link |
| 1108 | // pipeline already cleaned up the frontend output. |
| 1109 | if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) { |
| 1110 | // Do basic inference of function attributes from known properties of system |
| 1111 | // libraries and other oracles. |
| 1112 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
| 1113 | MPM.addPass(Pass: CoroEarlyPass()); |
| 1114 | |
| 1115 | FunctionPassManager EarlyFPM; |
| 1116 | EarlyFPM.addPass(Pass: EntryExitInstrumenterPass(/*PostInlining=*/false)); |
| 1117 | // Lower llvm.expect to metadata before attempting transforms. |
| 1118 | // Compare/branch metadata may alter the behavior of passes like |
| 1119 | // SimplifyCFG. |
| 1120 | EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass()); |
| 1121 | EarlyFPM.addPass(Pass: SimplifyCFGPass()); |
| 1122 | EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 1123 | EarlyFPM.addPass(Pass: EarlyCSEPass()); |
| 1124 | if (Level == OptimizationLevel::O3) |
| 1125 | EarlyFPM.addPass(Pass: CallSiteSplittingPass()); |
| 1126 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 1127 | Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1128 | } |
| 1129 | |
| 1130 | if (LoadSampleProfile) { |
| 1131 | // Annotate sample profile right after early FPM to ensure freshness of |
| 1132 | // the debug info. |
| 1133 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
| 1134 | PGOOpt->ProfileRemappingFile, Phase)); |
| 1135 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 1136 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 1137 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 1138 | // Do not invoke ICP in the LTOPrelink phase as it makes it hard |
| 1139 | // for the profile annotation to be accurate in the LTO backend. |
| 1140 | if (!isLTOPreLink(Phase)) |
| 1141 | // We perform early indirect call promotion here, before globalopt. |
| 1142 | // This is important for the ThinLTO backend phase because otherwise |
| 1143 | // imported available_externally functions look unreferenced and are |
| 1144 | // removed. |
| 1145 | MPM.addPass( |
| 1146 | Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); |
| 1147 | } |
| 1148 | |
| 1149 | // Try to perform OpenMP specific optimizations on the module. This is a |
| 1150 | // (quick!) no-op if there are no OpenMP runtime calls present in the module. |
| 1151 | MPM.addPass(Pass: OpenMPOptPass(Phase)); |
| 1152 | |
| 1153 | if (AttributorRun & AttributorRunOption::MODULE) |
| 1154 | MPM.addPass(Pass: AttributorPass()); |
| 1155 | |
| 1156 | // Lower type metadata and the type.test intrinsic in the ThinLTO |
| 1157 | // post link pipeline after ICP. This is to enable usage of the type |
| 1158 | // tests in ICP sequences. |
| 1159 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) |
| 1160 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1161 | lowertypetests::DropTestKind::Assume)); |
| 1162 | |
| 1163 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase); |
| 1164 | |
| 1165 | // Interprocedural constant propagation now that basic cleanup has occurred |
| 1166 | // and prior to optimizing globals. |
| 1167 | // FIXME: This position in the pipeline hasn't been carefully considered in |
| 1168 | // years, it should be re-analyzed. |
| 1169 | MPM.addPass(Pass: IPSCCPPass( |
| 1170 | IPSCCPOptions(/*AllowFuncSpec=*/ |
| 1171 | Level != OptimizationLevel::Os && |
| 1172 | Level != OptimizationLevel::Oz && |
| 1173 | !isLTOPreLink(Phase)))); |
| 1174 | |
| 1175 | // Attach metadata to indirect call sites indicating the set of functions |
| 1176 | // they may target at run-time. This should follow IPSCCP. |
| 1177 | MPM.addPass(Pass: CalledValuePropagationPass()); |
| 1178 | |
| 1179 | // Optimize globals to try and fold them into constants. |
| 1180 | MPM.addPass(Pass: GlobalOptPass()); |
| 1181 | |
| 1182 | // Create a small function pass pipeline to cleanup after all the global |
| 1183 | // optimizations. |
| 1184 | FunctionPassManager GlobalCleanupPM; |
| 1185 | // FIXME: Should this instead by a run of SROA? |
| 1186 | GlobalCleanupPM.addPass(Pass: PromotePass()); |
| 1187 | GlobalCleanupPM.addPass(Pass: InstCombinePass()); |
| 1188 | invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level); |
| 1189 | GlobalCleanupPM.addPass( |
| 1190 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 1191 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM), |
| 1192 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1193 | |
| 1194 | // We already asserted this happens in non-FullLTOPostLink earlier. |
| 1195 | const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink; |
| 1196 | // Enable contextual profiling instrumentation. |
| 1197 | const bool IsCtxProfGen = |
| 1198 | IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled(); |
| 1199 | const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink; |
| 1200 | const bool IsPGOInstrGen = |
| 1201 | IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr; |
| 1202 | const bool IsPGOInstrUse = |
| 1203 | IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse; |
| 1204 | const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty(); |
| 1205 | // We don't want to mix pgo ctx gen and pgo gen; we also don't currently |
| 1206 | // enable ctx profiling from the frontend. |
| 1207 | assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) && |
| 1208 | "Enabling both instrumented PGO and contextual instrumentation is not " |
| 1209 | "supported." ); |
| 1210 | const bool IsCtxProfUse = |
| 1211 | !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink; |
| 1212 | |
| 1213 | assert( |
| 1214 | (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) && |
| 1215 | "--instrument-cold-function-only-path is provided but " |
| 1216 | "--pgo-instrument-cold-function-only is not enabled" ); |
| 1217 | const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly && |
| 1218 | IsPGOPreLink && |
| 1219 | !InstrumentColdFuncOnlyPath.empty(); |
| 1220 | |
| 1221 | if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen || |
| 1222 | IsCtxProfUse || IsColdFuncOnlyInstrGen) |
| 1223 | addPreInlinerPasses(MPM, Level, LTOPhase: Phase); |
| 1224 | |
| 1225 | // Add all the requested passes for instrumentation PGO, if requested. |
| 1226 | if (IsPGOInstrGen || IsPGOInstrUse) { |
| 1227 | addPGOInstrPasses(MPM, Level, |
| 1228 | /*RunProfileGen=*/IsPGOInstrGen, |
| 1229 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 1230 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
| 1231 | FS: PGOOpt->FS); |
| 1232 | } else if (IsCtxProfGen || IsCtxProfUse) { |
| 1233 | MPM.addPass(Pass: PGOInstrumentationGen(PGOInstrumentationType::CTXPROF)); |
| 1234 | // In pre-link, we just want the instrumented IR. We use the contextual |
| 1235 | // profile in the post-thinlink phase. |
| 1236 | // The instrumentation will be removed in post-thinlink after IPO. |
| 1237 | // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this |
| 1238 | // mechanism for GUIDs. |
| 1239 | MPM.addPass(Pass: AssignGUIDPass()); |
| 1240 | if (IsCtxProfUse) { |
| 1241 | MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true)); |
| 1242 | return MPM; |
| 1243 | } |
| 1244 | // Block further inlining in the instrumented ctxprof case. This avoids |
| 1245 | // confusingly collecting profiles for the same GUID corresponding to |
| 1246 | // different variants of the function. We could do like PGO and identify |
| 1247 | // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for |
| 1248 | // thinlto to happen before performing any further optimizations, it's |
| 1249 | // unnecessary to collect profiles for non-prevailing copies. |
| 1250 | MPM.addPass(Pass: NoinlineNonPrevailing()); |
| 1251 | addPostPGOLoopRotation(MPM, Level); |
| 1252 | MPM.addPass(Pass: PGOCtxProfLoweringPass()); |
| 1253 | } else if (IsColdFuncOnlyInstrGen) { |
| 1254 | addPGOInstrPasses( |
| 1255 | MPM, Level, /* RunProfileGen */ true, /* IsCS */ false, |
| 1256 | /* AtomicCounterUpdate */ false, ProfileFile: InstrumentColdFuncOnlyPath, |
| 1257 | /* ProfileRemappingFile */ "" , FS: IntrusiveRefCntPtr<vfs::FileSystem>()); |
| 1258 | } |
| 1259 | |
| 1260 | if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen) |
| 1261 | MPM.addPass(Pass: PGOIndirectCallPromotion(false, false)); |
| 1262 | |
| 1263 | if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) |
| 1264 | MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile, |
| 1265 | EnableSampledInstr)); |
| 1266 | |
| 1267 | if (IsMemprofUse) |
| 1268 | MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); |
| 1269 | |
| 1270 | if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse || |
| 1271 | PGOOpt->Action == PGOOptions::SampleUse)) |
| 1272 | MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); |
| 1273 | |
| 1274 | MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); |
| 1275 | |
| 1276 | if (EnableModuleInliner) |
| 1277 | MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase)); |
| 1278 | else |
| 1279 | MPM.addPass(Pass: buildInlinerPipeline(Level, Phase)); |
| 1280 | |
| 1281 | // Remove any dead arguments exposed by cleanups, constant folding globals, |
| 1282 | // and argument promotion. |
| 1283 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
| 1284 | |
| 1285 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) |
| 1286 | MPM.addPass(Pass: SimplifyTypeTestsPass()); |
| 1287 | |
| 1288 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) |
| 1289 | MPM.addPass(Pass: CoroCleanupPass()); |
| 1290 | |
| 1291 | // Optimize globals now that functions are fully simplified. |
| 1292 | MPM.addPass(Pass: GlobalOptPass()); |
| 1293 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1294 | |
| 1295 | return MPM; |
| 1296 | } |
| 1297 | |
| 1298 | /// TODO: Should LTO cause any differences to this set of passes? |
| 1299 | void PassBuilder::addVectorPasses(OptimizationLevel Level, |
| 1300 | FunctionPassManager &FPM, bool IsFullLTO) { |
| 1301 | FPM.addPass(Pass: LoopVectorizePass( |
| 1302 | LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); |
| 1303 | |
| 1304 | FPM.addPass(Pass: InferAlignmentPass()); |
| 1305 | if (IsFullLTO) { |
| 1306 | // The vectorizer may have significantly shortened a loop body; unroll |
| 1307 | // again. Unroll small loops to hide loop backedge latency and saturate any |
| 1308 | // parallel execution resources of an out-of-order processor. We also then |
| 1309 | // need to clean up redundancies and loop invariant code. |
| 1310 | // FIXME: It would be really good to use a loop-integrated instruction |
| 1311 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
| 1312 | // across the loop nests. |
| 1313 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
| 1314 | if (EnableUnrollAndJam && PTO.LoopUnrolling) |
| 1315 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1316 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
| 1317 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
| 1318 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
| 1319 | PTO.ForgetAllSCEVInLoopUnroll))); |
| 1320 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
| 1321 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
| 1322 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
| 1323 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
| 1324 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
| 1325 | // or SimplifyCFG passes scheduled after us, that would cleanup |
| 1326 | // the CFG mess this may created if allowed to modify CFG, so forbid that. |
| 1327 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
| 1328 | } |
| 1329 | |
| 1330 | if (!IsFullLTO) { |
| 1331 | // Eliminate loads by forwarding stores from the previous iteration to loads |
| 1332 | // of the current iteration. |
| 1333 | FPM.addPass(Pass: LoopLoadEliminationPass()); |
| 1334 | } |
| 1335 | // Cleanup after the loop optimization passes. |
| 1336 | FPM.addPass(Pass: InstCombinePass()); |
| 1337 | |
| 1338 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
| 1339 | ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ; |
| 1340 | // At higher optimization levels, try to clean up any runtime overlap and |
| 1341 | // alignment checks inserted by the vectorizer. We want to track correlated |
| 1342 | // runtime checks for two inner loops in the same outer loop, fold any |
| 1343 | // common computations, hoist loop-invariant aspects out of any outer loop, |
| 1344 | // and unswitch the runtime checks if possible. Once hoisted, we may have |
| 1345 | // dead (or speculatable) control flows or more combining opportunities. |
| 1346 | ExtraPasses.addPass(Pass: EarlyCSEPass()); |
| 1347 | ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass()); |
| 1348 | ExtraPasses.addPass(Pass: InstCombinePass()); |
| 1349 | LoopPassManager LPM; |
| 1350 | LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 1351 | /*AllowSpeculation=*/true)); |
| 1352 | LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
| 1353 | OptimizationLevel::O3)); |
| 1354 | ExtraPasses.addPass( |
| 1355 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true, |
| 1356 | /*UseBlockFrequencyInfo=*/true)); |
| 1357 | ExtraPasses.addPass( |
| 1358 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 1359 | ExtraPasses.addPass(Pass: InstCombinePass()); |
| 1360 | FPM.addPass(Pass: std::move(ExtraPasses)); |
| 1361 | } |
| 1362 | |
| 1363 | // Now that we've formed fast to execute loop structures, we do further |
| 1364 | // optimizations. These are run afterward as they might block doing complex |
| 1365 | // analyses and transforms such as what are needed for loop vectorization. |
| 1366 | |
| 1367 | // Cleanup after loop vectorization, etc. Simplification passes like CVP and |
| 1368 | // GVN, loop transforms, and others have already run, so it's now better to |
| 1369 | // convert to more optimized IR using more aggressive simplify CFG options. |
| 1370 | // The extra sinking transform can create larger basic blocks, so do this |
| 1371 | // before SLP vectorization. |
| 1372 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 1373 | .forwardSwitchCondToPhi(B: true) |
| 1374 | .convertSwitchRangeToICmp(B: true) |
| 1375 | .convertSwitchToLookupTable(B: true) |
| 1376 | .needCanonicalLoops(B: false) |
| 1377 | .hoistCommonInsts(B: true) |
| 1378 | .sinkCommonInsts(B: true))); |
| 1379 | |
| 1380 | if (IsFullLTO) { |
| 1381 | FPM.addPass(Pass: SCCPPass()); |
| 1382 | FPM.addPass(Pass: InstCombinePass()); |
| 1383 | FPM.addPass(Pass: BDCEPass()); |
| 1384 | } |
| 1385 | |
| 1386 | // Optimize parallel scalar instruction chains into SIMD instructions. |
| 1387 | if (PTO.SLPVectorization) { |
| 1388 | FPM.addPass(Pass: SLPVectorizerPass()); |
| 1389 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
| 1390 | FPM.addPass(Pass: EarlyCSEPass()); |
| 1391 | } |
| 1392 | } |
| 1393 | // Enhance/cleanup vector code. |
| 1394 | FPM.addPass(Pass: VectorCombinePass()); |
| 1395 | |
| 1396 | if (!IsFullLTO) { |
| 1397 | FPM.addPass(Pass: InstCombinePass()); |
| 1398 | // Unroll small loops to hide loop backedge latency and saturate any |
| 1399 | // parallel execution resources of an out-of-order processor. We also then |
| 1400 | // need to clean up redundancies and loop invariant code. |
| 1401 | // FIXME: It would be really good to use a loop-integrated instruction |
| 1402 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
| 1403 | // across the loop nests. |
| 1404 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
| 1405 | if (EnableUnrollAndJam && PTO.LoopUnrolling) { |
| 1406 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1407 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
| 1408 | } |
| 1409 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
| 1410 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
| 1411 | PTO.ForgetAllSCEVInLoopUnroll))); |
| 1412 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
| 1413 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
| 1414 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
| 1415 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
| 1416 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
| 1417 | // or SimplifyCFG passes scheduled after us, that would cleanup |
| 1418 | // the CFG mess this may created if allowed to modify CFG, so forbid that. |
| 1419 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
| 1420 | } |
| 1421 | |
| 1422 | FPM.addPass(Pass: InferAlignmentPass()); |
| 1423 | FPM.addPass(Pass: InstCombinePass()); |
| 1424 | |
| 1425 | // This is needed for two reasons: |
| 1426 | // 1. It works around problems that instcombine introduces, such as sinking |
| 1427 | // expensive FP divides into loops containing multiplications using the |
| 1428 | // divide result. |
| 1429 | // 2. It helps to clean up some loop-invariant code created by the loop |
| 1430 | // unroll pass when IsFullLTO=false. |
| 1431 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1432 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 1433 | /*AllowSpeculation=*/true), |
| 1434 | /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); |
| 1435 | |
| 1436 | // Now that we've vectorized and unrolled loops, we may have more refined |
| 1437 | // alignment information, try to re-derive it here. |
| 1438 | FPM.addPass(Pass: AlignmentFromAssumptionsPass()); |
| 1439 | } |
| 1440 | |
| 1441 | ModulePassManager |
| 1442 | PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, |
| 1443 | ThinOrFullLTOPhase LTOPhase) { |
| 1444 | const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase); |
| 1445 | ModulePassManager MPM; |
| 1446 | |
| 1447 | // Run partial inlining pass to partially inline functions that have |
| 1448 | // large bodies. |
| 1449 | if (RunPartialInlining) |
| 1450 | MPM.addPass(Pass: PartialInlinerPass()); |
| 1451 | |
| 1452 | // Remove avail extern fns and globals definitions since we aren't compiling |
| 1453 | // an object file for later LTO. For LTO we want to preserve these so they |
| 1454 | // are eligible for inlining at link-time. Note if they are unreferenced they |
| 1455 | // will be removed by GlobalDCE later, so this only impacts referenced |
| 1456 | // available externally globals. Eventually they will be suppressed during |
| 1457 | // codegen, but eliminating here enables more opportunity for GlobalDCE as it |
| 1458 | // may make globals referenced by available external functions dead and saves |
| 1459 | // running remaining passes on the eliminated functions. These should be |
| 1460 | // preserved during prelinking for link-time inlining decisions. |
| 1461 | if (!LTOPreLink) |
| 1462 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
| 1463 | |
| 1464 | // Do RPO function attribute inference across the module to forward-propagate |
| 1465 | // attributes where applicable. |
| 1466 | // FIXME: Is this really an optimization rather than a canonicalization? |
| 1467 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
| 1468 | |
| 1469 | // Do a post inline PGO instrumentation and use pass. This is a context |
| 1470 | // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as |
| 1471 | // cross-module inline has not been done yet. The context sensitive |
| 1472 | // instrumentation is after all the inlines are done. |
| 1473 | if (!LTOPreLink && PGOOpt) { |
| 1474 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
| 1475 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
| 1476 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 1477 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
| 1478 | FS: PGOOpt->FS); |
| 1479 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
| 1480 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
| 1481 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 1482 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
| 1483 | FS: PGOOpt->FS); |
| 1484 | } |
| 1485 | |
| 1486 | // Re-compute GlobalsAA here prior to function passes. This is particularly |
| 1487 | // useful as the above will have inlined, DCE'ed, and function-attr |
| 1488 | // propagated everything. We should at this point have a reasonably minimal |
| 1489 | // and richly annotated call graph. By computing aliasing and mod/ref |
| 1490 | // information for all local globals here, the late loop passes and notably |
| 1491 | // the vectorizer will be able to use them to help recognize vectorizable |
| 1492 | // memory operations. |
| 1493 | if (EnableGlobalAnalyses) |
| 1494 | MPM.addPass(Pass: RecomputeGlobalsAAPass()); |
| 1495 | |
| 1496 | invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase: LTOPhase); |
| 1497 | |
| 1498 | FunctionPassManager OptimizePM; |
| 1499 | // Scheduling LoopVersioningLICM when inlining is over, because after that |
| 1500 | // we may see more accurate aliasing. Reason to run this late is that too |
| 1501 | // early versioning may prevent further inlining due to increase of code |
| 1502 | // size. Other optimizations which runs later might get benefit of no-alias |
| 1503 | // assumption in clone loop. |
| 1504 | if (UseLoopVersioningLICM) { |
| 1505 | OptimizePM.addPass( |
| 1506 | Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass())); |
| 1507 | // LoopVersioningLICM pass might increase new LICM opportunities. |
| 1508 | OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1509 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 1510 | /*AllowSpeculation=*/true), |
| 1511 | /*USeMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false)); |
| 1512 | } |
| 1513 | |
| 1514 | OptimizePM.addPass(Pass: Float2IntPass()); |
| 1515 | OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass()); |
| 1516 | |
| 1517 | if (EnableMatrix) { |
| 1518 | OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass()); |
| 1519 | OptimizePM.addPass(Pass: EarlyCSEPass()); |
| 1520 | } |
| 1521 | |
| 1522 | // CHR pass should only be applied with the profile information. |
| 1523 | // The check is to check the profile summary information in CHR. |
| 1524 | if (EnableCHR && Level == OptimizationLevel::O3) |
| 1525 | OptimizePM.addPass(Pass: ControlHeightReductionPass()); |
| 1526 | |
| 1527 | // FIXME: We need to run some loop optimizations to re-rotate loops after |
| 1528 | // simplifycfg and others undo their rotation. |
| 1529 | |
| 1530 | // Optimize the loop execution. These passes operate on entire loop nests |
| 1531 | // rather than on each loop in an inside-out manner, and so they are actually |
| 1532 | // function passes. |
| 1533 | |
| 1534 | invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level); |
| 1535 | |
| 1536 | LoopPassManager LPM; |
| 1537 | // First rotate loops that may have been un-rotated by prior passes. |
| 1538 | // Disable header duplication at -Oz. |
| 1539 | LPM.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication || |
| 1540 | Level != OptimizationLevel::Oz, |
| 1541 | LTOPreLink)); |
| 1542 | // Some loops may have become dead by now. Try to delete them. |
| 1543 | // FIXME: see discussion in https://reviews.llvm.org/D112851, |
| 1544 | // this may need to be revisited once we run GVN before loop deletion |
| 1545 | // in the simplification pipeline. |
| 1546 | LPM.addPass(Pass: LoopDeletionPass()); |
| 1547 | |
| 1548 | if (PTO.LoopInterchange) |
| 1549 | LPM.addPass(Pass: LoopInterchangePass()); |
| 1550 | |
| 1551 | OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1552 | Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); |
| 1553 | |
| 1554 | // Distribute loops to allow partial vectorization. I.e. isolate dependences |
| 1555 | // into separate loop that would otherwise inhibit vectorization. This is |
| 1556 | // currently only performed for loops marked with the metadata |
| 1557 | // llvm.loop.distribute=true or when -enable-loop-distribute is specified. |
| 1558 | OptimizePM.addPass(Pass: LoopDistributePass()); |
| 1559 | |
| 1560 | // Populates the VFABI attribute with the scalar-to-vector mappings |
| 1561 | // from the TargetLibraryInfo. |
| 1562 | OptimizePM.addPass(Pass: InjectTLIMappings()); |
| 1563 | |
| 1564 | addVectorPasses(Level, FPM&: OptimizePM, /* IsFullLTO */ false); |
| 1565 | |
| 1566 | invokeVectorizerEndEPCallbacks(FPM&: OptimizePM, Level); |
| 1567 | |
| 1568 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
| 1569 | // canonicalization pass that enables other optimizations. As a result, |
| 1570 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
| 1571 | // result too early. |
| 1572 | OptimizePM.addPass(Pass: LoopSinkPass()); |
| 1573 | |
| 1574 | // And finally clean up LCSSA form before generating code. |
| 1575 | OptimizePM.addPass(Pass: InstSimplifyPass()); |
| 1576 | |
| 1577 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
| 1578 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
| 1579 | // flattening of blocks. |
| 1580 | OptimizePM.addPass(Pass: DivRemPairsPass()); |
| 1581 | |
| 1582 | // Try to annotate calls that were created during optimization. |
| 1583 | OptimizePM.addPass( |
| 1584 | Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse())); |
| 1585 | |
| 1586 | // LoopSink (and other loop passes since the last simplifyCFG) might have |
| 1587 | // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. |
| 1588 | OptimizePM.addPass( |
| 1589 | Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 1590 | .convertSwitchRangeToICmp(B: true) |
| 1591 | .speculateUnpredictables(B: true) |
| 1592 | .hoistLoadsStoresWithCondFaulting(B: true))); |
| 1593 | |
| 1594 | // Add the core optimizing pipeline. |
| 1595 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM), |
| 1596 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1597 | |
| 1598 | invokeOptimizerLastEPCallbacks(MPM, Level, Phase: LTOPhase); |
| 1599 | |
| 1600 | // Split out cold code. Splitting is done late to avoid hiding context from |
| 1601 | // other optimizations and inadvertently regressing performance. The tradeoff |
| 1602 | // is that this has a higher code size cost than splitting early. |
| 1603 | if (EnableHotColdSplit && !LTOPreLink) |
| 1604 | MPM.addPass(Pass: HotColdSplittingPass()); |
| 1605 | |
| 1606 | // Search the code for similar regions of code. If enough similar regions can |
| 1607 | // be found where extracting the regions into their own function will decrease |
| 1608 | // the size of the program, we extract the regions, a deduplicate the |
| 1609 | // structurally similar regions. |
| 1610 | if (EnableIROutliner) |
| 1611 | MPM.addPass(Pass: IROutlinerPass()); |
| 1612 | |
| 1613 | // Now we need to do some global optimization transforms. |
| 1614 | // FIXME: It would seem like these should come first in the optimization |
| 1615 | // pipeline and maybe be the bottom of the canonicalization pipeline? Weird |
| 1616 | // ordering here. |
| 1617 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1618 | MPM.addPass(Pass: ConstantMergePass()); |
| 1619 | |
| 1620 | // Merge functions if requested. It has a better chance to merge functions |
| 1621 | // after ConstantMerge folded jump tables. |
| 1622 | if (PTO.MergeFunctions) |
| 1623 | MPM.addPass(Pass: MergeFunctionsPass()); |
| 1624 | |
| 1625 | if (PTO.CallGraphProfile && !LTOPreLink) |
| 1626 | MPM.addPass(Pass: CGProfilePass(isLTOPostLink(Phase: LTOPhase))); |
| 1627 | |
| 1628 | // RelLookupTableConverterPass runs later in LTO post-link pipeline. |
| 1629 | if (!LTOPreLink) |
| 1630 | MPM.addPass(Pass: RelLookupTableConverterPass()); |
| 1631 | |
| 1632 | return MPM; |
| 1633 | } |
| 1634 | |
| 1635 | ModulePassManager |
| 1636 | PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, |
| 1637 | ThinOrFullLTOPhase Phase) { |
| 1638 | if (Level == OptimizationLevel::O0) |
| 1639 | return buildO0DefaultPipeline(Level, Phase); |
| 1640 | |
| 1641 | ModulePassManager MPM; |
| 1642 | |
| 1643 | // Convert @llvm.global.annotations to !annotation metadata. |
| 1644 | MPM.addPass(Pass: Annotation2MetadataPass()); |
| 1645 | |
| 1646 | // Force any function attributes we want the rest of the pipeline to observe. |
| 1647 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
| 1648 | |
| 1649 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
| 1650 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
| 1651 | |
| 1652 | // Apply module pipeline start EP callback. |
| 1653 | invokePipelineStartEPCallbacks(MPM, Level); |
| 1654 | |
| 1655 | // Add the core simplification pipeline. |
| 1656 | MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase)); |
| 1657 | |
| 1658 | // Now add the optimization pipeline. |
| 1659 | MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase: Phase)); |
| 1660 | |
| 1661 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
| 1662 | PGOOpt->Action == PGOOptions::SampleUse) |
| 1663 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
| 1664 | |
| 1665 | // Emit annotation remarks. |
| 1666 | addAnnotationRemarksPass(MPM); |
| 1667 | |
| 1668 | if (isLTOPreLink(Phase)) |
| 1669 | addRequiredLTOPreLinkPasses(MPM); |
| 1670 | return MPM; |
| 1671 | } |
| 1672 | |
| 1673 | ModulePassManager |
| 1674 | PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, |
| 1675 | bool EmitSummary) { |
| 1676 | ModulePassManager MPM; |
| 1677 | if (ThinLTO) |
| 1678 | MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level)); |
| 1679 | else |
| 1680 | MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level)); |
| 1681 | MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary)); |
| 1682 | |
| 1683 | // Perform any cleanups to the IR that aren't suitable for per TU compilation, |
| 1684 | // like removing CFI/WPD related instructions. Note, we reuse |
| 1685 | // LowerTypeTestsPass to clean up type tests rather than duplicate that logic |
| 1686 | // in FatLtoCleanup. |
| 1687 | MPM.addPass(Pass: FatLtoCleanup()); |
| 1688 | |
| 1689 | // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the |
| 1690 | // object code, only in the bitcode section, so drop it before we run |
| 1691 | // module optimization and generate machine code. If llvm.type.test() isn't in |
| 1692 | // the IR, this won't do anything. |
| 1693 | MPM.addPass( |
| 1694 | Pass: LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All)); |
| 1695 | |
| 1696 | // Use the ThinLTO post-link pipeline with sample profiling |
| 1697 | if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
| 1698 | MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr)); |
| 1699 | else { |
| 1700 | // ModuleSimplification does not run the coroutine passes for |
| 1701 | // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO |
| 1702 | // builds, otherwise they will miscompile. |
| 1703 | if (ThinLTO) { |
| 1704 | // TODO: replace w/ buildCoroWrapper() when it takes phase and level into |
| 1705 | // consideration. |
| 1706 | CGSCCPassManager CGPM; |
| 1707 | CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
| 1708 | CGPM.addPass(Pass: CoroAnnotationElidePass()); |
| 1709 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 1710 | MPM.addPass(Pass: CoroCleanupPass()); |
| 1711 | } |
| 1712 | |
| 1713 | // otherwise, just use module optimization |
| 1714 | MPM.addPass( |
| 1715 | Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None)); |
| 1716 | // Emit annotation remarks. |
| 1717 | addAnnotationRemarksPass(MPM); |
| 1718 | } |
| 1719 | return MPM; |
| 1720 | } |
| 1721 | |
| 1722 | ModulePassManager |
| 1723 | PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
| 1724 | if (Level == OptimizationLevel::O0) |
| 1725 | return buildO0DefaultPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink); |
| 1726 | |
| 1727 | ModulePassManager MPM; |
| 1728 | |
| 1729 | // Convert @llvm.global.annotations to !annotation metadata. |
| 1730 | MPM.addPass(Pass: Annotation2MetadataPass()); |
| 1731 | |
| 1732 | // Force any function attributes we want the rest of the pipeline to observe. |
| 1733 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
| 1734 | |
| 1735 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
| 1736 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
| 1737 | |
| 1738 | // Apply module pipeline start EP callback. |
| 1739 | invokePipelineStartEPCallbacks(MPM, Level); |
| 1740 | |
| 1741 | // If we are planning to perform ThinLTO later, we don't bloat the code with |
| 1742 | // unrolling/vectorization/... now. Just simplify the module as much as we |
| 1743 | // can. |
| 1744 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
| 1745 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink)); |
| 1746 | // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let |
| 1747 | // thinlto use the contextual info to perform imports; then use the contextual |
| 1748 | // profile in the post-thinlink phase. |
| 1749 | if (!UseCtxProfile.empty()) { |
| 1750 | addRequiredLTOPreLinkPasses(MPM); |
| 1751 | return MPM; |
| 1752 | } |
| 1753 | |
| 1754 | // Run partial inlining pass to partially inline functions that have |
| 1755 | // large bodies. |
| 1756 | // FIXME: It isn't clear whether this is really the right place to run this |
| 1757 | // in ThinLTO. Because there is another canonicalization and simplification |
| 1758 | // phase that will run after the thin link, running this here ends up with |
| 1759 | // less information than will be available later and it may grow functions in |
| 1760 | // ways that aren't beneficial. |
| 1761 | if (RunPartialInlining) |
| 1762 | MPM.addPass(Pass: PartialInlinerPass()); |
| 1763 | |
| 1764 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
| 1765 | PGOOpt->Action == PGOOptions::SampleUse) |
| 1766 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
| 1767 | |
| 1768 | // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual |
| 1769 | // optimization is going to be done in PostLink stage, but clang can't add |
| 1770 | // callbacks there in case of in-process ThinLTO called by linker. |
| 1771 | invokeOptimizerEarlyEPCallbacks(MPM, Level, |
| 1772 | /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); |
| 1773 | invokeOptimizerLastEPCallbacks(MPM, Level, |
| 1774 | /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); |
| 1775 | |
| 1776 | // Emit annotation remarks. |
| 1777 | addAnnotationRemarksPass(MPM); |
| 1778 | |
| 1779 | addRequiredLTOPreLinkPasses(MPM); |
| 1780 | |
| 1781 | return MPM; |
| 1782 | } |
| 1783 | |
| 1784 | ModulePassManager PassBuilder::buildThinLTODefaultPipeline( |
| 1785 | OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { |
| 1786 | ModulePassManager MPM; |
| 1787 | |
| 1788 | if (ImportSummary) { |
| 1789 | // For ThinLTO we must apply the context disambiguation decisions early, to |
| 1790 | // ensure we can correctly match the callsites to summary data. |
| 1791 | if (EnableMemProfContextDisambiguation) |
| 1792 | MPM.addPass(Pass: MemProfContextDisambiguation( |
| 1793 | ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
| 1794 | |
| 1795 | // These passes import type identifier resolutions for whole-program |
| 1796 | // devirtualization and CFI. They must run early because other passes may |
| 1797 | // disturb the specific instruction patterns that these passes look for, |
| 1798 | // creating dependencies on resolutions that may not appear in the summary. |
| 1799 | // |
| 1800 | // For example, GVN may transform the pattern assume(type.test) appearing in |
| 1801 | // two basic blocks into assume(phi(type.test, type.test)), which would |
| 1802 | // transform a dependency on a WPD resolution into a dependency on a type |
| 1803 | // identifier resolution for CFI. |
| 1804 | // |
| 1805 | // Also, WPD has access to more precise information than ICP and can |
| 1806 | // devirtualize more effectively, so it should operate on the IR first. |
| 1807 | // |
| 1808 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
| 1809 | // metadata and intrinsics. |
| 1810 | MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary)); |
| 1811 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary)); |
| 1812 | } |
| 1813 | |
| 1814 | if (Level == OptimizationLevel::O0) { |
| 1815 | // Run a second time to clean up any type tests left behind by WPD for use |
| 1816 | // in ICP. |
| 1817 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1818 | lowertypetests::DropTestKind::Assume)); |
| 1819 | // Drop available_externally and unreferenced globals. This is necessary |
| 1820 | // with ThinLTO in order to avoid leaving undefined references to dead |
| 1821 | // globals in the object file. |
| 1822 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
| 1823 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1824 | return MPM; |
| 1825 | } |
| 1826 | if (!UseCtxProfile.empty()) { |
| 1827 | MPM.addPass( |
| 1828 | Pass: buildModuleInlinerPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
| 1829 | } else { |
| 1830 | // Add the core simplification pipeline. |
| 1831 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
| 1832 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
| 1833 | } |
| 1834 | // Now add the optimization pipeline. |
| 1835 | MPM.addPass(Pass: buildModuleOptimizationPipeline( |
| 1836 | Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
| 1837 | |
| 1838 | // Emit annotation remarks. |
| 1839 | addAnnotationRemarksPass(MPM); |
| 1840 | |
| 1841 | return MPM; |
| 1842 | } |
| 1843 | |
| 1844 | ModulePassManager |
| 1845 | PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
| 1846 | // FIXME: We should use a customized pre-link pipeline! |
| 1847 | return buildPerModuleDefaultPipeline(Level, |
| 1848 | Phase: ThinOrFullLTOPhase::FullLTOPreLink); |
| 1849 | } |
| 1850 | |
/// Builds the module pass pipeline for the full LTO post-link step.
///
/// \p ExportSummary is forwarded to WholeProgramDevirtPass and to the first
/// LowerTypeTestsPass run. The pipeline has three exits: a minimal one at O0,
/// a short one at O1 (ending right after whole-program devirtualization), and
/// the full optimization pipeline otherwise. Every exit lowers type tests,
/// invokes the FullLinkTimeOptimizationLast EP callbacks, and adds the
/// annotation remarks pass before returning.
| 1851 | ModulePassManager |
| 1852 | PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, |
| 1853 | ModuleSummaryIndex *ExportSummary) { |
| 1854 | ModulePassManager MPM; |
| 1855 | |
| 1856 | invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level); |
| 1857 | |
| 1858 | // Create a function that performs CFI checks for cross-DSO calls with targets |
| 1859 | // in the current module. |
| 1860 | MPM.addPass(Pass: CrossDSOCFIPass()); |
| 1861 | |
| 1862 | if (Level == OptimizationLevel::O0) { |
| 1863 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
| 1864 | // metadata and intrinsics. |
| 1865 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
| 1866 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
| 1867 | // Run a second time to clean up any type tests left behind by WPD for use |
| 1868 | // in ICP. |
| 1869 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1870 | lowertypetests::DropTestKind::Assume)); |
| 1871 | |
| 1872 | MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink)); |
| 1873 | |
| 1874 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
| 1875 | |
| 1876 | // Emit annotation remarks. |
| 1877 | addAnnotationRemarksPass(MPM); |
| 1878 | |
| 1879 | return MPM; |
| 1880 | } |
| 1881 | |
| 1882 | if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { |
| 1883 | // Load sample profile before running the LTO optimization pipeline. |
| 1884 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
| 1885 | PGOOpt->ProfileRemappingFile, |
| 1886 | ThinOrFullLTOPhase::FullLTOPostLink)); |
| 1887 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 1888 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 1889 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 1890 | } |
| 1891 | |
| 1892 | // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. |
| 1893 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
| 1894 | |
| 1895 | // Remove unused virtual tables to improve the quality of code generated by |
| 1896 | // whole-program devirtualization and bitset lowering. |
| 1897 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
| 1898 | |
| 1899 | // Do basic inference of function attributes from known properties of system |
| 1900 | // libraries and other oracles. |
| 1901 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
| 1902 | |
| 1903 | if (Level.getSpeedupLevel() > 1) { |
| 1904 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 1905 | Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1906 | |
| 1907 | // Indirect call promotion. This should promote all the targets that are |
| 1908 | // left by the earlier promotion pass that promotes intra-module targets. |
| 1909 | // This two-step promotion is to save the compile time. For LTO, it should |
| 1910 | // produce the same result as if we only do promotion here. |
| 1911 | MPM.addPass(Pass: PGOIndirectCallPromotion( |
| 1912 | true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
| 1913 | |
| 1914 | // Promoting by-reference arguments to by-value exposes more constants to |
| 1915 | // IPSCCP. |
| 1916 | CGSCCPassManager CGPM; |
| 1917 | CGPM.addPass(Pass: PostOrderFunctionAttrsPass()); |
| 1918 | CGPM.addPass(Pass: ArgumentPromotionPass()); |
| 1919 | CGPM.addPass( |
| 1920 | Pass: createCGSCCToFunctionPassAdaptor(Pass: SROAPass(SROAOptions::ModifyCFG))); |
| 1921 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 1922 | |
| 1923 | // Propagate constants at call sites into the functions they call. This |
| 1924 | // opens opportunities for globalopt (and inlining) by substituting function |
| 1925 | // pointers passed as arguments to direct uses of functions. |
| 1926 | MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/ |
| 1927 | Level != OptimizationLevel::Os && |
| 1928 | Level != OptimizationLevel::Oz))); |
| 1929 | |
| 1930 | // Attach metadata to indirect call sites indicating the set of functions |
| 1931 | // they may target at run-time. This should follow IPSCCP. |
| 1932 | MPM.addPass(Pass: CalledValuePropagationPass()); |
| 1933 | } |
| 1934 | |
| 1935 | // Do RPO function attribute inference across the module to forward-propagate |
| 1936 | // attributes where applicable. |
| 1937 | // FIXME: Is this really an optimization rather than a canonicalization? |
| 1938 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
| 1939 | |
| 1940 | // Use in-range annotations on GEP indices to split globals where beneficial. |
| 1941 | MPM.addPass(Pass: GlobalSplitPass()); |
| 1942 | |
| 1943 | // Run whole program optimization of virtual call when the list of callees |
| 1944 | // is fixed. |
| 1945 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
| 1946 | |
| 1947 | // Stop here at -O1. |
| 1948 | if (Level == OptimizationLevel::O1) { |
| 1949 | // The LowerTypeTestsPass needs to run to lower type metadata and the |
| 1950 | // type.test intrinsics. The pass does nothing if CFI is disabled. |
| 1951 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
| 1952 | // Run a second time to clean up any type tests left behind by WPD for use |
| 1953 | // in ICP (which is performed earlier than this in the regular LTO |
| 1954 | // pipeline). |
| 1955 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1956 | lowertypetests::DropTestKind::Assume)); |
| 1957 | |
| 1958 | MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink)); |
| 1959 | |
| 1960 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
| 1961 | |
| 1962 | // Emit annotation remarks. |
| 1963 | addAnnotationRemarksPass(MPM); |
| 1964 | |
| 1965 | return MPM; |
| 1966 | } |
| 1967 | |
| 1968 | // TODO: Skip to match buildCoroWrapper. |
| 1969 | MPM.addPass(Pass: CoroEarlyPass()); |
| 1970 | |
| 1971 | // Optimize globals to try and fold them into constants. |
| 1972 | MPM.addPass(Pass: GlobalOptPass()); |
| 1973 | |
| 1974 | // Promote any localized globals to SSA registers. |
| 1975 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass())); |
| 1976 | |
| 1977 | // Linking modules together can lead to duplicate global constants; only |
| 1978 | // keep one copy of each constant. |
| 1979 | MPM.addPass(Pass: ConstantMergePass()); |
| 1980 | |
| 1981 | // Remove unused arguments from functions. |
| 1982 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
| 1983 | |
| 1984 | // Reduce the code after globalopt and ipsccp. Both can open up significant |
| 1985 | // simplification opportunities, and both can propagate functions through |
| 1986 | // function pointers. When this happens, we often have to resolve varargs |
| 1987 | // calls, etc, so let instcombine do this. |
| 1988 | FunctionPassManager PeepholeFPM; |
| 1989 | PeepholeFPM.addPass(Pass: InstCombinePass()); |
| 1990 | if (Level.getSpeedupLevel() > 1) |
| 1991 | PeepholeFPM.addPass(Pass: AggressiveInstCombinePass()); |
| 1992 | invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level); |
| 1993 | |
| 1994 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM), |
| 1995 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1996 | |
| 1997 | // Lower variadic functions for supported targets prior to inlining. |
| 1998 | MPM.addPass(Pass: ExpandVariadicsPass(ExpandVariadicsMode::Optimize)); |
| 1999 | |
| 2000 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
| 2001 | // generally clean up exception handling overhead. It isn't clear this is |
| 2002 | // valuable as the inliner doesn't currently care whether it is inlining an |
| 2003 | // invoke or a call. |
| 2004 | // Run the inliner now. |
| 2005 | if (EnableModuleInliner) { |
| 2006 | MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level), |
| 2007 | UseInlineAdvisor, |
| 2008 | ThinOrFullLTOPhase::FullLTOPostLink)); |
| 2009 | } else { |
| 2010 | MPM.addPass(Pass: ModuleInlinerWrapperPass( |
| 2011 | getInlineParamsFromOptLevel(Level), |
| 2012 | /* MandatoryFirst */ true, |
| 2013 | InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink, |
| 2014 | .Pass: InlinePass::CGSCCInliner})); |
| 2015 | } |
| 2016 | |
| 2017 | // Perform context disambiguation after inlining, since that would reduce the |
| 2018 | // amount of additional cloning required to distinguish the allocation |
| 2019 | // contexts. |
| 2020 | if (EnableMemProfContextDisambiguation) |
| 2021 | MPM.addPass(Pass: MemProfContextDisambiguation( |
| 2022 | /*Summary=*/nullptr, |
| 2023 | PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
| 2024 | |
| 2025 | // Optimize globals again after we ran the inliner. |
| 2026 | MPM.addPass(Pass: GlobalOptPass()); |
| 2027 | |
| 2028 | // Run the OpenMPOpt pass again after global optimizations. |
| 2029 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
| 2030 | |
| 2031 | // Garbage collect dead functions. |
| 2032 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
| 2033 | |
| 2034 | // If we didn't decide to inline a function, check to see if we can |
| 2035 | // transform it to pass arguments by value instead of by reference. |
| 2036 | CGSCCPassManager CGPM; |
| 2037 | CGPM.addPass(Pass: ArgumentPromotionPass()); |
| 2038 | CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
| 2039 | CGPM.addPass(Pass: CoroAnnotationElidePass()); |
| 2040 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 2041 | |
| 2042 | FunctionPassManager FPM; |
| 2043 | // The IPO Passes may leave cruft around. Clean up after them. |
| 2044 | FPM.addPass(Pass: InstCombinePass()); |
| 2045 | invokePeepholeEPCallbacks(FPM, Level); |
| 2046 | |
| 2047 | if (EnableConstraintElimination) |
| 2048 | FPM.addPass(Pass: ConstraintEliminationPass()); |
| 2049 | |
| 2050 | FPM.addPass(Pass: JumpThreadingPass()); |
| 2051 | |
| 2052 | // Do a post-inline, context-sensitive PGO instrumentation or use pass. |
| 2053 | // These passes are added to MPM directly, ahead of the FPM adaptor below. |
| 2054 | if (PGOOpt) { |
| 2055 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
| 2056 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
| 2057 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 2058 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
| 2059 | FS: PGOOpt->FS); |
| 2060 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
| 2061 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
| 2062 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 2063 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile, |
| 2064 | FS: PGOOpt->FS); |
| 2065 | } |
| 2066 | |
| 2067 | // Break up allocas. |
| 2068 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 2069 | |
| 2070 | // LTO provides additional opportunities for tailcall elimination due to |
| 2071 | // link-time inlining, and visibility of nocapture attribute. |
| 2072 | FPM.addPass( |
| 2073 | Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse())); |
| 2074 | |
| 2075 | // Schedule the function-level cleanup passes collected in FPM above. |
| 2076 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM), |
| 2077 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 2078 | |
| 2079 | MPM.addPass( |
| 2080 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass())); |
| 2081 | |
| 2082 | // Require the GlobalsAA analysis for the module so we can query it within |
| 2083 | // MainFPM. |
| 2084 | if (EnableGlobalAnalyses) { |
| 2085 | MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
| 2086 | // Invalidate AAManager so it can be recreated and pick up the newly |
| 2087 | // available GlobalsAA. |
| 2088 | MPM.addPass( |
| 2089 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
| 2090 | } |
| 2091 | |
| 2092 | FunctionPassManager MainFPM; |
| 2093 | MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 2094 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 2095 | /*AllowSpeculation=*/true), |
| 2096 | /*UseMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false)); |
| 2097 | |
| 2098 | if (RunNewGVN) |
| 2099 | MainFPM.addPass(Pass: NewGVNPass()); |
| 2100 | else |
| 2101 | MainFPM.addPass(Pass: GVNPass()); |
| 2102 | |
| 2103 | // Remove dead memcpy()'s. |
| 2104 | MainFPM.addPass(Pass: MemCpyOptPass()); |
| 2105 | |
| 2106 | // Nuke dead stores. |
| 2107 | MainFPM.addPass(Pass: DSEPass()); |
| 2108 | MainFPM.addPass(Pass: MoveAutoInitPass()); |
| 2109 | MainFPM.addPass(Pass: MergedLoadStoreMotionPass()); |
| 2110 | |
| 2111 | invokeVectorizerStartEPCallbacks(FPM&: MainFPM, Level); |
| 2112 | |
| 2113 | LoopPassManager LPM; |
| 2114 | if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) |
| 2115 | LPM.addPass(Pass: LoopFlattenPass()); |
| 2116 | LPM.addPass(Pass: IndVarSimplifyPass()); |
| 2117 | LPM.addPass(Pass: LoopDeletionPass()); |
| 2118 | // FIXME: Add loop interchange. |
| 2119 | |
| 2120 | // Unroll small loops and perform peeling. |
| 2121 | LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
| 2122 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
| 2123 | PTO.ForgetAllSCEVInLoopUnroll)); |
| 2124 | // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. |
| 2125 | // *All* loop passes must preserve it, in order to be able to use it. |
| 2126 | MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 2127 | Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); |
| 2128 | |
| 2129 | MainFPM.addPass(Pass: LoopDistributePass()); |
| 2130 | |
| 2131 | addVectorPasses(Level, FPM&: MainFPM, /* IsFullLTO */ true); |
| 2132 | |
| 2133 | invokeVectorizerEndEPCallbacks(FPM&: MainFPM, Level); |
| 2134 | |
| 2135 | // Run the OpenMPOpt CGSCC pass again late (added to MPM before MainFPM). |
| 2136 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
| 2137 | Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink))); |
| 2138 | |
| 2139 | invokePeepholeEPCallbacks(FPM&: MainFPM, Level); |
| 2140 | MainFPM.addPass(Pass: JumpThreadingPass()); |
| 2141 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM), |
| 2142 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 2143 | |
| 2144 | // Lower type metadata and the type.test intrinsic. This pass supports |
| 2145 | // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs |
| 2146 | // to be run at link time if CFI is enabled. This pass does nothing if |
| 2147 | // CFI is disabled. |
| 2148 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
| 2149 | // Run a second time to clean up any type tests left behind by WPD for use |
| 2150 | // in ICP (which is performed earlier than this in the regular LTO pipeline). |
| 2151 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 2152 | lowertypetests::DropTestKind::Assume)); |
| 2153 | |
| 2154 | // Enable splitting late in the FullLTO post-link pipeline. |
| 2155 | if (EnableHotColdSplit) |
| 2156 | MPM.addPass(Pass: HotColdSplittingPass()); |
| 2157 | |
| 2158 | // Add late LTO optimization passes. |
| 2159 | FunctionPassManager LateFPM; |
| 2160 | |
| 2161 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
| 2162 | // canonicalization pass that enables other optimizations. As a result, |
| 2163 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
| 2164 | // result too early. |
| 2165 | LateFPM.addPass(Pass: LoopSinkPass()); |
| 2166 | |
| 2167 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
| 2168 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
| 2169 | // flattening of blocks. |
| 2170 | LateFPM.addPass(Pass: DivRemPairsPass()); |
| 2171 | |
| 2172 | // Delete basic blocks, which optimization passes may have killed. |
| 2173 | LateFPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 2174 | .convertSwitchRangeToICmp(B: true) |
| 2175 | .hoistCommonInsts(B: true) |
| 2176 | .speculateUnpredictables(B: true))); |
| 2177 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM))); |
| 2178 | |
| 2179 | // Drop bodies of available externally objects to improve GlobalDCE. |
| 2180 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
| 2181 | |
| 2182 | // Now that we have optimized the program, discard unreachable functions. |
| 2183 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
| 2184 | |
| 2185 | if (PTO.MergeFunctions) |
| 2186 | MPM.addPass(Pass: MergeFunctionsPass()); |
| 2187 | |
| 2188 | MPM.addPass(Pass: RelLookupTableConverterPass()); |
| 2189 | |
| 2190 | if (PTO.CallGraphProfile) |
| 2191 | MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true)); |
| 2192 | |
| 2193 | MPM.addPass(Pass: CoroCleanupPass()); |
| 2194 | |
| 2195 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
| 2196 | |
| 2197 | // Emit annotation remarks. |
| 2198 | addAnnotationRemarksPass(MPM); |
| 2199 | |
| 2200 | return MPM; |
| 2201 | } |
| 2202 | |
| 2203 | ModulePassManager |
| 2204 | PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, |
| 2205 | ThinOrFullLTOPhase Phase) { |
| 2206 | assert(Level == OptimizationLevel::O0 && |
| 2207 | "buildO0DefaultPipeline should only be used with O0" ); |
| 2208 | |
| 2209 | ModulePassManager MPM; |
| 2210 | |
| 2211 | // Perform pseudo probe instrumentation in O0 mode. This is for the |
| 2212 | // consistency between different build modes. For example, a LTO build can be |
| 2213 | // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in |
| 2214 | // the postlink will require pseudo probe instrumentation in the prelink. |
| 2215 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling) |
| 2216 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
| 2217 | |
| 2218 | if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || |
| 2219 | PGOOpt->Action == PGOOptions::IRUse)) |
| 2220 | addPGOInstrPassesForO0( |
| 2221 | MPM, |
| 2222 | /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr), |
| 2223 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile, |
| 2224 | ProfileRemappingFile: PGOOpt->ProfileRemappingFile, FS: PGOOpt->FS); |
| 2225 | |
| 2226 | // Instrument function entry and exit before all inlining. |
| 2227 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 2228 | Pass: EntryExitInstrumenterPass(/*PostInlining=*/false))); |
| 2229 | |
| 2230 | invokePipelineStartEPCallbacks(MPM, Level); |
| 2231 | |
| 2232 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
| 2233 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
| 2234 | |
| 2235 | if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { |
| 2236 | // Explicitly disable sample loader inlining and use flattened profile in O0 |
| 2237 | // pipeline. |
| 2238 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
| 2239 | PGOOpt->ProfileRemappingFile, |
| 2240 | ThinOrFullLTOPhase::None, nullptr, |
| 2241 | /*DisableSampleProfileInlining=*/true, |
| 2242 | /*UseFlattenedProfile=*/true)); |
| 2243 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 2244 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 2245 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 2246 | } |
| 2247 | |
| 2248 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase); |
| 2249 | |
| 2250 | // Build a minimal pipeline based on the semantics required by LLVM, |
| 2251 | // which is just that always inlining occurs. Further, disable generating |
| 2252 | // lifetime intrinsics to avoid enabling further optimizations during |
| 2253 | // code generation. |
| 2254 | MPM.addPass(Pass: AlwaysInlinerPass( |
| 2255 | /*InsertLifetimeIntrinsics=*/false)); |
| 2256 | |
| 2257 | if (PTO.MergeFunctions) |
| 2258 | MPM.addPass(Pass: MergeFunctionsPass()); |
| 2259 | |
| 2260 | if (EnableMatrix) |
| 2261 | MPM.addPass( |
| 2262 | Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true))); |
| 2263 | |
| 2264 | if (!CGSCCOptimizerLateEPCallbacks.empty()) { |
| 2265 | CGSCCPassManager CGPM; |
| 2266 | invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level); |
| 2267 | if (!CGPM.isEmpty()) |
| 2268 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 2269 | } |
| 2270 | if (!LateLoopOptimizationsEPCallbacks.empty()) { |
| 2271 | LoopPassManager LPM; |
| 2272 | invokeLateLoopOptimizationsEPCallbacks(LPM, Level); |
| 2273 | if (!LPM.isEmpty()) { |
| 2274 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 2275 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
| 2276 | } |
| 2277 | } |
| 2278 | if (!LoopOptimizerEndEPCallbacks.empty()) { |
| 2279 | LoopPassManager LPM; |
| 2280 | invokeLoopOptimizerEndEPCallbacks(LPM, Level); |
| 2281 | if (!LPM.isEmpty()) { |
| 2282 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 2283 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
| 2284 | } |
| 2285 | } |
| 2286 | if (!ScalarOptimizerLateEPCallbacks.empty()) { |
| 2287 | FunctionPassManager FPM; |
| 2288 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
| 2289 | if (!FPM.isEmpty()) |
| 2290 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
| 2291 | } |
| 2292 | |
| 2293 | invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase); |
| 2294 | |
| 2295 | if (!VectorizerStartEPCallbacks.empty()) { |
| 2296 | FunctionPassManager FPM; |
| 2297 | invokeVectorizerStartEPCallbacks(FPM, Level); |
| 2298 | if (!FPM.isEmpty()) |
| 2299 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
| 2300 | } |
| 2301 | |
| 2302 | if (!VectorizerEndEPCallbacks.empty()) { |
| 2303 | FunctionPassManager FPM; |
| 2304 | invokeVectorizerEndEPCallbacks(FPM, Level); |
| 2305 | if (!FPM.isEmpty()) |
| 2306 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
| 2307 | } |
| 2308 | |
| 2309 | MPM.addPass(Pass: buildCoroWrapper(Phase)); |
| 2310 | |
| 2311 | invokeOptimizerLastEPCallbacks(MPM, Level, Phase); |
| 2312 | |
| 2313 | if (isLTOPreLink(Phase)) |
| 2314 | addRequiredLTOPreLinkPasses(MPM); |
| 2315 | |
| 2316 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass())); |
| 2317 | |
| 2318 | return MPM; |
| 2319 | } |
| 2320 | |
| 2321 | AAManager PassBuilder::buildDefaultAAPipeline() { |
| 2322 | AAManager AA; |
| 2323 | |
| 2324 | // The order in which these are registered determines their priority when |
| 2325 | // being queried. |
| 2326 | |
| 2327 | // Add any target-specific alias analyses that should be run early. |
| 2328 | if (TM) |
| 2329 | TM->registerEarlyDefaultAliasAnalyses(AA); |
| 2330 | |
| 2331 | // First we register the basic alias analysis that provides the majority of |
| 2332 | // per-function local AA logic. This is a stateless, on-demand local set of |
| 2333 | // AA techniques. |
| 2334 | AA.registerFunctionAnalysis<BasicAA>(); |
| 2335 | |
| 2336 | // Next we query fast, specialized alias analyses that wrap IR-embedded |
| 2337 | // information about aliasing. |
| 2338 | AA.registerFunctionAnalysis<ScopedNoAliasAA>(); |
| 2339 | AA.registerFunctionAnalysis<TypeBasedAA>(); |
| 2340 | |
| 2341 | // Add support for querying global aliasing information when available. |
| 2342 | // Because the `AAManager` is a function analysis and `GlobalsAA` is a module |
| 2343 | // analysis, all that the `AAManager` can do is query for any *cached* |
| 2344 | // results from `GlobalsAA` through a readonly proxy. |
| 2345 | if (EnableGlobalAnalyses) |
| 2346 | AA.registerModuleAnalysis<GlobalsAA>(); |
| 2347 | |
| 2348 | // Add target-specific alias analyses. |
| 2349 | if (TM) |
| 2350 | TM->registerDefaultAliasAnalyses(AA); |
| 2351 | |
| 2352 | return AA; |
| 2353 | } |
| 2354 | |
| 2355 | bool PassBuilder::isInstrumentedPGOUse() const { |
| 2356 | return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) || |
| 2357 | !UseCtxProfile.empty(); |
| 2358 | } |