| 1 | //===- Construction of pass pipelines -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// |
| 10 | /// This file provides the implementation of the PassBuilder based on our |
| 11 | /// static pass registry as well as related functionality. It also provides |
| 12 | /// helpers to aid in analyzing, debugging, and testing passes and pass |
| 13 | /// pipelines. |
| 14 | /// |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include "llvm/ADT/Statistic.h" |
| 18 | #include "llvm/Analysis/AliasAnalysis.h" |
| 19 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
| 20 | #include "llvm/Analysis/CGSCCPassManager.h" |
| 21 | #include "llvm/Analysis/CtxProfAnalysis.h" |
| 22 | #include "llvm/Analysis/FunctionPropertiesAnalysis.h" |
| 23 | #include "llvm/Analysis/GlobalsModRef.h" |
| 24 | #include "llvm/Analysis/InlineAdvisor.h" |
| 25 | #include "llvm/Analysis/InstCount.h" |
| 26 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 27 | #include "llvm/Analysis/ScopedNoAliasAA.h" |
| 28 | #include "llvm/Analysis/TypeBasedAliasAnalysis.h" |
| 29 | #include "llvm/IR/PassManager.h" |
| 30 | #include "llvm/Pass.h" |
| 31 | #include "llvm/Passes/OptimizationLevel.h" |
| 32 | #include "llvm/Passes/PassBuilder.h" |
| 33 | #include "llvm/Support/CommandLine.h" |
| 34 | #include "llvm/Support/ErrorHandling.h" |
| 35 | #include "llvm/Support/PGOOptions.h" |
| 36 | #include "llvm/Support/VirtualFileSystem.h" |
| 37 | #include "llvm/Target/TargetMachine.h" |
| 38 | #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" |
| 39 | #include "llvm/Transforms/Coroutines/CoroAnnotationElide.h" |
| 40 | #include "llvm/Transforms/Coroutines/CoroCleanup.h" |
| 41 | #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" |
| 42 | #include "llvm/Transforms/Coroutines/CoroEarly.h" |
| 43 | #include "llvm/Transforms/Coroutines/CoroElide.h" |
| 44 | #include "llvm/Transforms/Coroutines/CoroSplit.h" |
| 45 | #include "llvm/Transforms/HipStdPar/HipStdPar.h" |
| 46 | #include "llvm/Transforms/IPO/AlwaysInliner.h" |
| 47 | #include "llvm/Transforms/IPO/Annotation2Metadata.h" |
| 48 | #include "llvm/Transforms/IPO/ArgumentPromotion.h" |
| 49 | #include "llvm/Transforms/IPO/Attributor.h" |
| 50 | #include "llvm/Transforms/IPO/CalledValuePropagation.h" |
| 51 | #include "llvm/Transforms/IPO/ConstantMerge.h" |
| 52 | #include "llvm/Transforms/IPO/CrossDSOCFI.h" |
| 53 | #include "llvm/Transforms/IPO/DeadArgumentElimination.h" |
| 54 | #include "llvm/Transforms/IPO/ElimAvailExtern.h" |
| 55 | #include "llvm/Transforms/IPO/EmbedBitcodePass.h" |
| 56 | #include "llvm/Transforms/IPO/ExpandVariadics.h" |
| 57 | #include "llvm/Transforms/IPO/FatLTOCleanup.h" |
| 58 | #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" |
| 59 | #include "llvm/Transforms/IPO/FunctionAttrs.h" |
| 60 | #include "llvm/Transforms/IPO/GlobalDCE.h" |
| 61 | #include "llvm/Transforms/IPO/GlobalOpt.h" |
| 62 | #include "llvm/Transforms/IPO/GlobalSplit.h" |
| 63 | #include "llvm/Transforms/IPO/HotColdSplitting.h" |
| 64 | #include "llvm/Transforms/IPO/IROutliner.h" |
| 65 | #include "llvm/Transforms/IPO/InferFunctionAttrs.h" |
| 66 | #include "llvm/Transforms/IPO/Inliner.h" |
| 67 | #include "llvm/Transforms/IPO/LowerTypeTests.h" |
| 68 | #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" |
| 69 | #include "llvm/Transforms/IPO/MergeFunctions.h" |
| 70 | #include "llvm/Transforms/IPO/ModuleInliner.h" |
| 71 | #include "llvm/Transforms/IPO/OpenMPOpt.h" |
| 72 | #include "llvm/Transforms/IPO/PartialInlining.h" |
| 73 | #include "llvm/Transforms/IPO/SCCP.h" |
| 74 | #include "llvm/Transforms/IPO/SampleProfile.h" |
| 75 | #include "llvm/Transforms/IPO/SampleProfileProbe.h" |
| 76 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
| 77 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
| 78 | #include "llvm/Transforms/Instrumentation/AllocToken.h" |
| 79 | #include "llvm/Transforms/Instrumentation/CGProfile.h" |
| 80 | #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" |
| 81 | #include "llvm/Transforms/Instrumentation/InstrProfiling.h" |
| 82 | #include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h" |
| 83 | #include "llvm/Transforms/Instrumentation/MemProfUse.h" |
| 84 | #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h" |
| 85 | #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" |
| 86 | #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" |
| 87 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
| 88 | #include "llvm/Transforms/Scalar/ADCE.h" |
| 89 | #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" |
| 90 | #include "llvm/Transforms/Scalar/AnnotationRemarks.h" |
| 91 | #include "llvm/Transforms/Scalar/BDCE.h" |
| 92 | #include "llvm/Transforms/Scalar/CallSiteSplitting.h" |
| 93 | #include "llvm/Transforms/Scalar/ConstraintElimination.h" |
| 94 | #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" |
| 95 | #include "llvm/Transforms/Scalar/DFAJumpThreading.h" |
| 96 | #include "llvm/Transforms/Scalar/DeadStoreElimination.h" |
| 97 | #include "llvm/Transforms/Scalar/DivRemPairs.h" |
| 98 | #include "llvm/Transforms/Scalar/DropUnnecessaryAssumes.h" |
| 99 | #include "llvm/Transforms/Scalar/EarlyCSE.h" |
| 100 | #include "llvm/Transforms/Scalar/ExpandMemCmp.h" |
| 101 | #include "llvm/Transforms/Scalar/Float2Int.h" |
| 102 | #include "llvm/Transforms/Scalar/GVN.h" |
| 103 | #include "llvm/Transforms/Scalar/IndVarSimplify.h" |
| 104 | #include "llvm/Transforms/Scalar/InferAlignment.h" |
| 105 | #include "llvm/Transforms/Scalar/InstSimplifyPass.h" |
| 106 | #include "llvm/Transforms/Scalar/JumpTableToSwitch.h" |
| 107 | #include "llvm/Transforms/Scalar/JumpThreading.h" |
| 108 | #include "llvm/Transforms/Scalar/LICM.h" |
| 109 | #include "llvm/Transforms/Scalar/LoopDeletion.h" |
| 110 | #include "llvm/Transforms/Scalar/LoopDistribute.h" |
| 111 | #include "llvm/Transforms/Scalar/LoopFlatten.h" |
| 112 | #include "llvm/Transforms/Scalar/LoopFuse.h" |
| 113 | #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" |
| 114 | #include "llvm/Transforms/Scalar/LoopInstSimplify.h" |
| 115 | #include "llvm/Transforms/Scalar/LoopInterchange.h" |
| 116 | #include "llvm/Transforms/Scalar/LoopLoadElimination.h" |
| 117 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
| 118 | #include "llvm/Transforms/Scalar/LoopRotation.h" |
| 119 | #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" |
| 120 | #include "llvm/Transforms/Scalar/LoopSink.h" |
| 121 | #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" |
| 122 | #include "llvm/Transforms/Scalar/LoopUnrollPass.h" |
| 123 | #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" |
| 124 | #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" |
| 125 | #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" |
| 126 | #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" |
| 127 | #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" |
| 128 | #include "llvm/Transforms/Scalar/MergeICmps.h" |
| 129 | #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" |
| 130 | #include "llvm/Transforms/Scalar/NewGVN.h" |
| 131 | #include "llvm/Transforms/Scalar/Reassociate.h" |
| 132 | #include "llvm/Transforms/Scalar/SCCP.h" |
| 133 | #include "llvm/Transforms/Scalar/SROA.h" |
| 134 | #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" |
| 135 | #include "llvm/Transforms/Scalar/SimplifyCFG.h" |
| 136 | #include "llvm/Transforms/Scalar/SpeculativeExecution.h" |
| 137 | #include "llvm/Transforms/Scalar/TailRecursionElimination.h" |
| 138 | #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" |
| 139 | #include "llvm/Transforms/Utils/AddDiscriminators.h" |
| 140 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
| 141 | #include "llvm/Transforms/Utils/CanonicalizeAliases.h" |
| 142 | #include "llvm/Transforms/Utils/CountVisits.h" |
| 143 | #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" |
| 144 | #include "llvm/Transforms/Utils/ExtraPassManager.h" |
| 145 | #include "llvm/Transforms/Utils/InjectTLIMappings.h" |
| 146 | #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" |
| 147 | #include "llvm/Transforms/Utils/Mem2Reg.h" |
| 148 | #include "llvm/Transforms/Utils/MoveAutoInit.h" |
| 149 | #include "llvm/Transforms/Utils/NameAnonGlobals.h" |
| 150 | #include "llvm/Transforms/Utils/RelLookupTableConverter.h" |
| 151 | #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" |
| 152 | #include "llvm/Transforms/Vectorize/LoopVectorize.h" |
| 153 | #include "llvm/Transforms/Vectorize/SLPVectorizer.h" |
| 154 | #include "llvm/Transforms/Vectorize/VectorCombine.h" |
| 155 | |
| 156 | using namespace llvm; |
| 157 | |
| 158 | namespace llvm { |
| 159 | |
| 160 | static cl::opt<InliningAdvisorMode> UseInlineAdvisor( |
| 161 | "enable-ml-inliner" , cl::init(Val: InliningAdvisorMode::Default), cl::Hidden, |
| 162 | cl::desc("Enable ML policy for inliner. Currently trained for -Oz only" ), |
| 163 | cl::values(clEnumValN(InliningAdvisorMode::Default, "default" , |
| 164 | "Heuristics-based inliner version" ), |
| 165 | clEnumValN(InliningAdvisorMode::Development, "development" , |
| 166 | "Use development mode (runtime-loadable model)" ), |
| 167 | clEnumValN(InliningAdvisorMode::Release, "release" , |
| 168 | "Use release mode (AOT-compiled model)" ))); |
| 169 | |
| 170 | /// Flag to enable inline deferral during PGO. |
| 171 | static cl::opt<bool> |
| 172 | EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral" , cl::init(Val: true), |
| 173 | cl::Hidden, |
| 174 | cl::desc("Enable inline deferral during PGO" )); |
| 175 | |
| 176 | static cl::opt<bool> EnableModuleInliner("enable-module-inliner" , |
| 177 | cl::init(Val: false), cl::Hidden, |
| 178 | cl::desc("Enable module inliner" )); |
| 179 | |
| 180 | static cl::opt<bool> PerformMandatoryInliningsFirst( |
| 181 | "mandatory-inlining-first" , cl::init(Val: false), cl::Hidden, |
| 182 | cl::desc("Perform mandatory inlinings module-wide, before performing " |
| 183 | "inlining" )); |
| 184 | |
| 185 | static cl::opt<bool> EnableEagerlyInvalidateAnalyses( |
| 186 | "eagerly-invalidate-analyses" , cl::init(Val: true), cl::Hidden, |
| 187 | cl::desc("Eagerly invalidate more analyses in default pipelines" )); |
| 188 | |
| 189 | static cl::opt<bool> EnableMergeFunctions( |
| 190 | "enable-merge-functions" , cl::init(Val: false), cl::Hidden, |
| 191 | cl::desc("Enable function merging as part of the optimization pipeline" )); |
| 192 | |
| 193 | static cl::opt<bool> EnablePostPGOLoopRotation( |
| 194 | "enable-post-pgo-loop-rotation" , cl::init(Val: true), cl::Hidden, |
| 195 | cl::desc("Run the loop rotation transformation after PGO instrumentation" )); |
| 196 | |
| 197 | static cl::opt<bool> EnableGlobalAnalyses( |
| 198 | "enable-global-analyses" , cl::init(Val: true), cl::Hidden, |
| 199 | cl::desc("Enable inter-procedural analyses" )); |
| 200 | |
| 201 | static cl::opt<bool> RunPartialInlining("enable-partial-inlining" , |
| 202 | cl::init(Val: false), cl::Hidden, |
| 203 | cl::desc("Run Partial inlining pass" )); |
| 204 | |
| 205 | static cl::opt<bool> ( |
| 206 | "extra-vectorizer-passes" , cl::init(Val: false), cl::Hidden, |
| 207 | cl::desc("Run cleanup optimization passes after vectorization" )); |
| 208 | |
| 209 | static cl::opt<bool> RunNewGVN("enable-newgvn" , cl::init(Val: false), cl::Hidden, |
| 210 | cl::desc("Run the NewGVN pass" )); |
| 211 | |
| 212 | static cl::opt<bool> |
| 213 | EnableLoopInterchange("enable-loopinterchange" , cl::init(Val: false), cl::Hidden, |
| 214 | cl::desc("Enable the LoopInterchange Pass" )); |
| 215 | |
| 216 | static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam" , |
| 217 | cl::init(Val: false), cl::Hidden, |
| 218 | cl::desc("Enable Unroll And Jam Pass" )); |
| 219 | |
| 220 | static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten" , cl::init(Val: false), |
| 221 | cl::Hidden, |
| 222 | cl::desc("Enable the LoopFlatten Pass" )); |
| 223 | |
| 224 | static cl::opt<bool> |
| 225 | EnableDFAJumpThreading("enable-dfa-jump-thread" , |
| 226 | cl::desc("Enable DFA jump threading" ), |
| 227 | cl::init(Val: false), cl::Hidden); |
| 228 | |
| 229 | static cl::opt<bool> |
| 230 | EnableHotColdSplit("hot-cold-split" , |
| 231 | cl::desc("Enable hot-cold splitting pass" )); |
| 232 | |
| 233 | static cl::opt<bool> EnableIROutliner("ir-outliner" , cl::init(Val: false), |
| 234 | cl::Hidden, |
| 235 | cl::desc("Enable ir outliner pass" )); |
| 236 | |
| 237 | static cl::opt<bool> |
| 238 | DisablePreInliner("disable-preinline" , cl::init(Val: false), cl::Hidden, |
| 239 | cl::desc("Disable pre-instrumentation inliner" )); |
| 240 | |
| 241 | static cl::opt<int> PreInlineThreshold( |
| 242 | "preinline-threshold" , cl::Hidden, cl::init(Val: 75), |
| 243 | cl::desc("Control the amount of inlining in pre-instrumentation inliner " |
| 244 | "(default = 75)" )); |
| 245 | |
| 246 | static cl::opt<bool> |
| 247 | EnableGVNHoist("enable-gvn-hoist" , |
| 248 | cl::desc("Enable the GVN hoisting pass (default = off)" )); |
| 249 | |
| 250 | static cl::opt<bool> |
| 251 | EnableGVNSink("enable-gvn-sink" , |
| 252 | cl::desc("Enable the GVN sinking pass (default = off)" )); |
| 253 | |
| 254 | static cl::opt<bool> EnableJumpTableToSwitch( |
| 255 | "enable-jump-table-to-switch" , |
| 256 | cl::desc("Enable JumpTableToSwitch pass (default = off)" )); |
| 257 | |
| 258 | // This option is used in simplifying testing SampleFDO optimizations for |
| 259 | // profile loading. |
| 260 | static cl::opt<bool> |
| 261 | EnableCHR("enable-chr" , cl::init(Val: true), cl::Hidden, |
| 262 | cl::desc("Enable control height reduction optimization (CHR)" )); |
| 263 | |
| 264 | static cl::opt<bool> FlattenedProfileUsed( |
| 265 | "flattened-profile-used" , cl::init(Val: false), cl::Hidden, |
| 266 | cl::desc("Indicate the sample profile being used is flattened, i.e., " |
| 267 | "no inline hierarchy exists in the profile" )); |
| 268 | |
| 269 | static cl::opt<bool> |
| 270 | EnableMatrix("enable-matrix" , cl::init(Val: false), cl::Hidden, |
| 271 | cl::desc("Enable lowering of the matrix intrinsics" )); |
| 272 | |
| 273 | static cl::opt<bool> EnableMergeICmps( |
| 274 | "enable-mergeicmps" , cl::init(Val: true), cl::Hidden, |
| 275 | cl::desc("Enable MergeICmps pass in the optimization pipeline" )); |
| 276 | |
| 277 | static cl::opt<bool> EnableConstraintElimination( |
| 278 | "enable-constraint-elimination" , cl::init(Val: true), cl::Hidden, |
| 279 | cl::desc( |
| 280 | "Enable pass to eliminate conditions based on linear constraints" )); |
| 281 | |
| 282 | static cl::opt<AttributorRunOption> AttributorRun( |
| 283 | "attributor-enable" , cl::Hidden, cl::init(Val: AttributorRunOption::NONE), |
| 284 | cl::desc("Enable the attributor inter-procedural deduction pass" ), |
| 285 | cl::values(clEnumValN(AttributorRunOption::FULL, "full" , |
| 286 | "enable all full attributor runs" ), |
| 287 | clEnumValN(AttributorRunOption::LIGHT, "light" , |
| 288 | "enable all attributor-light runs" ), |
| 289 | clEnumValN(AttributorRunOption::MODULE, "module" , |
| 290 | "enable module-wide attributor runs" ), |
| 291 | clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light" , |
| 292 | "enable module-wide attributor-light runs" ), |
| 293 | clEnumValN(AttributorRunOption::CGSCC, "cgscc" , |
| 294 | "enable call graph SCC attributor runs" ), |
| 295 | clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light" , |
| 296 | "enable call graph SCC attributor-light runs" ), |
| 297 | clEnumValN(AttributorRunOption::NONE, "none" , |
| 298 | "disable attributor runs" ))); |
| 299 | |
| 300 | static cl::opt<bool> EnableSampledInstr( |
| 301 | "enable-sampled-instrumentation" , cl::init(Val: false), cl::Hidden, |
| 302 | cl::desc("Enable profile instrumentation sampling (default = off)" )); |
| 303 | static cl::opt<bool> UseLoopVersioningLICM( |
| 304 | "enable-loop-versioning-licm" , cl::init(Val: false), cl::Hidden, |
| 305 | cl::desc("Enable the experimental Loop Versioning LICM pass" )); |
| 306 | |
| 307 | static cl::opt<std::string> InstrumentColdFuncOnlyPath( |
| 308 | "instrument-cold-function-only-path" , cl::init(Val: "" ), |
| 309 | cl::desc("File path for cold function only instrumentation(requires use " |
| 310 | "with --pgo-instrument-cold-function-only)" ), |
| 311 | cl::Hidden); |
| 312 | |
| 313 | // TODO: There is a similar flag in WPD pass, we should consolidate them by |
| 314 | // parsing the option only once in PassBuilder and share it across both places. |
| 315 | static cl::opt<bool> EnableDevirtualizeSpeculatively( |
| 316 | "enable-devirtualize-speculatively" , |
| 317 | cl::desc("Enable speculative devirtualization optimization" ), |
| 318 | cl::init(Val: false)); |
| 319 | |
| 320 | extern cl::opt<std::string> UseCtxProfile; |
| 321 | extern cl::opt<bool> PGOInstrumentColdFunctionOnly; |
| 322 | |
| 323 | extern cl::opt<bool> EnableMemProfContextDisambiguation; |
| 324 | } // namespace llvm |
| 325 | |
| 326 | PipelineTuningOptions::PipelineTuningOptions() { |
| 327 | LoopInterleaving = true; |
| 328 | LoopVectorization = true; |
| 329 | SLPVectorization = false; |
| 330 | LoopUnrolling = true; |
| 331 | LoopInterchange = EnableLoopInterchange; |
| 332 | LoopFusion = false; |
| 333 | ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; |
| 334 | LicmMssaOptCap = SetLicmMssaOptCap; |
| 335 | LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; |
| 336 | CallGraphProfile = true; |
| 337 | UnifiedLTO = false; |
| 338 | MergeFunctions = EnableMergeFunctions; |
| 339 | InlinerThreshold = -1; |
| 340 | EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; |
| 341 | DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively; |
| 342 | } |
| 343 | |
| 344 | namespace llvm { |
| 345 | extern cl::opt<unsigned> MaxDevirtIterations; |
| 346 | } // namespace llvm |
| 347 | |
| 348 | void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, |
| 349 | OptimizationLevel Level) { |
| 350 | for (auto &C : PeepholeEPCallbacks) |
| 351 | C(FPM, Level); |
| 352 | } |
| 353 | void PassBuilder::invokeLateLoopOptimizationsEPCallbacks( |
| 354 | LoopPassManager &LPM, OptimizationLevel Level) { |
| 355 | for (auto &C : LateLoopOptimizationsEPCallbacks) |
| 356 | C(LPM, Level); |
| 357 | } |
| 358 | void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, |
| 359 | OptimizationLevel Level) { |
| 360 | for (auto &C : LoopOptimizerEndEPCallbacks) |
| 361 | C(LPM, Level); |
| 362 | } |
| 363 | void PassBuilder::invokeScalarOptimizerLateEPCallbacks( |
| 364 | FunctionPassManager &FPM, OptimizationLevel Level) { |
| 365 | for (auto &C : ScalarOptimizerLateEPCallbacks) |
| 366 | C(FPM, Level); |
| 367 | } |
| 368 | void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, |
| 369 | OptimizationLevel Level) { |
| 370 | for (auto &C : CGSCCOptimizerLateEPCallbacks) |
| 371 | C(CGPM, Level); |
| 372 | } |
| 373 | void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, |
| 374 | OptimizationLevel Level) { |
| 375 | for (auto &C : VectorizerStartEPCallbacks) |
| 376 | C(FPM, Level); |
| 377 | } |
| 378 | void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, |
| 379 | OptimizationLevel Level) { |
| 380 | for (auto &C : VectorizerEndEPCallbacks) |
| 381 | C(FPM, Level); |
| 382 | } |
| 383 | void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, |
| 384 | OptimizationLevel Level, |
| 385 | ThinOrFullLTOPhase Phase) { |
| 386 | for (auto &C : OptimizerEarlyEPCallbacks) |
| 387 | C(MPM, Level, Phase); |
| 388 | } |
| 389 | void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, |
| 390 | OptimizationLevel Level, |
| 391 | ThinOrFullLTOPhase Phase) { |
| 392 | for (auto &C : OptimizerLastEPCallbacks) |
| 393 | C(MPM, Level, Phase); |
| 394 | } |
| 395 | void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks( |
| 396 | ModulePassManager &MPM, OptimizationLevel Level) { |
| 397 | for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks) |
| 398 | C(MPM, Level); |
| 399 | } |
| 400 | void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks( |
| 401 | ModulePassManager &MPM, OptimizationLevel Level) { |
| 402 | for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) |
| 403 | C(MPM, Level); |
| 404 | } |
| 405 | void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM, |
| 406 | OptimizationLevel Level) { |
| 407 | for (auto &C : PipelineStartEPCallbacks) |
| 408 | C(MPM, Level); |
| 409 | } |
| 410 | void PassBuilder::invokePipelineEarlySimplificationEPCallbacks( |
| 411 | ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) { |
| 412 | for (auto &C : PipelineEarlySimplificationEPCallbacks) |
| 413 | C(MPM, Level, Phase); |
| 414 | } |
| 415 | |
| 416 | // Helper to add AnnotationRemarksPass. |
| 417 | static void (ModulePassManager &MPM) { |
| 418 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass())); |
| 419 | // Count the stats for InstCount and FunctionPropertiesAnalysis |
| 420 | if (AreStatisticsEnabled()) { |
| 421 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: InstCountPass())); |
| 422 | MPM.addPass( |
| 423 | Pass: createModuleToFunctionPassAdaptor(Pass: FunctionPropertiesStatisticsPass())); |
| 424 | } |
| 425 | } |
| 426 | |
| 427 | // Helper to check if the current compilation phase is preparing for LTO |
| 428 | static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { |
| 429 | return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || |
| 430 | Phase == ThinOrFullLTOPhase::FullLTOPreLink; |
| 431 | } |
| 432 | |
| 433 | // Helper to check if the current compilation phase is LTO backend |
| 434 | static bool isLTOPostLink(ThinOrFullLTOPhase Phase) { |
| 435 | return Phase == ThinOrFullLTOPhase::ThinLTOPostLink || |
| 436 | Phase == ThinOrFullLTOPhase::FullLTOPostLink; |
| 437 | } |
| 438 | |
| 439 | // Helper to wrap conditionally Coro passes. |
| 440 | static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) { |
| 441 | // TODO: Skip passes according to Phase. |
| 442 | ModulePassManager CoroPM; |
| 443 | CoroPM.addPass(Pass: CoroEarlyPass()); |
| 444 | CGSCCPassManager CGPM; |
| 445 | CGPM.addPass(Pass: CoroSplitPass()); |
| 446 | CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 447 | CoroPM.addPass(Pass: CoroCleanupPass()); |
| 448 | CoroPM.addPass(Pass: GlobalDCEPass()); |
| 449 | return CoroConditionalWrapper(std::move(CoroPM)); |
| 450 | } |
| 451 | |
| 452 | // TODO: Investigate the cost/benefit of tail call elimination on debugging. |
| 453 | FunctionPassManager |
| 454 | PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, |
| 455 | ThinOrFullLTOPhase Phase) { |
| 456 | |
| 457 | FunctionPassManager FPM; |
| 458 | |
| 459 | if (AreStatisticsEnabled()) |
| 460 | FPM.addPass(Pass: CountVisitsPass()); |
| 461 | |
| 462 | // Form SSA out of local memory accesses after breaking apart aggregates into |
| 463 | // scalars. |
| 464 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 465 | |
| 466 | // Catch trivial redundancies |
| 467 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
| 468 | |
| 469 | // Hoisting of scalars and load expressions. |
| 470 | FPM.addPass( |
| 471 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 472 | FPM.addPass(Pass: InstCombinePass()); |
| 473 | |
| 474 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
| 475 | |
| 476 | invokePeepholeEPCallbacks(FPM, Level); |
| 477 | |
| 478 | FPM.addPass( |
| 479 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 480 | |
| 481 | // Form canonically associated expression trees, and simplify the trees using |
| 482 | // basic mathematical properties. For example, this will form (nearly) |
| 483 | // minimal multiplication trees. |
| 484 | FPM.addPass(Pass: ReassociatePass()); |
| 485 | |
| 486 | // Add the primary loop simplification pipeline. |
| 487 | // FIXME: Currently this is split into two loop pass pipelines because we run |
| 488 | // some function passes in between them. These can and should be removed |
| 489 | // and/or replaced by scheduling the loop pass equivalents in the correct |
| 490 | // positions. But those equivalent passes aren't powerful enough yet. |
| 491 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
| 492 | // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to |
| 493 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
| 494 | // `LoopInstSimplify`. |
| 495 | LoopPassManager LPM1, LPM2; |
| 496 | |
| 497 | // Simplify the loop body. We do this initially to clean up after other loop |
| 498 | // passes run, either when iterating on a loop or on inner loops with |
| 499 | // implications on the outer loop. |
| 500 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
| 501 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
| 502 | |
| 503 | // Try to remove as much code from the loop header as possible, |
| 504 | // to reduce amount of IR that will have to be duplicated. However, |
| 505 | // do not perform speculative hoisting the first time as LICM |
| 506 | // will destroy metadata that may not need to be destroyed if run |
| 507 | // after loop rotation. |
| 508 | // TODO: Investigate promotion cap for O1. |
| 509 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 510 | /*AllowSpeculation=*/false)); |
| 511 | |
| 512 | LPM1.addPass( |
| 513 | Pass: LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase))); |
| 514 | // TODO: Investigate promotion cap for O1. |
| 515 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 516 | /*AllowSpeculation=*/true)); |
| 517 | LPM1.addPass(Pass: SimpleLoopUnswitchPass()); |
| 518 | if (EnableLoopFlatten) |
| 519 | LPM1.addPass(Pass: LoopFlattenPass()); |
| 520 | |
| 521 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
| 522 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
| 523 | |
| 524 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
| 525 | |
| 526 | LPM2.addPass(Pass: LoopDeletionPass()); |
| 527 | |
| 528 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
| 529 | // because it changes IR to makes profile annotation in back compile |
| 530 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
| 531 | // attributes so we need to make sure and allow the full unroll pass to pay |
| 532 | // attention to it. |
| 533 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
| 534 | PGOOpt->Action != PGOOptions::SampleUse) |
| 535 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
| 536 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
| 537 | PTO.ForgetAllSCEVInLoopUnroll)); |
| 538 | |
| 539 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
| 540 | |
| 541 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
| 542 | /*UseMemorySSA=*/true)); |
| 543 | FPM.addPass( |
| 544 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 545 | FPM.addPass(Pass: InstCombinePass()); |
| 546 | // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. |
| 547 | // *All* loop passes must preserve it, in order to be able to use it. |
| 548 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
| 549 | /*UseMemorySSA=*/false)); |
| 550 | |
| 551 | // Delete small array after loop unroll. |
| 552 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 553 | |
| 554 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
| 555 | FPM.addPass(Pass: MemCpyOptPass()); |
| 556 | |
| 557 | // Sparse conditional constant propagation. |
| 558 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
| 559 | // before... |
| 560 | FPM.addPass(Pass: SCCPPass()); |
| 561 | |
| 562 | // Delete dead bit computations (instcombine runs after to fold away the dead |
| 563 | // computations, and then ADCE will run later to exploit any new DCE |
| 564 | // opportunities that creates). |
| 565 | FPM.addPass(Pass: BDCEPass()); |
| 566 | |
| 567 | // Run instcombine after redundancy and dead bit elimination to exploit |
| 568 | // opportunities opened up by them. |
| 569 | FPM.addPass(Pass: InstCombinePass()); |
| 570 | invokePeepholeEPCallbacks(FPM, Level); |
| 571 | |
| 572 | FPM.addPass(Pass: CoroElidePass()); |
| 573 | |
| 574 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
| 575 | |
| 576 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
| 577 | // the simplifications and basic cleanup after all the simplifications. |
| 578 | // TODO: Investigate if this is too expensive. |
| 579 | FPM.addPass(Pass: ADCEPass()); |
| 580 | FPM.addPass( |
| 581 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 582 | FPM.addPass(Pass: InstCombinePass()); |
| 583 | invokePeepholeEPCallbacks(FPM, Level); |
| 584 | |
| 585 | return FPM; |
| 586 | } |
| 587 | |
| 588 | FunctionPassManager |
| 589 | PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, |
| 590 | ThinOrFullLTOPhase Phase) { |
| 591 | assert(Level != OptimizationLevel::O0 && "Must request optimizations!" ); |
| 592 | |
| 593 | // The O1 pipeline has a separate pipeline creation function to simplify |
| 594 | // construction readability. |
| 595 | if (Level.getSpeedupLevel() == 1) |
| 596 | return buildO1FunctionSimplificationPipeline(Level, Phase); |
| 597 | |
| 598 | FunctionPassManager FPM; |
| 599 | |
| 600 | if (AreStatisticsEnabled()) |
| 601 | FPM.addPass(Pass: CountVisitsPass()); |
| 602 | |
| 603 | // Form SSA out of local memory accesses after breaking apart aggregates into |
| 604 | // scalars. |
| 605 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 606 | |
| 607 | // Catch trivial redundancies |
| 608 | FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */)); |
| 609 | if (EnableKnowledgeRetention) |
| 610 | FPM.addPass(Pass: AssumeSimplifyPass()); |
| 611 | |
| 612 | // Hoisting of scalars and load expressions. |
| 613 | if (EnableGVNHoist) |
| 614 | FPM.addPass(Pass: GVNHoistPass()); |
| 615 | |
| 616 | // Global value numbering based sinking. |
| 617 | if (EnableGVNSink) { |
| 618 | FPM.addPass(Pass: GVNSinkPass()); |
| 619 | FPM.addPass( |
| 620 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 621 | } |
| 622 | |
| 623 | // Speculative execution if the target has divergent branches; otherwise nop. |
| 624 | FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); |
| 625 | |
| 626 | // Optimize based on known information about branches, and cleanup afterward. |
| 627 | FPM.addPass(Pass: JumpThreadingPass()); |
| 628 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
| 629 | |
| 630 | // Jump table to switch conversion. |
| 631 | if (EnableJumpTableToSwitch) |
| 632 | FPM.addPass(Pass: JumpTableToSwitchPass( |
| 633 | /*InLTO=*/Phase == ThinOrFullLTOPhase::ThinLTOPostLink || |
| 634 | Phase == ThinOrFullLTOPhase::FullLTOPostLink)); |
| 635 | |
| 636 | FPM.addPass( |
| 637 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 638 | FPM.addPass(Pass: InstCombinePass()); |
| 639 | FPM.addPass(Pass: AggressiveInstCombinePass()); |
| 640 | FPM.addPass(Pass: LibCallsShrinkWrapPass()); |
| 641 | |
| 642 | invokePeepholeEPCallbacks(FPM, Level); |
| 643 | |
| 644 | // For PGO use pipeline, try to optimize memory intrinsics such as memcpy |
| 645 | // using the size value profile. Don't perform this when optimizing for size. |
| 646 | if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) |
| 647 | FPM.addPass(Pass: PGOMemOPSizeOpt()); |
| 648 | |
| 649 | FPM.addPass(Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/ |
| 650 | isInstrumentedPGOUse())); |
| 651 | FPM.addPass( |
| 652 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 653 | |
| 654 | // Form canonically associated expression trees, and simplify the trees using |
| 655 | // basic mathematical properties. For example, this will form (nearly) |
| 656 | // minimal multiplication trees. |
| 657 | FPM.addPass(Pass: ReassociatePass()); |
| 658 | |
| 659 | if (EnableConstraintElimination) |
| 660 | FPM.addPass(Pass: ConstraintEliminationPass()); |
| 661 | |
| 662 | // Add the primary loop simplification pipeline. |
| 663 | // FIXME: Currently this is split into two loop pass pipelines because we run |
| 664 | // some function passes in between them. These can and should be removed |
| 665 | // and/or replaced by scheduling the loop pass equivalents in the correct |
| 666 | // positions. But those equivalent passes aren't powerful enough yet. |
| 667 | // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still |
| 668 | // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to |
| 669 | // fully replace `SimplifyCFGPass`, and the closest to the other we have is |
| 670 | // `LoopInstSimplify`. |
| 671 | LoopPassManager LPM1, LPM2; |
| 672 | |
| 673 | // Simplify the loop body. We do this initially to clean up after other loop |
| 674 | // passes run, either when iterating on a loop or on inner loops with |
| 675 | // implications on the outer loop. |
| 676 | LPM1.addPass(Pass: LoopInstSimplifyPass()); |
| 677 | LPM1.addPass(Pass: LoopSimplifyCFGPass()); |
| 678 | |
| 679 | // Try to remove as much code from the loop header as possible, |
| 680 | // to reduce amount of IR that will have to be duplicated. However, |
| 681 | // do not perform speculative hoisting the first time as LICM |
| 682 | // will destroy metadata that may not need to be destroyed if run |
| 683 | // after loop rotation. |
| 684 | // TODO: Investigate promotion cap for O1. |
| 685 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 686 | /*AllowSpeculation=*/false)); |
| 687 | |
| 688 | LPM1.addPass( |
| 689 | Pass: LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase))); |
| 690 | // TODO: Investigate promotion cap for O1. |
| 691 | LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 692 | /*AllowSpeculation=*/true)); |
| 693 | LPM1.addPass( |
| 694 | Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); |
| 695 | if (EnableLoopFlatten) |
| 696 | LPM1.addPass(Pass: LoopFlattenPass()); |
| 697 | |
| 698 | LPM2.addPass(Pass: LoopIdiomRecognizePass()); |
| 699 | LPM2.addPass(Pass: IndVarSimplifyPass()); |
| 700 | |
| 701 | { |
| 702 | ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ; |
| 703 | ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
| 704 | OptimizationLevel::O3)); |
| 705 | LPM2.addPass(Pass: std::move(ExtraPasses)); |
| 706 | } |
| 707 | |
| 708 | invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level); |
| 709 | |
| 710 | LPM2.addPass(Pass: LoopDeletionPass()); |
| 711 | |
| 712 | // Do not enable unrolling in PreLinkThinLTO phase during sample PGO |
| 713 | // because it changes the IR, which makes profile annotation in back compile |
| 714 | // inaccurate. The normal unroller doesn't pay attention to forced full unroll |
| 715 | // attributes so we need to make sure and allow the full unroll pass to pay |
| 716 | // attention to it. |
| 717 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || |
| 718 | PGOOpt->Action != PGOOptions::SampleUse) |
| 719 | LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
| 720 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
| 721 | PTO.ForgetAllSCEVInLoopUnroll)); |
| 722 | |
| 723 | invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level); |
| 724 | |
| 725 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1), |
| 726 | /*UseMemorySSA=*/true)); |
| 727 | FPM.addPass( |
| 728 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 729 | FPM.addPass(Pass: InstCombinePass()); |
| 730 | // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, |
| 731 | // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. |
| 732 | // *All* loop passes must preserve it, in order to be able to use it. |
| 733 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2), |
| 734 | /*UseMemorySSA=*/false)); |
| 735 | |
| 736 | // Delete small array after loop unroll. |
| 737 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 738 | |
| 739 | // Try vectorization/scalarization transforms that are both improvements |
| 740 | // themselves and can allow further folds with GVN and InstCombine. |
| 741 | FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); |
| 742 | |
| 743 | // Eliminate redundancies. |
| 744 | FPM.addPass(Pass: MergedLoadStoreMotionPass()); |
| 745 | if (RunNewGVN) |
| 746 | FPM.addPass(Pass: NewGVNPass()); |
| 747 | else |
| 748 | FPM.addPass(Pass: GVNPass()); |
| 749 | |
| 750 | // Sparse conditional constant propagation. |
| 751 | // FIXME: It isn't clear why we do this *after* loop passes rather than |
| 752 | // before... |
| 753 | FPM.addPass(Pass: SCCPPass()); |
| 754 | |
| 755 | // Delete dead bit computations (instcombine runs after to fold away the dead |
| 756 | // computations, and then ADCE will run later to exploit any new DCE |
| 757 | // opportunities that creates). |
| 758 | FPM.addPass(Pass: BDCEPass()); |
| 759 | |
| 760 | // Run instcombine after redundancy and dead bit elimination to exploit |
| 761 | // opportunities opened up by them. |
| 762 | FPM.addPass(Pass: InstCombinePass()); |
| 763 | invokePeepholeEPCallbacks(FPM, Level); |
| 764 | |
| 765 | // Re-consider control flow based optimizations after redundancy elimination, |
| 766 | // redo DCE, etc. |
| 767 | if (EnableDFAJumpThreading) |
| 768 | FPM.addPass(Pass: DFAJumpThreadingPass()); |
| 769 | |
| 770 | FPM.addPass(Pass: JumpThreadingPass()); |
| 771 | FPM.addPass(Pass: CorrelatedValuePropagationPass()); |
| 772 | |
| 773 | // Finally, do an expensive DCE pass to catch all the dead code exposed by |
| 774 | // the simplifications and basic cleanup after all the simplifications. |
| 775 | // TODO: Investigate if this is too expensive. |
| 776 | FPM.addPass(Pass: ADCEPass()); |
| 777 | |
| 778 | // Specially optimize memory movement as it doesn't look like dataflow in SSA. |
| 779 | FPM.addPass(Pass: MemCpyOptPass()); |
| 780 | |
| 781 | FPM.addPass(Pass: DSEPass()); |
| 782 | FPM.addPass(Pass: MoveAutoInitPass()); |
| 783 | |
| 784 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 785 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 786 | /*AllowSpeculation=*/true), |
| 787 | /*UseMemorySSA=*/true)); |
| 788 | |
| 789 | FPM.addPass(Pass: CoroElidePass()); |
| 790 | |
| 791 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
| 792 | |
| 793 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 794 | .convertSwitchRangeToICmp(B: true) |
| 795 | .convertSwitchToArithmetic(B: true) |
| 796 | .hoistCommonInsts(B: true) |
| 797 | .sinkCommonInsts(B: true))); |
| 798 | FPM.addPass(Pass: InstCombinePass()); |
| 799 | invokePeepholeEPCallbacks(FPM, Level); |
| 800 | |
| 801 | return FPM; |
| 802 | } |
| 803 | |
| 804 | void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { |
| 805 | MPM.addPass(Pass: CanonicalizeAliasesPass()); |
| 806 | MPM.addPass(Pass: NameAnonGlobalPass()); |
| 807 | } |
| 808 | |
| 809 | void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM, |
| 810 | OptimizationLevel Level, |
| 811 | ThinOrFullLTOPhase LTOPhase) { |
| 812 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
| 813 | if (DisablePreInliner) |
| 814 | return; |
| 815 | InlineParams IP; |
| 816 | |
| 817 | IP.DefaultThreshold = PreInlineThreshold; |
| 818 | |
| 819 | // FIXME: The hint threshold has the same value used by the regular inliner |
| 820 | // when not optimzing for size. This should probably be lowered after |
| 821 | // performance testing. |
| 822 | // FIXME: this comment is cargo culted from the old pass manager, revisit). |
| 823 | IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; |
| 824 | ModuleInlinerWrapperPass MIWP( |
| 825 | IP, /* MandatoryFirst */ true, |
| 826 | InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner}); |
| 827 | CGSCCPassManager &CGPipeline = MIWP.getPM(); |
| 828 | |
| 829 | FunctionPassManager FPM; |
| 830 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 831 | FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies. |
| 832 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( |
| 833 | B: true))); // Merge & remove basic blocks. |
| 834 | FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences. |
| 835 | invokePeepholeEPCallbacks(FPM, Level); |
| 836 | |
| 837 | CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
| 838 | Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 839 | |
| 840 | MPM.addPass(Pass: std::move(MIWP)); |
| 841 | |
| 842 | // Delete anything that is now dead to make sure that we don't instrument |
| 843 | // dead code. Instrumentation can end up keeping dead code around and |
| 844 | // dramatically increase code size. |
| 845 | MPM.addPass(Pass: GlobalDCEPass()); |
| 846 | } |
| 847 | |
| 848 | void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM, |
| 849 | OptimizationLevel Level) { |
| 850 | if (EnablePostPGOLoopRotation) { |
| 851 | // Disable header duplication in loop rotation at -Oz. |
| 852 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 853 | Pass: createFunctionToLoopPassAdaptor(Pass: LoopRotatePass(), |
| 854 | /*UseMemorySSA=*/false), |
| 855 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 856 | } |
| 857 | } |
| 858 | |
| 859 | void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, |
| 860 | OptimizationLevel Level, bool RunProfileGen, |
| 861 | bool IsCS, bool AtomicCounterUpdate, |
| 862 | std::string ProfileFile, |
| 863 | std::string ProfileRemappingFile) { |
| 864 | assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!" ); |
| 865 | |
| 866 | if (!RunProfileGen) { |
| 867 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
| 868 | MPM.addPass( |
| 869 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
| 870 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 871 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 872 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 873 | return; |
| 874 | } |
| 875 | |
| 876 | // Perform PGO instrumentation. |
| 877 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO |
| 878 | : PGOInstrumentationType::FDO)); |
| 879 | |
| 880 | addPostPGOLoopRotation(MPM, Level); |
| 881 | // Add the profile lowering pass. |
| 882 | InstrProfOptions Options; |
| 883 | if (!ProfileFile.empty()) |
| 884 | Options.InstrProfileOutput = ProfileFile; |
| 885 | // Do counter promotion at Level greater than O0. |
| 886 | Options.DoCounterPromotion = true; |
| 887 | Options.UseBFIInPromotion = IsCS; |
| 888 | if (EnableSampledInstr) { |
| 889 | Options.Sampling = true; |
| 890 | // With sampling, there is little beneifit to enable counter promotion. |
| 891 | // But note that sampling does work with counter promotion. |
| 892 | Options.DoCounterPromotion = false; |
| 893 | } |
| 894 | Options.Atomic = AtomicCounterUpdate; |
| 895 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
| 896 | } |
| 897 | |
| 898 | void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, |
| 899 | bool RunProfileGen, bool IsCS, |
| 900 | bool AtomicCounterUpdate, |
| 901 | std::string ProfileFile, |
| 902 | std::string ProfileRemappingFile) { |
| 903 | if (!RunProfileGen) { |
| 904 | assert(!ProfileFile.empty() && "Profile use expecting a profile file!" ); |
| 905 | MPM.addPass( |
| 906 | Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS)); |
| 907 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 908 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 909 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 910 | return; |
| 911 | } |
| 912 | |
| 913 | // Perform PGO instrumentation. |
| 914 | MPM.addPass(Pass: PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO |
| 915 | : PGOInstrumentationType::FDO)); |
| 916 | // Add the profile lowering pass. |
| 917 | InstrProfOptions Options; |
| 918 | if (!ProfileFile.empty()) |
| 919 | Options.InstrProfileOutput = ProfileFile; |
| 920 | // Do not do counter promotion at O0. |
| 921 | Options.DoCounterPromotion = false; |
| 922 | Options.UseBFIInPromotion = IsCS; |
| 923 | Options.Atomic = AtomicCounterUpdate; |
| 924 | MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS)); |
| 925 | } |
| 926 | |
| 927 | static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { |
| 928 | return getInlineParams(OptLevel: Level.getSpeedupLevel(), SizeOptLevel: Level.getSizeLevel()); |
| 929 | } |
| 930 | |
| 931 | ModuleInlinerWrapperPass |
| 932 | PassBuilder::buildInlinerPipeline(OptimizationLevel Level, |
| 933 | ThinOrFullLTOPhase Phase) { |
| 934 | InlineParams IP; |
| 935 | if (PTO.InlinerThreshold == -1) |
| 936 | IP = getInlineParamsFromOptLevel(Level); |
| 937 | else |
| 938 | IP = getInlineParams(Threshold: PTO.InlinerThreshold); |
| 939 | // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO, |
| 940 | // set hot-caller threshold to 0 to disable hot |
| 941 | // callsite inline (as much as possible [1]) because it makes |
| 942 | // profile annotation in the backend inaccurate. |
| 943 | // |
| 944 | // [1] Note the cost of a function could be below zero due to erased |
| 945 | // prologue / epilogue. |
| 946 | if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
| 947 | IP.HotCallSiteThreshold = 0; |
| 948 | |
| 949 | if (PGOOpt) |
| 950 | IP.EnableDeferral = EnablePGOInlineDeferral; |
| 951 | |
| 952 | ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, |
| 953 | InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner}, |
| 954 | UseInlineAdvisor, MaxDevirtIterations); |
| 955 | |
| 956 | // Require the GlobalsAA analysis for the module so we can query it within |
| 957 | // the CGSCC pipeline. |
| 958 | if (EnableGlobalAnalyses) { |
| 959 | MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
| 960 | // Invalidate AAManager so it can be recreated and pick up the newly |
| 961 | // available GlobalsAA. |
| 962 | MIWP.addModulePass( |
| 963 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
| 964 | } |
| 965 | |
| 966 | // Require the ProfileSummaryAnalysis for the module so we can query it within |
| 967 | // the inliner pass. |
| 968 | MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 969 | |
| 970 | // Now begin the main postorder CGSCC pipeline. |
| 971 | // FIXME: The current CGSCC pipeline has its origins in the legacy pass |
| 972 | // manager and trying to emulate its precise behavior. Much of this doesn't |
| 973 | // make a lot of sense and we should revisit the core CGSCC structure. |
| 974 | CGSCCPassManager &MainCGPipeline = MIWP.getPM(); |
| 975 | |
| 976 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
| 977 | // generally clean up exception handling overhead. It isn't clear this is |
| 978 | // valuable as the inliner doesn't currently care whether it is inlining an |
| 979 | // invoke or a call. |
| 980 | |
| 981 | if (AttributorRun & AttributorRunOption::CGSCC) |
| 982 | MainCGPipeline.addPass(Pass: AttributorCGSCCPass()); |
| 983 | else if (AttributorRun & AttributorRunOption::CGSCC_LIGHT) |
| 984 | MainCGPipeline.addPass(Pass: AttributorLightCGSCCPass()); |
| 985 | |
| 986 | // Deduce function attributes. We do another run of this after the function |
| 987 | // simplification pipeline, so this only needs to run when it could affect the |
| 988 | // function simplification pipeline, which is only the case with recursive |
| 989 | // functions. |
| 990 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)); |
| 991 | |
| 992 | // When at O3 add argument promotion to the pass pipeline. |
| 993 | // FIXME: It isn't at all clear why this should be limited to O3. |
| 994 | if (Level == OptimizationLevel::O3) |
| 995 | MainCGPipeline.addPass(Pass: ArgumentPromotionPass()); |
| 996 | |
| 997 | // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if |
| 998 | // there are no OpenMP runtime calls present in the module. |
| 999 | if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) |
| 1000 | MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass(Phase)); |
| 1001 | |
| 1002 | invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level); |
| 1003 | |
| 1004 | // Add the core function simplification pipeline nested inside the |
| 1005 | // CGSCC walk. |
| 1006 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
| 1007 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
| 1008 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true)); |
| 1009 | |
| 1010 | // Finally, deduce any function attributes based on the fully simplified |
| 1011 | // function. |
| 1012 | MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass()); |
| 1013 | |
| 1014 | // Mark that the function is fully simplified and that it shouldn't be |
| 1015 | // simplified again if we somehow revisit it due to CGSCC mutations unless |
| 1016 | // it's been modified since. |
| 1017 | MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor( |
| 1018 | Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>())); |
| 1019 | |
| 1020 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { |
| 1021 | MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
| 1022 | MainCGPipeline.addPass(Pass: CoroAnnotationElidePass()); |
| 1023 | } |
| 1024 | |
| 1025 | // Make sure we don't affect potential future NoRerun CGSCC adaptors. |
| 1026 | MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor( |
| 1027 | Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); |
| 1028 | |
| 1029 | return MIWP; |
| 1030 | } |
| 1031 | |
| 1032 | ModulePassManager |
| 1033 | PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, |
| 1034 | ThinOrFullLTOPhase Phase) { |
| 1035 | ModulePassManager MPM; |
| 1036 | |
| 1037 | InlineParams IP = getInlineParamsFromOptLevel(Level); |
| 1038 | // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO, |
| 1039 | // set hot-caller threshold to 0 to disable hot |
| 1040 | // callsite inline (as much as possible [1]) because it makes |
| 1041 | // profile annotation in the backend inaccurate. |
| 1042 | // |
| 1043 | // [1] Note the cost of a function could be below zero due to erased |
| 1044 | // prologue / epilogue. |
| 1045 | if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
| 1046 | IP.HotCallSiteThreshold = 0; |
| 1047 | |
| 1048 | if (PGOOpt) |
| 1049 | IP.EnableDeferral = EnablePGOInlineDeferral; |
| 1050 | |
| 1051 | // The inline deferral logic is used to avoid losing some |
| 1052 | // inlining chance in future. It is helpful in SCC inliner, in which |
| 1053 | // inlining is processed in bottom-up order. |
| 1054 | // While in module inliner, the inlining order is a priority-based order |
| 1055 | // by default. The inline deferral is unnecessary there. So we disable the |
| 1056 | // inline deferral logic in module inliner. |
| 1057 | IP.EnableDeferral = false; |
| 1058 | |
| 1059 | MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); |
| 1060 | if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) { |
| 1061 | MPM.addPass(Pass: GlobalOptPass()); |
| 1062 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1063 | MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false)); |
| 1064 | } |
| 1065 | |
| 1066 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 1067 | Pass: buildFunctionSimplificationPipeline(Level, Phase), |
| 1068 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1069 | |
| 1070 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { |
| 1071 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
| 1072 | Pass: CoroSplitPass(Level != OptimizationLevel::O0))); |
| 1073 | MPM.addPass( |
| 1074 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: CoroAnnotationElidePass())); |
| 1075 | } |
| 1076 | |
| 1077 | return MPM; |
| 1078 | } |
| 1079 | |
| 1080 | ModulePassManager |
| 1081 | PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, |
| 1082 | ThinOrFullLTOPhase Phase) { |
| 1083 | assert(Level != OptimizationLevel::O0 && |
| 1084 | "Should not be used for O0 pipeline" ); |
| 1085 | |
| 1086 | assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink && |
| 1087 | "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!" ); |
| 1088 | |
| 1089 | ModulePassManager MPM; |
| 1090 | |
| 1091 | // Place pseudo probe instrumentation as the first pass of the pipeline to |
| 1092 | // minimize the impact of optimization changes. |
| 1093 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
| 1094 | Phase != ThinOrFullLTOPhase::ThinLTOPostLink) |
| 1095 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
| 1096 | |
| 1097 | bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); |
| 1098 | |
| 1099 | // In ThinLTO mode, when flattened profile is used, all the available |
| 1100 | // profile information will be annotated in PreLink phase so there is |
| 1101 | // no need to load the profile again in PostLink. |
| 1102 | bool LoadSampleProfile = |
| 1103 | HasSampleProfile && |
| 1104 | !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); |
| 1105 | |
| 1106 | // During the ThinLTO backend phase we perform early indirect call promotion |
| 1107 | // here, before globalopt. Otherwise imported available_externally functions |
| 1108 | // look unreferenced and are removed. If we are going to load the sample |
| 1109 | // profile then defer until later. |
| 1110 | // TODO: See if we can move later and consolidate with the location where |
| 1111 | // we perform ICP when we are loading a sample profile. |
| 1112 | // TODO: We pass HasSampleProfile (whether there was a sample profile file |
| 1113 | // passed to the compile) to the SamplePGO flag of ICP. This is used to |
| 1114 | // determine whether the new direct calls are annotated with prof metadata. |
| 1115 | // Ideally this should be determined from whether the IR is annotated with |
| 1116 | // sample profile, and not whether a sample profile was provided on the |
| 1117 | // command line. E.g. for flattened profiles where we will not be reloading |
| 1118 | // the sample profile in the ThinLTO backend, we ideally shouldn't have to |
| 1119 | // provide the sample profile file. |
| 1120 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) |
| 1121 | MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); |
| 1122 | |
| 1123 | // Create an early function pass manager to cleanup the output of the |
| 1124 | // frontend. Not necessary with LTO post link pipelines since the pre link |
| 1125 | // pipeline already cleaned up the frontend output. |
| 1126 | if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) { |
| 1127 | // Do basic inference of function attributes from known properties of system |
| 1128 | // libraries and other oracles. |
| 1129 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
| 1130 | MPM.addPass(Pass: CoroEarlyPass()); |
| 1131 | |
| 1132 | FunctionPassManager EarlyFPM; |
| 1133 | EarlyFPM.addPass(Pass: EntryExitInstrumenterPass(/*PostInlining=*/false)); |
| 1134 | // Lower llvm.expect to metadata before attempting transforms. |
| 1135 | // Compare/branch metadata may alter the behavior of passes like |
| 1136 | // SimplifyCFG. |
| 1137 | EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass()); |
| 1138 | EarlyFPM.addPass(Pass: SimplifyCFGPass()); |
| 1139 | EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 1140 | EarlyFPM.addPass(Pass: EarlyCSEPass()); |
| 1141 | if (Level == OptimizationLevel::O3) |
| 1142 | EarlyFPM.addPass(Pass: CallSiteSplittingPass()); |
| 1143 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 1144 | Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1145 | } |
| 1146 | |
| 1147 | if (LoadSampleProfile) { |
| 1148 | // Annotate sample profile right after early FPM to ensure freshness of |
| 1149 | // the debug info. |
| 1150 | MPM.addPass(Pass: SampleProfileLoaderPass( |
| 1151 | PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS)); |
| 1152 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 1153 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 1154 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 1155 | // Do not invoke ICP in the LTOPrelink phase as it makes it hard |
| 1156 | // for the profile annotation to be accurate in the LTO backend. |
| 1157 | if (!isLTOPreLink(Phase)) |
| 1158 | // We perform early indirect call promotion here, before globalopt. |
| 1159 | // This is important for the ThinLTO backend phase because otherwise |
| 1160 | // imported available_externally functions look unreferenced and are |
| 1161 | // removed. |
| 1162 | MPM.addPass( |
| 1163 | Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); |
| 1164 | } |
| 1165 | |
| 1166 | // Try to perform OpenMP specific optimizations on the module. This is a |
| 1167 | // (quick!) no-op if there are no OpenMP runtime calls present in the module. |
| 1168 | MPM.addPass(Pass: OpenMPOptPass(Phase)); |
| 1169 | |
| 1170 | if (AttributorRun & AttributorRunOption::MODULE) |
| 1171 | MPM.addPass(Pass: AttributorPass()); |
| 1172 | else if (AttributorRun & AttributorRunOption::MODULE_LIGHT) |
| 1173 | MPM.addPass(Pass: AttributorLightPass()); |
| 1174 | |
| 1175 | // Lower type metadata and the type.test intrinsic in the ThinLTO |
| 1176 | // post link pipeline after ICP. This is to enable usage of the type |
| 1177 | // tests in ICP sequences. |
| 1178 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) |
| 1179 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1180 | lowertypetests::DropTestKind::Assume)); |
| 1181 | |
| 1182 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase); |
| 1183 | |
| 1184 | // Interprocedural constant propagation now that basic cleanup has occurred |
| 1185 | // and prior to optimizing globals. |
| 1186 | // FIXME: This position in the pipeline hasn't been carefully considered in |
| 1187 | // years, it should be re-analyzed. |
| 1188 | MPM.addPass( |
| 1189 | Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase)))); |
| 1190 | |
| 1191 | // Attach metadata to indirect call sites indicating the set of functions |
| 1192 | // they may target at run-time. This should follow IPSCCP. |
| 1193 | MPM.addPass(Pass: CalledValuePropagationPass()); |
| 1194 | |
| 1195 | // Optimize globals to try and fold them into constants. |
| 1196 | MPM.addPass(Pass: GlobalOptPass()); |
| 1197 | |
| 1198 | // Create a small function pass pipeline to cleanup after all the global |
| 1199 | // optimizations. |
| 1200 | FunctionPassManager GlobalCleanupPM; |
| 1201 | // FIXME: Should this instead be a run of SROA? |
| 1202 | GlobalCleanupPM.addPass(Pass: PromotePass()); |
| 1203 | GlobalCleanupPM.addPass(Pass: InstCombinePass()); |
| 1204 | invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level); |
| 1205 | GlobalCleanupPM.addPass( |
| 1206 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 1207 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM), |
| 1208 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1209 | |
| 1210 | // We already asserted this happens in non-FullLTOPostLink earlier. |
| 1211 | const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink; |
| 1212 | // Enable contextual profiling instrumentation. |
| 1213 | const bool IsCtxProfGen = |
| 1214 | IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled(); |
| 1215 | const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink; |
| 1216 | const bool IsPGOInstrGen = |
| 1217 | IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr; |
| 1218 | const bool IsPGOInstrUse = |
| 1219 | IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse; |
| 1220 | const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty(); |
| 1221 | // We don't want to mix pgo ctx gen and pgo gen; we also don't currently |
| 1222 | // enable ctx profiling from the frontend. |
| 1223 | assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) && |
| 1224 | "Enabling both instrumented PGO and contextual instrumentation is not " |
| 1225 | "supported." ); |
| 1226 | const bool IsCtxProfUse = |
| 1227 | !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink; |
| 1228 | |
| 1229 | assert( |
| 1230 | (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) && |
| 1231 | "--instrument-cold-function-only-path is provided but " |
| 1232 | "--pgo-instrument-cold-function-only is not enabled" ); |
| 1233 | const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly && |
| 1234 | IsPGOPreLink && |
| 1235 | !InstrumentColdFuncOnlyPath.empty(); |
| 1236 | |
| 1237 | if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen || |
| 1238 | IsCtxProfUse || IsColdFuncOnlyInstrGen) |
| 1239 | addPreInlinerPasses(MPM, Level, LTOPhase: Phase); |
| 1240 | |
| 1241 | // Add all the requested passes for instrumentation PGO, if requested. |
| 1242 | if (IsPGOInstrGen || IsPGOInstrUse) { |
| 1243 | addPGOInstrPasses(MPM, Level, |
| 1244 | /*RunProfileGen=*/IsPGOInstrGen, |
| 1245 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 1246 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile); |
| 1247 | } else if (IsCtxProfGen || IsCtxProfUse) { |
| 1248 | MPM.addPass(Pass: PGOInstrumentationGen(PGOInstrumentationType::CTXPROF)); |
| 1249 | // In pre-link, we just want the instrumented IR. We use the contextual |
| 1250 | // profile in the post-thinlink phase. |
| 1251 | // The instrumentation will be removed in post-thinlink after IPO. |
| 1252 | // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this |
| 1253 | // mechanism for GUIDs. |
| 1254 | MPM.addPass(Pass: AssignGUIDPass()); |
| 1255 | if (IsCtxProfUse) { |
| 1256 | MPM.addPass(Pass: PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true)); |
| 1257 | return MPM; |
| 1258 | } |
| 1259 | // Block further inlining in the instrumented ctxprof case. This avoids |
| 1260 | // confusingly collecting profiles for the same GUID corresponding to |
| 1261 | // different variants of the function. We could do like PGO and identify |
| 1262 | // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for |
| 1263 | // thinlto to happen before performing any further optimizations, it's |
| 1264 | // unnecessary to collect profiles for non-prevailing copies. |
| 1265 | MPM.addPass(Pass: NoinlineNonPrevailing()); |
| 1266 | addPostPGOLoopRotation(MPM, Level); |
| 1267 | MPM.addPass(Pass: PGOCtxProfLoweringPass()); |
| 1268 | } else if (IsColdFuncOnlyInstrGen) { |
| 1269 | addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false, |
| 1270 | /* AtomicCounterUpdate */ false, |
| 1271 | ProfileFile: InstrumentColdFuncOnlyPath, |
| 1272 | /* ProfileRemappingFile */ "" ); |
| 1273 | } |
| 1274 | |
| 1275 | if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen) |
| 1276 | MPM.addPass(Pass: PGOIndirectCallPromotion(false, false)); |
| 1277 | |
| 1278 | if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) |
| 1279 | MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile, |
| 1280 | EnableSampledInstr)); |
| 1281 | |
| 1282 | if (IsMemprofUse) |
| 1283 | MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, FS)); |
| 1284 | |
| 1285 | if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse || |
| 1286 | PGOOpt->Action == PGOOptions::SampleUse)) |
| 1287 | MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); |
| 1288 | |
| 1289 | MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); |
| 1290 | |
| 1291 | if (EnableModuleInliner) |
| 1292 | MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase)); |
| 1293 | else |
| 1294 | MPM.addPass(Pass: buildInlinerPipeline(Level, Phase)); |
| 1295 | |
| 1296 | // Remove any dead arguments exposed by cleanups, constant folding globals, |
| 1297 | // and argument promotion. |
| 1298 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
| 1299 | |
| 1300 | if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) |
| 1301 | MPM.addPass(Pass: SimplifyTypeTestsPass()); |
| 1302 | |
| 1303 | if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) |
| 1304 | MPM.addPass(Pass: CoroCleanupPass()); |
| 1305 | |
| 1306 | // Optimize globals now that functions are fully simplified. |
| 1307 | MPM.addPass(Pass: GlobalOptPass()); |
| 1308 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1309 | |
| 1310 | return MPM; |
| 1311 | } |
| 1312 | |
| 1313 | /// TODO: Should LTO cause any differences to this set of passes? |
| 1314 | void PassBuilder::addVectorPasses(OptimizationLevel Level, |
| 1315 | FunctionPassManager &FPM, |
| 1316 | ThinOrFullLTOPhase LTOPhase) { |
| 1317 | const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink; |
| 1318 | |
| 1319 | FPM.addPass(Pass: LoopVectorizePass( |
| 1320 | LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); |
| 1321 | |
| 1322 | // Drop dereferenceable assumes after vectorization, as they are no longer |
| 1323 | // needed and can inhibit further optimization. |
| 1324 | if (!isLTOPreLink(Phase: LTOPhase)) |
| 1325 | FPM.addPass(Pass: DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true)); |
| 1326 | |
| 1327 | FPM.addPass(Pass: InferAlignmentPass()); |
| 1328 | if (IsFullLTO) { |
| 1329 | // The vectorizer may have significantly shortened a loop body; unroll |
| 1330 | // again. Unroll small loops to hide loop backedge latency and saturate any |
| 1331 | // parallel execution resources of an out-of-order processor. We also then |
| 1332 | // need to clean up redundancies and loop invariant code. |
| 1333 | // FIXME: It would be really good to use a loop-integrated instruction |
| 1334 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
| 1335 | // across the loop nests. |
| 1336 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
| 1337 | if (EnableUnrollAndJam && PTO.LoopUnrolling) |
| 1338 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1339 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
| 1340 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
| 1341 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
| 1342 | PTO.ForgetAllSCEVInLoopUnroll))); |
| 1343 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
| 1344 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
| 1345 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
| 1346 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
| 1347 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
| 1348 | // or SimplifyCFG passes scheduled after us, that would cleanup |
| 1349 | // the CFG mess this may created if allowed to modify CFG, so forbid that. |
| 1350 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
| 1351 | } |
| 1352 | |
| 1353 | if (!IsFullLTO) { |
| 1354 | // Eliminate loads by forwarding stores from the previous iteration to loads |
| 1355 | // of the current iteration. |
| 1356 | FPM.addPass(Pass: LoopLoadEliminationPass()); |
| 1357 | } |
| 1358 | // Cleanup after the loop optimization passes. |
| 1359 | FPM.addPass(Pass: InstCombinePass()); |
| 1360 | |
| 1361 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
| 1362 | ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ; |
| 1363 | // At higher optimization levels, try to clean up any runtime overlap and |
| 1364 | // alignment checks inserted by the vectorizer. We want to track correlated |
| 1365 | // runtime checks for two inner loops in the same outer loop, fold any |
| 1366 | // common computations, hoist loop-invariant aspects out of any outer loop, |
| 1367 | // and unswitch the runtime checks if possible. Once hoisted, we may have |
| 1368 | // dead (or speculatable) control flows or more combining opportunities. |
| 1369 | ExtraPasses.addPass(Pass: EarlyCSEPass()); |
| 1370 | ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass()); |
| 1371 | ExtraPasses.addPass(Pass: InstCombinePass()); |
| 1372 | LoopPassManager LPM; |
| 1373 | LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 1374 | /*AllowSpeculation=*/true)); |
| 1375 | LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == |
| 1376 | OptimizationLevel::O3)); |
| 1377 | ExtraPasses.addPass( |
| 1378 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true)); |
| 1379 | ExtraPasses.addPass( |
| 1380 | Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true))); |
| 1381 | ExtraPasses.addPass(Pass: InstCombinePass()); |
| 1382 | FPM.addPass(Pass: std::move(ExtraPasses)); |
| 1383 | } |
| 1384 | |
| 1385 | // Now that we've formed fast to execute loop structures, we do further |
| 1386 | // optimizations. These are run afterward as they might block doing complex |
| 1387 | // analyses and transforms such as what are needed for loop vectorization. |
| 1388 | |
| 1389 | // Cleanup after loop vectorization, etc. Simplification passes like CVP and |
| 1390 | // GVN, loop transforms, and others have already run, so it's now better to |
| 1391 | // convert to more optimized IR using more aggressive simplify CFG options. |
| 1392 | // The extra sinking transform can create larger basic blocks, so do this |
| 1393 | // before SLP vectorization. |
| 1394 | FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 1395 | .forwardSwitchCondToPhi(B: true) |
| 1396 | .convertSwitchRangeToICmp(B: true) |
| 1397 | .convertSwitchToArithmetic(B: true) |
| 1398 | .convertSwitchToLookupTable(B: true) |
| 1399 | .needCanonicalLoops(B: false) |
| 1400 | .hoistCommonInsts(B: true) |
| 1401 | .sinkCommonInsts(B: true))); |
| 1402 | |
| 1403 | if (IsFullLTO) { |
| 1404 | FPM.addPass(Pass: SCCPPass()); |
| 1405 | FPM.addPass(Pass: InstCombinePass()); |
| 1406 | FPM.addPass(Pass: BDCEPass()); |
| 1407 | } |
| 1408 | |
| 1409 | // Optimize parallel scalar instruction chains into SIMD instructions. |
| 1410 | if (PTO.SLPVectorization) { |
| 1411 | FPM.addPass(Pass: SLPVectorizerPass()); |
| 1412 | if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { |
| 1413 | FPM.addPass(Pass: EarlyCSEPass()); |
| 1414 | } |
| 1415 | } |
| 1416 | // Enhance/cleanup vector code. |
| 1417 | FPM.addPass(Pass: VectorCombinePass()); |
| 1418 | |
| 1419 | if (!IsFullLTO) { |
| 1420 | FPM.addPass(Pass: InstCombinePass()); |
| 1421 | // Unroll small loops to hide loop backedge latency and saturate any |
| 1422 | // parallel execution resources of an out-of-order processor. We also then |
| 1423 | // need to clean up redundancies and loop invariant code. |
| 1424 | // FIXME: It would be really good to use a loop-integrated instruction |
| 1425 | // combiner for cleanup here so that the unrolling and LICM can be pipelined |
| 1426 | // across the loop nests. |
| 1427 | // We do UnrollAndJam in a separate LPM to ensure it happens before unroll |
| 1428 | if (EnableUnrollAndJam && PTO.LoopUnrolling) { |
| 1429 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1430 | Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel()))); |
| 1431 | } |
| 1432 | FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions( |
| 1433 | Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, |
| 1434 | PTO.ForgetAllSCEVInLoopUnroll))); |
| 1435 | FPM.addPass(Pass: WarnMissedTransformationsPass()); |
| 1436 | // Now that we are done with loop unrolling, be it either by LoopVectorizer, |
| 1437 | // or LoopUnroll passes, some variable-offset GEP's into alloca's could have |
| 1438 | // become constant-offset, thus enabling SROA and alloca promotion. Do so. |
| 1439 | // NOTE: we are very late in the pipeline, and we don't have any LICM |
| 1440 | // or SimplifyCFG passes scheduled after us, that would cleanup |
| 1441 | // the CFG mess this may created if allowed to modify CFG, so forbid that. |
| 1442 | FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG)); |
| 1443 | } |
| 1444 | |
| 1445 | FPM.addPass(Pass: InferAlignmentPass()); |
| 1446 | FPM.addPass(Pass: InstCombinePass()); |
| 1447 | |
| 1448 | // This is needed for two reasons: |
| 1449 | // 1. It works around problems that instcombine introduces, such as sinking |
| 1450 | // expensive FP divides into loops containing multiplications using the |
| 1451 | // divide result. |
| 1452 | // 2. It helps to clean up some loop-invariant code created by the loop |
| 1453 | // unroll pass when IsFullLTO=false. |
| 1454 | FPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1455 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 1456 | /*AllowSpeculation=*/true), |
| 1457 | /*UseMemorySSA=*/true)); |
| 1458 | |
| 1459 | // Now that we've vectorized and unrolled loops, we may have more refined |
| 1460 | // alignment information, try to re-derive it here. |
| 1461 | FPM.addPass(Pass: AlignmentFromAssumptionsPass()); |
| 1462 | } |
| 1463 | |
| 1464 | ModulePassManager |
| 1465 | PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, |
| 1466 | ThinOrFullLTOPhase LTOPhase) { |
| 1467 | const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase); |
| 1468 | ModulePassManager MPM; |
| 1469 | |
| 1470 | // Run partial inlining pass to partially inline functions that have |
| 1471 | // large bodies. |
| 1472 | if (RunPartialInlining) |
| 1473 | MPM.addPass(Pass: PartialInlinerPass()); |
| 1474 | |
| 1475 | // Remove avail extern fns and globals definitions since we aren't compiling |
| 1476 | // an object file for later LTO. For LTO we want to preserve these so they |
| 1477 | // are eligible for inlining at link-time. Note if they are unreferenced they |
| 1478 | // will be removed by GlobalDCE later, so this only impacts referenced |
| 1479 | // available externally globals. Eventually they will be suppressed during |
| 1480 | // codegen, but eliminating here enables more opportunity for GlobalDCE as it |
| 1481 | // may make globals referenced by available external functions dead and saves |
| 1482 | // running remaining passes on the eliminated functions. These should be |
| 1483 | // preserved during prelinking for link-time inlining decisions. |
| 1484 | if (!LTOPreLink) |
| 1485 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
| 1486 | |
| 1487 | // Do RPO function attribute inference across the module to forward-propagate |
| 1488 | // attributes where applicable. |
| 1489 | // FIXME: Is this really an optimization rather than a canonicalization? |
| 1490 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
| 1491 | |
| 1492 | // Do a post inline PGO instrumentation and use pass. This is a context |
| 1493 | // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as |
| 1494 | // cross-module inline has not been done yet. The context sensitive |
| 1495 | // instrumentation is after all the inlines are done. |
| 1496 | if (!LTOPreLink && PGOOpt) { |
| 1497 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
| 1498 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
| 1499 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 1500 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile); |
| 1501 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
| 1502 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
| 1503 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 1504 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile); |
| 1505 | } |
| 1506 | |
| 1507 | // Re-compute GlobalsAA here prior to function passes. This is particularly |
| 1508 | // useful as the above will have inlined, DCE'ed, and function-attr |
| 1509 | // propagated everything. We should at this point have a reasonably minimal |
| 1510 | // and richly annotated call graph. By computing aliasing and mod/ref |
| 1511 | // information for all local globals here, the late loop passes and notably |
| 1512 | // the vectorizer will be able to use them to help recognize vectorizable |
| 1513 | // memory operations. |
| 1514 | if (EnableGlobalAnalyses) |
| 1515 | MPM.addPass(Pass: RecomputeGlobalsAAPass()); |
| 1516 | |
| 1517 | invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase: LTOPhase); |
| 1518 | |
| 1519 | FunctionPassManager OptimizePM; |
| 1520 | |
| 1521 | // Only drop unnecessary assumes post-inline and post-link, as otherwise |
| 1522 | // additional uses of the affected value may be introduced through inlining |
| 1523 | // and CSE. |
| 1524 | if (!isLTOPreLink(Phase: LTOPhase)) |
| 1525 | OptimizePM.addPass(Pass: DropUnnecessaryAssumesPass()); |
| 1526 | |
| 1527 | // Scheduling LoopVersioningLICM when inlining is over, because after that |
| 1528 | // we may see more accurate aliasing. Reason to run this late is that too |
| 1529 | // early versioning may prevent further inlining due to increase of code |
| 1530 | // size. Other optimizations which runs later might get benefit of no-alias |
| 1531 | // assumption in clone loop. |
| 1532 | if (UseLoopVersioningLICM) { |
| 1533 | OptimizePM.addPass( |
| 1534 | Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass())); |
| 1535 | // LoopVersioningLICM pass might increase new LICM opportunities. |
| 1536 | OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 1537 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 1538 | /*AllowSpeculation=*/true), |
| 1539 | /*USeMemorySSA=*/UseMemorySSA: true)); |
| 1540 | } |
| 1541 | |
| 1542 | OptimizePM.addPass(Pass: Float2IntPass()); |
| 1543 | OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass()); |
| 1544 | |
| 1545 | if (EnableMatrix) { |
| 1546 | OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass()); |
| 1547 | OptimizePM.addPass(Pass: EarlyCSEPass()); |
| 1548 | } |
| 1549 | |
| 1550 | // CHR pass should only be applied with the profile information. |
| 1551 | // The check is to check the profile summary information in CHR. |
| 1552 | if (EnableCHR && Level == OptimizationLevel::O3) |
| 1553 | OptimizePM.addPass(Pass: ControlHeightReductionPass()); |
| 1554 | |
| 1555 | // FIXME: We need to run some loop optimizations to re-rotate loops after |
| 1556 | // simplifycfg and others undo their rotation. |
| 1557 | |
| 1558 | // Optimize the loop execution. These passes operate on entire loop nests |
| 1559 | // rather than on each loop in an inside-out manner, and so they are actually |
| 1560 | // function passes. |
| 1561 | |
| 1562 | invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level); |
| 1563 | |
| 1564 | LoopPassManager LPM; |
| 1565 | // First rotate loops that may have been un-rotated by prior passes. |
| 1566 | // Disable header duplication at -Oz. |
| 1567 | LPM.addPass(Pass: LoopRotatePass(/*EnableLoopHeaderDuplication=*/true, LTOPreLink, |
| 1568 | /*CheckExitCount=*/true)); |
| 1569 | // Some loops may have become dead by now. Try to delete them. |
| 1570 | // FIXME: see discussion in https://reviews.llvm.org/D112851, |
| 1571 | // this may need to be revisited once we run GVN before loop deletion |
| 1572 | // in the simplification pipeline. |
| 1573 | LPM.addPass(Pass: LoopDeletionPass()); |
| 1574 | |
| 1575 | if (PTO.LoopInterchange) |
| 1576 | LPM.addPass(Pass: LoopInterchangePass()); |
| 1577 | |
| 1578 | OptimizePM.addPass( |
| 1579 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false)); |
| 1580 | |
| 1581 | // FIXME: This may not be the right place in the pipeline. |
| 1582 | // We need to have the data to support the right place. |
| 1583 | if (PTO.LoopFusion) |
| 1584 | OptimizePM.addPass(Pass: LoopFusePass()); |
| 1585 | |
| 1586 | // Distribute loops to allow partial vectorization. I.e. isolate dependences |
| 1587 | // into separate loop that would otherwise inhibit vectorization. This is |
| 1588 | // currently only performed for loops marked with the metadata |
| 1589 | // llvm.loop.distribute=true or when -enable-loop-distribute is specified. |
| 1590 | OptimizePM.addPass(Pass: LoopDistributePass()); |
| 1591 | |
| 1592 | // Populates the VFABI attribute with the scalar-to-vector mappings |
| 1593 | // from the TargetLibraryInfo. |
| 1594 | OptimizePM.addPass(Pass: InjectTLIMappings()); |
| 1595 | |
| 1596 | addVectorPasses(Level, FPM&: OptimizePM, LTOPhase); |
| 1597 | |
| 1598 | invokeVectorizerEndEPCallbacks(FPM&: OptimizePM, Level); |
| 1599 | |
| 1600 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
| 1601 | // canonicalization pass that enables other optimizations. As a result, |
| 1602 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
| 1603 | // result too early. |
| 1604 | OptimizePM.addPass(Pass: LoopSinkPass()); |
| 1605 | |
| 1606 | // And finally clean up LCSSA form before generating code. |
| 1607 | OptimizePM.addPass(Pass: InstSimplifyPass()); |
| 1608 | |
| 1609 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
| 1610 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
| 1611 | // flattening of blocks. |
| 1612 | OptimizePM.addPass(Pass: DivRemPairsPass()); |
| 1613 | |
| 1614 | // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares. |
| 1615 | // TODO: move this furter up so that it can be optimized by GVN, etc. |
| 1616 | if (EnableMergeICmps) |
| 1617 | OptimizePM.addPass(Pass: MergeICmpsPass()); |
| 1618 | OptimizePM.addPass(Pass: ExpandMemCmpPass()); |
| 1619 | |
| 1620 | // Try to annotate calls that were created during optimization. |
| 1621 | OptimizePM.addPass( |
| 1622 | Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse())); |
| 1623 | |
| 1624 | // LoopSink (and other loop passes since the last simplifyCFG) might have |
| 1625 | // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. |
| 1626 | OptimizePM.addPass( |
| 1627 | Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 1628 | .convertSwitchRangeToICmp(B: true) |
| 1629 | .convertSwitchToArithmetic(B: true) |
| 1630 | .speculateUnpredictables(B: true) |
| 1631 | .hoistLoadsStoresWithCondFaulting(B: true))); |
| 1632 | |
| 1633 | // Add the core optimizing pipeline. |
| 1634 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM), |
| 1635 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 1636 | |
| 1637 | // AllocToken transforms heap allocation calls; this needs to run late after |
| 1638 | // other allocation call transformations (such as those in InstCombine). |
| 1639 | if (!LTOPreLink) |
| 1640 | MPM.addPass(Pass: AllocTokenPass()); |
| 1641 | |
| 1642 | invokeOptimizerLastEPCallbacks(MPM, Level, Phase: LTOPhase); |
| 1643 | |
| 1644 | // Split out cold code. Splitting is done late to avoid hiding context from |
| 1645 | // other optimizations and inadvertently regressing performance. The tradeoff |
| 1646 | // is that this has a higher code size cost than splitting early. |
| 1647 | if (EnableHotColdSplit && !LTOPreLink) |
| 1648 | MPM.addPass(Pass: HotColdSplittingPass()); |
| 1649 | |
| 1650 | // Search the code for similar regions of code. If enough similar regions can |
| 1651 | // be found where extracting the regions into their own function will decrease |
| 1652 | // the size of the program, we extract the regions, a deduplicate the |
| 1653 | // structurally similar regions. |
| 1654 | if (EnableIROutliner) |
| 1655 | MPM.addPass(Pass: IROutlinerPass()); |
| 1656 | |
| 1657 | // Now we need to do some global optimization transforms. |
| 1658 | // FIXME: It would seem like these should come first in the optimization |
| 1659 | // pipeline and maybe be the bottom of the canonicalization pipeline? Weird |
| 1660 | // ordering here. |
| 1661 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1662 | MPM.addPass(Pass: ConstantMergePass()); |
| 1663 | |
| 1664 | // Merge functions if requested. It has a better chance to merge functions |
| 1665 | // after ConstantMerge folded jump tables. |
| 1666 | if (PTO.MergeFunctions) |
| 1667 | MPM.addPass(Pass: MergeFunctionsPass()); |
| 1668 | |
| 1669 | if (PTO.CallGraphProfile && !LTOPreLink) |
| 1670 | MPM.addPass(Pass: CGProfilePass(isLTOPostLink(Phase: LTOPhase))); |
| 1671 | |
| 1672 | // RelLookupTableConverterPass runs later in LTO post-link pipeline. |
| 1673 | if (!LTOPreLink) |
| 1674 | MPM.addPass(Pass: RelLookupTableConverterPass()); |
| 1675 | |
| 1676 | // Add devirtualization pass only when LTO is not enabled, as otherwise |
| 1677 | // the pass is already enabled in the LTO pipeline. |
| 1678 | if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { |
| 1679 | // TODO: explore a better pipeline configuration that can improve |
| 1680 | // compilation time overhead. |
| 1681 | MPM.addPass(Pass: WholeProgramDevirtPass( |
| 1682 | /*ExportSummary*/ nullptr, |
| 1683 | /*ImportSummary*/ nullptr, |
| 1684 | /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively)); |
| 1685 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1686 | lowertypetests::DropTestKind::Assume)); |
| 1687 | // Given that the devirtualization creates more opportunities for inlining, |
| 1688 | // we run the Inliner again here to maximize the optimization gain we |
| 1689 | // get from devirtualization. |
| 1690 | // Also, we can't run devirtualization before inlining because the |
| 1691 | // devirtualization depends on the passes optimizing/eliminating vtable GVs |
| 1692 | // and those passes are only effective after inlining. |
| 1693 | if (EnableModuleInliner) { |
| 1694 | MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level), |
| 1695 | UseInlineAdvisor, |
| 1696 | ThinOrFullLTOPhase::None)); |
| 1697 | } else { |
| 1698 | MPM.addPass(Pass: ModuleInlinerWrapperPass( |
| 1699 | getInlineParamsFromOptLevel(Level), |
| 1700 | /* MandatoryFirst */ true, |
| 1701 | InlineContext{.LTOPhase: ThinOrFullLTOPhase::None, .Pass: InlinePass::CGSCCInliner})); |
| 1702 | } |
| 1703 | } |
| 1704 | return MPM; |
| 1705 | } |
| 1706 | |
| 1707 | ModulePassManager |
| 1708 | PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, |
| 1709 | ThinOrFullLTOPhase Phase) { |
| 1710 | if (Level == OptimizationLevel::O0) |
| 1711 | return buildO0DefaultPipeline(Level, Phase); |
| 1712 | |
| 1713 | ModulePassManager MPM; |
| 1714 | |
| 1715 | // Currently this pipeline is only invoked in an LTO pre link pass or when we |
| 1716 | // are not running LTO. If that changes the below checks may need updating. |
| 1717 | assert(isLTOPreLink(Phase) || Phase == ThinOrFullLTOPhase::None); |
| 1718 | |
| 1719 | // If we are invoking this in non-LTO mode, remove any MemProf related |
| 1720 | // attributes and metadata, as we don't know whether we are linking with |
| 1721 | // a library containing the necessary interfaces. |
| 1722 | if (Phase == ThinOrFullLTOPhase::None) |
| 1723 | MPM.addPass(Pass: MemProfRemoveInfo()); |
| 1724 | |
| 1725 | // Convert @llvm.global.annotations to !annotation metadata. |
| 1726 | MPM.addPass(Pass: Annotation2MetadataPass()); |
| 1727 | |
| 1728 | // Force any function attributes we want the rest of the pipeline to observe. |
| 1729 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
| 1730 | |
| 1731 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
| 1732 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
| 1733 | |
| 1734 | // Apply module pipeline start EP callback. |
| 1735 | invokePipelineStartEPCallbacks(MPM, Level); |
| 1736 | |
| 1737 | // Add the core simplification pipeline. |
| 1738 | MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase)); |
| 1739 | |
| 1740 | // Now add the optimization pipeline. |
| 1741 | MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase: Phase)); |
| 1742 | |
| 1743 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
| 1744 | PGOOpt->Action == PGOOptions::SampleUse) |
| 1745 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
| 1746 | |
| 1747 | // Emit annotation remarks. |
| 1748 | addAnnotationRemarksPass(MPM); |
| 1749 | |
| 1750 | if (isLTOPreLink(Phase)) |
| 1751 | addRequiredLTOPreLinkPasses(MPM); |
| 1752 | return MPM; |
| 1753 | } |
| 1754 | |
| 1755 | ModulePassManager |
| 1756 | PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, |
| 1757 | bool EmitSummary) { |
| 1758 | ModulePassManager MPM; |
| 1759 | if (ThinLTO) |
| 1760 | MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level)); |
| 1761 | else |
| 1762 | MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level)); |
| 1763 | MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary)); |
| 1764 | |
| 1765 | // Perform any cleanups to the IR that aren't suitable for per TU compilation, |
| 1766 | // like removing CFI/WPD related instructions. Note, we reuse |
| 1767 | // LowerTypeTestsPass to clean up type tests rather than duplicate that logic |
| 1768 | // in FatLtoCleanup. |
| 1769 | MPM.addPass(Pass: FatLtoCleanup()); |
| 1770 | |
| 1771 | // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the |
| 1772 | // object code, only in the bitcode section, so drop it before we run |
| 1773 | // module optimization and generate machine code. If llvm.type.test() isn't in |
| 1774 | // the IR, this won't do anything. |
| 1775 | MPM.addPass( |
| 1776 | Pass: LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All)); |
| 1777 | |
| 1778 | // Use the ThinLTO post-link pipeline with sample profiling |
| 1779 | if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) |
| 1780 | MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr)); |
| 1781 | else { |
| 1782 | // ModuleSimplification does not run the coroutine passes for |
| 1783 | // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO |
| 1784 | // builds, otherwise they will miscompile. |
| 1785 | if (ThinLTO) { |
| 1786 | // TODO: replace w/ buildCoroWrapper() when it takes phase and level into |
| 1787 | // consideration. |
| 1788 | CGSCCPassManager CGPM; |
| 1789 | CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
| 1790 | CGPM.addPass(Pass: CoroAnnotationElidePass()); |
| 1791 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 1792 | MPM.addPass(Pass: CoroCleanupPass()); |
| 1793 | } |
| 1794 | |
| 1795 | // otherwise, just use module optimization |
| 1796 | MPM.addPass( |
| 1797 | Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None)); |
| 1798 | // Emit annotation remarks. |
| 1799 | addAnnotationRemarksPass(MPM); |
| 1800 | } |
| 1801 | return MPM; |
| 1802 | } |
| 1803 | |
| 1804 | ModulePassManager |
| 1805 | PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
| 1806 | if (Level == OptimizationLevel::O0) |
| 1807 | return buildO0DefaultPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink); |
| 1808 | |
| 1809 | ModulePassManager MPM; |
| 1810 | |
| 1811 | // Convert @llvm.global.annotations to !annotation metadata. |
| 1812 | MPM.addPass(Pass: Annotation2MetadataPass()); |
| 1813 | |
| 1814 | // Force any function attributes we want the rest of the pipeline to observe. |
| 1815 | MPM.addPass(Pass: ForceFunctionAttrsPass()); |
| 1816 | |
| 1817 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
| 1818 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
| 1819 | |
| 1820 | // Apply module pipeline start EP callback. |
| 1821 | invokePipelineStartEPCallbacks(MPM, Level); |
| 1822 | |
| 1823 | // If we are planning to perform ThinLTO later, we don't bloat the code with |
| 1824 | // unrolling/vectorization/... now. Just simplify the module as much as we |
| 1825 | // can. |
| 1826 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
| 1827 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink)); |
| 1828 | // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let |
| 1829 | // thinlto use the contextual info to perform imports; then use the contextual |
| 1830 | // profile in the post-thinlink phase. |
| 1831 | if (!UseCtxProfile.empty()) { |
| 1832 | addRequiredLTOPreLinkPasses(MPM); |
| 1833 | return MPM; |
| 1834 | } |
| 1835 | |
| 1836 | // Run partial inlining pass to partially inline functions that have |
| 1837 | // large bodies. |
| 1838 | // FIXME: It isn't clear whether this is really the right place to run this |
| 1839 | // in ThinLTO. Because there is another canonicalization and simplification |
| 1840 | // phase that will run after the thin link, running this here ends up with |
| 1841 | // less information than will be available later and it may grow functions in |
| 1842 | // ways that aren't beneficial. |
| 1843 | if (RunPartialInlining) |
| 1844 | MPM.addPass(Pass: PartialInlinerPass()); |
| 1845 | |
| 1846 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling && |
| 1847 | PGOOpt->Action == PGOOptions::SampleUse) |
| 1848 | MPM.addPass(Pass: PseudoProbeUpdatePass()); |
| 1849 | |
| 1850 | // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual |
| 1851 | // optimization is going to be done in PostLink stage, but clang can't add |
| 1852 | // callbacks there in case of in-process ThinLTO called by linker. |
| 1853 | invokeOptimizerEarlyEPCallbacks(MPM, Level, |
| 1854 | /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); |
| 1855 | invokeOptimizerLastEPCallbacks(MPM, Level, |
| 1856 | /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink); |
| 1857 | |
| 1858 | // Emit annotation remarks. |
| 1859 | addAnnotationRemarksPass(MPM); |
| 1860 | |
| 1861 | addRequiredLTOPreLinkPasses(MPM); |
| 1862 | |
| 1863 | return MPM; |
| 1864 | } |
| 1865 | |
| 1866 | ModulePassManager PassBuilder::buildThinLTODefaultPipeline( |
| 1867 | OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { |
| 1868 | ModulePassManager MPM; |
| 1869 | |
| 1870 | // If we are invoking this without a summary index noting that we are linking |
| 1871 | // with a library containing the necessary APIs, remove any MemProf related |
| 1872 | // attributes and metadata. |
| 1873 | if (!ImportSummary || !ImportSummary->withSupportsHotColdNew()) |
| 1874 | MPM.addPass(Pass: MemProfRemoveInfo()); |
| 1875 | |
| 1876 | if (ImportSummary) { |
| 1877 | // For ThinLTO we must apply the context disambiguation decisions early, to |
| 1878 | // ensure we can correctly match the callsites to summary data. |
| 1879 | if (EnableMemProfContextDisambiguation) |
| 1880 | MPM.addPass(Pass: MemProfContextDisambiguation( |
| 1881 | ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
| 1882 | |
| 1883 | // These passes import type identifier resolutions for whole-program |
| 1884 | // devirtualization and CFI. They must run early because other passes may |
| 1885 | // disturb the specific instruction patterns that these passes look for, |
| 1886 | // creating dependencies on resolutions that may not appear in the summary. |
| 1887 | // |
| 1888 | // For example, GVN may transform the pattern assume(type.test) appearing in |
| 1889 | // two basic blocks into assume(phi(type.test, type.test)), which would |
| 1890 | // transform a dependency on a WPD resolution into a dependency on a type |
| 1891 | // identifier resolution for CFI. |
| 1892 | // |
| 1893 | // Also, WPD has access to more precise information than ICP and can |
| 1894 | // devirtualize more effectively, so it should operate on the IR first. |
| 1895 | // |
| 1896 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
| 1897 | // metadata and intrinsics. |
| 1898 | MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary)); |
| 1899 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary)); |
| 1900 | } |
| 1901 | |
| 1902 | if (Level == OptimizationLevel::O0) { |
| 1903 | // Run a second time to clean up any type tests left behind by WPD for use |
| 1904 | // in ICP. |
| 1905 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1906 | lowertypetests::DropTestKind::Assume)); |
| 1907 | MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
| 1908 | |
| 1909 | // AllocToken transforms heap allocation calls; this needs to run late after |
| 1910 | // other allocation call transformations (such as those in InstCombine). |
| 1911 | MPM.addPass(Pass: AllocTokenPass()); |
| 1912 | |
| 1913 | // Drop available_externally and unreferenced globals. This is necessary |
| 1914 | // with ThinLTO in order to avoid leaving undefined references to dead |
| 1915 | // globals in the object file. |
| 1916 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
| 1917 | MPM.addPass(Pass: GlobalDCEPass()); |
| 1918 | return MPM; |
| 1919 | } |
| 1920 | if (!UseCtxProfile.empty()) { |
| 1921 | MPM.addPass( |
| 1922 | Pass: buildModuleInlinerPipeline(Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
| 1923 | } else { |
| 1924 | // Add the core simplification pipeline. |
| 1925 | MPM.addPass(Pass: buildModuleSimplificationPipeline( |
| 1926 | Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
| 1927 | } |
| 1928 | // Now add the optimization pipeline. |
| 1929 | MPM.addPass(Pass: buildModuleOptimizationPipeline( |
| 1930 | Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink)); |
| 1931 | |
| 1932 | // Emit annotation remarks. |
| 1933 | addAnnotationRemarksPass(MPM); |
| 1934 | |
| 1935 | return MPM; |
| 1936 | } |
| 1937 | |
| 1938 | ModulePassManager |
| 1939 | PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { |
| 1940 | // FIXME: We should use a customized pre-link pipeline! |
| 1941 | return buildPerModuleDefaultPipeline(Level, |
| 1942 | Phase: ThinOrFullLTOPhase::FullLTOPreLink); |
| 1943 | } |
| 1944 | |
| 1945 | ModulePassManager |
| 1946 | PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, |
| 1947 | ModuleSummaryIndex *ExportSummary) { |
| 1948 | ModulePassManager MPM; |
| 1949 | |
| 1950 | invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level); |
| 1951 | |
| 1952 | // If we are invoking this without a summary index noting that we are linking |
| 1953 | // with a library containing the necessary APIs, remove any MemProf related |
| 1954 | // attributes and metadata. |
| 1955 | if (!ExportSummary || !ExportSummary->withSupportsHotColdNew()) |
| 1956 | MPM.addPass(Pass: MemProfRemoveInfo()); |
| 1957 | |
| 1958 | // Create a function that performs CFI checks for cross-DSO calls with targets |
| 1959 | // in the current module. |
| 1960 | MPM.addPass(Pass: CrossDSOCFIPass()); |
| 1961 | |
| 1962 | if (Level == OptimizationLevel::O0) { |
| 1963 | // The WPD and LowerTypeTest passes need to run at -O0 to lower type |
| 1964 | // metadata and intrinsics. |
| 1965 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
| 1966 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
| 1967 | // Run a second time to clean up any type tests left behind by WPD for use |
| 1968 | // in ICP. |
| 1969 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 1970 | lowertypetests::DropTestKind::Assume)); |
| 1971 | |
| 1972 | MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink)); |
| 1973 | |
| 1974 | // AllocToken transforms heap allocation calls; this needs to run late after |
| 1975 | // other allocation call transformations (such as those in InstCombine). |
| 1976 | MPM.addPass(Pass: AllocTokenPass()); |
| 1977 | |
| 1978 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
| 1979 | |
| 1980 | // Emit annotation remarks. |
| 1981 | addAnnotationRemarksPass(MPM); |
| 1982 | |
| 1983 | return MPM; |
| 1984 | } |
| 1985 | |
| 1986 | if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { |
| 1987 | // Load sample profile before running the LTO optimization pipeline. |
| 1988 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
| 1989 | PGOOpt->ProfileRemappingFile, |
| 1990 | ThinOrFullLTOPhase::FullLTOPostLink)); |
| 1991 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 1992 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 1993 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 1994 | } |
| 1995 | |
| 1996 | // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. |
| 1997 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
| 1998 | |
| 1999 | // Remove unused virtual tables to improve the quality of code generated by |
| 2000 | // whole-program devirtualization and bitset lowering. |
| 2001 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
| 2002 | |
| 2003 | // Do basic inference of function attributes from known properties of system |
| 2004 | // libraries and other oracles. |
| 2005 | MPM.addPass(Pass: InferFunctionAttrsPass()); |
| 2006 | |
| 2007 | if (Level.getSpeedupLevel() > 1) { |
| 2008 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 2009 | Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 2010 | |
| 2011 | // Indirect call promotion. This should promote all the targets that are |
| 2012 | // left by the earlier promotion pass that promotes intra-module targets. |
| 2013 | // This two-step promotion is to save the compile time. For LTO, it should |
| 2014 | // produce the same result as if we only do promotion here. |
| 2015 | MPM.addPass(Pass: PGOIndirectCallPromotion( |
| 2016 | true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
| 2017 | |
| 2018 | // Promoting by-reference arguments to by-value exposes more constants to |
| 2019 | // IPSCCP. |
| 2020 | CGSCCPassManager CGPM; |
| 2021 | CGPM.addPass(Pass: PostOrderFunctionAttrsPass()); |
| 2022 | CGPM.addPass(Pass: ArgumentPromotionPass()); |
| 2023 | CGPM.addPass( |
| 2024 | Pass: createCGSCCToFunctionPassAdaptor(Pass: SROAPass(SROAOptions::ModifyCFG))); |
| 2025 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 2026 | |
| 2027 | // Propagate constants at call sites into the functions they call. This |
| 2028 | // opens opportunities for globalopt (and inlining) by substituting function |
| 2029 | // pointers passed as arguments to direct uses of functions. |
| 2030 | MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true))); |
| 2031 | |
| 2032 | // Attach metadata to indirect call sites indicating the set of functions |
| 2033 | // they may target at run-time. This should follow IPSCCP. |
| 2034 | MPM.addPass(Pass: CalledValuePropagationPass()); |
| 2035 | } |
| 2036 | |
| 2037 | // Do RPO function attribute inference across the module to forward-propagate |
| 2038 | // attributes where applicable. |
| 2039 | // FIXME: Is this really an optimization rather than a canonicalization? |
| 2040 | MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass()); |
| 2041 | |
| 2042 | // Use in-range annotations on GEP indices to split globals where beneficial. |
| 2043 | MPM.addPass(Pass: GlobalSplitPass()); |
| 2044 | |
| 2045 | // Run whole program optimization of virtual call when the list of callees |
| 2046 | // is fixed. |
| 2047 | MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr)); |
| 2048 | |
| 2049 | MPM.addPass(Pass: NoRecurseLTOInferencePass()); |
| 2050 | // Stop here at -O1. |
| 2051 | if (Level == OptimizationLevel::O1) { |
| 2052 | // The LowerTypeTestsPass needs to run to lower type metadata and the |
| 2053 | // type.test intrinsics. The pass does nothing if CFI is disabled. |
| 2054 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
| 2055 | // Run a second time to clean up any type tests left behind by WPD for use |
| 2056 | // in ICP (which is performed earlier than this in the regular LTO |
| 2057 | // pipeline). |
| 2058 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 2059 | lowertypetests::DropTestKind::Assume)); |
| 2060 | |
| 2061 | MPM.addPass(Pass: buildCoroWrapper(Phase: ThinOrFullLTOPhase::FullLTOPostLink)); |
| 2062 | |
| 2063 | // AllocToken transforms heap allocation calls; this needs to run late after |
| 2064 | // other allocation call transformations (such as those in InstCombine). |
| 2065 | MPM.addPass(Pass: AllocTokenPass()); |
| 2066 | |
| 2067 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
| 2068 | |
| 2069 | // Emit annotation remarks. |
| 2070 | addAnnotationRemarksPass(MPM); |
| 2071 | |
| 2072 | return MPM; |
| 2073 | } |
| 2074 | |
| 2075 | // TODO: Skip to match buildCoroWrapper. |
| 2076 | MPM.addPass(Pass: CoroEarlyPass()); |
| 2077 | |
| 2078 | // Optimize globals to try and fold them into constants. |
| 2079 | MPM.addPass(Pass: GlobalOptPass()); |
| 2080 | |
| 2081 | // Promote any localized globals to SSA registers. |
| 2082 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass())); |
| 2083 | |
| 2084 | // Linking modules together can lead to duplicate global constant, only |
| 2085 | // keep one copy of each constant. |
| 2086 | MPM.addPass(Pass: ConstantMergePass()); |
| 2087 | |
| 2088 | // Remove unused arguments from functions. |
| 2089 | MPM.addPass(Pass: DeadArgumentEliminationPass()); |
| 2090 | |
| 2091 | // Reduce the code after globalopt and ipsccp. Both can open up significant |
| 2092 | // simplification opportunities, and both can propagate functions through |
| 2093 | // function pointers. When this happens, we often have to resolve varargs |
| 2094 | // calls, etc, so let instcombine do this. |
| 2095 | FunctionPassManager PeepholeFPM; |
| 2096 | PeepholeFPM.addPass(Pass: InstCombinePass()); |
| 2097 | if (Level.getSpeedupLevel() > 1) |
| 2098 | PeepholeFPM.addPass(Pass: AggressiveInstCombinePass()); |
| 2099 | invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level); |
| 2100 | |
| 2101 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM), |
| 2102 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 2103 | |
| 2104 | // Lower variadic functions for supported targets prior to inlining. |
| 2105 | MPM.addPass(Pass: ExpandVariadicsPass(ExpandVariadicsMode::Optimize)); |
| 2106 | |
| 2107 | // Note: historically, the PruneEH pass was run first to deduce nounwind and |
| 2108 | // generally clean up exception handling overhead. It isn't clear this is |
| 2109 | // valuable as the inliner doesn't currently care whether it is inlining an |
| 2110 | // invoke or a call. |
| 2111 | // Run the inliner now. |
| 2112 | if (EnableModuleInliner) { |
| 2113 | MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level), |
| 2114 | UseInlineAdvisor, |
| 2115 | ThinOrFullLTOPhase::FullLTOPostLink)); |
| 2116 | } else { |
| 2117 | MPM.addPass(Pass: ModuleInlinerWrapperPass( |
| 2118 | getInlineParamsFromOptLevel(Level), |
| 2119 | /* MandatoryFirst */ true, |
| 2120 | InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink, |
| 2121 | .Pass: InlinePass::CGSCCInliner})); |
| 2122 | } |
| 2123 | |
| 2124 | // Perform context disambiguation after inlining, since that would reduce the |
| 2125 | // amount of additional cloning required to distinguish the allocation |
| 2126 | // contexts. |
| 2127 | if (EnableMemProfContextDisambiguation) |
| 2128 | MPM.addPass(Pass: MemProfContextDisambiguation( |
| 2129 | /*Summary=*/nullptr, |
| 2130 | PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); |
| 2131 | |
| 2132 | // Optimize globals again after we ran the inliner. |
| 2133 | MPM.addPass(Pass: GlobalOptPass()); |
| 2134 | |
| 2135 | // Run the OpenMPOpt pass again after global optimizations. |
| 2136 | MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); |
| 2137 | |
| 2138 | // Garbage collect dead functions. |
| 2139 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
| 2140 | |
| 2141 | // If we didn't decide to inline a function, check to see if we can |
| 2142 | // transform it to pass arguments by value instead of by reference. |
| 2143 | CGSCCPassManager CGPM; |
| 2144 | CGPM.addPass(Pass: ArgumentPromotionPass()); |
| 2145 | CGPM.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0)); |
| 2146 | CGPM.addPass(Pass: CoroAnnotationElidePass()); |
| 2147 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 2148 | |
| 2149 | FunctionPassManager FPM; |
| 2150 | // The IPO Passes may leave cruft around. Clean up after them. |
| 2151 | FPM.addPass(Pass: InstCombinePass()); |
| 2152 | invokePeepholeEPCallbacks(FPM, Level); |
| 2153 | |
| 2154 | if (EnableConstraintElimination) |
| 2155 | FPM.addPass(Pass: ConstraintEliminationPass()); |
| 2156 | |
| 2157 | FPM.addPass(Pass: JumpThreadingPass()); |
| 2158 | |
| 2159 | // Do a post inline PGO instrumentation and use pass. This is a context |
| 2160 | // sensitive PGO pass. |
| 2161 | if (PGOOpt) { |
| 2162 | if (PGOOpt->CSAction == PGOOptions::CSIRInstr) |
| 2163 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true, |
| 2164 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 2165 | ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile); |
| 2166 | else if (PGOOpt->CSAction == PGOOptions::CSIRUse) |
| 2167 | addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false, |
| 2168 | /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, |
| 2169 | ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile); |
| 2170 | } |
| 2171 | |
| 2172 | // Break up allocas |
| 2173 | FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG)); |
| 2174 | |
| 2175 | // LTO provides additional opportunities for tailcall elimination due to |
| 2176 | // link-time inlining, and visibility of nocapture attribute. |
| 2177 | FPM.addPass( |
| 2178 | Pass: TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse())); |
| 2179 | |
| 2180 | // Run a few AA driver optimizations here and now to cleanup the code. |
| 2181 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM), |
| 2182 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 2183 | |
| 2184 | MPM.addPass( |
| 2185 | Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass())); |
| 2186 | |
| 2187 | // Require the GlobalsAA analysis for the module so we can query it within |
| 2188 | // MainFPM. |
| 2189 | if (EnableGlobalAnalyses) { |
| 2190 | MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>()); |
| 2191 | // Invalidate AAManager so it can be recreated and pick up the newly |
| 2192 | // available GlobalsAA. |
| 2193 | MPM.addPass( |
| 2194 | Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>())); |
| 2195 | } |
| 2196 | |
| 2197 | FunctionPassManager MainFPM; |
| 2198 | MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor( |
| 2199 | Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, |
| 2200 | /*AllowSpeculation=*/true), |
| 2201 | /*USeMemorySSA=*/UseMemorySSA: true)); |
| 2202 | |
| 2203 | if (RunNewGVN) |
| 2204 | MainFPM.addPass(Pass: NewGVNPass()); |
| 2205 | else |
| 2206 | MainFPM.addPass(Pass: GVNPass()); |
| 2207 | |
| 2208 | // Remove dead memcpy()'s. |
| 2209 | MainFPM.addPass(Pass: MemCpyOptPass()); |
| 2210 | |
| 2211 | // Nuke dead stores. |
| 2212 | MainFPM.addPass(Pass: DSEPass()); |
| 2213 | MainFPM.addPass(Pass: MoveAutoInitPass()); |
| 2214 | MainFPM.addPass(Pass: MergedLoadStoreMotionPass()); |
| 2215 | |
| 2216 | invokeVectorizerStartEPCallbacks(FPM&: MainFPM, Level); |
| 2217 | |
| 2218 | LoopPassManager LPM; |
| 2219 | if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) |
| 2220 | LPM.addPass(Pass: LoopFlattenPass()); |
| 2221 | LPM.addPass(Pass: IndVarSimplifyPass()); |
| 2222 | LPM.addPass(Pass: LoopDeletionPass()); |
| 2223 | // FIXME: Add loop interchange. |
| 2224 | |
| 2225 | // Unroll small loops and perform peeling. |
| 2226 | LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(), |
| 2227 | /* OnlyWhenForced= */ !PTO.LoopUnrolling, |
| 2228 | PTO.ForgetAllSCEVInLoopUnroll)); |
| 2229 | // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. |
| 2230 | // *All* loop passes must preserve it, in order to be able to use it. |
| 2231 | MainFPM.addPass( |
| 2232 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/false)); |
| 2233 | |
| 2234 | MainFPM.addPass(Pass: LoopDistributePass()); |
| 2235 | |
| 2236 | addVectorPasses(Level, FPM&: MainFPM, LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink); |
| 2237 | |
| 2238 | invokeVectorizerEndEPCallbacks(FPM&: MainFPM, Level); |
| 2239 | |
| 2240 | // Run the OpenMPOpt CGSCC pass again late. |
| 2241 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor( |
| 2242 | Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink))); |
| 2243 | |
| 2244 | invokePeepholeEPCallbacks(FPM&: MainFPM, Level); |
| 2245 | MainFPM.addPass(Pass: JumpThreadingPass()); |
| 2246 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM), |
| 2247 | EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses)); |
| 2248 | |
| 2249 | // Lower type metadata and the type.test intrinsic. This pass supports |
| 2250 | // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs |
| 2251 | // to be run at link time if CFI is enabled. This pass does nothing if |
| 2252 | // CFI is disabled. |
| 2253 | MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr)); |
| 2254 | // Run a second time to clean up any type tests left behind by WPD for use |
| 2255 | // in ICP (which is performed earlier than this in the regular LTO pipeline). |
| 2256 | MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, |
| 2257 | lowertypetests::DropTestKind::Assume)); |
| 2258 | |
| 2259 | // Enable splitting late in the FullLTO post-link pipeline. |
| 2260 | if (EnableHotColdSplit) |
| 2261 | MPM.addPass(Pass: HotColdSplittingPass()); |
| 2262 | |
| 2263 | // Add late LTO optimization passes. |
| 2264 | FunctionPassManager LateFPM; |
| 2265 | |
| 2266 | // LoopSink pass sinks instructions hoisted by LICM, which serves as a |
| 2267 | // canonicalization pass that enables other optimizations. As a result, |
| 2268 | // LoopSink pass needs to be a very late IR pass to avoid undoing LICM |
| 2269 | // result too early. |
| 2270 | LateFPM.addPass(Pass: LoopSinkPass()); |
| 2271 | |
| 2272 | // This hoists/decomposes div/rem ops. It should run after other sink/hoist |
| 2273 | // passes to avoid re-sinking, but before SimplifyCFG because it can allow |
| 2274 | // flattening of blocks. |
| 2275 | LateFPM.addPass(Pass: DivRemPairsPass()); |
| 2276 | |
| 2277 | // Delete basic blocks, which optimization passes may have killed. |
| 2278 | LateFPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions() |
| 2279 | .convertSwitchRangeToICmp(B: true) |
| 2280 | .convertSwitchToArithmetic(B: true) |
| 2281 | .hoistCommonInsts(B: true) |
| 2282 | .speculateUnpredictables(B: true))); |
| 2283 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM))); |
| 2284 | |
| 2285 | // Drop bodies of available eternally objects to improve GlobalDCE. |
| 2286 | MPM.addPass(Pass: EliminateAvailableExternallyPass()); |
| 2287 | |
| 2288 | // Now that we have optimized the program, discard unreachable functions. |
| 2289 | MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true)); |
| 2290 | |
| 2291 | if (PTO.MergeFunctions) |
| 2292 | MPM.addPass(Pass: MergeFunctionsPass()); |
| 2293 | |
| 2294 | MPM.addPass(Pass: RelLookupTableConverterPass()); |
| 2295 | |
| 2296 | if (PTO.CallGraphProfile) |
| 2297 | MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true)); |
| 2298 | |
| 2299 | MPM.addPass(Pass: CoroCleanupPass()); |
| 2300 | |
| 2301 | // AllocToken transforms heap allocation calls; this needs to run late after |
| 2302 | // other allocation call transformations (such as those in InstCombine). |
| 2303 | MPM.addPass(Pass: AllocTokenPass()); |
| 2304 | |
| 2305 | invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); |
| 2306 | |
| 2307 | // Emit annotation remarks. |
| 2308 | addAnnotationRemarksPass(MPM); |
| 2309 | |
| 2310 | return MPM; |
| 2311 | } |
| 2312 | |
| 2313 | ModulePassManager |
| 2314 | PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, |
| 2315 | ThinOrFullLTOPhase Phase) { |
| 2316 | assert(Level == OptimizationLevel::O0 && |
| 2317 | "buildO0DefaultPipeline should only be used with O0" ); |
| 2318 | |
| 2319 | ModulePassManager MPM; |
| 2320 | |
| 2321 | // Perform pseudo probe instrumentation in O0 mode. This is for the |
| 2322 | // consistency between different build modes. For example, a LTO build can be |
| 2323 | // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in |
| 2324 | // the postlink will require pseudo probe instrumentation in the prelink. |
| 2325 | if (PGOOpt && PGOOpt->PseudoProbeForProfiling) |
| 2326 | MPM.addPass(Pass: SampleProfileProbePass(TM)); |
| 2327 | |
| 2328 | if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || |
| 2329 | PGOOpt->Action == PGOOptions::IRUse)) |
| 2330 | addPGOInstrPassesForO0( |
| 2331 | MPM, |
| 2332 | /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr), |
| 2333 | /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile, |
| 2334 | ProfileRemappingFile: PGOOpt->ProfileRemappingFile); |
| 2335 | |
| 2336 | // Instrument function entry and exit before all inlining. |
| 2337 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 2338 | Pass: EntryExitInstrumenterPass(/*PostInlining=*/false))); |
| 2339 | |
| 2340 | invokePipelineStartEPCallbacks(MPM, Level); |
| 2341 | |
| 2342 | if (PGOOpt && PGOOpt->DebugInfoForProfiling) |
| 2343 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass())); |
| 2344 | |
| 2345 | if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { |
| 2346 | // Explicitly disable sample loader inlining and use flattened profile in O0 |
| 2347 | // pipeline. |
| 2348 | MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile, |
| 2349 | PGOOpt->ProfileRemappingFile, |
| 2350 | ThinOrFullLTOPhase::None, FS, |
| 2351 | /*DisableSampleProfileInlining=*/true, |
| 2352 | /*UseFlattenedProfile=*/true)); |
| 2353 | // Cache ProfileSummaryAnalysis once to avoid the potential need to insert |
| 2354 | // RequireAnalysisPass for PSI before subsequent non-module passes. |
| 2355 | MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); |
| 2356 | } |
| 2357 | |
| 2358 | invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase); |
| 2359 | |
| 2360 | // Build a minimal pipeline based on the semantics required by LLVM, |
| 2361 | // which is just that always inlining occurs. Further, disable generating |
| 2362 | // lifetime intrinsics to avoid enabling further optimizations during |
| 2363 | // code generation. |
| 2364 | MPM.addPass(Pass: AlwaysInlinerPass( |
| 2365 | /*InsertLifetimeIntrinsics=*/false)); |
| 2366 | |
| 2367 | if (PTO.MergeFunctions) |
| 2368 | MPM.addPass(Pass: MergeFunctionsPass()); |
| 2369 | |
| 2370 | if (EnableMatrix) |
| 2371 | MPM.addPass( |
| 2372 | Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true))); |
| 2373 | |
| 2374 | if (!CGSCCOptimizerLateEPCallbacks.empty()) { |
| 2375 | CGSCCPassManager CGPM; |
| 2376 | invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level); |
| 2377 | if (!CGPM.isEmpty()) |
| 2378 | MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM))); |
| 2379 | } |
| 2380 | if (!LateLoopOptimizationsEPCallbacks.empty()) { |
| 2381 | LoopPassManager LPM; |
| 2382 | invokeLateLoopOptimizationsEPCallbacks(LPM, Level); |
| 2383 | if (!LPM.isEmpty()) { |
| 2384 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 2385 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
| 2386 | } |
| 2387 | } |
| 2388 | if (!LoopOptimizerEndEPCallbacks.empty()) { |
| 2389 | LoopPassManager LPM; |
| 2390 | invokeLoopOptimizerEndEPCallbacks(LPM, Level); |
| 2391 | if (!LPM.isEmpty()) { |
| 2392 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor( |
| 2393 | Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM)))); |
| 2394 | } |
| 2395 | } |
| 2396 | if (!ScalarOptimizerLateEPCallbacks.empty()) { |
| 2397 | FunctionPassManager FPM; |
| 2398 | invokeScalarOptimizerLateEPCallbacks(FPM, Level); |
| 2399 | if (!FPM.isEmpty()) |
| 2400 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
| 2401 | } |
| 2402 | |
| 2403 | invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase); |
| 2404 | |
| 2405 | if (!VectorizerStartEPCallbacks.empty()) { |
| 2406 | FunctionPassManager FPM; |
| 2407 | invokeVectorizerStartEPCallbacks(FPM, Level); |
| 2408 | if (!FPM.isEmpty()) |
| 2409 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
| 2410 | } |
| 2411 | |
| 2412 | if (!VectorizerEndEPCallbacks.empty()) { |
| 2413 | FunctionPassManager FPM; |
| 2414 | invokeVectorizerEndEPCallbacks(FPM, Level); |
| 2415 | if (!FPM.isEmpty()) |
| 2416 | MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM))); |
| 2417 | } |
| 2418 | |
| 2419 | MPM.addPass(Pass: buildCoroWrapper(Phase)); |
| 2420 | |
| 2421 | // AllocToken transforms heap allocation calls; this needs to run late after |
| 2422 | // other allocation call transformations (such as those in InstCombine). |
| 2423 | if (!isLTOPreLink(Phase)) |
| 2424 | MPM.addPass(Pass: AllocTokenPass()); |
| 2425 | |
| 2426 | invokeOptimizerLastEPCallbacks(MPM, Level, Phase); |
| 2427 | |
| 2428 | if (isLTOPreLink(Phase)) |
| 2429 | addRequiredLTOPreLinkPasses(MPM); |
| 2430 | |
| 2431 | // Emit annotation remarks. |
| 2432 | addAnnotationRemarksPass(MPM); |
| 2433 | |
| 2434 | return MPM; |
| 2435 | } |
| 2436 | |
| 2437 | AAManager PassBuilder::buildDefaultAAPipeline() { |
| 2438 | AAManager AA; |
| 2439 | |
| 2440 | // The order in which these are registered determines their priority when |
| 2441 | // being queried. |
| 2442 | |
| 2443 | // Add any target-specific alias analyses that should be run early. |
| 2444 | if (TM) |
| 2445 | TM->registerEarlyDefaultAliasAnalyses(AA); |
| 2446 | |
| 2447 | // First we register the basic alias analysis that provides the majority of |
| 2448 | // per-function local AA logic. This is a stateless, on-demand local set of |
| 2449 | // AA techniques. |
| 2450 | AA.registerFunctionAnalysis<BasicAA>(); |
| 2451 | |
| 2452 | // Next we query fast, specialized alias analyses that wrap IR-embedded |
| 2453 | // information about aliasing. |
| 2454 | AA.registerFunctionAnalysis<ScopedNoAliasAA>(); |
| 2455 | AA.registerFunctionAnalysis<TypeBasedAA>(); |
| 2456 | |
| 2457 | // Add support for querying global aliasing information when available. |
| 2458 | // Because the `AAManager` is a function analysis and `GlobalsAA` is a module |
| 2459 | // analysis, all that the `AAManager` can do is query for any *cached* |
| 2460 | // results from `GlobalsAA` through a readonly proxy. |
| 2461 | if (EnableGlobalAnalyses) |
| 2462 | AA.registerModuleAnalysis<GlobalsAA>(); |
| 2463 | |
| 2464 | // Add target-specific alias analyses. |
| 2465 | if (TM) |
| 2466 | TM->registerDefaultAliasAnalyses(AA); |
| 2467 | |
| 2468 | return AA; |
| 2469 | } |
| 2470 | |
| 2471 | bool PassBuilder::isInstrumentedPGOUse() const { |
| 2472 | return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) || |
| 2473 | !UseCtxProfile.empty(); |
| 2474 | } |
| 2475 | |