| 1 | //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the "backend" phase of LTO, i.e. it performs |
| 10 | // optimization and code generation on a loaded module. It is generally used |
| 11 | // internally by the LTO class but can also be used independently, for example |
| 12 | // to implement a standalone ThinLTO backend. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "llvm/LTO/LTOBackend.h" |
| 17 | #include "llvm/Analysis/AliasAnalysis.h" |
| 18 | #include "llvm/Analysis/CGSCCPassManager.h" |
| 19 | #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
| 20 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 21 | #include "llvm/Bitcode/BitcodeReader.h" |
| 22 | #include "llvm/Bitcode/BitcodeWriter.h" |
| 23 | #include "llvm/CGData/CodeGenData.h" |
| 24 | #include "llvm/IR/LLVMRemarkStreamer.h" |
| 25 | #include "llvm/IR/LegacyPassManager.h" |
| 26 | #include "llvm/IR/PassManager.h" |
| 27 | #include "llvm/IR/Verifier.h" |
| 28 | #include "llvm/LTO/LTO.h" |
| 29 | #include "llvm/MC/TargetRegistry.h" |
| 30 | #include "llvm/Object/ModuleSymbolTable.h" |
| 31 | #include "llvm/Passes/PassBuilder.h" |
| 32 | #include "llvm/Passes/PassPlugin.h" |
| 33 | #include "llvm/Passes/StandardInstrumentations.h" |
| 34 | #include "llvm/Support/Error.h" |
| 35 | #include "llvm/Support/FileSystem.h" |
| 36 | #include "llvm/Support/MemoryBuffer.h" |
| 37 | #include "llvm/Support/Path.h" |
| 38 | #include "llvm/Support/ThreadPool.h" |
| 39 | #include "llvm/Support/ToolOutputFile.h" |
| 40 | #include "llvm/Support/VirtualFileSystem.h" |
| 41 | #include "llvm/Support/raw_ostream.h" |
| 42 | #include "llvm/Target/TargetMachine.h" |
| 43 | #include "llvm/TargetParser/SubtargetFeature.h" |
| 44 | #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
| 45 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
| 46 | #include "llvm/Transforms/Utils/SplitModule.h" |
| 47 | #include <optional> |
| 48 | |
| 49 | using namespace llvm; |
| 50 | using namespace lto; |
| 51 | |
| 52 | #define DEBUG_TYPE "lto-backend" |
| 53 | |
/// Selects whether, and at which stage, LLVM bitcode is embedded in the object
/// files produced by LTO. Chosen on the command line via -lto-embed-bitcode.
enum class LTOBitcodeEmbedding {
  /// Do not embed any bitcode (value 0).
  DoNotEmbed,
  /// Embed the bitcode after all optimization passes (value 1).
  EmbedOptimized,
  /// Embed the post-merge bitcode, before optimizations (value 2).
  EmbedPostMergePreOptimized
};
| 59 | |
| 60 | static cl::opt<LTOBitcodeEmbedding> EmbedBitcode( |
| 61 | "lto-embed-bitcode" , cl::init(Val: LTOBitcodeEmbedding::DoNotEmbed), |
| 62 | cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none" , |
| 63 | "Do not embed" ), |
| 64 | clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized" , |
| 65 | "Embed after all optimization passes" ), |
| 66 | clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized, |
| 67 | "post-merge-pre-opt" , |
| 68 | "Embed post merge, but before optimizations" )), |
| 69 | cl::desc("Embed LLVM bitcode in object files produced by LTO" )); |
| 70 | |
| 71 | static cl::opt<bool> ThinLTOAssumeMerged( |
| 72 | "thinlto-assume-merged" , cl::init(Val: false), |
| 73 | cl::desc("Assume the input has already undergone ThinLTO function " |
| 74 | "importing and the other pre-optimization pipeline changes." )); |
| 75 | |
// Declaration only; the option itself is not defined in this file.
// runNewPMPasses() assigns it from Config::PGOWarnMismatch when a CSIR
// profile is supplied.
namespace llvm {
extern cl::opt<bool> NoPGOWarnMismatch;
}
| 79 | |
| 80 | [[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) { |
| 81 | errs() << "failed to open " << Path << ": " << Msg << '\n'; |
| 82 | errs().flush(); |
| 83 | exit(status: 1); |
| 84 | } |
| 85 | |
| 86 | Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, |
| 87 | const DenseSet<StringRef> &SaveTempsArgs) { |
| 88 | ShouldDiscardValueNames = false; |
| 89 | |
| 90 | std::error_code EC; |
| 91 | if (SaveTempsArgs.empty() || SaveTempsArgs.contains(V: "resolution" )) { |
| 92 | ResolutionFile = |
| 93 | std::make_unique<raw_fd_ostream>(args: OutputFileName + "resolution.txt" , args&: EC, |
| 94 | args: sys::fs::OpenFlags::OF_TextWithCRLF); |
| 95 | if (EC) { |
| 96 | ResolutionFile.reset(); |
| 97 | return errorCodeToError(EC); |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { |
| 102 | // Keep track of the hook provided by the linker, which also needs to run. |
| 103 | ModuleHookFn LinkerHook = Hook; |
| 104 | Hook = [=](unsigned Task, const Module &M) { |
| 105 | // If the linker's hook returned false, we need to pass that result |
| 106 | // through. |
| 107 | if (LinkerHook && !LinkerHook(Task, M)) |
| 108 | return false; |
| 109 | |
| 110 | std::string PathPrefix; |
| 111 | // If this is the combined module (not a ThinLTO backend compile) or the |
| 112 | // user hasn't requested using the input module's path, emit to a file |
| 113 | // named from the provided OutputFileName with the Task ID appended. |
| 114 | if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { |
| 115 | PathPrefix = OutputFileName; |
| 116 | if (Task != (unsigned)-1) |
| 117 | PathPrefix += utostr(X: Task) + "." ; |
| 118 | } else |
| 119 | PathPrefix = M.getModuleIdentifier() + "." ; |
| 120 | std::string Path = PathPrefix + PathSuffix + ".bc" ; |
| 121 | std::error_code EC; |
| 122 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
| 123 | // Because -save-temps is a debugging feature, we report the error |
| 124 | // directly and exit. |
| 125 | if (EC) |
| 126 | reportOpenError(Path, Msg: EC.message()); |
| 127 | WriteBitcodeToFile(M, Out&: OS, /*ShouldPreserveUseListOrder=*/false); |
| 128 | return true; |
| 129 | }; |
| 130 | }; |
| 131 | |
| 132 | auto SaveCombinedIndex = |
| 133 | [=](const ModuleSummaryIndex &Index, |
| 134 | const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { |
| 135 | std::string Path = OutputFileName + "index.bc" ; |
| 136 | std::error_code EC; |
| 137 | raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); |
| 138 | // Because -save-temps is a debugging feature, we report the error |
| 139 | // directly and exit. |
| 140 | if (EC) |
| 141 | reportOpenError(Path, Msg: EC.message()); |
| 142 | writeIndexToFile(Index, Out&: OS); |
| 143 | |
| 144 | Path = OutputFileName + "index.dot" ; |
| 145 | raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_Text); |
| 146 | if (EC) |
| 147 | reportOpenError(Path, Msg: EC.message()); |
| 148 | Index.exportToDot(OS&: OSDot, GUIDPreservedSymbols); |
| 149 | return true; |
| 150 | }; |
| 151 | |
| 152 | if (SaveTempsArgs.empty()) { |
| 153 | setHook("0.preopt" , PreOptModuleHook); |
| 154 | setHook("1.promote" , PostPromoteModuleHook); |
| 155 | setHook("2.internalize" , PostInternalizeModuleHook); |
| 156 | setHook("3.import" , PostImportModuleHook); |
| 157 | setHook("4.opt" , PostOptModuleHook); |
| 158 | setHook("5.precodegen" , PreCodeGenModuleHook); |
| 159 | CombinedIndexHook = SaveCombinedIndex; |
| 160 | } else { |
| 161 | if (SaveTempsArgs.contains(V: "preopt" )) |
| 162 | setHook("0.preopt" , PreOptModuleHook); |
| 163 | if (SaveTempsArgs.contains(V: "promote" )) |
| 164 | setHook("1.promote" , PostPromoteModuleHook); |
| 165 | if (SaveTempsArgs.contains(V: "internalize" )) |
| 166 | setHook("2.internalize" , PostInternalizeModuleHook); |
| 167 | if (SaveTempsArgs.contains(V: "import" )) |
| 168 | setHook("3.import" , PostImportModuleHook); |
| 169 | if (SaveTempsArgs.contains(V: "opt" )) |
| 170 | setHook("4.opt" , PostOptModuleHook); |
| 171 | if (SaveTempsArgs.contains(V: "precodegen" )) |
| 172 | setHook("5.precodegen" , PreCodeGenModuleHook); |
| 173 | if (SaveTempsArgs.contains(V: "combinedindex" )) |
| 174 | CombinedIndexHook = SaveCombinedIndex; |
| 175 | } |
| 176 | |
| 177 | return Error::success(); |
| 178 | } |
| 179 | |
| 180 | #define HANDLE_EXTENSION(Ext) \ |
| 181 | llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); |
| 182 | #include "llvm/Support/Extension.def" |
| 183 | #undef HANDLE_EXTENSION |
| 184 | |
| 185 | static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins, |
| 186 | PassBuilder &PB) { |
| 187 | #define HANDLE_EXTENSION(Ext) \ |
| 188 | get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); |
| 189 | #include "llvm/Support/Extension.def" |
| 190 | #undef HANDLE_EXTENSION |
| 191 | |
| 192 | // Load requested pass plugins and let them register pass builder callbacks |
| 193 | for (auto &PluginFN : PassPlugins) { |
| 194 | auto PassPlugin = PassPlugin::Load(Filename: PluginFN); |
| 195 | if (!PassPlugin) |
| 196 | reportFatalUsageError(Err: PassPlugin.takeError()); |
| 197 | PassPlugin->registerPassBuilderCallbacks(PB); |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | static std::unique_ptr<TargetMachine> |
| 202 | createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { |
| 203 | const Triple &TheTriple = M.getTargetTriple(); |
| 204 | SubtargetFeatures Features; |
| 205 | Features.getDefaultSubtargetFeatures(Triple: TheTriple); |
| 206 | for (const std::string &A : Conf.MAttrs) |
| 207 | Features.AddFeature(String: A); |
| 208 | |
| 209 | std::optional<Reloc::Model> RelocModel; |
| 210 | if (Conf.RelocModel) |
| 211 | RelocModel = *Conf.RelocModel; |
| 212 | else if (M.getModuleFlag(Key: "PIC Level" )) |
| 213 | RelocModel = |
| 214 | M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; |
| 215 | |
| 216 | std::optional<CodeModel::Model> CodeModel; |
| 217 | if (Conf.CodeModel) |
| 218 | CodeModel = *Conf.CodeModel; |
| 219 | else |
| 220 | CodeModel = M.getCodeModel(); |
| 221 | |
| 222 | TargetOptions TargetOpts = Conf.Options; |
| 223 | if (TargetOpts.MCOptions.ABIName.empty()) { |
| 224 | TargetOpts.MCOptions.ABIName = M.getTargetABIFromMD(); |
| 225 | } |
| 226 | |
| 227 | std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( |
| 228 | TT: TheTriple, CPU: Conf.CPU, Features: Features.getString(), Options: TargetOpts, RM: RelocModel, |
| 229 | CM: CodeModel, OL: Conf.CGOptLevel)); |
| 230 | |
| 231 | assert(TM && "Failed to create target machine" ); |
| 232 | |
| 233 | if (std::optional<uint64_t> LargeDataThreshold = M.getLargeDataThreshold()) |
| 234 | TM->setLargeDataThreshold(*LargeDataThreshold); |
| 235 | |
| 236 | return TM; |
| 237 | } |
| 238 | |
| 239 | static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, |
| 240 | unsigned OptLevel, bool IsThinLTO, |
| 241 | ModuleSummaryIndex *ExportSummary, |
| 242 | const ModuleSummaryIndex *ImportSummary) { |
| 243 | auto FS = vfs::getRealFileSystem(); |
| 244 | std::optional<PGOOptions> PGOOpt; |
| 245 | if (!Conf.SampleProfile.empty()) |
| 246 | PGOOpt = PGOOptions(Conf.SampleProfile, "" , Conf.ProfileRemapping, |
| 247 | /*MemoryProfile=*/"" , FS, PGOOptions::SampleUse, |
| 248 | PGOOptions::NoCSAction, |
| 249 | PGOOptions::ColdFuncOpt::Default, true); |
| 250 | else if (Conf.RunCSIRInstr) { |
| 251 | PGOOpt = PGOOptions("" , Conf.CSIRProfile, Conf.ProfileRemapping, |
| 252 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
| 253 | PGOOptions::CSIRInstr, PGOOptions::ColdFuncOpt::Default, |
| 254 | Conf.AddFSDiscriminator); |
| 255 | } else if (!Conf.CSIRProfile.empty()) { |
| 256 | PGOOpt = PGOOptions(Conf.CSIRProfile, "" , Conf.ProfileRemapping, |
| 257 | /*MemoryProfile=*/"" , FS, PGOOptions::IRUse, |
| 258 | PGOOptions::CSIRUse, PGOOptions::ColdFuncOpt::Default, |
| 259 | Conf.AddFSDiscriminator); |
| 260 | NoPGOWarnMismatch = !Conf.PGOWarnMismatch; |
| 261 | } else if (Conf.AddFSDiscriminator) { |
| 262 | PGOOpt = PGOOptions("" , "" , "" , /*MemoryProfile=*/"" , nullptr, |
| 263 | PGOOptions::NoAction, PGOOptions::NoCSAction, |
| 264 | PGOOptions::ColdFuncOpt::Default, true); |
| 265 | } |
| 266 | TM->setPGOOption(PGOOpt); |
| 267 | |
| 268 | LoopAnalysisManager LAM; |
| 269 | FunctionAnalysisManager FAM; |
| 270 | CGSCCAnalysisManager CGAM; |
| 271 | ModuleAnalysisManager MAM; |
| 272 | |
| 273 | PassInstrumentationCallbacks PIC; |
| 274 | StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager, |
| 275 | Conf.VerifyEach); |
| 276 | SI.registerCallbacks(PIC, MAM: &MAM); |
| 277 | PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); |
| 278 | |
| 279 | RegisterPassPlugins(PassPlugins: Conf.PassPlugins, PB); |
| 280 | |
| 281 | std::unique_ptr<TargetLibraryInfoImpl> TLII( |
| 282 | new TargetLibraryInfoImpl(TM->getTargetTriple())); |
| 283 | if (Conf.Freestanding) |
| 284 | TLII->disableAllFunctions(); |
| 285 | FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(*TLII); }); |
| 286 | |
| 287 | // Parse a custom AA pipeline if asked to. |
| 288 | if (!Conf.AAPipeline.empty()) { |
| 289 | AAManager AA; |
| 290 | if (auto Err = PB.parseAAPipeline(AA, PipelineText: Conf.AAPipeline)) { |
| 291 | report_fatal_error(reason: Twine("unable to parse AA pipeline description '" ) + |
| 292 | Conf.AAPipeline + "': " + toString(E: std::move(Err))); |
| 293 | } |
| 294 | // Register the AA manager first so that our version is the one used. |
| 295 | FAM.registerPass(PassBuilder: [&] { return std::move(AA); }); |
| 296 | } |
| 297 | |
| 298 | // Register all the basic analyses with the managers. |
| 299 | PB.registerModuleAnalyses(MAM); |
| 300 | PB.registerCGSCCAnalyses(CGAM); |
| 301 | PB.registerFunctionAnalyses(FAM); |
| 302 | PB.registerLoopAnalyses(LAM); |
| 303 | PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); |
| 304 | |
| 305 | ModulePassManager MPM; |
| 306 | |
| 307 | if (!Conf.DisableVerify) |
| 308 | MPM.addPass(Pass: VerifierPass()); |
| 309 | |
| 310 | OptimizationLevel OL; |
| 311 | |
| 312 | switch (OptLevel) { |
| 313 | default: |
| 314 | llvm_unreachable("Invalid optimization level" ); |
| 315 | case 0: |
| 316 | OL = OptimizationLevel::O0; |
| 317 | break; |
| 318 | case 1: |
| 319 | OL = OptimizationLevel::O1; |
| 320 | break; |
| 321 | case 2: |
| 322 | OL = OptimizationLevel::O2; |
| 323 | break; |
| 324 | case 3: |
| 325 | OL = OptimizationLevel::O3; |
| 326 | break; |
| 327 | } |
| 328 | |
| 329 | // Parse a custom pipeline if asked to. |
| 330 | if (!Conf.OptPipeline.empty()) { |
| 331 | if (auto Err = PB.parsePassPipeline(MPM, PipelineText: Conf.OptPipeline)) { |
| 332 | report_fatal_error(reason: Twine("unable to parse pass pipeline description '" ) + |
| 333 | Conf.OptPipeline + "': " + toString(E: std::move(Err))); |
| 334 | } |
| 335 | } else if (IsThinLTO) { |
| 336 | MPM.addPass(Pass: PB.buildThinLTODefaultPipeline(Level: OL, ImportSummary)); |
| 337 | } else { |
| 338 | MPM.addPass(Pass: PB.buildLTODefaultPipeline(Level: OL, ExportSummary)); |
| 339 | } |
| 340 | |
| 341 | if (!Conf.DisableVerify) |
| 342 | MPM.addPass(Pass: VerifierPass()); |
| 343 | |
| 344 | if (PrintPipelinePasses) { |
| 345 | std::string PipelineStr; |
| 346 | raw_string_ostream OS(PipelineStr); |
| 347 | MPM.printPipeline(OS, MapClassName2PassName: [&PIC](StringRef ClassName) { |
| 348 | auto PassName = PIC.getPassNameForClassName(ClassName); |
| 349 | return PassName.empty() ? ClassName : PassName; |
| 350 | }); |
| 351 | outs() << "pipeline-passes: " << PipelineStr << '\n'; |
| 352 | } |
| 353 | |
| 354 | MPM.run(IR&: Mod, AM&: MAM); |
| 355 | } |
| 356 | |
| 357 | static bool isEmptyModule(const Module &Mod) { |
| 358 | // Module is empty if it has no functions, no globals, no inline asm and no |
| 359 | // named metadata (aliases and ifuncs require functions or globals so we |
| 360 | // don't need to check those explicitly). |
| 361 | return Mod.empty() && Mod.global_empty() && Mod.named_metadata_empty() && |
| 362 | Mod.getModuleInlineAsm().empty(); |
| 363 | } |
| 364 | |
| 365 | bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, |
| 366 | bool IsThinLTO, ModuleSummaryIndex *ExportSummary, |
| 367 | const ModuleSummaryIndex *ImportSummary, |
| 368 | const std::vector<uint8_t> &CmdArgs) { |
| 369 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) { |
| 370 | // FIXME: the motivation for capturing post-merge bitcode and command line |
| 371 | // is replicating the compilation environment from bitcode, without needing |
| 372 | // to understand the dependencies (the functions to be imported). This |
| 373 | // assumes a clang - based invocation, case in which we have the command |
| 374 | // line. |
| 375 | // It's not very clear how the above motivation would map in the |
| 376 | // linker-based case, so we currently don't plumb the command line args in |
| 377 | // that case. |
| 378 | if (CmdArgs.empty()) |
| 379 | LLVM_DEBUG( |
| 380 | dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " |
| 381 | "command line arguments are not available" ); |
| 382 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
| 383 | /*EmbedBitcode*/ true, /*EmbedCmdline*/ true, |
| 384 | /*Cmdline*/ CmdArgs); |
| 385 | } |
| 386 | // No need to run any opt passes if the module is empty. |
| 387 | // In theory these passes should take almost no time for an empty |
| 388 | // module, however, this guards against doing any unnecessary summary-based |
| 389 | // analysis in the case of a ThinLTO build where this might be an empty |
| 390 | // regular LTO combined module, with a large combined index from ThinLTO. |
| 391 | if (!isEmptyModule(Mod)) { |
| 392 | // FIXME: Plumb the combined index into the new pass manager. |
| 393 | runNewPMPasses(Conf, Mod, TM, OptLevel: Conf.OptLevel, IsThinLTO, ExportSummary, |
| 394 | ImportSummary); |
| 395 | } |
| 396 | return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); |
| 397 | } |
| 398 | |
| 399 | static void codegen(const Config &Conf, TargetMachine *TM, |
| 400 | AddStreamFn AddStream, unsigned Task, Module &Mod, |
| 401 | const ModuleSummaryIndex &CombinedIndex) { |
| 402 | if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) |
| 403 | return; |
| 404 | |
| 405 | if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) |
| 406 | llvm::embedBitcodeInModule(M&: Mod, Buf: llvm::MemoryBufferRef(), |
| 407 | /*EmbedBitcode*/ true, |
| 408 | /*EmbedCmdline*/ false, |
| 409 | /*CmdArgs*/ std::vector<uint8_t>()); |
| 410 | |
| 411 | std::unique_ptr<ToolOutputFile> DwoOut; |
| 412 | SmallString<1024> DwoFile(Conf.SplitDwarfOutput); |
| 413 | if (!Conf.DwoDir.empty()) { |
| 414 | std::error_code EC; |
| 415 | if (auto EC = llvm::sys::fs::create_directories(path: Conf.DwoDir)) |
| 416 | report_fatal_error(reason: Twine("Failed to create directory " ) + Conf.DwoDir + |
| 417 | ": " + EC.message()); |
| 418 | |
| 419 | DwoFile = Conf.DwoDir; |
| 420 | sys::path::append(path&: DwoFile, a: std::to_string(val: Task) + ".dwo" ); |
| 421 | TM->Options.MCOptions.SplitDwarfFile = std::string(DwoFile); |
| 422 | } else |
| 423 | TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile; |
| 424 | |
| 425 | if (!DwoFile.empty()) { |
| 426 | std::error_code EC; |
| 427 | DwoOut = std::make_unique<ToolOutputFile>(args&: DwoFile, args&: EC, args: sys::fs::OF_None); |
| 428 | if (EC) |
| 429 | report_fatal_error(reason: Twine("Failed to open " ) + DwoFile + ": " + |
| 430 | EC.message()); |
| 431 | } |
| 432 | |
| 433 | Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = |
| 434 | AddStream(Task, Mod.getModuleIdentifier()); |
| 435 | if (Error Err = StreamOrErr.takeError()) |
| 436 | report_fatal_error(Err: std::move(Err)); |
| 437 | std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; |
| 438 | TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; |
| 439 | |
| 440 | // Create the codegen pipeline in its own scope so it gets deleted before |
| 441 | // Stream->commit() is called. The commit function of CacheStream deletes |
| 442 | // the raw stream, which is too early as streamers (e.g. MCAsmStreamer) |
| 443 | // keep the pointer and may use it until their destruction. See #138194. |
| 444 | { |
| 445 | legacy::PassManager CodeGenPasses; |
| 446 | TargetLibraryInfoImpl TLII(Mod.getTargetTriple()); |
| 447 | CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII)); |
| 448 | // No need to make index available if the module is empty. |
| 449 | // In theory these passes should not use the index for an empty |
| 450 | // module, however, this guards against doing any unnecessary summary-based |
| 451 | // analysis in the case of a ThinLTO build where this might be an empty |
| 452 | // regular LTO combined module, with a large combined index from ThinLTO. |
| 453 | if (!isEmptyModule(Mod)) |
| 454 | CodeGenPasses.add( |
| 455 | P: createImmutableModuleSummaryIndexWrapperPass(Index: &CombinedIndex)); |
| 456 | if (Conf.PreCodeGenPassesHook) |
| 457 | Conf.PreCodeGenPassesHook(CodeGenPasses); |
| 458 | if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, |
| 459 | DwoOut ? &DwoOut->os() : nullptr, |
| 460 | Conf.CGFileType)) |
| 461 | report_fatal_error(reason: "Failed to setup codegen" ); |
| 462 | CodeGenPasses.run(M&: Mod); |
| 463 | |
| 464 | if (DwoOut) |
| 465 | DwoOut->keep(); |
| 466 | } |
| 467 | |
| 468 | if (Error Err = Stream->commit()) |
| 469 | report_fatal_error(Err: std::move(Err)); |
| 470 | } |
| 471 | |
| 472 | static void splitCodeGen(const Config &C, TargetMachine *TM, |
| 473 | AddStreamFn AddStream, |
| 474 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
| 475 | const ModuleSummaryIndex &CombinedIndex) { |
| 476 | DefaultThreadPool CodegenThreadPool( |
| 477 | heavyweight_hardware_concurrency(ThreadCount: ParallelCodeGenParallelismLevel)); |
| 478 | unsigned ThreadCount = 0; |
| 479 | const Target *T = &TM->getTarget(); |
| 480 | |
| 481 | const auto HandleModulePartition = |
| 482 | [&](std::unique_ptr<Module> MPart) { |
| 483 | // We want to clone the module in a new context to multi-thread the |
| 484 | // codegen. We do it by serializing partition modules to bitcode |
| 485 | // (while still on the main thread, in order to avoid data races) and |
| 486 | // spinning up new threads which deserialize the partitions into |
| 487 | // separate contexts. |
| 488 | // FIXME: Provide a more direct way to do this in LLVM. |
| 489 | SmallString<0> BC; |
| 490 | raw_svector_ostream BCOS(BC); |
| 491 | WriteBitcodeToFile(M: *MPart, Out&: BCOS); |
| 492 | |
| 493 | // Enqueue the task |
| 494 | CodegenThreadPool.async( |
| 495 | F: [&](const SmallString<0> &BC, unsigned ThreadId) { |
| 496 | LTOLLVMContext Ctx(C); |
| 497 | Expected<std::unique_ptr<Module>> MOrErr = |
| 498 | parseBitcodeFile(Buffer: MemoryBufferRef(BC.str(), "ld-temp.o" ), Context&: Ctx); |
| 499 | if (!MOrErr) |
| 500 | report_fatal_error(reason: "Failed to read bitcode" ); |
| 501 | std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); |
| 502 | |
| 503 | std::unique_ptr<TargetMachine> TM = |
| 504 | createTargetMachine(Conf: C, TheTarget: T, M&: *MPartInCtx); |
| 505 | |
| 506 | codegen(Conf: C, TM: TM.get(), AddStream, Task: ThreadId, Mod&: *MPartInCtx, |
| 507 | CombinedIndex); |
| 508 | }, |
| 509 | // Pass BC using std::move to ensure that it get moved rather than |
| 510 | // copied into the thread's context. |
| 511 | ArgList: std::move(BC), ArgList: ThreadCount++); |
| 512 | }; |
| 513 | |
| 514 | // Try target-specific module splitting first, then fallback to the default. |
| 515 | if (!TM->splitModule(M&: Mod, NumParts: ParallelCodeGenParallelismLevel, |
| 516 | ModuleCallback: HandleModulePartition)) { |
| 517 | SplitModule(M&: Mod, N: ParallelCodeGenParallelismLevel, ModuleCallback: HandleModulePartition, |
| 518 | PreserveLocals: false); |
| 519 | } |
| 520 | |
| 521 | // Because the inner lambda (which runs in a worker thread) captures our local |
| 522 | // variables, we need to wait for the worker threads to terminate before we |
| 523 | // can leave the function scope. |
| 524 | CodegenThreadPool.wait(); |
| 525 | } |
| 526 | |
| 527 | static Expected<const Target *> initAndLookupTarget(const Config &C, |
| 528 | Module &Mod) { |
| 529 | if (!C.OverrideTriple.empty()) |
| 530 | Mod.setTargetTriple(Triple(C.OverrideTriple)); |
| 531 | else if (Mod.getTargetTriple().empty()) |
| 532 | Mod.setTargetTriple(Triple(C.DefaultTriple)); |
| 533 | |
| 534 | std::string Msg; |
| 535 | const Target *T = TargetRegistry::lookupTarget(TheTriple: Mod.getTargetTriple(), Error&: Msg); |
| 536 | if (!T) |
| 537 | return make_error<StringError>(Args&: Msg, Args: inconvertibleErrorCode()); |
| 538 | return T; |
| 539 | } |
| 540 | |
| 541 | Error lto::( |
| 542 | std::unique_ptr<ToolOutputFile> DiagOutputFile) { |
| 543 | // Make sure we flush the diagnostic remarks file in case the linker doesn't |
| 544 | // call the global destructors before exiting. |
| 545 | if (!DiagOutputFile) |
| 546 | return Error::success(); |
| 547 | DiagOutputFile->keep(); |
| 548 | DiagOutputFile->os().flush(); |
| 549 | return Error::success(); |
| 550 | } |
| 551 | |
| 552 | Error lto::backend(const Config &C, AddStreamFn AddStream, |
| 553 | unsigned ParallelCodeGenParallelismLevel, Module &Mod, |
| 554 | ModuleSummaryIndex &CombinedIndex) { |
| 555 | Expected<const Target *> TOrErr = initAndLookupTarget(C, Mod); |
| 556 | if (!TOrErr) |
| 557 | return TOrErr.takeError(); |
| 558 | |
| 559 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf: C, TheTarget: *TOrErr, M&: Mod); |
| 560 | |
| 561 | LLVM_DEBUG(dbgs() << "Running regular LTO\n" ); |
| 562 | if (!C.CodeGenOnly) { |
| 563 | if (!opt(Conf: C, TM: TM.get(), Task: 0, Mod, /*IsThinLTO=*/false, |
| 564 | /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, |
| 565 | /*CmdArgs*/ std::vector<uint8_t>())) |
| 566 | return Error::success(); |
| 567 | } |
| 568 | |
| 569 | if (ParallelCodeGenParallelismLevel == 1) { |
| 570 | codegen(Conf: C, TM: TM.get(), AddStream, Task: 0, Mod, CombinedIndex); |
| 571 | } else { |
| 572 | splitCodeGen(C, TM: TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod, |
| 573 | CombinedIndex); |
| 574 | } |
| 575 | return Error::success(); |
| 576 | } |
| 577 | |
| 578 | static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, |
| 579 | const ModuleSummaryIndex &Index) { |
| 580 | std::vector<GlobalValue*> DeadGVs; |
| 581 | for (auto &GV : Mod.global_values()) |
| 582 | if (GlobalValueSummary *GVS = DefinedGlobals.lookup(Val: GV.getGUID())) |
| 583 | if (!Index.isGlobalValueLive(GVS)) { |
| 584 | DeadGVs.push_back(x: &GV); |
| 585 | convertToDeclaration(GV); |
| 586 | } |
| 587 | |
| 588 | // Now that all dead bodies have been dropped, delete the actual objects |
| 589 | // themselves when possible. |
| 590 | for (GlobalValue *GV : DeadGVs) { |
| 591 | GV->removeDeadConstantUsers(); |
| 592 | // Might reference something defined in native object (i.e. dropped a |
| 593 | // non-prevailing IR def, but we need to keep the declaration). |
| 594 | if (GV->use_empty()) |
| 595 | GV->eraseFromParent(); |
| 596 | } |
| 597 | } |
| 598 | |
| 599 | Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, |
| 600 | Module &Mod, const ModuleSummaryIndex &CombinedIndex, |
| 601 | const FunctionImporter::ImportMapTy &ImportList, |
| 602 | const GVSummaryMapTy &DefinedGlobals, |
| 603 | MapVector<StringRef, BitcodeModule> *ModuleMap, |
| 604 | bool CodeGenOnly, AddStreamFn IRAddStream, |
| 605 | const std::vector<uint8_t> &CmdArgs) { |
| 606 | Expected<const Target *> TOrErr = initAndLookupTarget(C: Conf, Mod); |
| 607 | if (!TOrErr) |
| 608 | return TOrErr.takeError(); |
| 609 | |
| 610 | std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, TheTarget: *TOrErr, M&: Mod); |
| 611 | |
| 612 | // Setup optimization remarks. |
| 613 | auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( |
| 614 | Context&: Mod.getContext(), RemarksFilename: Conf.RemarksFilename, RemarksPasses: Conf.RemarksPasses, |
| 615 | RemarksFormat: Conf.RemarksFormat, RemarksWithHotness: Conf.RemarksWithHotness, RemarksHotnessThreshold: Conf.RemarksHotnessThreshold, |
| 616 | Count: Task); |
| 617 | if (!DiagFileOrErr) |
| 618 | return DiagFileOrErr.takeError(); |
| 619 | auto DiagnosticOutputFile = std::move(*DiagFileOrErr); |
| 620 | |
| 621 | // Set the partial sample profile ratio in the profile summary module flag of |
| 622 | // the module, if applicable. |
| 623 | Mod.setPartialSampleProfileRatio(CombinedIndex); |
| 624 | |
| 625 | LLVM_DEBUG(dbgs() << "Running ThinLTO\n" ); |
| 626 | if (CodeGenOnly) { |
| 627 | // If CodeGenOnly is set, we only perform code generation and skip |
| 628 | // optimization. This value may differ from Conf.CodeGenOnly. |
| 629 | codegen(Conf, TM: TM.get(), AddStream, Task, Mod, CombinedIndex); |
| 630 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
| 631 | } |
| 632 | |
| 633 | if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) |
| 634 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
| 635 | |
| 636 | auto OptimizeAndCodegen = |
| 637 | [&](Module &Mod, TargetMachine *TM, |
| 638 | std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) { |
| 639 | // Perform optimization and code generation for ThinLTO. |
| 640 | if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, |
| 641 | /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, |
| 642 | CmdArgs)) |
| 643 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
| 644 | |
| 645 | // Save the current module before the first codegen round. |
| 646 | // Note that the second codegen round runs only `codegen()` without |
| 647 | // running `opt()`. We're not reaching here as it's bailed out earlier |
| 648 | // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`. |
| 649 | if (IRAddStream) |
| 650 | cgdata::saveModuleForTwoRounds(TheModule: Mod, Task, AddStream: IRAddStream); |
| 651 | |
| 652 | codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); |
| 653 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
| 654 | }; |
| 655 | |
| 656 | if (ThinLTOAssumeMerged) |
| 657 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
| 658 | |
| 659 | // When linking an ELF shared object, dso_local should be dropped. We |
| 660 | // conservatively do this for -fpic. |
| 661 | bool ClearDSOLocalOnDeclarations = |
| 662 | TM->getTargetTriple().isOSBinFormatELF() && |
| 663 | TM->getRelocationModel() != Reloc::Static && |
| 664 | Mod.getPIELevel() == PIELevel::Default; |
| 665 | renameModuleForThinLTO(M&: Mod, Index: CombinedIndex, ClearDSOLocalOnDeclarations); |
| 666 | |
| 667 | dropDeadSymbols(Mod, DefinedGlobals, Index: CombinedIndex); |
| 668 | |
| 669 | thinLTOFinalizeInModule(TheModule&: Mod, DefinedGlobals, /*PropagateAttrs=*/true); |
| 670 | |
| 671 | if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) |
| 672 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
| 673 | |
| 674 | if (!DefinedGlobals.empty()) |
| 675 | thinLTOInternalizeModule(TheModule&: Mod, DefinedGlobals); |
| 676 | |
| 677 | if (Conf.PostInternalizeModuleHook && |
| 678 | !Conf.PostInternalizeModuleHook(Task, Mod)) |
| 679 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
| 680 | |
| 681 | auto ModuleLoader = [&](StringRef Identifier) { |
| 682 | assert(Mod.getContext().isODRUniquingDebugTypes() && |
| 683 | "ODR Type uniquing should be enabled on the context" ); |
| 684 | if (ModuleMap) { |
| 685 | auto I = ModuleMap->find(Key: Identifier); |
| 686 | assert(I != ModuleMap->end()); |
| 687 | return I->second.getLazyModule(Context&: Mod.getContext(), |
| 688 | /*ShouldLazyLoadMetadata=*/true, |
| 689 | /*IsImporting*/ true); |
| 690 | } |
| 691 | |
| 692 | ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr = |
| 693 | llvm::MemoryBuffer::getFile(Filename: Identifier); |
| 694 | if (!MBOrErr) |
| 695 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
| 696 | Args: Twine("Error loading imported file " ) + Identifier + " : " , |
| 697 | Args: MBOrErr.getError())); |
| 698 | |
| 699 | Expected<BitcodeModule> BMOrErr = findThinLTOModule(MBRef: **MBOrErr); |
| 700 | if (!BMOrErr) |
| 701 | return Expected<std::unique_ptr<llvm::Module>>(make_error<StringError>( |
| 702 | Args: Twine("Error loading imported file " ) + Identifier + " : " + |
| 703 | toString(E: BMOrErr.takeError()), |
| 704 | Args: inconvertibleErrorCode())); |
| 705 | |
| 706 | Expected<std::unique_ptr<Module>> MOrErr = |
| 707 | BMOrErr->getLazyModule(Context&: Mod.getContext(), |
| 708 | /*ShouldLazyLoadMetadata=*/true, |
| 709 | /*IsImporting*/ true); |
| 710 | if (MOrErr) |
| 711 | (*MOrErr)->setOwnedMemoryBuffer(std::move(*MBOrErr)); |
| 712 | return MOrErr; |
| 713 | }; |
| 714 | |
| 715 | FunctionImporter Importer(CombinedIndex, ModuleLoader, |
| 716 | ClearDSOLocalOnDeclarations); |
| 717 | if (Error Err = Importer.importFunctions(M&: Mod, ImportList).takeError()) |
| 718 | return Err; |
| 719 | |
| 720 | // Do this after any importing so that imported code is updated. |
| 721 | updateMemProfAttributes(Mod, Index: CombinedIndex); |
| 722 | updatePublicTypeTestCalls(M&: Mod, WholeProgramVisibilityEnabledInLTO: CombinedIndex.withWholeProgramVisibility()); |
| 723 | |
| 724 | if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) |
| 725 | return finalizeOptimizationRemarks(DiagOutputFile: std::move(DiagnosticOutputFile)); |
| 726 | |
| 727 | return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); |
| 728 | } |
| 729 | |
| 730 | BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { |
| 731 | if (ThinLTOAssumeMerged && BMs.size() == 1) |
| 732 | return BMs.begin(); |
| 733 | |
| 734 | for (BitcodeModule &BM : BMs) { |
| 735 | Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); |
| 736 | if (LTOInfo && LTOInfo->IsThinLTO) |
| 737 | return &BM; |
| 738 | } |
| 739 | return nullptr; |
| 740 | } |
| 741 | |
| 742 | Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) { |
| 743 | Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(Buffer: MBRef); |
| 744 | if (!BMsOrErr) |
| 745 | return BMsOrErr.takeError(); |
| 746 | |
| 747 | // The bitcode file may contain multiple modules, we want the one that is |
| 748 | // marked as being the ThinLTO module. |
| 749 | if (const BitcodeModule *Bm = lto::findThinLTOModule(BMs: *BMsOrErr)) |
| 750 | return *Bm; |
| 751 | |
| 752 | return make_error<StringError>(Args: "Could not find module summary" , |
| 753 | Args: inconvertibleErrorCode()); |
| 754 | } |
| 755 | |
| 756 | bool lto::initImportList(const Module &M, |
| 757 | const ModuleSummaryIndex &CombinedIndex, |
| 758 | FunctionImporter::ImportMapTy &ImportList) { |
| 759 | if (ThinLTOAssumeMerged) |
| 760 | return true; |
| 761 | // We can simply import the values mentioned in the combined index, since |
| 762 | // we should only invoke this using the individual indexes written out |
| 763 | // via a WriteIndexesThinBackend. |
| 764 | for (const auto &GlobalList : CombinedIndex) { |
| 765 | // Ignore entries for undefined references. |
| 766 | if (GlobalList.second.SummaryList.empty()) |
| 767 | continue; |
| 768 | |
| 769 | auto GUID = GlobalList.first; |
| 770 | for (const auto &Summary : GlobalList.second.SummaryList) { |
| 771 | // Skip the summaries for the importing module. These are included to |
| 772 | // e.g. record required linkage changes. |
| 773 | if (Summary->modulePath() == M.getModuleIdentifier()) |
| 774 | continue; |
| 775 | // Add an entry to provoke importing by thinBackend. |
| 776 | ImportList.addGUID(FromModule: Summary->modulePath(), GUID, ImportKind: Summary->importType()); |
| 777 | } |
| 778 | } |
| 779 | return true; |
| 780 | } |
| 781 | |