| 1 | //===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // \file |
| 10 | // This file implements support functions for Distributed ThinLTO, focusing on |
| 11 | // preparing input files for distribution. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "llvm/DTLTO/DTLTO.h" |
| 16 | |
| 17 | #include "llvm/ADT/SmallString.h" |
| 18 | #include "llvm/ADT/StringExtras.h" |
| 19 | #include "llvm/ADT/StringRef.h" |
| 20 | #include "llvm/BinaryFormat/Magic.h" |
| 21 | #include "llvm/LTO/LTO.h" |
| 22 | #include "llvm/Object/Archive.h" |
| 23 | #include "llvm/Support/FileSystem.h" |
| 24 | #include "llvm/Support/MemoryBufferRef.h" |
| 25 | #include "llvm/Support/Path.h" |
| 26 | #include "llvm/Support/Process.h" |
| 27 | #include "llvm/Support/Signals.h" |
| 28 | #include "llvm/Support/TimeProfiler.h" |
| 29 | #include "llvm/Support/raw_ostream.h" |
| 30 | #ifdef _WIN32 |
| 31 | #include "llvm/Support/Windows/WindowsSupport.h" |
| 32 | #endif |
| 33 | |
| 34 | #include <string> |
| 35 | |
| 36 | using namespace llvm; |
| 37 | |
| 38 | namespace { |
| 39 | |
| 40 | // Saves the content of Buffer to Path overwriting any existing file. |
| 41 | Error save(StringRef Buffer, StringRef Path) { |
| 42 | std::error_code EC; |
| 43 | raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::OF_None); |
| 44 | if (EC) |
| 45 | return createStringError(EC: inconvertibleErrorCode(), |
| 46 | Fmt: "Failed to create file %s: %s" , Vals: Path.data(), |
| 47 | Vals: EC.message().c_str()); |
| 48 | OS.write(Ptr: Buffer.data(), Size: Buffer.size()); |
| 49 | if (OS.has_error()) |
| 50 | return createStringError(EC: inconvertibleErrorCode(), |
| 51 | Fmt: "Failed writing to file %s" , Vals: Path.data()); |
| 52 | return Error::success(); |
| 53 | } |
| 54 | |
| 55 | // Saves the content of Input to Path overwriting any existing file. |
| 56 | Error save(lto::InputFile *Input, StringRef Path) { |
| 57 | MemoryBufferRef MB = Input->getFileBuffer(); |
| 58 | return save(Buffer: MB.getBuffer(), Path); |
| 59 | } |
| 60 | |
| 61 | // Normalize and save a path. Aside from expanding Windows 8.3 short paths, |
| 62 | // no other normalization is currently required here. These paths are |
| 63 | // machine-local and break distribution systems; other normalization is |
| 64 | // handled by the DTLTO distributors. |
| 65 | Expected<StringRef> normalizePath(StringRef Path, StringSaver &Saver) { |
| 66 | #if defined(_WIN32) |
| 67 | if (Path.empty()) |
| 68 | return Path; |
| 69 | SmallString<256> Expanded; |
| 70 | if (std::error_code EC = llvm::sys::windows::makeLongFormPath(Path, Expanded)) |
| 71 | return createStringError(inconvertibleErrorCode(), |
| 72 | "Normalization failed for path %s: %s" , |
| 73 | Path.str().c_str(), EC.message().c_str()); |
| 74 | return Saver.save(Expanded.str()); |
| 75 | #else |
| 76 | return Saver.save(S: Path); |
| 77 | #endif |
| 78 | } |
| 79 | |
| 80 | // Compute the file path for a thin archive member. |
| 81 | // |
| 82 | // For thin archives, an archive member name is typically a file path relative |
| 83 | // to the archive file's directory. This function resolves that path. |
| 84 | SmallString<256> computeThinArchiveMemberPath(StringRef ArchivePath, |
| 85 | StringRef MemberName) { |
| 86 | assert(!ArchivePath.empty() && "An archive file path must be non empty." ); |
| 87 | SmallString<256> MemberPath; |
| 88 | if (sys::path::is_relative(path: MemberName)) { |
| 89 | MemberPath = sys::path::parent_path(path: ArchivePath); |
| 90 | sys::path::append(path&: MemberPath, a: MemberName); |
| 91 | } else |
| 92 | MemberPath = MemberName; |
| 93 | sys::path::remove_dots(path&: MemberPath, /*remove_dot_dot=*/true); |
| 94 | return MemberPath; |
| 95 | } |
| 96 | |
| 97 | } // namespace |
| 98 | |
| 99 | // Determines if a file at the given path is a thin archive file. |
| 100 | // |
| 101 | // This function uses a cache to avoid repeatedly reading the same file. |
| 102 | // It reads only the header portion (magic bytes) of the file to identify |
| 103 | // the archive type. |
| 104 | Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) { |
| 105 | // Return cached result if available. |
| 106 | auto Cached = ArchiveIsThinCache.find(Key: ArchivePath); |
| 107 | if (Cached != ArchiveIsThinCache.end()) |
| 108 | return Cached->second; |
| 109 | |
| 110 | uint64_t FileSize = -1; |
| 111 | std::error_code EC = sys::fs::file_size(Path: ArchivePath, Result&: FileSize); |
| 112 | if (EC) |
| 113 | return createStringError(EC: inconvertibleErrorCode(), |
| 114 | Fmt: "Failed to get file size from archive %s: %s" , |
| 115 | Vals: ArchivePath.data(), Vals: EC.message().c_str()); |
| 116 | if (FileSize < sizeof(object::ThinArchiveMagic)) |
| 117 | return createStringError(EC: inconvertibleErrorCode(), |
| 118 | Fmt: "Archive file size is too small %s" , |
| 119 | Vals: ArchivePath.data()); |
| 120 | |
| 121 | // Read only the first few bytes containing the magic signature. |
| 122 | ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFileSlice( |
| 123 | Filename: ArchivePath, MapSize: sizeof(object::ThinArchiveMagic), Offset: 0); |
| 124 | if ((EC = MBOrErr.getError())) |
| 125 | return createStringError(EC: inconvertibleErrorCode(), |
| 126 | Fmt: "Failed to read from archive %s: %s" , |
| 127 | Vals: ArchivePath.data(), Vals: EC.message().c_str()); |
| 128 | |
| 129 | StringRef Buf = (*MBOrErr)->getBuffer(); |
| 130 | if (file_magic::archive != identify_magic(magic: Buf)) |
| 131 | return createStringError(EC: inconvertibleErrorCode(), |
| 132 | Fmt: "Unknown format for archive %s" , |
| 133 | Vals: ArchivePath.data()); |
| 134 | |
| 135 | bool IsThin = Buf.starts_with(Prefix: object::ThinArchiveMagic); |
| 136 | |
| 137 | // Cache the result. |
| 138 | ArchiveIsThinCache[ArchivePath] = IsThin; |
| 139 | |
| 140 | return IsThin; |
| 141 | } |
| 142 | |
| 143 | // Add an input file and prepare it for distribution. |
| 144 | // |
| 145 | // This function performs the following tasks: |
| 146 | // 1. Add the input file to the LTO object's list of input files. |
| 147 | // 2. For individual bitcode file inputs on Windows only, overwrite the module |
| 148 | // ID with a normalized path to remove short 8.3 form components. |
| 149 | // 3. For thin archive members, overwrite the module ID with the path |
| 150 | // (normalized on Windows) to the member file on disk. |
| 151 | // 4. For archive members and FatLTO objects, overwrite the module ID with a |
| 152 | // unique path (normalized on Windows) naming a file that will contain the |
| 153 | // member content. The file is created and populated later (see |
| 154 | // serializeInputs()). |
| 155 | Expected<std::shared_ptr<lto::InputFile>> |
| 156 | lto::DTLTO::addInput(std::unique_ptr<InputFile> InputPtr) { |
| 157 | TimeTraceScope TimeScope("Add input for DTLTO" ); |
| 158 | |
| 159 | // Add the input file to the LTO object. |
| 160 | InputFiles.emplace_back(args: InputPtr.release()); |
| 161 | auto &Input = InputFiles.back(); |
| 162 | BitcodeModule &BM = Input->getPrimaryBitcodeModule(); |
| 163 | |
| 164 | auto setIdFromPath = [&](StringRef Path) -> Error { |
| 165 | auto N = normalizePath(Path, Saver); |
| 166 | if (!N) |
| 167 | return N.takeError(); |
| 168 | BM.setModuleIdentifier(*N); |
| 169 | return Error::success(); |
| 170 | }; |
| 171 | |
| 172 | StringRef ArchivePath = Input->getArchivePath(); |
| 173 | |
| 174 | // In most cases, the module ID already points to an individual bitcode file |
| 175 | // on disk, so no further preparation for distribution is required. However, |
| 176 | // on Windows we overwite the module ID to expand Windows 8.3 short form |
| 177 | // paths. These paths are machine-local and break distribution systems; other |
| 178 | // normalization is handled by the DTLTO distributors. |
| 179 | if (ArchivePath.empty() && !Input->isFatLTOObject()) { |
| 180 | #if defined(_WIN32) |
| 181 | if (Error E = setIdFromPath(Input->getName())) |
| 182 | return std::move(E); |
| 183 | #endif |
| 184 | return Input; |
| 185 | } |
| 186 | |
| 187 | // For a member of a thin archive that is not a FatLTO object, there is an |
| 188 | // existing file on disk that can be used, so we can avoid having to |
| 189 | // serialize. |
| 190 | Expected<bool> UseThinMember = |
| 191 | Input->isFatLTOObject() ? false : isThinArchive(ArchivePath); |
| 192 | if (!UseThinMember) |
| 193 | return UseThinMember.takeError(); |
| 194 | if (*UseThinMember) { |
| 195 | // For thin archives, use the path to the actual member file on disk. |
| 196 | auto MemberPath = |
| 197 | computeThinArchiveMemberPath(ArchivePath, MemberName: Input->getMemberName()); |
| 198 | if (Error E = setIdFromPath(MemberPath)) |
| 199 | return std::move(E); |
| 200 | return Input; |
| 201 | } |
| 202 | |
| 203 | // A new file on disk will be needed for archive members and FatLTO objects. |
| 204 | Input->setSerializeForDistribution(true); |
| 205 | |
| 206 | // Get the normalized output directory, if we haven't already. |
| 207 | if (LinkerOutputDir.empty()) { |
| 208 | auto N = normalizePath(Path: sys::path::parent_path(path: LinkerOutputFile), Saver); |
| 209 | if (!N) |
| 210 | return N.takeError(); |
| 211 | LinkerOutputDir = *N; |
| 212 | } |
| 213 | |
| 214 | // Create a unique path by including the process ID and sequence number in the |
| 215 | // filename. |
| 216 | SmallString<256> Id(LinkerOutputDir); |
| 217 | sys::path::append(path&: Id, |
| 218 | a: Twine(sys::path::filename(path: Input->getName())) + "." + |
| 219 | std::to_string(val: InputFiles.size()) /*Sequence number*/ + |
| 220 | "." + utohexstr(X: sys::Process::getProcessId()) + ".o" ); |
| 221 | BM.setModuleIdentifier(Saver.save(S: Id.str())); |
| 222 | return Input; |
| 223 | } |
| 224 | |
| 225 | // Save the contents of ThinLTO-enabled input files that must be serialized for |
| 226 | // distribution, such as archive members and FatLTO objects, to individual |
| 227 | // bitcode files named after the module ID. |
| 228 | // |
| 229 | // Must be called after all input files are added but before optimization |
| 230 | // begins. If a file with that name already exists, it is likely a leftover from |
| 231 | // a previously terminated linker process and can be safely overwritten. |
| 232 | llvm::Error lto::DTLTO::serializeInputsForDistribution() { |
| 233 | for (auto &Input : InputFiles) { |
| 234 | if (!Input->isThinLTO() || !Input->getSerializeForDistribution()) |
| 235 | continue; |
| 236 | // Save the content of the input file to a file named after the module ID. |
| 237 | StringRef ModuleId = Input->getName(); |
| 238 | TimeTraceScope TimeScope("Serialize bitcode input for DTLTO" , ModuleId); |
| 239 | // Cleanup this file on abnormal process exit. |
| 240 | if (!SaveTemps) |
| 241 | llvm::sys::RemoveFileOnSignal(Filename: ModuleId); |
| 242 | if (Error EC = save(Input: Input.get(), Path: ModuleId)) |
| 243 | return EC; |
| 244 | } |
| 245 | |
| 246 | return Error::success(); |
| 247 | } |
| 248 | |
| 249 | // Remove serialized inputs created to enable distribution. |
| 250 | void lto::DTLTO::cleanup() { |
| 251 | if (!SaveTemps) { |
| 252 | TimeTraceScope TimeScope("Remove temporary inputs for DTLTO" ); |
| 253 | for (auto &Input : InputFiles) { |
| 254 | if (!Input->getSerializeForDistribution()) |
| 255 | continue; |
| 256 | std::error_code EC = |
| 257 | sys::fs::remove(path: Input->getName(), /*IgnoreNonExisting=*/true); |
| 258 | if (EC && |
| 259 | EC != std::make_error_code(e: std::errc::no_such_file_or_directory)) |
| 260 | errs() << "warning: could not remove temporary DTLTO input file '" |
| 261 | << Input->getName() << "': " << EC.message() << "\n" ; |
| 262 | } |
| 263 | } |
| 264 | Base::cleanup(); |
| 265 | } |
| 266 | |