1//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements support functions for Distributed ThinLTO, focusing on
11// preparing input files for distribution.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/DTLTO/DTLTO.h"
16
17#include "llvm/ADT/SmallString.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/LTO/LTO.h"
22#include "llvm/Object/Archive.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/MemoryBufferRef.h"
25#include "llvm/Support/Path.h"
26#include "llvm/Support/Process.h"
27#include "llvm/Support/Signals.h"
28#include "llvm/Support/TimeProfiler.h"
29#include "llvm/Support/raw_ostream.h"
30#ifdef _WIN32
31#include "llvm/Support/Windows/WindowsSupport.h"
32#endif
33
34#include <string>
35
36using namespace llvm;
37
38namespace {
39
40// Saves the content of Buffer to Path overwriting any existing file.
41Error save(StringRef Buffer, StringRef Path) {
42 std::error_code EC;
43 raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::OF_None);
44 if (EC)
45 return createStringError(EC: inconvertibleErrorCode(),
46 Fmt: "Failed to create file %s: %s", Vals: Path.data(),
47 Vals: EC.message().c_str());
48 OS.write(Ptr: Buffer.data(), Size: Buffer.size());
49 if (OS.has_error())
50 return createStringError(EC: inconvertibleErrorCode(),
51 Fmt: "Failed writing to file %s", Vals: Path.data());
52 return Error::success();
53}
54
55// Saves the content of Input to Path overwriting any existing file.
56Error save(lto::InputFile *Input, StringRef Path) {
57 MemoryBufferRef MB = Input->getFileBuffer();
58 return save(Buffer: MB.getBuffer(), Path);
59}
60
61// Normalize and save a path. Aside from expanding Windows 8.3 short paths,
62// no other normalization is currently required here. These paths are
63// machine-local and break distribution systems; other normalization is
64// handled by the DTLTO distributors.
65Expected<StringRef> normalizePath(StringRef Path, StringSaver &Saver) {
66#if defined(_WIN32)
67 if (Path.empty())
68 return Path;
69 SmallString<256> Expanded;
70 if (std::error_code EC = llvm::sys::windows::makeLongFormPath(Path, Expanded))
71 return createStringError(inconvertibleErrorCode(),
72 "Normalization failed for path %s: %s",
73 Path.str().c_str(), EC.message().c_str());
74 return Saver.save(Expanded.str());
75#else
76 return Saver.save(S: Path);
77#endif
78}
79
80// Compute the file path for a thin archive member.
81//
82// For thin archives, an archive member name is typically a file path relative
83// to the archive file's directory. This function resolves that path.
84SmallString<256> computeThinArchiveMemberPath(StringRef ArchivePath,
85 StringRef MemberName) {
86 assert(!ArchivePath.empty() && "An archive file path must be non empty.");
87 SmallString<256> MemberPath;
88 if (sys::path::is_relative(path: MemberName)) {
89 MemberPath = sys::path::parent_path(path: ArchivePath);
90 sys::path::append(path&: MemberPath, a: MemberName);
91 } else
92 MemberPath = MemberName;
93 sys::path::remove_dots(path&: MemberPath, /*remove_dot_dot=*/true);
94 return MemberPath;
95}
96
97} // namespace
98
99// Determines if a file at the given path is a thin archive file.
100//
101// This function uses a cache to avoid repeatedly reading the same file.
102// It reads only the header portion (magic bytes) of the file to identify
103// the archive type.
104Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
105 // Return cached result if available.
106 auto Cached = ArchiveIsThinCache.find(Key: ArchivePath);
107 if (Cached != ArchiveIsThinCache.end())
108 return Cached->second;
109
110 uint64_t FileSize = -1;
111 std::error_code EC = sys::fs::file_size(Path: ArchivePath, Result&: FileSize);
112 if (EC)
113 return createStringError(EC: inconvertibleErrorCode(),
114 Fmt: "Failed to get file size from archive %s: %s",
115 Vals: ArchivePath.data(), Vals: EC.message().c_str());
116 if (FileSize < sizeof(object::ThinArchiveMagic))
117 return createStringError(EC: inconvertibleErrorCode(),
118 Fmt: "Archive file size is too small %s",
119 Vals: ArchivePath.data());
120
121 // Read only the first few bytes containing the magic signature.
122 ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFileSlice(
123 Filename: ArchivePath, MapSize: sizeof(object::ThinArchiveMagic), Offset: 0);
124 if ((EC = MBOrErr.getError()))
125 return createStringError(EC: inconvertibleErrorCode(),
126 Fmt: "Failed to read from archive %s: %s",
127 Vals: ArchivePath.data(), Vals: EC.message().c_str());
128
129 StringRef Buf = (*MBOrErr)->getBuffer();
130 if (file_magic::archive != identify_magic(magic: Buf))
131 return createStringError(EC: inconvertibleErrorCode(),
132 Fmt: "Unknown format for archive %s",
133 Vals: ArchivePath.data());
134
135 bool IsThin = Buf.starts_with(Prefix: object::ThinArchiveMagic);
136
137 // Cache the result.
138 ArchiveIsThinCache[ArchivePath] = IsThin;
139
140 return IsThin;
141}
142
143// Add an input file and prepare it for distribution.
144//
145// This function performs the following tasks:
146// 1. Add the input file to the LTO object's list of input files.
147// 2. For individual bitcode file inputs on Windows only, overwrite the module
148// ID with a normalized path to remove short 8.3 form components.
149// 3. For thin archive members, overwrite the module ID with the path
150// (normalized on Windows) to the member file on disk.
151// 4. For archive members and FatLTO objects, overwrite the module ID with a
152// unique path (normalized on Windows) naming a file that will contain the
153// member content. The file is created and populated later (see
154// serializeInputs()).
155Expected<std::shared_ptr<lto::InputFile>>
156lto::DTLTO::addInput(std::unique_ptr<InputFile> InputPtr) {
157 TimeTraceScope TimeScope("Add input for DTLTO");
158
159 // Add the input file to the LTO object.
160 InputFiles.emplace_back(args: InputPtr.release());
161 auto &Input = InputFiles.back();
162 BitcodeModule &BM = Input->getPrimaryBitcodeModule();
163
164 auto setIdFromPath = [&](StringRef Path) -> Error {
165 auto N = normalizePath(Path, Saver);
166 if (!N)
167 return N.takeError();
168 BM.setModuleIdentifier(*N);
169 return Error::success();
170 };
171
172 StringRef ArchivePath = Input->getArchivePath();
173
174 // In most cases, the module ID already points to an individual bitcode file
175 // on disk, so no further preparation for distribution is required. However,
176 // on Windows we overwite the module ID to expand Windows 8.3 short form
177 // paths. These paths are machine-local and break distribution systems; other
178 // normalization is handled by the DTLTO distributors.
179 if (ArchivePath.empty() && !Input->isFatLTOObject()) {
180#if defined(_WIN32)
181 if (Error E = setIdFromPath(Input->getName()))
182 return std::move(E);
183#endif
184 return Input;
185 }
186
187 // For a member of a thin archive that is not a FatLTO object, there is an
188 // existing file on disk that can be used, so we can avoid having to
189 // serialize.
190 Expected<bool> UseThinMember =
191 Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
192 if (!UseThinMember)
193 return UseThinMember.takeError();
194 if (*UseThinMember) {
195 // For thin archives, use the path to the actual member file on disk.
196 auto MemberPath =
197 computeThinArchiveMemberPath(ArchivePath, MemberName: Input->getMemberName());
198 if (Error E = setIdFromPath(MemberPath))
199 return std::move(E);
200 return Input;
201 }
202
203 // A new file on disk will be needed for archive members and FatLTO objects.
204 Input->setSerializeForDistribution(true);
205
206 // Get the normalized output directory, if we haven't already.
207 if (LinkerOutputDir.empty()) {
208 auto N = normalizePath(Path: sys::path::parent_path(path: LinkerOutputFile), Saver);
209 if (!N)
210 return N.takeError();
211 LinkerOutputDir = *N;
212 }
213
214 // Create a unique path by including the process ID and sequence number in the
215 // filename.
216 SmallString<256> Id(LinkerOutputDir);
217 sys::path::append(path&: Id,
218 a: Twine(sys::path::filename(path: Input->getName())) + "." +
219 std::to_string(val: InputFiles.size()) /*Sequence number*/ +
220 "." + utohexstr(X: sys::Process::getProcessId()) + ".o");
221 BM.setModuleIdentifier(Saver.save(S: Id.str()));
222 return Input;
223}
224
225// Save the contents of ThinLTO-enabled input files that must be serialized for
226// distribution, such as archive members and FatLTO objects, to individual
227// bitcode files named after the module ID.
228//
229// Must be called after all input files are added but before optimization
230// begins. If a file with that name already exists, it is likely a leftover from
231// a previously terminated linker process and can be safely overwritten.
232llvm::Error lto::DTLTO::serializeInputsForDistribution() {
233 for (auto &Input : InputFiles) {
234 if (!Input->isThinLTO() || !Input->getSerializeForDistribution())
235 continue;
236 // Save the content of the input file to a file named after the module ID.
237 StringRef ModuleId = Input->getName();
238 TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
239 // Cleanup this file on abnormal process exit.
240 if (!SaveTemps)
241 llvm::sys::RemoveFileOnSignal(Filename: ModuleId);
242 if (Error EC = save(Input: Input.get(), Path: ModuleId))
243 return EC;
244 }
245
246 return Error::success();
247}
248
249// Remove serialized inputs created to enable distribution.
250void lto::DTLTO::cleanup() {
251 if (!SaveTemps) {
252 TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");
253 for (auto &Input : InputFiles) {
254 if (!Input->getSerializeForDistribution())
255 continue;
256 std::error_code EC =
257 sys::fs::remove(path: Input->getName(), /*IgnoreNonExisting=*/true);
258 if (EC &&
259 EC != std::make_error_code(e: std::errc::no_such_file_or_directory))
260 errs() << "warning: could not remove temporary DTLTO input file '"
261 << Input->getName() << "': " << EC.message() << "\n";
262 }
263 }
264 Base::cleanup();
265}
266