1//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements support functions for Distributed ThinLTO, focusing on
11// archive file handling.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/DTLTO/DTLTO.h"
16
17#include "llvm/ADT/SmallString.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/LTO/LTO.h"
22#include "llvm/Object/Archive.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/MemoryBufferRef.h"
25#include "llvm/Support/Path.h"
26#include "llvm/Support/Process.h"
27#include "llvm/Support/Signals.h"
28#include "llvm/Support/TimeProfiler.h"
29#include "llvm/Support/raw_ostream.h"
30
31#include <string>
32
33using namespace llvm;
34
35namespace {
36
37// Writes the content of a memory buffer into a file.
38llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
39 std::error_code EC;
40 raw_fd_ostream OS(FilePath.str(), EC, sys::fs::OpenFlags::OF_None);
41 if (EC) {
42 return createStringError(EC: inconvertibleErrorCode(),
43 Fmt: "Failed to create file %s: %s", Vals: FilePath.data(),
44 Vals: EC.message().c_str());
45 }
46 OS.write(Ptr: FileBuffer.data(), Size: FileBuffer.size());
47 if (OS.has_error()) {
48 return createStringError(EC: inconvertibleErrorCode(),
49 Fmt: "Failed writing to file %s", Vals: FilePath.data());
50 }
51 return Error::success();
52}
53
54// Compute the file path for a thin archive member.
55//
56// For thin archives, an archive member name is typically a file path relative
57// to the archive file's directory. This function resolves that path.
58SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
59 const StringRef MemberName) {
60 assert(!ArchivePath.empty() && "An archive file path must be non empty.");
61 SmallString<64> MemberPath;
62 if (sys::path::is_relative(path: MemberName)) {
63 MemberPath = sys::path::parent_path(path: ArchivePath);
64 sys::path::append(path&: MemberPath, a: MemberName);
65 } else
66 MemberPath = MemberName;
67 sys::path::remove_dots(path&: MemberPath, /*remove_dot_dot=*/true);
68 return MemberPath;
69}
70
71} // namespace
72
73// Determines if a file at the given path is a thin archive file.
74//
75// This function uses a cache to avoid repeatedly reading the same file.
76// It reads only the header portion (magic bytes) of the file to identify
77// the archive type.
78Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
79 // Return cached result if available.
80 auto Cached = ArchiveFiles.find(Key: ArchivePath);
81 if (Cached != ArchiveFiles.end())
82 return Cached->second;
83
84 uint64_t FileSize = -1;
85 bool IsThin = false;
86 std::error_code EC = sys::fs::file_size(Path: ArchivePath, Result&: FileSize);
87 if (EC)
88 return createStringError(EC: inconvertibleErrorCode(),
89 Fmt: "Failed to get file size from archive %s: %s",
90 Vals: ArchivePath.data(), Vals: EC.message().c_str());
91 if (FileSize < sizeof(object::ThinArchiveMagic))
92 return createStringError(EC: inconvertibleErrorCode(),
93 Fmt: "Archive file size is too small %s",
94 Vals: ArchivePath.data());
95
96 // Read only the first few bytes containing the magic signature.
97 ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
98 MemoryBuffer::getFileSlice(Filename: ArchivePath, MapSize: sizeof(object::ThinArchiveMagic),
99 Offset: 0);
100
101 if ((EC = MemBufferOrError.getError()))
102 return createStringError(EC: inconvertibleErrorCode(),
103 Fmt: "Failed to read from archive %s: %s",
104 Vals: ArchivePath.data(), Vals: EC.message().c_str());
105
106 StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
107 if (file_magic::archive != identify_magic(magic: MemBuf))
108 return createStringError(EC: inconvertibleErrorCode(),
109 Fmt: "Unknown format for archive %s",
110 Vals: ArchivePath.data());
111
112 IsThin = MemBuf.starts_with(Prefix: object::ThinArchiveMagic);
113
114 // Cache the result
115 ArchiveFiles[ArchivePath] = IsThin;
116 return IsThin;
117}
118
119// This function performs the following tasks:
120// 1. Adds the input file to the LTO object's list of input files.
121// 2. For thin archive members, generates a new module ID which is a path to a
122// thin archive member file.
123// 3. For regular archive members, generates a new unique module ID.
124// 4. Updates the bitcode module's identifier.
125Expected<std::shared_ptr<lto::InputFile>>
126lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
127 TimeTraceScope TimeScope("Add input for DTLTO");
128
129 // Add the input file to the LTO object.
130 InputFiles.emplace_back(args: InputPtr.release());
131 std::shared_ptr<lto::InputFile> &Input = InputFiles.back();
132
133 StringRef ModuleId = Input->getName();
134 StringRef ArchivePath = Input->getArchivePath();
135
136 // In most cases, the module ID already points to an individual bitcode file
137 // on disk, so no further preparation for distribution is required.
138 if (ArchivePath.empty() && !Input->isFatLTOObject())
139 return Input;
140
141 SmallString<64> NewModuleId;
142 BitcodeModule &BM = Input->getPrimaryBitcodeModule();
143
144 // For a member of a thin archive that is not a FatLTO object, there is an
145 // existing file on disk that can be used, so we can avoid having to
146 // materialize.
147 Expected<bool> UseThinMember =
148 Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
149 if (!UseThinMember)
150 return UseThinMember.takeError();
151
152 if (*UseThinMember) {
153 // For thin archives, use the path to the actual file.
154 NewModuleId =
155 computeThinArchiveMemberPath(ArchivePath, MemberName: Input->getMemberName());
156 } else {
157 // For regular archives and FatLTO objects, generate a unique name.
158 Input->setSerializeForDistribution(true);
159
160 // Create unique identifier using process ID and sequence number.
161 std::string PID = utohexstr(X: sys::Process::getProcessId());
162 std::string Seq = std::to_string(val: InputFiles.size());
163
164 NewModuleId = sys::path::parent_path(path: LinkerOutputFile);
165 sys::path::append(path&: NewModuleId, a: sys::path::filename(path: ModuleId) + "." + Seq +
166 "." + PID + ".o");
167 }
168
169 // Update the module identifier and save it.
170 BM.setModuleIdentifier(Saver.save(S: NewModuleId.str()));
171
172 return Input;
173}
174
175// Write the archive member content to a file named after the module ID.
176// If a file with that name already exists, it's likely a leftover from a
177// previously terminated linker process and can be safely overwritten.
178Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
179 StringRef ModuleId = Input->getName();
180 if (Input->getSerializeForDistribution()) {
181 TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
182 // Cleanup this file on abnormal process exit.
183 if (!SaveTemps)
184 llvm::sys::RemoveFileOnSignal(Filename: ModuleId);
185 MemoryBufferRef MemoryBufferRef = Input->getFileBuffer();
186 if (Error EC = saveBuffer(FileBuffer: MemoryBufferRef.getBuffer(), FilePath: ModuleId))
187 return EC;
188 }
189 return Error::success();
190}
191
192// Iterates through all ThinLTO-enabled input files and saves their content
193// to separate files if they are regular archive members.
194Error lto::DTLTO::saveInputArchiveMembers() {
195 for (auto &Input : InputFiles) {
196 if (!Input->isThinLTO())
197 continue;
198 if (Error EC = saveInputArchiveMember(Input: Input.get()))
199 return EC;
200 }
201 return Error::success();
202}
203
204// Entry point for DTLTO archives support.
205//
206// Sets up the temporary file remover and processes archive members.
207// Must be called after all inputs are added but before optimization begins.
208llvm::Error lto::DTLTO::handleArchiveInputs() {
209
210 // Process and save archive members to separate files if needed.
211 if (Error EC = saveInputArchiveMembers())
212 return EC;
213 return Error::success();
214}
215
216// Remove temporary archive member files created to enable distribution.
217void lto::DTLTO::cleanup() {
218 if (!SaveTemps) {
219 TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");
220 for (auto &Input : InputFiles) {
221 if (!Input->getSerializeForDistribution())
222 continue;
223 std::error_code EC =
224 sys::fs::remove(path: Input->getName(), /*IgnoreNonExisting=*/true);
225 if (EC &&
226 EC != std::make_error_code(e: std::errc::no_such_file_or_directory))
227 errs() << "warning: could not remove temporary DTLTO input file '"
228 << Input->getName() << "': " << EC.message() << "\n";
229 }
230 }
231 Base::cleanup();
232}
233