1//===- DTLTOInputFiles.cpp - Integrated Distributed ThinLTO implementation ===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements support functions for Integrated Distributed ThinLTO,
11// focusing on preparing input files for distribution.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/DTLTO/DTLTO.h"
16
17#include "llvm/ADT/SmallString.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/LTO/LTO.h"
22#include "llvm/Object/Archive.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/MemoryBufferRef.h"
25#include "llvm/Support/Path.h"
26#include "llvm/Support/Process.h"
27#include "llvm/Support/TimeProfiler.h"
28#include "llvm/Support/raw_ostream.h"
29#ifdef _WIN32
30#include "llvm/Support/Windows/WindowsSupport.h"
31#endif
32
33#include <string>
34
35using namespace llvm;
36
37// Saves the content of Buffer to Path overwriting any existing file.
38Error lto::DTLTO::save(StringRef Buffer, StringRef Path) {
39 std::error_code EC;
40 raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::OF_None);
41 if (EC)
42 return createStringError(EC: inconvertibleErrorCode(),
43 Fmt: "Failed to create file %s: %s", Vals: Path.data(),
44 Vals: EC.message().c_str());
45 OS.write(Ptr: Buffer.data(), Size: Buffer.size());
46 if (OS.has_error())
47 return createStringError(EC: inconvertibleErrorCode(),
48 Fmt: "Failed writing to file %s", Vals: Path.data());
49 return Error::success();
50}
51
52namespace {
53// Normalize and save a path. Aside from expanding Windows 8.3 short paths,
54// no other normalization is currently required here. These paths are
55// machine-local and break distribution systems; other normalization is
56// handled by the DTLTO distributors.
57Expected<StringRef> normalizePath(StringRef Path, StringSaver &Saver) {
58#if defined(_WIN32)
59 if (Path.empty())
60 return Path;
61 SmallString<256> Expanded;
62 if (std::error_code EC = sys::windows::makeLongFormPath(Path, Expanded))
63 return createStringError(inconvertibleErrorCode(),
64 "Normalization failed for path %s: %s",
65 Path.str().c_str(), EC.message().c_str());
66 return Saver.save(Expanded.str());
67#else
68 return Saver.save(S: Path);
69#endif
70}
71
72// Compute the file path for a thin archive member.
73//
74// For thin archives, an archive member name is typically a file path relative
75// to the archive file's directory. This function resolves that path.
76SmallString<256> computeThinArchiveMemberPath(StringRef ArchivePath,
77 StringRef MemberName) {
78 assert(!ArchivePath.empty() && "An archive file path must be non empty.");
79 SmallString<256> MemberPath;
80 if (sys::path::is_relative(path: MemberName)) {
81 MemberPath = sys::path::parent_path(path: ArchivePath);
82 sys::path::append(path&: MemberPath, a: MemberName);
83 } else {
84 MemberPath = MemberName;
85 }
86 sys::path::remove_dots(path&: MemberPath, /*remove_dot_dot=*/true);
87 return MemberPath;
88}
89
90} // namespace
91
92// Determines if a file at the given path is a thin archive file.
93Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
94 // Return cached result if available.
95 auto Cached = ArchiveIsThinCache.find(Key: ArchivePath);
96 if (Cached != ArchiveIsThinCache.end())
97 return Cached->second;
98
99 uint64_t FileSize = -1;
100 std::error_code EC = sys::fs::file_size(Path: ArchivePath, Result&: FileSize);
101 if (EC)
102 return createStringError(EC: inconvertibleErrorCode(),
103 Fmt: "Failed to get file size from archive %s: %s",
104 Vals: ArchivePath.data(), Vals: EC.message().c_str());
105 if (FileSize < sizeof(object::ThinArchiveMagic))
106 return createStringError(EC: inconvertibleErrorCode(),
107 Fmt: "Archive file size is too small %s",
108 Vals: ArchivePath.data());
109
110 // Read only the first few bytes containing the magic signature.
111 ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFileSlice(
112 Filename: ArchivePath, MapSize: sizeof(object::ThinArchiveMagic), Offset: 0);
113 if ((EC = MBOrErr.getError()))
114 return createStringError(EC: inconvertibleErrorCode(),
115 Fmt: "Failed to read from archive %s: %s",
116 Vals: ArchivePath.data(), Vals: EC.message().c_str());
117
118 StringRef Buf = (*MBOrErr)->getBuffer();
119 if (file_magic::archive != identify_magic(magic: Buf))
120 return createStringError(EC: inconvertibleErrorCode(),
121 Fmt: "Unknown format for archive %s",
122 Vals: ArchivePath.data());
123
124 bool IsThin = Buf.starts_with(Prefix: object::ThinArchiveMagic);
125
126 // Cache the result.
127 ArchiveIsThinCache[ArchivePath] = IsThin;
128
129 return IsThin;
130}
131
132// Add an input file and prepare it for distribution.
133Expected<std::shared_ptr<lto::InputFile>>
134lto::DTLTO::addInput(std::unique_ptr<InputFile> InputPtr) {
135 TimeTraceScope TimeScope("Add input for DTLTO");
136
137 // Add the input file to the LTO object.
138 InputFiles.emplace_back(args: InputPtr.release());
139 auto &Input = InputFiles.back();
140 BitcodeModule &BM = Input->getPrimaryBitcodeModule();
141
142 auto setIdFromPath = [&](StringRef Path) -> Error {
143 auto N = normalizePath(Path, Saver);
144 if (!N)
145 return N.takeError();
146 BM.setModuleIdentifier(*N);
147 return Error::success();
148 };
149
150 StringRef ArchivePath = Input->getArchivePath();
151
152 // In most cases, the module ID already points to an individual bitcode file
153 // on disk, so no further preparation for distribution is required. However,
154 // on Windows we overwite the module ID to expand Windows 8.3 short form
155 // paths. These paths are machine-local and break distribution systems; other
156 // normalization is handled by the DTLTO distributors.
157 if (ArchivePath.empty() && !Input->isFatLTOObject()) {
158#if defined(_WIN32)
159 if (Error E = setIdFromPath(Input->getName()))
160 return std::move(E);
161#endif
162 return Input;
163 }
164
165 // For a member of a thin archive that is not a FatLTO object, there is an
166 // existing file on disk that can be used, so we can avoid having to
167 // serialize.
168 Expected<bool> UseThinMember =
169 Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
170 if (!UseThinMember)
171 return UseThinMember.takeError();
172 if (*UseThinMember) {
173 // For thin archives, use the path to the actual member file on disk.
174 auto MemberPath =
175 computeThinArchiveMemberPath(ArchivePath, MemberName: Input->getMemberName());
176 if (Error E = setIdFromPath(MemberPath))
177 return std::move(E);
178 return Input;
179 }
180
181 // A new file on disk will be needed for archive members and FatLTO objects.
182 Input->setSerializeForDistribution(true);
183
184 // Get the normalized output directory, if we haven't already.
185 if (LinkerOutputDir.empty()) {
186 auto N = normalizePath(
187 Path: sys::path::parent_path(path: DistributorParams.LinkerOutputFile), Saver);
188 if (!N)
189 return N.takeError();
190 LinkerOutputDir = *N;
191 }
192
193 // Create a unique path by including the process ID and sequence number in the
194 // filename.
195 SmallString<256> Id(LinkerOutputDir);
196 sys::path::append(path&: Id,
197 a: Twine(sys::path::filename(path: Input->getName())) + "." +
198 std::to_string(val: InputFiles.size()) /*Sequence number*/ +
199 "." + utohexstr(X: sys::Process::getProcessId()) + ".o");
200 BM.setModuleIdentifier(Saver.save(S: Id.str()));
201 return Input;
202}
203
204// Save the contents of ThinLTO-enabled input files that must be serialized for
205// distribution.
206Error lto::DTLTO::serializeLTOInputs() {
207 for (auto &Input : InputFiles) {
208 if (!Input->isThinLTO() || !Input->getSerializeForDistribution())
209 continue;
210
211 // Save the content of the input file to a file named after the module ID.
212 StringRef ModuleID = Input->getName();
213 if (!InputModuleIDsToSerialize.contains(V: ModuleID))
214 continue;
215 TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleID);
216 MemoryBufferRef Buf = Input->getFileBuffer();
217 if (Error Err = save(Buffer: Buf.getBuffer(), Path: ModuleID))
218 return Err;
219 // Cleanup this file on abnormal process exit.
220 if (!SaveTemps)
221 addToCleanup(Filename: ModuleID);
222 }
223 return Error::success();
224}
225