1 | //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the writeArchive function. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Object/ArchiveWriter.h" |
14 | #include "llvm/ADT/ArrayRef.h" |
15 | #include "llvm/ADT/StringMap.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/BinaryFormat/Magic.h" |
18 | #include "llvm/IR/LLVMContext.h" |
19 | #include "llvm/Object/Archive.h" |
20 | #include "llvm/Object/COFF.h" |
21 | #include "llvm/Object/COFFImportFile.h" |
22 | #include "llvm/Object/Error.h" |
23 | #include "llvm/Object/IRObjectFile.h" |
24 | #include "llvm/Object/MachO.h" |
25 | #include "llvm/Object/ObjectFile.h" |
26 | #include "llvm/Object/SymbolicFile.h" |
27 | #include "llvm/Object/XCOFFObjectFile.h" |
28 | #include "llvm/Support/Alignment.h" |
29 | #include "llvm/Support/EndianStream.h" |
30 | #include "llvm/Support/Errc.h" |
31 | #include "llvm/Support/ErrorHandling.h" |
32 | #include "llvm/Support/Format.h" |
33 | #include "llvm/Support/MathExtras.h" |
34 | #include "llvm/Support/Path.h" |
35 | #include "llvm/Support/SmallVectorMemoryBuffer.h" |
36 | #include "llvm/Support/raw_ostream.h" |
37 | |
38 | #include <cerrno> |
39 | #include <map> |
40 | |
41 | #if !defined(_MSC_VER) && !defined(__MINGW32__) |
42 | #include <unistd.h> |
43 | #else |
44 | #include <io.h> |
45 | #endif |
46 | |
47 | using namespace llvm; |
48 | using namespace llvm::object; |
49 | |
// Symbol maps filled in by getSymbols(): each entry maps a symbol name to the
// 16-bit index that was passed to getSymbols() for the defining member.
struct SymMap {
  // When set, getSymbols() records symbols of members accepted by
  // isECObject() in ECMap instead of Map.
  bool UseECMap{false};
  // Regular symbol map.
  std::map<std::string, uint16_t> Map{};
  // EC symbol map, only populated when UseECMap is set.
  std::map<std::string, uint16_t> ECMap{};
};
55 | |
56 | NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) |
57 | : Buf(MemoryBuffer::getMemBuffer(Ref: BufRef, RequiresNullTerminator: false)), |
58 | MemberName(BufRef.getBufferIdentifier()) {} |
59 | |
60 | object::Archive::Kind NewArchiveMember::detectKindFromObject() const { |
61 | auto MemBufferRef = this->Buf->getMemBufferRef(); |
62 | Expected<std::unique_ptr<object::ObjectFile>> OptionalObject = |
63 | object::ObjectFile::createObjectFile(Object: MemBufferRef); |
64 | |
65 | if (OptionalObject) { |
66 | if (isa<object::MachOObjectFile>(Val: **OptionalObject)) |
67 | return object::Archive::K_DARWIN; |
68 | if (isa<object::XCOFFObjectFile>(Val: **OptionalObject)) |
69 | return object::Archive::K_AIXBIG; |
70 | if (isa<object::COFFObjectFile>(Val: **OptionalObject) || |
71 | isa<object::COFFImportFile>(Val: **OptionalObject)) |
72 | return object::Archive::K_COFF; |
73 | return object::Archive::K_GNU; |
74 | } |
75 | |
76 | // Squelch the error in case we had a non-object file. |
77 | consumeError(Err: OptionalObject.takeError()); |
78 | |
79 | // If we're adding a bitcode file to the archive, detect the Archive kind |
80 | // based on the target triple. |
81 | LLVMContext Context; |
82 | if (identify_magic(magic: MemBufferRef.getBuffer()) == file_magic::bitcode) { |
83 | if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile( |
84 | Object: MemBufferRef, Type: file_magic::bitcode, Context: &Context)) { |
85 | auto &IRObject = cast<object::IRObjectFile>(Val&: **ObjOrErr); |
86 | auto TargetTriple = Triple(IRObject.getTargetTriple()); |
87 | return object::Archive::getDefaultKindForTriple(T&: TargetTriple); |
88 | } else { |
89 | // Squelch the error in case this was not a SymbolicFile. |
90 | consumeError(Err: ObjOrErr.takeError()); |
91 | } |
92 | } |
93 | |
94 | return object::Archive::getDefaultKind(); |
95 | } |
96 | |
97 | Expected<NewArchiveMember> |
98 | NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, |
99 | bool Deterministic) { |
100 | Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); |
101 | if (!BufOrErr) |
102 | return BufOrErr.takeError(); |
103 | |
104 | NewArchiveMember M; |
105 | M.Buf = MemoryBuffer::getMemBuffer(Ref: *BufOrErr, RequiresNullTerminator: false); |
106 | M.MemberName = M.Buf->getBufferIdentifier(); |
107 | if (!Deterministic) { |
108 | auto ModTimeOrErr = OldMember.getLastModified(); |
109 | if (!ModTimeOrErr) |
110 | return ModTimeOrErr.takeError(); |
111 | M.ModTime = ModTimeOrErr.get(); |
112 | Expected<unsigned> UIDOrErr = OldMember.getUID(); |
113 | if (!UIDOrErr) |
114 | return UIDOrErr.takeError(); |
115 | M.UID = UIDOrErr.get(); |
116 | Expected<unsigned> GIDOrErr = OldMember.getGID(); |
117 | if (!GIDOrErr) |
118 | return GIDOrErr.takeError(); |
119 | M.GID = GIDOrErr.get(); |
120 | Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); |
121 | if (!AccessModeOrErr) |
122 | return AccessModeOrErr.takeError(); |
123 | M.Perms = AccessModeOrErr.get(); |
124 | } |
125 | return std::move(M); |
126 | } |
127 | |
128 | Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, |
129 | bool Deterministic) { |
130 | sys::fs::file_status Status; |
131 | auto FDOrErr = sys::fs::openNativeFileForRead(Name: FileName); |
132 | if (!FDOrErr) |
133 | return FDOrErr.takeError(); |
134 | sys::fs::file_t FD = *FDOrErr; |
135 | assert(FD != sys::fs::kInvalidFile); |
136 | |
137 | if (auto EC = sys::fs::status(FD, Result&: Status)) |
138 | return errorCodeToError(EC); |
139 | |
140 | // Opening a directory doesn't make sense. Let it fail. |
141 | // Linux cannot open directories with open(2), although |
142 | // cygwin and *bsd can. |
143 | if (Status.type() == sys::fs::file_type::directory_file) |
144 | return errorCodeToError(EC: make_error_code(E: errc::is_a_directory)); |
145 | |
146 | ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = |
147 | MemoryBuffer::getOpenFile(FD, Filename: FileName, FileSize: Status.getSize(), RequiresNullTerminator: false); |
148 | if (!MemberBufferOrErr) |
149 | return errorCodeToError(EC: MemberBufferOrErr.getError()); |
150 | |
151 | if (auto EC = sys::fs::closeFile(F&: FD)) |
152 | return errorCodeToError(EC); |
153 | |
154 | NewArchiveMember M; |
155 | M.Buf = std::move(*MemberBufferOrErr); |
156 | M.MemberName = M.Buf->getBufferIdentifier(); |
157 | if (!Deterministic) { |
158 | M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( |
159 | t: Status.getLastModificationTime()); |
160 | M.UID = Status.getUser(); |
161 | M.GID = Status.getGroup(); |
162 | M.Perms = Status.permissions(); |
163 | } |
164 | return std::move(M); |
165 | } |
166 | |
167 | template <typename T> |
168 | static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { |
169 | uint64_t OldPos = OS.tell(); |
170 | OS << Data; |
171 | unsigned SizeSoFar = OS.tell() - OldPos; |
172 | assert(SizeSoFar <= Size && "Data doesn't fit in Size" ); |
173 | OS.indent(NumSpaces: Size - SizeSoFar); |
174 | } |
175 | |
176 | static bool isDarwin(object::Archive::Kind Kind) { |
177 | return Kind == object::Archive::K_DARWIN || |
178 | Kind == object::Archive::K_DARWIN64; |
179 | } |
180 | |
181 | static bool isAIXBigArchive(object::Archive::Kind Kind) { |
182 | return Kind == object::Archive::K_AIXBIG; |
183 | } |
184 | |
185 | static bool isCOFFArchive(object::Archive::Kind Kind) { |
186 | return Kind == object::Archive::K_COFF; |
187 | } |
188 | |
189 | static bool isBSDLike(object::Archive::Kind Kind) { |
190 | switch (Kind) { |
191 | case object::Archive::K_GNU: |
192 | case object::Archive::K_GNU64: |
193 | case object::Archive::K_AIXBIG: |
194 | case object::Archive::K_COFF: |
195 | return false; |
196 | case object::Archive::K_BSD: |
197 | case object::Archive::K_DARWIN: |
198 | case object::Archive::K_DARWIN64: |
199 | return true; |
200 | } |
201 | llvm_unreachable("not supported for writting" ); |
202 | } |
203 | |
204 | template <class T> |
205 | static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { |
206 | support::endian::write(Out, Val, |
207 | isBSDLike(Kind) ? llvm::endianness::little |
208 | : llvm::endianness::big); |
209 | } |
210 | |
211 | template <class T> static void printLE(raw_ostream &Out, T Val) { |
212 | support::endian::write(Out, Val, llvm::endianness::little); |
213 | } |
214 | |
215 | static void ( |
216 | raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, |
217 | unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { |
218 | printWithSpacePadding(OS&: Out, Data: sys::toTimeT(TP: ModTime), Size: 12); |
219 | |
220 | // The format has only 6 chars for uid and gid. Truncate if the provided |
221 | // values don't fit. |
222 | printWithSpacePadding(OS&: Out, Data: UID % 1000000, Size: 6); |
223 | printWithSpacePadding(OS&: Out, Data: GID % 1000000, Size: 6); |
224 | |
225 | printWithSpacePadding(OS&: Out, Data: format(Fmt: "%o" , Vals: Perms), Size: 8); |
226 | printWithSpacePadding(OS&: Out, Data: Size, Size: 10); |
227 | Out << "`\n" ; |
228 | } |
229 | |
230 | static void |
231 | (raw_ostream &Out, StringRef Name, |
232 | const sys::TimePoint<std::chrono::seconds> &ModTime, |
233 | unsigned UID, unsigned GID, unsigned Perms, |
234 | uint64_t Size) { |
235 | printWithSpacePadding(OS&: Out, Data: Twine(Name) + "/" , Size: 16); |
236 | printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); |
237 | } |
238 | |
239 | static void |
240 | (raw_ostream &Out, uint64_t Pos, StringRef Name, |
241 | const sys::TimePoint<std::chrono::seconds> &ModTime, |
242 | unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { |
243 | uint64_t = Pos + 60 + Name.size(); |
244 | // Pad so that even 64 bit object files are aligned. |
245 | unsigned Pad = offsetToAlignment(Value: PosAfterHeader, Alignment: Align(8)); |
246 | unsigned NameWithPadding = Name.size() + Pad; |
247 | printWithSpacePadding(OS&: Out, Data: Twine("#1/" ) + Twine(NameWithPadding), Size: 16); |
248 | printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, |
249 | Size: NameWithPadding + Size); |
250 | Out << Name; |
251 | while (Pad--) |
252 | Out.write(C: uint8_t(0)); |
253 | } |
254 | |
255 | static void |
256 | (raw_ostream &Out, StringRef Name, |
257 | const sys::TimePoint<std::chrono::seconds> &ModTime, |
258 | unsigned UID, unsigned GID, unsigned Perms, |
259 | uint64_t Size, uint64_t PrevOffset, |
260 | uint64_t NextOffset) { |
261 | unsigned NameLen = Name.size(); |
262 | |
263 | printWithSpacePadding(OS&: Out, Data: Size, Size: 20); // File member size |
264 | printWithSpacePadding(OS&: Out, Data: NextOffset, Size: 20); // Next member header offset |
265 | printWithSpacePadding(OS&: Out, Data: PrevOffset, Size: 20); // Previous member header offset |
266 | printWithSpacePadding(OS&: Out, Data: sys::toTimeT(TP: ModTime), Size: 12); // File member date |
267 | // The big archive format has 12 chars for uid and gid. |
268 | printWithSpacePadding(OS&: Out, Data: UID % 1000000000000, Size: 12); // UID |
269 | printWithSpacePadding(OS&: Out, Data: GID % 1000000000000, Size: 12); // GID |
270 | printWithSpacePadding(OS&: Out, Data: format(Fmt: "%o" , Vals: Perms), Size: 12); // Permission |
271 | printWithSpacePadding(OS&: Out, Data: NameLen, Size: 4); // Name length |
272 | if (NameLen) { |
273 | printWithSpacePadding(OS&: Out, Data: Name, Size: NameLen); // Name |
274 | if (NameLen % 2) |
275 | Out.write(C: uint8_t(0)); // Null byte padding |
276 | } |
277 | Out << "`\n" ; // Terminator |
278 | } |
279 | |
280 | static bool useStringTable(bool Thin, StringRef Name) { |
281 | return Thin || Name.size() >= 16 || Name.contains(C: '/'); |
282 | } |
283 | |
284 | static bool is64BitKind(object::Archive::Kind Kind) { |
285 | switch (Kind) { |
286 | case object::Archive::K_GNU: |
287 | case object::Archive::K_BSD: |
288 | case object::Archive::K_DARWIN: |
289 | case object::Archive::K_COFF: |
290 | return false; |
291 | case object::Archive::K_AIXBIG: |
292 | case object::Archive::K_DARWIN64: |
293 | case object::Archive::K_GNU64: |
294 | return true; |
295 | } |
296 | llvm_unreachable("not supported for writting" ); |
297 | } |
298 | |
299 | static void |
300 | (raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, |
301 | StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind, |
302 | bool Thin, const NewArchiveMember &M, |
303 | sys::TimePoint<std::chrono::seconds> ModTime, uint64_t Size) { |
304 | if (isBSDLike(Kind)) |
305 | return printBSDMemberHeader(Out, Pos, Name: M.MemberName, ModTime, UID: M.UID, GID: M.GID, |
306 | Perms: M.Perms, Size); |
307 | if (!useStringTable(Thin, Name: M.MemberName)) |
308 | return printGNUSmallMemberHeader(Out, Name: M.MemberName, ModTime, UID: M.UID, GID: M.GID, |
309 | Perms: M.Perms, Size); |
310 | Out << '/'; |
311 | uint64_t NamePos; |
312 | if (Thin) { |
313 | NamePos = StringTable.tell(); |
314 | StringTable << M.MemberName << "/\n" ; |
315 | } else { |
316 | auto Insertion = MemberNames.insert(KV: {M.MemberName, uint64_t(0)}); |
317 | if (Insertion.second) { |
318 | Insertion.first->second = StringTable.tell(); |
319 | StringTable << M.MemberName; |
320 | if (isCOFFArchive(Kind)) |
321 | StringTable << '\0'; |
322 | else |
323 | StringTable << "/\n" ; |
324 | } |
325 | NamePos = Insertion.first->second; |
326 | } |
327 | printWithSpacePadding(OS&: Out, Data: NamePos, Size: 15); |
328 | printRestOfMemberHeader(Out, ModTime, UID: M.UID, GID: M.GID, Perms: M.Perms, Size); |
329 | } |
330 | |
331 | namespace { |
332 | struct MemberData { |
333 | std::vector<unsigned> Symbols; |
334 | std::string ; |
335 | StringRef Data; |
336 | StringRef Padding; |
337 | uint64_t PreHeadPadSize = 0; |
338 | std::unique_ptr<SymbolicFile> SymFile = nullptr; |
339 | }; |
340 | } // namespace |
341 | |
342 | static MemberData computeStringTable(StringRef Names) { |
343 | unsigned Size = Names.size(); |
344 | unsigned Pad = offsetToAlignment(Value: Size, Alignment: Align(2)); |
345 | std::string ; |
346 | raw_string_ostream Out(Header); |
347 | printWithSpacePadding(OS&: Out, Data: "//" , Size: 48); |
348 | printWithSpacePadding(OS&: Out, Data: Size + Pad, Size: 10); |
349 | Out << "`\n" ; |
350 | Out.flush(); |
351 | return {.Symbols: {}, .Header: std::move(Header), .Data: Names, .Padding: Pad ? "\n" : "" }; |
352 | } |
353 | |
354 | static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { |
355 | using namespace std::chrono; |
356 | |
357 | if (!Deterministic) |
358 | return time_point_cast<seconds>(t: system_clock::now()); |
359 | return sys::TimePoint<seconds>(); |
360 | } |
361 | |
362 | static bool isArchiveSymbol(const object::BasicSymbolRef &S) { |
363 | Expected<uint32_t> SymFlagsOrErr = S.getFlags(); |
364 | if (!SymFlagsOrErr) |
365 | // TODO: Actually report errors helpfully. |
366 | report_fatal_error(Err: SymFlagsOrErr.takeError()); |
367 | if (*SymFlagsOrErr & object::SymbolRef::SF_FormatSpecific) |
368 | return false; |
369 | if (!(*SymFlagsOrErr & object::SymbolRef::SF_Global)) |
370 | return false; |
371 | if (*SymFlagsOrErr & object::SymbolRef::SF_Undefined) |
372 | return false; |
373 | return true; |
374 | } |
375 | |
376 | static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, |
377 | uint64_t Val) { |
378 | if (is64BitKind(Kind)) |
379 | print<uint64_t>(Out, Kind, Val); |
380 | else |
381 | print<uint32_t>(Out, Kind, Val); |
382 | } |
383 | |
384 | static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, |
385 | uint64_t NumSyms, uint64_t OffsetSize, |
386 | uint64_t StringTableSize, |
387 | uint32_t *Padding = nullptr) { |
388 | assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize" ); |
389 | uint64_t Size = OffsetSize; // Number of entries |
390 | if (isBSDLike(Kind)) |
391 | Size += NumSyms * OffsetSize * 2; // Table |
392 | else |
393 | Size += NumSyms * OffsetSize; // Table |
394 | if (isBSDLike(Kind)) |
395 | Size += OffsetSize; // byte count |
396 | Size += StringTableSize; |
397 | // ld64 expects the members to be 8-byte aligned for 64-bit content and at |
398 | // least 4-byte aligned for 32-bit content. Opt for the larger encoding |
399 | // uniformly. |
400 | // We do this for all bsd formats because it simplifies aligning members. |
401 | // For the big archive format, the symbol table is the last member, so there |
402 | // is no need to align. |
403 | uint32_t Pad = isAIXBigArchive(Kind) |
404 | ? 0 |
405 | : offsetToAlignment(Value: Size, Alignment: Align(isBSDLike(Kind) ? 8 : 2)); |
406 | |
407 | Size += Pad; |
408 | if (Padding) |
409 | *Padding = Pad; |
410 | return Size; |
411 | } |
412 | |
413 | static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap, |
414 | uint32_t *Padding = nullptr) { |
415 | uint64_t Size = sizeof(uint32_t) * 2; // Number of symbols and objects entries |
416 | Size += NumObj * sizeof(uint32_t); // Offset table |
417 | |
418 | for (auto S : SymMap.Map) |
419 | Size += sizeof(uint16_t) + S.first.length() + 1; |
420 | |
421 | uint32_t Pad = offsetToAlignment(Value: Size, Alignment: Align(2)); |
422 | Size += Pad; |
423 | if (Padding) |
424 | *Padding = Pad; |
425 | return Size; |
426 | } |
427 | |
428 | static uint64_t computeECSymbolsSize(SymMap &SymMap, |
429 | uint32_t *Padding = nullptr) { |
430 | uint64_t Size = sizeof(uint32_t); // Number of symbols |
431 | |
432 | for (auto S : SymMap.ECMap) |
433 | Size += sizeof(uint16_t) + S.first.length() + 1; |
434 | |
435 | uint32_t Pad = offsetToAlignment(Value: Size, Alignment: Align(2)); |
436 | Size += Pad; |
437 | if (Padding) |
438 | *Padding = Pad; |
439 | return Size; |
440 | } |
441 | |
442 | static void (raw_ostream &Out, object::Archive::Kind Kind, |
443 | bool Deterministic, uint64_t Size, |
444 | uint64_t PrevMemberOffset = 0, |
445 | uint64_t NextMemberOffset = 0) { |
446 | if (isBSDLike(Kind)) { |
447 | const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF" ; |
448 | printBSDMemberHeader(Out, Pos: Out.tell(), Name, ModTime: now(Deterministic), UID: 0, GID: 0, Perms: 0, |
449 | Size); |
450 | } else if (isAIXBigArchive(Kind)) { |
451 | printBigArchiveMemberHeader(Out, Name: "" , ModTime: now(Deterministic), UID: 0, GID: 0, Perms: 0, Size, |
452 | PrevOffset: PrevMemberOffset, NextOffset: NextMemberOffset); |
453 | } else { |
454 | const char *Name = is64BitKind(Kind) ? "/SYM64" : "" ; |
455 | printGNUSmallMemberHeader(Out, Name, ModTime: now(Deterministic), UID: 0, GID: 0, Perms: 0, Size); |
456 | } |
457 | } |
458 | |
459 | static uint64_t (object::Archive::Kind Kind, |
460 | uint64_t NumMembers, |
461 | uint64_t StringMemberSize, uint64_t NumSyms, |
462 | uint64_t SymNamesSize, SymMap *SymMap) { |
463 | uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4; |
464 | uint64_t SymtabSize = |
465 | computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTableSize: SymNamesSize); |
466 | auto = [=] { |
467 | SmallString<0> TmpBuf; |
468 | raw_svector_ostream Tmp(TmpBuf); |
469 | writeSymbolTableHeader(Out&: Tmp, Kind, Deterministic: true, Size: SymtabSize); |
470 | return TmpBuf.size(); |
471 | }; |
472 | uint32_t = computeSymbolTableHeaderSize(); |
473 | uint64_t Size = strlen(s: "!<arch>\n" ) + HeaderSize + SymtabSize; |
474 | |
475 | if (SymMap) { |
476 | Size += HeaderSize + computeSymbolMapSize(NumObj: NumMembers, SymMap&: *SymMap); |
477 | if (SymMap->ECMap.size()) |
478 | Size += HeaderSize + computeECSymbolsSize(SymMap&: *SymMap); |
479 | } |
480 | |
481 | return Size + StringMemberSize; |
482 | } |
483 | |
484 | static Expected<std::unique_ptr<SymbolicFile>> |
485 | getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context, |
486 | object::Archive::Kind Kind, function_ref<void(Error)> Warn) { |
487 | const file_magic Type = identify_magic(magic: Buf.getBuffer()); |
488 | // Don't attempt to read non-symbolic file types. |
489 | if (!object::SymbolicFile::isSymbolicFile(Type, Context: &Context)) |
490 | return nullptr; |
491 | if (Type == file_magic::bitcode) { |
492 | auto ObjOrErr = object::SymbolicFile::createSymbolicFile( |
493 | Object: Buf, Type: file_magic::bitcode, Context: &Context); |
494 | // An error reading a bitcode file most likely indicates that the file |
495 | // was created by a compiler from the future. Normally we don't try to |
496 | // implement forwards compatibility for bitcode files, but when creating an |
497 | // archive we can implement best-effort forwards compatibility by treating |
498 | // the file as a blob and not creating symbol index entries for it. lld and |
499 | // mold ignore the archive symbol index, so provided that you use one of |
500 | // these linkers, LTO will work as long as lld or the gold plugin is newer |
501 | // than the compiler. We only ignore errors if the archive format is one |
502 | // that is supported by a linker that is known to ignore the index, |
503 | // otherwise there's no chance of this working so we may as well error out. |
504 | // We print a warning on read failure so that users of linkers that rely on |
505 | // the symbol index can diagnose the issue. |
506 | // |
507 | // This is the same behavior as GNU ar when the linker plugin returns an |
508 | // error when reading the input file. If the bitcode file is actually |
509 | // malformed, it will be diagnosed at link time. |
510 | if (!ObjOrErr) { |
511 | switch (Kind) { |
512 | case object::Archive::K_BSD: |
513 | case object::Archive::K_GNU: |
514 | case object::Archive::K_GNU64: |
515 | Warn(ObjOrErr.takeError()); |
516 | return nullptr; |
517 | case object::Archive::K_AIXBIG: |
518 | case object::Archive::K_COFF: |
519 | case object::Archive::K_DARWIN: |
520 | case object::Archive::K_DARWIN64: |
521 | return ObjOrErr.takeError(); |
522 | } |
523 | } |
524 | return std::move(*ObjOrErr); |
525 | } else { |
526 | auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Object: Buf); |
527 | if (!ObjOrErr) |
528 | return ObjOrErr.takeError(); |
529 | return std::move(*ObjOrErr); |
530 | } |
531 | } |
532 | |
533 | static bool is64BitSymbolicFile(const SymbolicFile *SymObj) { |
534 | return SymObj != nullptr ? SymObj->is64Bit() : false; |
535 | } |
536 | |
// Log2 of PAGESIZE(4096) on an AIX system.
static constexpr uint32_t Log2OfAIXPageSize = 12;

// Minimum alignment of member data in the AIX big archive format. The data
// content follows the member file name, so when the name ends on an odd byte
// one extra byte of padding is added. That guarantees the data within the
// member file starts at an even byte.
static constexpr uint32_t MinBigArchiveMemDataAlign = 2;
545 | |
546 | template <typename AuxiliaryHeader> |
547 | uint16_t getAuxMaxAlignment(uint16_t , AuxiliaryHeader *, |
548 | uint16_t Log2OfMaxAlign) { |
549 | // If the member doesn't have an auxiliary header, it isn't a loadable object |
550 | // and so it just needs aligning at the minimum value. |
551 | if (AuxHeader == nullptr) |
552 | return MinBigArchiveMemDataAlign; |
553 | |
554 | // If the auxiliary header does not have both MaxAlignOfData and |
555 | // MaxAlignOfText field, it is not a loadable shared object file, so align at |
556 | // the minimum value. The 'ModuleType' member is located right after |
557 | // 'MaxAlignOfData' in the AuxiliaryHeader. |
558 | if (AuxHeaderSize < offsetof(AuxiliaryHeader, ModuleType)) |
559 | return MinBigArchiveMemDataAlign; |
560 | |
561 | // If the XCOFF object file does not have a loader section, it is not |
562 | // loadable, so align at the minimum value. |
563 | if (AuxHeader->SecNumOfLoader == 0) |
564 | return MinBigArchiveMemDataAlign; |
565 | |
566 | // The content of the loadable member file needs to be aligned at MAX(maximum |
567 | // alignment of .text, maximum alignment of .data) if there are both fields. |
568 | // If the desired alignment is > PAGESIZE, 32-bit members are aligned on a |
569 | // word boundary, while 64-bit members are aligned on a PAGESIZE(2^12=4096) |
570 | // boundary. |
571 | uint16_t Log2OfAlign = |
572 | std::max(AuxHeader->MaxAlignOfText, AuxHeader->MaxAlignOfData); |
573 | return 1 << (Log2OfAlign > Log2OfAIXPageSize ? Log2OfMaxAlign : Log2OfAlign); |
574 | } |
575 | |
576 | // AIX big archives may contain shared object members. The AIX OS requires these |
577 | // members to be aligned if they are 64-bit and recommends it for 32-bit |
578 | // members. This ensures that when these members are loaded they are aligned in |
579 | // memory. |
580 | static uint32_t getMemberAlignment(SymbolicFile *SymObj) { |
581 | XCOFFObjectFile *XCOFFObj = dyn_cast_or_null<XCOFFObjectFile>(Val: SymObj); |
582 | if (!XCOFFObj) |
583 | return MinBigArchiveMemDataAlign; |
584 | |
585 | // If the desired alignment is > PAGESIZE, 32-bit members are aligned on a |
586 | // word boundary, while 64-bit members are aligned on a PAGESIZE boundary. |
587 | return XCOFFObj->is64Bit() |
588 | ? getAuxMaxAlignment(AuxHeaderSize: XCOFFObj->fileHeader64()->AuxHeaderSize, |
589 | AuxHeader: XCOFFObj->auxiliaryHeader64(), |
590 | Log2OfMaxAlign: Log2OfAIXPageSize) |
591 | : getAuxMaxAlignment(AuxHeaderSize: XCOFFObj->fileHeader32()->AuxHeaderSize, |
592 | AuxHeader: XCOFFObj->auxiliaryHeader32(), Log2OfMaxAlign: 2); |
593 | } |
594 | |
595 | static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, |
596 | bool Deterministic, ArrayRef<MemberData> Members, |
597 | StringRef StringTable, uint64_t MembersOffset, |
598 | unsigned NumSyms, uint64_t PrevMemberOffset = 0, |
599 | uint64_t NextMemberOffset = 0, |
600 | bool Is64Bit = false) { |
601 | // We don't write a symbol table on an archive with no members -- except on |
602 | // Darwin, where the linker will abort unless the archive has a symbol table. |
603 | if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind)) |
604 | return; |
605 | |
606 | uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; |
607 | uint32_t Pad; |
608 | uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, |
609 | StringTableSize: StringTable.size(), Padding: &Pad); |
610 | writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset, |
611 | NextMemberOffset); |
612 | |
613 | if (isBSDLike(Kind)) |
614 | printNBits(Out, Kind, Val: NumSyms * 2 * OffsetSize); |
615 | else |
616 | printNBits(Out, Kind, Val: NumSyms); |
617 | |
618 | uint64_t Pos = MembersOffset; |
619 | for (const MemberData &M : Members) { |
620 | if (isAIXBigArchive(Kind)) { |
621 | Pos += M.PreHeadPadSize; |
622 | if (is64BitSymbolicFile(SymObj: M.SymFile.get()) != Is64Bit) { |
623 | Pos += M.Header.size() + M.Data.size() + M.Padding.size(); |
624 | continue; |
625 | } |
626 | } |
627 | |
628 | for (unsigned StringOffset : M.Symbols) { |
629 | if (isBSDLike(Kind)) |
630 | printNBits(Out, Kind, Val: StringOffset); |
631 | printNBits(Out, Kind, Val: Pos); // member offset |
632 | } |
633 | Pos += M.Header.size() + M.Data.size() + M.Padding.size(); |
634 | } |
635 | |
636 | if (isBSDLike(Kind)) |
637 | // byte count of the string table |
638 | printNBits(Out, Kind, Val: StringTable.size()); |
639 | Out << StringTable; |
640 | |
641 | while (Pad--) |
642 | Out.write(C: uint8_t(0)); |
643 | } |
644 | |
645 | static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind, |
646 | bool Deterministic, ArrayRef<MemberData> Members, |
647 | SymMap &SymMap, uint64_t MembersOffset) { |
648 | uint32_t Pad; |
649 | uint64_t Size = computeSymbolMapSize(NumObj: Members.size(), SymMap, Padding: &Pad); |
650 | writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset: 0); |
651 | |
652 | uint32_t Pos = MembersOffset; |
653 | |
654 | printLE<uint32_t>(Out, Val: Members.size()); |
655 | for (const MemberData &M : Members) { |
656 | printLE(Out, Val: Pos); // member offset |
657 | Pos += M.Header.size() + M.Data.size() + M.Padding.size(); |
658 | } |
659 | |
660 | printLE<uint32_t>(Out, Val: SymMap.Map.size()); |
661 | |
662 | for (auto S : SymMap.Map) |
663 | printLE(Out, Val: S.second); |
664 | for (auto S : SymMap.Map) |
665 | Out << S.first << '\0'; |
666 | |
667 | while (Pad--) |
668 | Out.write(C: uint8_t(0)); |
669 | } |
670 | |
671 | static void writeECSymbols(raw_ostream &Out, object::Archive::Kind Kind, |
672 | bool Deterministic, ArrayRef<MemberData> Members, |
673 | SymMap &SymMap) { |
674 | uint32_t Pad; |
675 | uint64_t Size = computeECSymbolsSize(SymMap, Padding: &Pad); |
676 | printGNUSmallMemberHeader(Out, Name: "/<ECSYMBOLS>" , ModTime: now(Deterministic), UID: 0, GID: 0, Perms: 0, |
677 | Size); |
678 | |
679 | printLE<uint32_t>(Out, Val: SymMap.ECMap.size()); |
680 | |
681 | for (auto S : SymMap.ECMap) |
682 | printLE(Out, Val: S.second); |
683 | for (auto S : SymMap.ECMap) |
684 | Out << S.first << '\0'; |
685 | while (Pad--) |
686 | Out.write(C: uint8_t(0)); |
687 | } |
688 | |
689 | static bool isECObject(object::SymbolicFile &Obj) { |
690 | if (Obj.isCOFF()) |
691 | return cast<llvm::object::COFFObjectFile>(Val: &Obj)->getMachine() != |
692 | COFF::IMAGE_FILE_MACHINE_ARM64; |
693 | |
694 | if (Obj.isCOFFImportFile()) |
695 | return cast<llvm::object::COFFImportFile>(Val: &Obj)->getMachine() != |
696 | COFF::IMAGE_FILE_MACHINE_ARM64; |
697 | |
698 | if (Obj.isIR()) { |
699 | Expected<std::string> TripleStr = |
700 | getBitcodeTargetTriple(Buffer: Obj.getMemoryBufferRef()); |
701 | if (!TripleStr) |
702 | return false; |
703 | Triple T(*TripleStr); |
704 | return T.isWindowsArm64EC() || T.getArch() == Triple::x86_64; |
705 | } |
706 | |
707 | return false; |
708 | } |
709 | |
710 | static bool isAnyArm64COFF(object::SymbolicFile &Obj) { |
711 | if (Obj.isCOFF()) |
712 | return COFF::isAnyArm64(Machine: cast<COFFObjectFile>(Val: &Obj)->getMachine()); |
713 | |
714 | if (Obj.isCOFFImportFile()) |
715 | return COFF::isAnyArm64(Machine: cast<COFFImportFile>(Val: &Obj)->getMachine()); |
716 | |
717 | if (Obj.isIR()) { |
718 | Expected<std::string> TripleStr = |
719 | getBitcodeTargetTriple(Buffer: Obj.getMemoryBufferRef()); |
720 | if (!TripleStr) |
721 | return false; |
722 | Triple T(*TripleStr); |
723 | return T.isOSWindows() && T.getArch() == Triple::aarch64; |
724 | } |
725 | |
726 | return false; |
727 | } |
728 | |
729 | bool isImportDescriptor(StringRef Name) { |
730 | return Name.starts_with(Prefix: ImportDescriptorPrefix) || |
731 | Name == StringRef{NullImportDescriptorSymbolName} || |
732 | (Name.starts_with(Prefix: NullThunkDataPrefix) && |
733 | Name.ends_with(Suffix: NullThunkDataSuffix)); |
734 | } |
735 | |
736 | static Expected<std::vector<unsigned>> getSymbols(SymbolicFile *Obj, |
737 | uint16_t Index, |
738 | raw_ostream &SymNames, |
739 | SymMap *SymMap) { |
740 | std::vector<unsigned> Ret; |
741 | |
742 | if (Obj == nullptr) |
743 | return Ret; |
744 | |
745 | std::map<std::string, uint16_t> *Map = nullptr; |
746 | if (SymMap) |
747 | Map = SymMap->UseECMap && isECObject(Obj&: *Obj) ? &SymMap->ECMap : &SymMap->Map; |
748 | |
749 | for (const object::BasicSymbolRef &S : Obj->symbols()) { |
750 | if (!isArchiveSymbol(S)) |
751 | continue; |
752 | if (Map) { |
753 | std::string Name; |
754 | raw_string_ostream NameStream(Name); |
755 | if (Error E = S.printName(OS&: NameStream)) |
756 | return std::move(E); |
757 | if (Map->find(x: Name) != Map->end()) |
758 | continue; // ignore duplicated symbol |
759 | (*Map)[Name] = Index; |
760 | if (Map == &SymMap->Map) { |
761 | Ret.push_back(x: SymNames.tell()); |
762 | SymNames << Name << '\0'; |
763 | // If EC is enabled, then the import descriptors are NOT put into EC |
764 | // objects so we need to copy them to the EC map manually. |
765 | if (SymMap->UseECMap && isImportDescriptor(Name)) |
766 | SymMap->ECMap[Name] = Index; |
767 | } |
768 | } else { |
769 | Ret.push_back(x: SymNames.tell()); |
770 | if (Error E = S.printName(OS&: SymNames)) |
771 | return std::move(E); |
772 | SymNames << '\0'; |
773 | } |
774 | } |
775 | return Ret; |
776 | } |
777 | |
778 | static Expected<std::vector<MemberData>> |
779 | computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, |
780 | object::Archive::Kind Kind, bool Thin, bool Deterministic, |
781 | SymtabWritingMode NeedSymbols, SymMap *SymMap, |
782 | LLVMContext &Context, ArrayRef<NewArchiveMember> NewMembers, |
783 | std::optional<bool> IsEC, function_ref<void(Error)> Warn) { |
784 | static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; |
785 | uint64_t MemHeadPadSize = 0; |
786 | uint64_t Pos = |
787 | isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0; |
788 | |
789 | std::vector<MemberData> Ret; |
790 | bool HasObject = false; |
791 | |
792 | // Deduplicate long member names in the string table and reuse earlier name |
793 | // offsets. This especially saves space for COFF Import libraries where all |
794 | // members have the same name. |
795 | StringMap<uint64_t> MemberNames; |
796 | |
797 | // UniqueTimestamps is a special case to improve debugging on Darwin: |
798 | // |
799 | // The Darwin linker does not link debug info into the final |
800 | // binary. Instead, it emits entries of type N_OSO in the output |
801 | // binary's symbol table, containing references to the linked-in |
802 | // object files. Using that reference, the debugger can read the |
803 | // debug data directly from the object files. Alternatively, an |
804 | // invocation of 'dsymutil' will link the debug data from the object |
805 | // files into a dSYM bundle, which can be loaded by the debugger, |
806 | // instead of the object files. |
807 | // |
808 | // For an object file, the N_OSO entries contain the absolute path |
809 | // path to the file, and the file's timestamp. For an object |
810 | // included in an archive, the path is formatted like |
811 | // "/absolute/path/to/archive.a(member.o)", and the timestamp is the |
812 | // archive member's timestamp, rather than the archive's timestamp. |
813 | // |
814 | // However, this doesn't always uniquely identify an object within |
815 | // an archive -- an archive file can have multiple entries with the |
816 | // same filename. (This will happen commonly if the original object |
817 | // files started in different directories.) The only way they get |
818 | // distinguished, then, is via the timestamp. But this process is |
819 | // unable to find the correct object file in the archive when there |
820 | // are two files of the same name and timestamp. |
821 | // |
822 | // Additionally, timestamp==0 is treated specially, and causes the |
823 | // timestamp to be ignored as a match criteria. |
824 | // |
825 | // That will "usually" work out okay when creating an archive not in |
826 | // deterministic timestamp mode, because the objects will probably |
827 | // have been created at different timestamps. |
828 | // |
829 | // To ameliorate this problem, in deterministic archive mode (which |
830 | // is the default), on Darwin we will emit a unique non-zero |
831 | // timestamp for each entry with a duplicated name. This is still |
832 | // deterministic: the only thing affecting that timestamp is the |
833 | // order of the files in the resultant archive. |
834 | // |
835 | // See also the functions that handle the lookup: |
836 | // in lldb: ObjectContainerBSDArchive::Archive::FindObject() |
837 | // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers(). |
838 | bool UniqueTimestamps = Deterministic && isDarwin(Kind); |
839 | std::map<StringRef, unsigned> FilenameCount; |
840 | if (UniqueTimestamps) { |
841 | for (const NewArchiveMember &M : NewMembers) |
842 | FilenameCount[M.MemberName]++; |
843 | for (auto &Entry : FilenameCount) |
844 | Entry.second = Entry.second > 1 ? 1 : 0; |
845 | } |
846 | |
847 | std::vector<std::unique_ptr<SymbolicFile>> SymFiles; |
848 | |
849 | if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) { |
850 | for (const NewArchiveMember &M : NewMembers) { |
851 | Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr = getSymbolicFile( |
852 | Buf: M.Buf->getMemBufferRef(), Context, Kind, Warn: [&](Error Err) { |
853 | Warn(createFileError(F: M.MemberName, E: std::move(Err))); |
854 | }); |
855 | if (!SymFileOrErr) |
856 | return createFileError(F: M.MemberName, E: SymFileOrErr.takeError()); |
857 | SymFiles.push_back(x: std::move(*SymFileOrErr)); |
858 | } |
859 | } |
860 | |
861 | if (SymMap) { |
862 | if (IsEC) { |
863 | SymMap->UseECMap = *IsEC; |
864 | } else { |
865 | // When IsEC is not specified by the caller, use it when we have both |
866 | // any ARM64 object (ARM64 or ARM64EC) and any EC object (ARM64EC or |
867 | // AMD64). This may be a single ARM64EC object, but may also be separate |
868 | // ARM64 and AMD64 objects. |
869 | bool HaveArm64 = false, HaveEC = false; |
870 | for (std::unique_ptr<SymbolicFile> &SymFile : SymFiles) { |
871 | if (!SymFile) |
872 | continue; |
873 | if (!HaveArm64) |
874 | HaveArm64 = isAnyArm64COFF(Obj&: *SymFile); |
875 | if (!HaveEC) |
876 | HaveEC = isECObject(Obj&: *SymFile); |
877 | if (HaveArm64 && HaveEC) { |
878 | SymMap->UseECMap = true; |
879 | break; |
880 | } |
881 | } |
882 | } |
883 | } |
884 | |
885 | // The big archive format needs to know the offset of the previous member |
886 | // header. |
887 | uint64_t PrevOffset = 0; |
888 | uint64_t NextMemHeadPadSize = 0; |
889 | |
890 | for (uint32_t Index = 0; Index < NewMembers.size(); ++Index) { |
891 | const NewArchiveMember *M = &NewMembers[Index]; |
892 | std::string ; |
893 | raw_string_ostream Out(Header); |
894 | |
895 | MemoryBufferRef Buf = M->Buf->getMemBufferRef(); |
896 | StringRef Data = Thin ? "" : Buf.getBuffer(); |
897 | |
898 | // ld64 expects the members to be 8-byte aligned for 64-bit content and at |
899 | // least 4-byte aligned for 32-bit content. Opt for the larger encoding |
900 | // uniformly. This matches the behaviour with cctools and ensures that ld64 |
901 | // is happy with archives that we generate. |
902 | unsigned MemberPadding = |
903 | isDarwin(Kind) ? offsetToAlignment(Value: Data.size(), Alignment: Align(8)) : 0; |
904 | unsigned TailPadding = |
905 | offsetToAlignment(Value: Data.size() + MemberPadding, Alignment: Align(2)); |
906 | StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); |
907 | |
908 | sys::TimePoint<std::chrono::seconds> ModTime; |
909 | if (UniqueTimestamps) |
910 | // Increment timestamp for each file of a given name. |
911 | ModTime = sys::toTimePoint(T: FilenameCount[M->MemberName]++); |
912 | else |
913 | ModTime = M->ModTime; |
914 | |
915 | uint64_t Size = Buf.getBufferSize() + MemberPadding; |
916 | if (Size > object::Archive::MaxMemberSize) { |
917 | std::string StringMsg = |
918 | "File " + M->MemberName.str() + " exceeds size limit" ; |
919 | return make_error<object::GenericBinaryError>( |
920 | Args: std::move(StringMsg), Args: object::object_error::parse_failed); |
921 | } |
922 | |
923 | std::unique_ptr<SymbolicFile> CurSymFile; |
924 | if (!SymFiles.empty()) |
925 | CurSymFile = std::move(SymFiles[Index]); |
926 | |
927 | // In the big archive file format, we need to calculate and include the next |
928 | // member offset and previous member offset in the file member header. |
929 | if (isAIXBigArchive(Kind)) { |
930 | uint64_t OffsetToMemData = Pos + sizeof(object::BigArMemHdrType) + |
931 | alignTo(Value: M->MemberName.size(), Align: 2); |
932 | |
933 | if (M == NewMembers.begin()) |
934 | NextMemHeadPadSize = |
935 | alignToPowerOf2(Value: OffsetToMemData, |
936 | Align: getMemberAlignment(SymObj: CurSymFile.get())) - |
937 | OffsetToMemData; |
938 | |
939 | MemHeadPadSize = NextMemHeadPadSize; |
940 | Pos += MemHeadPadSize; |
941 | uint64_t NextOffset = Pos + sizeof(object::BigArMemHdrType) + |
942 | alignTo(Value: M->MemberName.size(), Align: 2) + alignTo(Value: Size, Align: 2); |
943 | |
944 | // If there is another member file after this, we need to calculate the |
945 | // padding before the header. |
946 | if (Index + 1 != SymFiles.size()) { |
947 | uint64_t OffsetToNextMemData = |
948 | NextOffset + sizeof(object::BigArMemHdrType) + |
949 | alignTo(Value: NewMembers[Index + 1].MemberName.size(), Align: 2); |
950 | NextMemHeadPadSize = |
951 | alignToPowerOf2(Value: OffsetToNextMemData, |
952 | Align: getMemberAlignment(SymObj: SymFiles[Index + 1].get())) - |
953 | OffsetToNextMemData; |
954 | NextOffset += NextMemHeadPadSize; |
955 | } |
956 | printBigArchiveMemberHeader(Out, Name: M->MemberName, ModTime, UID: M->UID, GID: M->GID, |
957 | Perms: M->Perms, Size, PrevOffset, NextOffset); |
958 | PrevOffset = Pos; |
959 | } else { |
960 | printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M: *M, |
961 | ModTime, Size); |
962 | } |
963 | Out.flush(); |
964 | |
965 | std::vector<unsigned> Symbols; |
966 | if (NeedSymbols != SymtabWritingMode::NoSymtab) { |
967 | Expected<std::vector<unsigned>> SymbolsOrErr = |
968 | getSymbols(Obj: CurSymFile.get(), Index: Index + 1, SymNames, SymMap); |
969 | if (!SymbolsOrErr) |
970 | return createFileError(F: M->MemberName, E: SymbolsOrErr.takeError()); |
971 | Symbols = std::move(*SymbolsOrErr); |
972 | if (CurSymFile) |
973 | HasObject = true; |
974 | } |
975 | |
976 | Pos += Header.size() + Data.size() + Padding.size(); |
977 | Ret.push_back(x: {.Symbols: std::move(Symbols), .Header: std::move(Header), .Data: Data, .Padding: Padding, |
978 | .PreHeadPadSize: MemHeadPadSize, .SymFile: std::move(CurSymFile)}); |
979 | } |
980 | // If there are no symbols, emit an empty symbol table, to satisfy Solaris |
981 | // tools, older versions of which expect a symbol table in a non-empty |
982 | // archive, regardless of whether there are any symbols in it. |
983 | if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind)) |
984 | SymNames << '\0' << '\0' << '\0'; |
985 | return std::move(Ret); |
986 | } |
987 | |
988 | namespace llvm { |
989 | |
990 | static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) { |
991 | SmallString<128> Ret = P; |
992 | std::error_code Err = sys::fs::make_absolute(path&: Ret); |
993 | if (Err) |
994 | return Err; |
995 | sys::path::remove_dots(path&: Ret, /*removedotdot*/ remove_dot_dot: true); |
996 | return Ret; |
997 | } |
998 | |
999 | // Compute the relative path from From to To. |
1000 | Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) { |
1001 | ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(P: To); |
1002 | ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(P: From); |
1003 | if (!PathToOrErr || !DirFromOrErr) |
1004 | return errorCodeToError(EC: errnoAsErrorCode()); |
1005 | |
1006 | const SmallString<128> &PathTo = *PathToOrErr; |
1007 | const SmallString<128> &DirFrom = sys::path::parent_path(path: *DirFromOrErr); |
1008 | |
1009 | // Can't construct a relative path between different roots |
1010 | if (sys::path::root_name(path: PathTo) != sys::path::root_name(path: DirFrom)) |
1011 | return sys::path::convert_to_slash(path: PathTo); |
1012 | |
1013 | // Skip common prefixes |
1014 | auto FromTo = |
1015 | std::mismatch(sys::path::begin(path: DirFrom), sys::path::end(path: DirFrom), |
1016 | sys::path::begin(path: PathTo)); |
1017 | auto FromI = FromTo.first; |
1018 | auto ToI = FromTo.second; |
1019 | |
1020 | // Construct relative path |
1021 | SmallString<128> Relative; |
1022 | for (auto FromE = sys::path::end(path: DirFrom); FromI != FromE; ++FromI) |
1023 | sys::path::append(path&: Relative, style: sys::path::Style::posix, a: ".." ); |
1024 | |
1025 | for (auto ToE = sys::path::end(path: PathTo); ToI != ToE; ++ToI) |
1026 | sys::path::append(path&: Relative, style: sys::path::Style::posix, a: *ToI); |
1027 | |
1028 | return std::string(Relative); |
1029 | } |
1030 | |
1031 | Error writeArchiveToStream(raw_ostream &Out, |
1032 | ArrayRef<NewArchiveMember> NewMembers, |
1033 | SymtabWritingMode WriteSymtab, |
1034 | object::Archive::Kind Kind, bool Deterministic, |
1035 | bool Thin, std::optional<bool> IsEC, |
1036 | function_ref<void(Error)> Warn) { |
1037 | assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode" ); |
1038 | |
1039 | SmallString<0> SymNamesBuf; |
1040 | raw_svector_ostream SymNames(SymNamesBuf); |
1041 | SmallString<0> StringTableBuf; |
1042 | raw_svector_ostream StringTable(StringTableBuf); |
1043 | SymMap SymMap; |
1044 | bool ShouldWriteSymtab = WriteSymtab != SymtabWritingMode::NoSymtab; |
1045 | |
1046 | // COFF symbol map uses 16-bit indexes, so we can't use it if there are too |
1047 | // many members. COFF format also requires symbol table presence, so use |
1048 | // GNU format when NoSymtab is requested. |
1049 | if (isCOFFArchive(Kind) && (NewMembers.size() > 0xfffe || !ShouldWriteSymtab)) |
1050 | Kind = object::Archive::K_GNU; |
1051 | |
1052 | // In the scenario when LLVMContext is populated SymbolicFile will contain a |
1053 | // reference to it, thus SymbolicFile should be destroyed first. |
1054 | LLVMContext Context; |
1055 | |
1056 | Expected<std::vector<MemberData>> DataOrErr = computeMemberData( |
1057 | StringTable, SymNames, Kind, Thin, Deterministic, NeedSymbols: WriteSymtab, |
1058 | SymMap: isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC, Warn); |
1059 | if (Error E = DataOrErr.takeError()) |
1060 | return E; |
1061 | std::vector<MemberData> &Data = *DataOrErr; |
1062 | |
1063 | uint64_t StringTableSize = 0; |
1064 | MemberData StringTableMember; |
1065 | if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) { |
1066 | StringTableMember = computeStringTable(Names: StringTableBuf); |
1067 | StringTableSize = StringTableMember.Header.size() + |
1068 | StringTableMember.Data.size() + |
1069 | StringTableMember.Padding.size(); |
1070 | } |
1071 | |
1072 | // We would like to detect if we need to switch to a 64-bit symbol table. |
1073 | uint64_t LastMemberEndOffset = 0; |
1074 | uint64_t = 0; |
1075 | uint64_t NumSyms = 0; |
1076 | uint64_t NumSyms32 = 0; // Store symbol number of 32-bit member files. |
1077 | |
1078 | for (const auto &M : Data) { |
1079 | // Record the start of the member's offset |
1080 | LastMemberEndOffset += M.PreHeadPadSize; |
1081 | LastMemberHeaderOffset = LastMemberEndOffset; |
1082 | // Account for the size of each part associated with the member. |
1083 | LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size(); |
1084 | NumSyms += M.Symbols.size(); |
1085 | |
1086 | // AIX big archive files may contain two global symbol tables. The |
1087 | // first global symbol table locates 32-bit file members that define global |
1088 | // symbols; the second global symbol table does the same for 64-bit file |
1089 | // members. As a big archive can have both 32-bit and 64-bit file members, |
1090 | // we need to know the number of symbols in each symbol table individually. |
1091 | if (isAIXBigArchive(Kind) && ShouldWriteSymtab) { |
1092 | if (!is64BitSymbolicFile(SymObj: M.SymFile.get())) |
1093 | NumSyms32 += M.Symbols.size(); |
1094 | } |
1095 | } |
1096 | |
1097 | std::optional<uint64_t> ; |
1098 | |
1099 | // The symbol table is put at the end of the big archive file. The symbol |
1100 | // table is at the start of the archive file for other archive formats. |
1101 | if (ShouldWriteSymtab && !is64BitKind(Kind)) { |
1102 | // We assume 32-bit offsets to see if 32-bit symbols are possible or not. |
1103 | HeadersSize = computeHeadersSize(Kind, NumMembers: Data.size(), StringMemberSize: StringTableSize, |
1104 | NumSyms, SymNamesSize: SymNamesBuf.size(), |
1105 | SymMap: isCOFFArchive(Kind) ? &SymMap : nullptr); |
1106 | |
1107 | // The SYM64 format is used when an archive's member offsets are larger than |
1108 | // 32-bits can hold. The need for this shift in format is detected by |
1109 | // writeArchive. To test this we need to generate a file with a member that |
1110 | // has an offset larger than 32-bits but this demands a very slow test. To |
1111 | // speed the test up we use this environment variable to pretend like the |
1112 | // cutoff happens before 32-bits and instead happens at some much smaller |
1113 | // value. |
1114 | uint64_t Sym64Threshold = 1ULL << 32; |
1115 | const char *Sym64Env = std::getenv(name: "SYM64_THRESHOLD" ); |
1116 | if (Sym64Env) |
1117 | StringRef(Sym64Env).getAsInteger(Radix: 10, Result&: Sym64Threshold); |
1118 | |
1119 | // If LastMemberHeaderOffset isn't going to fit in a 32-bit varible we need |
1120 | // to switch to 64-bit. Note that the file can be larger than 4GB as long as |
1121 | // the last member starts before the 4GB offset. |
1122 | if (*HeadersSize + LastMemberHeaderOffset >= Sym64Threshold) { |
1123 | if (Kind == object::Archive::K_DARWIN) |
1124 | Kind = object::Archive::K_DARWIN64; |
1125 | else |
1126 | Kind = object::Archive::K_GNU64; |
1127 | HeadersSize.reset(); |
1128 | } |
1129 | } |
1130 | |
1131 | if (Thin) |
1132 | Out << "!<thin>\n" ; |
1133 | else if (isAIXBigArchive(Kind)) |
1134 | Out << "<bigaf>\n" ; |
1135 | else |
1136 | Out << "!<arch>\n" ; |
1137 | |
1138 | if (!isAIXBigArchive(Kind)) { |
1139 | if (ShouldWriteSymtab) { |
1140 | if (!HeadersSize) |
1141 | HeadersSize = computeHeadersSize( |
1142 | Kind, NumMembers: Data.size(), StringMemberSize: StringTableSize, NumSyms, SymNamesSize: SymNamesBuf.size(), |
1143 | SymMap: isCOFFArchive(Kind) ? &SymMap : nullptr); |
1144 | writeSymbolTable(Out, Kind, Deterministic, Members: Data, StringTable: SymNamesBuf, |
1145 | MembersOffset: *HeadersSize, NumSyms); |
1146 | |
1147 | if (isCOFFArchive(Kind)) |
1148 | writeSymbolMap(Out, Kind, Deterministic, Members: Data, SymMap, MembersOffset: *HeadersSize); |
1149 | } |
1150 | |
1151 | if (StringTableSize) |
1152 | Out << StringTableMember.Header << StringTableMember.Data |
1153 | << StringTableMember.Padding; |
1154 | |
1155 | if (ShouldWriteSymtab && SymMap.ECMap.size()) |
1156 | writeECSymbols(Out, Kind, Deterministic, Members: Data, SymMap); |
1157 | |
1158 | for (const MemberData &M : Data) |
1159 | Out << M.Header << M.Data << M.Padding; |
1160 | } else { |
1161 | HeadersSize = sizeof(object::BigArchive::FixLenHdr); |
1162 | LastMemberEndOffset += *HeadersSize; |
1163 | LastMemberHeaderOffset += *HeadersSize; |
1164 | |
1165 | // For the big archive (AIX) format, compute a table of member names and |
1166 | // offsets, used in the member table. |
1167 | uint64_t MemberTableNameStrTblSize = 0; |
1168 | std::vector<size_t> MemberOffsets; |
1169 | std::vector<StringRef> MemberNames; |
1170 | // Loop across object to find offset and names. |
1171 | uint64_t MemberEndOffset = sizeof(object::BigArchive::FixLenHdr); |
1172 | for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) { |
1173 | const NewArchiveMember &Member = NewMembers[I]; |
1174 | MemberTableNameStrTblSize += Member.MemberName.size() + 1; |
1175 | MemberEndOffset += Data[I].PreHeadPadSize; |
1176 | MemberOffsets.push_back(x: MemberEndOffset); |
1177 | MemberNames.push_back(x: Member.MemberName); |
1178 | // File member name ended with "`\n". The length is included in |
1179 | // BigArMemHdrType. |
1180 | MemberEndOffset += sizeof(object::BigArMemHdrType) + |
1181 | alignTo(Value: Data[I].Data.size(), Align: 2) + |
1182 | alignTo(Value: Member.MemberName.size(), Align: 2); |
1183 | } |
1184 | |
1185 | // AIX member table size. |
1186 | uint64_t MemberTableSize = 20 + // Number of members field |
1187 | 20 * MemberOffsets.size() + |
1188 | MemberTableNameStrTblSize; |
1189 | |
1190 | SmallString<0> SymNamesBuf32; |
1191 | SmallString<0> SymNamesBuf64; |
1192 | raw_svector_ostream SymNames32(SymNamesBuf32); |
1193 | raw_svector_ostream SymNames64(SymNamesBuf64); |
1194 | |
1195 | if (ShouldWriteSymtab && NumSyms) |
1196 | // Generate the symbol names for the members. |
1197 | for (const auto &M : Data) { |
1198 | Expected<std::vector<unsigned>> SymbolsOrErr = getSymbols( |
1199 | Obj: M.SymFile.get(), Index: 0, |
1200 | SymNames&: is64BitSymbolicFile(SymObj: M.SymFile.get()) ? SymNames64 : SymNames32, |
1201 | SymMap: nullptr); |
1202 | if (!SymbolsOrErr) |
1203 | return SymbolsOrErr.takeError(); |
1204 | } |
1205 | |
1206 | uint64_t MemberTableEndOffset = |
1207 | LastMemberEndOffset + |
1208 | alignTo(Value: sizeof(object::BigArMemHdrType) + MemberTableSize, Align: 2); |
1209 | |
1210 | // In AIX OS, The 'GlobSymOffset' field in the fixed-length header contains |
1211 | // the offset to the 32-bit global symbol table, and the 'GlobSym64Offset' |
1212 | // contains the offset to the 64-bit global symbol table. |
1213 | uint64_t GlobalSymbolOffset = |
1214 | (ShouldWriteSymtab && |
1215 | (WriteSymtab != SymtabWritingMode::BigArchive64) && NumSyms32 > 0) |
1216 | ? MemberTableEndOffset |
1217 | : 0; |
1218 | |
1219 | uint64_t GlobalSymbolOffset64 = 0; |
1220 | uint64_t NumSyms64 = NumSyms - NumSyms32; |
1221 | if (ShouldWriteSymtab && (WriteSymtab != SymtabWritingMode::BigArchive32) && |
1222 | NumSyms64 > 0) { |
1223 | if (GlobalSymbolOffset == 0) |
1224 | GlobalSymbolOffset64 = MemberTableEndOffset; |
1225 | else |
1226 | // If there is a global symbol table for 32-bit members, |
1227 | // the 64-bit global symbol table is after the 32-bit one. |
1228 | GlobalSymbolOffset64 = |
1229 | GlobalSymbolOffset + sizeof(object::BigArMemHdrType) + |
1230 | (NumSyms32 + 1) * 8 + alignTo(Value: SymNamesBuf32.size(), Align: 2); |
1231 | } |
1232 | |
1233 | // Fixed Sized Header. |
1234 | printWithSpacePadding(OS&: Out, Data: NewMembers.size() ? LastMemberEndOffset : 0, |
1235 | Size: 20); // Offset to member table |
1236 | // If there are no file members in the archive, there will be no global |
1237 | // symbol table. |
1238 | printWithSpacePadding(OS&: Out, Data: GlobalSymbolOffset, Size: 20); |
1239 | printWithSpacePadding(OS&: Out, Data: GlobalSymbolOffset64, Size: 20); |
1240 | printWithSpacePadding(OS&: Out, |
1241 | Data: NewMembers.size() |
1242 | ? sizeof(object::BigArchive::FixLenHdr) + |
1243 | Data[0].PreHeadPadSize |
1244 | : 0, |
1245 | Size: 20); // Offset to first archive member |
1246 | printWithSpacePadding(OS&: Out, Data: NewMembers.size() ? LastMemberHeaderOffset : 0, |
1247 | Size: 20); // Offset to last archive member |
1248 | printWithSpacePadding( |
1249 | OS&: Out, Data: 0, |
1250 | Size: 20); // Offset to first member of free list - Not supported yet |
1251 | |
1252 | for (const MemberData &M : Data) { |
1253 | Out << std::string(M.PreHeadPadSize, '\0'); |
1254 | Out << M.Header << M.Data; |
1255 | if (M.Data.size() % 2) |
1256 | Out << '\0'; |
1257 | } |
1258 | |
1259 | if (NewMembers.size()) { |
1260 | // Member table. |
1261 | printBigArchiveMemberHeader(Out, Name: "" , ModTime: sys::toTimePoint(T: 0), UID: 0, GID: 0, Perms: 0, |
1262 | Size: MemberTableSize, PrevOffset: LastMemberHeaderOffset, |
1263 | NextOffset: GlobalSymbolOffset ? GlobalSymbolOffset |
1264 | : GlobalSymbolOffset64); |
1265 | printWithSpacePadding(OS&: Out, Data: MemberOffsets.size(), Size: 20); // Number of members |
1266 | for (uint64_t MemberOffset : MemberOffsets) |
1267 | printWithSpacePadding(OS&: Out, Data: MemberOffset, |
1268 | Size: 20); // Offset to member file header. |
1269 | for (StringRef MemberName : MemberNames) |
1270 | Out << MemberName << '\0'; // Member file name, null byte padding. |
1271 | |
1272 | if (MemberTableNameStrTblSize % 2) |
1273 | Out << '\0'; // Name table must be tail padded to an even number of |
1274 | // bytes. |
1275 | |
1276 | if (ShouldWriteSymtab) { |
1277 | // Write global symbol table for 32-bit file members. |
1278 | if (GlobalSymbolOffset) { |
1279 | writeSymbolTable(Out, Kind, Deterministic, Members: Data, StringTable: SymNamesBuf32, |
1280 | MembersOffset: *HeadersSize, NumSyms: NumSyms32, PrevMemberOffset: LastMemberEndOffset, |
1281 | NextMemberOffset: GlobalSymbolOffset64); |
1282 | // Add padding between the symbol tables, if needed. |
1283 | if (GlobalSymbolOffset64 && (SymNamesBuf32.size() % 2)) |
1284 | Out << '\0'; |
1285 | } |
1286 | |
1287 | // Write global symbol table for 64-bit file members. |
1288 | if (GlobalSymbolOffset64) |
1289 | writeSymbolTable(Out, Kind, Deterministic, Members: Data, StringTable: SymNamesBuf64, |
1290 | MembersOffset: *HeadersSize, NumSyms: NumSyms64, |
1291 | PrevMemberOffset: GlobalSymbolOffset ? GlobalSymbolOffset |
1292 | : LastMemberEndOffset, |
1293 | NextMemberOffset: 0, Is64Bit: true); |
1294 | } |
1295 | } |
1296 | } |
1297 | Out.flush(); |
1298 | return Error::success(); |
1299 | } |
1300 | |
1301 | void warnToStderr(Error Err) { |
1302 | llvm::logAllUnhandledErrors(E: std::move(Err), OS&: llvm::errs(), ErrorBanner: "warning: " ); |
1303 | } |
1304 | |
1305 | Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers, |
1306 | SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, |
1307 | bool Deterministic, bool Thin, |
1308 | std::unique_ptr<MemoryBuffer> OldArchiveBuf, |
1309 | std::optional<bool> IsEC, function_ref<void(Error)> Warn) { |
1310 | Expected<sys::fs::TempFile> Temp = |
1311 | sys::fs::TempFile::create(Model: ArcName + ".temp-archive-%%%%%%%.a" ); |
1312 | if (!Temp) |
1313 | return Temp.takeError(); |
1314 | raw_fd_ostream Out(Temp->FD, false); |
1315 | |
1316 | if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind, |
1317 | Deterministic, Thin, IsEC, Warn)) { |
1318 | if (Error DiscardError = Temp->discard()) |
1319 | return joinErrors(E1: std::move(E), E2: std::move(DiscardError)); |
1320 | return E; |
1321 | } |
1322 | |
1323 | // At this point, we no longer need whatever backing memory |
1324 | // was used to generate the NewMembers. On Windows, this buffer |
1325 | // could be a mapped view of the file we want to replace (if |
1326 | // we're updating an existing archive, say). In that case, the |
1327 | // rename would still succeed, but it would leave behind a |
1328 | // temporary file (actually the original file renamed) because |
1329 | // a file cannot be deleted while there's a handle open on it, |
1330 | // only renamed. So by freeing this buffer, this ensures that |
1331 | // the last open handle on the destination file, if any, is |
1332 | // closed before we attempt to rename. |
1333 | OldArchiveBuf.reset(); |
1334 | |
1335 | return Temp->keep(Name: ArcName); |
1336 | } |
1337 | |
1338 | Expected<std::unique_ptr<MemoryBuffer>> |
1339 | writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, |
1340 | SymtabWritingMode WriteSymtab, object::Archive::Kind Kind, |
1341 | bool Deterministic, bool Thin, |
1342 | function_ref<void(Error)> Warn) { |
1343 | SmallVector<char, 0> ArchiveBufferVector; |
1344 | raw_svector_ostream ArchiveStream(ArchiveBufferVector); |
1345 | |
1346 | if (Error E = |
1347 | writeArchiveToStream(Out&: ArchiveStream, NewMembers, WriteSymtab, Kind, |
1348 | Deterministic, Thin, IsEC: std::nullopt, Warn)) |
1349 | return std::move(E); |
1350 | |
1351 | return std::make_unique<SmallVectorMemoryBuffer>( |
1352 | args: std::move(ArchiveBufferVector), /*RequiresNullTerminator=*/args: false); |
1353 | } |
1354 | |
1355 | } // namespace llvm |
1356 | |