1//===- Archive.h - ar archive file format -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the ar archive file format class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_OBJECT_ARCHIVE_H
14#define LLVM_OBJECT_ARCHIVE_H
15
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/fallible_iterator.h"
18#include "llvm/ADT/iterator_range.h"
19#include "llvm/Object/Binary.h"
20#include "llvm/Support/Chrono.h"
21#include "llvm/Support/Compiler.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include <cassert>
26#include <cstdint>
27#include <memory>
28#include <string>
29#include <vector>
30
31namespace llvm {
32namespace object {
33
/// Magic string of a regular archive.
constexpr char ArchiveMagic[] = "!<arch>\n";
/// Magic string of a thin archive.
constexpr char ThinArchiveMagic[] = "!<thin>\n";
/// Magic string of a big archive.
constexpr char BigArchiveMagic[] = "<bigaf>\n";
/// Magic string of a z/OS archive: the bytes of "!<arch>\n" in EBCDIC.
constexpr char ZOSArchiveMagic[] = "\x5A\x4C\x81\x99\x83\x88\x6E\x15";
39
40class Archive;
41
42class AbstractArchiveMemberHeader {
43protected:
44 AbstractArchiveMemberHeader(const Archive *Parent) : Parent(Parent){};
45
46public:
47 friend class Archive;
48 virtual std::unique_ptr<AbstractArchiveMemberHeader> clone() const = 0;
49 virtual ~AbstractArchiveMemberHeader() = default;
50
51 /// Get the name without looking up long names.
52 virtual Expected<StringRef> getRawName() const = 0;
53 virtual StringRef getRawAccessMode() const = 0;
54 virtual StringRef getRawLastModified() const = 0;
55 virtual StringRef getRawUID() const = 0;
56 virtual StringRef getRawGID() const = 0;
57
58 /// Get the name looking up long names.
59 virtual Expected<StringRef> getName(uint64_t Size) const = 0;
60 virtual Expected<uint64_t> getSize() const = 0;
61 virtual uint64_t getOffset() const = 0;
62
63 /// Get next file member location.
64 virtual Expected<const char *> getNextChildLoc() const = 0;
65 virtual Expected<bool> isThin() const = 0;
66
67 LLVM_ABI Expected<sys::fs::perms> getAccessMode() const;
68 LLVM_ABI Expected<sys::TimePoint<std::chrono::seconds>>
69 getLastModified() const;
70 LLVM_ABI Expected<unsigned> getUID() const;
71 LLVM_ABI Expected<unsigned> getGID() const;
72
73 /// Returns the size in bytes of the format-defined member header of the
74 /// concrete archive type.
75 virtual uint64_t getSizeOf() const = 0;
76
77 const Archive *Parent;
78};
79
80template <typename T>
81class LLVM_ABI CommonArchiveMemberHeader : public AbstractArchiveMemberHeader {
82public:
83 CommonArchiveMemberHeader(const Archive *Parent, const T *RawHeaderPtr)
84 : AbstractArchiveMemberHeader(Parent), ArMemHdr(RawHeaderPtr){};
85 StringRef getRawAccessMode() const override;
86 StringRef getRawLastModified() const override;
87 StringRef getRawUID() const override;
88 StringRef getRawGID() const override;
89
90 uint64_t getOffset() const override;
91 uint64_t getSizeOf() const override { return sizeof(T); }
92
93 T const *ArMemHdr;
94};
95
/// Raw member header record used by ArchiveMemberHeader. Every field is a
/// fixed-width character array, 60 bytes in total.
struct UnixArMemHdrType {
  char Name[16];
  char LastModified[12];
  char UID[6];
  char GID[6];
  char AccessMode[8];
  /// Size of data, not including header or padding.
  char Size[10];
  char Terminator[2];
};
105
106class LLVM_ABI ArchiveMemberHeader
107 : public CommonArchiveMemberHeader<UnixArMemHdrType> {
108public:
109 ArchiveMemberHeader(const Archive *Parent, const char *RawHeaderPtr,
110 uint64_t Size, Error *Err);
111
112 std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
113 return std::make_unique<ArchiveMemberHeader>(args: *this);
114 }
115
116 Expected<StringRef> getRawName() const override;
117
118 Expected<StringRef> getName(uint64_t Size) const override;
119 Expected<uint64_t> getSize() const override;
120 Expected<const char *> getNextChildLoc() const override;
121 Expected<bool> isThin() const override;
122};
123
/// File member header record used by BigArchiveMemberHeader. Every field is a
/// fixed-width character array, 114 bytes in total.
struct BigArMemHdrType {
  char Size[20];       ///< File member size in decimal.
  char NextOffset[20]; ///< Next member offset in decimal.
  char PrevOffset[20]; ///< Previous member offset in decimal.
  char LastModified[12];
  char UID[12];
  char GID[12];
  char AccessMode[12];
  char NameLen[4]; ///< File member name length in decimal.
  /// Name and Terminator share the same two bytes.
  union {
    char Name[2]; ///< Start of member name.
    char Terminator[2];
  };
};
139
140// Define file member header of AIX big archive.
141class LLVM_ABI BigArchiveMemberHeader
142 : public CommonArchiveMemberHeader<BigArMemHdrType> {
143
144public:
145 BigArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
146 uint64_t Size, Error *Err);
147 std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
148 return std::make_unique<BigArchiveMemberHeader>(args: *this);
149 }
150
151 Expected<StringRef> getRawName() const override;
152 Expected<uint64_t> getRawNameSize() const;
153
154 Expected<StringRef> getName(uint64_t Size) const override;
155 Expected<uint64_t> getSize() const override;
156 Expected<const char *> getNextChildLoc() const override;
157 Expected<uint64_t> getNextOffset() const;
158 Expected<bool> isThin() const override { return false; }
159};
160
161// Define file member header of z/OS archive.
162// The fixed part of the member header (in EBCDIC) is:
163// struct ar_hdr {
164// char ar_name[16]; /* space-padded member name */
165// char ar_date[12]; /* date (decimal) */
166// char ar_uid[6]; /* user id (decimal) */
167// char ar_gid[6]; /* group id (decimal) */
168// char ar_mode[8]; /* access mode (octal) */
169// char ar_size[10]; /* length in bytes (decimal) */
170// char ar_fmag[2]; /* contains backtick (X'79'), followed by new line
171// (X'15') */
172// };
173class LLVM_ABI ZOSArchiveMemberHeader : public ArchiveMemberHeader {
174public:
175 ZOSArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
176 uint64_t Size, Error *Err);
177 std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
178 return std::make_unique<ZOSArchiveMemberHeader>(args: *this);
179 }
180
181 // Converted EBCDIC to ASCII header string fields.
182 std::string RawMemberName;
183 std::string MemberName;
184 std::string LastModified;
185 std::string UID;
186 std::string GID;
187 std::string AccessMode;
188
189 void setMemberHeaderStrings(Error *Err, uint64_t Size);
190
191 Expected<StringRef> getRawName() const override;
192 Expected<StringRef> getName(uint64_t Size) const override;
193 StringRef getRawAccessMode() const override;
194 StringRef getRawLastModified() const override;
195 StringRef getRawUID() const override;
196 StringRef getRawGID() const override;
197 Expected<uint64_t> getSize() const override;
198 Expected<bool> isThin() const override { return false; }
199};
200
201class LLVM_ABI Archive : public Binary {
202 virtual void anchor();
203
204public:
205 class Child {
206 friend Archive;
207 friend AbstractArchiveMemberHeader;
208
209 const Archive *Parent;
210 std::unique_ptr<AbstractArchiveMemberHeader> Header;
211 /// Includes header but not padding byte.
212 StringRef Data;
213 /// Offset from Data to the start of the file.
214 uint16_t StartOfFile;
215
216 Expected<bool> isThinMember() const;
217
218 public:
219 LLVM_ABI Child(const Archive *Parent, const char *Start, Error *Err);
220 LLVM_ABI Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile);
221
222 Child(const Child &C)
223 : Parent(C.Parent), Data(C.Data), StartOfFile(C.StartOfFile) {
224 if (C.Header)
225 Header = C.Header->clone();
226 }
227
228 Child(Child &&C) {
229 Parent = std::move(C.Parent);
230 Header = std::move(C.Header);
231 Data = C.Data;
232 StartOfFile = C.StartOfFile;
233 }
234
235 Child &operator=(Child &&C) noexcept {
236 if (&C == this)
237 return *this;
238
239 Parent = std::move(C.Parent);
240 Header = std::move(C.Header);
241 Data = C.Data;
242 StartOfFile = C.StartOfFile;
243
244 return *this;
245 }
246
247 Child &operator=(const Child &C) {
248 if (&C == this)
249 return *this;
250
251 Parent = C.Parent;
252 if (C.Header)
253 Header = C.Header->clone();
254 Data = C.Data;
255 StartOfFile = C.StartOfFile;
256
257 return *this;
258 }
259
260 bool operator==(const Child &other) const {
261 assert(!Parent || !other.Parent || Parent == other.Parent);
262 return Data.begin() == other.Data.begin();
263 }
264
265 const Archive *getParent() const { return Parent; }
266 LLVM_ABI Expected<Child> getNext() const;
267
268 LLVM_ABI Expected<StringRef> getName() const;
269 LLVM_ABI Expected<std::string> getFullName() const;
270 Expected<StringRef> getRawName() const { return Header->getRawName(); }
271
272 Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const {
273 return Header->getLastModified();
274 }
275
276 StringRef getRawLastModified() const {
277 return Header->getRawLastModified();
278 }
279
280 Expected<unsigned> getUID() const { return Header->getUID(); }
281 Expected<unsigned> getGID() const { return Header->getGID(); }
282
283 Expected<sys::fs::perms> getAccessMode() const {
284 return Header->getAccessMode();
285 }
286
287 /// \return the size of the archive member without the header or padding.
288 LLVM_ABI Expected<uint64_t> getSize() const;
289 /// \return the size in the archive header for this member.
290 LLVM_ABI Expected<uint64_t> getRawSize() const;
291
292 LLVM_ABI Expected<StringRef> getBuffer() const;
293 LLVM_ABI uint64_t getChildOffset() const;
294 uint64_t getDataOffset() const { return getChildOffset() + StartOfFile; }
295
296 LLVM_ABI Expected<MemoryBufferRef> getMemoryBufferRef() const;
297
298 LLVM_ABI Expected<std::unique_ptr<Binary>>
299 getAsBinary(LLVMContext *Context = nullptr) const;
300 };
301
302 class ChildFallibleIterator {
303 Child C;
304
305 public:
306 ChildFallibleIterator() : C(Child(nullptr, nullptr, nullptr)) {}
307 ChildFallibleIterator(const Child &C) : C(C) {}
308
309 const Child *operator->() const { return &C; }
310 const Child &operator*() const { return C; }
311
312 bool operator==(const ChildFallibleIterator &other) const {
313 // Ignore errors here: If an error occurred during increment then getNext
314 // will have been set to child_end(), and the following comparison should
315 // do the right thing.
316 return C == other.C;
317 }
318
319 bool operator!=(const ChildFallibleIterator &other) const {
320 return !(*this == other);
321 }
322
323 Error inc() {
324 auto NextChild = C.getNext();
325 if (!NextChild)
326 return NextChild.takeError();
327 C = std::move(*NextChild);
328 return Error::success();
329 }
330 };
331
332 using child_iterator = fallible_iterator<ChildFallibleIterator>;
333
334 class Symbol {
335 const Archive *Parent;
336 uint32_t SymbolIndex;
337 uint32_t StringIndex; // Extra index to the string.
338
339 public:
340 Symbol(const Archive *p, uint32_t symi, uint32_t stri)
341 : Parent(p), SymbolIndex(symi), StringIndex(stri) {}
342
343 bool operator==(const Symbol &other) const {
344 return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex);
345 }
346
347 LLVM_ABI StringRef getName() const;
348 LLVM_ABI Expected<Child> getMember() const;
349 LLVM_ABI Symbol getNext() const;
350 LLVM_ABI bool isECSymbol() const;
351 };
352
353 class symbol_iterator {
354 Symbol symbol;
355
356 public:
357 symbol_iterator(const Symbol &s) : symbol(s) {}
358
359 const Symbol *operator->() const { return &symbol; }
360 const Symbol &operator*() const { return symbol; }
361
362 bool operator==(const symbol_iterator &other) const {
363 return symbol == other.symbol;
364 }
365
366 bool operator!=(const symbol_iterator &other) const {
367 return !(*this == other);
368 }
369
370 symbol_iterator &operator++() { // Preincrement
371 symbol = symbol.getNext();
372 return *this;
373 }
374 };
375
376 Archive(MemoryBufferRef Source, Error &Err);
377 static Expected<std::unique_ptr<Archive>> create(MemoryBufferRef Source);
378
379 // Explicitly non-copyable.
380 Archive(Archive const &) = delete;
381 Archive &operator=(Archive const &) = delete;
382
383 /// Size field is 10 decimal digits long
384 static const uint64_t MaxMemberSize = 9999999999;
385
386 enum Kind {
387 K_GNU,
388 K_GNU64,
389 K_BSD,
390 K_DARWIN,
391 K_DARWIN64,
392 K_COFF,
393 K_AIXBIG,
394 K_ZOS
395 };
396
397 Kind kind() const { return (Kind)Format; }
398 bool isThin() const { return IsThin; }
399 static object::Archive::Kind getDefaultKind();
400 static object::Archive::Kind getDefaultKindForTriple(const Triple &T);
401
402 child_iterator child_begin(Error &Err, bool SkipInternal = true) const;
403 child_iterator child_end() const;
404 iterator_range<child_iterator> children(Error &Err,
405 bool SkipInternal = true) const {
406 return make_range(x: child_begin(Err, SkipInternal), y: child_end());
407 }
408
409 symbol_iterator symbol_begin() const;
410 symbol_iterator symbol_end() const;
411 iterator_range<symbol_iterator> symbols() const {
412 return make_range(x: symbol_begin(), y: symbol_end());
413 }
414
415 Expected<iterator_range<symbol_iterator>> ec_symbols() const;
416
417 static bool classof(Binary const *v) { return v->isArchive(); }
418
419 // check if a symbol is in the archive
420 Expected<std::optional<Child>> findSym(StringRef name) const;
421
422 virtual bool isEmpty() const;
423 bool hasSymbolTable() const;
424 StringRef getSymbolTable() const { return SymbolTable; }
425 StringRef getStringTable() const { return StringTable; }
426 uint32_t getNumberOfSymbols() const;
427 uint32_t getNumberOfECSymbols() const;
428 virtual uint64_t getFirstChildOffset() const { return getArchiveMagicLen(); }
429
430 std::vector<std::unique_ptr<MemoryBuffer>> takeThinBuffers() {
431 return std::move(ThinBuffers);
432 }
433
434 std::unique_ptr<AbstractArchiveMemberHeader>
435 createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
436 Error *Err) const;
437
438protected:
439 uint64_t getArchiveMagicLen() const;
440 void setFirstRegular(const Child &C);
441
442 StringRef SymbolTable;
443 StringRef ECSymbolTable;
444 StringRef StringTable;
445
446private:
447 StringRef FirstRegularData;
448 uint16_t FirstRegularStartOfFile = -1;
449
450 unsigned Format : 3;
451 unsigned IsThin : 1;
452 mutable std::vector<std::unique_ptr<MemoryBuffer>> ThinBuffers;
453};
454
455class BigArchive : public Archive {
456public:
457 /// Fixed-Length Header.
458 struct FixLenHdr {
459 char Magic[sizeof(BigArchiveMagic) - 1]; ///< Big archive magic string.
460 char MemOffset[20]; ///< Offset to member table.
461 char GlobSymOffset[20]; ///< Offset to global symbol table.
462 char
463 GlobSym64Offset[20]; ///< Offset global symbol table for 64-bit objects.
464 char FirstChildOffset[20]; ///< Offset to first archive member.
465 char LastChildOffset[20]; ///< Offset to last archive member.
466 char FreeOffset[20]; ///< Offset to first mem on free list.
467 };
468
469 const FixLenHdr *ArFixLenHdr;
470 uint64_t FirstChildOffset = 0;
471 uint64_t LastChildOffset = 0;
472 std::string MergedGlobalSymtabBuf;
473 bool Has32BitGlobalSymtab = false;
474 bool Has64BitGlobalSymtab = false;
475
476public:
477 LLVM_ABI BigArchive(MemoryBufferRef Source, Error &Err);
478 uint64_t getFirstChildOffset() const override { return FirstChildOffset; }
479 uint64_t getLastChildOffset() const { return LastChildOffset; }
480 bool isEmpty() const override { return getFirstChildOffset() == 0; }
481
482 bool has32BitGlobalSymtab() { return Has32BitGlobalSymtab; }
483 bool has64BitGlobalSymtab() { return Has64BitGlobalSymtab; }
484};
485
486class ZOSArchive : public Archive {
487public:
488 // Fixed-Length header.
489 struct FixLenHdr {
490 char Magic[sizeof(ZOSArchiveMagic) - 1]; ///< ZOS archive magic string.
491 };
492
493 LLVM_ABI ZOSArchive(MemoryBufferRef Source, Error &Err);
494
495private:
496 std::string SymbolTableBuf; // __.SYMDEF strings converted to ASCII.
497};
498} // end namespace object
499} // end namespace llvm
500
501#endif // LLVM_OBJECT_ARCHIVE_H
502