1//===- Archive.h - ar archive file format -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the ar archive file format class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_OBJECT_ARCHIVE_H
14#define LLVM_OBJECT_ARCHIVE_H
15
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/fallible_iterator.h"
18#include "llvm/ADT/iterator_range.h"
19#include "llvm/Object/Binary.h"
20#include "llvm/Support/Chrono.h"
21#include "llvm/Support/Compiler.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include <cassert>
26#include <cstdint>
27#include <memory>
28#include <string>
29#include <vector>
30
31namespace llvm {
32namespace object {
33
// Magic strings of the archive flavours named by each constant. Every array
// carries the implicit trailing NUL of its string literal, so the number of
// magic bytes is sizeof(X) - 1 (BigArchive::FixLenHdr::Magic is sized that
// way). Declared `inline constexpr` so the header contributes one definition
// of each array instead of a private copy per translation unit.
inline constexpr char ArchiveMagic[] = "!<arch>\n";
inline constexpr char ThinArchiveMagic[] = "!<thin>\n";
inline constexpr char BigArchiveMagic[] = "<bigaf>\n";
// "!<arch>\n" in EBCDIC.
inline constexpr char ZOSArchiveMagic[] = "\x5A\x4C\x81\x99\x83\x88\x6E\x15";
39
40class Archive;
41
42class AbstractArchiveMemberHeader {
43protected:
44 AbstractArchiveMemberHeader(const Archive *Parent) : Parent(Parent){};
45
46public:
47 friend class Archive;
48 virtual std::unique_ptr<AbstractArchiveMemberHeader> clone() const = 0;
49 virtual ~AbstractArchiveMemberHeader() = default;
50
51 /// Get the name without looking up long names.
52 virtual Expected<StringRef> getRawName() const = 0;
53 virtual StringRef getRawAccessMode() const = 0;
54 virtual StringRef getRawLastModified() const = 0;
55 virtual StringRef getRawUID() const = 0;
56 virtual StringRef getRawGID() const = 0;
57
58 /// Get the name looking up long names.
59 virtual Expected<StringRef> getName(uint64_t Size) const = 0;
60 virtual Expected<uint64_t> getSize() const = 0;
61 virtual uint64_t getOffset() const = 0;
62
63 /// Get next file member location.
64 virtual Expected<const char *> getNextChildLoc() const = 0;
65 virtual Expected<bool> isThin() const = 0;
66
67 LLVM_ABI Expected<sys::fs::perms> getAccessMode() const;
68 LLVM_ABI Expected<sys::TimePoint<std::chrono::seconds>>
69 getLastModified() const;
70 LLVM_ABI Expected<unsigned> getUID() const;
71 LLVM_ABI Expected<unsigned> getGID() const;
72
73 /// Returns the size in bytes of the format-defined member header of the
74 /// concrete archive type.
75 virtual uint64_t getSizeOf() const = 0;
76
77 const Archive *Parent;
78};
79
80template <typename T>
81class LLVM_ABI CommonArchiveMemberHeader : public AbstractArchiveMemberHeader {
82public:
83 CommonArchiveMemberHeader(const Archive *Parent, const T *RawHeaderPtr)
84 : AbstractArchiveMemberHeader(Parent), ArMemHdr(RawHeaderPtr){};
85 StringRef getRawAccessMode() const override;
86 StringRef getRawLastModified() const override;
87 StringRef getRawUID() const override;
88 StringRef getRawGID() const override;
89
90 uint64_t getOffset() const override;
91 uint64_t getSizeOf() const override { return sizeof(T); }
92
93 T const *ArMemHdr;
94};
95
/// Raw member header layout used by ArchiveMemberHeader. Every field is a
/// fixed-width char array, so the struct has no padding.
struct UnixArMemHdrType {
  char Name[16];
  char LastModified[12];
  char UID[6];
  char GID[6];
  char AccessMode[8];
  char Size[10]; ///< Size of data, not including header or padding.
  char Terminator[2];
};

// The struct is overlaid on raw archive bytes (sizeof(T) is reported by
// CommonArchiveMemberHeader::getSizeOf), so pin the layout at compile time.
static_assert(sizeof(UnixArMemHdrType) == 60,
              "UnixArMemHdrType must be exactly 60 bytes");
105
106class LLVM_ABI ArchiveMemberHeader
107 : public CommonArchiveMemberHeader<UnixArMemHdrType> {
108public:
109 ArchiveMemberHeader(const Archive *Parent, const char *RawHeaderPtr,
110 uint64_t Size, Error *Err);
111
112 std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
113 return std::make_unique<ArchiveMemberHeader>(args: *this);
114 }
115
116 Expected<StringRef> getRawName() const override;
117
118 Expected<StringRef> getName(uint64_t Size) const override;
119 Expected<uint64_t> getSize() const override;
120 Expected<const char *> getNextChildLoc() const override;
121 Expected<bool> isThin() const override;
122};
123
/// Raw file member header layout used by BigArchiveMemberHeader. All fields
/// are fixed-width char arrays, so the struct has no padding.
struct BigArMemHdrType {
  char Size[20];       // File member size in decimal
  char NextOffset[20]; // Next member offset in decimal
  char PrevOffset[20]; // Previous member offset in decimal
  char LastModified[12];
  char UID[12];
  char GID[12];
  char AccessMode[12];
  char NameLen[4]; // File member name length in decimal
  // The two bytes after NameLen are addressable under either name.
  union {
    char Name[2]; // Start of member name
    char Terminator[2];
  };
};

// The struct is overlaid on raw archive bytes (sizeof(T) is reported by
// CommonArchiveMemberHeader::getSizeOf), so pin the layout at compile time.
static_assert(sizeof(BigArMemHdrType) == 114,
              "BigArMemHdrType must be exactly 114 bytes");
139
140// Define file member header of AIX big archive.
141class LLVM_ABI BigArchiveMemberHeader
142 : public CommonArchiveMemberHeader<BigArMemHdrType> {
143
144public:
145 BigArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
146 uint64_t Size, Error *Err);
147 std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
148 return std::make_unique<BigArchiveMemberHeader>(args: *this);
149 }
150
151 Expected<StringRef> getRawName() const override;
152 Expected<uint64_t> getRawNameSize() const;
153
154 Expected<StringRef> getName(uint64_t Size) const override;
155 Expected<uint64_t> getSize() const override;
156 Expected<const char *> getNextChildLoc() const override;
157 Expected<uint64_t> getNextOffset() const;
158 Expected<bool> isThin() const override { return false; }
159};
160
161class LLVM_ABI Archive : public Binary {
162 virtual void anchor();
163
164public:
165 class Child {
166 friend Archive;
167 friend AbstractArchiveMemberHeader;
168
169 const Archive *Parent;
170 std::unique_ptr<AbstractArchiveMemberHeader> Header;
171 /// Includes header but not padding byte.
172 StringRef Data;
173 /// Offset from Data to the start of the file.
174 uint16_t StartOfFile;
175
176 Expected<bool> isThinMember() const;
177
178 public:
179 LLVM_ABI Child(const Archive *Parent, const char *Start, Error *Err);
180 LLVM_ABI Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile);
181
182 Child(const Child &C)
183 : Parent(C.Parent), Data(C.Data), StartOfFile(C.StartOfFile) {
184 if (C.Header)
185 Header = C.Header->clone();
186 }
187
188 Child(Child &&C) {
189 Parent = std::move(C.Parent);
190 Header = std::move(C.Header);
191 Data = C.Data;
192 StartOfFile = C.StartOfFile;
193 }
194
195 Child &operator=(Child &&C) noexcept {
196 if (&C == this)
197 return *this;
198
199 Parent = std::move(C.Parent);
200 Header = std::move(C.Header);
201 Data = C.Data;
202 StartOfFile = C.StartOfFile;
203
204 return *this;
205 }
206
207 Child &operator=(const Child &C) {
208 if (&C == this)
209 return *this;
210
211 Parent = C.Parent;
212 if (C.Header)
213 Header = C.Header->clone();
214 Data = C.Data;
215 StartOfFile = C.StartOfFile;
216
217 return *this;
218 }
219
220 bool operator==(const Child &other) const {
221 assert(!Parent || !other.Parent || Parent == other.Parent);
222 return Data.begin() == other.Data.begin();
223 }
224
225 const Archive *getParent() const { return Parent; }
226 LLVM_ABI Expected<Child> getNext() const;
227
228 LLVM_ABI Expected<StringRef> getName() const;
229 LLVM_ABI Expected<std::string> getFullName() const;
230 Expected<StringRef> getRawName() const { return Header->getRawName(); }
231
232 Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const {
233 return Header->getLastModified();
234 }
235
236 StringRef getRawLastModified() const {
237 return Header->getRawLastModified();
238 }
239
240 Expected<unsigned> getUID() const { return Header->getUID(); }
241 Expected<unsigned> getGID() const { return Header->getGID(); }
242
243 Expected<sys::fs::perms> getAccessMode() const {
244 return Header->getAccessMode();
245 }
246
247 /// \return the size of the archive member without the header or padding.
248 LLVM_ABI Expected<uint64_t> getSize() const;
249 /// \return the size in the archive header for this member.
250 LLVM_ABI Expected<uint64_t> getRawSize() const;
251
252 LLVM_ABI Expected<StringRef> getBuffer() const;
253 LLVM_ABI uint64_t getChildOffset() const;
254 uint64_t getDataOffset() const { return getChildOffset() + StartOfFile; }
255
256 LLVM_ABI Expected<MemoryBufferRef> getMemoryBufferRef() const;
257
258 LLVM_ABI Expected<std::unique_ptr<Binary>>
259 getAsBinary(LLVMContext *Context = nullptr) const;
260 };
261
262 class ChildFallibleIterator {
263 Child C;
264
265 public:
266 ChildFallibleIterator() : C(Child(nullptr, nullptr, nullptr)) {}
267 ChildFallibleIterator(const Child &C) : C(C) {}
268
269 const Child *operator->() const { return &C; }
270 const Child &operator*() const { return C; }
271
272 bool operator==(const ChildFallibleIterator &other) const {
273 // Ignore errors here: If an error occurred during increment then getNext
274 // will have been set to child_end(), and the following comparison should
275 // do the right thing.
276 return C == other.C;
277 }
278
279 bool operator!=(const ChildFallibleIterator &other) const {
280 return !(*this == other);
281 }
282
283 Error inc() {
284 auto NextChild = C.getNext();
285 if (!NextChild)
286 return NextChild.takeError();
287 C = std::move(*NextChild);
288 return Error::success();
289 }
290 };
291
292 using child_iterator = fallible_iterator<ChildFallibleIterator>;
293
294 class Symbol {
295 const Archive *Parent;
296 uint32_t SymbolIndex;
297 uint32_t StringIndex; // Extra index to the string.
298
299 public:
300 Symbol(const Archive *p, uint32_t symi, uint32_t stri)
301 : Parent(p), SymbolIndex(symi), StringIndex(stri) {}
302
303 bool operator==(const Symbol &other) const {
304 return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex);
305 }
306
307 LLVM_ABI StringRef getName() const;
308 LLVM_ABI Expected<Child> getMember() const;
309 LLVM_ABI Symbol getNext() const;
310 LLVM_ABI bool isECSymbol() const;
311 };
312
313 class symbol_iterator {
314 Symbol symbol;
315
316 public:
317 symbol_iterator(const Symbol &s) : symbol(s) {}
318
319 const Symbol *operator->() const { return &symbol; }
320 const Symbol &operator*() const { return symbol; }
321
322 bool operator==(const symbol_iterator &other) const {
323 return symbol == other.symbol;
324 }
325
326 bool operator!=(const symbol_iterator &other) const {
327 return !(*this == other);
328 }
329
330 symbol_iterator &operator++() { // Preincrement
331 symbol = symbol.getNext();
332 return *this;
333 }
334 };
335
336 Archive(MemoryBufferRef Source, Error &Err);
337 static Expected<std::unique_ptr<Archive>> create(MemoryBufferRef Source);
338
339 // Explicitly non-copyable.
340 Archive(Archive const &) = delete;
341 Archive &operator=(Archive const &) = delete;
342
343 /// Size field is 10 decimal digits long
344 static const uint64_t MaxMemberSize = 9999999999;
345
346 enum Kind { K_GNU, K_GNU64, K_BSD, K_DARWIN, K_DARWIN64, K_COFF, K_AIXBIG };
347
348 Kind kind() const { return (Kind)Format; }
349 bool isThin() const { return IsThin; }
350 static object::Archive::Kind getDefaultKind();
351 static object::Archive::Kind getDefaultKindForTriple(const Triple &T);
352
353 child_iterator child_begin(Error &Err, bool SkipInternal = true) const;
354 child_iterator child_end() const;
355 iterator_range<child_iterator> children(Error &Err,
356 bool SkipInternal = true) const {
357 return make_range(x: child_begin(Err, SkipInternal), y: child_end());
358 }
359
360 symbol_iterator symbol_begin() const;
361 symbol_iterator symbol_end() const;
362 iterator_range<symbol_iterator> symbols() const {
363 return make_range(x: symbol_begin(), y: symbol_end());
364 }
365
366 Expected<iterator_range<symbol_iterator>> ec_symbols() const;
367
368 static bool classof(Binary const *v) { return v->isArchive(); }
369
370 // check if a symbol is in the archive
371 Expected<std::optional<Child>> findSym(StringRef name) const;
372
373 virtual bool isEmpty() const;
374 bool hasSymbolTable() const;
375 StringRef getSymbolTable() const { return SymbolTable; }
376 StringRef getStringTable() const { return StringTable; }
377 uint32_t getNumberOfSymbols() const;
378 uint32_t getNumberOfECSymbols() const;
379 virtual uint64_t getFirstChildOffset() const { return getArchiveMagicLen(); }
380
381 std::vector<std::unique_ptr<MemoryBuffer>> takeThinBuffers() {
382 return std::move(ThinBuffers);
383 }
384
385 std::unique_ptr<AbstractArchiveMemberHeader>
386 createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
387 Error *Err) const;
388
389protected:
390 uint64_t getArchiveMagicLen() const;
391 void setFirstRegular(const Child &C);
392
393 StringRef SymbolTable;
394 StringRef ECSymbolTable;
395 StringRef StringTable;
396
397private:
398 StringRef FirstRegularData;
399 uint16_t FirstRegularStartOfFile = -1;
400
401 unsigned Format : 3;
402 unsigned IsThin : 1;
403 mutable std::vector<std::unique_ptr<MemoryBuffer>> ThinBuffers;
404};
405
406class BigArchive : public Archive {
407public:
408 /// Fixed-Length Header.
409 struct FixLenHdr {
410 char Magic[sizeof(BigArchiveMagic) - 1]; ///< Big archive magic string.
411 char MemOffset[20]; ///< Offset to member table.
412 char GlobSymOffset[20]; ///< Offset to global symbol table.
413 char
414 GlobSym64Offset[20]; ///< Offset global symbol table for 64-bit objects.
415 char FirstChildOffset[20]; ///< Offset to first archive member.
416 char LastChildOffset[20]; ///< Offset to last archive member.
417 char FreeOffset[20]; ///< Offset to first mem on free list.
418 };
419
420 const FixLenHdr *ArFixLenHdr;
421 uint64_t FirstChildOffset = 0;
422 uint64_t LastChildOffset = 0;
423 std::string MergedGlobalSymtabBuf;
424 bool Has32BitGlobalSymtab = false;
425 bool Has64BitGlobalSymtab = false;
426
427public:
428 LLVM_ABI BigArchive(MemoryBufferRef Source, Error &Err);
429 uint64_t getFirstChildOffset() const override { return FirstChildOffset; }
430 uint64_t getLastChildOffset() const { return LastChildOffset; }
431 bool isEmpty() const override { return getFirstChildOffset() == 0; }
432
433 bool has32BitGlobalSymtab() { return Has32BitGlobalSymtab; }
434 bool has64BitGlobalSymtab() { return Has64BitGlobalSymtab; }
435};
436
437} // end namespace object
438} // end namespace llvm
439
440#endif // LLVM_OBJECT_ARCHIVE_H
441