1 | //===- Archive.cpp - ar File Format implementation ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the ArchiveObjectFile class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Object/Archive.h" |
14 | #include "llvm/ADT/SmallString.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/ADT/Twine.h" |
17 | #include "llvm/Object/Binary.h" |
18 | #include "llvm/Object/Error.h" |
19 | #include "llvm/Support/Chrono.h" |
20 | #include "llvm/Support/Endian.h" |
21 | #include "llvm/Support/EndianStream.h" |
22 | #include "llvm/Support/Error.h" |
23 | #include "llvm/Support/ErrorOr.h" |
24 | #include "llvm/Support/FileSystem.h" |
25 | #include "llvm/Support/MathExtras.h" |
26 | #include "llvm/Support/MemoryBuffer.h" |
27 | #include "llvm/Support/Path.h" |
28 | #include "llvm/Support/raw_ostream.h" |
29 | #include "llvm/TargetParser/Host.h" |
30 | #include <cassert> |
31 | #include <cstddef> |
32 | #include <cstdint> |
33 | #include <memory> |
34 | #include <string> |
35 | #include <system_error> |
36 | |
37 | using namespace llvm; |
38 | using namespace object; |
39 | using namespace llvm::support::endian; |
40 | |
41 | void Archive::anchor() {} |
42 | |
43 | static Error malformedError(Twine Msg) { |
44 | std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")" ; |
45 | return make_error<GenericBinaryError>(Args: std::move(StringMsg), |
46 | Args: object_error::parse_failed); |
47 | } |
48 | |
49 | static Error |
50 | (const AbstractArchiveMemberHeader *, |
51 | const char *, uint64_t Size) { |
52 | StringRef Msg("remaining size of archive too small for next archive " |
53 | "member header " ); |
54 | |
55 | Expected<StringRef> NameOrErr = ArMemHeader->getName(Size); |
56 | if (NameOrErr) |
57 | return malformedError(Msg: Msg + "for " + *NameOrErr); |
58 | |
59 | consumeError(Err: NameOrErr.takeError()); |
60 | uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data(); |
61 | return malformedError(Msg: Msg + "at offset " + Twine(Offset)); |
62 | } |
63 | |
64 | template <class T, std::size_t N> |
65 | StringRef getFieldRawString(const T (&Field)[N]) { |
66 | return StringRef(Field, N).rtrim(Chars: " " ); |
67 | } |
68 | |
69 | template <class T> |
70 | StringRef CommonArchiveMemberHeader<T>::() const { |
71 | return getFieldRawString(ArMemHdr->AccessMode); |
72 | } |
73 | |
74 | template <class T> |
75 | StringRef CommonArchiveMemberHeader<T>::() const { |
76 | return getFieldRawString(ArMemHdr->LastModified); |
77 | } |
78 | |
79 | template <class T> StringRef CommonArchiveMemberHeader<T>::() const { |
80 | return getFieldRawString(ArMemHdr->UID); |
81 | } |
82 | |
83 | template <class T> StringRef CommonArchiveMemberHeader<T>::() const { |
84 | return getFieldRawString(ArMemHdr->GID); |
85 | } |
86 | |
87 | template <class T> uint64_t CommonArchiveMemberHeader<T>::() const { |
88 | return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); |
89 | } |
90 | |
91 | template class object::<UnixArMemHdrType>; |
92 | template class object::<BigArMemHdrType>; |
93 | |
94 | ArchiveMemberHeader::(const Archive *Parent, |
95 | const char *, |
96 | uint64_t Size, Error *Err) |
97 | : CommonArchiveMemberHeader<UnixArMemHdrType>( |
98 | Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) { |
99 | if (RawHeaderPtr == nullptr) |
100 | return; |
101 | ErrorAsOutParameter ErrAsOutParam(Err); |
102 | |
103 | if (Size < getSizeOf()) { |
104 | *Err = createMemberHeaderParseError(ArMemHeader: this, RawHeaderPtr, Size); |
105 | return; |
106 | } |
107 | if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { |
108 | if (Err) { |
109 | std::string Buf; |
110 | raw_string_ostream OS(Buf); |
111 | OS.write_escaped( |
112 | Str: StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); |
113 | OS.flush(); |
114 | std::string Msg("terminator characters in archive member \"" + Buf + |
115 | "\" not the correct \"`\\n\" values for the archive " |
116 | "member header " ); |
117 | Expected<StringRef> NameOrErr = getName(Size); |
118 | if (!NameOrErr) { |
119 | consumeError(Err: NameOrErr.takeError()); |
120 | uint64_t Offset = RawHeaderPtr - Parent->getData().data(); |
121 | *Err = malformedError(Msg: Msg + "at offset " + Twine(Offset)); |
122 | } else |
123 | *Err = malformedError(Msg: Msg + "for " + NameOrErr.get()); |
124 | } |
125 | return; |
126 | } |
127 | } |
128 | |
129 | BigArchiveMemberHeader::(const Archive *Parent, |
130 | const char *, |
131 | uint64_t Size, Error *Err) |
132 | : CommonArchiveMemberHeader<BigArMemHdrType>( |
133 | Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) { |
134 | if (RawHeaderPtr == nullptr) |
135 | return; |
136 | ErrorAsOutParameter ErrAsOutParam(Err); |
137 | |
138 | if (RawHeaderPtr + getSizeOf() >= Parent->getData().end()) { |
139 | if (Err) |
140 | *Err = malformedError(Msg: "malformed AIX big archive: remaining buffer is " |
141 | "unable to contain next archive member" ); |
142 | return; |
143 | } |
144 | |
145 | if (Size < getSizeOf()) { |
146 | Error SubErr = createMemberHeaderParseError(ArMemHeader: this, RawHeaderPtr, Size); |
147 | if (Err) |
148 | *Err = std::move(SubErr); |
149 | } |
150 | } |
151 | |
152 | // This gets the raw name from the ArMemHdr->Name field and checks that it is |
153 | // valid for the kind of archive. If it is not valid it returns an Error. |
154 | Expected<StringRef> ArchiveMemberHeader::() const { |
155 | char EndCond; |
156 | auto Kind = Parent->kind(); |
157 | if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { |
158 | if (ArMemHdr->Name[0] == ' ') { |
159 | uint64_t Offset = |
160 | reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); |
161 | return malformedError(Msg: "name contains a leading space for archive member " |
162 | "header at offset " + |
163 | Twine(Offset)); |
164 | } |
165 | EndCond = ' '; |
166 | } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') |
167 | EndCond = ' '; |
168 | else |
169 | EndCond = '/'; |
170 | StringRef::size_type end = |
171 | StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(C: EndCond); |
172 | if (end == StringRef::npos) |
173 | end = sizeof(ArMemHdr->Name); |
174 | assert(end <= sizeof(ArMemHdr->Name) && end > 0); |
175 | // Don't include the EndCond if there is one. |
176 | return StringRef(ArMemHdr->Name, end); |
177 | } |
178 | |
179 | Expected<uint64_t> |
180 | (Twine FieldName, const StringRef RawField, |
181 | const Archive *Parent, |
182 | const AbstractArchiveMemberHeader *) { |
183 | uint64_t Value; |
184 | if (RawField.getAsInteger(Radix: 10, Result&: Value)) { |
185 | uint64_t Offset = MemHeader->getOffset(); |
186 | return malformedError(Msg: "characters in " + FieldName + |
187 | " field in archive member header are not " |
188 | "all decimal numbers: '" + |
189 | RawField + |
190 | "' for the archive " |
191 | "member header at offset " + |
192 | Twine(Offset)); |
193 | } |
194 | return Value; |
195 | } |
196 | |
197 | Expected<uint64_t> |
198 | (Twine FieldName, const StringRef RawField, |
199 | const Archive *Parent, |
200 | const AbstractArchiveMemberHeader *) { |
201 | uint64_t Value; |
202 | if (RawField.getAsInteger(Radix: 8, Result&: Value)) { |
203 | uint64_t Offset = MemHeader->getOffset(); |
204 | return malformedError(Msg: "characters in " + FieldName + |
205 | " field in archive member header are not " |
206 | "all octal numbers: '" + |
207 | RawField + |
208 | "' for the archive " |
209 | "member header at offset " + |
210 | Twine(Offset)); |
211 | } |
212 | return Value; |
213 | } |
214 | |
215 | Expected<StringRef> BigArchiveMemberHeader::() const { |
216 | Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField( |
217 | FieldName: "NameLen" , RawField: getFieldRawString(Field: ArMemHdr->NameLen), Parent, MemHeader: this); |
218 | if (!NameLenOrErr) |
219 | // TODO: Out-of-line. |
220 | return NameLenOrErr.takeError(); |
221 | uint64_t NameLen = NameLenOrErr.get(); |
222 | |
223 | // If the name length is odd, pad with '\0' to get an even length. After |
224 | // padding, there is the name terminator "`\n". |
225 | uint64_t NameLenWithPadding = alignTo(Value: NameLen, Align: 2); |
226 | StringRef NameTerminator = "`\n" ; |
227 | StringRef NameStringWithNameTerminator = |
228 | StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size()); |
229 | if (!NameStringWithNameTerminator.ends_with(Suffix: NameTerminator)) { |
230 | uint64_t Offset = |
231 | reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) - |
232 | Parent->getData().data(); |
233 | // TODO: Out-of-line. |
234 | return malformedError( |
235 | Msg: "name does not have name terminator \"`\\n\" for archive member" |
236 | "header at offset " + |
237 | Twine(Offset)); |
238 | } |
239 | return StringRef(ArMemHdr->Name, NameLen); |
240 | } |
241 | |
242 | // member including the header, so the size of any name following the header |
243 | // is checked to make sure it does not overflow. |
244 | Expected<StringRef> ArchiveMemberHeader::(uint64_t Size) const { |
245 | |
246 | // This can be called from the ArchiveMemberHeader constructor when the |
247 | // archive header is truncated to produce an error message with the name. |
248 | // Make sure the name field is not truncated. |
249 | if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { |
250 | uint64_t ArchiveOffset = |
251 | reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); |
252 | return malformedError(Msg: "archive header truncated before the name field " |
253 | "for archive member header at offset " + |
254 | Twine(ArchiveOffset)); |
255 | } |
256 | |
257 | // The raw name itself can be invalid. |
258 | Expected<StringRef> NameOrErr = getRawName(); |
259 | if (!NameOrErr) |
260 | return NameOrErr.takeError(); |
261 | StringRef Name = NameOrErr.get(); |
262 | |
263 | // Check if it's a special name. |
264 | if (Name[0] == '/') { |
265 | if (Name.size() == 1) // Linker member. |
266 | return Name; |
267 | if (Name.size() == 2 && Name[1] == '/') // String table. |
268 | return Name; |
269 | // System libraries from the Windows SDK for Windows 11 contain this symbol. |
270 | // It looks like a CFG guard: we just skip it for now. |
271 | if (Name == "/<XFGHASHMAP>/" ) |
272 | return Name; |
273 | // Some libraries (e.g., arm64rt.lib) from the Windows WDK |
274 | // (version 10.0.22000.0) contain this undocumented special member. |
275 | if (Name == "/<ECSYMBOLS>/" ) |
276 | return Name; |
277 | // It's a long name. |
278 | // Get the string table offset. |
279 | std::size_t StringOffset; |
280 | if (Name.substr(Start: 1).rtrim(Char: ' ').getAsInteger(Radix: 10, Result&: StringOffset)) { |
281 | std::string Buf; |
282 | raw_string_ostream OS(Buf); |
283 | OS.write_escaped(Str: Name.substr(Start: 1).rtrim(Char: ' ')); |
284 | OS.flush(); |
285 | uint64_t ArchiveOffset = |
286 | reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); |
287 | return malformedError(Msg: "long name offset characters after the '/' are " |
288 | "not all decimal numbers: '" + |
289 | Buf + "' for archive member header at offset " + |
290 | Twine(ArchiveOffset)); |
291 | } |
292 | |
293 | // Verify it. |
294 | if (StringOffset >= Parent->getStringTable().size()) { |
295 | uint64_t ArchiveOffset = |
296 | reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); |
297 | return malformedError(Msg: "long name offset " + Twine(StringOffset) + |
298 | " past the end of the string table for archive " |
299 | "member header at offset " + |
300 | Twine(ArchiveOffset)); |
301 | } |
302 | |
303 | // GNU long file names end with a "/\n". |
304 | if (Parent->kind() == Archive::K_GNU || |
305 | Parent->kind() == Archive::K_GNU64) { |
306 | size_t End = Parent->getStringTable().find(C: '\n', /*From=*/StringOffset); |
307 | if (End == StringRef::npos || End < 1 || |
308 | Parent->getStringTable()[End - 1] != '/') { |
309 | return malformedError(Msg: "string table at long name offset " + |
310 | Twine(StringOffset) + "not terminated" ); |
311 | } |
312 | return Parent->getStringTable().slice(Start: StringOffset, End: End - 1); |
313 | } |
314 | return Parent->getStringTable().begin() + StringOffset; |
315 | } |
316 | |
317 | if (Name.starts_with(Prefix: "#1/" )) { |
318 | uint64_t NameLength; |
319 | if (Name.substr(Start: 3).rtrim(Char: ' ').getAsInteger(Radix: 10, Result&: NameLength)) { |
320 | std::string Buf; |
321 | raw_string_ostream OS(Buf); |
322 | OS.write_escaped(Str: Name.substr(Start: 3).rtrim(Char: ' ')); |
323 | OS.flush(); |
324 | uint64_t ArchiveOffset = |
325 | reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); |
326 | return malformedError(Msg: "long name length characters after the #1/ are " |
327 | "not all decimal numbers: '" + |
328 | Buf + "' for archive member header at offset " + |
329 | Twine(ArchiveOffset)); |
330 | } |
331 | if (getSizeOf() + NameLength > Size) { |
332 | uint64_t ArchiveOffset = |
333 | reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); |
334 | return malformedError(Msg: "long name length: " + Twine(NameLength) + |
335 | " extends past the end of the member or archive " |
336 | "for archive member header at offset " + |
337 | Twine(ArchiveOffset)); |
338 | } |
339 | return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), |
340 | NameLength) |
341 | .rtrim(Char: '\0'); |
342 | } |
343 | |
344 | // It is not a long name so trim the blanks at the end of the name. |
345 | if (Name[Name.size() - 1] != '/') |
346 | return Name.rtrim(Char: ' '); |
347 | |
348 | // It's a simple name. |
349 | return Name.drop_back(N: 1); |
350 | } |
351 | |
352 | Expected<StringRef> BigArchiveMemberHeader::(uint64_t Size) const { |
353 | return getRawName(); |
354 | } |
355 | |
356 | Expected<uint64_t> ArchiveMemberHeader::() const { |
357 | return getArchiveMemberDecField(FieldName: "size" , RawField: getFieldRawString(Field: ArMemHdr->Size), |
358 | Parent, MemHeader: this); |
359 | } |
360 | |
361 | Expected<uint64_t> BigArchiveMemberHeader::() const { |
362 | Expected<uint64_t> SizeOrErr = getArchiveMemberDecField( |
363 | FieldName: "size" , RawField: getFieldRawString(Field: ArMemHdr->Size), Parent, MemHeader: this); |
364 | if (!SizeOrErr) |
365 | return SizeOrErr.takeError(); |
366 | |
367 | Expected<uint64_t> NameLenOrErr = getRawNameSize(); |
368 | if (!NameLenOrErr) |
369 | return NameLenOrErr.takeError(); |
370 | |
371 | return *SizeOrErr + alignTo(Value: *NameLenOrErr, Align: 2); |
372 | } |
373 | |
374 | Expected<uint64_t> BigArchiveMemberHeader::() const { |
375 | return getArchiveMemberDecField( |
376 | FieldName: "NameLen" , RawField: getFieldRawString(Field: ArMemHdr->NameLen), Parent, MemHeader: this); |
377 | } |
378 | |
379 | Expected<uint64_t> BigArchiveMemberHeader::() const { |
380 | return getArchiveMemberDecField( |
381 | FieldName: "NextOffset" , RawField: getFieldRawString(Field: ArMemHdr->NextOffset), Parent, MemHeader: this); |
382 | } |
383 | |
384 | Expected<sys::fs::perms> AbstractArchiveMemberHeader::() const { |
385 | Expected<uint64_t> AccessModeOrErr = |
386 | getArchiveMemberOctField(FieldName: "AccessMode" , RawField: getRawAccessMode(), Parent, MemHeader: this); |
387 | if (!AccessModeOrErr) |
388 | return AccessModeOrErr.takeError(); |
389 | return static_cast<sys::fs::perms>(*AccessModeOrErr); |
390 | } |
391 | |
392 | Expected<sys::TimePoint<std::chrono::seconds>> |
393 | AbstractArchiveMemberHeader::() const { |
394 | Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField( |
395 | FieldName: "LastModified" , RawField: getRawLastModified(), Parent, MemHeader: this); |
396 | |
397 | if (!SecondsOrErr) |
398 | return SecondsOrErr.takeError(); |
399 | |
400 | return sys::toTimePoint(T: *SecondsOrErr); |
401 | } |
402 | |
403 | Expected<unsigned> AbstractArchiveMemberHeader::() const { |
404 | StringRef User = getRawUID(); |
405 | if (User.empty()) |
406 | return 0; |
407 | return getArchiveMemberDecField(FieldName: "UID" , RawField: User, Parent, MemHeader: this); |
408 | } |
409 | |
410 | Expected<unsigned> AbstractArchiveMemberHeader::() const { |
411 | StringRef Group = getRawGID(); |
412 | if (Group.empty()) |
413 | return 0; |
414 | return getArchiveMemberDecField(FieldName: "GID" , RawField: Group, Parent, MemHeader: this); |
415 | } |
416 | |
417 | Expected<bool> ArchiveMemberHeader::() const { |
418 | Expected<StringRef> NameOrErr = getRawName(); |
419 | if (!NameOrErr) |
420 | return NameOrErr.takeError(); |
421 | StringRef Name = NameOrErr.get(); |
422 | return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/" ; |
423 | } |
424 | |
425 | Expected<const char *> ArchiveMemberHeader::() const { |
426 | uint64_t Size = getSizeOf(); |
427 | Expected<bool> isThinOrErr = isThin(); |
428 | if (!isThinOrErr) |
429 | return isThinOrErr.takeError(); |
430 | |
431 | bool isThin = isThinOrErr.get(); |
432 | if (!isThin) { |
433 | Expected<uint64_t> MemberSize = getSize(); |
434 | if (!MemberSize) |
435 | return MemberSize.takeError(); |
436 | |
437 | Size += MemberSize.get(); |
438 | } |
439 | |
440 | // If Size is odd, add 1 to make it even. |
441 | const char *NextLoc = |
442 | reinterpret_cast<const char *>(ArMemHdr) + alignTo(Value: Size, Align: 2); |
443 | |
444 | if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd()) |
445 | return nullptr; |
446 | |
447 | return NextLoc; |
448 | } |
449 | |
450 | Expected<const char *> BigArchiveMemberHeader::() const { |
451 | if (getOffset() == |
452 | static_cast<const BigArchive *>(Parent)->getLastChildOffset()) |
453 | return nullptr; |
454 | |
455 | Expected<uint64_t> NextOffsetOrErr = getNextOffset(); |
456 | if (!NextOffsetOrErr) |
457 | return NextOffsetOrErr.takeError(); |
458 | return Parent->getData().data() + NextOffsetOrErr.get(); |
459 | } |
460 | |
461 | Archive::Child::Child(const Archive *Parent, StringRef Data, |
462 | uint16_t StartOfFile) |
463 | : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { |
464 | Header = Parent->createArchiveMemberHeader(RawHeaderPtr: Data.data(), Size: Data.size(), Err: nullptr); |
465 | } |
466 | |
467 | Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) |
468 | : Parent(Parent) { |
469 | if (!Start) { |
470 | Header = nullptr; |
471 | StartOfFile = -1; |
472 | return; |
473 | } |
474 | |
475 | Header = Parent->createArchiveMemberHeader( |
476 | RawHeaderPtr: Start, |
477 | Size: Parent ? Parent->getData().size() - (Start - Parent->getData().data()) |
478 | : 0, |
479 | Err); |
480 | |
481 | // If we are pointed to real data, Start is not a nullptr, then there must be |
482 | // a non-null Err pointer available to report malformed data on. Only in |
483 | // the case sentinel value is being constructed is Err is permitted to be a |
484 | // nullptr. |
485 | assert(Err && "Err can't be nullptr if Start is not a nullptr" ); |
486 | |
487 | ErrorAsOutParameter ErrAsOutParam(Err); |
488 | |
489 | // If there was an error in the construction of the Header |
490 | // then just return with the error now set. |
491 | if (*Err) |
492 | return; |
493 | |
494 | uint64_t Size = Header->getSizeOf(); |
495 | Data = StringRef(Start, Size); |
496 | Expected<bool> isThinOrErr = isThinMember(); |
497 | if (!isThinOrErr) { |
498 | *Err = isThinOrErr.takeError(); |
499 | return; |
500 | } |
501 | bool isThin = isThinOrErr.get(); |
502 | if (!isThin) { |
503 | Expected<uint64_t> MemberSize = getRawSize(); |
504 | if (!MemberSize) { |
505 | *Err = MemberSize.takeError(); |
506 | return; |
507 | } |
508 | Size += MemberSize.get(); |
509 | Data = StringRef(Start, Size); |
510 | } |
511 | |
512 | // Setup StartOfFile and PaddingBytes. |
513 | StartOfFile = Header->getSizeOf(); |
514 | // Don't include attached name. |
515 | Expected<StringRef> NameOrErr = getRawName(); |
516 | if (!NameOrErr) { |
517 | *Err = NameOrErr.takeError(); |
518 | return; |
519 | } |
520 | StringRef Name = NameOrErr.get(); |
521 | |
522 | if (Parent->kind() == Archive::K_AIXBIG) { |
523 | // The actual start of the file is after the name and any necessary |
524 | // even-alignment padding. |
525 | StartOfFile += ((Name.size() + 1) >> 1) << 1; |
526 | } else if (Name.starts_with(Prefix: "#1/" )) { |
527 | uint64_t NameSize; |
528 | StringRef RawNameSize = Name.substr(Start: 3).rtrim(Char: ' '); |
529 | if (RawNameSize.getAsInteger(Radix: 10, Result&: NameSize)) { |
530 | uint64_t Offset = Start - Parent->getData().data(); |
531 | *Err = malformedError(Msg: "long name length characters after the #1/ are " |
532 | "not all decimal numbers: '" + |
533 | RawNameSize + |
534 | "' for archive member header at offset " + |
535 | Twine(Offset)); |
536 | return; |
537 | } |
538 | StartOfFile += NameSize; |
539 | } |
540 | } |
541 | |
542 | Expected<uint64_t> Archive::Child::getSize() const { |
543 | if (Parent->IsThin) |
544 | return Header->getSize(); |
545 | return Data.size() - StartOfFile; |
546 | } |
547 | |
548 | Expected<uint64_t> Archive::Child::getRawSize() const { |
549 | return Header->getSize(); |
550 | } |
551 | |
552 | Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); } |
553 | |
554 | Expected<std::string> Archive::Child::getFullName() const { |
555 | Expected<bool> isThin = isThinMember(); |
556 | if (!isThin) |
557 | return isThin.takeError(); |
558 | assert(isThin.get()); |
559 | Expected<StringRef> NameOrErr = getName(); |
560 | if (!NameOrErr) |
561 | return NameOrErr.takeError(); |
562 | StringRef Name = *NameOrErr; |
563 | if (sys::path::is_absolute(path: Name)) |
564 | return std::string(Name); |
565 | |
566 | SmallString<128> FullName = sys::path::parent_path( |
567 | path: Parent->getMemoryBufferRef().getBufferIdentifier()); |
568 | sys::path::append(path&: FullName, a: Name); |
569 | return std::string(FullName); |
570 | } |
571 | |
572 | Expected<StringRef> Archive::Child::getBuffer() const { |
573 | Expected<bool> isThinOrErr = isThinMember(); |
574 | if (!isThinOrErr) |
575 | return isThinOrErr.takeError(); |
576 | bool isThin = isThinOrErr.get(); |
577 | if (!isThin) { |
578 | Expected<uint64_t> Size = getSize(); |
579 | if (!Size) |
580 | return Size.takeError(); |
581 | return StringRef(Data.data() + StartOfFile, Size.get()); |
582 | } |
583 | Expected<std::string> FullNameOrErr = getFullName(); |
584 | if (!FullNameOrErr) |
585 | return FullNameOrErr.takeError(); |
586 | const std::string &FullName = *FullNameOrErr; |
587 | ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(Filename: FullName); |
588 | if (std::error_code EC = Buf.getError()) |
589 | return errorCodeToError(EC); |
590 | Parent->ThinBuffers.push_back(x: std::move(*Buf)); |
591 | return Parent->ThinBuffers.back()->getBuffer(); |
592 | } |
593 | |
594 | Expected<Archive::Child> Archive::Child::getNext() const { |
595 | Expected<const char *> NextLocOrErr = Header->getNextChildLoc(); |
596 | if (!NextLocOrErr) |
597 | return NextLocOrErr.takeError(); |
598 | |
599 | const char *NextLoc = *NextLocOrErr; |
600 | |
601 | // Check to see if this is at the end of the archive. |
602 | if (NextLoc == nullptr) |
603 | return Child(nullptr, nullptr, nullptr); |
604 | |
605 | // Check to see if this is past the end of the archive. |
606 | if (NextLoc > Parent->Data.getBufferEnd()) { |
607 | std::string Msg("offset to next archive member past the end of the archive " |
608 | "after member " ); |
609 | Expected<StringRef> NameOrErr = getName(); |
610 | if (!NameOrErr) { |
611 | consumeError(Err: NameOrErr.takeError()); |
612 | uint64_t Offset = Data.data() - Parent->getData().data(); |
613 | return malformedError(Msg: Msg + "at offset " + Twine(Offset)); |
614 | } else |
615 | return malformedError(Msg: Msg + NameOrErr.get()); |
616 | } |
617 | |
618 | Error Err = Error::success(); |
619 | Child Ret(Parent, NextLoc, &Err); |
620 | if (Err) |
621 | return std::move(Err); |
622 | return Ret; |
623 | } |
624 | |
625 | uint64_t Archive::Child::getChildOffset() const { |
626 | const char *a = Parent->Data.getBuffer().data(); |
627 | const char *c = Data.data(); |
628 | uint64_t offset = c - a; |
629 | return offset; |
630 | } |
631 | |
632 | Expected<StringRef> Archive::Child::getName() const { |
633 | Expected<uint64_t> RawSizeOrErr = getRawSize(); |
634 | if (!RawSizeOrErr) |
635 | return RawSizeOrErr.takeError(); |
636 | uint64_t RawSize = RawSizeOrErr.get(); |
637 | Expected<StringRef> NameOrErr = |
638 | Header->getName(Size: Header->getSizeOf() + RawSize); |
639 | if (!NameOrErr) |
640 | return NameOrErr.takeError(); |
641 | StringRef Name = NameOrErr.get(); |
642 | return Name; |
643 | } |
644 | |
645 | Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { |
646 | Expected<StringRef> NameOrErr = getName(); |
647 | if (!NameOrErr) |
648 | return NameOrErr.takeError(); |
649 | StringRef Name = NameOrErr.get(); |
650 | Expected<StringRef> Buf = getBuffer(); |
651 | if (!Buf) |
652 | return createFileError(F: Name, E: Buf.takeError()); |
653 | return MemoryBufferRef(*Buf, Name); |
654 | } |
655 | |
656 | Expected<std::unique_ptr<Binary>> |
657 | Archive::Child::getAsBinary(LLVMContext *Context) const { |
658 | Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); |
659 | if (!BuffOrErr) |
660 | return BuffOrErr.takeError(); |
661 | |
662 | auto BinaryOrErr = createBinary(Source: BuffOrErr.get(), Context); |
663 | if (BinaryOrErr) |
664 | return std::move(*BinaryOrErr); |
665 | return BinaryOrErr.takeError(); |
666 | } |
667 | |
668 | Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { |
669 | Error Err = Error::success(); |
670 | std::unique_ptr<Archive> Ret; |
671 | StringRef Buffer = Source.getBuffer(); |
672 | |
673 | if (Buffer.starts_with(Prefix: BigArchiveMagic)) |
674 | Ret = std::make_unique<BigArchive>(args&: Source, args&: Err); |
675 | else |
676 | Ret = std::make_unique<Archive>(args&: Source, args&: Err); |
677 | |
678 | if (Err) |
679 | return std::move(Err); |
680 | return std::move(Ret); |
681 | } |
682 | |
683 | std::unique_ptr<AbstractArchiveMemberHeader> |
684 | Archive::(const char *, uint64_t Size, |
685 | Error *Err) const { |
686 | ErrorAsOutParameter ErrAsOutParam(Err); |
687 | if (kind() != K_AIXBIG) |
688 | return std::make_unique<ArchiveMemberHeader>(args: this, args&: RawHeaderPtr, args&: Size, args&: Err); |
689 | return std::make_unique<BigArchiveMemberHeader>(args: this, args&: RawHeaderPtr, args&: Size, |
690 | args&: Err); |
691 | } |
692 | |
693 | uint64_t Archive::getArchiveMagicLen() const { |
694 | if (isThin()) |
695 | return sizeof(ThinArchiveMagic) - 1; |
696 | |
697 | if (Kind() == K_AIXBIG) |
698 | return sizeof(BigArchiveMagic) - 1; |
699 | |
700 | return sizeof(ArchiveMagic) - 1; |
701 | } |
702 | |
703 | void Archive::setFirstRegular(const Child &C) { |
704 | FirstRegularData = C.Data; |
705 | FirstRegularStartOfFile = C.StartOfFile; |
706 | } |
707 | |
708 | Archive::Archive(MemoryBufferRef Source, Error &Err) |
709 | : Binary(Binary::ID_Archive, Source) { |
710 | ErrorAsOutParameter ErrAsOutParam(Err); |
711 | StringRef Buffer = Data.getBuffer(); |
712 | // Check for sufficient magic. |
713 | if (Buffer.starts_with(Prefix: ThinArchiveMagic)) { |
714 | IsThin = true; |
715 | } else if (Buffer.starts_with(Prefix: ArchiveMagic)) { |
716 | IsThin = false; |
717 | } else if (Buffer.starts_with(Prefix: BigArchiveMagic)) { |
718 | Format = K_AIXBIG; |
719 | IsThin = false; |
720 | return; |
721 | } else { |
722 | Err = make_error<GenericBinaryError>(Args: "file too small to be an archive" , |
723 | Args: object_error::invalid_file_type); |
724 | return; |
725 | } |
726 | |
727 | // Make sure Format is initialized before any call to |
728 | // ArchiveMemberHeader::getName() is made. This could be a valid empty |
729 | // archive which is the same in all formats. So claiming it to be gnu to is |
730 | // fine if not totally correct before we look for a string table or table of |
731 | // contents. |
732 | Format = K_GNU; |
733 | |
734 | // Get the special members. |
735 | child_iterator I = child_begin(Err, SkipInternal: false); |
736 | if (Err) |
737 | return; |
738 | child_iterator E = child_end(); |
739 | |
740 | // See if this is a valid empty archive and if so return. |
741 | if (I == E) { |
742 | Err = Error::success(); |
743 | return; |
744 | } |
745 | const Child *C = &*I; |
746 | |
747 | auto Increment = [&]() { |
748 | ++I; |
749 | if (Err) |
750 | return true; |
751 | C = &*I; |
752 | return false; |
753 | }; |
754 | |
755 | Expected<StringRef> NameOrErr = C->getRawName(); |
756 | if (!NameOrErr) { |
757 | Err = NameOrErr.takeError(); |
758 | return; |
759 | } |
760 | StringRef Name = NameOrErr.get(); |
761 | |
762 | // Below is the pattern that is used to figure out the archive format |
763 | // GNU archive format |
764 | // First member : / (may exist, if it exists, points to the symbol table ) |
765 | // Second member : // (may exist, if it exists, points to the string table) |
766 | // Note : The string table is used if the filename exceeds 15 characters |
767 | // BSD archive format |
768 | // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) |
769 | // There is no string table, if the filename exceeds 15 characters or has a |
770 | // embedded space, the filename has #1/<size>, The size represents the size |
771 | // of the filename that needs to be read after the archive header |
772 | // COFF archive format |
773 | // First member : / |
774 | // Second member : / (provides a directory of symbols) |
775 | // Third member : // (may exist, if it exists, contains the string table) |
776 | // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present |
777 | // even if the string table is empty. However, lib.exe does not in fact |
778 | // seem to create the third member if there's no member whose filename |
779 | // exceeds 15 characters. So the third member is optional. |
780 | |
781 | if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64" ) { |
782 | if (Name == "__.SYMDEF" ) |
783 | Format = K_BSD; |
784 | else // Name == "__.SYMDEF_64" |
785 | Format = K_DARWIN64; |
786 | // We know that the symbol table is not an external file, but we still must |
787 | // check any Expected<> return value. |
788 | Expected<StringRef> BufOrErr = C->getBuffer(); |
789 | if (!BufOrErr) { |
790 | Err = BufOrErr.takeError(); |
791 | return; |
792 | } |
793 | SymbolTable = BufOrErr.get(); |
794 | if (Increment()) |
795 | return; |
796 | setFirstRegular(*C); |
797 | |
798 | Err = Error::success(); |
799 | return; |
800 | } |
801 | |
802 | if (Name.starts_with(Prefix: "#1/" )) { |
803 | Format = K_BSD; |
804 | // We know this is BSD, so getName will work since there is no string table. |
805 | Expected<StringRef> NameOrErr = C->getName(); |
806 | if (!NameOrErr) { |
807 | Err = NameOrErr.takeError(); |
808 | return; |
809 | } |
810 | Name = NameOrErr.get(); |
811 | if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF" ) { |
812 | // We know that the symbol table is not an external file, but we still |
813 | // must check any Expected<> return value. |
814 | Expected<StringRef> BufOrErr = C->getBuffer(); |
815 | if (!BufOrErr) { |
816 | Err = BufOrErr.takeError(); |
817 | return; |
818 | } |
819 | SymbolTable = BufOrErr.get(); |
820 | if (Increment()) |
821 | return; |
822 | } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64" ) { |
823 | Format = K_DARWIN64; |
824 | // We know that the symbol table is not an external file, but we still |
825 | // must check any Expected<> return value. |
826 | Expected<StringRef> BufOrErr = C->getBuffer(); |
827 | if (!BufOrErr) { |
828 | Err = BufOrErr.takeError(); |
829 | return; |
830 | } |
831 | SymbolTable = BufOrErr.get(); |
832 | if (Increment()) |
833 | return; |
834 | } |
835 | setFirstRegular(*C); |
836 | return; |
837 | } |
838 | |
839 | // MIPS 64-bit ELF archives use a special format of a symbol table. |
840 | // This format is marked by `ar_name` field equals to "/SYM64/". |
841 | // For detailed description see page 96 in the following document: |
842 | // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf |
843 | |
844 | bool has64SymTable = false; |
845 | if (Name == "/" || Name == "/SYM64/" ) { |
846 | // We know that the symbol table is not an external file, but we still |
847 | // must check any Expected<> return value. |
848 | Expected<StringRef> BufOrErr = C->getBuffer(); |
849 | if (!BufOrErr) { |
850 | Err = BufOrErr.takeError(); |
851 | return; |
852 | } |
853 | SymbolTable = BufOrErr.get(); |
854 | if (Name == "/SYM64/" ) |
855 | has64SymTable = true; |
856 | |
857 | if (Increment()) |
858 | return; |
859 | if (I == E) { |
860 | Err = Error::success(); |
861 | return; |
862 | } |
863 | Expected<StringRef> NameOrErr = C->getRawName(); |
864 | if (!NameOrErr) { |
865 | Err = NameOrErr.takeError(); |
866 | return; |
867 | } |
868 | Name = NameOrErr.get(); |
869 | } |
870 | |
871 | if (Name == "//" ) { |
872 | Format = has64SymTable ? K_GNU64 : K_GNU; |
873 | // The string table is never an external member, but we still |
874 | // must check any Expected<> return value. |
875 | Expected<StringRef> BufOrErr = C->getBuffer(); |
876 | if (!BufOrErr) { |
877 | Err = BufOrErr.takeError(); |
878 | return; |
879 | } |
880 | StringTable = BufOrErr.get(); |
881 | if (Increment()) |
882 | return; |
883 | setFirstRegular(*C); |
884 | Err = Error::success(); |
885 | return; |
886 | } |
887 | |
888 | if (Name[0] != '/') { |
889 | Format = has64SymTable ? K_GNU64 : K_GNU; |
890 | setFirstRegular(*C); |
891 | Err = Error::success(); |
892 | return; |
893 | } |
894 | |
895 | if (Name != "/" ) { |
896 | Err = errorCodeToError(EC: object_error::parse_failed); |
897 | return; |
898 | } |
899 | |
900 | Format = K_COFF; |
901 | // We know that the symbol table is not an external file, but we still |
902 | // must check any Expected<> return value. |
903 | Expected<StringRef> BufOrErr = C->getBuffer(); |
904 | if (!BufOrErr) { |
905 | Err = BufOrErr.takeError(); |
906 | return; |
907 | } |
908 | SymbolTable = BufOrErr.get(); |
909 | |
910 | if (Increment()) |
911 | return; |
912 | |
913 | if (I == E) { |
914 | setFirstRegular(*C); |
915 | Err = Error::success(); |
916 | return; |
917 | } |
918 | |
919 | NameOrErr = C->getRawName(); |
920 | if (!NameOrErr) { |
921 | Err = NameOrErr.takeError(); |
922 | return; |
923 | } |
924 | Name = NameOrErr.get(); |
925 | |
926 | if (Name == "//" ) { |
927 | // The string table is never an external member, but we still |
928 | // must check any Expected<> return value. |
929 | Expected<StringRef> BufOrErr = C->getBuffer(); |
930 | if (!BufOrErr) { |
931 | Err = BufOrErr.takeError(); |
932 | return; |
933 | } |
934 | StringTable = BufOrErr.get(); |
935 | if (Increment()) |
936 | return; |
937 | |
938 | if (I == E) { |
939 | setFirstRegular(*C); |
940 | Err = Error::success(); |
941 | return; |
942 | } |
943 | |
944 | NameOrErr = C->getRawName(); |
945 | if (!NameOrErr) { |
946 | Err = NameOrErr.takeError(); |
947 | return; |
948 | } |
949 | Name = NameOrErr.get(); |
950 | } |
951 | |
952 | if (Name == "/<ECSYMBOLS>/" ) { |
953 | // ARM64EC-aware libraries contain an additional special member with |
954 | // an EC symbol map after the string table. Its format is similar to a |
955 | // regular symbol map, except it doesn't contain member offsets. Its indexes |
956 | // refer to member offsets from the regular symbol table instead. |
957 | Expected<StringRef> BufOrErr = C->getBuffer(); |
958 | if (!BufOrErr) { |
959 | Err = BufOrErr.takeError(); |
960 | return; |
961 | } |
962 | ECSymbolTable = BufOrErr.get(); |
963 | if (Increment()) |
964 | return; |
965 | } |
966 | |
967 | setFirstRegular(*C); |
968 | Err = Error::success(); |
969 | } |
970 | |
971 | object::Archive::Kind Archive::getDefaultKindForTriple(const Triple &T) { |
972 | if (T.isOSDarwin()) |
973 | return object::Archive::K_DARWIN; |
974 | if (T.isOSAIX()) |
975 | return object::Archive::K_AIXBIG; |
976 | if (T.isOSWindows()) |
977 | return object::Archive::K_COFF; |
978 | return object::Archive::K_GNU; |
979 | } |
980 | |
981 | object::Archive::Kind Archive::getDefaultKind() { |
982 | Triple HostTriple(sys::getDefaultTargetTriple()); |
983 | return getDefaultKindForTriple(T: HostTriple); |
984 | } |
985 | |
986 | Archive::child_iterator Archive::child_begin(Error &Err, |
987 | bool SkipInternal) const { |
988 | if (isEmpty()) |
989 | return child_end(); |
990 | |
991 | if (SkipInternal) |
992 | return child_iterator::itr( |
993 | I: Child(this, FirstRegularData, FirstRegularStartOfFile), Err); |
994 | |
995 | const char *Loc = Data.getBufferStart() + getFirstChildOffset(); |
996 | Child C(this, Loc, &Err); |
997 | if (Err) |
998 | return child_end(); |
999 | return child_iterator::itr(I: C, Err); |
1000 | } |
1001 | |
1002 | Archive::child_iterator Archive::child_end() const { |
1003 | return child_iterator::end(I: Child(nullptr, nullptr, nullptr)); |
1004 | } |
1005 | |
1006 | bool Archive::Symbol::isECSymbol() const { |
1007 | // Symbols use SymbolCount..SymbolCount+getNumberOfECSymbols() for EC symbol |
1008 | // indexes. |
1009 | uint32_t SymbolCount = Parent->getNumberOfSymbols(); |
1010 | return SymbolCount <= SymbolIndex && |
1011 | SymbolIndex < SymbolCount + Parent->getNumberOfECSymbols(); |
1012 | } |
1013 | |
1014 | StringRef Archive::Symbol::getName() const { |
1015 | if (isECSymbol()) |
1016 | return Parent->ECSymbolTable.begin() + StringIndex; |
1017 | return Parent->getSymbolTable().begin() + StringIndex; |
1018 | } |
1019 | |
1020 | Expected<Archive::Child> Archive::Symbol::getMember() const { |
1021 | const char *Buf = Parent->getSymbolTable().begin(); |
1022 | const char *Offsets = Buf; |
1023 | if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64 || |
1024 | Parent->kind() == K_AIXBIG) |
1025 | Offsets += sizeof(uint64_t); |
1026 | else |
1027 | Offsets += sizeof(uint32_t); |
1028 | uint64_t Offset = 0; |
1029 | if (Parent->kind() == K_GNU) { |
1030 | Offset = read32be(P: Offsets + SymbolIndex * 4); |
1031 | } else if (Parent->kind() == K_GNU64 || Parent->kind() == K_AIXBIG) { |
1032 | Offset = read64be(P: Offsets + SymbolIndex * 8); |
1033 | } else if (Parent->kind() == K_BSD) { |
1034 | // The SymbolIndex is an index into the ranlib structs that start at |
1035 | // Offsets (the first uint32_t is the number of bytes of the ranlib |
1036 | // structs). The ranlib structs are a pair of uint32_t's the first |
1037 | // being a string table offset and the second being the offset into |
1038 | // the archive of the member that defines the symbol. Which is what |
1039 | // is needed here. |
1040 | Offset = read32le(P: Offsets + SymbolIndex * 8 + 4); |
1041 | } else if (Parent->kind() == K_DARWIN64) { |
1042 | // The SymbolIndex is an index into the ranlib_64 structs that start at |
1043 | // Offsets (the first uint64_t is the number of bytes of the ranlib_64 |
1044 | // structs). The ranlib_64 structs are a pair of uint64_t's the first |
1045 | // being a string table offset and the second being the offset into |
1046 | // the archive of the member that defines the symbol. Which is what |
1047 | // is needed here. |
1048 | Offset = read64le(P: Offsets + SymbolIndex * 16 + 8); |
1049 | } else { |
1050 | // Skip offsets. |
1051 | uint32_t MemberCount = read32le(P: Buf); |
1052 | Buf += MemberCount * 4 + 4; |
1053 | |
1054 | uint32_t SymbolCount = read32le(P: Buf); |
1055 | uint16_t OffsetIndex; |
1056 | if (SymbolIndex < SymbolCount) { |
1057 | // Skip SymbolCount to get to the indices table. |
1058 | const char *Indices = Buf + 4; |
1059 | |
1060 | // Get the index of the offset in the file member offset table for this |
1061 | // symbol. |
1062 | OffsetIndex = read16le(P: Indices + SymbolIndex * 2); |
1063 | } else if (isECSymbol()) { |
1064 | // Skip SymbolCount to get to the indices table. |
1065 | const char *Indices = Parent->ECSymbolTable.begin() + 4; |
1066 | |
1067 | // Get the index of the offset in the file member offset table for this |
1068 | // symbol. |
1069 | OffsetIndex = read16le(P: Indices + (SymbolIndex - SymbolCount) * 2); |
1070 | } else { |
1071 | return errorCodeToError(EC: object_error::parse_failed); |
1072 | } |
1073 | // Subtract 1 since OffsetIndex is 1 based. |
1074 | --OffsetIndex; |
1075 | |
1076 | if (OffsetIndex >= MemberCount) |
1077 | return errorCodeToError(EC: object_error::parse_failed); |
1078 | |
1079 | Offset = read32le(P: Offsets + OffsetIndex * 4); |
1080 | } |
1081 | |
1082 | const char *Loc = Parent->getData().begin() + Offset; |
1083 | Error Err = Error::success(); |
1084 | Child C(Parent, Loc, &Err); |
1085 | if (Err) |
1086 | return std::move(Err); |
1087 | return C; |
1088 | } |
1089 | |
1090 | Archive::Symbol Archive::Symbol::getNext() const { |
1091 | Symbol t(*this); |
1092 | if (Parent->kind() == K_BSD) { |
1093 | // t.StringIndex is an offset from the start of the __.SYMDEF or |
1094 | // "__.SYMDEF SORTED" member into the string table for the ranlib |
1095 | // struct indexed by t.SymbolIndex . To change t.StringIndex to the |
1096 | // offset in the string table for t.SymbolIndex+1 we subtract the |
1097 | // its offset from the start of the string table for t.SymbolIndex |
1098 | // and add the offset of the string table for t.SymbolIndex+1. |
1099 | |
1100 | // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t |
1101 | // which is the number of bytes of ranlib structs that follow. The ranlib |
1102 | // structs are a pair of uint32_t's the first being a string table offset |
1103 | // and the second being the offset into the archive of the member that |
1104 | // define the symbol. After that the next uint32_t is the byte count of |
1105 | // the string table followed by the string table. |
1106 | const char *Buf = Parent->getSymbolTable().begin(); |
1107 | uint32_t RanlibCount = 0; |
1108 | RanlibCount = read32le(P: Buf) / 8; |
1109 | // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) |
1110 | // don't change the t.StringIndex as we don't want to reference a ranlib |
1111 | // past RanlibCount. |
1112 | if (t.SymbolIndex + 1 < RanlibCount) { |
1113 | const char *Ranlibs = Buf + 4; |
1114 | uint32_t CurRanStrx = 0; |
1115 | uint32_t = 0; |
1116 | CurRanStrx = read32le(P: Ranlibs + t.SymbolIndex * 8); |
1117 | NextRanStrx = read32le(P: Ranlibs + (t.SymbolIndex + 1) * 8); |
1118 | t.StringIndex -= CurRanStrx; |
1119 | t.StringIndex += NextRanStrx; |
1120 | } |
1121 | } else if (t.isECSymbol()) { |
1122 | // Go to one past next null. |
1123 | t.StringIndex = Parent->ECSymbolTable.find(C: '\0', From: t.StringIndex) + 1; |
1124 | } else { |
1125 | // Go to one past next null. |
1126 | t.StringIndex = Parent->getSymbolTable().find(C: '\0', From: t.StringIndex) + 1; |
1127 | } |
1128 | ++t.SymbolIndex; |
1129 | return t; |
1130 | } |
1131 | |
1132 | Archive::symbol_iterator Archive::symbol_begin() const { |
1133 | if (!hasSymbolTable()) |
1134 | return symbol_iterator(Symbol(this, 0, 0)); |
1135 | |
1136 | const char *buf = getSymbolTable().begin(); |
1137 | if (kind() == K_GNU) { |
1138 | uint32_t symbol_count = 0; |
1139 | symbol_count = read32be(P: buf); |
1140 | buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); |
1141 | } else if (kind() == K_GNU64) { |
1142 | uint64_t symbol_count = read64be(P: buf); |
1143 | buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); |
1144 | } else if (kind() == K_BSD) { |
1145 | // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t |
1146 | // which is the number of bytes of ranlib structs that follow. The ranlib |
1147 | // structs are a pair of uint32_t's the first being a string table offset |
1148 | // and the second being the offset into the archive of the member that |
1149 | // define the symbol. After that the next uint32_t is the byte count of |
1150 | // the string table followed by the string table. |
1151 | uint32_t ranlib_count = 0; |
1152 | ranlib_count = read32le(P: buf) / 8; |
1153 | const char *ranlibs = buf + 4; |
1154 | uint32_t ran_strx = 0; |
1155 | ran_strx = read32le(P: ranlibs); |
1156 | buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); |
1157 | // Skip the byte count of the string table. |
1158 | buf += sizeof(uint32_t); |
1159 | buf += ran_strx; |
1160 | } else if (kind() == K_DARWIN64) { |
1161 | // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t |
1162 | // which is the number of bytes of ranlib_64 structs that follow. The |
1163 | // ranlib_64 structs are a pair of uint64_t's the first being a string |
1164 | // table offset and the second being the offset into the archive of the |
1165 | // member that define the symbol. After that the next uint64_t is the byte |
1166 | // count of the string table followed by the string table. |
1167 | uint64_t ranlib_count = 0; |
1168 | ranlib_count = read64le(P: buf) / 16; |
1169 | const char *ranlibs = buf + 8; |
1170 | uint64_t ran_strx = 0; |
1171 | ran_strx = read64le(P: ranlibs); |
1172 | buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); |
1173 | // Skip the byte count of the string table. |
1174 | buf += sizeof(uint64_t); |
1175 | buf += ran_strx; |
1176 | } else if (kind() == K_AIXBIG) { |
1177 | buf = getStringTable().begin(); |
1178 | } else { |
1179 | uint32_t member_count = 0; |
1180 | uint32_t symbol_count = 0; |
1181 | member_count = read32le(P: buf); |
1182 | buf += 4 + (member_count * 4); // Skip offsets. |
1183 | symbol_count = read32le(P: buf); |
1184 | buf += 4 + (symbol_count * 2); // Skip indices. |
1185 | } |
1186 | uint32_t string_start_offset = buf - getSymbolTable().begin(); |
1187 | return symbol_iterator(Symbol(this, 0, string_start_offset)); |
1188 | } |
1189 | |
1190 | Archive::symbol_iterator Archive::symbol_end() const { |
1191 | return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); |
1192 | } |
1193 | |
1194 | Expected<iterator_range<Archive::symbol_iterator>> Archive::ec_symbols() const { |
1195 | uint32_t Count = 0; |
1196 | |
1197 | // Validate EC symbol table. |
1198 | if (!ECSymbolTable.empty()) { |
1199 | if (ECSymbolTable.size() < sizeof(uint32_t)) |
1200 | return malformedError(Msg: "invalid EC symbols size (" + |
1201 | Twine(ECSymbolTable.size()) + ")" ); |
1202 | if (SymbolTable.size() < sizeof(uint32_t)) |
1203 | return malformedError(Msg: "invalid symbols size (" + |
1204 | Twine(ECSymbolTable.size()) + ")" ); |
1205 | |
1206 | Count = read32le(P: ECSymbolTable.begin()); |
1207 | size_t StringIndex = sizeof(uint32_t) + Count * sizeof(uint16_t); |
1208 | if (ECSymbolTable.size() < StringIndex) |
1209 | return malformedError(Msg: "invalid EC symbols size. Size was " + |
1210 | Twine(ECSymbolTable.size()) + ", but expected " + |
1211 | Twine(StringIndex)); |
1212 | |
1213 | uint32_t MemberCount = read32le(P: SymbolTable.begin()); |
1214 | const char *Indexes = ECSymbolTable.begin() + sizeof(uint32_t); |
1215 | |
1216 | for (uint32_t i = 0; i < Count; ++i) { |
1217 | uint16_t Index = read16le(P: Indexes + i * sizeof(uint16_t)); |
1218 | if (!Index) |
1219 | return malformedError(Msg: "invalid EC symbol index 0" ); |
1220 | if (Index > MemberCount) |
1221 | return malformedError(Msg: "invalid EC symbol index " + Twine(Index) + |
1222 | " is larger than member count " + |
1223 | Twine(MemberCount)); |
1224 | |
1225 | StringIndex = ECSymbolTable.find(C: '\0', From: StringIndex); |
1226 | if (StringIndex == StringRef::npos) |
1227 | return malformedError(Msg: "malformed EC symbol names: not null-terminated" ); |
1228 | ++StringIndex; |
1229 | } |
1230 | } |
1231 | |
1232 | uint32_t SymbolCount = getNumberOfSymbols(); |
1233 | return make_range( |
1234 | x: symbol_iterator(Symbol(this, SymbolCount, |
1235 | sizeof(uint32_t) + Count * sizeof(uint16_t))), |
1236 | y: symbol_iterator(Symbol(this, SymbolCount + Count, 0))); |
1237 | } |
1238 | |
1239 | uint32_t Archive::getNumberOfSymbols() const { |
1240 | if (!hasSymbolTable()) |
1241 | return 0; |
1242 | const char *buf = getSymbolTable().begin(); |
1243 | if (kind() == K_GNU) |
1244 | return read32be(P: buf); |
1245 | if (kind() == K_GNU64 || kind() == K_AIXBIG) |
1246 | return read64be(P: buf); |
1247 | if (kind() == K_BSD) |
1248 | return read32le(P: buf) / 8; |
1249 | if (kind() == K_DARWIN64) |
1250 | return read64le(P: buf) / 16; |
1251 | uint32_t member_count = 0; |
1252 | member_count = read32le(P: buf); |
1253 | buf += 4 + (member_count * 4); // Skip offsets. |
1254 | return read32le(P: buf); |
1255 | } |
1256 | |
1257 | uint32_t Archive::getNumberOfECSymbols() const { |
1258 | if (ECSymbolTable.size() < sizeof(uint32_t)) |
1259 | return 0; |
1260 | return read32le(P: ECSymbolTable.begin()); |
1261 | } |
1262 | |
1263 | Expected<std::optional<Archive::Child>> Archive::findSym(StringRef name) const { |
1264 | Archive::symbol_iterator bs = symbol_begin(); |
1265 | Archive::symbol_iterator es = symbol_end(); |
1266 | |
1267 | for (; bs != es; ++bs) { |
1268 | StringRef SymName = bs->getName(); |
1269 | if (SymName == name) { |
1270 | if (auto MemberOrErr = bs->getMember()) |
1271 | return Child(*MemberOrErr); |
1272 | else |
1273 | return MemberOrErr.takeError(); |
1274 | } |
1275 | } |
1276 | return std::nullopt; |
1277 | } |
1278 | |
1279 | // Returns true if archive file contains no member file. |
1280 | bool Archive::isEmpty() const { |
1281 | return Data.getBufferSize() == getArchiveMagicLen(); |
1282 | } |
1283 | |
1284 | bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } |
1285 | |
1286 | static Error getGlobalSymtabLocAndSize(const MemoryBufferRef &Data, |
1287 | uint64_t GlobalSymtabOffset, |
1288 | const char *&GlobalSymtabLoc, |
1289 | uint64_t &Size, const char *BitMessage) { |
1290 | uint64_t BufferSize = Data.getBufferSize(); |
1291 | uint64_t GlobalSymtabContentOffset = |
1292 | GlobalSymtabOffset + sizeof(BigArMemHdrType); |
1293 | if (GlobalSymtabContentOffset > BufferSize) |
1294 | return malformedError( |
1295 | Msg: Twine(BitMessage) + " global symbol table header at offset 0x" + |
1296 | Twine::utohexstr(Val: GlobalSymtabOffset) + " and size 0x" + |
1297 | Twine::utohexstr(Val: sizeof(BigArMemHdrType)) + |
1298 | " goes past the end of file" ); |
1299 | |
1300 | GlobalSymtabLoc = Data.getBufferStart() + GlobalSymtabOffset; |
1301 | const BigArMemHdrType *GlobalSymHdr = |
1302 | reinterpret_cast<const BigArMemHdrType *>(GlobalSymtabLoc); |
1303 | StringRef RawOffset = getFieldRawString(Field: GlobalSymHdr->Size); |
1304 | if (RawOffset.getAsInteger(Radix: 10, Result&: Size)) |
1305 | return malformedError(Msg: Twine(BitMessage) + " global symbol table size \"" + |
1306 | RawOffset + "\" is not a number" ); |
1307 | |
1308 | if (GlobalSymtabContentOffset + Size > BufferSize) |
1309 | return malformedError( |
1310 | Msg: Twine(BitMessage) + " global symbol table content at offset 0x" + |
1311 | Twine::utohexstr(Val: GlobalSymtabContentOffset) + " and size 0x" + |
1312 | Twine::utohexstr(Val: Size) + " goes past the end of file" ); |
1313 | |
1314 | return Error::success(); |
1315 | } |
1316 | |
1317 | struct GlobalSymtabInfo { |
1318 | uint64_t SymNum; |
1319 | StringRef SymbolTable; |
1320 | StringRef SymbolOffsetTable; |
1321 | StringRef StringTable; |
1322 | }; |
1323 | |
1324 | static void |
1325 | appendGlobalSymbolTableInfo(SmallVector<GlobalSymtabInfo> &SymtabInfos, |
1326 | const char *GlobalSymtabLoc, uint64_t Size) { |
1327 | // In a big archive, a global symbol table contains the following information: |
1328 | // - The number of symbols. |
1329 | // - The array of offsets into the archive file. The length is eight |
1330 | // times the number of symbols. |
1331 | // - The name-string table. The size is: |
1332 | // Size-(8*(the number of symbols + 1)). |
1333 | |
1334 | StringRef SymbolTable = |
1335 | StringRef(GlobalSymtabLoc + sizeof(BigArMemHdrType), Size); |
1336 | uint64_t SymNum = read64be(P: GlobalSymtabLoc + sizeof(BigArMemHdrType)); |
1337 | StringRef SymbolOffsetTable = StringRef(SymbolTable.data() + 8, 8 * SymNum); |
1338 | unsigned SymOffsetsSize = 8 * (SymNum + 1); |
1339 | uint64_t SymbolTableStringSize = Size - SymOffsetsSize; |
1340 | StringRef StringTable = |
1341 | StringRef(SymbolTable.data() + SymOffsetsSize, SymbolTableStringSize); |
1342 | SymtabInfos.push_back(Elt: {.SymNum: SymNum, .SymbolTable: SymbolTable, .SymbolOffsetTable: SymbolOffsetTable, .StringTable: StringTable}); |
1343 | } |
1344 | |
1345 | BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) |
1346 | : Archive(Source, Err) { |
1347 | ErrorAsOutParameter ErrAsOutParam(&Err); |
1348 | StringRef Buffer = Data.getBuffer(); |
1349 | ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data()); |
1350 | uint64_t BufferSize = Data.getBufferSize(); |
1351 | |
1352 | if (BufferSize < sizeof(FixLenHdr)) { |
1353 | Err = malformedError(Msg: "malformed AIX big archive: incomplete fixed length " |
1354 | "header, the archive is only" + |
1355 | Twine(BufferSize) + " byte(s)" ); |
1356 | return; |
1357 | } |
1358 | |
1359 | StringRef RawOffset = getFieldRawString(Field: ArFixLenHdr->FirstChildOffset); |
1360 | if (RawOffset.getAsInteger(Radix: 10, Result&: FirstChildOffset)) |
1361 | // TODO: Out-of-line. |
1362 | Err = malformedError(Msg: "malformed AIX big archive: first member offset \"" + |
1363 | RawOffset + "\" is not a number" ); |
1364 | |
1365 | RawOffset = getFieldRawString(Field: ArFixLenHdr->LastChildOffset); |
1366 | if (RawOffset.getAsInteger(Radix: 10, Result&: LastChildOffset)) |
1367 | // TODO: Out-of-line. |
1368 | Err = malformedError(Msg: "malformed AIX big archive: last member offset \"" + |
1369 | RawOffset + "\" is not a number" ); |
1370 | |
1371 | uint64_t GlobSymtab32Offset = 0; |
1372 | RawOffset = getFieldRawString(Field: ArFixLenHdr->GlobSymOffset); |
1373 | if (RawOffset.getAsInteger(Radix: 10, Result&: GlobSymtab32Offset)) { |
1374 | Err = malformedError(Msg: "global symbol table " |
1375 | "offset of 32-bit members \"" + |
1376 | RawOffset + "\" is not a number" ); |
1377 | return; |
1378 | } |
1379 | |
1380 | uint64_t GlobSymtab64Offset = 0; |
1381 | RawOffset = getFieldRawString(Field: ArFixLenHdr->GlobSym64Offset); |
1382 | if (RawOffset.getAsInteger(Radix: 10, Result&: GlobSymtab64Offset)) { |
1383 | Err = malformedError(Msg: "global symbol table " |
1384 | "offset of 64-bit members\"" + |
1385 | RawOffset + "\" is not a number" ); |
1386 | return; |
1387 | } |
1388 | |
1389 | const char *GlobSymtab32Loc = nullptr; |
1390 | const char *GlobSymtab64Loc = nullptr; |
1391 | uint64_t GlobSymtab32Size = 0; |
1392 | uint64_t GlobSymtab64Size = 0; |
1393 | const MemoryBufferRef &MemBuffRef = getMemoryBufferRef(); |
1394 | |
1395 | if (GlobSymtab32Offset) { |
1396 | Err = |
1397 | getGlobalSymtabLocAndSize(Data: MemBuffRef, GlobalSymtabOffset: GlobSymtab32Offset, |
1398 | GlobalSymtabLoc&: GlobSymtab32Loc, Size&: GlobSymtab32Size, BitMessage: "32-bit" ); |
1399 | if (Err) |
1400 | return; |
1401 | |
1402 | Has32BitGlobalSymtab = true; |
1403 | } |
1404 | |
1405 | if (GlobSymtab64Offset) { |
1406 | Err = |
1407 | getGlobalSymtabLocAndSize(Data: MemBuffRef, GlobalSymtabOffset: GlobSymtab64Offset, |
1408 | GlobalSymtabLoc&: GlobSymtab64Loc, Size&: GlobSymtab64Size, BitMessage: "64-bit" ); |
1409 | if (Err) |
1410 | return; |
1411 | |
1412 | Has64BitGlobalSymtab = true; |
1413 | } |
1414 | |
1415 | SmallVector<GlobalSymtabInfo> SymtabInfos; |
1416 | |
1417 | if (GlobSymtab32Offset) |
1418 | appendGlobalSymbolTableInfo(SymtabInfos, GlobalSymtabLoc: GlobSymtab32Loc, Size: GlobSymtab32Size); |
1419 | if (GlobSymtab64Offset) |
1420 | appendGlobalSymbolTableInfo(SymtabInfos, GlobalSymtabLoc: GlobSymtab64Loc, Size: GlobSymtab64Size); |
1421 | |
1422 | if (SymtabInfos.size() == 1) { |
1423 | SymbolTable = SymtabInfos[0].SymbolTable; |
1424 | StringTable = SymtabInfos[0].StringTable; |
1425 | } else if (SymtabInfos.size() == 2) { |
1426 | // In order to let the Archive::Symbol::getNext() work for both 32-bit and |
1427 | // 64-bit global symbol tables, we need to merge them into a single table. |
1428 | raw_string_ostream Out(MergedGlobalSymtabBuf); |
1429 | uint64_t SymNum = SymtabInfos[0].SymNum + SymtabInfos[1].SymNum; |
1430 | write(os&: Out, value: SymNum, endian: llvm::endianness::big); |
1431 | // Merge symbol offset. |
1432 | Out << SymtabInfos[0].SymbolOffsetTable; |
1433 | Out << SymtabInfos[1].SymbolOffsetTable; |
1434 | // Merge string table. |
1435 | Out << SymtabInfos[0].StringTable; |
1436 | Out << SymtabInfos[1].StringTable; |
1437 | SymbolTable = MergedGlobalSymtabBuf; |
1438 | // The size of the symbol offset to the member file is 8 bytes. |
1439 | StringTable = StringRef(SymbolTable.begin() + (SymNum + 1) * 8, |
1440 | SymtabInfos[0].StringTable.size() + |
1441 | SymtabInfos[1].StringTable.size()); |
1442 | } |
1443 | |
1444 | child_iterator I = child_begin(Err, SkipInternal: false); |
1445 | if (Err) |
1446 | return; |
1447 | child_iterator E = child_end(); |
1448 | if (I == E) { |
1449 | Err = Error::success(); |
1450 | return; |
1451 | } |
1452 | setFirstRegular(*I); |
1453 | Err = Error::success(); |
1454 | } |
1455 | |