1//===- ELFObject.h ----------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
10#define LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
11
12#include "llvm/ADT/ArrayRef.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/ADT/Twine.h"
15#include "llvm/BinaryFormat/ELF.h"
16#include "llvm/MC/StringTableBuilder.h"
17#include "llvm/ObjCopy/CommonConfig.h"
18#include "llvm/Object/ELFObjectFile.h"
19#include "llvm/Support/Errc.h"
20#include "llvm/Support/FileOutputBuffer.h"
21#include "llvm/Support/MemoryBuffer.h"
22#include <cstddef>
23#include <cstdint>
24#include <functional>
25#include <memory>
26#include <set>
27#include <vector>
28
29namespace llvm {
30enum class DebugCompressionType;
31namespace objcopy {
32namespace elf {
33
34class SectionBase;
35class Section;
36class OwnedDataSection;
37class StringTableSection;
38class SymbolTableSection;
39class RelocationSection;
40class DynamicRelocationSection;
41class GnuDebugLinkSection;
42class GroupSection;
43class SectionIndexSection;
44class CompressedSection;
45class DecompressedSection;
46class Segment;
47class Object;
48struct Symbol;
49
50class SectionTableRef {
51 ArrayRef<std::unique_ptr<SectionBase>> Sections;
52
53public:
54 using iterator = pointee_iterator<const std::unique_ptr<SectionBase> *>;
55
56 explicit SectionTableRef(ArrayRef<std::unique_ptr<SectionBase>> Secs)
57 : Sections(Secs) {}
58 SectionTableRef(const SectionTableRef &) = default;
59
60 iterator begin() const { return iterator(Sections.data()); }
61 iterator end() const { return iterator(Sections.data() + Sections.size()); }
62 size_t size() const { return Sections.size(); }
63
64 Expected<SectionBase *> getSection(uint32_t Index, Twine ErrMsg);
65
66 template <class T>
67 Expected<T *> getSectionOfType(uint32_t Index, Twine IndexErrMsg,
68 Twine TypeErrMsg);
69};
70
/// ELF flavours distinguished by the tool. Going by the enumerator names these
/// are the 32/64-bit and little/big-endian combinations. The values are the
/// implicit 0..3 in declaration order.
enum ElfType {
  ELFT_ELF32LE,
  ELFT_ELF64LE,
  ELFT_ELF32BE,
  ELFT_ELF64BE
};
72
73class SectionVisitor {
74public:
75 virtual ~SectionVisitor() = default;
76
77 virtual Error visit(const Section &Sec) = 0;
78 virtual Error visit(const OwnedDataSection &Sec) = 0;
79 virtual Error visit(const StringTableSection &Sec) = 0;
80 virtual Error visit(const SymbolTableSection &Sec) = 0;
81 virtual Error visit(const RelocationSection &Sec) = 0;
82 virtual Error visit(const DynamicRelocationSection &Sec) = 0;
83 virtual Error visit(const GnuDebugLinkSection &Sec) = 0;
84 virtual Error visit(const GroupSection &Sec) = 0;
85 virtual Error visit(const SectionIndexSection &Sec) = 0;
86 virtual Error visit(const CompressedSection &Sec) = 0;
87 virtual Error visit(const DecompressedSection &Sec) = 0;
88};
89
90class MutableSectionVisitor {
91public:
92 virtual ~MutableSectionVisitor() = default;
93
94 virtual Error visit(Section &Sec) = 0;
95 virtual Error visit(OwnedDataSection &Sec) = 0;
96 virtual Error visit(StringTableSection &Sec) = 0;
97 virtual Error visit(SymbolTableSection &Sec) = 0;
98 virtual Error visit(RelocationSection &Sec) = 0;
99 virtual Error visit(DynamicRelocationSection &Sec) = 0;
100 virtual Error visit(GnuDebugLinkSection &Sec) = 0;
101 virtual Error visit(GroupSection &Sec) = 0;
102 virtual Error visit(SectionIndexSection &Sec) = 0;
103 virtual Error visit(CompressedSection &Sec) = 0;
104 virtual Error visit(DecompressedSection &Sec) = 0;
105};
106
107class SectionWriter : public SectionVisitor {
108protected:
109 WritableMemoryBuffer &Out;
110
111public:
112 virtual ~SectionWriter() = default;
113
114 Error visit(const Section &Sec) override;
115 Error visit(const OwnedDataSection &Sec) override;
116 Error visit(const StringTableSection &Sec) override;
117 Error visit(const DynamicRelocationSection &Sec) override;
118 Error visit(const SymbolTableSection &Sec) override = 0;
119 Error visit(const RelocationSection &Sec) override = 0;
120 Error visit(const GnuDebugLinkSection &Sec) override = 0;
121 Error visit(const GroupSection &Sec) override = 0;
122 Error visit(const SectionIndexSection &Sec) override = 0;
123 Error visit(const CompressedSection &Sec) override = 0;
124 Error visit(const DecompressedSection &Sec) override = 0;
125
126 explicit SectionWriter(WritableMemoryBuffer &Buf) : Out(Buf) {}
127};
128
129template <class ELFT> class ELFSectionWriter : public SectionWriter {
130private:
131 using Elf_Word = typename ELFT::Word;
132 using Elf_Rel = typename ELFT::Rel;
133 using Elf_Rela = typename ELFT::Rela;
134 using Elf_Sym = typename ELFT::Sym;
135
136public:
137 virtual ~ELFSectionWriter() {}
138 Error visit(const SymbolTableSection &Sec) override;
139 Error visit(const RelocationSection &Sec) override;
140 Error visit(const GnuDebugLinkSection &Sec) override;
141 Error visit(const GroupSection &Sec) override;
142 Error visit(const SectionIndexSection &Sec) override;
143 Error visit(const CompressedSection &Sec) override;
144 Error visit(const DecompressedSection &Sec) override;
145
146 explicit ELFSectionWriter(WritableMemoryBuffer &Buf) : SectionWriter(Buf) {}
147};
148
149template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
150private:
151 using Elf_Rel = typename ELFT::Rel;
152 using Elf_Rela = typename ELFT::Rela;
153 using Elf_Sym = typename ELFT::Sym;
154 using Elf_Word = typename ELFT::Word;
155 using Elf_Xword = typename ELFT::Xword;
156
157public:
158 Error visit(Section &Sec) override;
159 Error visit(OwnedDataSection &Sec) override;
160 Error visit(StringTableSection &Sec) override;
161 Error visit(DynamicRelocationSection &Sec) override;
162 Error visit(SymbolTableSection &Sec) override;
163 Error visit(RelocationSection &Sec) override;
164 Error visit(GnuDebugLinkSection &Sec) override;
165 Error visit(GroupSection &Sec) override;
166 Error visit(SectionIndexSection &Sec) override;
167 Error visit(CompressedSection &Sec) override;
168 Error visit(DecompressedSection &Sec) override;
169};
170
// Expands to friend declarations for every section writer / sizer class, so
// that those classes can reach the non-public members of the section class
// that uses this macro.
#define MAKE_SEC_WRITER_FRIEND \
  friend class SectionWriter; \
  template <class ELFT> friend class ELFSectionWriter; \
  template <class ELFT> friend class ELFSectionSizer; \
  friend class IHexSectionWriterBase; \
  friend class IHexSectionWriter; \
  friend class SRECSectionWriterBase; \
  friend class SRECSectionWriter; \
  friend class SRECSizeCalculator;
180
181class BinarySectionWriter : public SectionWriter {
182public:
183 virtual ~BinarySectionWriter() {}
184
185 Error visit(const SymbolTableSection &Sec) override;
186 Error visit(const RelocationSection &Sec) override;
187 Error visit(const GnuDebugLinkSection &Sec) override;
188 Error visit(const GroupSection &Sec) override;
189 Error visit(const SectionIndexSection &Sec) override;
190 Error visit(const CompressedSection &Sec) override;
191 Error visit(const DecompressedSection &Sec) override;
192
193 explicit BinarySectionWriter(WritableMemoryBuffer &Buf)
194 : SectionWriter(Buf) {}
195};
196
197using IHexLineData = SmallVector<char, 64>;
198
199struct IHexRecord {
200 // Memory address of the record.
201 uint16_t Addr;
202 // Record type (see below).
203 uint16_t Type;
204 // Record data in hexadecimal form.
205 StringRef HexData;
206
207 // Helper method to get file length of the record
208 // including newline character
209 static size_t getLength(size_t DataSize) {
210 // :LLAAAATT[DD...DD]CC'
211 return DataSize * 2 + 11;
212 }
213
214 // Gets length of line in a file (getLength + CRLF).
215 static size_t getLineLength(size_t DataSize) {
216 return getLength(DataSize) + 2;
217 }
218
219 // Given type, address and data returns line which can
220 // be written to output file.
221 static IHexLineData getLine(uint8_t Type, uint16_t Addr,
222 ArrayRef<uint8_t> Data);
223
224 // Parses the line and returns record if possible.
225 // Line should be trimmed from whitespace characters.
226 static Expected<IHexRecord> parse(StringRef Line);
227
228 // Calculates checksum of stringified record representation
229 // S must NOT contain leading ':' and trailing whitespace
230 // characters
231 static uint8_t getChecksum(StringRef S);
232
233 enum Type {
234 // Contains data and a 16-bit starting address for the data.
235 // The byte count specifies number of data bytes in the record.
236 Data = 0,
237 // Must occur exactly once per file in the last line of the file.
238 // The data field is empty (thus byte count is 00) and the address
239 // field is typically 0000.
240 EndOfFile = 1,
241 // The data field contains a 16-bit segment base address (thus byte
242 // count is always 02) compatible with 80x86 real mode addressing.
243 // The address field (typically 0000) is ignored. The segment address
244 // from the most recent 02 record is multiplied by 16 and added to each
245 // subsequent data record address to form the physical starting address
246 // for the data. This allows addressing up to one megabyte of address
247 // space.
248 SegmentAddr = 2,
249 // or 80x86 processors, specifies the initial content of the CS:IP
250 // registers. The address field is 0000, the byte count is always 04,
251 // the first two data bytes are the CS value, the latter two are the
252 // IP value.
253 StartAddr80x86 = 3,
254 // Allows for 32 bit addressing (up to 4GiB). The record's address field
255 // is ignored (typically 0000) and its byte count is always 02. The two
256 // data bytes (big endian) specify the upper 16 bits of the 32 bit
257 // absolute address for all subsequent type 00 records
258 ExtendedAddr = 4,
259 // The address field is 0000 (not used) and the byte count is always 04.
260 // The four data bytes represent a 32-bit address value. In the case of
261 // 80386 and higher CPUs, this address is loaded into the EIP register.
262 StartAddr = 5,
263 // We have no other valid types
264 InvalidType = 6
265 };
266};
267
268// Base class for IHexSectionWriter. This class implements writing algorithm,
269// but doesn't actually write records. It is used for output buffer size
270// calculation in IHexWriter::finalize.
271class IHexSectionWriterBase : public BinarySectionWriter {
272 // 20-bit segment address
273 uint32_t SegmentAddr = 0;
274 // Extended linear address
275 uint32_t BaseAddr = 0;
276
277 // Write segment address corresponding to 'Addr'
278 uint64_t writeSegmentAddr(uint64_t Addr);
279 // Write extended linear (base) address corresponding to 'Addr'
280 uint64_t writeBaseAddr(uint64_t Addr);
281
282protected:
283 // Offset in the output buffer
284 uint64_t Offset = 0;
285
286 void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
287 virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);
288
289public:
290 explicit IHexSectionWriterBase(WritableMemoryBuffer &Buf)
291 : BinarySectionWriter(Buf) {}
292
293 uint64_t getBufferOffset() const { return Offset; }
294 Error visit(const Section &Sec) final;
295 Error visit(const OwnedDataSection &Sec) final;
296 Error visit(const StringTableSection &Sec) override;
297 Error visit(const DynamicRelocationSection &Sec) final;
298 using BinarySectionWriter::visit;
299};
300
301// Real IHEX section writer
302class IHexSectionWriter : public IHexSectionWriterBase {
303public:
304 IHexSectionWriter(WritableMemoryBuffer &Buf) : IHexSectionWriterBase(Buf) {}
305
306 void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
307 Error visit(const StringTableSection &Sec) override;
308};
309
310class Writer {
311protected:
312 Object &Obj;
313 std::unique_ptr<WritableMemoryBuffer> Buf;
314 raw_ostream &Out;
315
316public:
317 virtual ~Writer();
318 virtual Error finalize() = 0;
319 virtual Error write() = 0;
320
321 Writer(Object &O, raw_ostream &Out) : Obj(O), Out(Out) {}
322};
323
324template <class ELFT> class ELFWriter : public Writer {
325private:
326 using Elf_Addr = typename ELFT::Addr;
327 using Elf_Shdr = typename ELFT::Shdr;
328 using Elf_Phdr = typename ELFT::Phdr;
329 using Elf_Ehdr = typename ELFT::Ehdr;
330
331 void initEhdrSegment();
332
333 void writeEhdr();
334 void writePhdr(const Segment &Seg);
335 void writeShdr(const SectionBase &Sec);
336
337 void writePhdrs();
338 void writeShdrs();
339 Error writeSectionData();
340 void writeSegmentData();
341
342 void assignOffsets();
343
344 std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter;
345
346 size_t totalSize() const;
347
348public:
349 virtual ~ELFWriter() {}
350 bool WriteSectionHeaders;
351
352 // For --only-keep-debug, select an alternative section/segment layout
353 // algorithm.
354 bool OnlyKeepDebug;
355
356 Error finalize() override;
357 Error write() override;
358 ELFWriter(Object &Obj, raw_ostream &Out, bool WSH, bool OnlyKeepDebug);
359};
360
361class BinaryWriter : public Writer {
362private:
363 const uint8_t GapFill;
364 const uint64_t PadTo;
365 std::unique_ptr<BinarySectionWriter> SecWriter;
366
367 uint64_t TotalSize = 0;
368
369public:
370 ~BinaryWriter() {}
371 Error finalize() override;
372 Error write() override;
373 BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config)
374 : Writer(Obj, Out), GapFill(Config.GapFill), PadTo(Config.PadTo) {}
375};
376
377// A base class for writing ascii hex formats such as srec and ihex.
378class ASCIIHexWriter : public Writer {
379public:
380 ASCIIHexWriter(Object &Obj, raw_ostream &OS, StringRef OutputFile)
381 : Writer(Obj, OS), OutputFileName(OutputFile) {}
382 Error finalize() override;
383
384protected:
385 StringRef OutputFileName;
386 size_t TotalSize = 0;
387 std::vector<const SectionBase *> Sections;
388
389 Error checkSection(const SectionBase &S) const;
390 virtual Expected<size_t>
391 getTotalSize(WritableMemoryBuffer &EmptyBuffer) const = 0;
392};
393
394class IHexWriter : public ASCIIHexWriter {
395public:
396 Error write() override;
397 IHexWriter(Object &Obj, raw_ostream &Out, StringRef OutputFile)
398 : ASCIIHexWriter(Obj, Out, OutputFile) {}
399
400private:
401 uint64_t writeEntryPointRecord(uint8_t *Buf);
402 uint64_t writeEndOfFileRecord(uint8_t *Buf);
403 Expected<size_t>
404 getTotalSize(WritableMemoryBuffer &EmptyBuffer) const override;
405};
406
407class SRECWriter : public ASCIIHexWriter {
408public:
409 SRECWriter(Object &Obj, raw_ostream &OS, StringRef OutputFile)
410 : ASCIIHexWriter(Obj, OS, OutputFile) {}
411 Error write() override;
412
413private:
414 size_t writeHeader(uint8_t *Buf);
415 size_t writeTerminator(uint8_t *Buf, uint8_t Type);
416 Expected<size_t>
417 getTotalSize(WritableMemoryBuffer &EmptyBuffer) const override;
418};
419
420using SRecLineData = SmallVector<char, 64>;
421struct SRecord {
422 uint8_t Type;
423 uint32_t Address;
424 ArrayRef<uint8_t> Data;
425 SRecLineData toString() const;
426 uint8_t getCount() const;
427 // Get address size in characters.
428 uint8_t getAddressSize() const;
429 uint8_t getChecksum() const;
430 size_t getSize() const;
431 static SRecord getHeader(StringRef FileName);
432 static uint8_t getType(uint32_t Address);
433
434 enum Type : uint8_t {
435 // Vendor specific text comment.
436 S0 = 0,
437 // Data that starts at a 16 bit address.
438 S1 = 1,
439 // Data that starts at a 24 bit address.
440 S2 = 2,
441 // Data that starts at a 32 bit address.
442 S3 = 3,
443 // Reserved.
444 S4 = 4,
445 // 16 bit count of S1/S2/S3 records (optional).
446 S5 = 5,
447 // 32 bit count of S1/S2/S3 records (optional).
448 S6 = 6,
449 // Terminates a series of S3 records.
450 S7 = 7,
451 // Terminates a series of S2 records.
452 S8 = 8,
453 // Terminates a series of S1 records.
454 S9 = 9
455 };
456};
457
458class SRECSectionWriterBase : public BinarySectionWriter {
459public:
460 explicit SRECSectionWriterBase(WritableMemoryBuffer &Buf,
461 uint64_t StartOffset)
462 : BinarySectionWriter(Buf), Offset(StartOffset), HeaderSize(StartOffset) {
463 }
464
465 using BinarySectionWriter::visit;
466
467 void writeRecords(uint32_t Entry);
468 uint64_t getBufferOffset() const { return Offset; }
469 Error visit(const Section &S) override;
470 Error visit(const OwnedDataSection &S) override;
471 Error visit(const StringTableSection &S) override;
472 Error visit(const DynamicRelocationSection &S) override;
473 uint8_t getType() const { return Type; };
474
475protected:
476 // Offset in the output buffer.
477 uint64_t Offset;
478 // Sections start after the header.
479 uint64_t HeaderSize;
480 // Type of records to write.
481 uint8_t Type = SRecord::S1;
482 std::vector<SRecord> Records;
483
484 void writeSection(const SectionBase &S, ArrayRef<uint8_t> Data);
485 virtual void writeRecord(SRecord &Record, uint64_t Off) = 0;
486};
487
488// An SRECSectionWriterBase that visits sections but does not write anything.
489// This class is only used to calculate the size of the output file.
490class SRECSizeCalculator : public SRECSectionWriterBase {
491public:
492 SRECSizeCalculator(WritableMemoryBuffer &EmptyBuffer, uint64_t Offset)
493 : SRECSectionWriterBase(EmptyBuffer, Offset) {}
494
495protected:
496 void writeRecord(SRecord &Record, uint64_t Off) override {}
497};
498
499class SRECSectionWriter : public SRECSectionWriterBase {
500public:
501 SRECSectionWriter(WritableMemoryBuffer &Buf, uint64_t Offset)
502 : SRECSectionWriterBase(Buf, Offset) {}
503 Error visit(const StringTableSection &Sec) override;
504
505protected:
506 void writeRecord(SRecord &Record, uint64_t Off) override;
507};
508
509class SectionBase {
510public:
511 std::string Name;
512 Segment *ParentSegment = nullptr;
513 uint64_t HeaderOffset = 0;
514 uint32_t Index = 0;
515
516 uint32_t OriginalIndex = 0;
517 uint64_t OriginalFlags = 0;
518 uint64_t OriginalType = ELF::SHT_NULL;
519 uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
520
521 uint64_t Addr = 0;
522 uint64_t Align = 1;
523 uint32_t EntrySize = 0;
524 uint64_t Flags = 0;
525 uint64_t Info = 0;
526 uint64_t Link = ELF::SHN_UNDEF;
527 uint64_t NameIndex = 0;
528 uint64_t Offset = 0;
529 uint64_t Size = 0;
530 uint64_t Type = ELF::SHT_NULL;
531 ArrayRef<uint8_t> OriginalData;
532 bool HasSymbol = false;
533
534 SectionBase() = default;
535 SectionBase(const SectionBase &) = default;
536
537 virtual ~SectionBase() = default;
538
539 virtual Error initialize(SectionTableRef SecTable);
540 virtual void finalize();
541 // Remove references to these sections. The list of sections must be sorted.
542 virtual Error
543 removeSectionReferences(bool AllowBrokenLinks,
544 function_ref<bool(const SectionBase *)> ToRemove);
545 virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
546 virtual Error accept(SectionVisitor &Visitor) const = 0;
547 virtual Error accept(MutableSectionVisitor &Visitor) = 0;
548 virtual void markSymbols();
549 virtual void
550 replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
551 virtual bool hasContents() const { return false; }
552 // Notify the section that it is subject to removal.
553 virtual void onRemove();
554
555 virtual void restoreSymTabLink(SymbolTableSection &) {}
556};
557
558class Segment {
559private:
560 struct SectionCompare {
561 bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
562 // Some sections might have the same address if one of them is empty. To
563 // fix this we can use the lexicographic ordering on ->Addr and the
564 // original index.
565 if (Lhs->OriginalOffset == Rhs->OriginalOffset)
566 return Lhs->OriginalIndex < Rhs->OriginalIndex;
567 return Lhs->OriginalOffset < Rhs->OriginalOffset;
568 }
569 };
570
571public:
572 uint32_t Type = 0;
573 uint32_t Flags = 0;
574 uint64_t Offset = 0;
575 uint64_t VAddr = 0;
576 uint64_t PAddr = 0;
577 uint64_t FileSize = 0;
578 uint64_t MemSize = 0;
579 uint64_t Align = 0;
580
581 uint32_t Index = 0;
582 uint64_t OriginalOffset = 0;
583 Segment *ParentSegment = nullptr;
584 ArrayRef<uint8_t> Contents;
585 std::set<const SectionBase *, SectionCompare> Sections;
586
587 explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
588 Segment() = default;
589
590 const SectionBase *firstSection() const {
591 if (!Sections.empty())
592 return *Sections.begin();
593 return nullptr;
594 }
595
596 void removeSection(const SectionBase *Sec) { Sections.erase(x: Sec); }
597 void addSection(const SectionBase *Sec) { Sections.insert(x: Sec); }
598
599 ArrayRef<uint8_t> getContents() const { return Contents; }
600};
601
602class Section : public SectionBase {
603 MAKE_SEC_WRITER_FRIEND
604
605 ArrayRef<uint8_t> Contents;
606 SectionBase *LinkSection = nullptr;
607 bool HasSymTabLink = false;
608
609public:
610 explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
611
612 Error accept(SectionVisitor &Visitor) const override;
613 Error accept(MutableSectionVisitor &Visitor) override;
614 Error removeSectionReferences(
615 bool AllowBrokenLinks,
616 function_ref<bool(const SectionBase *)> ToRemove) override;
617 Error initialize(SectionTableRef SecTable) override;
618 void finalize() override;
619 bool hasContents() const override {
620 return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL;
621 }
622 void restoreSymTabLink(SymbolTableSection &SymTab) override;
623};
624
625class OwnedDataSection : public SectionBase {
626 MAKE_SEC_WRITER_FRIEND
627
628 std::vector<uint8_t> Data;
629
630public:
631 OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
632 : Data(std::begin(cont&: Data), std::end(cont&: Data)) {
633 Name = SecName.str();
634 Type = OriginalType = ELF::SHT_PROGBITS;
635 Size = Data.size();
636 OriginalOffset = std::numeric_limits<uint64_t>::max();
637 }
638
639 OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
640 uint64_t SecOff) {
641 Name = SecName.str();
642 Type = OriginalType = ELF::SHT_PROGBITS;
643 Addr = SecAddr;
644 Flags = OriginalFlags = SecFlags;
645 OriginalOffset = SecOff;
646 }
647
648 OwnedDataSection(SectionBase &S, ArrayRef<uint8_t> Data)
649 : SectionBase(S), Data(std::begin(cont&: Data), std::end(cont&: Data)) {
650 Size = Data.size();
651 }
652
653 void appendHexData(StringRef HexData);
654 Error accept(SectionVisitor &Sec) const override;
655 Error accept(MutableSectionVisitor &Visitor) override;
656 bool hasContents() const override { return true; }
657};
658
659class CompressedSection : public SectionBase {
660 MAKE_SEC_WRITER_FRIEND
661
662 uint32_t ChType = 0;
663 DebugCompressionType CompressionType;
664 uint64_t DecompressedSize;
665 uint64_t DecompressedAlign;
666 SmallVector<uint8_t, 128> CompressedData;
667
668public:
669 CompressedSection(const SectionBase &Sec,
670 DebugCompressionType CompressionType, bool Is64Bits);
671 CompressedSection(ArrayRef<uint8_t> CompressedData, uint32_t ChType,
672 uint64_t DecompressedSize, uint64_t DecompressedAlign);
673
674 uint64_t getDecompressedSize() const { return DecompressedSize; }
675 uint64_t getDecompressedAlign() const { return DecompressedAlign; }
676 uint64_t getChType() const { return ChType; }
677
678 Error accept(SectionVisitor &Visitor) const override;
679 Error accept(MutableSectionVisitor &Visitor) override;
680
681 static bool classof(const SectionBase *S) {
682 return S->OriginalFlags & ELF::SHF_COMPRESSED;
683 }
684};
685
686class DecompressedSection : public SectionBase {
687 MAKE_SEC_WRITER_FRIEND
688
689public:
690 uint32_t ChType;
691 explicit DecompressedSection(const CompressedSection &Sec)
692 : SectionBase(Sec), ChType(Sec.getChType()) {
693 Size = Sec.getDecompressedSize();
694 Align = Sec.getDecompressedAlign();
695 Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
696 }
697
698 Error accept(SectionVisitor &Visitor) const override;
699 Error accept(MutableSectionVisitor &Visitor) override;
700};
701
702// There are two types of string tables that can exist, dynamic and not dynamic.
703// In the dynamic case the string table is allocated. Changing a dynamic string
704// table would mean altering virtual addresses and thus the memory image. So
705// dynamic string tables should not have an interface to modify them or
706// reconstruct them. This type lets us reconstruct a string table. To avoid
707// this class being used for dynamic string tables (which has happened) the
708// classof method checks that the particular instance is not allocated. This
709// then agrees with the makeSection method used to construct most sections.
710class StringTableSection : public SectionBase {
711 MAKE_SEC_WRITER_FRIEND
712
713 StringTableBuilder StrTabBuilder;
714
715public:
716 StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) {
717 Type = OriginalType = ELF::SHT_STRTAB;
718 }
719
720 void addString(StringRef Name);
721 uint32_t findIndex(StringRef Name) const;
722 void prepareForLayout();
723 Error accept(SectionVisitor &Visitor) const override;
724 Error accept(MutableSectionVisitor &Visitor) override;
725
726 static bool classof(const SectionBase *S) {
727 if (S->OriginalFlags & ELF::SHF_ALLOC)
728 return false;
729 return S->OriginalType == ELF::SHT_STRTAB;
730 }
731};
732
733// Symbols have a st_shndx field that normally stores an index but occasionally
734// stores a different special value. This enum keeps track of what the st_shndx
735// field means. Most of the values are just copies of the special SHN_* values.
736// SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section.
737enum SymbolShndxType {
738 SYMBOL_SIMPLE_INDEX = 0,
739 SYMBOL_ABS = ELF::SHN_ABS,
740 SYMBOL_COMMON = ELF::SHN_COMMON,
741 SYMBOL_LOPROC = ELF::SHN_LOPROC,
742 SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS,
743 SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
744 SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
745 SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
746 SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
747 SYMBOL_MIPS_ACOMMON = ELF::SHN_MIPS_ACOMMON,
748 SYMBOL_MIPS_TEXT = ELF::SHN_MIPS_TEXT,
749 SYMBOL_MIPS_DATA = ELF::SHN_MIPS_DATA,
750 SYMBOL_MIPS_SCOMMON = ELF::SHN_MIPS_SCOMMON,
751 SYMBOL_MIPS_SUNDEFINED = ELF::SHN_MIPS_SUNDEFINED,
752 SYMBOL_HIPROC = ELF::SHN_HIPROC,
753 SYMBOL_LOOS = ELF::SHN_LOOS,
754 SYMBOL_HIOS = ELF::SHN_HIOS,
755 SYMBOL_XINDEX = ELF::SHN_XINDEX,
756};
757
758struct Symbol {
759 uint8_t Binding;
760 SectionBase *DefinedIn = nullptr;
761 SymbolShndxType ShndxType;
762 uint32_t Index;
763 std::string Name;
764 uint32_t NameIndex;
765 uint64_t Size;
766 uint8_t Type;
767 uint64_t Value;
768 uint8_t Visibility;
769 bool Referenced = false;
770
771 uint16_t getShndx() const;
772 bool isCommon() const;
773};
774
775class SectionIndexSection : public SectionBase {
776 MAKE_SEC_WRITER_FRIEND
777
778private:
779 std::vector<uint32_t> Indexes;
780 SymbolTableSection *Symbols = nullptr;
781
782public:
783 virtual ~SectionIndexSection() {}
784 void addIndex(uint32_t Index) {
785 assert(Size > 0);
786 Indexes.push_back(x: Index);
787 }
788
789 void reserve(size_t NumSymbols) {
790 Indexes.reserve(n: NumSymbols);
791 Size = NumSymbols * 4;
792 }
793 void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
794 Error initialize(SectionTableRef SecTable) override;
795 void finalize() override;
796 Error accept(SectionVisitor &Visitor) const override;
797 Error accept(MutableSectionVisitor &Visitor) override;
798
799 SectionIndexSection() {
800 Name = ".symtab_shndx";
801 Align = 4;
802 EntrySize = 4;
803 Type = OriginalType = ELF::SHT_SYMTAB_SHNDX;
804 }
805};
806
807class SymbolTableSection : public SectionBase {
808 MAKE_SEC_WRITER_FRIEND
809
810 void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; }
811 void assignIndices();
812
813protected:
814 std::vector<std::unique_ptr<Symbol>> Symbols;
815 StringTableSection *SymbolNames = nullptr;
816 SectionIndexSection *SectionIndexTable = nullptr;
817 bool IndicesChanged = false;
818
819 using SymPtr = std::unique_ptr<Symbol>;
820
821public:
822 SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; }
823
824 void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
825 uint64_t Value, uint8_t Visibility, uint16_t Shndx,
826 uint64_t SymbolSize);
827 void prepareForLayout();
828 // An 'empty' symbol table still contains a null symbol.
829 bool empty() const { return Symbols.size() == 1; }
830 bool indicesChanged() const { return IndicesChanged; }
831 void setShndxTable(SectionIndexSection *ShndxTable) {
832 SectionIndexTable = ShndxTable;
833 }
834 const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
835 void fillShndxTable();
836 const SectionBase *getStrTab() const { return SymbolNames; }
837 Expected<const Symbol *> getSymbolByIndex(uint32_t Index) const;
838 Expected<Symbol *> getSymbolByIndex(uint32_t Index);
839 void updateSymbols(function_ref<void(Symbol &)> Callable);
840
841 Error removeSectionReferences(
842 bool AllowBrokenLinks,
843 function_ref<bool(const SectionBase *)> ToRemove) override;
844 Error initialize(SectionTableRef SecTable) override;
845 void finalize() override;
846 Error accept(SectionVisitor &Visitor) const override;
847 Error accept(MutableSectionVisitor &Visitor) override;
848 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
849 void replaceSectionReferences(
850 const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
851
852 static bool classof(const SectionBase *S) {
853 return S->OriginalType == ELF::SHT_SYMTAB;
854 }
855};
856
857struct Relocation {
858 Symbol *RelocSymbol = nullptr;
859 uint64_t Offset;
860 uint64_t Addend;
861 uint32_t Type;
862};
863
864// All relocation sections denote relocations to apply to another section.
865// However, some relocation sections use a dynamic symbol table and others use
866// a regular symbol table. Because the types of the two symbol tables differ in
867// our system (because they should behave differently) we can't uniformly
868// represent all relocations with the same base class if we expose an interface
869// that mentions the symbol table type. So we split the two base types into two
870// different classes, one which handles the section the relocation is applied to
871// and another which handles the symbol table type. The symbol table type is
872// taken as a type parameter to the class (see RelocSectionWithSymtabBase).
873class RelocationSectionBase : public SectionBase {
874protected:
875 SectionBase *SecToApplyRel = nullptr;
876
877public:
878 const SectionBase *getSection() const { return SecToApplyRel; }
879 void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
880
881 StringRef getNamePrefix() const;
882
883 static bool classof(const SectionBase *S) {
884 return is_contained(Set: {ELF::SHT_REL, ELF::SHT_RELA, ELF::SHT_CREL},
885 Element: S->OriginalType);
886 }
887};
888
889// Takes the symbol table type to use as a parameter so that we can deduplicate
890// that code between the two symbol table types.
891template <class SymTabType>
892class RelocSectionWithSymtabBase : public RelocationSectionBase {
893 void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }
894
895protected:
896 RelocSectionWithSymtabBase() = default;
897
898 SymTabType *Symbols = nullptr;
899
900public:
901 Error initialize(SectionTableRef SecTable) override;
902 void finalize() override;
903};
904
905class RelocationSection
906 : public RelocSectionWithSymtabBase<SymbolTableSection> {
907 MAKE_SEC_WRITER_FRIEND
908
909 std::vector<Relocation> Relocations;
910 const Object &Obj;
911
912public:
913 RelocationSection(const Object &O) : Obj(O) {}
914 void addRelocation(const Relocation &Rel) { Relocations.push_back(x: Rel); }
915 Error accept(SectionVisitor &Visitor) const override;
916 Error accept(MutableSectionVisitor &Visitor) override;
917 Error removeSectionReferences(
918 bool AllowBrokenLinks,
919 function_ref<bool(const SectionBase *)> ToRemove) override;
920 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
921 void markSymbols() override;
922 void replaceSectionReferences(
923 const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
924 const Object &getObject() const { return Obj; }
925
926 static bool classof(const SectionBase *S) {
927 if (S->OriginalFlags & ELF::SHF_ALLOC)
928 return false;
929 return RelocationSectionBase::classof(S);
930 }
931};
932
933// TODO: The way stripping and groups interact is complicated
934// and still needs to be worked on.
935
936class GroupSection : public SectionBase {
937 MAKE_SEC_WRITER_FRIEND
938 const SymbolTableSection *SymTab = nullptr;
939 Symbol *Sym = nullptr;
940 ELF::Elf32_Word FlagWord;
941 SmallVector<SectionBase *, 3> GroupMembers;
942
943public:
944 template <class T>
945 using ConstRange = iterator_range<
946 pointee_iterator<typename llvm::SmallVector<T *, 3>::const_iterator>>;
947 // TODO: Contents is present in several classes of the hierarchy.
948 // This needs to be refactored to avoid duplication.
949 ArrayRef<uint8_t> Contents;
950
951 explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
952
953 void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; }
954 void setSymbol(Symbol *S) { Sym = S; }
955 void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
956 void addMember(SectionBase *Sec) { GroupMembers.push_back(Elt: Sec); }
957
958 Error accept(SectionVisitor &) const override;
959 Error accept(MutableSectionVisitor &Visitor) override;
960 void finalize() override;
961 Error removeSectionReferences(
962 bool AllowBrokenLinks,
963 function_ref<bool(const SectionBase *)> ToRemove) override;
964 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
965 void markSymbols() override;
966 void replaceSectionReferences(
967 const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
968 void onRemove() override;
969
970 ConstRange<SectionBase> members() const {
971 return make_pointee_range(Range: GroupMembers);
972 }
973
974 static bool classof(const SectionBase *S) {
975 return S->OriginalType == ELF::SHT_GROUP;
976 }
977};
978
979class DynamicSymbolTableSection : public Section {
980public:
981 explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {}
982
983 static bool classof(const SectionBase *S) {
984 return S->OriginalType == ELF::SHT_DYNSYM;
985 }
986};
987
988class DynamicSection : public Section {
989public:
990 explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {}
991
992 static bool classof(const SectionBase *S) {
993 return S->OriginalType == ELF::SHT_DYNAMIC;
994 }
995};
996
997class DynamicRelocationSection
998 : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> {
999 MAKE_SEC_WRITER_FRIEND
1000
1001private:
1002 ArrayRef<uint8_t> Contents;
1003
1004public:
1005 explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
1006
1007 Error accept(SectionVisitor &) const override;
1008 Error accept(MutableSectionVisitor &Visitor) override;
1009 Error removeSectionReferences(
1010 bool AllowBrokenLinks,
1011 function_ref<bool(const SectionBase *)> ToRemove) override;
1012
1013 static bool classof(const SectionBase *S) {
1014 if (!(S->OriginalFlags & ELF::SHF_ALLOC))
1015 return false;
1016 return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
1017 }
1018};
1019
// Section built from a file name and a precomputed CRC32 value.
// NOTE(review): judging by the name this models a .gnu_debuglink section; the
// exact layout is defined by init() and the section writers, which are not
// visible here.
class GnuDebugLinkSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

private:
  // Non-owning view of the file name; the backing string must stay alive for
  // as long as this section is used.
  StringRef FileName;
  uint32_t CRC32;

  // Helper declared for setting up the section from File; defined out of line.
  void init(StringRef File);

public:
  // If we add this section from an external source we can use this ctor.
  explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC);
  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
};
1035
// Abstract interface for anything that can produce an Object.
class Reader {
public:
  // Virtual so concrete readers can be destroyed through a Reader pointer.
  virtual ~Reader();
  // Builds a new Object and returns ownership of it, or an error on failure.
  // EnsureSymtab is forwarded to the concrete reader; presumably it requests
  // that the resulting object has a symbol table -- confirm against the
  // implementations.
  virtual Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const = 0;
};
1041
1042using object::Binary;
1043using object::ELFFile;
1044using object::ELFObjectFile;
1045using object::OwningBinary;
1046
1047class BasicELFBuilder {
1048protected:
1049 std::unique_ptr<Object> Obj;
1050
1051 void initFileHeader();
1052 void initHeaderSegment();
1053 StringTableSection *addStrTab();
1054 SymbolTableSection *addSymTab(StringTableSection *StrTab);
1055 Error initSections();
1056
1057public:
1058 BasicELFBuilder() : Obj(std::make_unique<Object>()) {}
1059};
1060
1061class BinaryELFBuilder : public BasicELFBuilder {
1062 MemoryBuffer *MemBuf;
1063 uint8_t NewSymbolVisibility;
1064 void addData(SymbolTableSection *SymTab);
1065
1066public:
1067 BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility)
1068 : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}
1069
1070 Expected<std::unique_ptr<Object>> build();
1071};
1072
1073class IHexELFBuilder : public BasicELFBuilder {
1074 const std::vector<IHexRecord> &Records;
1075
1076 void addDataSections();
1077
1078public:
1079 IHexELFBuilder(const std::vector<IHexRecord> &Records) : Records(Records) {}
1080
1081 Expected<std::unique_ptr<Object>> build();
1082};
1083
1084template <class ELFT> class ELFBuilder {
1085private:
1086 using Elf_Addr = typename ELFT::Addr;
1087 using Elf_Shdr = typename ELFT::Shdr;
1088 using Elf_Word = typename ELFT::Word;
1089
1090 const ELFFile<ELFT> &ElfFile;
1091 Object &Obj;
1092 size_t EhdrOffset = 0;
1093 std::optional<StringRef> ExtractPartition;
1094
1095 void setParentSegment(Segment &Child);
1096 Error readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
1097 Error initGroupSection(GroupSection *GroupSec);
1098 Error initSymbolTable(SymbolTableSection *SymTab);
1099 Error readSectionHeaders();
1100 Error readSections(bool EnsureSymtab);
1101 Error findEhdrOffset();
1102 Expected<SectionBase &> makeSection(const Elf_Shdr &Shdr);
1103
1104public:
1105 ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
1106 std::optional<StringRef> ExtractPartition);
1107
1108 Error build(bool EnsureSymtab);
1109};
1110
1111class BinaryReader : public Reader {
1112 MemoryBuffer *MemBuf;
1113 uint8_t NewSymbolVisibility;
1114
1115public:
1116 BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility)
1117 : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}
1118 Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
1119};
1120
1121class IHexReader : public Reader {
1122 MemoryBuffer *MemBuf;
1123
1124 Expected<std::vector<IHexRecord>> parse() const;
1125 Error parseError(size_t LineNo, Error E) const {
1126 return LineNo == -1U
1127 ? createFileError(F: MemBuf->getBufferIdentifier(), E: std::move(E))
1128 : createFileError(F: MemBuf->getBufferIdentifier(), Line: LineNo,
1129 E: std::move(E));
1130 }
1131 template <typename... Ts>
1132 Error parseError(size_t LineNo, char const *Fmt, const Ts &...Vals) const {
1133 Error E = createStringError(errc::invalid_argument, Fmt, Vals...);
1134 return parseError(LineNo, E: std::move(E));
1135 }
1136
1137public:
1138 IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
1139
1140 Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
1141};
1142
1143class ELFReader : public Reader {
1144 Binary *Bin;
1145 std::optional<StringRef> ExtractPartition;
1146
1147public:
1148 Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
1149 explicit ELFReader(Binary *B, std::optional<StringRef> ExtractPartition)
1150 : Bin(B), ExtractPartition(ExtractPartition) {}
1151};
1152
1153class Object {
1154private:
1155 using SecPtr = std::unique_ptr<SectionBase>;
1156 using SegPtr = std::unique_ptr<Segment>;
1157
1158 std::vector<SecPtr> Sections;
1159 std::vector<SegPtr> Segments;
1160 std::vector<SecPtr> RemovedSections;
1161 DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections;
1162
1163 static bool sectionIsAlloc(const SectionBase &Sec) {
1164 return Sec.Flags & ELF::SHF_ALLOC;
1165 };
1166
1167public:
1168 template <class T>
1169 using ConstRange = iterator_range<pointee_iterator<
1170 typename std::vector<std::unique_ptr<T>>::const_iterator>>;
1171
1172 // It is often the case that the ELF header and the program header table are
1173 // not present in any segment. This could be a problem during file layout,
1174 // because other segments may get assigned an offset where either of the
1175 // two should reside, which will effectively corrupt the resulting binary.
1176 // Other than that we use these segments to track program header offsets
1177 // when they may not follow the ELF header.
1178 Segment ElfHdrSegment;
1179 Segment ProgramHdrSegment;
1180
1181 bool Is64Bits;
1182 uint8_t OSABI;
1183 uint8_t ABIVersion;
1184 uint64_t Entry;
1185 uint64_t SHOff;
1186 uint32_t Type;
1187 uint32_t Machine;
1188 uint32_t Version;
1189 uint32_t Flags;
1190
1191 bool HadShdrs = true;
1192 bool MustBeRelocatable = false;
1193 StringTableSection *SectionNames = nullptr;
1194 SymbolTableSection *SymbolTable = nullptr;
1195 SectionIndexSection *SectionIndexTable = nullptr;
1196
1197 bool IsMips64EL = false;
1198
1199 SectionTableRef sections() const { return SectionTableRef(Sections); }
1200 iterator_range<
1201 filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>,
1202 decltype(&sectionIsAlloc)>>
1203 allocSections() const {
1204 return make_filter_range(Range: make_pointee_range(Range: Sections), Pred: sectionIsAlloc);
1205 }
1206
1207 const auto &getUpdatedSections() const { return UpdatedSections; }
1208 Error updateSection(StringRef Name, ArrayRef<uint8_t> Data);
1209
1210 SectionBase *findSection(StringRef Name) {
1211 auto SecIt =
1212 find_if(Range&: Sections, P: [&](const SecPtr &Sec) { return Sec->Name == Name; });
1213 return SecIt == Sections.end() ? nullptr : SecIt->get();
1214 }
1215 SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
1216
1217 ConstRange<Segment> segments() const { return make_pointee_range(Range: Segments); }
1218
1219 Error removeSections(bool AllowBrokenLinks,
1220 std::function<bool(const SectionBase &)> ToRemove);
1221 Error compressOrDecompressSections(const CommonConfig &Config);
1222 Error replaceSections(const DenseMap<SectionBase *, SectionBase *> &FromTo);
1223 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
1224 template <class T, class... Ts> T &addSection(Ts &&...Args) {
1225 auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
1226 auto Ptr = Sec.get();
1227 MustBeRelocatable |= isa<RelocationSection>(*Ptr);
1228 Sections.emplace_back(std::move(Sec));
1229 Ptr->Index = Sections.size();
1230 return *Ptr;
1231 }
1232 Error addNewSymbolTable();
1233 Segment &addSegment(ArrayRef<uint8_t> Data) {
1234 Segments.emplace_back(args: std::make_unique<Segment>(args&: Data));
1235 return *Segments.back();
1236 }
1237 bool isRelocatable() const {
1238 return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
1239 }
1240};
1241
1242} // end namespace elf
1243} // end namespace objcopy
1244} // end namespace llvm
1245
1246#endif // LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
1247