| 1 | //===- EhFrame.h ------------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLD_MACHO_EH_FRAME_H |
| 10 | #define LLD_MACHO_EH_FRAME_H |
| 11 | |
| 12 | #include "InputSection.h" |
| 13 | #include "Relocations.h" |
| 14 | |
| 15 | #include "lld/Common/LLVM.h" |
| 16 | #include "llvm/ADT/ArrayRef.h" |
| 17 | #include "llvm/ADT/PointerUnion.h" |
| 18 | #include "llvm/ADT/SmallVector.h" |
| 19 | |
| 20 | /* |
| 21 | * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it |
| 22 | * is closely coupled with other file parsing logic; EhFrame.h just contains a |
| 23 | * few helpers. |
| 24 | */ |
| 25 | |
| 26 | /* |
| 27 | * === The EH frame format === |
| 28 | * |
| 29 | * EH frames can either be Common Information Entries (CIEs) or Frame |
| 30 | * Description Entries (FDEs). CIEs contain information that is common amongst |
| 31 | * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame |
| 32 | * entries together form a forest of two-level trees, with CIEs as the roots |
| 33 | * and FDEs as the leaves. Note that a CIE must precede the FDEs which point |
| 34 | * to it. |
| 35 | * |
| 36 | * A CIE comprises the following fields in order: |
| 37 | * 1. Length of the entry (4 or 12 bytes) |
| 38 | * 2. CIE offset (4 bytes; always 0 for CIEs) |
| 39 | * 3. CIE version (byte) |
| 40 | * 4. Null-terminated augmentation string |
| 41 | * 5-8. LEB128 values that we don't care about |
| 42 | * 9. Augmentation data, to be interpreted using the aug string |
| 43 | * 10. DWARF instructions (ignored by LLD) |
| 44 | * |
| 45 | * An FDE comprises the following fields in order: |
| 46 | * 1. Length of the entry (4 or 12 bytes) |
| 47 | * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE) |
| 48 | * 3. Function address (pointer-sized pcrel offset) |
| 49 | * 4. (Optional) Augmentation data length |
| 50 | * 5. (Optional) LSDA address (pointer-sized pcrel offset) |
| 51 | * 6. DWARF instructions (ignored by LLD) |
| 52 | */ |
| 53 | namespace lld::macho { |
| 54 | |
| 55 | class EhReader { |
| 56 | public: |
| 57 | EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff) |
| 58 | : file(file), data(data), dataOff(dataOff) {} |
| 59 | size_t size() const { return data.size(); } |
| 60 | // Read and validate the length field. |
| 61 | uint64_t readLength(size_t *off) const; |
| 62 | // Skip the length field without doing validation. |
| 63 | void skipValidLength(size_t *off) const; |
| 64 | uint8_t readByte(size_t *off) const; |
| 65 | uint32_t readU32(size_t *off) const; |
| 66 | uint64_t readPointer(size_t *off, uint8_t size) const; |
| 67 | StringRef readString(size_t *off) const; |
| 68 | void skipLeb128(size_t *off) const; |
| 69 | void failOn(size_t errOff, const Twine &msg) const; |
| 70 | |
| 71 | private: |
| 72 | const ObjFile *file; |
| 73 | ArrayRef<uint8_t> data; |
| 74 | // The offset of the data array within its section. Used only for error |
| 75 | // reporting. |
| 76 | const size_t dataOff; |
| 77 | }; |
| 78 | |
| 79 | // The EH frame format, when emitted by llvm-mc, consists of a number of |
| 80 | // "abs-ified" relocations, i.e. relocations that are implicitly encoded as |
| 81 | // pcrel offsets in the section data. The offsets refer to the locations of |
| 82 | // symbols in the input object file. When we ingest these EH frames, we convert |
| 83 | // these implicit relocations into explicit Relocs. |
| 84 | // |
| 85 | // These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4. |
| 86 | // However, we need this operation to be cross-platform, and ARM does not have a |
| 87 | // similar relocation that is applicable. We therefore use the more verbose (but |
| 88 | // more generic) subtractor relocation to encode these pcrel values. ld64 |
| 89 | // appears to do something similar -- its `-r` output contains these explicit |
| 90 | // subtractor relocations. |
| 91 | class EhRelocator { |
| 92 | public: |
| 93 | EhRelocator(InputSection *isec) : isec(isec) {} |
| 94 | |
| 95 | // For the next two methods, let `PC` denote `isec address + off`. |
| 96 | // Create relocs writing the value of target - PC to PC. |
| 97 | void makePcRel(uint64_t off, |
| 98 | llvm::PointerUnion<Symbol *, InputSection *> target, |
| 99 | uint8_t length); |
| 100 | // Create relocs writing the value of PC - target to PC. |
| 101 | void makeNegativePcRel(uint64_t off, |
| 102 | llvm::PointerUnion<Symbol *, InputSection *> target, |
| 103 | uint8_t length); |
| 104 | // Insert the new relocations into isec->relocs. |
| 105 | void commit(); |
| 106 | |
| 107 | private: |
| 108 | InputSection *isec; |
| 109 | // Insert new relocs here so that we don't invalidate iterators into the |
| 110 | // existing relocs vector. |
| 111 | SmallVector<Reloc, 6> newRelocs; |
| 112 | }; |
| 113 | |
| 114 | } // namespace lld::macho |
| 115 | |
| 116 | #endif |
| 117 | |