1//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_EH_FRAME_H
10#define LLD_MACHO_EH_FRAME_H
11
12#include "InputSection.h"
13#include "Relocations.h"
14
15#include "lld/Common/LLVM.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/PointerUnion.h"
18#include "llvm/ADT/SmallVector.h"
19
20/*
21 * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
22 * is closely coupled with other file parsing logic; EhFrame.h just contains a
23 * few helpers.
24 */
25
26/*
27 * === The EH frame format ===
28 *
29 * EH frames can either be Common Information Entries (CIEs) or Frame
30 * Description Entries (FDEs). CIEs contain information that is common amongst
31 * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
32 * entries together form a forest of two-level trees, with CIEs as the roots
33 * and FDEs as the leaves. Note that a CIE must precede the FDEs which point
34 * to it.
35 *
36 * A CIE comprises the following fields in order:
37 * 1. Length of the entry (4 or 12 bytes)
38 * 2. CIE offset (4 bytes; always 0 for CIEs)
39 * 3. CIE version (byte)
40 * 4. Null-terminated augmentation string
41 * 5-8. LEB128 values that we don't care about
42 * 9. Augmentation data, to be interpreted using the aug string
43 * 10. DWARF instructions (ignored by LLD)
44 *
 * An FDE comprises the following:
46 * 1. Length of the entry (4 or 12 bytes)
47 * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
48 * 3. Function address (pointer-sized pcrel offset)
 * 4. (Optional) Augmentation data length
 * 5. (Optional) LSDA address (pointer-sized pcrel offset)
51 * 6. DWARF instructions (ignored by LLD)
52 */
53namespace lld::macho {
54
55class EhReader {
56public:
57 EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
58 : file(file), data(data), dataOff(dataOff) {}
59 size_t size() const { return data.size(); }
60 // Read and validate the length field.
61 uint64_t readLength(size_t *off) const;
62 // Skip the length field without doing validation.
63 void skipValidLength(size_t *off) const;
64 uint8_t readByte(size_t *off) const;
65 uint32_t readU32(size_t *off) const;
66 uint64_t readPointer(size_t *off, uint8_t size) const;
67 StringRef readString(size_t *off) const;
68 void skipLeb128(size_t *off) const;
69 void failOn(size_t errOff, const Twine &msg) const;
70
71private:
72 const ObjFile *file;
73 ArrayRef<uint8_t> data;
74 // The offset of the data array within its section. Used only for error
75 // reporting.
76 const size_t dataOff;
77};
78
79// The EH frame format, when emitted by llvm-mc, consists of a number of
80// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
81// pcrel offsets in the section data. The offsets refer to the locations of
82// symbols in the input object file. When we ingest these EH frames, we convert
83// these implicit relocations into explicit Relocs.
84//
85// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
86// However, we need this operation to be cross-platform, and ARM does not have a
87// similar relocation that is applicable. We therefore use the more verbose (but
88// more generic) subtractor relocation to encode these pcrel values. ld64
89// appears to do something similar -- its `-r` output contains these explicit
90// subtractor relocations.
91class EhRelocator {
92public:
93 EhRelocator(InputSection *isec) : isec(isec) {}
94
95 // For the next two methods, let `PC` denote `isec address + off`.
96 // Create relocs writing the value of target - PC to PC.
97 void makePcRel(uint64_t off,
98 llvm::PointerUnion<Symbol *, InputSection *> target,
99 uint8_t length);
100 // Create relocs writing the value of PC - target to PC.
101 void makeNegativePcRel(uint64_t off,
102 llvm::PointerUnion<Symbol *, InputSection *> target,
103 uint8_t length);
104 // Insert the new relocations into isec->relocs.
105 void commit();
106
107private:
108 InputSection *isec;
109 // Insert new relocs here so that we don't invalidate iterators into the
110 // existing relocs vector.
111 SmallVector<Reloc, 6> newRelocs;
112};
113
114} // namespace lld::macho
115
116#endif
117