1//===- Relocations.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_RELOCATIONS_H
10#define LLD_ELF_RELOCATIONS_H
11
12#include "lld/Common/LLVM.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Object/ELFTypes.h"
16#include <vector>
17
18namespace lld::elf {
19class Symbol;
20class InputSection;
21class InputSectionBase;
22class OutputSection;
23class SectionBase;
24
// A relocation type, for example R_X86_64_PC32 or R_ARM_THM_CALL.
using RelType = uint32_t;
// Value type of JumpInstrMod::original.
using JumpModType = uint32_t;
28
// Target-independent relocation expressions. Relocation types read from input
// files are translated into one of these values so that the main code does
// not have to know about architecture-specific details.
//
// Enumerators are implicitly numbered from zero in declaration order.
enum RelExpr {
  R_ABS,
  R_ADDEND,
  R_DTPREL,
  R_GOT,
  R_GOT_OFF,
  R_GOT_PC,
  R_GOTONLY_PC,
  R_GOTPLTONLY_PC,
  R_GOTPLT,
  R_GOTPLTREL,
  R_GOTREL,
  R_GOTPLT_GOTREL,
  R_GOTPLT_PC,
  R_NONE,
  R_PC,
  R_PLT,
  R_PLT_PC,
  R_PLT_GOTPLT,
  R_PLT_GOTREL,
  R_RELAX_HINT,
  R_RELAX_GOT_PC,
  R_RELAX_GOT_PC_NOPIC,
  R_RELAX_TLS_GD_TO_IE,
  R_RELAX_TLS_GD_TO_IE_ABS,
  R_RELAX_TLS_GD_TO_IE_GOT_OFF,
  R_RELAX_TLS_GD_TO_IE_GOTPLT,
  R_RELAX_TLS_GD_TO_LE,
  R_RELAX_TLS_GD_TO_LE_NEG,
  R_RELAX_TLS_IE_TO_LE,
  R_RELAX_TLS_LD_TO_LE,
  R_RELAX_TLS_LD_TO_LE_ABS,
  R_SIZE,
  R_TPREL,
  R_TPREL_NEG,
  R_TLSDESC,
  R_TLSDESC_CALL,
  R_TLSDESC_PC,
  R_TLSDESC_GOTPLT,
  R_TLSGD_GOT,
  R_TLSGD_GOTPLT,
  R_TLSGD_PC,
  R_TLSIE_HINT,
  R_TLSLD_GOT,
  R_TLSLD_GOTPLT,
  R_TLSLD_GOT_OFF,
  R_TLSLD_HINT,
  R_TLSLD_PC,

  // The following are abstract relocation types that are used by only one
  // target.
  //
  // Even though RelExpr is intended to be a target-neutral representation of
  // a relocation type, there are some relocations whose semantics are unique
  // to a target. Such relocations are named R_<TARGET_NAME>_*.
  R_AARCH64_GOT_PAGE_PC,
  R_AARCH64_GOT_PAGE,
  R_AARCH64_PAGE_PC,
  R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
  R_AARCH64_TLSDESC_PAGE,
  R_AARCH64_AUTH,
  R_ARM_PCA,
  R_ARM_SBREL,
  R_MIPS_GOTREL,
  R_MIPS_GOT_GP,
  R_MIPS_GOT_GP_PC,
  R_MIPS_GOT_LOCAL_PAGE,
  R_MIPS_GOT_OFF,
  R_MIPS_GOT_OFF32,
  R_MIPS_TLSGD,
  R_MIPS_TLSLD,
  R_PPC32_PLTREL,
  R_PPC64_CALL,
  R_PPC64_CALL_PLT,
  R_PPC64_RELAX_TOC,
  R_PPC64_TOCBASE,
  R_PPC64_RELAX_GOT_PC,
  R_RISCV_ADD,
  R_RISCV_LEB128,
  R_RISCV_PC_INDIRECT,
  // Like R_PC, but with page-aligned semantics.
  R_LOONGARCH_PAGE_PC,
  // Like R_PLT_PC, but with page-aligned semantics.
  R_LOONGARCH_PLT_PAGE_PC,
  // Besides having page-aligned semantics, LoongArch GOT relocations are
  // also reused for TLS, so their semantics differ from other architectures.
  R_LOONGARCH_GOT,
  R_LOONGARCH_GOT_PAGE_PC,
  R_LOONGARCH_TLSGD_PAGE_PC,
  R_LOONGARCH_TLSDESC_PAGE_PC,
};
122
123// Architecture-neutral representation of relocation.
124struct Relocation {
125 RelExpr expr;
126 RelType type;
127 uint64_t offset;
128 int64_t addend;
129 Symbol *sym;
130};
131
132// Manipulate jump instructions with these modifiers. These are used to relax
133// jump instruction opcodes at basic block boundaries and are particularly
134// useful when basic block sections are enabled.
135struct JumpInstrMod {
136 uint64_t offset;
137 JumpModType original;
138 unsigned size;
139};
140
141// This function writes undefined symbol diagnostics to an internal buffer.
142// Call reportUndefinedSymbols() after calling scanRelocations() to emit
143// the diagnostics.
144template <class ELFT> void scanRelocations();
145template <class ELFT> void checkNoCrossRefs();
146void reportUndefinedSymbols();
147void postScanRelocations();
148void addGotEntry(Symbol &sym);
149
150void hexagonTLSSymbolUpdate(ArrayRef<OutputSection *> outputSections);
151bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
152
153class ThunkSection;
154class Thunk;
155class InputSectionDescription;
156
157class ThunkCreator {
158public:
159 // Return true if Thunks have been added to OutputSections
160 bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections);
161
162private:
163 void mergeThunks(ArrayRef<OutputSection *> outputSections);
164
165 ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
166 InputSectionDescription *isd,
167 const Relocation &rel, uint64_t src);
168
169 ThunkSection *getISThunkSec(InputSection *isec);
170
171 void createInitialThunkSections(ArrayRef<OutputSection *> outputSections);
172
173 std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
174 uint64_t src);
175
176 ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
177 uint64_t off);
178
179 bool normalizeExistingThunk(Relocation &rel, uint64_t src);
180
181 // Record all the available Thunks for a (Symbol, addend) pair, where Symbol
182 // is represented as a (section, offset) pair. There may be multiple
183 // relocations sharing the same (section, offset + addend) pair. We may revert
184 // a relocation back to its original non-Thunk target, and restore the
185 // original addend, so we cannot fold offset + addend. A nested pair is used
186 // because DenseMapInfo is not specialized for std::tuple.
187 llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
188 std::vector<Thunk *>>
189 thunkedSymbolsBySectionAndAddend;
190 llvm::DenseMap<std::pair<Symbol *, int64_t>, std::vector<Thunk *>>
191 thunkedSymbols;
192
193 // Find a Thunk from the Thunks symbol definition, we can use this to find
194 // the Thunk from a relocation to the Thunks symbol definition.
195 llvm::DenseMap<Symbol *, Thunk *> thunks;
196
197 // Track InputSections that have an inline ThunkSection placed in front
198 // an inline ThunkSection may have control fall through to the section below
199 // so we need to make sure that there is only one of them.
200 // The Mips LA25 Thunk is an example of an inline ThunkSection.
201 llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;
202
203 // The number of completed passes of createThunks this permits us
204 // to do one time initialization on Pass 0 and put a limit on the
205 // number of times it can be called to prevent infinite loops.
206 uint32_t pass = 0;
207};
208
// Decode one LEB128 value starting at `p`, advancing `p` past the bytes that
// were consumed. No error checking is done (neither for running off the end
// of the buffer nor for over-long encodings); this is only meant for
// performance-critical code such as RelocsCrel.
//
// `leb` selects the flavor: every byte that is >= `leb` has 128 subtracted
// before it is shifted into place.
//  - leb == 128 (ULEB128): only strips the continuation bit.
//  - leb == 64  (SLEB128): also turns a final byte with bit 6 set into a
//    negative value, whose unsigned wrap-around sign-extends the result.
inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
  uint64_t acc = 0, shift = 0, byte;
  do {
    byte = *p++;
    // Unsigned arithmetic: the subtraction may wrap on purpose (see above).
    acc |= (byte - 128 * (byte >= leb)) << shift;
    shift += 7;
  } while (byte >= 128); // Continuation bit set: more bytes follow.
  return acc;
}

// Decode an unsigned LEB128 value at `p` and advance `p`.
inline uint64_t readULEB128(const uint8_t *&p) { return readLEB128(p, 128); }

// Decode a signed LEB128 value at `p` and advance `p`. The accumulator is
// already sign-extended to 64 bits, so it is simply reinterpreted as signed.
inline int64_t readSLEB128(const uint8_t *&p) {
  return static_cast<int64_t>(readLEB128(p, 64));
}
222
223// This class implements a CREL iterator that does not allocate extra memory.
224template <bool is64> struct RelocsCrel {
225 using uint = std::conditional_t<is64, uint64_t, uint32_t>;
226 struct const_iterator {
227 using iterator_category = std::forward_iterator_tag;
228 using value_type = llvm::object::Elf_Crel_Impl<is64>;
229 using difference_type = ptrdiff_t;
230 using pointer = value_type *;
231 using reference = const value_type &;
232 uint32_t count;
233 uint8_t flagBits, shift;
234 const uint8_t *p;
235 llvm::object::Elf_Crel_Impl<is64> crel{};
236 const_iterator(size_t hdr, const uint8_t *p)
237 : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
238 if (count)
239 step();
240 }
241 void step() {
242 // See object::decodeCrel.
243 const uint8_t b = *p++;
244 crel.r_offset += b >> flagBits << shift;
245 if (b >= 0x80)
246 crel.r_offset +=
247 ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
248 if (b & 1)
249 crel.r_symidx += readSLEB128(p);
250 if (b & 2)
251 crel.r_type += readSLEB128(p);
252 if (b & 4 && flagBits == 3)
253 crel.r_addend += static_cast<uint>(readSLEB128(p));
254 }
255 llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
256 const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
257 return &crel;
258 }
259 // For llvm::enumerate.
260 bool operator==(const const_iterator &r) const { return count == r.count; }
261 bool operator!=(const const_iterator &r) const { return count != r.count; }
262 const_iterator &operator++() {
263 if (--count)
264 step();
265 return *this;
266 }
267 // For RelocationScanner::scanOne.
268 void operator+=(size_t n) {
269 for (; n; --n)
270 operator++();
271 }
272 };
273
274 size_t hdr = 0;
275 const uint8_t *p = nullptr;
276
277 constexpr RelocsCrel() = default;
278 RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
279 size_t size() const { return hdr / 8; }
280 const_iterator begin() const { return {hdr, p}; }
281 const_iterator end() const { return {0, nullptr}; }
282};
283
284template <class RelTy> struct Relocs : ArrayRef<RelTy> {
285 Relocs() = default;
286 Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
287};
288
289template <bool is64>
290struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
291 using RelocsCrel<is64>::RelocsCrel;
292};
293
// Fetch the addend stored in a relocation record. The result is always an
// int64_t so that sign extension is dealt with as early as possible.

// Rel records: always yields 0.
template <typename ELFT>
static inline int64_t getAddend(const typename ELFT::Rel &) {
  return 0;
}

// Rela records: yields r_addend.
template <typename ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}

// Crel records: yields r_addend.
template <typename ELFT>
static inline int64_t getAddend(const typename ELFT::Crel &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}
308
309template <typename RelTy>
310inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
311 SmallVector<RelTy, 0> &storage) {
312 auto cmp = [](const RelTy &a, const RelTy &b) {
313 return a.r_offset < b.r_offset;
314 };
315 if (!llvm::is_sorted(rels, cmp)) {
316 storage.assign(rels.begin(), rels.end());
317 llvm::stable_sort(storage, cmp);
318 rels = Relocs<RelTy>(storage);
319 }
320 return rels;
321}
322
323template <bool is64>
324inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
325sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
326 SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
327 return {};
328}
329
330// Returns true if Expr refers a GOT entry. Note that this function returns
331// false for TLS variables even though they need GOT, because TLS variables uses
332// GOT differently than the regular variables.
333bool needsGot(RelExpr expr);
334} // namespace lld::elf
335
336#endif
337