1//===- Relocations.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_RELOCATIONS_H
10#define LLD_ELF_RELOCATIONS_H
11
12#include "lld/Common/LLVM.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Object/ELFTypes.h"
16#include <vector>
17
18namespace lld::elf {
19struct Ctx;
20class Defined;
21class Symbol;
22class InputSection;
23class InputSectionBase;
24class OutputSection;
25class RelocationBaseSection;
26class SectionBase;
27
// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
// A thin wrapper over the raw uint32_t ELF r_type value; implicit
// conversions in both directions let call sites treat it as a plain integer.
struct RelType {
  uint32_t v = 0;
  /*implicit*/ constexpr RelType(uint32_t t = 0) : v(t) {}
  /*implicit*/ operator uint32_t() const { return v; }
};
34
35using JumpModType = uint32_t;
36
// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
// doesn't have to know about architecture-specific details.
enum RelExpr {
  R_ABS,
  R_ADDEND,
  R_DTPREL,
  R_GOT,
  R_GOT_OFF,
  R_GOT_PC,
  R_GOTONLY_PC,
  R_GOTPLTONLY_PC,
  R_GOTPLT,
  R_GOTPLTREL,
  R_GOTREL,
  R_GOTPLT_GOTREL,
  R_GOTPLT_PC,
  R_NONE,
  R_PC,
  R_PLT,
  R_PLT_PC,
  R_PLT_GOTPLT,
  R_PLT_GOTREL,
  R_RELAX_HINT,
  R_RELAX_GOT_PC,
  R_RELAX_GOT_PC_NOPIC,
  R_RELAX_TLS_GD_TO_IE,
  R_RELAX_TLS_GD_TO_IE_ABS,
  R_RELAX_TLS_GD_TO_IE_GOT_OFF,
  R_RELAX_TLS_GD_TO_IE_GOTPLT,
  R_RELAX_TLS_GD_TO_LE,
  R_RELAX_TLS_GD_TO_LE_NEG,
  R_RELAX_TLS_IE_TO_LE,
  R_RELAX_TLS_LD_TO_LE,
  R_RELAX_TLS_LD_TO_LE_ABS,
  R_SIZE,
  R_TPREL,
  R_TPREL_NEG,
  R_TLSDESC,
  R_TLSDESC_CALL,
  R_TLSDESC_PC,
  R_TLSDESC_GOTPLT,
  R_TLSGD_GOT,
  R_TLSGD_GOTPLT,
  R_TLSGD_PC,
  R_TLSIE_HINT,
  R_TLSLD_GOT,
  R_TLSLD_GOTPLT,
  R_TLSLD_GOT_OFF,
  R_TLSLD_HINT,
  R_TLSLD_PC,

  // The following are abstract relocation types used for only one target.
  //
  // Even though RelExpr is intended to be a target-neutral representation
  // of a relocation type, there are some relocations whose semantics are
  // unique to a target. Such relocations are marked with RE_<TARGET_NAME>.
  RE_AARCH64_GOT_PAGE_PC,
  RE_AARCH64_AUTH_GOT_PAGE_PC,
  RE_AARCH64_GOT_PAGE,
  RE_AARCH64_AUTH_GOT,
  RE_AARCH64_AUTH_GOT_PC,
  RE_AARCH64_PAGE_PC,
  RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
  RE_AARCH64_TLSDESC_PAGE,
  RE_AARCH64_AUTH_TLSDESC_PAGE,
  RE_AARCH64_AUTH_TLSDESC,
  RE_AARCH64_AUTH,
  RE_ARM_PCA,
  RE_ARM_SBREL,
  RE_MIPS_GOTREL,
  RE_MIPS_GOT_GP,
  RE_MIPS_GOT_GP_PC,
  RE_MIPS_GOT_LOCAL_PAGE,
  RE_MIPS_GOT_OFF,
  RE_MIPS_GOT_OFF32,
  RE_MIPS_TLSGD,
  RE_MIPS_TLSLD,
  RE_PPC32_PLTREL,
  RE_PPC64_CALL,
  RE_PPC64_CALL_PLT,
  RE_PPC64_RELAX_TOC,
  RE_PPC64_TOCBASE,
  RE_PPC64_RELAX_GOT_PC,
  RE_RISCV_ADD,
  RE_RISCV_LEB128,
  RE_RISCV_PC_INDIRECT,
  // Same as R_PC but with page-aligned semantics.
  RE_LOONGARCH_PAGE_PC,
  // Same as R_PLT_PC but with page-aligned semantics.
  RE_LOONGARCH_PLT_PAGE_PC,
  // In addition to having page-aligned semantics, LoongArch GOT relocs are
  // also reused for TLS, making the semantics differ from other architectures.
  RE_LOONGARCH_GOT,
  RE_LOONGARCH_GOT_PAGE_PC,
  RE_LOONGARCH_TLSGD_PAGE_PC,
  RE_LOONGARCH_TLSDESC_PAGE_PC,
  RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
};
136
// Architecture-neutral representation of relocation.
struct Relocation {
  // Target-neutral expression describing how to compute the value.
  RelExpr expr;
  // The original target-specific relocation type.
  RelType type;
  // Location to patch. NOTE(review): presumably relative to the containing
  // input section — confirm at use sites.
  uint64_t offset;
  int64_t addend;
  // The referenced symbol.
  Symbol *sym;
};
145
// Manipulate jump instructions with these modifiers. These are used to relax
// jump instruction opcodes at basic block boundaries and are particularly
// useful when basic block sections are enabled.
struct JumpInstrMod {
  // Location of the jump instruction to modify.
  uint64_t offset;
  // The original opcode value, kept so the modification can be reasoned
  // about or reverted.
  JumpModType original;
  // Size of the instruction being modified. NOTE(review): presumably in
  // bytes — confirm at use sites.
  unsigned size;
};
154
// This function writes undefined symbol diagnostics to an internal buffer.
// Call reportUndefinedSymbols() after calling scanRelocations() to emit
// the diagnostics.
template <class ELFT> void scanRelocations(Ctx &ctx);
template <class ELFT> void checkNoCrossRefs(Ctx &ctx);
void reportUndefinedSymbols(Ctx &);
void postScanRelocations(Ctx &ctx);
void addGotEntry(Ctx &ctx, Symbol &sym);

// Hexagon-specific TLS handling hooks; see the implementations for details.
void hexagonTLSSymbolUpdate(Ctx &ctx);
bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
166
class ThunkSection;
class Thunk;
class InputSectionDescription;

class ThunkCreator {
public:
  // Thunk may be an incomplete type here; the ctor/dtor are defined out of
  // line so the std::unique_ptr<Thunk> members can be destroyed in the .cpp.
  ThunkCreator(Ctx &ctx);
  ~ThunkCreator();
  // Return true if thunks have been added to the output sections.
  bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections);

private:
  void mergeThunks(ArrayRef<OutputSection *> outputSections);

  ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
                               InputSectionDescription *isd,
                               const Relocation &rel, uint64_t src);

  ThunkSection *getISThunkSec(InputSection *isec);

  void createInitialThunkSections(ArrayRef<OutputSection *> outputSections);

  std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
                                    uint64_t src);

  std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);

  ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
                                uint64_t off);

  bool normalizeExistingThunk(Relocation &rel, uint64_t src);

  bool addSyntheticLandingPads();

  Ctx &ctx;

  // Record all the available Thunks for a (Symbol, addend) pair, where Symbol
  // is represented as a (section, offset) pair. There may be multiple
  // relocations sharing the same (section, offset + addend) pair. We may revert
  // a relocation back to its original non-Thunk target, and restore the
  // original addend, so we cannot fold offset + addend. A nested pair is used
  // because DenseMapInfo is not specialized for std::tuple.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 SmallVector<std::unique_ptr<Thunk>, 0>>
      thunkedSymbolsBySectionAndAddend;
  llvm::DenseMap<std::pair<Symbol *, int64_t>,
                 SmallVector<std::unique_ptr<Thunk>, 0>>
      thunkedSymbols;

  // Maps a Thunk's own symbol definition back to the Thunk, so that a
  // relocation targeting a Thunk's symbol can find that Thunk.
  llvm::DenseMap<Symbol *, Thunk *> thunks;

  // Track InputSections that have an inline ThunkSection placed in front.
  // An inline ThunkSection may have control fall through to the section
  // below, so we need to make sure that there is only one of them.
  // The Mips LA25 Thunk is an example of an inline ThunkSection, as is
  // the AArch64BTLandingPadThunk.
  llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;

  // Record landing pads, generated for a section + offset destination.
  // Landing pads are alternative entry points for destinations that need
  // to be reached via thunks that use indirect branches. A destination
  // needs at most one landing pad as that can be reused by all callers.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 std::unique_ptr<Thunk>>
      landingPadsBySectionAndAddend;

  // All the non-landing-pad thunks that have been created, in order of
  // creation.
  std::vector<Thunk *> allThunks;

  // The number of completed passes of createThunks. This permits one-time
  // initialization on pass 0 and puts a limit on the number of times
  // createThunks can be called, preventing infinite loops.
  uint32_t pass = 0;
};
244
// Decode LEB128 without error checking. Only used by performance critical code
// like RelocsCrel.
//
// `leb` selects the variant. With leb == 128 (ULEB128), the high bit of every
// byte is a pure continuation flag and is stripped. With leb == 64 (SLEB128),
// a final byte >= 64 also has 128 subtracted, so the wrap-around arithmetic
// sign-extends the accumulator through bit 6 of the last byte.
inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
  uint64_t acc = 0, shift = 0, byte;
  do {
    byte = *p++;
    acc |= (byte - 128 * (byte >= leb)) << shift;
    shift += 7;
  } while (byte >= 128);
  return acc;
}
// Fixed: the original used clangd inlay-hint syntax (`leb: 128`), which is
// not valid C++; arguments are passed positionally.
inline uint64_t readULEB128(const uint8_t *&p) { return readLEB128(p, 128); }
inline int64_t readSLEB128(const uint8_t *&p) { return readLEB128(p, 64); }
258
259// This class implements a CREL iterator that does not allocate extra memory.
260template <bool is64> struct RelocsCrel {
261 using uint = std::conditional_t<is64, uint64_t, uint32_t>;
262 struct const_iterator {
263 using iterator_category = std::forward_iterator_tag;
264 using value_type = llvm::object::Elf_Crel_Impl<is64>;
265 using difference_type = ptrdiff_t;
266 using pointer = value_type *;
267 using reference = const value_type &;
268 uint32_t count;
269 uint8_t flagBits, shift;
270 const uint8_t *p;
271 llvm::object::Elf_Crel_Impl<is64> crel{};
272 const_iterator(size_t hdr, const uint8_t *p)
273 : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
274 if (count)
275 step();
276 }
277 void step() {
278 // See object::decodeCrel.
279 const uint8_t b = *p++;
280 crel.r_offset += b >> flagBits << shift;
281 if (b >= 0x80)
282 crel.r_offset +=
283 ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
284 if (b & 1)
285 crel.r_symidx += readSLEB128(p);
286 if (b & 2)
287 crel.r_type += readSLEB128(p);
288 if (b & 4 && flagBits == 3)
289 crel.r_addend += static_cast<uint>(readSLEB128(p));
290 }
291 llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
292 const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
293 return &crel;
294 }
295 // For llvm::enumerate.
296 bool operator==(const const_iterator &r) const { return count == r.count; }
297 bool operator!=(const const_iterator &r) const { return count != r.count; }
298 const_iterator &operator++() {
299 if (--count)
300 step();
301 return *this;
302 }
303 // For RelocationScanner::scanOne.
304 void operator+=(size_t n) {
305 for (; n; --n)
306 operator++();
307 }
308 };
309
310 size_t hdr = 0;
311 const uint8_t *p = nullptr;
312
313 constexpr RelocsCrel() = default;
314 RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
315 size_t size() const { return hdr / 8; }
316 const_iterator begin() const { return {hdr, p}; }
317 const_iterator end() const { return {0, nullptr}; }
318};
319
// Wrapper over an array of relocation records; behaves like the underlying
// ArrayRef. Specialized below for the compact CREL encoding.
template <class RelTy> struct Relocs : ArrayRef<RelTy> {
  Relocs() = default;
  Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
};
324
// For CREL, records are variable-length and decoded on the fly, so reuse the
// streaming iterator from RelocsCrel instead of an ArrayRef.
template <bool is64>
struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
  using RelocsCrel<is64>::RelocsCrel;
};
329
// Return an int64_t to make sure we get the sign extension out of the way as
// early as possible.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rel &rel) {
  // REL records carry no r_addend field (implicit addends), so report 0.
  return 0;
}
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &rel) {
  return rel.r_addend;
}
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Crel &rel) {
  return rel.r_addend;
}
344
345template <typename RelTy>
346inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
347 SmallVector<RelTy, 0> &storage) {
348 auto cmp = [](const RelTy &a, const RelTy &b) {
349 return a.r_offset < b.r_offset;
350 };
351 if (!llvm::is_sorted(rels, cmp)) {
352 storage.assign(rels.begin(), rels.end());
353 llvm::stable_sort(storage, cmp);
354 rels = Relocs<RelTy>(storage);
355 }
356 return rels;
357}
358
// CREL records are decoded incrementally from a byte stream and cannot be
// materialized and sorted here; return a default (empty) Relocs instead.
// NOTE(review): callers appear to handle the CREL case without sorted
// storage — confirm at use sites.
template <bool is64>
inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
         SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
  return {};
}
365
RelocationBaseSection &getIRelativeSection(Ctx &ctx);

// Returns true if Expr refers to a GOT entry. Note that this function returns
// false for TLS variables even though they need GOT, because TLS variables use
// the GOT differently than regular variables.
bool needsGot(RelExpr expr);
372} // namespace lld::elf
373
374#endif
375