1//===- Relocations.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_RELOCATIONS_H
10#define LLD_ELF_RELOCATIONS_H
11
12#include "lld/Common/LLVM.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Object/ELFTypes.h"
16#include <vector>
17
18namespace lld::elf {
// Forward declarations. The declarations in this header only use these types
// through references and pointers, so their full definitions are not needed
// here.
struct Ctx;
struct ELFSyncStream;
class Defined;
class Undefined;
class Symbol;
class InputSection;
class InputSectionBase;
class OutputSection;
class RelocationBaseSection;
class SectionBase;
29
// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
//
// This is a thin wrapper around the raw 32-bit value. Both the constructor and
// the conversion operator are intentionally implicit so that a RelType can be
// used interchangeably with uint32_t. Both are constexpr, so a RelType can be
// created from, and converted back to, an integer in a constant expression.
struct RelType {
  uint32_t v = 0;
  /*implicit*/ constexpr RelType(uint32_t v = 0) : v(v) {}
  /*implicit*/ constexpr operator uint32_t() const { return v; }
};
36
// Integer type of a jump-instruction modifier; it is the type of
// JumpInstrMod::original.
using JumpModType = uint32_t;
38
// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
// doesn't have to know about architecture-specific details.
//
// No enumerator has an explicit value, so the numeric values follow the
// declaration order below.
enum RelExpr {
  R_ABS,
  R_ADDEND,
  R_ADDEND_NEG,
  R_DTPREL,
  R_GOT,
  R_GOT_OFF,
  R_GOT_PC,
  R_GOTONLY_PC,
  R_GOTPLTONLY_PC,
  R_GOTPLT,
  R_GOTPLTREL,
  R_GOTREL,
  R_GOTPLT_GOTREL,
  R_GOTPLT_PC,
  R_NONE,
  R_PC,
  R_PLT,
  R_PLT_PC,
  R_PLT_GOTPLT,
  R_PLT_GOTREL,
  R_RELAX_HINT,
  R_RELAX_GOT_PC,
  R_RELAX_GOT_PC_NOPIC,
  R_RELAX_TLS_GD_TO_IE,
  R_RELAX_TLS_GD_TO_IE_GOT_OFF,
  R_RELAX_TLS_GD_TO_IE_GOTPLT,
  R_RELAX_TLS_GD_TO_LE,
  R_RELAX_TLS_GD_TO_LE_NEG,
  R_RELAX_TLS_IE_TO_LE,
  R_RELAX_TLS_LD_TO_LE,
  R_SIZE,
  R_TPREL,
  R_TPREL_NEG,
  R_TLSDESC,
  R_TLSDESC_CALL,
  R_TLSDESC_PC,
  R_TLSDESC_GOTPLT,
  R_TLSGD_GOT,
  R_TLSGD_GOTPLT,
  R_TLSGD_PC,
  R_TLSIE_HINT,
  R_TLSLD_GOT,
  R_TLSLD_GOTPLT,
  R_TLSLD_GOT_OFF,
  R_TLSLD_HINT,
  R_TLSLD_PC,

  // The following are abstract relocation types that are each used by only
  // one target.
  //
  // Even though RelExpr is intended to be a target-neutral representation
  // of a relocation type, there are some relocations whose semantics are
  // unique to a target. Such relocations are marked with RE_<TARGET_NAME>.
  RE_AARCH64_GOT_PAGE_PC,
  RE_AARCH64_GOT_PAGE,
  RE_AARCH64_PAGE_PC,
  RE_AARCH64_TLSDESC_PAGE,
  RE_AARCH64_AUTH,
  RE_ARM_PCA,
  RE_ARM_SBREL,
  RE_MIPS_GOTREL,
  RE_MIPS_GOT_GP,
  RE_MIPS_GOT_GP_PC,
  RE_MIPS_GOT_LOCAL_PAGE,
  RE_MIPS_GOT_OFF,
  RE_MIPS_GOT_OFF32,
  RE_MIPS_OSEC_LOCAL_PAGE,
  RE_MIPS_TLSGD,
  RE_MIPS_TLSLD,
  RE_PPC32_PLTREL,
  RE_PPC64_CALL,
  RE_PPC64_CALL_PLT,
  RE_PPC64_TOCBASE,
  RE_RISCV_ADD,
  RE_RISCV_LEB128,
  RE_RISCV_PC_INDIRECT,
  // Same as R_PC but with page-aligned semantics.
  RE_LOONGARCH_PAGE_PC,
  // Same as R_PLT_PC but with page-aligned semantics.
  RE_LOONGARCH_PLT_PAGE_PC,
  // In addition to having page-aligned semantics, LoongArch GOT relocs are
  // also reused for TLS, making the semantics differ from other architectures.
  RE_LOONGARCH_GOT,
  RE_LOONGARCH_GOT_PAGE_PC,
  RE_LOONGARCH_PC_INDIRECT,
  RE_LOONGARCH_TLSGD_PAGE_PC,
  RE_LOONGARCH_TLSDESC_PAGE_PC,
};
130
// Architecture-neutral representation of relocation.
//
// This is a plain aggregate: no member has a default initializer, so every
// field must be set by whoever creates the object.
struct Relocation {
  // Target-independent expression kind (see RelExpr).
  RelExpr expr;
  // Relocation type value (see RelType).
  RelType type;
  // NOTE(review): presumably the offset of the relocated location within its
  // input section -- confirm against the code that fills this in.
  uint64_t offset;
  // Signed addend.
  int64_t addend;
  // Symbol associated with the relocation. Stored as a raw pointer; this
  // struct does not manage the symbol's lifetime.
  Symbol *sym;
};
139
// Manipulate jump instructions with these modifiers. These are used to relax
// jump instruction opcodes at basic block boundaries and are particularly
// useful when basic block sections are enabled.
struct JumpInstrMod {
  // NOTE(review): presumably the offset of the jump instruction within its
  // section -- confirm against the users of this struct.
  uint64_t offset;
  // Modifier value of type JumpModType.
  JumpModType original;
  // NOTE(review): presumably a size in bytes related to the jump instruction
  // -- confirm against the users of this struct.
  unsigned size;
};
148
// The functions below are only declared here; see their definitions for the
// exact behaviour.

// NOTE(review): presumably writes a description of the location (sec, off)
// related to sym into the stream s -- confirm in the definition.
void printLocation(ELFSyncStream &s, InputSectionBase &sec, const Symbol &sym,
                   uint64_t off);

// This function writes undefined symbol diagnostics to an internal buffer.
// Call reportUndefinedSymbols() after calling scanRelocations() to emit
// the diagnostics.
template <class ELFT> void scanRelocations(Ctx &ctx);
template <class ELFT> void checkNoCrossRefs(Ctx &ctx);
void reportUndefinedSymbols(Ctx &);
bool maybeReportUndefined(Ctx &, Undefined &sym, InputSectionBase &sec,
                          uint64_t offset);
void postScanRelocations(Ctx &ctx);
void addGotEntry(Ctx &ctx, Symbol &sym);

// NOTE(review): Hexagon-specific TLS helpers, judging by their names; confirm
// in the definitions.
void hexagonTLSSymbolUpdate(Ctx &ctx);
bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);

bool isAbsolute(const Symbol &sym);
167
// Forward declarations for ThunkCreator, which refers to these types only
// through pointers and std::unique_ptr.
class ThunkSection;
class Thunk;
class InputSectionDescription;
171
// Creates Thunks and keeps track of the ones created so far. The only public
// entry point besides construction and destruction is createThunks(); all
// bookkeeping tables below are private.
class ThunkCreator {
public:
  // Thunk may be incomplete. Avoid inline ctor/dtor.
  ThunkCreator(Ctx &ctx);
  ~ThunkCreator();
  // Return true if Thunks have been added to OutputSections.
  bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections);

private:
  void mergeThunks(ArrayRef<OutputSection *> outputSections);

  ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
                               InputSectionDescription *isd,
                               const Relocation &rel, uint64_t src);

  ThunkSection *getISThunkSec(InputSection *isec);

  void createInitialThunkSections(ArrayRef<OutputSection *> outputSections);

  // NOTE(review): the bool in the returned pair presumably tells whether the
  // Thunk was newly created -- confirm in the definition.
  std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
                                    uint64_t src);

  std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);

  ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
                                uint64_t off, bool isPrefix = false);

  bool normalizeExistingThunk(Relocation &rel, uint64_t src);

  bool addSyntheticLandingPads();

  Ctx &ctx;

  // Record all the available Thunks for a (Symbol, addend) pair, where Symbol
  // is represented as a (section, offset) pair. There may be multiple
  // relocations sharing the same (section, offset + addend) pair. We may revert
  // a relocation back to its original non-Thunk target, and restore the
  // original addend, so we cannot fold offset + addend. A nested pair is used
  // because DenseMapInfo is not specialized for std::tuple.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 SmallVector<std::unique_ptr<Thunk>, 0>>
      thunkedSymbolsBySectionAndAddend;
  // Same kind of record, keyed directly by (Symbol, addend).
  llvm::DenseMap<std::pair<Symbol *, int64_t>,
                 SmallVector<std::unique_ptr<Thunk>, 0>>
      thunkedSymbols;

  // Find a Thunk from the Thunk's symbol definition. We can use this to find
  // the Thunk from a relocation to the Thunk's symbol definition.
  llvm::DenseMap<Symbol *, Thunk *> thunks;

  // Track InputSections that have an inline ThunkSection placed in front.
  // An inline ThunkSection may have control fall through to the section below,
  // so we need to make sure that there is only one of them.
  // The Mips LA25 Thunk is an example of an inline ThunkSection, as is
  // the AArch64BTLandingPadThunk.
  llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;

  // Record landing pads, generated for a section + offset destination.
  // Landing pads are alternative entry points for destinations that need
  // to be reached via thunks that use indirect branches. A destination
  // needs at most one landing pad as that can be reused by all callers.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 std::unique_ptr<Thunk>>
      landingPadsBySectionAndAddend;

  // All the nonLandingPad thunks that have been created, in order of creation.
  // These are non-owning pointers.
  std::vector<Thunk *> allThunks;

  // The number of completed passes of createThunks. This permits us
  // to do one-time initialization on pass 0 and put a limit on the
  // number of times it can be called to prevent infinite loops.
  uint32_t pass = 0;
};
245
// Decode LEB128 without error checking. Only used by performance critical code
// like RelocsCrel.
//
// Bytes are consumed starting at `p` until one without the continuation bit
// (a byte < 128) has been read; on return `p` points just past that byte.
//
// `leb` selects the flavour. Every byte >= `leb` has 128 subtracted before it
// is shifted into the accumulator:
//   - leb == 128 (unsigned): only continuation bytes are affected, which
//     strips their continuation bit.
//   - leb == 64 (signed): additionally, a final byte with bit 6 set becomes
//     negative in modular uint64_t arithmetic, which sign-extends the result.
//
// There is no bounds check and no guard against encodings that are longer
// than the 64-bit accumulator, so the input must be well-formed.
inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
  uint64_t acc = 0, shift = 0, byte;
  do {
    byte = *p++;
    // Branch-free: `byte >= leb` is 0 or 1, so this subtracts either 0 or 128.
    acc |= (byte - 128 * (byte >= leb)) << shift;
    shift += 7;
  } while (byte >= 128);
  return acc;
}
// Unsigned flavour: returns the decoded value and advances `p`.
inline uint64_t readULEB128(const uint8_t *&p) { return readLEB128(p, 128); }
// Signed flavour: the sign-extended bit pattern is returned as int64_t.
inline int64_t readSLEB128(const uint8_t *&p) { return readLEB128(p, 64); }
259
260// This class implements a CREL iterator that does not allocate extra memory.
261template <bool is64> struct RelocsCrel {
262 using uint = std::conditional_t<is64, uint64_t, uint32_t>;
263 struct const_iterator {
264 using iterator_category = std::forward_iterator_tag;
265 using value_type = llvm::object::Elf_Crel_Impl<is64>;
266 using difference_type = ptrdiff_t;
267 using pointer = value_type *;
268 using reference = const value_type &;
269 uint32_t count;
270 uint8_t flagBits, shift;
271 const uint8_t *p;
272 llvm::object::Elf_Crel_Impl<is64> crel{};
273 const_iterator(size_t hdr, const uint8_t *p)
274 : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
275 if (count)
276 step();
277 }
278 void step() {
279 // See object::decodeCrel.
280 const uint8_t b = *p++;
281 crel.r_offset += b >> flagBits << shift;
282 if (b >= 0x80)
283 crel.r_offset +=
284 ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
285 if (b & 1)
286 crel.r_symidx += readSLEB128(p);
287 if (b & 2)
288 crel.r_type += readSLEB128(p);
289 if (b & 4 && flagBits == 3)
290 crel.r_addend += static_cast<uint>(readSLEB128(p));
291 }
292 llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
293 const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
294 return &crel;
295 }
296 // For llvm::enumerate.
297 bool operator==(const const_iterator &r) const { return count == r.count; }
298 bool operator!=(const const_iterator &r) const { return count != r.count; }
299 const_iterator &operator++() {
300 if (--count)
301 step();
302 return *this;
303 }
304 // For RelocScan::scan when TLS relocations consume multiple entries.
305 void operator+=(size_t n) {
306 for (; n; --n)
307 operator++();
308 }
309 };
310
311 size_t hdr = 0;
312 const uint8_t *p = nullptr;
313
314 constexpr RelocsCrel() = default;
315 RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
316 size_t size() const { return hdr / 8; }
317 const_iterator begin() const { return {hdr, p}; }
318 const_iterator end() const { return {0, nullptr}; }
319};
320
// A list of relocations of record type RelTy. The primary template is a thin
// wrapper around ArrayRef<RelTy>: it is default-constructible and implicitly
// constructible from an ArrayRef<RelTy>.
template <class RelTy> struct Relocs : ArrayRef<RelTy> {
  Relocs() = default;
  Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
};

// Partial specialization for CREL records: derives from RelocsCrel instead of
// ArrayRef and inherits its constructors.
template <bool is64>
struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
  using RelocsCrel<is64>::RelocsCrel;
};
330
// Addend accessors, one overload per relocation record type of ELFT. ELFT
// cannot be deduced from the argument, so callers name it explicitly. The
// result is an int64_t to make sure we get the sign extension out of the way
// as early as possible.

// ELFT::Rel: the record is not inspected; the result is always 0.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rel &) {
  return 0;
}
// ELFT::Rela: the result is the record's r_addend field.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}
// ELFT::Crel: the result is the record's r_addend field.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Crel &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}
345
// Declared here, defined elsewhere. Takes the link context and returns a
// reference to a RelocationBaseSection.
RelocationBaseSection &getIRelativeSection(Ctx &ctx);

// Returns true if expr refers to a GOT entry. Note that this function returns
// false for TLS variables even though they need GOT, because TLS variables use
// GOT differently than regular variables.
bool needsGot(RelExpr expr);
352} // namespace lld::elf
353
354#endif
355