1 | //===- InputSection.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_ELF_INPUT_SECTION_H |
10 | #define LLD_ELF_INPUT_SECTION_H |
11 | |
12 | #include "Config.h" |
13 | #include "Relocations.h" |
14 | #include "lld/Common/CommonLinkerContext.h" |
15 | #include "lld/Common/LLVM.h" |
16 | #include "lld/Common/Memory.h" |
17 | #include "llvm/ADT/CachedHashString.h" |
18 | #include "llvm/ADT/DenseSet.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/ADT/TinyPtrVector.h" |
21 | #include "llvm/Object/ELF.h" |
22 | #include "llvm/Support/Compiler.h" |
23 | |
24 | namespace lld { |
25 | namespace elf { |
26 | |
27 | class InputFile; |
28 | class Symbol; |
29 | |
30 | class Defined; |
31 | struct Partition; |
32 | class SyntheticSection; |
33 | template <class ELFT> class ObjFile; |
34 | class OutputSection; |
35 | |
36 | LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions; |
37 | |
38 | // Returned by InputSectionBase::relsOrRelas. At most one member is empty. |
39 | template <class ELFT> struct RelsOrRelas { |
40 | Relocs<typename ELFT::Rel> rels; |
41 | Relocs<typename ELFT::Rela> relas; |
42 | Relocs<typename ELFT::Crel> crels; |
43 | bool areRelocsRel() const { return rels.size(); } |
44 | bool areRelocsCrel() const { return crels.size(); } |
45 | }; |
46 | |
// Calls f(<extra args>..., relocs) with whichever relocation range of `sec` is
// in use: crels if it has entries, otherwise rels if it has entries, otherwise
// relas. The expansion needs a type named ELFT in scope at the call site and
// introduces a local named `rs` inside its own brace scope.
#define invokeOnRelocs(sec, f, ...)                                            \
  {                                                                            \
    const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>();           \
    if (!rs.areRelocsCrel()) {                                                 \
      if (rs.areRelocsRel())                                                   \
        f(__VA_ARGS__, rs.rels);                                               \
      else                                                                     \
        f(__VA_ARGS__, rs.relas);                                              \
    } else {                                                                   \
      f(__VA_ARGS__, rs.crels);                                                \
    }                                                                          \
  }
57 | |
58 | // This is the base class of all sections that lld handles. Some are sections in |
59 | // input files, some are sections in the produced output file and some exist |
60 | // just as a convenience for implementing special ways of combining some |
61 | // sections. |
62 | class SectionBase { |
63 | public: |
64 | enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output }; |
65 | |
66 | Kind kind() const { return (Kind)sectionKind; } |
67 | |
68 | LLVM_PREFERRED_TYPE(Kind) |
69 | uint8_t sectionKind : 3; |
70 | |
71 | // The next two bit fields are only used by InputSectionBase, but we |
72 | // put them here so the struct packs better. |
73 | |
74 | LLVM_PREFERRED_TYPE(bool) |
75 | uint8_t bss : 1; |
76 | |
77 | // Set for sections that should not be folded by ICF. |
78 | LLVM_PREFERRED_TYPE(bool) |
79 | uint8_t keepUnique : 1; |
80 | |
81 | uint8_t partition = 1; |
82 | uint32_t type; |
83 | StringRef name; |
84 | |
85 | // The 1-indexed partition that this section is assigned to by the garbage |
86 | // collector, or 0 if this section is dead. Normally there is only one |
87 | // partition, so this will either be 0 or 1. |
88 | elf::Partition &getPartition() const; |
89 | |
90 | // These corresponds to the fields in Elf_Shdr. |
91 | uint64_t flags; |
92 | uint32_t addralign; |
93 | uint32_t entsize; |
94 | uint32_t link; |
95 | uint32_t info; |
96 | |
97 | OutputSection *getOutputSection(); |
98 | const OutputSection *getOutputSection() const { |
99 | return const_cast<SectionBase *>(this)->getOutputSection(); |
100 | } |
101 | |
102 | // Translate an offset in the input section to an offset in the output |
103 | // section. |
104 | uint64_t getOffset(uint64_t offset) const; |
105 | |
106 | uint64_t getVA(uint64_t offset = 0) const; |
107 | |
108 | bool isLive() const { return partition != 0; } |
109 | void markLive() { partition = 1; } |
110 | void markDead() { partition = 0; } |
111 | |
112 | protected: |
113 | constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, |
114 | uint32_t entsize, uint32_t addralign, uint32_t type, |
115 | uint32_t info, uint32_t link) |
116 | : sectionKind(sectionKind), bss(false), keepUnique(false), type(type), |
117 | name(name), flags(flags), addralign(addralign), entsize(entsize), |
118 | link(link), info(info) {} |
119 | }; |
120 | |
121 | struct SymbolAnchor { |
122 | uint64_t offset; |
123 | Defined *d; |
124 | bool end; // true for the anchor of st_value+st_size |
125 | }; |
126 | |
127 | struct RelaxAux { |
128 | // This records symbol start and end offsets which will be adjusted according |
129 | // to the nearest relocDeltas element. |
130 | SmallVector<SymbolAnchor, 0> anchors; |
131 | // For relocations[i], the actual offset is |
132 | // r_offset - (i ? relocDeltas[i-1] : 0). |
133 | std::unique_ptr<uint32_t[]> relocDeltas; |
134 | // For relocations[i], the actual type is relocTypes[i]. |
135 | std::unique_ptr<RelType[]> relocTypes; |
136 | SmallVector<uint32_t, 0> writes; |
137 | }; |
138 | |
139 | // This corresponds to a section of an input file. |
140 | class InputSectionBase : public SectionBase { |
141 | public: |
142 | template <class ELFT> |
143 | InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &, |
144 | StringRef name, Kind sectionKind); |
145 | |
146 | InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, |
147 | uint64_t entsize, uint32_t link, uint32_t info, |
148 | uint32_t addralign, ArrayRef<uint8_t> data, StringRef name, |
149 | Kind sectionKind); |
150 | |
151 | static bool classof(const SectionBase *s) { return s->kind() != Output; } |
152 | |
153 | // The file which contains this section. Its dynamic type is usually |
154 | // ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic |
155 | // section). |
156 | InputFile *file; |
157 | |
158 | // Input sections are part of an output section. Special sections |
159 | // like .eh_frame and merge sections are first combined into a |
160 | // synthetic section that is then added to an output section. In all |
161 | // cases this points one level up. |
162 | SectionBase *parent = nullptr; |
163 | |
164 | // Section index of the relocation section if exists. |
165 | uint32_t relSecIdx = 0; |
166 | |
167 | // Getter when the dynamic type is ObjFile<ELFT>. |
168 | template <class ELFT> ObjFile<ELFT> *getFile() const { |
169 | return cast<ObjFile<ELFT>>(file); |
170 | } |
171 | |
172 | // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to |
173 | // indicate the number of bytes which is not counted in the size. This should |
174 | // be reset to zero after uses. |
175 | uint32_t bytesDropped = 0; |
176 | |
177 | mutable bool compressed = false; |
178 | |
179 | // Whether this section is SHT_CREL and has been decoded to RELA by |
180 | // relsOrRelas. |
181 | bool decodedCrel = false; |
182 | |
183 | // Whether the section needs to be padded with a NOP filler due to |
184 | // deleteFallThruJmpInsn. |
185 | bool nopFiller = false; |
186 | |
187 | void drop_back(unsigned num) { |
188 | assert(bytesDropped + num < 256); |
189 | bytesDropped += num; |
190 | } |
191 | |
192 | void push_back(uint64_t num) { |
193 | assert(bytesDropped >= num); |
194 | bytesDropped -= num; |
195 | } |
196 | |
197 | mutable const uint8_t *content_; |
198 | uint64_t size; |
199 | |
200 | void trim() { |
201 | if (bytesDropped) { |
202 | size -= bytesDropped; |
203 | bytesDropped = 0; |
204 | } |
205 | } |
206 | |
207 | ArrayRef<uint8_t> content() const { |
208 | return ArrayRef<uint8_t>(content_, size); |
209 | } |
210 | ArrayRef<uint8_t> contentMaybeDecompress() const { |
211 | if (compressed) |
212 | decompress(); |
213 | return content(); |
214 | } |
215 | |
216 | // The next member in the section group if this section is in a group. This is |
217 | // used by --gc-sections. |
218 | InputSectionBase *nextInSectionGroup = nullptr; |
219 | |
220 | template <class ELFT> |
221 | RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const; |
222 | |
223 | // InputSections that are dependent on us (reverse dependency for GC) |
224 | llvm::TinyPtrVector<InputSection *> dependentSections; |
225 | |
226 | // Returns the size of this section (even if this is a common or BSS.) |
227 | size_t getSize() const; |
228 | |
229 | InputSection *getLinkOrderDep() const; |
230 | |
231 | // Get a symbol that encloses this offset from within the section. If type is |
232 | // not zero, return a symbol with the specified type. |
233 | Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; |
234 | Defined *getEnclosingFunction(uint64_t offset) const { |
235 | return getEnclosingSymbol(offset, type: llvm::ELF::STT_FUNC); |
236 | } |
237 | |
238 | // Returns a source location string. Used to construct an error message. |
239 | std::string getLocation(uint64_t offset) const; |
240 | std::string getSrcMsg(const Symbol &sym, uint64_t offset) const; |
241 | std::string getObjMsg(uint64_t offset) const; |
242 | |
243 | // Each section knows how to relocate itself. These functions apply |
244 | // relocations, assuming that Buf points to this section's copy in |
245 | // the mmap'ed output buffer. |
246 | template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd); |
247 | static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, |
248 | int64_t A, uint64_t P, const Symbol &Sym, |
249 | RelExpr Expr); |
250 | |
251 | // The native ELF reloc data type is not very convenient to handle. |
252 | // So we convert ELF reloc records to our own records in Relocations.cpp. |
253 | // This vector contains such "cooked" relocations. |
254 | SmallVector<Relocation, 0> relocations; |
255 | |
256 | void addReloc(const Relocation &r) { relocations.push_back(Elt: r); } |
257 | MutableArrayRef<Relocation> relocs() { return relocations; } |
258 | ArrayRef<Relocation> relocs() const { return relocations; } |
259 | |
260 | union { |
261 | // These are modifiers to jump instructions that are necessary when basic |
262 | // block sections are enabled. Basic block sections creates opportunities |
263 | // to relax jump instructions at basic block boundaries after reordering the |
264 | // basic blocks. |
265 | JumpInstrMod *jumpInstrMod = nullptr; |
266 | |
267 | // Auxiliary information for RISC-V and LoongArch linker relaxation. |
268 | // They do not use jumpInstrMod. |
269 | RelaxAux *relaxAux; |
270 | |
271 | // The compressed content size when `compressed` is true. |
272 | size_t compressedSize; |
273 | }; |
274 | |
275 | // A function compiled with -fsplit-stack calling a function |
276 | // compiled without -fsplit-stack needs its prologue adjusted. Find |
277 | // such functions and adjust their prologues. This is very similar |
278 | // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more |
279 | // information. |
280 | template <typename ELFT> |
281 | void adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end); |
282 | |
283 | |
284 | template <typename T> llvm::ArrayRef<T> getDataAs() const { |
285 | size_t s = content().size(); |
286 | assert(s % sizeof(T) == 0); |
287 | return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T)); |
288 | } |
289 | |
290 | protected: |
291 | template <typename ELFT> |
292 | void (); |
293 | void decompress() const; |
294 | }; |
295 | |
296 | // SectionPiece represents a piece of splittable section contents. |
297 | // We allocate a lot of these and binary search on them. This means that they |
298 | // have to be as compact as possible, which is why we don't store the size (can |
299 | // be found by looking at the next one). |
300 | struct SectionPiece { |
301 | SectionPiece() = default; |
302 | SectionPiece(size_t off, uint32_t hash, bool live) |
303 | : inputOff(off), live(live), hash(hash >> 1) {} |
304 | |
305 | uint32_t inputOff; |
306 | LLVM_PREFERRED_TYPE(bool) |
307 | uint32_t live : 1; |
308 | uint32_t hash : 31; |
309 | uint64_t outputOff = 0; |
310 | }; |
311 | |
312 | static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big" ); |
313 | |
314 | // This corresponds to a SHF_MERGE section of an input file. |
315 | class MergeInputSection : public InputSectionBase { |
316 | public: |
317 | template <class ELFT> |
318 | MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
319 | StringRef name); |
320 | MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize, |
321 | ArrayRef<uint8_t> data, StringRef name); |
322 | |
323 | static bool classof(const SectionBase *s) { return s->kind() == Merge; } |
324 | void splitIntoPieces(); |
325 | |
326 | // Translate an offset in the input section to an offset in the parent |
327 | // MergeSyntheticSection. |
328 | uint64_t getParentOffset(uint64_t offset) const; |
329 | |
330 | // Splittable sections are handled as a sequence of data |
331 | // rather than a single large blob of data. |
332 | SmallVector<SectionPiece, 0> pieces; |
333 | |
334 | // Returns I'th piece's data. This function is very hot when |
335 | // string merging is enabled, so we want to inline. |
336 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
337 | llvm::CachedHashStringRef getData(size_t i) const { |
338 | size_t begin = pieces[i].inputOff; |
339 | size_t end = |
340 | (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff; |
341 | return {toStringRef(Input: content().slice(N: begin, M: end - begin)), pieces[i].hash}; |
342 | } |
343 | |
344 | // Returns the SectionPiece at a given input section offset. |
345 | SectionPiece &getSectionPiece(uint64_t offset); |
346 | const SectionPiece &getSectionPiece(uint64_t offset) const { |
347 | return const_cast<MergeInputSection *>(this)->getSectionPiece(offset); |
348 | } |
349 | |
350 | SyntheticSection *getParent() const { |
351 | return cast_or_null<SyntheticSection>(Val: parent); |
352 | } |
353 | |
354 | private: |
355 | void splitStrings(StringRef s, size_t size); |
356 | void splitNonStrings(ArrayRef<uint8_t> a, size_t size); |
357 | }; |
358 | |
359 | struct EhSectionPiece { |
360 | EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size, |
361 | unsigned firstRelocation) |
362 | : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {} |
363 | |
364 | ArrayRef<uint8_t> data() const { |
365 | return {sec->content().data() + this->inputOff, size}; |
366 | } |
367 | |
368 | size_t inputOff; |
369 | ssize_t outputOff = -1; |
370 | InputSectionBase *sec; |
371 | uint32_t size; |
372 | unsigned firstRelocation; |
373 | }; |
374 | |
375 | // This corresponds to a .eh_frame section of an input file. |
376 | class EhInputSection : public InputSectionBase { |
377 | public: |
378 | template <class ELFT> |
379 | EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
380 | StringRef name); |
381 | static bool classof(const SectionBase *s) { return s->kind() == EHFrame; } |
382 | template <class ELFT> void split(); |
383 | template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels); |
384 | |
385 | // Splittable sections are handled as a sequence of data |
386 | // rather than a single large blob of data. |
387 | SmallVector<EhSectionPiece, 0> cies, fdes; |
388 | |
389 | SyntheticSection *getParent() const; |
390 | uint64_t getParentOffset(uint64_t offset) const; |
391 | }; |
392 | |
393 | // This is a section that is added directly to an output section |
394 | // instead of needing special combination via a synthetic section. This |
395 | // includes all input sections with the exceptions of SHF_MERGE and |
396 | // .eh_frame. It also includes the synthetic sections themselves. |
397 | class InputSection : public InputSectionBase { |
398 | public: |
399 | InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign, |
400 | ArrayRef<uint8_t> data, StringRef name, Kind k = Regular); |
401 | template <class ELFT> |
402 | InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
403 | StringRef name); |
404 | |
405 | static bool classof(const SectionBase *s) { |
406 | return s->kind() == SectionBase::Regular || |
407 | s->kind() == SectionBase::Synthetic || |
408 | s->kind() == SectionBase::Spill; |
409 | } |
410 | |
411 | // Write this section to a mmap'ed file, assuming Buf is pointing to |
412 | // beginning of the output section. |
413 | template <class ELFT> void writeTo(uint8_t *buf); |
414 | |
415 | OutputSection *getParent() const { |
416 | return reinterpret_cast<OutputSection *>(parent); |
417 | } |
418 | |
419 | // This variable has two usages. Initially, it represents an index in the |
420 | // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER |
421 | // sections. After assignAddresses is called, it represents the offset from |
422 | // the beginning of the output section this section was assigned to. |
423 | uint64_t outSecOff = 0; |
424 | |
425 | InputSectionBase *getRelocatedSection() const; |
426 | |
427 | template <class ELFT, class RelTy> |
428 | void relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels); |
429 | |
430 | // Points to the canonical section. If ICF folds two sections, repl pointer of |
431 | // one section points to the other. |
432 | InputSection *repl = this; |
433 | |
434 | // Used by ICF. |
435 | uint32_t eqClass[2] = {0, 0}; |
436 | |
437 | // Called by ICF to merge two input sections. |
438 | void replace(InputSection *other); |
439 | |
440 | static InputSection discarded; |
441 | |
442 | private: |
443 | template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf); |
444 | |
445 | template <class ELFT, class RelTy, class RelIt> |
446 | void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels); |
447 | |
448 | template <class ELFT> void copyShtGroup(uint8_t *buf); |
449 | }; |
450 | |
451 | // A marker for a potential spill location for another input section. This |
452 | // broadly acts as if it were the original section until address assignment. |
453 | // Then it is either replaced with the real input section or removed. |
454 | class PotentialSpillSection : public InputSection { |
455 | public: |
456 | // The containing input section description; used to quickly replace this stub |
457 | // with the actual section. |
458 | InputSectionDescription *isd; |
459 | |
460 | // Next potential spill location for the same source input section. |
461 | PotentialSpillSection *next = nullptr; |
462 | |
463 | PotentialSpillSection(const InputSectionBase &source, |
464 | InputSectionDescription &isd); |
465 | |
466 | static bool classof(const SectionBase *sec) { |
467 | return sec->kind() == InputSectionBase::Spill; |
468 | } |
469 | }; |
470 | |
471 | static_assert(sizeof(InputSection) <= 160, "InputSection is too big" ); |
472 | |
473 | class SyntheticSection : public InputSection { |
474 | public: |
475 | SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign, |
476 | StringRef name) |
477 | : InputSection(ctx.internalFile, flags, type, addralign, {}, name, |
478 | InputSectionBase::Synthetic) {} |
479 | |
480 | virtual ~SyntheticSection() = default; |
481 | virtual size_t getSize() const = 0; |
482 | virtual bool updateAllocSize() { return false; } |
483 | // If the section has the SHF_ALLOC flag and the size may be changed if |
484 | // thunks are added, update the section size. |
485 | virtual bool isNeeded() const { return true; } |
486 | virtual void finalizeContents() {} |
487 | virtual void writeTo(uint8_t *buf) = 0; |
488 | |
489 | static bool classof(const SectionBase *sec) { |
490 | return sec->kind() == InputSectionBase::Synthetic; |
491 | } |
492 | }; |
493 | |
494 | inline bool isStaticRelSecType(uint32_t type) { |
495 | return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL || |
496 | type == llvm::ELF::SHT_REL; |
497 | } |
498 | |
499 | inline bool isDebugSection(const InputSectionBase &sec) { |
500 | return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 && |
501 | sec.name.starts_with(Prefix: ".debug" ); |
502 | } |
503 | |
504 | // The set of TOC entries (.toc + addend) for which we should not apply |
505 | // toc-indirect to toc-relative relaxation. const Symbol * refers to the |
506 | // STT_SECTION symbol associated to the .toc input section. |
507 | extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax; |
508 | |
509 | } // namespace elf |
510 | |
511 | std::string toString(const elf::InputSectionBase *); |
512 | } // namespace lld |
513 | |
514 | #endif |
515 | |