1 | //===- InputSection.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_ELF_INPUT_SECTION_H |
10 | #define LLD_ELF_INPUT_SECTION_H |
11 | |
12 | #include "Config.h" |
13 | #include "Relocations.h" |
14 | #include "lld/Common/CommonLinkerContext.h" |
15 | #include "lld/Common/LLVM.h" |
16 | #include "lld/Common/Memory.h" |
17 | #include "llvm/ADT/CachedHashString.h" |
18 | #include "llvm/ADT/DenseSet.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/ADT/TinyPtrVector.h" |
21 | #include "llvm/Object/ELF.h" |
22 | #include "llvm/Support/Compiler.h" |
23 | |
24 | namespace lld { |
25 | namespace elf { |
26 | |
// Forward declarations for types that are referenced below before (or without)
// their full definitions being visible in this header.
class InputFile;
class Symbol;

class Defined;
struct Partition;
class SyntheticSection;
template <class ELFT> class ObjFile;
class OutputSection;

// Declaration only; the vector of partitions is defined elsewhere.
LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions;
37 | |
38 | // Returned by InputSectionBase::relsOrRelas. At most one member is empty. |
39 | template <class ELFT> struct RelsOrRelas { |
40 | Relocs<typename ELFT::Rel> rels; |
41 | Relocs<typename ELFT::Rela> relas; |
42 | Relocs<typename ELFT::Crel> crels; |
43 | bool areRelocsRel() const { return rels.size(); } |
44 | bool areRelocsCrel() const { return crels.size(); } |
45 | }; |
46 | |
// Calls f on the relocations of `sec`, picking whichever encoding is present.
// The expansion is a braced block that fetches sec.relsOrRelas<ELFT>() and then
// calls f(<extra args>..., relocs), where relocs is rs.crels if CREL
// relocations are present, otherwise rs.rels if REL relocations are present,
// otherwise rs.relas.
//
// Requirements visible from the expansion: a type named ELFT must be in scope
// at the use site, at least one extra argument must be passed (the call is
// spelled f(__VA_ARGS__, ...)), and the block introduces a local named `rs`.
#define invokeOnRelocs(sec, f, ...)                                            \
  {                                                                            \
    const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>();           \
    if (rs.areRelocsCrel())                                                    \
      f(__VA_ARGS__, rs.crels);                                                \
    else if (rs.areRelocsRel())                                                \
      f(__VA_ARGS__, rs.rels);                                                 \
    else                                                                       \
      f(__VA_ARGS__, rs.relas);                                                \
  }
57 | |
58 | // This is the base class of all sections that lld handles. Some are sections in |
59 | // input files, some are sections in the produced output file and some exist |
60 | // just as a convenience for implementing special ways of combining some |
61 | // sections. |
62 | class SectionBase { |
63 | public: |
64 | enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output }; |
65 | |
66 | Kind kind() const { return (Kind)sectionKind; } |
67 | |
68 | LLVM_PREFERRED_TYPE(Kind) |
69 | uint8_t sectionKind : 3; |
70 | |
71 | // The next two bit fields are only used by InputSectionBase, but we |
72 | // put them here so the struct packs better. |
73 | |
74 | LLVM_PREFERRED_TYPE(bool) |
75 | uint8_t bss : 1; |
76 | |
77 | // Set for sections that should not be folded by ICF. |
78 | LLVM_PREFERRED_TYPE(bool) |
79 | uint8_t keepUnique : 1; |
80 | |
81 | uint8_t partition = 1; |
82 | uint32_t type; |
83 | StringRef name; |
84 | |
85 | // The 1-indexed partition that this section is assigned to by the garbage |
86 | // collector, or 0 if this section is dead. Normally there is only one |
87 | // partition, so this will either be 0 or 1. |
88 | elf::Partition &getPartition() const; |
89 | |
90 | // These corresponds to the fields in Elf_Shdr. |
91 | uint64_t flags; |
92 | uint32_t addralign; |
93 | uint32_t entsize; |
94 | uint32_t link; |
95 | uint32_t info; |
96 | |
97 | OutputSection *getOutputSection(); |
98 | const OutputSection *getOutputSection() const { |
99 | return const_cast<SectionBase *>(this)->getOutputSection(); |
100 | } |
101 | |
102 | // Translate an offset in the input section to an offset in the output |
103 | // section. |
104 | uint64_t getOffset(uint64_t offset) const; |
105 | |
106 | uint64_t getVA(uint64_t offset = 0) const; |
107 | |
108 | bool isLive() const { return partition != 0; } |
109 | void markLive() { partition = 1; } |
110 | void markDead() { partition = 0; } |
111 | |
112 | protected: |
113 | constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, |
114 | uint32_t entsize, uint32_t addralign, uint32_t type, |
115 | uint32_t info, uint32_t link) |
116 | : sectionKind(sectionKind), bss(false), keepUnique(false), type(type), |
117 | name(name), flags(flags), addralign(addralign), entsize(entsize), |
118 | link(link), info(info) {} |
119 | }; |
120 | |
// One endpoint (start or end) of a defined symbol, recorded as an offset.
// RelaxAux keeps a list of these so that they can be adjusted.
struct SymbolAnchor {
  uint64_t offset; // offset of this anchor
  Defined *d;      // the symbol this anchor belongs to
  bool end;        // true for the anchor of st_value+st_size
};
126 | |
// Auxiliary per-section state for linker relaxation; InputSectionBase holds a
// pointer to it (relaxAux).
struct RelaxAux {
  // This records symbol start and end offsets which will be adjusted according
  // to the nearest relocDeltas element.
  SmallVector<SymbolAnchor, 0> anchors;
  // For relocations[i], the actual offset is
  //   r_offset - (i ? relocDeltas[i-1] : 0).
  std::unique_ptr<uint32_t[]> relocDeltas;
  // For relocations[i], the actual type is relocTypes[i].
  std::unique_ptr<RelType[]> relocTypes;
  // NOTE(review): the meaning of `writes` is not documented in this header;
  // confirm against the relaxation code that fills it in.
  SmallVector<uint32_t, 0> writes;
};
138 | |
139 | // This corresponds to a section of an input file. |
140 | class InputSectionBase : public SectionBase { |
141 | public: |
142 | template <class ELFT> |
143 | InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &, |
144 | StringRef name, Kind sectionKind); |
145 | |
146 | InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, |
147 | uint64_t entsize, uint32_t link, uint32_t info, |
148 | uint32_t addralign, ArrayRef<uint8_t> data, StringRef name, |
149 | Kind sectionKind); |
150 | |
151 | static bool classof(const SectionBase *s) { return s->kind() != Output; } |
152 | |
153 | // The file which contains this section. Its dynamic type is usually |
154 | // ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic |
155 | // section). |
156 | InputFile *file; |
157 | |
158 | // Input sections are part of an output section. Special sections |
159 | // like .eh_frame and merge sections are first combined into a |
160 | // synthetic section that is then added to an output section. In all |
161 | // cases this points one level up. |
162 | SectionBase *parent = nullptr; |
163 | |
164 | // Section index of the relocation section if exists. |
165 | uint32_t relSecIdx = 0; |
166 | |
167 | // Getter when the dynamic type is ObjFile<ELFT>. |
168 | template <class ELFT> ObjFile<ELFT> *getFile() const { |
169 | return cast<ObjFile<ELFT>>(file); |
170 | } |
171 | |
172 | // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to |
173 | // indicate the number of bytes which is not counted in the size. This should |
174 | // be reset to zero after uses. |
175 | uint32_t bytesDropped = 0; |
176 | |
177 | mutable bool compressed = false; |
178 | |
179 | // Whether the section needs to be padded with a NOP filler due to |
180 | // deleteFallThruJmpInsn. |
181 | bool nopFiller = false; |
182 | |
183 | void drop_back(unsigned num) { |
184 | assert(bytesDropped + num < 256); |
185 | bytesDropped += num; |
186 | } |
187 | |
188 | void push_back(uint64_t num) { |
189 | assert(bytesDropped >= num); |
190 | bytesDropped -= num; |
191 | } |
192 | |
193 | mutable const uint8_t *content_; |
194 | uint64_t size; |
195 | |
196 | void trim() { |
197 | if (bytesDropped) { |
198 | size -= bytesDropped; |
199 | bytesDropped = 0; |
200 | } |
201 | } |
202 | |
203 | ArrayRef<uint8_t> content() const { |
204 | return ArrayRef<uint8_t>(content_, size); |
205 | } |
206 | ArrayRef<uint8_t> contentMaybeDecompress() const { |
207 | if (compressed) |
208 | decompress(); |
209 | return content(); |
210 | } |
211 | |
212 | // The next member in the section group if this section is in a group. This is |
213 | // used by --gc-sections. |
214 | InputSectionBase *nextInSectionGroup = nullptr; |
215 | |
216 | template <class ELFT> |
217 | RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const; |
218 | |
219 | // InputSections that are dependent on us (reverse dependency for GC) |
220 | llvm::TinyPtrVector<InputSection *> dependentSections; |
221 | |
222 | // Returns the size of this section (even if this is a common or BSS.) |
223 | size_t getSize() const; |
224 | |
225 | InputSection *getLinkOrderDep() const; |
226 | |
227 | // Get a symbol that encloses this offset from within the section. If type is |
228 | // not zero, return a symbol with the specified type. |
229 | Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; |
230 | Defined *getEnclosingFunction(uint64_t offset) const { |
231 | return getEnclosingSymbol(offset, type: llvm::ELF::STT_FUNC); |
232 | } |
233 | |
234 | // Returns a source location string. Used to construct an error message. |
235 | std::string getLocation(uint64_t offset) const; |
236 | std::string getSrcMsg(const Symbol &sym, uint64_t offset) const; |
237 | std::string getObjMsg(uint64_t offset) const; |
238 | |
239 | // Each section knows how to relocate itself. These functions apply |
240 | // relocations, assuming that Buf points to this section's copy in |
241 | // the mmap'ed output buffer. |
242 | template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd); |
243 | static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, |
244 | int64_t A, uint64_t P, const Symbol &Sym, |
245 | RelExpr Expr); |
246 | |
247 | // The native ELF reloc data type is not very convenient to handle. |
248 | // So we convert ELF reloc records to our own records in Relocations.cpp. |
249 | // This vector contains such "cooked" relocations. |
250 | SmallVector<Relocation, 0> relocations; |
251 | |
252 | void addReloc(const Relocation &r) { relocations.push_back(Elt: r); } |
253 | MutableArrayRef<Relocation> relocs() { return relocations; } |
254 | ArrayRef<Relocation> relocs() const { return relocations; } |
255 | |
256 | union { |
257 | // These are modifiers to jump instructions that are necessary when basic |
258 | // block sections are enabled. Basic block sections creates opportunities |
259 | // to relax jump instructions at basic block boundaries after reordering the |
260 | // basic blocks. |
261 | JumpInstrMod *jumpInstrMod = nullptr; |
262 | |
263 | // Auxiliary information for RISC-V and LoongArch linker relaxation. |
264 | // They do not use jumpInstrMod. |
265 | RelaxAux *relaxAux; |
266 | |
267 | // The compressed content size when `compressed` is true. |
268 | size_t compressedSize; |
269 | }; |
270 | |
271 | // A function compiled with -fsplit-stack calling a function |
272 | // compiled without -fsplit-stack needs its prologue adjusted. Find |
273 | // such functions and adjust their prologues. This is very similar |
274 | // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more |
275 | // information. |
276 | template <typename ELFT> |
277 | void adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end); |
278 | |
279 | |
280 | template <typename T> llvm::ArrayRef<T> getDataAs() const { |
281 | size_t s = content().size(); |
282 | assert(s % sizeof(T) == 0); |
283 | return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T)); |
284 | } |
285 | |
286 | protected: |
287 | template <typename ELFT> |
288 | void (); |
289 | void decompress() const; |
290 | }; |
291 | |
// SectionPiece represents a piece of splittable section contents.
// We allocate a lot of these and binary search on them. This means that they
// have to be as compact as possible, which is why we don't store the size (can
// be found by looking at the next one).
struct SectionPiece {
  SectionPiece() = default;
  // `off` is narrowed to 32 bits. Only 31 bits are available for the hash, so
  // the lowest bit of `hash` is discarded (hash >> 1).
  SectionPiece(size_t off, uint32_t hash, bool live)
      : inputOff(off), live(live), hash(hash >> 1) {}

  // Offset of this piece within the input section.
  uint32_t inputOff;
  // `live` and `hash` are packed into a single 32-bit word.
  LLVM_PREFERRED_TYPE(bool)
  uint32_t live : 1;
  uint32_t hash : 31;
  uint64_t outputOff = 0;
};

// Guards the compactness requirement described above.
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
309 | |
310 | // This corresponds to a SHF_MERGE section of an input file. |
311 | class MergeInputSection : public InputSectionBase { |
312 | public: |
313 | template <class ELFT> |
314 | MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
315 | StringRef name); |
316 | MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize, |
317 | ArrayRef<uint8_t> data, StringRef name); |
318 | |
319 | static bool classof(const SectionBase *s) { return s->kind() == Merge; } |
320 | void splitIntoPieces(); |
321 | |
322 | // Translate an offset in the input section to an offset in the parent |
323 | // MergeSyntheticSection. |
324 | uint64_t getParentOffset(uint64_t offset) const; |
325 | |
326 | // Splittable sections are handled as a sequence of data |
327 | // rather than a single large blob of data. |
328 | SmallVector<SectionPiece, 0> pieces; |
329 | |
330 | // Returns I'th piece's data. This function is very hot when |
331 | // string merging is enabled, so we want to inline. |
332 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
333 | llvm::CachedHashStringRef getData(size_t i) const { |
334 | size_t begin = pieces[i].inputOff; |
335 | size_t end = |
336 | (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff; |
337 | return {toStringRef(Input: content().slice(N: begin, M: end - begin)), pieces[i].hash}; |
338 | } |
339 | |
340 | // Returns the SectionPiece at a given input section offset. |
341 | SectionPiece &getSectionPiece(uint64_t offset); |
342 | const SectionPiece &getSectionPiece(uint64_t offset) const { |
343 | return const_cast<MergeInputSection *>(this)->getSectionPiece(offset); |
344 | } |
345 | |
346 | SyntheticSection *getParent() const { |
347 | return cast_or_null<SyntheticSection>(Val: parent); |
348 | } |
349 | |
350 | private: |
351 | void splitStrings(StringRef s, size_t size); |
352 | void splitNonStrings(ArrayRef<uint8_t> a, size_t size); |
353 | }; |
354 | |
355 | struct EhSectionPiece { |
356 | EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size, |
357 | unsigned firstRelocation) |
358 | : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {} |
359 | |
360 | ArrayRef<uint8_t> data() const { |
361 | return {sec->content().data() + this->inputOff, size}; |
362 | } |
363 | |
364 | size_t inputOff; |
365 | ssize_t outputOff = -1; |
366 | InputSectionBase *sec; |
367 | uint32_t size; |
368 | unsigned firstRelocation; |
369 | }; |
370 | |
// This corresponds to a .eh_frame section of an input file.
class EhInputSection : public InputSectionBase {
public:
  // Constructs the section from an object file and one of its section headers.
  template <class ELFT>
  EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &,
                 StringRef name);
  static bool classof(const SectionBase *s) { return s->kind() == EHFrame; }
  // Two split overloads: one without arguments and one that takes the
  // relocation list explicitly.
  template <class ELFT> void split();
  template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels);

  // Splittable sections are handled as a sequence of data
  // rather than a single large blob of data.
  SmallVector<EhSectionPiece, 0> cies, fdes;

  SyntheticSection *getParent() const;
  uint64_t getParentOffset(uint64_t offset) const;
};
388 | |
389 | // This is a section that is added directly to an output section |
390 | // instead of needing special combination via a synthetic section. This |
391 | // includes all input sections with the exceptions of SHF_MERGE and |
392 | // .eh_frame. It also includes the synthetic sections themselves. |
393 | class InputSection : public InputSectionBase { |
394 | public: |
395 | InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign, |
396 | ArrayRef<uint8_t> data, StringRef name, Kind k = Regular); |
397 | template <class ELFT> |
398 | InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
399 | StringRef name); |
400 | |
401 | static bool classof(const SectionBase *s) { |
402 | return s->kind() == SectionBase::Regular || |
403 | s->kind() == SectionBase::Synthetic || |
404 | s->kind() == SectionBase::Spill; |
405 | } |
406 | |
407 | // Write this section to a mmap'ed file, assuming Buf is pointing to |
408 | // beginning of the output section. |
409 | template <class ELFT> void writeTo(uint8_t *buf); |
410 | |
411 | OutputSection *getParent() const { |
412 | return reinterpret_cast<OutputSection *>(parent); |
413 | } |
414 | |
415 | // This variable has two usages. Initially, it represents an index in the |
416 | // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER |
417 | // sections. After assignAddresses is called, it represents the offset from |
418 | // the beginning of the output section this section was assigned to. |
419 | uint64_t outSecOff = 0; |
420 | |
421 | InputSectionBase *getRelocatedSection() const; |
422 | |
423 | template <class ELFT, class RelTy> |
424 | void relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels); |
425 | |
426 | // Points to the canonical section. If ICF folds two sections, repl pointer of |
427 | // one section points to the other. |
428 | InputSection *repl = this; |
429 | |
430 | // Used by ICF. |
431 | uint32_t eqClass[2] = {0, 0}; |
432 | |
433 | // Called by ICF to merge two input sections. |
434 | void replace(InputSection *other); |
435 | |
436 | static InputSection discarded; |
437 | |
438 | private: |
439 | template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf); |
440 | |
441 | template <class ELFT, class RelTy, class RelIt> |
442 | void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels); |
443 | |
444 | template <class ELFT> void copyShtGroup(uint8_t *buf); |
445 | }; |
446 | |
// A marker for a potential spill location for another input section. This
// broadly acts as if it were the original section until address assignment.
// Then it is either replaced with the real input section or removed.
class PotentialSpillSection : public InputSection {
public:
  // The containing input section description; used to quickly replace this stub
  // with the actual section.
  InputSectionDescription *isd;

  // Next potential spill location for the same source input section.
  PotentialSpillSection *next = nullptr;

  // Creates a spill marker for `source` inside the description `isd`.
  PotentialSpillSection(const InputSectionBase &source,
                        InputSectionDescription &isd);

  // Spill markers are identified by the Spill kind.
  static bool classof(const SectionBase *sec) {
    return sec->kind() == InputSectionBase::Spill;
  }
};
466 | |
// Compile-time guard: fails the build if InputSection grows past 160 bytes.
static_assert(sizeof(InputSection) <= 160, "InputSection is too big");
468 | |
// Abstract base for sections of kind Synthetic. They are constructed with
// ctx.internalFile as their file and with empty data; subclasses must provide
// getSize() and writeTo().
class SyntheticSection : public InputSection {
public:
  SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign,
                   StringRef name)
      : InputSection(ctx.internalFile, flags, type, addralign, {}, name,
                     InputSectionBase::Synthetic) {}

  virtual ~SyntheticSection() = default;
  // Size of the section; must be provided by every subclass.
  virtual size_t getSize() const = 0;
  // If the section has the SHF_ALLOC flag and the size may be changed if
  // thunks are added, update the section size. The default does nothing and
  // returns false.
  virtual bool updateAllocSize() { return false; }
  // Defaults to true; subclasses may override.
  virtual bool isNeeded() const { return true; }
  // Hook with an empty default implementation.
  virtual void finalizeContents() {}
  // Writes the section contents to `buf`; must be provided by every subclass.
  virtual void writeTo(uint8_t *buf) = 0;

  static bool classof(const SectionBase *sec) {
    return sec->kind() == InputSectionBase::Synthetic;
  }
};
489 | |
490 | inline bool isStaticRelSecType(uint32_t type) { |
491 | return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL || |
492 | type == llvm::ELF::SHT_REL; |
493 | } |
494 | |
495 | inline bool isDebugSection(const InputSectionBase &sec) { |
496 | return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 && |
497 | sec.name.starts_with(Prefix: ".debug" ); |
498 | } |
499 | |
// The set of TOC entries (.toc + addend) for which we should not apply
// toc-indirect to toc-relative relaxation. The const Symbol * refers to the
// STT_SECTION symbol associated with the .toc input section. Declaration only;
// the set is defined elsewhere.
extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax;
504 | |
505 | } // namespace elf |
506 | |
// Returns a string for the given input section. NOTE(review): presumably a
// human-readable description for diagnostics; the exact format is defined with
// the out-of-line implementation — confirm there.
std::string toString(const elf::InputSectionBase *);
508 | } // namespace lld |
509 | |
510 | #endif |
511 | |