//===- InputSection.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 | |
9 | #ifndef LLD_ELF_INPUT_SECTION_H |
10 | #define LLD_ELF_INPUT_SECTION_H |
11 | |
12 | #include "Config.h" |
13 | #include "Relocations.h" |
14 | #include "lld/Common/CommonLinkerContext.h" |
15 | #include "lld/Common/LLVM.h" |
16 | #include "lld/Common/Memory.h" |
17 | #include "llvm/ADT/CachedHashString.h" |
18 | #include "llvm/ADT/DenseSet.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/ADT/TinyPtrVector.h" |
21 | #include "llvm/Object/ELF.h" |
22 | #include "llvm/Support/Compiler.h" |
23 | |
24 | namespace lld { |
25 | namespace elf { |
26 | |
// Forward declarations of types that this header names before (or without)
// seeing their full definitions.
class InputFile;
class Symbol;

class Defined;
struct Partition;
class SyntheticSection;
template <class ELFT> class ObjFile;
class OutputSection;
35 | |
36 | // Returned by InputSectionBase::relsOrRelas. At least two members are empty. |
37 | template <class ELFT> struct RelsOrRelas { |
38 | Relocs<typename ELFT::Rel> rels; |
39 | Relocs<typename ELFT::Rela> relas; |
40 | Relocs<typename ELFT::Crel> crels; |
41 | bool areRelocsRel() const { return rels.size(); } |
42 | bool areRelocsCrel() const { return crels.size(); } |
43 | }; |
44 | |
// Fetches the relocations of `sec` and calls `f(<extra args>, relocs)` with
// whichever list is populated: `crels` if non-empty, otherwise `rels` if
// non-empty, otherwise `relas`.
//
// The expansion names `ELFT`, so the macro can only be used where a type (or
// template parameter) called ELFT is in scope. At least one extra argument
// must be supplied, because `__VA_ARGS__` is followed by a comma.
#define invokeOnRelocs(sec, f, ...)                                            \
  {                                                                            \
    const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>();           \
    if (rs.areRelocsCrel())                                                    \
      f(__VA_ARGS__, rs.crels);                                                \
    else if (rs.areRelocsRel())                                                \
      f(__VA_ARGS__, rs.rels);                                                 \
    else                                                                       \
      f(__VA_ARGS__, rs.relas);                                                \
  }
55 | |
56 | // This is the base class of all sections that lld handles. Some are sections in |
57 | // input files, some are sections in the produced output file and some exist |
58 | // just as a convenience for implementing special ways of combining some |
59 | // sections. |
60 | class SectionBase { |
61 | public: |
62 | enum Kind : uint8_t { |
63 | Regular, |
64 | Synthetic, |
65 | Spill, |
66 | EHFrame, |
67 | Merge, |
68 | Output, |
69 | Class, |
70 | }; |
71 | |
72 | Kind kind() const { return sectionKind; } |
73 | |
74 | // The file which contains this section. For InputSectionBase, its dynamic |
75 | // type is usually ObjFile<ELFT>, but may be an InputFile of InternalKind |
76 | // (for a synthetic section). |
77 | InputFile *file; |
78 | |
79 | StringRef name; |
80 | |
81 | // The 1-indexed partition that this section is assigned to by the garbage |
82 | // collector, or 0 if this section is dead. Normally there is only one |
83 | // partition, so this will either be 0 or 1. |
84 | elf::Partition &getPartition(Ctx &) const; |
85 | |
86 | // These corresponds to the fields in Elf_Shdr. |
87 | uint64_t flags; |
88 | uint32_t type; |
89 | uint32_t link; |
90 | uint32_t info; |
91 | uint32_t addralign; |
92 | uint32_t entsize; |
93 | |
94 | Kind sectionKind; |
95 | uint8_t partition = 1; |
96 | |
97 | // The next two bit fields are only used by InputSectionBase, but we |
98 | // put them here so the struct packs better. |
99 | |
100 | Ctx &getCtx() const; |
101 | OutputSection *getOutputSection(); |
102 | const OutputSection *getOutputSection() const { |
103 | return const_cast<SectionBase *>(this)->getOutputSection(); |
104 | } |
105 | |
106 | // Translate an offset in the input section to an offset in the output |
107 | // section. |
108 | uint64_t getOffset(uint64_t offset) const; |
109 | |
110 | uint64_t getVA(uint64_t offset = 0) const; |
111 | |
112 | bool isLive() const { return partition != 0; } |
113 | void markLive() { partition = 1; } |
114 | void markDead() { partition = 0; } |
115 | |
116 | protected: |
117 | constexpr SectionBase(Kind sectionKind, InputFile *file, StringRef name, |
118 | uint32_t type, uint64_t flags, uint32_t link, |
119 | uint32_t info, uint32_t addralign, uint32_t entsize) |
120 | : file(file), name(name), flags(flags), type(type), link(link), |
121 | info(info), addralign(addralign), entsize(entsize), |
122 | sectionKind(sectionKind) {} |
123 | }; |
124 | |
125 | struct SymbolAnchor { |
126 | uint64_t offset; |
127 | Defined *d; |
128 | bool end; // true for the anchor of st_value+st_size |
129 | }; |
130 | |
131 | struct RelaxAux { |
132 | // This records symbol start and end offsets which will be adjusted according |
133 | // to the nearest relocDeltas element. |
134 | SmallVector<SymbolAnchor, 0> anchors; |
135 | // For relocations[i], the actual offset is |
136 | // r_offset - (i ? relocDeltas[i-1] : 0). |
137 | std::unique_ptr<uint32_t[]> relocDeltas; |
138 | // For relocations[i], the actual type is relocTypes[i]. |
139 | std::unique_ptr<RelType[]> relocTypes; |
140 | SmallVector<uint32_t, 0> writes; |
141 | }; |
142 | |
143 | // This corresponds to a section of an input file. |
144 | class InputSectionBase : public SectionBase { |
145 | public: |
146 | struct ObjMsg { |
147 | const InputSectionBase *sec; |
148 | uint64_t offset; |
149 | }; |
150 | struct SrcMsg { |
151 | const InputSectionBase &sec; |
152 | const Symbol &sym; |
153 | uint64_t offset; |
154 | }; |
155 | |
156 | template <class ELFT> |
157 | InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &, |
158 | StringRef name, Kind sectionKind); |
159 | |
160 | InputSectionBase(InputFile *file, StringRef name, uint32_t type, |
161 | uint64_t flags, uint32_t link, uint32_t info, |
162 | uint32_t addralign, uint32_t entsize, ArrayRef<uint8_t> data, |
163 | Kind sectionKind); |
164 | |
165 | static bool classof(const SectionBase *s) { |
166 | return s->kind() != Output && s->kind() != Class; |
167 | } |
168 | |
169 | LLVM_PREFERRED_TYPE(bool) |
170 | uint8_t bss : 1; |
171 | |
172 | // Whether this section is SHT_CREL and has been decoded to RELA by |
173 | // relsOrRelas. |
174 | LLVM_PREFERRED_TYPE(bool) |
175 | uint8_t decodedCrel : 1; |
176 | |
177 | // Set for sections that should not be folded by ICF. |
178 | LLVM_PREFERRED_TYPE(bool) |
179 | uint8_t keepUnique : 1; |
180 | |
181 | // Whether the section needs to be padded with a NOP filler due to |
182 | // deleteFallThruJmpInsn. |
183 | LLVM_PREFERRED_TYPE(bool) |
184 | uint8_t nopFiller : 1; |
185 | |
186 | mutable bool compressed = false; |
187 | |
188 | // Input sections are part of an output section. Special sections |
189 | // like .eh_frame and merge sections are first combined into a |
190 | // synthetic section that is then added to an output section. In all |
191 | // cases this points one level up. |
192 | SectionBase *parent = nullptr; |
193 | |
194 | // Section index of the relocation section if exists. |
195 | uint32_t relSecIdx = 0; |
196 | |
197 | // Getter when the dynamic type is ObjFile<ELFT>. |
198 | template <class ELFT> ObjFile<ELFT> *getFile() const { |
199 | return cast<ObjFile<ELFT>>(file); |
200 | } |
201 | |
202 | // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to |
203 | // indicate the number of bytes which is not counted in the size. This should |
204 | // be reset to zero after uses. |
205 | uint32_t bytesDropped = 0; |
206 | |
207 | void drop_back(unsigned num) { |
208 | assert(bytesDropped + num < 256); |
209 | bytesDropped += num; |
210 | } |
211 | |
212 | void push_back(uint64_t num) { |
213 | assert(bytesDropped >= num); |
214 | bytesDropped -= num; |
215 | } |
216 | |
217 | mutable const uint8_t *content_; |
218 | uint64_t size; |
219 | |
220 | void trim() { |
221 | if (bytesDropped) { |
222 | size -= bytesDropped; |
223 | bytesDropped = 0; |
224 | } |
225 | } |
226 | |
227 | ArrayRef<uint8_t> content() const { |
228 | return ArrayRef<uint8_t>(content_, size); |
229 | } |
230 | ArrayRef<uint8_t> contentMaybeDecompress() const { |
231 | if (compressed) |
232 | decompress(); |
233 | return content(); |
234 | } |
235 | |
236 | // The next member in the section group if this section is in a group. This is |
237 | // used by --gc-sections. |
238 | InputSectionBase *nextInSectionGroup = nullptr; |
239 | |
240 | template <class ELFT> |
241 | RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const; |
242 | |
243 | // InputSections that are dependent on us (reverse dependency for GC) |
244 | llvm::TinyPtrVector<InputSection *> dependentSections; |
245 | |
246 | // Returns the size of this section (even if this is a common or BSS.) |
247 | size_t getSize() const; |
248 | |
249 | InputSection *getLinkOrderDep() const; |
250 | |
251 | // Get a symbol that encloses this offset from within the section. If type is |
252 | // not zero, return a symbol with the specified type. |
253 | Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; |
254 | Defined *getEnclosingFunction(uint64_t offset) const { |
255 | return getEnclosingSymbol(offset, type: llvm::ELF::STT_FUNC); |
256 | } |
257 | |
258 | // Returns a source location string. Used to construct an error message. |
259 | std::string getLocation(uint64_t offset) const; |
260 | ObjMsg getObjMsg(uint64_t offset) const { return {.sec: this, .offset: offset}; } |
261 | SrcMsg getSrcMsg(const Symbol &sym, uint64_t offset) const { |
262 | return {.sec: *this, .sym: sym, .offset: offset}; |
263 | } |
264 | |
265 | // Each section knows how to relocate itself. These functions apply |
266 | // relocations, assuming that Buf points to this section's copy in |
267 | // the mmap'ed output buffer. |
268 | template <class ELFT> void relocate(Ctx &, uint8_t *buf, uint8_t *bufEnd); |
269 | uint64_t getRelocTargetVA(Ctx &, const Relocation &r, uint64_t p) const; |
270 | |
271 | // The native ELF reloc data type is not very convenient to handle. |
272 | // So we convert ELF reloc records to our own records in Relocations.cpp. |
273 | // This vector contains such "cooked" relocations. |
274 | SmallVector<Relocation, 0> relocations; |
275 | |
276 | void addReloc(const Relocation &r) { relocations.push_back(Elt: r); } |
277 | MutableArrayRef<Relocation> relocs() { return relocations; } |
278 | ArrayRef<Relocation> relocs() const { return relocations; } |
279 | |
280 | union { |
281 | // These are modifiers to jump instructions that are necessary when basic |
282 | // block sections are enabled. Basic block sections creates opportunities |
283 | // to relax jump instructions at basic block boundaries after reordering the |
284 | // basic blocks. |
285 | JumpInstrMod *jumpInstrMod = nullptr; |
286 | |
287 | // Auxiliary information for RISC-V and LoongArch linker relaxation. |
288 | // They do not use jumpInstrMod. |
289 | RelaxAux *relaxAux; |
290 | |
291 | // The compressed content size when `compressed` is true. |
292 | size_t compressedSize; |
293 | }; |
294 | |
295 | // A function compiled with -fsplit-stack calling a function |
296 | // compiled without -fsplit-stack needs its prologue adjusted. Find |
297 | // such functions and adjust their prologues. This is very similar |
298 | // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more |
299 | // information. |
300 | template <typename ELFT> |
301 | void adjustSplitStackFunctionPrologues(Ctx &, uint8_t *buf, uint8_t *end); |
302 | |
303 | template <typename T> llvm::ArrayRef<T> getDataAs() const { |
304 | size_t s = content().size(); |
305 | assert(s % sizeof(T) == 0); |
306 | return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T)); |
307 | } |
308 | |
309 | protected: |
310 | template <typename ELFT> void (Ctx &); |
311 | void decompress() const; |
312 | }; |
313 | |
314 | // SectionPiece represents a piece of splittable section contents. |
315 | // We allocate a lot of these and binary search on them. This means that they |
316 | // have to be as compact as possible, which is why we don't store the size (can |
317 | // be found by looking at the next one). |
318 | struct SectionPiece { |
319 | SectionPiece() = default; |
320 | SectionPiece(size_t off, uint32_t hash, bool live) |
321 | : inputOff(off), live(live), hash(hash >> 1) {} |
322 | |
323 | uint32_t inputOff; |
324 | LLVM_PREFERRED_TYPE(bool) |
325 | uint32_t live : 1; |
326 | uint32_t hash : 31; |
327 | uint64_t outputOff = 0; |
328 | }; |
329 | |
330 | static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big" ); |
331 | |
332 | // This corresponds to a SHF_MERGE section of an input file. |
333 | class MergeInputSection : public InputSectionBase { |
334 | public: |
335 | template <class ELFT> |
336 | MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
337 | StringRef name); |
338 | MergeInputSection(Ctx &, StringRef name, uint32_t type, uint64_t flags, |
339 | uint64_t entsize, ArrayRef<uint8_t> data); |
340 | |
341 | static bool classof(const SectionBase *s) { return s->kind() == Merge; } |
342 | void splitIntoPieces(); |
343 | |
344 | // Translate an offset in the input section to an offset in the parent |
345 | // MergeSyntheticSection. |
346 | uint64_t getParentOffset(uint64_t offset) const; |
347 | |
348 | // Splittable sections are handled as a sequence of data |
349 | // rather than a single large blob of data. |
350 | SmallVector<SectionPiece, 0> pieces; |
351 | |
352 | // Returns I'th piece's data. This function is very hot when |
353 | // string merging is enabled, so we want to inline. |
354 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
355 | llvm::CachedHashStringRef getData(size_t i) const { |
356 | size_t begin = pieces[i].inputOff; |
357 | size_t end = |
358 | (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff; |
359 | return {toStringRef(Input: content().slice(N: begin, M: end - begin)), pieces[i].hash}; |
360 | } |
361 | |
362 | // Returns the SectionPiece at a given input section offset. |
363 | SectionPiece &getSectionPiece(uint64_t offset); |
364 | const SectionPiece &getSectionPiece(uint64_t offset) const { |
365 | return const_cast<MergeInputSection *>(this)->getSectionPiece(offset); |
366 | } |
367 | |
368 | SyntheticSection *getParent() const { |
369 | return cast_or_null<SyntheticSection>(Val: parent); |
370 | } |
371 | |
372 | private: |
373 | void splitStrings(StringRef s, size_t size); |
374 | void splitNonStrings(ArrayRef<uint8_t> a, size_t size); |
375 | }; |
376 | |
377 | struct EhSectionPiece { |
378 | EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size, |
379 | unsigned firstRelocation) |
380 | : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {} |
381 | |
382 | ArrayRef<uint8_t> data() const { |
383 | return {sec->content().data() + this->inputOff, size}; |
384 | } |
385 | |
386 | size_t inputOff; |
387 | ssize_t outputOff = -1; |
388 | InputSectionBase *sec; |
389 | uint32_t size; |
390 | unsigned firstRelocation; |
391 | }; |
392 | |
393 | // This corresponds to a .eh_frame section of an input file. |
394 | class EhInputSection : public InputSectionBase { |
395 | public: |
396 | template <class ELFT> |
397 | EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
398 | StringRef name); |
399 | static bool classof(const SectionBase *s) { return s->kind() == EHFrame; } |
400 | template <class ELFT> void split(); |
401 | template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels); |
402 | |
403 | // Splittable sections are handled as a sequence of data |
404 | // rather than a single large blob of data. |
405 | SmallVector<EhSectionPiece, 0> cies, fdes; |
406 | |
407 | SyntheticSection *getParent() const; |
408 | uint64_t getParentOffset(uint64_t offset) const; |
409 | }; |
410 | |
411 | // This is a section that is added directly to an output section |
412 | // instead of needing special combination via a synthetic section. This |
413 | // includes all input sections with the exceptions of SHF_MERGE and |
414 | // .eh_frame. It also includes the synthetic sections themselves. |
415 | class InputSection : public InputSectionBase { |
416 | public: |
417 | InputSection(InputFile *f, StringRef name, uint32_t type, uint64_t flags, |
418 | uint32_t addralign, uint32_t entsize, ArrayRef<uint8_t> data, |
419 | Kind k = Regular); |
420 | template <class ELFT> |
421 | InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &, |
422 | StringRef name); |
423 | |
424 | static bool classof(const SectionBase *s) { |
425 | return s->kind() == SectionBase::Regular || |
426 | s->kind() == SectionBase::Synthetic || |
427 | s->kind() == SectionBase::Spill; |
428 | } |
429 | |
430 | // Write this section to a mmap'ed file, assuming Buf is pointing to |
431 | // beginning of the output section. |
432 | template <class ELFT> void writeTo(Ctx &, uint8_t *buf); |
433 | |
434 | OutputSection *getParent() const { |
435 | return reinterpret_cast<OutputSection *>(parent); |
436 | } |
437 | |
438 | // This variable has two usages. Initially, it represents an index in the |
439 | // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER |
440 | // sections. After assignAddresses is called, it represents the offset from |
441 | // the beginning of the output section this section was assigned to. |
442 | uint64_t outSecOff = 0; |
443 | |
444 | InputSectionBase *getRelocatedSection() const; |
445 | |
446 | template <class ELFT, class RelTy> |
447 | void relocateNonAlloc(Ctx &, uint8_t *buf, Relocs<RelTy> rels); |
448 | |
449 | // Points to the canonical section. If ICF folds two sections, repl pointer of |
450 | // one section points to the other. |
451 | InputSection *repl = this; |
452 | |
453 | // Used by ICF. |
454 | uint32_t eqClass[2] = {0, 0}; |
455 | |
456 | // Called by ICF to merge two input sections. |
457 | void replace(InputSection *other); |
458 | |
459 | static InputSection discarded; |
460 | |
461 | private: |
462 | template <class ELFT, class RelTy> void copyRelocations(Ctx &, uint8_t *buf); |
463 | |
464 | template <class ELFT, class RelTy, class RelIt> |
465 | void copyRelocations(Ctx &, uint8_t *buf, llvm::iterator_range<RelIt> rels); |
466 | |
467 | template <class ELFT> void copyShtGroup(uint8_t *buf); |
468 | }; |
469 | |
470 | // A marker for a potential spill location for another input section. This |
471 | // broadly acts as if it were the original section until address assignment. |
472 | // Then it is either replaced with the real input section or removed. |
473 | class PotentialSpillSection : public InputSection { |
474 | public: |
475 | // The containing input section description; used to quickly replace this stub |
476 | // with the actual section. |
477 | InputSectionDescription *isd; |
478 | |
479 | // Next potential spill location for the same source input section. |
480 | PotentialSpillSection *next = nullptr; |
481 | |
482 | PotentialSpillSection(const InputSectionBase &source, |
483 | InputSectionDescription &isd); |
484 | |
485 | static bool classof(const SectionBase *sec) { |
486 | return sec->kind() == InputSectionBase::Spill; |
487 | } |
488 | }; |
489 | |
490 | #ifndef _WIN32 |
491 | static_assert(sizeof(InputSection) <= 152, "InputSection is too big" ); |
492 | #endif |
493 | |
494 | class SyntheticSection : public InputSection { |
495 | public: |
496 | Ctx &ctx; |
497 | SyntheticSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags, |
498 | uint32_t addralign) |
499 | : InputSection(ctx.internalFile, name, type, flags, addralign, |
500 | /*entsize=*/0, {}, InputSectionBase::Synthetic), |
501 | ctx(ctx) {} |
502 | |
503 | virtual ~SyntheticSection() = default; |
504 | virtual size_t getSize() const = 0; |
505 | virtual bool updateAllocSize(Ctx &) { return false; } |
506 | // If the section has the SHF_ALLOC flag and the size may be changed if |
507 | // thunks are added, update the section size. |
508 | virtual bool isNeeded() const { return true; } |
509 | virtual void finalizeContents() {} |
510 | virtual void writeTo(uint8_t *buf) = 0; |
511 | |
512 | static bool classof(const SectionBase *sec) { |
513 | return sec->kind() == InputSectionBase::Synthetic; |
514 | } |
515 | }; |
516 | |
517 | inline bool isStaticRelSecType(uint32_t type) { |
518 | return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL || |
519 | type == llvm::ELF::SHT_REL; |
520 | } |
521 | |
522 | inline bool isDebugSection(const InputSectionBase &sec) { |
523 | return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 && |
524 | sec.name.starts_with(Prefix: ".debug" ); |
525 | } |
526 | |
527 | std::string toStr(elf::Ctx &, const elf::InputSectionBase *); |
528 | const ELFSyncStream &operator<<(const ELFSyncStream &, |
529 | const InputSectionBase *); |
530 | const ELFSyncStream &operator<<(const ELFSyncStream &, |
531 | InputSectionBase::ObjMsg &&); |
532 | const ELFSyncStream &operator<<(const ELFSyncStream &, |
533 | InputSectionBase::SrcMsg &&); |
534 | } // namespace elf |
535 | } // namespace lld |
536 | |
537 | #endif |
538 | |