1 | //===- MarkLive.cpp -------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements --gc-sections, which is a feature to remove unused |
10 | // sections from output. Unused sections are sections that are not reachable |
11 | // from known GC-root symbols or sections. Naturally the feature is |
12 | // implemented as a mark-sweep garbage collector. |
13 | // |
14 | // Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off |
15 | // by default. Starting with GC-root symbols or sections, markLive function |
16 | // defined in this file visits all reachable sections to set their Live |
17 | // bits. Writer will then ignore sections whose Live bits are off, so that |
18 | // such sections are not included into output. |
19 | // |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "MarkLive.h" |
23 | #include "InputFiles.h" |
24 | #include "InputSection.h" |
25 | #include "LinkerScript.h" |
26 | #include "SymbolTable.h" |
27 | #include "Symbols.h" |
28 | #include "SyntheticSections.h" |
29 | #include "Target.h" |
30 | #include "lld/Common/CommonLinkerContext.h" |
31 | #include "lld/Common/Strings.h" |
32 | #include "llvm/ADT/STLExtras.h" |
33 | #include "llvm/Object/ELF.h" |
34 | #include "llvm/Support/TimeProfiler.h" |
35 | #include <vector> |
36 | |
37 | using namespace llvm; |
38 | using namespace llvm::ELF; |
39 | using namespace llvm::object; |
40 | using namespace llvm::support::endian; |
41 | using namespace lld; |
42 | using namespace lld::elf; |
43 | |
44 | namespace { |
45 | template <class ELFT> class MarkLive { |
46 | public: |
47 | MarkLive(unsigned partition) : partition(partition) {} |
48 | |
49 | void run(); |
50 | void moveToMain(); |
51 | |
52 | private: |
53 | void enqueue(InputSectionBase *sec, uint64_t offset); |
54 | void markSymbol(Symbol *sym); |
55 | void mark(); |
56 | |
57 | template <class RelTy> |
58 | void resolveReloc(InputSectionBase &sec, RelTy &rel, bool fromFDE); |
59 | |
60 | template <class RelTy> |
61 | void scanEhFrameSection(EhInputSection &eh, ArrayRef<RelTy> rels); |
62 | |
63 | // The index of the partition that we are currently processing. |
64 | unsigned partition; |
65 | |
66 | // A list of sections to visit. |
67 | SmallVector<InputSection *, 0> queue; |
68 | |
69 | // There are normally few input sections whose names are valid C |
70 | // identifiers, so we just store a SmallVector instead of a multimap. |
71 | DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections; |
72 | }; |
73 | } // namespace |
74 | |
75 | template <class ELFT> |
76 | static uint64_t getAddend(InputSectionBase &sec, |
77 | const typename ELFT::Rel &rel) { |
78 | return target->getImplicitAddend(buf: sec.content().begin() + rel.r_offset, |
79 | type: rel.getType(config->isMips64EL)); |
80 | } |
81 | |
82 | template <class ELFT> |
83 | static uint64_t getAddend(InputSectionBase &sec, |
84 | const typename ELFT::Rela &rel) { |
85 | return rel.r_addend; |
86 | } |
87 | |
88 | // Currently, we assume all input CREL relocations have an explicit addend. |
89 | template <class ELFT> |
90 | static uint64_t getAddend(InputSectionBase &sec, |
91 | const typename ELFT::Crel &rel) { |
92 | return rel.r_addend; |
93 | } |
94 | |
95 | template <class ELFT> |
96 | template <class RelTy> |
97 | void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel, |
98 | bool fromFDE) { |
99 | // If a symbol is referenced in a live section, it is used. |
100 | Symbol &sym = sec.file->getRelocTargetSym(rel); |
101 | sym.used = true; |
102 | |
103 | if (auto *d = dyn_cast<Defined>(Val: &sym)) { |
104 | auto *relSec = dyn_cast_or_null<InputSectionBase>(Val: d->section); |
105 | if (!relSec) |
106 | return; |
107 | |
108 | uint64_t offset = d->value; |
109 | if (d->isSection()) |
110 | offset += getAddend<ELFT>(sec, rel); |
111 | |
112 | // fromFDE being true means this is referenced by a FDE in a .eh_frame |
113 | // piece. The relocation points to the described function or to a LSDA. We |
114 | // only need to keep the LSDA live, so ignore anything that points to |
115 | // executable sections. If the LSDA is in a section group or has the |
116 | // SHF_LINK_ORDER flag, we ignore the relocation as well because (a) if the |
117 | // associated text section is live, the LSDA will be retained due to section |
118 | // group/SHF_LINK_ORDER rules (b) if the associated text section should be |
119 | // discarded, marking the LSDA will unnecessarily retain the text section. |
120 | if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) || |
121 | relSec->nextInSectionGroup))) |
122 | enqueue(sec: relSec, offset); |
123 | return; |
124 | } |
125 | |
126 | if (auto *ss = dyn_cast<SharedSymbol>(Val: &sym)) |
127 | if (!ss->isWeak()) |
128 | cast<SharedFile>(Val: ss->file)->isNeeded = true; |
129 | |
130 | for (InputSectionBase *sec : cNamedSections.lookup(Val: sym.getName())) |
131 | enqueue(sec, offset: 0); |
132 | } |
133 | |
134 | // The .eh_frame section is an unfortunate special case. |
135 | // The section is divided in CIEs and FDEs and the relocations it can have are |
136 | // * CIEs can refer to a personality function. |
137 | // * FDEs can refer to a LSDA |
138 | // * FDEs refer to the function they contain information about |
139 | // The last kind of relocation cannot keep the referred section alive, or they |
140 | // would keep everything alive in a common object file. In fact, each FDE is |
141 | // alive if the section it refers to is alive. |
142 | // To keep things simple, in here we just ignore the last relocation kind. The |
143 | // other two keep the referred section alive. |
144 | // |
145 | // A possible improvement would be to fully process .eh_frame in the middle of |
146 | // the gc pass. With that we would be able to also gc some sections holding |
147 | // LSDAs and personality functions if we found that they were unused. |
148 | template <class ELFT> |
149 | template <class RelTy> |
150 | void MarkLive<ELFT>::scanEhFrameSection(EhInputSection &eh, |
151 | ArrayRef<RelTy> rels) { |
152 | for (const EhSectionPiece &cie : eh.cies) |
153 | if (cie.firstRelocation != unsigned(-1)) |
154 | resolveReloc(eh, rels[cie.firstRelocation], false); |
155 | for (const EhSectionPiece &fde : eh.fdes) { |
156 | size_t firstRelI = fde.firstRelocation; |
157 | if (firstRelI == (unsigned)-1) |
158 | continue; |
159 | uint64_t pieceEnd = fde.inputOff + fde.size; |
160 | for (size_t j = firstRelI, end2 = rels.size(); |
161 | j < end2 && rels[j].r_offset < pieceEnd; ++j) |
162 | resolveReloc(eh, rels[j], true); |
163 | } |
164 | } |
165 | |
166 | // Some sections are used directly by the loader, so they should never be |
167 | // garbage-collected. This function returns true if a given section is such |
168 | // section. |
169 | static bool isReserved(InputSectionBase *sec) { |
170 | switch (sec->type) { |
171 | case SHT_FINI_ARRAY: |
172 | case SHT_INIT_ARRAY: |
173 | case SHT_PREINIT_ARRAY: |
174 | return true; |
175 | case SHT_NOTE: |
176 | // SHT_NOTE sections in a group are subject to garbage collection. |
177 | return !sec->nextInSectionGroup; |
178 | default: |
179 | // Support SHT_PROGBITS .init_array (https://golang.org/issue/50295) and |
180 | // .init_array.N (https://github.com/rust-lang/rust/issues/92181) for a |
181 | // while. |
182 | StringRef s = sec->name; |
183 | return s == ".init" || s == ".fini" || s.starts_with(Prefix: ".init_array" ) || |
184 | s == ".jcr" || s.starts_with(Prefix: ".ctors" ) || s.starts_with(Prefix: ".dtors" ); |
185 | } |
186 | } |
187 | |
188 | template <class ELFT> |
189 | void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) { |
190 | // Usually, a whole section is marked as live or dead, but in mergeable |
191 | // (splittable) sections, each piece of data has independent liveness bit. |
192 | // So we explicitly tell it which offset is in use. |
193 | if (auto *ms = dyn_cast<MergeInputSection>(Val: sec)) |
194 | ms->getSectionPiece(offset).live = true; |
195 | |
196 | // Set Sec->Partition to the meet (i.e. the "minimum") of Partition and |
197 | // Sec->Partition in the following lattice: 1 < other < 0. If Sec->Partition |
198 | // doesn't change, we don't need to do anything. |
199 | if (sec->partition == 1 || sec->partition == partition) |
200 | return; |
201 | sec->partition = sec->partition ? 1 : partition; |
202 | |
203 | // Add input section to the queue. |
204 | if (InputSection *s = dyn_cast<InputSection>(Val: sec)) |
205 | queue.push_back(Elt: s); |
206 | } |
207 | |
208 | template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) { |
209 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
210 | if (auto *isec = dyn_cast_or_null<InputSectionBase>(Val: d->section)) |
211 | enqueue(sec: isec, offset: d->value); |
212 | } |
213 | |
214 | // This is the main function of the garbage collector. |
215 | // Starting from GC-root sections, this function visits all reachable |
216 | // sections to set their "Live" bits. |
217 | template <class ELFT> void MarkLive<ELFT>::run() { |
218 | // Add GC root symbols. |
219 | |
220 | // Preserve externally-visible symbols if the symbols defined by this |
221 | // file can interpose other ELF file's symbols at runtime. |
222 | for (Symbol *sym : symtab.getSymbols()) |
223 | if (sym->includeInDynsym() && sym->partition == partition) |
224 | markSymbol(sym); |
225 | |
226 | // If this isn't the main partition, that's all that we need to preserve. |
227 | if (partition != 1) { |
228 | mark(); |
229 | return; |
230 | } |
231 | |
232 | markSymbol(sym: symtab.find(name: config->entry)); |
233 | markSymbol(sym: symtab.find(name: config->init)); |
234 | markSymbol(sym: symtab.find(name: config->fini)); |
235 | for (StringRef s : config->undefined) |
236 | markSymbol(sym: symtab.find(name: s)); |
237 | for (StringRef s : script->referencedSymbols) |
238 | markSymbol(sym: symtab.find(name: s)); |
239 | for (auto [symName, _] : symtab.cmseSymMap) { |
240 | markSymbol(sym: symtab.cmseSymMap[symName].sym); |
241 | markSymbol(sym: symtab.cmseSymMap[symName].acleSeSym); |
242 | } |
243 | |
244 | // Mark .eh_frame sections as live because there are usually no relocations |
245 | // that point to .eh_frames. Otherwise, the garbage collector would drop |
246 | // all of them. We also want to preserve personality routines and LSDA |
247 | // referenced by .eh_frame sections, so we scan them for that here. |
248 | for (EhInputSection *eh : ctx.ehInputSections) { |
249 | const RelsOrRelas<ELFT> rels = |
250 | eh->template relsOrRelas<ELFT>(/*supportsCrel=*/false); |
251 | if (rels.areRelocsRel()) |
252 | scanEhFrameSection(*eh, rels.rels); |
253 | else if (rels.relas.size()) |
254 | scanEhFrameSection(*eh, rels.relas); |
255 | } |
256 | for (InputSectionBase *sec : ctx.inputSections) { |
257 | if (sec->flags & SHF_GNU_RETAIN) { |
258 | enqueue(sec, offset: 0); |
259 | continue; |
260 | } |
261 | if (sec->flags & SHF_LINK_ORDER) |
262 | continue; |
263 | |
264 | // Usually, non-SHF_ALLOC sections are not removed even if they are |
265 | // unreachable through relocations because reachability is not a good signal |
266 | // whether they are garbage or not (e.g. there is usually no section |
267 | // referring to a .comment section, but we want to keep it.) When a |
268 | // non-SHF_ALLOC section is retained, we also retain sections dependent on |
269 | // it. |
270 | // |
271 | // Note on SHF_LINK_ORDER: Such sections contain metadata and they |
272 | // have a reverse dependency on the InputSection they are linked with. |
273 | // We are able to garbage collect them. |
274 | // |
275 | // Note on SHF_REL{,A}: Such sections reach here only when -r |
276 | // or --emit-reloc were given. And they are subject of garbage |
277 | // collection because, if we remove a text section, we also |
278 | // remove its relocation section. |
279 | // |
280 | // Note on nextInSectionGroup: The ELF spec says that group sections are |
281 | // included or omitted as a unit. We take the interpretation that: |
282 | // |
283 | // - Group members (nextInSectionGroup != nullptr) are subject to garbage |
284 | // collection. |
285 | // - Groups members are retained or discarded as a unit. |
286 | if (!(sec->flags & SHF_ALLOC)) { |
287 | if (!isStaticRelSecType(type: sec->type) && !sec->nextInSectionGroup) { |
288 | sec->markLive(); |
289 | for (InputSection *isec : sec->dependentSections) |
290 | isec->markLive(); |
291 | } |
292 | } |
293 | |
294 | // Preserve special sections and those which are specified in linker |
295 | // script KEEP command. |
296 | if (isReserved(sec) || script->shouldKeep(s: sec)) { |
297 | enqueue(sec, offset: 0); |
298 | } else if ((!config->zStartStopGC || sec->name.starts_with(Prefix: "__libc_" )) && |
299 | isValidCIdentifier(s: sec->name)) { |
300 | // As a workaround for glibc libc.a before 2.34 |
301 | // (https://sourceware.org/PR27492), retain __libc_atexit and similar |
302 | // sections regardless of zStartStopGC. |
303 | cNamedSections[saver().save(S: "__start_" + sec->name)].push_back(Elt: sec); |
304 | cNamedSections[saver().save(S: "__stop_" + sec->name)].push_back(Elt: sec); |
305 | } |
306 | } |
307 | |
308 | mark(); |
309 | } |
310 | |
311 | template <class ELFT> void MarkLive<ELFT>::mark() { |
312 | // Mark all reachable sections. |
313 | while (!queue.empty()) { |
314 | InputSectionBase &sec = *queue.pop_back_val(); |
315 | |
316 | const RelsOrRelas<ELFT> rels = sec.template relsOrRelas<ELFT>(); |
317 | for (const typename ELFT::Rel &rel : rels.rels) |
318 | resolveReloc(sec, rel, false); |
319 | for (const typename ELFT::Rela &rel : rels.relas) |
320 | resolveReloc(sec, rel, false); |
321 | for (const typename ELFT::Crel &rel : rels.crels) |
322 | resolveReloc(sec, rel, false); |
323 | |
324 | for (InputSectionBase *isec : sec.dependentSections) |
325 | enqueue(sec: isec, offset: 0); |
326 | |
327 | // Mark the next group member. |
328 | if (sec.nextInSectionGroup) |
329 | enqueue(sec: sec.nextInSectionGroup, offset: 0); |
330 | } |
331 | } |
332 | |
333 | // Move the sections for some symbols to the main partition, specifically ifuncs |
334 | // (because they can result in an IRELATIVE being added to the main partition's |
335 | // GOT, which means that the ifunc must be available when the main partition is |
336 | // loaded) and TLS symbols (because we only know how to correctly process TLS |
337 | // relocations for the main partition). |
338 | // |
339 | // We also need to move sections whose names are C identifiers that are referred |
340 | // to from __start_/__stop_ symbols because there will only be one set of |
341 | // symbols for the whole program. |
342 | template <class ELFT> void MarkLive<ELFT>::moveToMain() { |
343 | for (ELFFileBase *file : ctx.objectFiles) |
344 | for (Symbol *s : file->getSymbols()) |
345 | if (auto *d = dyn_cast<Defined>(Val: s)) |
346 | if ((d->type == STT_GNU_IFUNC || d->type == STT_TLS) && d->section && |
347 | d->section->isLive()) |
348 | markSymbol(sym: s); |
349 | |
350 | for (InputSectionBase *sec : ctx.inputSections) { |
351 | if (!sec->isLive() || !isValidCIdentifier(s: sec->name)) |
352 | continue; |
353 | if (symtab.find(name: ("__start_" + sec->name).str()) || |
354 | symtab.find(name: ("__stop_" + sec->name).str())) |
355 | enqueue(sec, offset: 0); |
356 | } |
357 | |
358 | mark(); |
359 | } |
360 | |
361 | // Before calling this function, Live bits are off for all |
362 | // input sections. This function make some or all of them on |
363 | // so that they are emitted to the output file. |
364 | template <class ELFT> void elf::markLive() { |
365 | llvm::TimeTraceScope timeScope("markLive" ); |
366 | // If --gc-sections is not given, retain all input sections. |
367 | if (!config->gcSections) { |
368 | // If a DSO defines a symbol referenced in a regular object, it is needed. |
369 | for (Symbol *sym : symtab.getSymbols()) |
370 | if (auto *s = dyn_cast<SharedSymbol>(Val: sym)) |
371 | if (s->isUsedInRegularObj && !s->isWeak()) |
372 | cast<SharedFile>(Val: s->file)->isNeeded = true; |
373 | return; |
374 | } |
375 | |
376 | for (InputSectionBase *sec : ctx.inputSections) |
377 | sec->markDead(); |
378 | |
379 | // Follow the graph to mark all live sections. |
380 | for (unsigned curPart = 1; curPart <= partitions.size(); ++curPart) |
381 | MarkLive<ELFT>(curPart).run(); |
382 | |
383 | // If we have multiple partitions, some sections need to live in the main |
384 | // partition even if they were allocated to a loadable partition. Move them |
385 | // there now. |
386 | if (partitions.size() != 1) |
387 | MarkLive<ELFT>(1).moveToMain(); |
388 | |
389 | // Report garbage-collected sections. |
390 | if (config->printGcSections) |
391 | for (InputSectionBase *sec : ctx.inputSections) |
392 | if (!sec->isLive()) |
393 | message(msg: "removing unused section " + toString(sec)); |
394 | } |
395 | |
396 | template void elf::markLive<ELF32LE>(); |
397 | template void elf::markLive<ELF32BE>(); |
398 | template void elf::markLive<ELF64LE>(); |
399 | template void elf::markLive<ELF64BE>(); |
400 | |