1 | //===- ICF.cpp ------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // ICF is short for Identical Code Folding. This is a size optimization to |
10 | // identify and merge two or more read-only sections (typically functions) |
11 | // that happened to have the same contents. It usually reduces output size |
12 | // by a few percent. |
13 | // |
14 | // In ICF, two sections are considered identical if they have the same |
15 | // section flags, section data, and relocations. Relocations are tricky, |
16 | // because two relocations are considered the same if they have the same |
17 | // relocation types, values, and if they point to the same sections *in |
18 | // terms of ICF*. |
19 | // |
20 | // Here is an example. If foo and bar defined below are compiled to the |
21 | // same machine instructions, ICF can and should merge the two, although |
22 | // their relocations point to each other. |
23 | // |
24 | // void foo() { bar(); } |
25 | // void bar() { foo(); } |
26 | // |
27 | // If you merge the two, their relocations point to the same section and |
28 | // thus you know they are mergeable, but how do you know they are |
29 | // mergeable in the first place? This is not an easy problem to solve. |
30 | // |
31 | // What we are doing in LLD is to partition sections into equivalence |
32 | // classes. Sections in the same equivalence class when the algorithm |
33 | // terminates are considered identical. Here are details: |
34 | // |
35 | // 1. First, we partition sections using their hash values as keys. Hash |
36 | // values contain section types, section contents and numbers of |
37 | // relocations. During this step, relocation targets are not taken into |
38 | // account. We just put sections that apparently differ into different |
39 | // equivalence classes. |
40 | // |
41 | // 2. Next, for each equivalence class, we visit sections to compare |
42 | // relocation targets. Relocation targets are considered equivalent if |
43 | // their targets are in the same equivalence class. Sections with |
44 | // different relocation targets are put into different equivalence |
45 | // classes. |
46 | // |
// 3. If we split an equivalence class in step 2, two relocations that
//    previously targeted the same equivalence class may now target
//    different equivalence classes. Therefore, we repeat step 2 until
//    convergence is reached.
51 | // |
52 | // 4. For each equivalence class C, pick an arbitrary section in C, and |
53 | // merge all the other sections in C with it. |
54 | // |
55 | // For small programs, this algorithm needs 3-5 iterations. For large |
56 | // programs such as Chromium, it takes more than 20 iterations. |
57 | // |
58 | // This algorithm was mentioned as an "optimistic algorithm" in [1], |
59 | // though gold implements a different algorithm than this. |
60 | // |
61 | // We parallelize each step so that multiple threads can work on different |
62 | // equivalence classes concurrently. That gave us a large performance |
63 | // boost when applying ICF on large programs. For example, MSVC link.exe |
64 | // or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output |
65 | // size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a |
66 | // 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still |
67 | // faster than MSVC or gold though. |
68 | // |
69 | // [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding |
70 | // in the Gold Linker |
71 | // http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf |
72 | // |
73 | //===----------------------------------------------------------------------===// |
74 | |
75 | #include "ICF.h" |
76 | #include "Config.h" |
77 | #include "InputFiles.h" |
78 | #include "LinkerScript.h" |
79 | #include "OutputSections.h" |
80 | #include "SymbolTable.h" |
81 | #include "Symbols.h" |
82 | #include "SyntheticSections.h" |
83 | #include "llvm/BinaryFormat/ELF.h" |
84 | #include "llvm/Object/ELF.h" |
85 | #include "llvm/Support/Parallel.h" |
86 | #include "llvm/Support/TimeProfiler.h" |
87 | #include "llvm/Support/xxhash.h" |
88 | #include <algorithm> |
89 | #include <atomic> |
90 | |
91 | using namespace llvm; |
92 | using namespace llvm::ELF; |
93 | using namespace llvm::object; |
94 | using namespace lld; |
95 | using namespace lld::elf; |
96 | |
97 | namespace { |
98 | template <class ELFT> class ICF { |
99 | public: |
100 | void run(); |
101 | |
102 | private: |
103 | void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant); |
104 | |
105 | template <class RelTy> |
106 | bool constantEq(const InputSection *a, Relocs<RelTy> relsA, |
107 | const InputSection *b, Relocs<RelTy> relsB); |
108 | |
109 | template <class RelTy> |
110 | bool variableEq(const InputSection *a, Relocs<RelTy> relsA, |
111 | const InputSection *b, Relocs<RelTy> relsB); |
112 | |
113 | bool equalsConstant(const InputSection *a, const InputSection *b); |
114 | bool equalsVariable(const InputSection *a, const InputSection *b); |
115 | |
116 | size_t findBoundary(size_t begin, size_t end); |
117 | |
118 | void forEachClassRange(size_t begin, size_t end, |
119 | llvm::function_ref<void(size_t, size_t)> fn); |
120 | |
121 | void forEachClass(llvm::function_ref<void(size_t, size_t)> fn); |
122 | |
123 | SmallVector<InputSection *, 0> sections; |
124 | |
125 | // We repeat the main loop while `Repeat` is true. |
126 | std::atomic<bool> repeat; |
127 | |
128 | // The main loop counter. |
129 | int cnt = 0; |
130 | |
131 | // We have two locations for equivalence classes. On the first iteration |
132 | // of the main loop, Class[0] has a valid value, and Class[1] contains |
133 | // garbage. We read equivalence classes from slot 0 and write to slot 1. |
134 | // So, Class[0] represents the current class, and Class[1] represents |
135 | // the next class. On each iteration, we switch their roles and use them |
136 | // alternately. |
137 | // |
138 | // Why are we doing this? Recall that other threads may be working on |
139 | // other equivalence classes in parallel. They may read sections that we |
140 | // are updating. We cannot update equivalence classes in place because |
141 | // it breaks the invariance that all possibly-identical sections must be |
142 | // in the same equivalence class at any moment. In other words, the for |
143 | // loop to update equivalence classes is not atomic, and that is |
144 | // observable from other threads. By writing new classes to other |
145 | // places, we can keep the invariance. |
146 | // |
147 | // Below, `Current` has the index of the current class, and `Next` has |
148 | // the index of the next class. If threading is enabled, they are either |
149 | // (0, 1) or (1, 0). |
150 | // |
151 | // Note on single-thread: if that's the case, they are always (0, 0) |
152 | // because we can safely read the next class without worrying about race |
153 | // conditions. Using the same location makes this algorithm converge |
154 | // faster because it uses results of the same iteration earlier. |
155 | int current = 0; |
156 | int next = 0; |
157 | }; |
158 | } |
159 | |
160 | // Returns true if section S is subject of ICF. |
161 | static bool isEligible(InputSection *s) { |
162 | if (!s->isLive() || s->keepUnique || !(s->flags & SHF_ALLOC)) |
163 | return false; |
164 | |
165 | // Don't merge writable sections. .data.rel.ro sections are marked as writable |
166 | // but are semantically read-only. |
167 | if ((s->flags & SHF_WRITE) && s->name != ".data.rel.ro" && |
168 | !s->name.starts_with(Prefix: ".data.rel.ro." )) |
169 | return false; |
170 | |
171 | // SHF_LINK_ORDER sections are ICF'd as a unit with their dependent sections, |
172 | // so we don't consider them for ICF individually. |
173 | if (s->flags & SHF_LINK_ORDER) |
174 | return false; |
175 | |
176 | // Don't merge synthetic sections as their Data member is not valid and empty. |
177 | // The Data member needs to be valid for ICF as it is used by ICF to determine |
178 | // the equality of section contents. |
179 | if (isa<SyntheticSection>(Val: s)) |
180 | return false; |
181 | |
182 | // .init and .fini contains instructions that must be executed to initialize |
183 | // and finalize the process. They cannot and should not be merged. |
184 | if (s->name == ".init" || s->name == ".fini" ) |
185 | return false; |
186 | |
187 | // A user program may enumerate sections named with a C identifier using |
188 | // __start_* and __stop_* symbols. We cannot ICF any such sections because |
189 | // that could change program semantics. |
190 | if (isValidCIdentifier(s: s->name)) |
191 | return false; |
192 | |
193 | return true; |
194 | } |
195 | |
196 | // Split an equivalence class into smaller classes. |
197 | template <class ELFT> |
198 | void ICF<ELFT>::segregate(size_t begin, size_t end, uint32_t eqClassBase, |
199 | bool constant) { |
200 | // This loop rearranges sections in [Begin, End) so that all sections |
201 | // that are equal in terms of equals{Constant,Variable} are contiguous |
202 | // in [Begin, End). |
203 | // |
204 | // The algorithm is quadratic in the worst case, but that is not an |
205 | // issue in practice because the number of the distinct sections in |
206 | // each range is usually very small. |
207 | |
208 | while (begin < end) { |
209 | // Divide [Begin, End) into two. Let Mid be the start index of the |
210 | // second group. |
211 | auto bound = |
212 | std::stable_partition(sections.begin() + begin + 1, |
213 | sections.begin() + end, [&](InputSection *s) { |
214 | if (constant) |
215 | return equalsConstant(a: sections[begin], b: s); |
216 | return equalsVariable(a: sections[begin], b: s); |
217 | }); |
218 | size_t mid = bound - sections.begin(); |
219 | |
220 | // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by |
221 | // updating the sections in [Begin, Mid). We use Mid as the basis for |
222 | // the equivalence class ID because every group ends with a unique index. |
223 | // Add this to eqClassBase to avoid equality with unique IDs. |
224 | for (size_t i = begin; i < mid; ++i) |
225 | sections[i]->eqClass[next] = eqClassBase + mid; |
226 | |
227 | // If we created a group, we need to iterate the main loop again. |
228 | if (mid != end) |
229 | repeat = true; |
230 | |
231 | begin = mid; |
232 | } |
233 | } |
234 | |
235 | // Compare two lists of relocations. |
236 | template <class ELFT> |
237 | template <class RelTy> |
238 | bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra, |
239 | const InputSection *secB, Relocs<RelTy> rb) { |
240 | if (ra.size() != rb.size()) |
241 | return false; |
242 | auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); |
243 | for (; rai != rae; ++rai, ++rbi) { |
244 | if (rai->r_offset != rbi->r_offset || |
245 | rai->getType(config->isMips64EL) != rbi->getType(config->isMips64EL)) |
246 | return false; |
247 | |
248 | uint64_t addA = getAddend<ELFT>(*rai); |
249 | uint64_t addB = getAddend<ELFT>(*rbi); |
250 | |
251 | Symbol &sa = secA->file->getRelocTargetSym(*rai); |
252 | Symbol &sb = secB->file->getRelocTargetSym(*rbi); |
253 | if (&sa == &sb) { |
254 | if (addA == addB) |
255 | continue; |
256 | return false; |
257 | } |
258 | |
259 | auto *da = dyn_cast<Defined>(Val: &sa); |
260 | auto *db = dyn_cast<Defined>(Val: &sb); |
261 | |
262 | // Placeholder symbols generated by linker scripts look the same now but |
263 | // may have different values later. |
264 | if (!da || !db || da->scriptDefined || db->scriptDefined) |
265 | return false; |
266 | |
267 | // When comparing a pair of relocations, if they refer to different symbols, |
268 | // and either symbol is preemptible, the containing sections should be |
269 | // considered different. This is because even if the sections are identical |
270 | // in this DSO, they may not be after preemption. |
271 | if (da->isPreemptible || db->isPreemptible) |
272 | return false; |
273 | |
274 | // Relocations referring to absolute symbols are constant-equal if their |
275 | // values are equal. |
276 | if (!da->section && !db->section && da->value + addA == db->value + addB) |
277 | continue; |
278 | if (!da->section || !db->section) |
279 | return false; |
280 | |
281 | if (da->section->kind() != db->section->kind()) |
282 | return false; |
283 | |
284 | // Relocations referring to InputSections are constant-equal if their |
285 | // section offsets are equal. |
286 | if (isa<InputSection>(Val: da->section)) { |
287 | if (da->value + addA == db->value + addB) |
288 | continue; |
289 | return false; |
290 | } |
291 | |
292 | // Relocations referring to MergeInputSections are constant-equal if their |
293 | // offsets in the output section are equal. |
294 | auto *x = dyn_cast<MergeInputSection>(Val: da->section); |
295 | if (!x) |
296 | return false; |
297 | auto *y = cast<MergeInputSection>(Val: db->section); |
298 | if (x->getParent() != y->getParent()) |
299 | return false; |
300 | |
301 | uint64_t offsetA = |
302 | sa.isSection() ? x->getOffset(offset: addA) : x->getOffset(offset: da->value) + addA; |
303 | uint64_t offsetB = |
304 | sb.isSection() ? y->getOffset(offset: addB) : y->getOffset(offset: db->value) + addB; |
305 | if (offsetA != offsetB) |
306 | return false; |
307 | } |
308 | |
309 | return true; |
310 | } |
311 | |
312 | // Compare "non-moving" part of two InputSections, namely everything |
313 | // except relocation targets. |
314 | template <class ELFT> |
315 | bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) { |
316 | if (a->flags != b->flags || a->getSize() != b->getSize() || |
317 | a->content() != b->content()) |
318 | return false; |
319 | |
320 | // If two sections have different output sections, we cannot merge them. |
321 | assert(a->getParent() && b->getParent()); |
322 | if (a->getParent() != b->getParent()) |
323 | return false; |
324 | |
325 | const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); |
326 | const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); |
327 | if (ra.areRelocsCrel() || rb.areRelocsCrel()) |
328 | return constantEq(a, ra.crels, b, rb.crels); |
329 | return ra.areRelocsRel() || rb.areRelocsRel() |
330 | ? constantEq(a, ra.rels, b, rb.rels) |
331 | : constantEq(a, ra.relas, b, rb.relas); |
332 | } |
333 | |
334 | // Compare two lists of relocations. Returns true if all pairs of |
335 | // relocations point to the same section in terms of ICF. |
336 | template <class ELFT> |
337 | template <class RelTy> |
338 | bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra, |
339 | const InputSection *secB, Relocs<RelTy> rb) { |
340 | assert(ra.size() == rb.size()); |
341 | |
342 | auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); |
343 | for (; rai != rae; ++rai, ++rbi) { |
344 | // The two sections must be identical. |
345 | Symbol &sa = secA->file->getRelocTargetSym(*rai); |
346 | Symbol &sb = secB->file->getRelocTargetSym(*rbi); |
347 | if (&sa == &sb) |
348 | continue; |
349 | |
350 | auto *da = cast<Defined>(Val: &sa); |
351 | auto *db = cast<Defined>(Val: &sb); |
352 | |
353 | // We already dealt with absolute and non-InputSection symbols in |
354 | // constantEq, and for InputSections we have already checked everything |
355 | // except the equivalence class. |
356 | if (!da->section) |
357 | continue; |
358 | auto *x = dyn_cast<InputSection>(Val: da->section); |
359 | if (!x) |
360 | continue; |
361 | auto *y = cast<InputSection>(Val: db->section); |
362 | |
363 | // Sections that are in the special equivalence class 0, can never be the |
364 | // same in terms of the equivalence class. |
365 | if (x->eqClass[current] == 0) |
366 | return false; |
367 | if (x->eqClass[current] != y->eqClass[current]) |
368 | return false; |
369 | }; |
370 | |
371 | return true; |
372 | } |
373 | |
374 | // Compare "moving" part of two InputSections, namely relocation targets. |
375 | template <class ELFT> |
376 | bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) { |
377 | const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); |
378 | const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); |
379 | if (ra.areRelocsCrel() || rb.areRelocsCrel()) |
380 | return variableEq(a, ra.crels, b, rb.crels); |
381 | return ra.areRelocsRel() || rb.areRelocsRel() |
382 | ? variableEq(a, ra.rels, b, rb.rels) |
383 | : variableEq(a, ra.relas, b, rb.relas); |
384 | } |
385 | |
386 | template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t begin, size_t end) { |
387 | uint32_t eqClass = sections[begin]->eqClass[current]; |
388 | for (size_t i = begin + 1; i < end; ++i) |
389 | if (eqClass != sections[i]->eqClass[current]) |
390 | return i; |
391 | return end; |
392 | } |
393 | |
394 | // Sections in the same equivalence class are contiguous in Sections |
395 | // vector. Therefore, Sections vector can be considered as contiguous |
396 | // groups of sections, grouped by the class. |
397 | // |
398 | // This function calls Fn on every group within [Begin, End). |
399 | template <class ELFT> |
400 | void ICF<ELFT>::forEachClassRange(size_t begin, size_t end, |
401 | llvm::function_ref<void(size_t, size_t)> fn) { |
402 | while (begin < end) { |
403 | size_t mid = findBoundary(begin, end); |
404 | fn(begin, mid); |
405 | begin = mid; |
406 | } |
407 | } |
408 | |
409 | // Call Fn on each equivalence class. |
410 | template <class ELFT> |
411 | void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> fn) { |
412 | // If threading is disabled or the number of sections are |
413 | // too small to use threading, call Fn sequentially. |
414 | if (parallel::strategy.ThreadsRequested == 1 || sections.size() < 1024) { |
415 | forEachClassRange(begin: 0, end: sections.size(), fn); |
416 | ++cnt; |
417 | return; |
418 | } |
419 | |
420 | current = cnt % 2; |
421 | next = (cnt + 1) % 2; |
422 | |
423 | // Shard into non-overlapping intervals, and call Fn in parallel. |
424 | // The sharding must be completed before any calls to Fn are made |
425 | // so that Fn can modify the Chunks in its shard without causing data |
426 | // races. |
427 | const size_t numShards = 256; |
428 | size_t step = sections.size() / numShards; |
429 | size_t boundaries[numShards + 1]; |
430 | boundaries[0] = 0; |
431 | boundaries[numShards] = sections.size(); |
432 | |
433 | parallelFor(1, numShards, [&](size_t i) { |
434 | boundaries[i] = findBoundary(begin: (i - 1) * step, end: sections.size()); |
435 | }); |
436 | |
437 | parallelFor(1, numShards + 1, [&](size_t i) { |
438 | if (boundaries[i - 1] < boundaries[i]) |
439 | forEachClassRange(begin: boundaries[i - 1], end: boundaries[i], fn); |
440 | }); |
441 | ++cnt; |
442 | } |
443 | |
444 | // Combine the hashes of the sections referenced by the given section into its |
445 | // hash. |
446 | template <class RelTy> |
447 | static void combineRelocHashes(unsigned cnt, InputSection *isec, |
448 | Relocs<RelTy> rels) { |
449 | uint32_t hash = isec->eqClass[cnt % 2]; |
450 | for (RelTy rel : rels) { |
451 | Symbol &s = isec->file->getRelocTargetSym(rel); |
452 | if (auto *d = dyn_cast<Defined>(Val: &s)) |
453 | if (auto *relSec = dyn_cast_or_null<InputSection>(Val: d->section)) |
454 | hash += relSec->eqClass[cnt % 2]; |
455 | } |
456 | // Set MSB to 1 to avoid collisions with unique IDs. |
457 | isec->eqClass[(cnt + 1) % 2] = hash | (1U << 31); |
458 | } |
459 | |
460 | static void print(const Twine &s) { |
461 | if (config->printIcfSections) |
462 | message(msg: s); |
463 | } |
464 | |
465 | // The main function of ICF. |
466 | template <class ELFT> void ICF<ELFT>::run() { |
467 | // Compute isPreemptible early. We may add more symbols later, so this loop |
468 | // cannot be merged with the later computeIsPreemptible() pass which is used |
469 | // by scanRelocations(). |
470 | if (config->hasDynSymTab) |
471 | for (Symbol *sym : symtab.getSymbols()) |
472 | sym->isPreemptible = computeIsPreemptible(sym: *sym); |
473 | |
474 | // Two text sections may have identical content and relocations but different |
475 | // LSDA, e.g. the two functions may have catch blocks of different types. If a |
476 | // text section is referenced by a .eh_frame FDE with LSDA, it is not |
477 | // eligible. This is implemented by iterating over CIE/FDE and setting |
478 | // eqClass[0] to the referenced text section from a live FDE. |
479 | // |
480 | // If two .gcc_except_table have identical semantics (usually identical |
481 | // content with PC-relative encoding), we will lose folding opportunity. |
482 | uint32_t uniqueId = 0; |
483 | for (Partition &part : partitions) |
484 | part.ehFrame->iterateFDEWithLSDA<ELFT>( |
485 | [&](InputSection &s) { s.eqClass[0] = s.eqClass[1] = ++uniqueId; }); |
486 | |
487 | // Collect sections to merge. |
488 | for (InputSectionBase *sec : ctx.inputSections) { |
489 | auto *s = dyn_cast<InputSection>(Val: sec); |
490 | if (s && s->eqClass[0] == 0) { |
491 | if (isEligible(s)) |
492 | sections.push_back(Elt: s); |
493 | else |
494 | // Ineligible sections are assigned unique IDs, i.e. each section |
495 | // belongs to an equivalence class of its own. |
496 | s->eqClass[0] = s->eqClass[1] = ++uniqueId; |
497 | } |
498 | } |
499 | |
500 | // Initially, we use hash values to partition sections. |
501 | parallelForEach(sections, [&](InputSection *s) { |
502 | // Set MSB to 1 to avoid collisions with unique IDs. |
503 | s->eqClass[0] = xxh3_64bits(data: s->content()) | (1U << 31); |
504 | }); |
505 | |
506 | // Perform 2 rounds of relocation hash propagation. 2 is an empirical value to |
507 | // reduce the average sizes of equivalence classes, i.e. segregate() which has |
508 | // a large time complexity will have less work to do. |
509 | for (unsigned cnt = 0; cnt != 2; ++cnt) { |
510 | parallelForEach(sections, [&](InputSection *s) { |
511 | const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>(); |
512 | if (rels.areRelocsCrel()) |
513 | combineRelocHashes(cnt, s, rels.crels); |
514 | else if (rels.areRelocsRel()) |
515 | combineRelocHashes(cnt, s, rels.rels); |
516 | else |
517 | combineRelocHashes(cnt, s, rels.relas); |
518 | }); |
519 | } |
520 | |
521 | // From now on, sections in Sections vector are ordered so that sections |
522 | // in the same equivalence class are consecutive in the vector. |
523 | llvm::stable_sort(sections, [](const InputSection *a, const InputSection *b) { |
524 | return a->eqClass[0] < b->eqClass[0]; |
525 | }); |
526 | |
527 | // Compare static contents and assign unique equivalence class IDs for each |
528 | // static content. Use a base offset for these IDs to ensure no overlap with |
529 | // the unique IDs already assigned. |
530 | uint32_t eqClassBase = ++uniqueId; |
531 | forEachClass(fn: [&](size_t begin, size_t end) { |
532 | segregate(begin, end, eqClassBase, constant: true); |
533 | }); |
534 | |
535 | // Split groups by comparing relocations until convergence is obtained. |
536 | do { |
537 | repeat = false; |
538 | forEachClass(fn: [&](size_t begin, size_t end) { |
539 | segregate(begin, end, eqClassBase, constant: false); |
540 | }); |
541 | } while (repeat); |
542 | |
543 | log(msg: "ICF needed " + Twine(cnt) + " iterations" ); |
544 | |
545 | // Merge sections by the equivalence class. |
546 | forEachClassRange(begin: 0, end: sections.size(), fn: [&](size_t begin, size_t end) { |
547 | if (end - begin == 1) |
548 | return; |
549 | print(s: "selected section " + toString(sections[begin])); |
550 | for (size_t i = begin + 1; i < end; ++i) { |
551 | print(s: " removing identical section " + toString(sections[i])); |
552 | sections[begin]->replace(other: sections[i]); |
553 | |
554 | // At this point we know sections merged are fully identical and hence |
555 | // we want to remove duplicate implicit dependencies such as link order |
556 | // and relocation sections. |
557 | for (InputSection *isec : sections[i]->dependentSections) |
558 | isec->markDead(); |
559 | } |
560 | }); |
561 | |
562 | // Change Defined symbol's section field to the canonical one. |
563 | auto fold = [](Symbol *sym) { |
564 | if (auto *d = dyn_cast<Defined>(Val: sym)) |
565 | if (auto *sec = dyn_cast_or_null<InputSection>(Val: d->section)) |
566 | if (sec->repl != d->section) { |
567 | d->section = sec->repl; |
568 | d->folded = true; |
569 | } |
570 | }; |
571 | for (Symbol *sym : symtab.getSymbols()) |
572 | fold(sym); |
573 | parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) { |
574 | for (Symbol *sym : file->getLocalSymbols()) |
575 | fold(sym); |
576 | }); |
577 | |
578 | // InputSectionDescription::sections is populated by processSectionCommands(). |
579 | // ICF may fold some input sections assigned to output sections. Remove them. |
580 | for (SectionCommand *cmd : script->sectionCommands) |
581 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) |
582 | for (SectionCommand *subCmd : osd->osec.commands) |
583 | if (auto *isd = dyn_cast<InputSectionDescription>(Val: subCmd)) |
584 | llvm::erase_if(isd->sections, |
585 | [](InputSection *isec) { return !isec->isLive(); }); |
586 | } |
587 | |
588 | // ICF entry point function. |
589 | template <class ELFT> void elf::doIcf() { |
590 | llvm::TimeTraceScope timeScope("ICF" ); |
591 | ICF<ELFT>().run(); |
592 | } |
593 | |
594 | template void elf::doIcf<ELF32LE>(); |
595 | template void elf::doIcf<ELF32BE>(); |
596 | template void elf::doIcf<ELF64LE>(); |
597 | template void elf::doIcf<ELF64BE>(); |
598 | |