| 1 | //===- ICF.cpp ------------------------------------------------------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // ICF is short for Identical Code Folding. This is a size optimization to | 
|---|
| 10 | // identify and merge two or more read-only sections (typically functions) | 
|---|
| 11 | // that happened to have the same contents. It usually reduces output size | 
|---|
| 12 | // by a few percent. | 
|---|
| 13 | // | 
|---|
| 14 | // In ICF, two sections are considered identical if they have the same | 
|---|
| 15 | // section flags, section data, and relocations. Relocations are tricky, | 
|---|
| 16 | // because two relocations are considered the same if they have the same | 
|---|
| 17 | // relocation types, values, and if they point to the same sections *in | 
|---|
| 18 | // terms of ICF*. | 
|---|
| 19 | // | 
|---|
| 20 | // Here is an example. If foo and bar defined below are compiled to the | 
|---|
| 21 | // same machine instructions, ICF can and should merge the two, although | 
|---|
| 22 | // their relocations point to each other. | 
|---|
| 23 | // | 
|---|
| 24 | //   void foo() { bar(); } | 
|---|
| 25 | //   void bar() { foo(); } | 
|---|
| 26 | // | 
|---|
| 27 | // If you merge the two, their relocations point to the same section and | 
|---|
| 28 | // thus you know they are mergeable, but how do you know they are | 
|---|
| 29 | // mergeable in the first place? This is not an easy problem to solve. | 
|---|
| 30 | // | 
|---|
| 31 | // What we are doing in LLD is to partition sections into equivalence | 
|---|
| 32 | // classes. Sections in the same equivalence class when the algorithm | 
|---|
| 33 | // terminates are considered identical. Here are details: | 
|---|
| 34 | // | 
|---|
| 35 | // 1. First, we partition sections using their hash values as keys. Hash | 
|---|
| 36 | //    values contain section types, section contents and numbers of | 
|---|
| 37 | //    relocations. During this step, relocation targets are not taken into | 
|---|
| 38 | //    account. We just put sections that apparently differ into different | 
|---|
| 39 | //    equivalence classes. | 
|---|
| 40 | // | 
|---|
| 41 | // 2. Next, for each equivalence class, we visit sections to compare | 
|---|
| 42 | //    relocation targets. Relocation targets are considered equivalent if | 
|---|
| 43 | //    their targets are in the same equivalence class. Sections with | 
|---|
| 44 | //    different relocation targets are put into different equivalence | 
|---|
| 45 | //    classes. | 
|---|
| 46 | // | 
|---|
| 47 | // 3. If we split an equivalence class in step 2, two relocations that | 
|---|
| 48 | //    previously targeted the same equivalence class may now target | 
|---|
| 49 | //    different equivalence classes. Therefore, we repeat step 2 until a | 
|---|
| 50 | //    convergence is obtained. | 
|---|
| 51 | // | 
|---|
| 52 | // 4. For each equivalence class C, pick an arbitrary section in C, and | 
|---|
| 53 | //    merge all the other sections in C with it. | 
|---|
| 54 | // | 
|---|
| 55 | // For small programs, this algorithm needs 3-5 iterations. For large | 
|---|
| 56 | // programs such as Chromium, it takes more than 20 iterations. | 
|---|
| 57 | // | 
|---|
| 58 | // This algorithm was mentioned as an "optimistic algorithm" in [1], | 
|---|
| 59 | // though gold implements a different algorithm than this. | 
|---|
| 60 | // | 
|---|
| 61 | // We parallelize each step so that multiple threads can work on different | 
|---|
| 62 | // equivalence classes concurrently. That gave us a large performance | 
|---|
| 63 | // boost when applying ICF on large programs. For example, MSVC link.exe | 
|---|
| 64 | // or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output | 
|---|
| 65 | // size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a | 
|---|
| 66 | // 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still | 
|---|
| 67 | // faster than MSVC or gold though. | 
|---|
| 68 | // | 
|---|
| 69 | // [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding | 
|---|
| 70 | // in the Gold Linker | 
|---|
| 71 | // http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf | 
|---|
| 72 | // | 
|---|
| 73 | //===----------------------------------------------------------------------===// | 
|---|
| 74 |  | 
|---|
| 75 | #include "ICF.h" | 
|---|
| 76 | #include "Config.h" | 
|---|
| 77 | #include "InputFiles.h" | 
|---|
| 78 | #include "LinkerScript.h" | 
|---|
| 79 | #include "OutputSections.h" | 
|---|
| 80 | #include "SymbolTable.h" | 
|---|
| 81 | #include "Symbols.h" | 
|---|
| 82 | #include "SyntheticSections.h" | 
|---|
| 83 | #include "llvm/BinaryFormat/ELF.h" | 
|---|
| 84 | #include "llvm/Support/Parallel.h" | 
|---|
| 85 | #include "llvm/Support/TimeProfiler.h" | 
|---|
| 86 | #include "llvm/Support/xxhash.h" | 
|---|
| 87 | #include <algorithm> | 
|---|
| 88 | #include <atomic> | 
|---|
| 89 |  | 
|---|
| 90 | using namespace llvm; | 
|---|
| 91 | using namespace llvm::ELF; | 
|---|
| 92 | using namespace llvm::object; | 
|---|
| 93 | using namespace lld; | 
|---|
| 94 | using namespace lld::elf; | 
|---|
| 95 |  | 
|---|
| 96 | namespace { | 
|---|
| 97 | template <class ELFT> class ICF { | 
|---|
| 98 | public: | 
|---|
| 99 | ICF(Ctx &ctx) : ctx(ctx) {} | 
|---|
| 100 | void run(); | 
|---|
| 101 |  | 
|---|
| 102 | private: | 
|---|
| 103 | void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant); | 
|---|
| 104 |  | 
|---|
| 105 | template <class RelTy> | 
|---|
| 106 | bool constantEq(const InputSection *a, Relocs<RelTy> relsA, | 
|---|
| 107 | const InputSection *b, Relocs<RelTy> relsB); | 
|---|
| 108 |  | 
|---|
| 109 | template <class RelTy> | 
|---|
| 110 | bool variableEq(const InputSection *a, Relocs<RelTy> relsA, | 
|---|
| 111 | const InputSection *b, Relocs<RelTy> relsB); | 
|---|
| 112 |  | 
|---|
| 113 | bool equalsConstant(const InputSection *a, const InputSection *b); | 
|---|
| 114 | bool equalsVariable(const InputSection *a, const InputSection *b); | 
|---|
| 115 |  | 
|---|
| 116 | size_t findBoundary(size_t begin, size_t end); | 
|---|
| 117 |  | 
|---|
| 118 | void forEachClassRange(size_t begin, size_t end, | 
|---|
| 119 | llvm::function_ref<void(size_t, size_t)> fn); | 
|---|
| 120 |  | 
|---|
| 121 | void parallelForEachClass(llvm::function_ref<void(size_t, size_t)> fn); | 
|---|
| 122 |  | 
|---|
| 123 | Ctx &ctx; | 
|---|
| 124 | SmallVector<InputSection *, 0> sections; | 
|---|
| 125 |  | 
|---|
| 126 | // We repeat the main loop while `Repeat` is true. | 
|---|
| 127 | std::atomic<bool> repeat; | 
|---|
| 128 |  | 
|---|
| 129 | // The main loop counter. | 
|---|
| 130 | int cnt = 0; | 
|---|
| 131 |  | 
|---|
| 132 | // We have two locations for equivalence classes. On the first iteration | 
|---|
| 133 | // of the main loop, Class[0] has a valid value, and Class[1] contains | 
|---|
| 134 | // garbage. We read equivalence classes from slot 0 and write to slot 1. | 
|---|
| 135 | // So, Class[0] represents the current class, and Class[1] represents | 
|---|
| 136 | // the next class. On each iteration, we switch their roles and use them | 
|---|
| 137 | // alternately. | 
|---|
| 138 | // | 
|---|
| 139 | // Why are we doing this? Recall that other threads may be working on | 
|---|
| 140 | // other equivalence classes in parallel. They may read sections that we | 
|---|
| 141 | // are updating. We cannot update equivalence classes in place because | 
|---|
| 142 | // it breaks the invariance that all possibly-identical sections must be | 
|---|
| 143 | // in the same equivalence class at any moment. In other words, the for | 
|---|
| 144 | // loop to update equivalence classes is not atomic, and that is | 
|---|
| 145 | // observable from other threads. By writing new classes to other | 
|---|
| 146 | // places, we can keep the invariance. | 
|---|
| 147 | // | 
|---|
| 148 | // Below, `Current` has the index of the current class, and `Next` has | 
|---|
| 149 | // the index of the next class. If threading is enabled, they are either | 
|---|
| 150 | // (0, 1) or (1, 0). | 
|---|
| 151 | // | 
|---|
| 152 | // Note on single-thread: if that's the case, they are always (0, 0) | 
|---|
| 153 | // because we can safely read the next class without worrying about race | 
|---|
| 154 | // conditions. Using the same location makes this algorithm converge | 
|---|
| 155 | // faster because it uses results of the same iteration earlier. | 
|---|
| 156 | int current = 0; | 
|---|
| 157 | int next = 0; | 
|---|
| 158 | }; | 
|---|
| 159 | } | 
|---|
| 160 |  | 
|---|
| 161 | // Returns true if section S is subject of ICF. | 
|---|
| 162 | static bool isEligible(InputSection *s) { | 
|---|
| 163 | if (!s->isLive() || s->keepUnique || !(s->flags & SHF_ALLOC)) | 
|---|
| 164 | return false; | 
|---|
| 165 |  | 
|---|
| 166 | // Don't merge writable sections. .data.rel.ro sections are marked as writable | 
|---|
| 167 | // but are semantically read-only. | 
|---|
| 168 | if ((s->flags & SHF_WRITE) && s->name != ".data.rel.ro"&& | 
|---|
| 169 | !s->name.starts_with(Prefix: ".data.rel.ro.")) | 
|---|
| 170 | return false; | 
|---|
| 171 |  | 
|---|
| 172 | // SHF_LINK_ORDER sections are ICF'd as a unit with their dependent sections, | 
|---|
| 173 | // so we don't consider them for ICF individually. | 
|---|
| 174 | if (s->flags & SHF_LINK_ORDER) | 
|---|
| 175 | return false; | 
|---|
| 176 |  | 
|---|
| 177 | // Don't merge synthetic sections as their Data member is not valid and empty. | 
|---|
| 178 | // The Data member needs to be valid for ICF as it is used by ICF to determine | 
|---|
| 179 | // the equality of section contents. | 
|---|
| 180 | if (isa<SyntheticSection>(Val: s)) | 
|---|
| 181 | return false; | 
|---|
| 182 |  | 
|---|
| 183 | // .init and .fini contains instructions that must be executed to initialize | 
|---|
| 184 | // and finalize the process. They cannot and should not be merged. | 
|---|
| 185 | if (s->name == ".init"|| s->name == ".fini") | 
|---|
| 186 | return false; | 
|---|
| 187 |  | 
|---|
| 188 | // A user program may enumerate sections named with a C identifier using | 
|---|
| 189 | // __start_* and __stop_* symbols. We cannot ICF any such sections because | 
|---|
| 190 | // that could change program semantics. | 
|---|
| 191 | if (isValidCIdentifier(s: s->name)) | 
|---|
| 192 | return false; | 
|---|
| 193 |  | 
|---|
| 194 | return true; | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | // Split an equivalence class into smaller classes. | 
|---|
| 198 | template <class ELFT> | 
|---|
| 199 | void ICF<ELFT>::segregate(size_t begin, size_t end, uint32_t eqClassBase, | 
|---|
| 200 | bool constant) { | 
|---|
| 201 | // This loop rearranges sections in [Begin, End) so that all sections | 
|---|
| 202 | // that are equal in terms of equals{Constant,Variable} are contiguous | 
|---|
| 203 | // in [Begin, End). | 
|---|
| 204 | // | 
|---|
| 205 | // The algorithm is quadratic in the worst case, but that is not an | 
|---|
| 206 | // issue in practice because the number of the distinct sections in | 
|---|
| 207 | // each range is usually very small. | 
|---|
| 208 |  | 
|---|
| 209 | while (begin < end) { | 
|---|
| 210 | // Divide [Begin, End) into two. Let Mid be the start index of the | 
|---|
| 211 | // second group. | 
|---|
| 212 | auto bound = | 
|---|
| 213 | std::stable_partition(sections.begin() + begin + 1, | 
|---|
| 214 | sections.begin() + end, [&](InputSection *s) { | 
|---|
| 215 | if (constant) | 
|---|
| 216 | return equalsConstant(a: sections[begin], b: s); | 
|---|
| 217 | return equalsVariable(a: sections[begin], b: s); | 
|---|
| 218 | }); | 
|---|
| 219 | size_t mid = bound - sections.begin(); | 
|---|
| 220 |  | 
|---|
| 221 | // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by | 
|---|
| 222 | // updating the sections in [Begin, Mid). We use Mid as the basis for | 
|---|
| 223 | // the equivalence class ID because every group ends with a unique index. | 
|---|
| 224 | // Add this to eqClassBase to avoid equality with unique IDs. | 
|---|
| 225 | for (size_t i = begin; i < mid; ++i) | 
|---|
| 226 | sections[i]->eqClass[next] = eqClassBase + mid; | 
|---|
| 227 |  | 
|---|
| 228 | // If we created a group, we need to iterate the main loop again. | 
|---|
| 229 | if (mid != end) | 
|---|
| 230 | repeat = true; | 
|---|
| 231 |  | 
|---|
| 232 | begin = mid; | 
|---|
| 233 | } | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | // Compare two lists of relocations. | 
|---|
| 237 | template <class ELFT> | 
|---|
| 238 | template <class RelTy> | 
|---|
| 239 | bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra, | 
|---|
| 240 | const InputSection *secB, Relocs<RelTy> rb) { | 
|---|
| 241 | if (ra.size() != rb.size()) | 
|---|
| 242 | return false; | 
|---|
| 243 | auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); | 
|---|
| 244 | for (; rai != rae; ++rai, ++rbi) { | 
|---|
| 245 | if (rai->r_offset != rbi->r_offset || | 
|---|
| 246 | rai->getType(ctx.arg.isMips64EL) != rbi->getType(ctx.arg.isMips64EL)) | 
|---|
| 247 | return false; | 
|---|
| 248 |  | 
|---|
| 249 | uint64_t addA = getAddend<ELFT>(*rai); | 
|---|
| 250 | uint64_t addB = getAddend<ELFT>(*rbi); | 
|---|
| 251 |  | 
|---|
| 252 | Symbol &sa = secA->file->getRelocTargetSym(*rai); | 
|---|
| 253 | Symbol &sb = secB->file->getRelocTargetSym(*rbi); | 
|---|
| 254 | if (&sa == &sb) { | 
|---|
| 255 | if (addA == addB) | 
|---|
| 256 | continue; | 
|---|
| 257 | return false; | 
|---|
| 258 | } | 
|---|
| 259 |  | 
|---|
| 260 | auto *da = dyn_cast<Defined>(Val: &sa); | 
|---|
| 261 | auto *db = dyn_cast<Defined>(Val: &sb); | 
|---|
| 262 |  | 
|---|
| 263 | // Placeholder symbols generated by linker scripts look the same now but | 
|---|
| 264 | // may have different values later. | 
|---|
| 265 | if (!da || !db || da->scriptDefined || db->scriptDefined) | 
|---|
| 266 | return false; | 
|---|
| 267 |  | 
|---|
| 268 | // When comparing a pair of relocations, if they refer to different symbols, | 
|---|
| 269 | // and either symbol is preemptible, the containing sections should be | 
|---|
| 270 | // considered different. This is because even if the sections are identical | 
|---|
| 271 | // in this DSO, they may not be after preemption. | 
|---|
| 272 | if (da->isPreemptible || db->isPreemptible) | 
|---|
| 273 | return false; | 
|---|
| 274 |  | 
|---|
| 275 | // Relocations referring to absolute symbols are constant-equal if their | 
|---|
| 276 | // values are equal. | 
|---|
| 277 | if (!da->section && !db->section && da->value + addA == db->value + addB) | 
|---|
| 278 | continue; | 
|---|
| 279 | if (!da->section || !db->section) | 
|---|
| 280 | return false; | 
|---|
| 281 |  | 
|---|
| 282 | if (da->section->kind() != db->section->kind()) | 
|---|
| 283 | return false; | 
|---|
| 284 |  | 
|---|
| 285 | // Relocations referring to InputSections are constant-equal if their | 
|---|
| 286 | // section offsets are equal. | 
|---|
| 287 | if (isa<InputSection>(Val: da->section)) { | 
|---|
| 288 | if (da->value + addA == db->value + addB) | 
|---|
| 289 | continue; | 
|---|
| 290 | return false; | 
|---|
| 291 | } | 
|---|
| 292 |  | 
|---|
| 293 | // Relocations referring to MergeInputSections are constant-equal if their | 
|---|
| 294 | // offsets in the output section are equal. | 
|---|
| 295 | auto *x = dyn_cast<MergeInputSection>(Val: da->section); | 
|---|
| 296 | if (!x) | 
|---|
| 297 | return false; | 
|---|
| 298 | auto *y = cast<MergeInputSection>(Val: db->section); | 
|---|
| 299 | if (x->getParent() != y->getParent()) | 
|---|
| 300 | return false; | 
|---|
| 301 |  | 
|---|
| 302 | uint64_t offsetA = | 
|---|
| 303 | sa.isSection() ? x->getOffset(offset: addA) : x->getOffset(offset: da->value) + addA; | 
|---|
| 304 | uint64_t offsetB = | 
|---|
| 305 | sb.isSection() ? y->getOffset(offset: addB) : y->getOffset(offset: db->value) + addB; | 
|---|
| 306 | if (offsetA != offsetB) | 
|---|
| 307 | return false; | 
|---|
| 308 | } | 
|---|
| 309 |  | 
|---|
| 310 | return true; | 
|---|
| 311 | } | 
|---|
| 312 |  | 
|---|
| 313 | // Compare "non-moving" part of two InputSections, namely everything | 
|---|
| 314 | // except relocation targets. | 
|---|
| 315 | template <class ELFT> | 
|---|
| 316 | bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) { | 
|---|
| 317 | if (a->flags != b->flags || a->getSize() != b->getSize() || | 
|---|
| 318 | a->content() != b->content()) | 
|---|
| 319 | return false; | 
|---|
| 320 |  | 
|---|
| 321 | // If two sections have different output sections, we cannot merge them. | 
|---|
| 322 | assert(a->getParent() && b->getParent()); | 
|---|
| 323 | if (a->getParent() != b->getParent()) | 
|---|
| 324 | return false; | 
|---|
| 325 |  | 
|---|
| 326 | const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); | 
|---|
| 327 | const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); | 
|---|
| 328 | if (ra.areRelocsCrel() || rb.areRelocsCrel()) | 
|---|
| 329 | return constantEq(a, ra.crels, b, rb.crels); | 
|---|
| 330 | return ra.areRelocsRel() || rb.areRelocsRel() | 
|---|
| 331 | ? constantEq(a, ra.rels, b, rb.rels) | 
|---|
| 332 | : constantEq(a, ra.relas, b, rb.relas); | 
|---|
| 333 | } | 
|---|
| 334 |  | 
|---|
| 335 | // Compare two lists of relocations. Returns true if all pairs of | 
|---|
| 336 | // relocations point to the same section in terms of ICF. | 
|---|
| 337 | template <class ELFT> | 
|---|
| 338 | template <class RelTy> | 
|---|
| 339 | bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra, | 
|---|
| 340 | const InputSection *secB, Relocs<RelTy> rb) { | 
|---|
| 341 | assert(ra.size() == rb.size()); | 
|---|
| 342 |  | 
|---|
| 343 | auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); | 
|---|
| 344 | for (; rai != rae; ++rai, ++rbi) { | 
|---|
| 345 | // The two sections must be identical. | 
|---|
| 346 | Symbol &sa = secA->file->getRelocTargetSym(*rai); | 
|---|
| 347 | Symbol &sb = secB->file->getRelocTargetSym(*rbi); | 
|---|
| 348 | if (&sa == &sb) | 
|---|
| 349 | continue; | 
|---|
| 350 |  | 
|---|
| 351 | auto *da = cast<Defined>(Val: &sa); | 
|---|
| 352 | auto *db = cast<Defined>(Val: &sb); | 
|---|
| 353 |  | 
|---|
| 354 | // We already dealt with absolute and non-InputSection symbols in | 
|---|
| 355 | // constantEq, and for InputSections we have already checked everything | 
|---|
| 356 | // except the equivalence class. | 
|---|
| 357 | if (!da->section) | 
|---|
| 358 | continue; | 
|---|
| 359 | auto *x = dyn_cast<InputSection>(Val: da->section); | 
|---|
| 360 | if (!x) | 
|---|
| 361 | continue; | 
|---|
| 362 | auto *y = cast<InputSection>(Val: db->section); | 
|---|
| 363 |  | 
|---|
| 364 | // Sections that are in the special equivalence class 0, can never be the | 
|---|
| 365 | // same in terms of the equivalence class. | 
|---|
| 366 | if (x->eqClass[current] == 0) | 
|---|
| 367 | return false; | 
|---|
| 368 | if (x->eqClass[current] != y->eqClass[current]) | 
|---|
| 369 | return false; | 
|---|
| 370 | }; | 
|---|
| 371 |  | 
|---|
| 372 | return true; | 
|---|
| 373 | } | 
|---|
| 374 |  | 
|---|
| 375 | // Compare "moving" part of two InputSections, namely relocation targets. | 
|---|
| 376 | template <class ELFT> | 
|---|
| 377 | bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) { | 
|---|
| 378 | const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); | 
|---|
| 379 | const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); | 
|---|
| 380 | if (ra.areRelocsCrel() || rb.areRelocsCrel()) | 
|---|
| 381 | return variableEq(a, ra.crels, b, rb.crels); | 
|---|
| 382 | return ra.areRelocsRel() || rb.areRelocsRel() | 
|---|
| 383 | ? variableEq(a, ra.rels, b, rb.rels) | 
|---|
| 384 | : variableEq(a, ra.relas, b, rb.relas); | 
|---|
| 385 | } | 
|---|
| 386 |  | 
|---|
| 387 | template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t begin, size_t end) { | 
|---|
| 388 | uint32_t eqClass = sections[begin]->eqClass[current]; | 
|---|
| 389 | for (size_t i = begin + 1; i < end; ++i) | 
|---|
| 390 | if (eqClass != sections[i]->eqClass[current]) | 
|---|
| 391 | return i; | 
|---|
| 392 | return end; | 
|---|
| 393 | } | 
|---|
| 394 |  | 
|---|
| 395 | // Sections in the same equivalence class are contiguous in Sections | 
|---|
| 396 | // vector. Therefore, Sections vector can be considered as contiguous | 
|---|
| 397 | // groups of sections, grouped by the class. | 
|---|
| 398 | // | 
|---|
| 399 | // This function calls Fn on every group within [Begin, End). | 
|---|
| 400 | template <class ELFT> | 
|---|
| 401 | void ICF<ELFT>::forEachClassRange(size_t begin, size_t end, | 
|---|
| 402 | llvm::function_ref<void(size_t, size_t)> fn) { | 
|---|
| 403 | while (begin < end) { | 
|---|
| 404 | size_t mid = findBoundary(begin, end); | 
|---|
| 405 | fn(begin, mid); | 
|---|
| 406 | begin = mid; | 
|---|
| 407 | } | 
|---|
| 408 | } | 
|---|
| 409 |  | 
|---|
| 410 | // Call Fn on each equivalence class. | 
|---|
| 411 |  | 
|---|
| 412 | template <class ELFT> | 
|---|
| 413 | void ICF<ELFT>::parallelForEachClass( | 
|---|
| 414 | llvm::function_ref<void(size_t, size_t)> fn) { | 
|---|
| 415 | // If threading is disabled or the number of sections are | 
|---|
| 416 | // too small to use threading, call Fn sequentially. | 
|---|
| 417 | if (parallel::strategy.ThreadsRequested == 1 || sections.size() < 1024) { | 
|---|
| 418 | forEachClassRange(begin: 0, end: sections.size(), fn); | 
|---|
| 419 | ++cnt; | 
|---|
| 420 | return; | 
|---|
| 421 | } | 
|---|
| 422 |  | 
|---|
| 423 | current = cnt % 2; | 
|---|
| 424 | next = (cnt + 1) % 2; | 
|---|
| 425 |  | 
|---|
| 426 | // Shard into non-overlapping intervals, and call Fn in parallel. | 
|---|
| 427 | // The sharding must be completed before any calls to Fn are made | 
|---|
| 428 | // so that Fn can modify the Chunks in its shard without causing data | 
|---|
| 429 | // races. | 
|---|
| 430 | const size_t numShards = 256; | 
|---|
| 431 | size_t step = sections.size() / numShards; | 
|---|
| 432 | size_t boundaries[numShards + 1]; | 
|---|
| 433 | boundaries[0] = 0; | 
|---|
| 434 | boundaries[numShards] = sections.size(); | 
|---|
| 435 |  | 
|---|
| 436 | parallelFor(1, numShards, [&](size_t i) { | 
|---|
| 437 | boundaries[i] = findBoundary(begin: (i - 1) * step, end: sections.size()); | 
|---|
| 438 | }); | 
|---|
| 439 |  | 
|---|
| 440 | parallelFor(1, numShards + 1, [&](size_t i) { | 
|---|
| 441 | if (boundaries[i - 1] < boundaries[i]) | 
|---|
| 442 | forEachClassRange(begin: boundaries[i - 1], end: boundaries[i], fn); | 
|---|
| 443 | }); | 
|---|
| 444 | ++cnt; | 
|---|
| 445 | } | 
|---|
| 446 |  | 
|---|
| 447 | // Combine the hashes of the sections referenced by the given section into its | 
|---|
| 448 | // hash. | 
|---|
| 449 | template <class RelTy> | 
|---|
| 450 | static void combineRelocHashes(unsigned cnt, InputSection *isec, | 
|---|
| 451 | Relocs<RelTy> rels) { | 
|---|
| 452 | uint32_t hash = isec->eqClass[cnt % 2]; | 
|---|
| 453 | for (RelTy rel : rels) { | 
|---|
| 454 | Symbol &s = isec->file->getRelocTargetSym(rel); | 
|---|
| 455 | if (auto *d = dyn_cast<Defined>(Val: &s)) | 
|---|
| 456 | if (auto *relSec = dyn_cast_or_null<InputSection>(Val: d->section)) | 
|---|
| 457 | hash += relSec->eqClass[cnt % 2]; | 
|---|
| 458 | } | 
|---|
| 459 | // Set MSB to 1 to avoid collisions with unique IDs. | 
|---|
| 460 | isec->eqClass[(cnt + 1) % 2] = hash | (1U << 31); | 
|---|
| 461 | } | 
|---|
| 462 |  | 
|---|
| 463 | // The main function of ICF. | 
|---|
| 464 | template <class ELFT> void ICF<ELFT>::run() { | 
|---|
| 465 | // Two text sections may have identical content and relocations but different | 
|---|
| 466 | // LSDA, e.g. the two functions may have catch blocks of different types. If a | 
|---|
| 467 | // text section is referenced by a .eh_frame FDE with LSDA, it is not | 
|---|
| 468 | // eligible. This is implemented by iterating over CIE/FDE and setting | 
|---|
| 469 | // eqClass[0] to the referenced text section from a live FDE. | 
|---|
| 470 | // | 
|---|
| 471 | // If two .gcc_except_table have identical semantics (usually identical | 
|---|
| 472 | // content with PC-relative encoding), we will lose folding opportunity. | 
|---|
| 473 | uint32_t uniqueId = 0; | 
|---|
| 474 | for (Partition &part : ctx.partitions) | 
|---|
| 475 | part.ehFrame->iterateFDEWithLSDA<ELFT>( | 
|---|
| 476 | [&](InputSection &s) { s.eqClass[0] = s.eqClass[1] = ++uniqueId; }); | 
|---|
| 477 |  | 
|---|
| 478 | // Collect sections to merge. | 
|---|
| 479 | for (InputSectionBase *sec : ctx.inputSections) { | 
|---|
| 480 | auto *s = dyn_cast<InputSection>(Val: sec); | 
|---|
| 481 | if (s && s->eqClass[0] == 0) { | 
|---|
| 482 | if (isEligible(s)) | 
|---|
| 483 | sections.push_back(Elt: s); | 
|---|
| 484 | else | 
|---|
| 485 | // Ineligible sections are assigned unique IDs, i.e. each section | 
|---|
| 486 | // belongs to an equivalence class of its own. | 
|---|
| 487 | s->eqClass[0] = s->eqClass[1] = ++uniqueId; | 
|---|
| 488 | } | 
|---|
| 489 | } | 
|---|
| 490 |  | 
|---|
| 491 | // Initially, we use hash values to partition sections. | 
|---|
| 492 | parallelForEach(sections, [&](InputSection *s) { | 
|---|
| 493 | // Set MSB to 1 to avoid collisions with unique IDs. | 
|---|
| 494 | s->eqClass[0] = xxh3_64bits(data: s->content()) | (1U << 31); | 
|---|
| 495 | }); | 
|---|
| 496 |  | 
|---|
| 497 | // Perform 2 rounds of relocation hash propagation. 2 is an empirical value to | 
|---|
| 498 | // reduce the average sizes of equivalence classes, i.e. segregate() which has | 
|---|
| 499 | // a large time complexity will have less work to do. | 
|---|
| 500 | for (unsigned cnt = 0; cnt != 2; ++cnt) { | 
|---|
| 501 | parallelForEach(sections, [&](InputSection *s) { | 
|---|
| 502 | const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>(); | 
|---|
| 503 | if (rels.areRelocsCrel()) | 
|---|
| 504 | combineRelocHashes(cnt, s, rels.crels); | 
|---|
| 505 | else if (rels.areRelocsRel()) | 
|---|
| 506 | combineRelocHashes(cnt, s, rels.rels); | 
|---|
| 507 | else | 
|---|
| 508 | combineRelocHashes(cnt, s, rels.relas); | 
|---|
| 509 | }); | 
|---|
| 510 | } | 
|---|
| 511 |  | 
|---|
| 512 | // From now on, sections in Sections vector are ordered so that sections | 
|---|
| 513 | // in the same equivalence class are consecutive in the vector. | 
|---|
| 514 | llvm::stable_sort(sections, [](const InputSection *a, const InputSection *b) { | 
|---|
| 515 | return a->eqClass[0] < b->eqClass[0]; | 
|---|
| 516 | }); | 
|---|
| 517 |  | 
|---|
| 518 | // Compare static contents and assign unique equivalence class IDs for each | 
|---|
| 519 | // static content. Use a base offset for these IDs to ensure no overlap with | 
|---|
| 520 | // the unique IDs already assigned. | 
|---|
| 521 | uint32_t eqClassBase = ++uniqueId; | 
|---|
| 522 | parallelForEachClass(fn: [&](size_t begin, size_t end) { | 
|---|
| 523 | segregate(begin, end, eqClassBase, constant: true); | 
|---|
| 524 | }); | 
|---|
| 525 |  | 
|---|
| 526 | // Split groups by comparing relocations until convergence is obtained. | 
|---|
| 527 | do { | 
|---|
| 528 | repeat = false; | 
|---|
| 529 | parallelForEachClass(fn: [&](size_t begin, size_t end) { | 
|---|
| 530 | segregate(begin, end, eqClassBase, constant: false); | 
|---|
| 531 | }); | 
|---|
| 532 | } while (repeat); | 
|---|
| 533 |  | 
|---|
| 534 | Log(ctx) << "ICF needed "<< cnt << " iterations"; | 
|---|
| 535 |  | 
|---|
| 536 | auto print = [&ctx = ctx]() -> ELFSyncStream { | 
|---|
| 537 | return {ctx, ctx.arg.printIcfSections ? DiagLevel::Msg : DiagLevel::None}; | 
|---|
| 538 | }; | 
|---|
| 539 | // Merge sections by the equivalence class. | 
|---|
| 540 | forEachClassRange(begin: 0, end: sections.size(), fn: [&](size_t begin, size_t end) { | 
|---|
| 541 | if (end - begin == 1) | 
|---|
| 542 | return; | 
|---|
| 543 | print() << "selected section "<< sections[begin]; | 
|---|
| 544 | for (size_t i = begin + 1; i < end; ++i) { | 
|---|
| 545 | print() << "  removing identical section "<< sections[i]; | 
|---|
| 546 | sections[begin]->replace(other: sections[i]); | 
|---|
| 547 |  | 
|---|
| 548 | // At this point we know sections merged are fully identical and hence | 
|---|
| 549 | // we want to remove duplicate implicit dependencies such as link order | 
|---|
| 550 | // and relocation sections. | 
|---|
| 551 | for (InputSection *isec : sections[i]->dependentSections) | 
|---|
| 552 | isec->markDead(); | 
|---|
| 553 | } | 
|---|
| 554 | }); | 
|---|
| 555 |  | 
|---|
| 556 | // Change Defined symbol's section field to the canonical one. | 
|---|
| 557 | auto fold = [](Symbol *sym) { | 
|---|
| 558 | if (auto *d = dyn_cast<Defined>(Val: sym)) | 
|---|
| 559 | if (auto *sec = dyn_cast_or_null<InputSection>(Val: d->section)) | 
|---|
| 560 | if (sec->repl != d->section) { | 
|---|
| 561 | d->section = sec->repl; | 
|---|
| 562 | d->folded = true; | 
|---|
| 563 | } | 
|---|
| 564 | }; | 
|---|
| 565 | for (Symbol *sym : ctx.symtab->getSymbols()) | 
|---|
| 566 | fold(sym); | 
|---|
| 567 | parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) { | 
|---|
| 568 | for (Symbol *sym : file->getLocalSymbols()) | 
|---|
| 569 | fold(sym); | 
|---|
| 570 | }); | 
|---|
| 571 |  | 
|---|
| 572 | // InputSectionDescription::sections is populated by processSectionCommands(). | 
|---|
| 573 | // ICF may fold some input sections assigned to output sections. Remove them. | 
|---|
| 574 | for (SectionCommand *cmd : ctx.script->sectionCommands) | 
|---|
| 575 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) | 
|---|
| 576 | for (SectionCommand *subCmd : osd->osec.commands) | 
|---|
| 577 | if (auto *isd = dyn_cast<InputSectionDescription>(Val: subCmd)) | 
|---|
| 578 | llvm::erase_if(isd->sections, | 
|---|
| 579 | [](InputSection *isec) { return !isec->isLive(); }); | 
|---|
| 580 | } | 
|---|
| 581 |  | 
|---|
| 582 | // ICF entry point function. | 
|---|
| 583 | template <class ELFT> void elf::doIcf(Ctx &ctx) { | 
|---|
| 584 | llvm::TimeTraceScope timeScope( "ICF"); | 
|---|
| 585 | ICF<ELFT>(ctx).run(); | 
|---|
| 586 | } | 
|---|
| 587 |  | 
|---|
| 588 | template void elf::doIcf<ELF32LE>(Ctx &); | 
|---|
| 589 | template void elf::doIcf<ELF32BE>(Ctx &); | 
|---|
| 590 | template void elf::doIcf<ELF64LE>(Ctx &); | 
|---|
| 591 | template void elf::doIcf<ELF64BE>(Ctx &); | 
|---|
| 592 |  | 
|---|