| 1 | //===- ICF.cpp ------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // ICF is short for Identical Code Folding. That is a size optimization to |
| 10 | // identify and merge two or more read-only sections (typically functions) |
| 11 | // that happened to have the same contents. It usually reduces output size |
| 12 | // by a few percent. |
| 13 | // |
| 14 | // On Windows, ICF is enabled by default. |
| 15 | // |
| 16 | // See ELF/ICF.cpp for the details about the algorithm. |
| 17 | // |
| 18 | //===----------------------------------------------------------------------===// |
| 19 | |
| 20 | #include "ICF.h" |
| 21 | #include "COFFLinkerContext.h" |
| 22 | #include "Chunks.h" |
| 23 | #include "Symbols.h" |
| 24 | #include "lld/Common/Timer.h" |
| 25 | #include "llvm/Support/Parallel.h" |
| 26 | #include "llvm/Support/TimeProfiler.h" |
| 27 | #include "llvm/Support/xxhash.h" |
| 28 | #include <algorithm> |
| 29 | #include <atomic> |
| 30 | #include <vector> |
| 31 | |
| 32 | using namespace llvm; |
| 33 | |
| 34 | namespace lld::coff { |
| 35 | |
| 36 | class ICF { |
| 37 | public: |
| 38 | ICF(COFFLinkerContext &c) : ctx(c){}; |
| 39 | void run(); |
| 40 | |
| 41 | private: |
| 42 | void segregate(size_t begin, size_t end, bool constant); |
| 43 | |
| 44 | bool assocEquals(const SectionChunk *a, const SectionChunk *b); |
| 45 | |
| 46 | bool equalsConstant(const SectionChunk *a, const SectionChunk *b); |
| 47 | bool equalsVariable(const SectionChunk *a, const SectionChunk *b); |
| 48 | |
| 49 | bool isEligible(SectionChunk *c); |
| 50 | |
| 51 | size_t findBoundary(size_t begin, size_t end); |
| 52 | |
| 53 | void forEachClassRange(size_t begin, size_t end, |
| 54 | std::function<void(size_t, size_t)> fn); |
| 55 | |
| 56 | void forEachClass(std::function<void(size_t, size_t)> fn); |
| 57 | |
| 58 | std::vector<SectionChunk *> chunks; |
| 59 | int cnt = 0; |
| 60 | std::atomic<bool> repeat = {false}; |
| 61 | |
| 62 | COFFLinkerContext &ctx; |
| 63 | }; |
| 64 | |
| 65 | // Returns true if section S is subject of ICF. |
| 66 | // |
| 67 | // Microsoft's documentation |
| 68 | // (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April |
| 69 | // 2017) says that /opt:icf folds both functions and read-only data. |
| 70 | // Despite that, the MSVC linker folds only functions. We found |
| 71 | // a few instances of programs that are not safe for data merging. |
| 72 | // Therefore, we merge only functions just like the MSVC tool. However, we also |
| 73 | // merge read-only sections in a couple of cases where the address of the |
| 74 | // section is insignificant to the user program and the behaviour matches that |
| 75 | // of the Visual C++ linker. |
| 76 | bool ICF::isEligible(SectionChunk *c) { |
| 77 | // Non-comdat chunks, dead chunks, and writable chunks are not eligible. |
| 78 | bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; |
| 79 | if (!c->isCOMDAT() || !c->live || writable) |
| 80 | return false; |
| 81 | |
| 82 | // Under regular (not safe) ICF, all code sections are eligible. |
| 83 | if ((ctx.config.doICF == ICFLevel::All) && |
| 84 | c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) |
| 85 | return true; |
| 86 | |
| 87 | // .pdata and .xdata unwind info sections are eligible. |
| 88 | StringRef outSecName = c->getSectionName().split(Separator: '$').first; |
| 89 | if (outSecName == ".pdata" || outSecName == ".xdata" ) |
| 90 | return true; |
| 91 | |
| 92 | // So are vtables. |
| 93 | const char *itaniumVtablePrefix = |
| 94 | ctx.config.machine == I386 ? "__ZTV" : "_ZTV" ; |
| 95 | if (c->sym && (c->sym->getName().starts_with(Prefix: "??_7" ) || |
| 96 | c->sym->getName().starts_with(Prefix: itaniumVtablePrefix))) |
| 97 | return true; |
| 98 | |
| 99 | // Anything else not in an address-significance table is eligible. |
| 100 | return !c->keepUnique; |
| 101 | } |
| 102 | |
| 103 | // Split an equivalence class into smaller classes. |
| 104 | void ICF::segregate(size_t begin, size_t end, bool constant) { |
| 105 | while (begin < end) { |
| 106 | // Divide [Begin, End) into two. Let Mid be the start index of the |
| 107 | // second group. |
| 108 | auto bound = std::stable_partition( |
| 109 | first: chunks.begin() + begin + 1, last: chunks.begin() + end, pred: [&](SectionChunk *s) { |
| 110 | if (constant) |
| 111 | return equalsConstant(a: chunks[begin], b: s); |
| 112 | return equalsVariable(a: chunks[begin], b: s); |
| 113 | }); |
| 114 | size_t mid = bound - chunks.begin(); |
| 115 | |
| 116 | // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an |
| 117 | // equivalence class ID because every group ends with a unique index. |
| 118 | for (size_t i = begin; i < mid; ++i) |
| 119 | chunks[i]->eqClass[(cnt + 1) % 2] = mid; |
| 120 | |
| 121 | // If we created a group, we need to iterate the main loop again. |
| 122 | if (mid != end) |
| 123 | repeat = true; |
| 124 | |
| 125 | begin = mid; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | // Returns true if two sections' associative children are equal. |
| 130 | bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { |
| 131 | // Ignore associated metadata sections that don't participate in ICF, such as |
| 132 | // debug info and CFGuard metadata. |
| 133 | auto considerForICF = [](const SectionChunk &assoc) { |
| 134 | StringRef Name = assoc.getSectionName(); |
| 135 | return !(Name.starts_with(Prefix: ".debug" ) || Name == ".gfids$y" || |
| 136 | Name == ".giats$y" || Name == ".gljmp$y" ); |
| 137 | }; |
| 138 | auto ra = make_filter_range(Range: a->children(), Pred: considerForICF); |
| 139 | auto rb = make_filter_range(Range: b->children(), Pred: considerForICF); |
| 140 | return std::equal(first1: ra.begin(), last1: ra.end(), first2: rb.begin(), last2: rb.end(), |
| 141 | binary_pred: [&](const SectionChunk &ia, const SectionChunk &ib) { |
| 142 | return ia.eqClass[cnt % 2] == ib.eqClass[cnt % 2]; |
| 143 | }); |
| 144 | } |
| 145 | |
| 146 | // Compare "non-moving" part of two sections, namely everything |
| 147 | // except relocation targets. |
| 148 | bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) { |
| 149 | if (a->relocsSize != b->relocsSize) |
| 150 | return false; |
| 151 | |
| 152 | // Compare relocations. |
| 153 | auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { |
| 154 | if (r1.Type != r2.Type || |
| 155 | r1.VirtualAddress != r2.VirtualAddress) { |
| 156 | return false; |
| 157 | } |
| 158 | Symbol *b1 = a->file->getSymbol(symbolIndex: r1.SymbolTableIndex); |
| 159 | Symbol *b2 = b->file->getSymbol(symbolIndex: r2.SymbolTableIndex); |
| 160 | if (b1 == b2) |
| 161 | return true; |
| 162 | if (auto *d1 = dyn_cast<DefinedRegular>(Val: b1)) |
| 163 | if (auto *d2 = dyn_cast<DefinedRegular>(Val: b2)) |
| 164 | return d1->getValue() == d2->getValue() && |
| 165 | d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; |
| 166 | return false; |
| 167 | }; |
| 168 | if (!std::equal(first1: a->getRelocs().begin(), last1: a->getRelocs().end(), |
| 169 | first2: b->getRelocs().begin(), binary_pred: eq)) |
| 170 | return false; |
| 171 | |
| 172 | // Compare section attributes and contents. |
| 173 | return a->getOutputCharacteristics() == b->getOutputCharacteristics() && |
| 174 | a->getSectionName() == b->getSectionName() && |
| 175 | a->header->SizeOfRawData == b->header->SizeOfRawData && |
| 176 | a->checksum == b->checksum && a->getContents() == b->getContents() && |
| 177 | a->getMachine() == b->getMachine() && assocEquals(a, b); |
| 178 | } |
| 179 | |
| 180 | // Compare "moving" part of two sections, namely relocation targets. |
| 181 | bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) { |
| 182 | // Compare relocations. |
| 183 | auto eqSym = [&](Symbol *b1, Symbol *b2) { |
| 184 | if (b1 == b2) |
| 185 | return true; |
| 186 | if (auto *d1 = dyn_cast<DefinedRegular>(Val: b1)) |
| 187 | if (auto *d2 = dyn_cast<DefinedRegular>(Val: b2)) |
| 188 | return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; |
| 189 | return false; |
| 190 | }; |
| 191 | auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { |
| 192 | Symbol *b1 = a->file->getSymbol(symbolIndex: r1.SymbolTableIndex); |
| 193 | Symbol *b2 = b->file->getSymbol(symbolIndex: r2.SymbolTableIndex); |
| 194 | return eqSym(b1, b2); |
| 195 | }; |
| 196 | |
| 197 | Symbol *e1 = a->getEntryThunk(); |
| 198 | Symbol *e2 = b->getEntryThunk(); |
| 199 | if ((e1 || e2) && (!e1 || !e2 || !eqSym(e1, e2))) |
| 200 | return false; |
| 201 | |
| 202 | return std::equal(first1: a->getRelocs().begin(), last1: a->getRelocs().end(), |
| 203 | first2: b->getRelocs().begin(), binary_pred: eq) && |
| 204 | assocEquals(a, b); |
| 205 | } |
| 206 | |
| 207 | // Find the first Chunk after Begin that has a different class from Begin. |
| 208 | size_t ICF::findBoundary(size_t begin, size_t end) { |
| 209 | for (size_t i = begin + 1; i < end; ++i) |
| 210 | if (chunks[begin]->eqClass[cnt % 2] != chunks[i]->eqClass[cnt % 2]) |
| 211 | return i; |
| 212 | return end; |
| 213 | } |
| 214 | |
| 215 | void ICF::forEachClassRange(size_t begin, size_t end, |
| 216 | std::function<void(size_t, size_t)> fn) { |
| 217 | while (begin < end) { |
| 218 | size_t mid = findBoundary(begin, end); |
| 219 | fn(begin, mid); |
| 220 | begin = mid; |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | // Call Fn on each class group. |
| 225 | void ICF::forEachClass(std::function<void(size_t, size_t)> fn) { |
| 226 | // If the number of sections are too small to use threading, |
| 227 | // call Fn sequentially. |
| 228 | if (chunks.size() < 1024) { |
| 229 | forEachClassRange(begin: 0, end: chunks.size(), fn); |
| 230 | ++cnt; |
| 231 | return; |
| 232 | } |
| 233 | |
| 234 | // Shard into non-overlapping intervals, and call Fn in parallel. |
| 235 | // The sharding must be completed before any calls to Fn are made |
| 236 | // so that Fn can modify the Chunks in its shard without causing data |
| 237 | // races. |
| 238 | const size_t numShards = 256; |
| 239 | size_t step = chunks.size() / numShards; |
| 240 | size_t boundaries[numShards + 1]; |
| 241 | boundaries[0] = 0; |
| 242 | boundaries[numShards] = chunks.size(); |
| 243 | parallelFor(Begin: 1, End: numShards, Fn: [&](size_t i) { |
| 244 | boundaries[i] = findBoundary(begin: (i - 1) * step, end: chunks.size()); |
| 245 | }); |
| 246 | parallelFor(Begin: 1, End: numShards + 1, Fn: [&](size_t i) { |
| 247 | if (boundaries[i - 1] < boundaries[i]) { |
| 248 | forEachClassRange(begin: boundaries[i - 1], end: boundaries[i], fn); |
| 249 | } |
| 250 | }); |
| 251 | ++cnt; |
| 252 | } |
| 253 | |
| 254 | // Merge identical COMDAT sections. |
| 255 | // Two sections are considered the same if their section headers, |
| 256 | // contents and relocations are all the same. |
| 257 | void ICF::run() { |
| 258 | llvm::TimeTraceScope timeScope("ICF" ); |
| 259 | ScopedTimer t(ctx.icfTimer); |
| 260 | |
| 261 | // Collect only mergeable sections and group by hash value. |
| 262 | uint32_t nextId = 1; |
| 263 | for (Chunk *c : ctx.driver.getChunks()) { |
| 264 | if (auto *sc = dyn_cast<SectionChunk>(Val: c)) { |
| 265 | if (isEligible(c: sc)) |
| 266 | chunks.push_back(x: sc); |
| 267 | else |
| 268 | sc->eqClass[0] = nextId++; |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | // Make sure that ICF doesn't merge sections that are being handled by string |
| 273 | // tail merging. |
| 274 | for (MergeChunk *mc : ctx.mergeChunkInstances) |
| 275 | if (mc) |
| 276 | for (SectionChunk *sc : mc->sections) |
| 277 | sc->eqClass[0] = nextId++; |
| 278 | |
| 279 | // Initially, we use hash values to partition sections. |
| 280 | parallelForEach(R&: chunks, Fn: [&](SectionChunk *sc) { |
| 281 | sc->eqClass[0] = xxh3_64bits(data: sc->getContents()); |
| 282 | }); |
| 283 | |
| 284 | // Combine the hashes of the sections referenced by each section into its |
| 285 | // hash. |
| 286 | for (unsigned cnt = 0; cnt != 2; ++cnt) { |
| 287 | parallelForEach(R&: chunks, Fn: [&](SectionChunk *sc) { |
| 288 | uint32_t hash = sc->eqClass[cnt % 2]; |
| 289 | for (Symbol *b : sc->symbols()) |
| 290 | if (auto *sym = dyn_cast_or_null<DefinedRegular>(Val: b)) |
| 291 | hash += sym->getChunk()->eqClass[cnt % 2]; |
| 292 | // Set MSB to 1 to avoid collisions with non-hash classes. |
| 293 | sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31); |
| 294 | }); |
| 295 | } |
| 296 | |
| 297 | // From now on, sections in Chunks are ordered so that sections in |
| 298 | // the same group are consecutive in the vector. |
| 299 | llvm::stable_sort(Range&: chunks, C: [](const SectionChunk *a, const SectionChunk *b) { |
| 300 | return a->eqClass[0] < b->eqClass[0]; |
| 301 | }); |
| 302 | |
| 303 | // Compare static contents and assign unique IDs for each static content. |
| 304 | forEachClass(fn: [&](size_t begin, size_t end) { segregate(begin, end, constant: true); }); |
| 305 | |
| 306 | // Split groups by comparing relocations until convergence is obtained. |
| 307 | do { |
| 308 | repeat = false; |
| 309 | forEachClass( |
| 310 | fn: [&](size_t begin, size_t end) { segregate(begin, end, constant: false); }); |
| 311 | } while (repeat); |
| 312 | |
| 313 | Log(ctx) << "ICF needed " << Twine(cnt) << " iterations" ; |
| 314 | |
| 315 | // Merge sections in the same classes. |
| 316 | forEachClass(fn: [&](size_t begin, size_t end) { |
| 317 | if (end - begin == 1) |
| 318 | return; |
| 319 | |
| 320 | Log(ctx) << "Selected " << chunks[begin]->getDebugName(); |
| 321 | for (size_t i = begin + 1; i < end; ++i) { |
| 322 | Log(ctx) << " Removed " << chunks[i]->getDebugName(); |
| 323 | chunks[begin]->replace(other: chunks[i]); |
| 324 | } |
| 325 | }); |
| 326 | } |
| 327 | |
| 328 | // Entry point to ICF. |
| 329 | void doICF(COFFLinkerContext &ctx) { ICF(ctx).run(); } |
| 330 | |
| 331 | } // namespace lld::coff |
| 332 | |