| 1 | //===- BPSectionOrderer.cpp -----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "BPSectionOrderer.h" |
| 10 | #include "InputSection.h" |
| 11 | #include "Relocations.h" |
| 12 | #include "Symbols.h" |
| 13 | #include "lld/Common/BPSectionOrdererBase.inc" |
| 14 | #include "llvm/ADT/DenseMap.h" |
| 15 | #include "llvm/ADT/StableHashing.h" |
| 16 | #include "llvm/Support/Endian.h" |
| 17 | #include "llvm/Support/xxhash.h" |
| 18 | |
| 19 | #define DEBUG_TYPE "bp-section-orderer" |
| 20 | |
| 21 | using namespace llvm; |
| 22 | using namespace lld::macho; |
| 23 | |
| 24 | namespace { |
| 25 | struct BPOrdererMachO; |
| 26 | } |
| 27 | template <> struct lld::BPOrdererTraits<struct BPOrdererMachO> { |
| 28 | using Section = macho::InputSection; |
| 29 | using Defined = macho::Defined; |
| 30 | }; |
| 31 | namespace { |
| 32 | struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> { |
| 33 | static uint64_t getSize(const Section &sec) { return sec.getSize(); } |
| 34 | static bool isCodeSection(const Section &sec) { |
| 35 | return macho::isCodeSection(&sec); |
| 36 | } |
| 37 | static ArrayRef<Defined *> getSymbols(const Section &sec) { |
| 38 | return sec.symbols; |
| 39 | } |
| 40 | |
| 41 | // Linkage names can be prefixed with "_" or "l_" on Mach-O. See |
| 42 | // Mangler::getNameWithPrefix() for details. |
| 43 | std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) { |
| 44 | if (name.consume_front(Prefix: "_" ) || name.consume_front(Prefix: "l_" )) |
| 45 | return name; |
| 46 | return {}; |
| 47 | } |
| 48 | |
| 49 | static void |
| 50 | getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes, |
| 51 | const llvm::DenseMap<const void *, uint64_t> §ionToIdx) { |
| 52 | constexpr unsigned windowSize = 4; |
| 53 | |
| 54 | // Calculate content hashes: k-mers and the last k-1 bytes. |
| 55 | ArrayRef<uint8_t> data = sec.data; |
| 56 | if (data.size() >= windowSize) |
| 57 | for (size_t i = 0; i <= data.size() - windowSize; ++i) |
| 58 | hashes.push_back(Elt: llvm::support::endian::read32le(P: data.data() + i)); |
| 59 | for (uint8_t byte : data.take_back(N: windowSize - 1)) |
| 60 | hashes.push_back(Elt: byte); |
| 61 | |
| 62 | // Calculate relocation hashes |
| 63 | for (const auto &r : sec.relocs) { |
| 64 | if (r.length == 0 || r.referent.isNull() || r.offset >= data.size()) |
| 65 | continue; |
| 66 | |
| 67 | uint64_t relocHash = getRelocHash(reloc: r, sectionToIdx); |
| 68 | uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1; |
| 69 | for (uint32_t i = start; i < r.offset + r.length; i++) { |
| 70 | auto window = data.drop_front(N: i).take_front(N: windowSize); |
| 71 | hashes.push_back(Elt: xxh3_64bits(data: window) ^ relocHash); |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | llvm::sort(C&: hashes); |
| 76 | hashes.erase(CS: llvm::unique(R&: hashes), CE: hashes.end()); |
| 77 | } |
| 78 | |
| 79 | static llvm::StringRef getSymName(const Defined &sym) { |
| 80 | return sym.getName(); |
| 81 | } |
| 82 | static uint64_t getSymValue(const Defined &sym) { return sym.value; } |
| 83 | static uint64_t getSymSize(const Defined &sym) { return sym.size; } |
| 84 | |
| 85 | private: |
| 86 | static uint64_t |
| 87 | getRelocHash(const macho::Reloc &reloc, |
| 88 | const llvm::DenseMap<const void *, uint64_t> §ionToIdx) { |
| 89 | auto *isec = reloc.getReferentInputSection(); |
| 90 | std::optional<uint64_t> sectionIdx; |
| 91 | if (auto it = sectionToIdx.find(Val: isec); it != sectionToIdx.end()) |
| 92 | sectionIdx = it->second; |
| 93 | uint64_t kind = -1, value = 0; |
| 94 | if (isec) |
| 95 | kind = uint64_t(isec->kind()); |
| 96 | |
| 97 | if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) { |
| 98 | kind = (kind << 8) | uint8_t(sym->kind()); |
| 99 | if (auto *d = llvm::dyn_cast<Defined>(Val: sym)) |
| 100 | value = d->value; |
| 101 | } |
| 102 | return llvm::stable_hash_combine(A: kind, B: sectionIdx.value_or(u: 0), C: value, |
| 103 | D: reloc.addend); |
| 104 | } |
| 105 | }; |
| 106 | } // namespace |
| 107 | |
| 108 | DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning( |
| 109 | StringRef profilePath, bool forFunctionCompression, bool forDataCompression, |
| 110 | bool compressionSortStartupFunctions, bool verbose) { |
| 111 | // Collect candidate sections and associated symbols. |
| 112 | SmallVector<InputSection *> sections; |
| 113 | DenseMap<CachedHashStringRef, std::set<unsigned>> rootSymbolToSectionIdxs; |
| 114 | for (const auto *file : inputFiles) { |
| 115 | for (auto *sec : file->sections) { |
| 116 | for (auto &subsec : sec->subsections) { |
| 117 | auto *isec = subsec.isec; |
| 118 | if (!isec || isec->data.empty() || !isec->data.data()) |
| 119 | continue; |
| 120 | // ConcatInputSections are entirely live or dead, so the offset is |
| 121 | // irrelevant. |
| 122 | if (isa<ConcatInputSection>(Val: isec) && !isec->isLive(off: 0)) |
| 123 | continue; |
| 124 | size_t idx = sections.size(); |
| 125 | sections.emplace_back(Args&: isec); |
| 126 | for (auto *sym : BPOrdererMachO::getSymbols(sec: *isec)) { |
| 127 | auto rootName = lld::utils::getRootSymbol(Name: sym->getName()); |
| 128 | rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(x: idx); |
| 129 | if (auto linkageName = |
| 130 | BPOrdererMachO::getResolvedLinkageName(name: rootName)) |
| 131 | rootSymbolToSectionIdxs[CachedHashStringRef(*linkageName)].insert( |
| 132 | x: idx); |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | return BPOrdererMachO().computeOrder(profilePath, forFunctionCompression, |
| 139 | forDataCompression, |
| 140 | compressionSortStartupFunctions, verbose, |
| 141 | sections, rootSymbolToSectionIdxs); |
| 142 | } |
| 143 | |