1//===- BPSectionOrderer.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BPSectionOrderer.h"
10#include "InputSection.h"
11#include "Relocations.h"
12#include "Symbols.h"
13#include "lld/Common/BPSectionOrdererBase.inc"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/StableHashing.h"
16#include "llvm/Support/Endian.h"
17#include "llvm/Support/xxhash.h"
18
19#define DEBUG_TYPE "bp-section-orderer"
20
21using namespace llvm;
22using namespace lld::macho;
23
24namespace {
25struct BPOrdererMachO;
26}
27template <> struct lld::BPOrdererTraits<struct BPOrdererMachO> {
28 using Section = macho::InputSection;
29 using Defined = macho::Defined;
30};
31namespace {
32struct BPOrdererMachO : lld::BPOrderer<BPOrdererMachO> {
33 static uint64_t getSize(const Section &sec) { return sec.getSize(); }
34 static bool isCodeSection(const Section &sec) {
35 return macho::isCodeSection(&sec);
36 }
37 static std::string getSectionName(const Section &sec) {
38 return (sec.getSegName() + sec.getName()).str();
39 }
40 // TODO: Use N_COLD_FUNC to separate cold code into a different subgroup.
41 static std::string getCompressionSubgroupKey(const Section &sec) {
42 return "";
43 }
44 static ArrayRef<Defined *> getSymbols(const Section &sec) {
45 return sec.symbols;
46 }
47
48 // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
49 // Mangler::getNameWithPrefix() for details.
50 std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) {
51 if (name.consume_front(Prefix: "_") || name.consume_front(Prefix: "l_"))
52 return name;
53 return {};
54 }
55
56 static void
57 getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
58 const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
59 constexpr unsigned windowSize = 4;
60
61 // Calculate content hashes: k-mers and the last k-1 bytes.
62 ArrayRef<uint8_t> data = sec.data;
63 if (data.size() >= windowSize)
64 for (size_t i = 0; i <= data.size() - windowSize; ++i)
65 hashes.push_back(Elt: llvm::support::endian::read32le(P: data.data() + i));
66 for (uint8_t byte : data.take_back(N: windowSize - 1))
67 hashes.push_back(Elt: byte);
68
69 // Calculate relocation hashes
70 for (const auto &r : sec.relocs) {
71 uint32_t relocLength = 1 << r.length;
72 if (r.referent.isNull() || r.offset + relocLength > data.size())
73 continue;
74
75 uint64_t relocHash = getRelocHash(reloc: r, sectionToIdx);
76 uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
77 for (uint32_t i = start; i < r.offset + relocLength; i++) {
78 auto window = data.drop_front(N: i).take_front(N: windowSize);
79 hashes.push_back(Elt: xxh3_64bits(data: window) ^ relocHash);
80 }
81 }
82
83 llvm::sort(C&: hashes);
84 hashes.erase(CS: llvm::unique(R&: hashes), CE: hashes.end());
85 }
86
87 static llvm::StringRef getSymName(const Defined &sym) {
88 return sym.getName();
89 }
90 static uint64_t getSymValue(const Defined &sym) { return sym.value; }
91 static uint64_t getSymSize(const Defined &sym) { return sym.size; }
92
93private:
94 static uint64_t
95 getRelocHash(const Relocation &reloc,
96 const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
97 auto *isec = reloc.getReferentInputSection();
98 std::optional<uint64_t> sectionIdx;
99 if (auto it = sectionToIdx.find(Val: isec); it != sectionToIdx.end())
100 sectionIdx = it->second;
101 uint64_t kind = -1, value = 0;
102 if (isec)
103 kind = uint64_t(isec->kind());
104
105 if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
106 kind = (kind << 8) | uint8_t(sym->kind());
107 if (auto *d = llvm::dyn_cast<Defined>(Val: sym))
108 value = d->value;
109 }
110 return llvm::stable_hash_combine(A: kind, B: sectionIdx.value_or(u: 0), C: value,
111 D: reloc.addend);
112 }
113};
114} // namespace
115
116DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
117 StringRef profilePath, ArrayRef<BPCompressionSortSpec> compressionSortSpecs,
118 bool forFunctionCompression, bool forDataCompression,
119 bool compressionSortStartupFunctions, bool verbose) {
120 // Collect candidate sections and associated symbols.
121 SmallVector<InputSection *> sections;
122 DenseMap<CachedHashStringRef, std::set<unsigned>> rootSymbolToSectionIdxs;
123 for (const auto *file : inputFiles) {
124 for (auto *sec : file->sections) {
125 for (auto &subsec : sec->subsections) {
126 auto *isec = subsec.isec;
127 if (!isec || isec->data.empty() || !isec->data.data())
128 continue;
129 // CString section order is handled by
130 // {Deduplicated}CStringSection::finalizeContents()
131 if (isa<CStringInputSection>(Val: isec) || isec->isFinal)
132 continue;
133 // ConcatInputSections are entirely live or dead, so the offset is
134 // irrelevant.
135 if (isa<ConcatInputSection>(Val: isec) && !isec->isLive(off: 0))
136 continue;
137 size_t idx = sections.size();
138 sections.emplace_back(Args&: isec);
139 for (auto *sym : BPOrdererMachO::getSymbols(sec: *isec)) {
140 auto rootName = lld::utils::getRootSymbol(Name: sym->getName());
141 rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(x: idx);
142 if (auto linkageName =
143 BPOrdererMachO::getResolvedLinkageName(name: rootName))
144 rootSymbolToSectionIdxs[CachedHashStringRef(*linkageName)].insert(
145 x: idx);
146 }
147 }
148 }
149 }
150
151 return BPOrdererMachO().computeOrder(
152 profilePath, compressionSortSpecs, forFunctionCompression,
153 forDataCompression, compressionSortStartupFunctions, verbose, sections,
154 rootSymbolToSectionIdxs);
155}
156