| 1 | //===- SectionPriorities.cpp ----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details |
| 10 | /// about the algorithm. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "SectionPriorities.h" |
| 15 | #include "BPSectionOrderer.h" |
| 16 | #include "Config.h" |
| 17 | #include "InputFiles.h" |
| 18 | #include "Symbols.h" |
| 19 | #include "Target.h" |
| 20 | |
| 21 | #include "lld/Common/Args.h" |
| 22 | #include "lld/Common/CommonLinkerContext.h" |
| 23 | #include "lld/Common/ErrorHandler.h" |
| 24 | #include "lld/Common/Utils.h" |
| 25 | #include "llvm/ADT/DenseMap.h" |
| 26 | #include "llvm/ADT/MapVector.h" |
| 27 | #include "llvm/Support/Path.h" |
| 28 | #include "llvm/Support/TimeProfiler.h" |
| 29 | #include "llvm/Support/raw_ostream.h" |
| 30 | #include "llvm/Support/xxhash.h" |
| 31 | |
| 32 | #include <numeric> |
| 33 | |
| 34 | using namespace llvm; |
| 35 | using namespace llvm::MachO; |
| 36 | using namespace llvm::sys; |
| 37 | using namespace lld; |
| 38 | using namespace lld::macho; |
| 39 | |
| 40 | PriorityBuilder macho::priorityBuilder; |
| 41 | |
| 42 | namespace { |
| 43 | struct Edge { |
| 44 | int from; |
| 45 | uint64_t weight; |
| 46 | }; |
| 47 | |
| 48 | struct Cluster { |
| 49 | Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} |
| 50 | |
| 51 | double getDensity() const { |
| 52 | if (size == 0) |
| 53 | return 0; |
| 54 | return double(weight) / double(size); |
| 55 | } |
| 56 | |
| 57 | int next; |
| 58 | int prev; |
| 59 | uint64_t size; |
| 60 | uint64_t weight = 0; |
| 61 | uint64_t initialWeight = 0; |
| 62 | Edge bestPred = {.from: -1, .weight: 0}; |
| 63 | }; |
| 64 | |
| 65 | class CallGraphSort { |
| 66 | public: |
| 67 | CallGraphSort(const MapVector<SectionPair, uint64_t> &profile); |
| 68 | |
| 69 | DenseMap<const InputSection *, int> run(); |
| 70 | |
| 71 | private: |
| 72 | std::vector<Cluster> clusters; |
| 73 | std::vector<const InputSection *> sections; |
| 74 | }; |
| 75 | // Maximum amount the combined cluster density can be worse than the original |
| 76 | // cluster to consider merging. |
| 77 | constexpr int MAX_DENSITY_DEGRADATION = 8; |
| 78 | } // end anonymous namespace |
| 79 | |
| 80 | // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and |
| 81 | // generate a graph between InputSections with the provided weights. |
| 82 | CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) { |
| 83 | DenseMap<const InputSection *, int> secToCluster; |
| 84 | |
| 85 | auto getOrCreateCluster = [&](const InputSection *isec) -> int { |
| 86 | auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size()); |
| 87 | if (res.second) { |
| 88 | sections.push_back(x: isec); |
| 89 | clusters.emplace_back(args: clusters.size(), args: isec->getSize()); |
| 90 | } |
| 91 | return res.first->second; |
| 92 | }; |
| 93 | |
| 94 | // Create the graph |
| 95 | for (const std::pair<SectionPair, uint64_t> &c : profile) { |
| 96 | const auto fromSec = c.first.first->canonical(); |
| 97 | const auto toSec = c.first.second->canonical(); |
| 98 | uint64_t weight = c.second; |
| 99 | // Ignore edges between input sections belonging to different output |
| 100 | // sections. This is done because otherwise we would end up with clusters |
| 101 | // containing input sections that can't actually be placed adjacently in the |
| 102 | // output. This messes with the cluster size and density calculations. We |
| 103 | // would also end up moving input sections in other output sections without |
| 104 | // moving them closer to what calls them. |
| 105 | if (fromSec->parent != toSec->parent) |
| 106 | continue; |
| 107 | |
| 108 | int from = getOrCreateCluster(fromSec); |
| 109 | int to = getOrCreateCluster(toSec); |
| 110 | |
| 111 | clusters[to].weight += weight; |
| 112 | |
| 113 | if (from == to) |
| 114 | continue; |
| 115 | |
| 116 | // Remember the best edge. |
| 117 | Cluster &toC = clusters[to]; |
| 118 | if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { |
| 119 | toC.bestPred.from = from; |
| 120 | toC.bestPred.weight = weight; |
| 121 | } |
| 122 | } |
| 123 | for (Cluster &c : clusters) |
| 124 | c.initialWeight = c.weight; |
| 125 | } |
| 126 | |
| 127 | // It's bad to merge clusters which would degrade the density too much. |
| 128 | static bool isNewDensityBad(Cluster &a, Cluster &b) { |
| 129 | double newDensity = double(a.weight + b.weight) / double(a.size + b.size); |
| 130 | return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; |
| 131 | } |
| 132 | |
| 133 | // Find the leader of V's belonged cluster (represented as an equivalence |
| 134 | // class). We apply union-find path-halving technique (simple to implement) in |
| 135 | // the meantime as it decreases depths and the time complexity. |
| 136 | static int getLeader(std::vector<int> &leaders, int v) { |
| 137 | while (leaders[v] != v) { |
| 138 | leaders[v] = leaders[leaders[v]]; |
| 139 | v = leaders[v]; |
| 140 | } |
| 141 | return v; |
| 142 | } |
| 143 | |
| 144 | static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, |
| 145 | Cluster &from, int fromIdx) { |
| 146 | int tail1 = into.prev, tail2 = from.prev; |
| 147 | into.prev = tail2; |
| 148 | cs[tail2].next = intoIdx; |
| 149 | from.prev = tail1; |
| 150 | cs[tail1].next = fromIdx; |
| 151 | into.size += from.size; |
| 152 | into.weight += from.weight; |
| 153 | from.size = 0; |
| 154 | from.weight = 0; |
| 155 | } |
| 156 | |
| 157 | // Group InputSections into clusters using the Call-Chain Clustering heuristic |
| 158 | // then sort the clusters by density. |
| 159 | DenseMap<const InputSection *, int> CallGraphSort::run() { |
| 160 | const uint64_t maxClusterSize = target->getPageSize(); |
| 161 | |
| 162 | // Cluster indices sorted by density. |
| 163 | std::vector<int> sorted(clusters.size()); |
| 164 | // For union-find. |
| 165 | std::vector<int> leaders(clusters.size()); |
| 166 | |
| 167 | std::iota(first: leaders.begin(), last: leaders.end(), value: 0); |
| 168 | std::iota(first: sorted.begin(), last: sorted.end(), value: 0); |
| 169 | |
| 170 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
| 171 | return clusters[a].getDensity() > clusters[b].getDensity(); |
| 172 | }); |
| 173 | |
| 174 | for (int l : sorted) { |
| 175 | // The cluster index is the same as the index of its leader here because |
| 176 | // clusters[L] has not been merged into another cluster yet. |
| 177 | Cluster &c = clusters[l]; |
| 178 | |
| 179 | // Don't consider merging if the edge is unlikely. |
| 180 | if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) |
| 181 | continue; |
| 182 | |
| 183 | int predL = getLeader(leaders, v: c.bestPred.from); |
| 184 | // Already in the same cluster. |
| 185 | if (l == predL) |
| 186 | continue; |
| 187 | |
| 188 | Cluster *predC = &clusters[predL]; |
| 189 | if (c.size + predC->size > maxClusterSize) |
| 190 | continue; |
| 191 | |
| 192 | if (isNewDensityBad(a&: *predC, b&: c)) |
| 193 | continue; |
| 194 | |
| 195 | leaders[l] = predL; |
| 196 | mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l); |
| 197 | } |
| 198 | // Sort remaining non-empty clusters by density. |
| 199 | sorted.clear(); |
| 200 | for (int i = 0, e = (int)clusters.size(); i != e; ++i) |
| 201 | if (clusters[i].size > 0) |
| 202 | sorted.push_back(x: i); |
| 203 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
| 204 | return clusters[a].getDensity() > clusters[b].getDensity(); |
| 205 | }); |
| 206 | |
| 207 | DenseMap<const InputSection *, int> orderMap; |
| 208 | |
| 209 | // Sections will be sorted by decreasing order. Absent sections will have |
| 210 | // priority 0 and be placed at the end of sections. |
| 211 | int curOrder = -clusters.size(); |
| 212 | for (int leader : sorted) { |
| 213 | for (int i = leader;;) { |
| 214 | orderMap[sections[i]] = curOrder++; |
| 215 | i = clusters[i].next; |
| 216 | if (i == leader) |
| 217 | break; |
| 218 | } |
| 219 | } |
| 220 | if (!config->printSymbolOrder.empty()) { |
| 221 | std::error_code ec; |
| 222 | raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); |
| 223 | if (ec) { |
| 224 | error(msg: "cannot open " + config->printSymbolOrder + ": " + ec.message()); |
| 225 | return orderMap; |
| 226 | } |
| 227 | // Print the symbols ordered by C3, in the order of decreasing curOrder |
| 228 | // Instead of sorting all the orderMap, just repeat the loops above. |
| 229 | for (int leader : sorted) |
| 230 | for (int i = leader;;) { |
| 231 | const InputSection *isec = sections[i]; |
| 232 | // Search all the symbols in the file of the section |
| 233 | // and find out a Defined symbol with name that is within the |
| 234 | // section. |
| 235 | for (Symbol *sym : isec->getFile()->symbols) { |
| 236 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) { |
| 237 | if (d->isec() == isec) |
| 238 | os << sym->getName() << "\n" ; |
| 239 | } |
| 240 | } |
| 241 | i = clusters[i].next; |
| 242 | if (i == leader) |
| 243 | break; |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | return orderMap; |
| 248 | } |
| 249 | |
| 250 | void macho::PriorityBuilder::SymbolPriorityEntry::setPriority( |
| 251 | int priority, StringRef objectFile) { |
| 252 | if (!objectFile.empty()) |
| 253 | objectFiles.try_emplace(Key: objectFile, Args&: priority); |
| 254 | else |
| 255 | anyObjectFile = std::min(a: anyObjectFile, b: priority); |
| 256 | } |
| 257 | |
| 258 | int macho::PriorityBuilder::SymbolPriorityEntry::getPriority( |
| 259 | const InputFile *f) const { |
| 260 | if (!f) |
| 261 | return anyObjectFile; |
| 262 | // We don't use toString(InputFile *) here because it returns the full path |
| 263 | // for object files, and we only want the basename. |
| 264 | StringRef basename = path::filename(path: f->getName()); |
| 265 | StringRef filename = |
| 266 | f->archiveName.empty() |
| 267 | ? basename |
| 268 | : saver().save(S: path::filename(path: f->archiveName) + "(" + basename + ")" ); |
| 269 | return std::min(a: objectFiles.lookup(Val: filename), b: anyObjectFile); |
| 270 | } |
| 271 | |
| 272 | std::optional<int> |
| 273 | macho::PriorityBuilder::getCStringPriority(uint32_t hash, |
| 274 | const InputFile *f) const { |
| 275 | auto it = cStringPriorities.find(Val: hash); |
| 276 | if (it == cStringPriorities.end()) |
| 277 | return std::nullopt; |
| 278 | return it->second.getPriority(f); |
| 279 | } |
| 280 | |
| 281 | std::optional<int> |
| 282 | macho::PriorityBuilder::getSymbolPriority(const Defined *sym) const { |
| 283 | if (sym->isAbsolute()) |
| 284 | return std::nullopt; |
| 285 | auto it = priorities.find(Val: utils::getRootSymbol(Name: sym->getName())); |
| 286 | if (it == priorities.end()) |
| 287 | return std::nullopt; |
| 288 | return it->second.getPriority(f: sym->isec()->getFile()); |
| 289 | } |
| 290 | |
| 291 | void macho::PriorityBuilder::() { |
| 292 | TimeTraceScope timeScope("Extract call graph profile" ); |
| 293 | bool hasOrderFile = !priorities.empty(); |
| 294 | for (const InputFile *file : inputFiles) { |
| 295 | auto *obj = dyn_cast_or_null<ObjFile>(Val: file); |
| 296 | if (!obj) |
| 297 | continue; |
| 298 | for (const CallGraphEntry &entry : obj->callGraph) { |
| 299 | assert(entry.fromIndex < obj->symbols.size() && |
| 300 | entry.toIndex < obj->symbols.size()); |
| 301 | auto *fromSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.fromIndex]); |
| 302 | auto *toSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.toIndex]); |
| 303 | if (fromSym && toSym && |
| 304 | (!hasOrderFile || |
| 305 | (!getSymbolPriority(sym: fromSym) && !getSymbolPriority(sym: toSym)))) |
| 306 | callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count; |
| 307 | } |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | void macho::PriorityBuilder::parseOrderFile(StringRef path) { |
| 312 | assert(callGraphProfile.empty() && |
| 313 | "Order file must be parsed before call graph profile is processed" ); |
| 314 | std::optional<MemoryBufferRef> buffer = readFile(path); |
| 315 | if (!buffer) { |
| 316 | error(msg: "Could not read order file at " + path); |
| 317 | return; |
| 318 | } |
| 319 | |
| 320 | int prio = std::numeric_limits<int>::min(); |
| 321 | MemoryBufferRef mbref = *buffer; |
| 322 | for (StringRef line : args::getLines(mb: mbref)) { |
| 323 | StringRef objectFile; |
| 324 | line = line.take_until(F: [](char c) { return c == '#'; }); // ignore comments |
| 325 | line = line.ltrim(); |
| 326 | |
| 327 | CPUType cpuType = StringSwitch<CPUType>(line) |
| 328 | .StartsWith(S: "i386:" , Value: CPU_TYPE_I386) |
| 329 | .StartsWith(S: "x86_64:" , Value: CPU_TYPE_X86_64) |
| 330 | .StartsWith(S: "arm:" , Value: CPU_TYPE_ARM) |
| 331 | .StartsWith(S: "arm64:" , Value: CPU_TYPE_ARM64) |
| 332 | .StartsWith(S: "ppc:" , Value: CPU_TYPE_POWERPC) |
| 333 | .StartsWith(S: "ppc64:" , Value: CPU_TYPE_POWERPC64) |
| 334 | .Default(Value: CPU_TYPE_ANY); |
| 335 | |
| 336 | if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType) |
| 337 | continue; |
| 338 | // Drop the CPU type as well as the colon |
| 339 | if (cpuType != CPU_TYPE_ANY) |
| 340 | line = line.drop_until(F: [](char c) { return c == ':'; }).drop_front(); |
| 341 | |
| 342 | constexpr std::array<StringRef, 2> fileEnds = {".o:" , ".o):" }; |
| 343 | for (StringRef fileEnd : fileEnds) { |
| 344 | size_t pos = line.find(Str: fileEnd); |
| 345 | if (pos != StringRef::npos) { |
| 346 | // Split the string around the colon |
| 347 | objectFile = line.take_front(N: pos + fileEnd.size() - 1); |
| 348 | line = line.drop_front(N: pos + fileEnd.size()); |
| 349 | break; |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | // The rest of the line is either <symbol name> or |
| 354 | // cStringEntryPrefix<cstring hash> |
| 355 | line = line.trim(); |
| 356 | if (line.consume_front(Prefix: cStringEntryPrefix)) { |
| 357 | uint32_t hash = 0; |
| 358 | // Only accept hex (0x prefix) or decimal format |
| 359 | if (line.consume_front_insensitive(Prefix: "0x" ) ? !line.getAsInteger(Radix: 16, Result&: hash) |
| 360 | : !line.getAsInteger(Radix: 10, Result&: hash)) |
| 361 | cStringPriorities[hash].setPriority(priority: prio, objectFile); |
| 362 | } else { |
| 363 | StringRef symbol = utils::getRootSymbol(Name: line); |
| 364 | if (!symbol.empty()) |
| 365 | priorities[symbol].setPriority(priority: prio, objectFile); |
| 366 | } |
| 367 | |
| 368 | ++prio; |
| 369 | } |
| 370 | } |
| 371 | |
| 372 | DenseMap<const InputSection *, int> |
| 373 | macho::PriorityBuilder::buildInputSectionPriorities() { |
| 374 | DenseMap<const InputSection *, int> sectionPriorities; |
| 375 | if (config->bpStartupFunctionSort || config->bpFunctionOrderForCompression || |
| 376 | config->bpDataOrderForCompression) { |
| 377 | TimeTraceScope timeScope("Balanced Partitioning Section Orderer" ); |
| 378 | sectionPriorities = runBalancedPartitioning( |
| 379 | profilePath: config->bpStartupFunctionSort ? config->irpgoProfilePath : "" , |
| 380 | forFunctionCompression: config->bpFunctionOrderForCompression, |
| 381 | forDataCompression: config->bpDataOrderForCompression, |
| 382 | compressionSortStartupFunctions: config->bpCompressionSortStartupFunctions, |
| 383 | verbose: config->bpVerboseSectionOrderer); |
| 384 | } else if (config->callGraphProfileSort) { |
| 385 | // Sort sections by the profile data provided by __LLVM,__cg_profile |
| 386 | // sections. |
| 387 | // |
| 388 | // This first builds a call graph based on the profile data then merges |
| 389 | // sections according to the C³ heuristic. All clusters are then sorted by a |
| 390 | // density metric to further improve locality. |
| 391 | TimeTraceScope timeScope("Call graph profile sort" ); |
| 392 | sectionPriorities = CallGraphSort(callGraphProfile).run(); |
| 393 | } |
| 394 | |
| 395 | if (priorities.empty()) |
| 396 | return sectionPriorities; |
| 397 | |
| 398 | auto addSym = [&](const Defined *sym) { |
| 399 | std::optional<int> symbolPriority = getSymbolPriority(sym); |
| 400 | if (!symbolPriority) |
| 401 | return; |
| 402 | int &priority = sectionPriorities[sym->isec()]; |
| 403 | priority = std::min(a: priority, b: *symbolPriority); |
| 404 | }; |
| 405 | |
| 406 | // TODO: Make sure this handles weak symbols correctly. |
| 407 | for (const InputFile *file : inputFiles) { |
| 408 | if (isa<ObjFile>(Val: file)) |
| 409 | for (Symbol *sym : file->symbols) |
| 410 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
| 411 | addSym(d); |
| 412 | } |
| 413 | |
| 414 | return sectionPriorities; |
| 415 | } |
| 416 | |
| 417 | void macho::PriorityBuilder::forEachStringPiece( |
| 418 | ArrayRef<CStringInputSection *> inputs, |
| 419 | std::function<void(CStringInputSection &, StringPiece &, size_t)> f, |
| 420 | bool forceInputOrder, bool computeHash) const { |
| 421 | std::vector<std::tuple<int, CStringInputSection *, size_t>> orderedPieces; |
| 422 | std::vector<std::pair<CStringInputSection *, size_t>> unorderedPieces; |
| 423 | for (CStringInputSection *isec : inputs) { |
| 424 | for (const auto &[stringPieceIdx, piece] : llvm::enumerate(First&: isec->pieces)) { |
| 425 | if (!piece.live) |
| 426 | continue; |
| 427 | // Process pieces in input order if we have no cstrings in our orderfile |
| 428 | if (forceInputOrder || cStringPriorities.empty()) { |
| 429 | f(*isec, piece, stringPieceIdx); |
| 430 | continue; |
| 431 | } |
| 432 | uint32_t hash = |
| 433 | computeHash |
| 434 | ? (xxh3_64bits(data: isec->getStringRef(i: stringPieceIdx)) & 0x7fffffff) |
| 435 | : piece.hash; |
| 436 | if (auto priority = getCStringPriority(hash, f: isec->getFile())) |
| 437 | orderedPieces.emplace_back(args&: *priority, args&: isec, args&: stringPieceIdx); |
| 438 | else |
| 439 | unorderedPieces.emplace_back(args&: isec, args&: stringPieceIdx); |
| 440 | } |
| 441 | } |
| 442 | if (orderedPieces.empty() && unorderedPieces.empty()) |
| 443 | return; |
| 444 | llvm::stable_sort(Range&: orderedPieces, C: [](const auto &left, const auto &right) { |
| 445 | return std::get<0>(left) < std::get<0>(right); |
| 446 | }); |
| 447 | for (auto &[priority, isec, pieceIdx] : orderedPieces) |
| 448 | f(*isec, isec->pieces[pieceIdx], pieceIdx); |
| 449 | // TODO: Add option to order the remaining cstrings for compression |
| 450 | for (auto &[isec, pieceIdx] : unorderedPieces) |
| 451 | f(*isec, isec->pieces[pieceIdx], pieceIdx); |
| 452 | } |
| 453 | |