| 1 | //===- SectionPriorities.cpp ----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details |
| 10 | /// about the algorithm. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "SectionPriorities.h" |
| 15 | #include "BPSectionOrderer.h" |
| 16 | #include "Config.h" |
| 17 | #include "InputFiles.h" |
| 18 | #include "Symbols.h" |
| 19 | #include "Target.h" |
| 20 | |
| 21 | #include "lld/Common/Args.h" |
| 22 | #include "lld/Common/CommonLinkerContext.h" |
| 23 | #include "lld/Common/ErrorHandler.h" |
| 24 | #include "lld/Common/Utils.h" |
| 25 | #include "llvm/ADT/DenseMap.h" |
| 26 | #include "llvm/ADT/MapVector.h" |
| 27 | #include "llvm/Support/Path.h" |
| 28 | #include "llvm/Support/TimeProfiler.h" |
| 29 | #include "llvm/Support/raw_ostream.h" |
| 30 | |
| 31 | #include <numeric> |
| 32 | |
| 33 | using namespace llvm; |
| 34 | using namespace llvm::MachO; |
| 35 | using namespace llvm::sys; |
| 36 | using namespace lld; |
| 37 | using namespace lld::macho; |
| 38 | |
| 39 | PriorityBuilder macho::priorityBuilder; |
| 40 | |
| 41 | namespace { |
| 42 | struct Edge { |
| 43 | int from; |
| 44 | uint64_t weight; |
| 45 | }; |
| 46 | |
| 47 | struct Cluster { |
| 48 | Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} |
| 49 | |
| 50 | double getDensity() const { |
| 51 | if (size == 0) |
| 52 | return 0; |
| 53 | return double(weight) / double(size); |
| 54 | } |
| 55 | |
| 56 | int next; |
| 57 | int prev; |
| 58 | uint64_t size; |
| 59 | uint64_t weight = 0; |
| 60 | uint64_t initialWeight = 0; |
| 61 | Edge bestPred = {.from: -1, .weight: 0}; |
| 62 | }; |
| 63 | |
| 64 | class CallGraphSort { |
| 65 | public: |
| 66 | CallGraphSort(const MapVector<SectionPair, uint64_t> &profile); |
| 67 | |
| 68 | DenseMap<const InputSection *, int> run(); |
| 69 | |
| 70 | private: |
| 71 | std::vector<Cluster> clusters; |
| 72 | std::vector<const InputSection *> sections; |
| 73 | }; |
| 74 | // Maximum amount the combined cluster density can be worse than the original |
| 75 | // cluster to consider merging. |
| 76 | constexpr int MAX_DENSITY_DEGRADATION = 8; |
| 77 | } // end anonymous namespace |
| 78 | |
| 79 | // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and |
| 80 | // generate a graph between InputSections with the provided weights. |
| 81 | CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) { |
| 82 | DenseMap<const InputSection *, int> secToCluster; |
| 83 | |
| 84 | auto getOrCreateCluster = [&](const InputSection *isec) -> int { |
| 85 | auto res = secToCluster.try_emplace(Key: isec, Args: clusters.size()); |
| 86 | if (res.second) { |
| 87 | sections.push_back(x: isec); |
| 88 | clusters.emplace_back(args: clusters.size(), args: isec->getSize()); |
| 89 | } |
| 90 | return res.first->second; |
| 91 | }; |
| 92 | |
| 93 | // Create the graph |
| 94 | for (const std::pair<SectionPair, uint64_t> &c : profile) { |
| 95 | const auto fromSec = c.first.first->canonical(); |
| 96 | const auto toSec = c.first.second->canonical(); |
| 97 | uint64_t weight = c.second; |
| 98 | // Ignore edges between input sections belonging to different output |
| 99 | // sections. This is done because otherwise we would end up with clusters |
| 100 | // containing input sections that can't actually be placed adjacently in the |
| 101 | // output. This messes with the cluster size and density calculations. We |
| 102 | // would also end up moving input sections in other output sections without |
| 103 | // moving them closer to what calls them. |
| 104 | if (fromSec->parent != toSec->parent) |
| 105 | continue; |
| 106 | |
| 107 | int from = getOrCreateCluster(fromSec); |
| 108 | int to = getOrCreateCluster(toSec); |
| 109 | |
| 110 | clusters[to].weight += weight; |
| 111 | |
| 112 | if (from == to) |
| 113 | continue; |
| 114 | |
| 115 | // Remember the best edge. |
| 116 | Cluster &toC = clusters[to]; |
| 117 | if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { |
| 118 | toC.bestPred.from = from; |
| 119 | toC.bestPred.weight = weight; |
| 120 | } |
| 121 | } |
| 122 | for (Cluster &c : clusters) |
| 123 | c.initialWeight = c.weight; |
| 124 | } |
| 125 | |
| 126 | // It's bad to merge clusters which would degrade the density too much. |
| 127 | static bool isNewDensityBad(Cluster &a, Cluster &b) { |
| 128 | double newDensity = double(a.weight + b.weight) / double(a.size + b.size); |
| 129 | return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; |
| 130 | } |
| 131 | |
| 132 | // Find the leader of V's belonged cluster (represented as an equivalence |
| 133 | // class). We apply union-find path-halving technique (simple to implement) in |
| 134 | // the meantime as it decreases depths and the time complexity. |
| 135 | static int getLeader(std::vector<int> &leaders, int v) { |
| 136 | while (leaders[v] != v) { |
| 137 | leaders[v] = leaders[leaders[v]]; |
| 138 | v = leaders[v]; |
| 139 | } |
| 140 | return v; |
| 141 | } |
| 142 | |
| 143 | static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, |
| 144 | Cluster &from, int fromIdx) { |
| 145 | int tail1 = into.prev, tail2 = from.prev; |
| 146 | into.prev = tail2; |
| 147 | cs[tail2].next = intoIdx; |
| 148 | from.prev = tail1; |
| 149 | cs[tail1].next = fromIdx; |
| 150 | into.size += from.size; |
| 151 | into.weight += from.weight; |
| 152 | from.size = 0; |
| 153 | from.weight = 0; |
| 154 | } |
| 155 | |
| 156 | // Group InputSections into clusters using the Call-Chain Clustering heuristic |
| 157 | // then sort the clusters by density. |
| 158 | DenseMap<const InputSection *, int> CallGraphSort::run() { |
| 159 | const uint64_t maxClusterSize = target->getPageSize(); |
| 160 | |
| 161 | // Cluster indices sorted by density. |
| 162 | std::vector<int> sorted(clusters.size()); |
| 163 | // For union-find. |
| 164 | std::vector<int> leaders(clusters.size()); |
| 165 | |
| 166 | std::iota(first: leaders.begin(), last: leaders.end(), value: 0); |
| 167 | std::iota(first: sorted.begin(), last: sorted.end(), value: 0); |
| 168 | |
| 169 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
| 170 | return clusters[a].getDensity() > clusters[b].getDensity(); |
| 171 | }); |
| 172 | |
| 173 | for (int l : sorted) { |
| 174 | // The cluster index is the same as the index of its leader here because |
| 175 | // clusters[L] has not been merged into another cluster yet. |
| 176 | Cluster &c = clusters[l]; |
| 177 | |
| 178 | // Don't consider merging if the edge is unlikely. |
| 179 | if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) |
| 180 | continue; |
| 181 | |
| 182 | int predL = getLeader(leaders, v: c.bestPred.from); |
| 183 | // Already in the same cluster. |
| 184 | if (l == predL) |
| 185 | continue; |
| 186 | |
| 187 | Cluster *predC = &clusters[predL]; |
| 188 | if (c.size + predC->size > maxClusterSize) |
| 189 | continue; |
| 190 | |
| 191 | if (isNewDensityBad(a&: *predC, b&: c)) |
| 192 | continue; |
| 193 | |
| 194 | leaders[l] = predL; |
| 195 | mergeClusters(cs&: clusters, into&: *predC, intoIdx: predL, from&: c, fromIdx: l); |
| 196 | } |
| 197 | // Sort remaining non-empty clusters by density. |
| 198 | sorted.clear(); |
| 199 | for (int i = 0, e = (int)clusters.size(); i != e; ++i) |
| 200 | if (clusters[i].size > 0) |
| 201 | sorted.push_back(x: i); |
| 202 | llvm::stable_sort(Range&: sorted, C: [&](int a, int b) { |
| 203 | return clusters[a].getDensity() > clusters[b].getDensity(); |
| 204 | }); |
| 205 | |
| 206 | DenseMap<const InputSection *, int> orderMap; |
| 207 | |
| 208 | // Sections will be sorted by decreasing order. Absent sections will have |
| 209 | // priority 0 and be placed at the end of sections. |
| 210 | int curOrder = -clusters.size(); |
| 211 | for (int leader : sorted) { |
| 212 | for (int i = leader;;) { |
| 213 | orderMap[sections[i]] = curOrder++; |
| 214 | i = clusters[i].next; |
| 215 | if (i == leader) |
| 216 | break; |
| 217 | } |
| 218 | } |
| 219 | if (!config->printSymbolOrder.empty()) { |
| 220 | std::error_code ec; |
| 221 | raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); |
| 222 | if (ec) { |
| 223 | error(msg: "cannot open " + config->printSymbolOrder + ": " + ec.message()); |
| 224 | return orderMap; |
| 225 | } |
| 226 | // Print the symbols ordered by C3, in the order of decreasing curOrder |
| 227 | // Instead of sorting all the orderMap, just repeat the loops above. |
| 228 | for (int leader : sorted) |
| 229 | for (int i = leader;;) { |
| 230 | const InputSection *isec = sections[i]; |
| 231 | // Search all the symbols in the file of the section |
| 232 | // and find out a Defined symbol with name that is within the |
| 233 | // section. |
| 234 | for (Symbol *sym : isec->getFile()->symbols) { |
| 235 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) { |
| 236 | if (d->isec() == isec) |
| 237 | os << sym->getName() << "\n" ; |
| 238 | } |
| 239 | } |
| 240 | i = clusters[i].next; |
| 241 | if (i == leader) |
| 242 | break; |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | return orderMap; |
| 247 | } |
| 248 | |
| 249 | std::optional<int> |
| 250 | macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key, |
| 251 | InputFile *f) { |
| 252 | |
| 253 | auto it = priorities.find(Val: key); |
| 254 | if (it == priorities.end()) |
| 255 | return std::nullopt; |
| 256 | const SymbolPriorityEntry &entry = it->second; |
| 257 | if (!f) |
| 258 | return entry.anyObjectFile; |
| 259 | // We don't use toString(InputFile *) here because it returns the full path |
| 260 | // for object files, and we only want the basename. |
| 261 | StringRef filename; |
| 262 | if (f->archiveName.empty()) |
| 263 | filename = path::filename(path: f->getName()); |
| 264 | else |
| 265 | filename = saver().save(S: path::filename(path: f->archiveName) + "(" + |
| 266 | path::filename(path: f->getName()) + ")" ); |
| 267 | return std::min(a: entry.objectFiles.lookup(Val: filename), b: entry.anyObjectFile); |
| 268 | } |
| 269 | |
| 270 | std::optional<int> |
| 271 | macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { |
| 272 | if (sym->isAbsolute()) |
| 273 | return std::nullopt; |
| 274 | return getSymbolOrCStringPriority(key: utils::getRootSymbol(Name: sym->getName()), |
| 275 | f: sym->isec()->getFile()); |
| 276 | } |
| 277 | |
| 278 | void macho::PriorityBuilder::() { |
| 279 | TimeTraceScope timeScope("Extract call graph profile" ); |
| 280 | bool hasOrderFile = !priorities.empty(); |
| 281 | for (const InputFile *file : inputFiles) { |
| 282 | auto *obj = dyn_cast_or_null<ObjFile>(Val: file); |
| 283 | if (!obj) |
| 284 | continue; |
| 285 | for (const CallGraphEntry &entry : obj->callGraph) { |
| 286 | assert(entry.fromIndex < obj->symbols.size() && |
| 287 | entry.toIndex < obj->symbols.size()); |
| 288 | auto *fromSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.fromIndex]); |
| 289 | auto *toSym = dyn_cast_or_null<Defined>(Val: obj->symbols[entry.toIndex]); |
| 290 | if (fromSym && toSym && |
| 291 | (!hasOrderFile || |
| 292 | (!getSymbolPriority(sym: fromSym) && !getSymbolPriority(sym: toSym)))) |
| 293 | callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count; |
| 294 | } |
| 295 | } |
| 296 | } |
| 297 | |
| 298 | void macho::PriorityBuilder::parseOrderFile(StringRef path) { |
| 299 | assert(callGraphProfile.empty() && |
| 300 | "Order file must be parsed before call graph profile is processed" ); |
| 301 | std::optional<MemoryBufferRef> buffer = readFile(path); |
| 302 | if (!buffer) { |
| 303 | error(msg: "Could not read order file at " + path); |
| 304 | return; |
| 305 | } |
| 306 | |
| 307 | int prio = std::numeric_limits<int>::min(); |
| 308 | MemoryBufferRef mbref = *buffer; |
| 309 | for (StringRef line : args::getLines(mb: mbref)) { |
| 310 | StringRef objectFile, symbolOrCStrHash; |
| 311 | line = line.take_until(F: [](char c) { return c == '#'; }); // ignore comments |
| 312 | line = line.ltrim(); |
| 313 | |
| 314 | CPUType cpuType = StringSwitch<CPUType>(line) |
| 315 | .StartsWith(S: "i386:" , Value: CPU_TYPE_I386) |
| 316 | .StartsWith(S: "x86_64:" , Value: CPU_TYPE_X86_64) |
| 317 | .StartsWith(S: "arm:" , Value: CPU_TYPE_ARM) |
| 318 | .StartsWith(S: "arm64:" , Value: CPU_TYPE_ARM64) |
| 319 | .StartsWith(S: "ppc:" , Value: CPU_TYPE_POWERPC) |
| 320 | .StartsWith(S: "ppc64:" , Value: CPU_TYPE_POWERPC64) |
| 321 | .Default(Value: CPU_TYPE_ANY); |
| 322 | |
| 323 | if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType) |
| 324 | continue; |
| 325 | // Drop the CPU type as well as the colon |
| 326 | if (cpuType != CPU_TYPE_ANY) |
| 327 | line = line.drop_until(F: [](char c) { return c == ':'; }).drop_front(); |
| 328 | |
| 329 | constexpr std::array<StringRef, 2> fileEnds = {".o:" , ".o):" }; |
| 330 | for (StringRef fileEnd : fileEnds) { |
| 331 | size_t pos = line.find(Str: fileEnd); |
| 332 | if (pos != StringRef::npos) { |
| 333 | // Split the string around the colon |
| 334 | objectFile = line.take_front(N: pos + fileEnd.size() - 1); |
| 335 | line = line.drop_front(N: pos + fileEnd.size()); |
| 336 | break; |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | // The rest of the line is either <symbol name> or |
| 341 | // CStringEntryPrefix<cstring hash> |
| 342 | line = line.trim(); |
| 343 | if (line.starts_with(Prefix: CStringEntryPrefix)) { |
| 344 | StringRef possibleHash = line.drop_front(N: CStringEntryPrefix.size()); |
| 345 | uint32_t hash = 0; |
| 346 | if (to_integer(S: possibleHash, Num&: hash)) |
| 347 | symbolOrCStrHash = possibleHash; |
| 348 | } else |
| 349 | symbolOrCStrHash = utils::getRootSymbol(Name: line); |
| 350 | |
| 351 | if (!symbolOrCStrHash.empty()) { |
| 352 | SymbolPriorityEntry &entry = priorities[symbolOrCStrHash]; |
| 353 | if (!objectFile.empty()) |
| 354 | entry.objectFiles.insert(KV: std::make_pair(x&: objectFile, y&: prio)); |
| 355 | else |
| 356 | entry.anyObjectFile = std::min(a: entry.anyObjectFile, b: prio); |
| 357 | } |
| 358 | |
| 359 | ++prio; |
| 360 | } |
| 361 | } |
| 362 | |
| 363 | DenseMap<const InputSection *, int> |
| 364 | macho::PriorityBuilder::buildInputSectionPriorities() { |
| 365 | DenseMap<const InputSection *, int> sectionPriorities; |
| 366 | if (config->bpStartupFunctionSort || config->bpFunctionOrderForCompression || |
| 367 | config->bpDataOrderForCompression) { |
| 368 | TimeTraceScope timeScope("Balanced Partitioning Section Orderer" ); |
| 369 | sectionPriorities = runBalancedPartitioning( |
| 370 | profilePath: config->bpStartupFunctionSort ? config->irpgoProfilePath : "" , |
| 371 | forFunctionCompression: config->bpFunctionOrderForCompression, |
| 372 | forDataCompression: config->bpDataOrderForCompression, |
| 373 | compressionSortStartupFunctions: config->bpCompressionSortStartupFunctions, |
| 374 | verbose: config->bpVerboseSectionOrderer); |
| 375 | } else if (config->callGraphProfileSort) { |
| 376 | // Sort sections by the profile data provided by __LLVM,__cg_profile |
| 377 | // sections. |
| 378 | // |
| 379 | // This first builds a call graph based on the profile data then merges |
| 380 | // sections according to the C³ heuristic. All clusters are then sorted by a |
| 381 | // density metric to further improve locality. |
| 382 | TimeTraceScope timeScope("Call graph profile sort" ); |
| 383 | sectionPriorities = CallGraphSort(callGraphProfile).run(); |
| 384 | } |
| 385 | |
| 386 | if (priorities.empty()) |
| 387 | return sectionPriorities; |
| 388 | |
| 389 | auto addSym = [&](const Defined *sym) { |
| 390 | std::optional<int> symbolPriority = getSymbolPriority(sym); |
| 391 | if (!symbolPriority) |
| 392 | return; |
| 393 | int &priority = sectionPriorities[sym->isec()]; |
| 394 | priority = std::min(a: priority, b: *symbolPriority); |
| 395 | }; |
| 396 | |
| 397 | // TODO: Make sure this handles weak symbols correctly. |
| 398 | for (const InputFile *file : inputFiles) { |
| 399 | if (isa<ObjFile>(Val: file)) |
| 400 | for (Symbol *sym : file->symbols) |
| 401 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
| 402 | addSym(d); |
| 403 | } |
| 404 | |
| 405 | return sectionPriorities; |
| 406 | } |
| 407 | |
| 408 | std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities( |
| 409 | ArrayRef<CStringInputSection *> inputs) { |
| 410 | // Split the input strings into hold and cold sets. |
| 411 | // Order hot set based on -order_file_cstring for performance improvement; |
| 412 | // TODO: Order cold set of cstrings for compression via BP. |
| 413 | std::vector<std::pair<int, StringPiecePair>> |
| 414 | hotStringPrioritiesAndStringPieces; |
| 415 | std::vector<StringPiecePair> coldStringPieces; |
| 416 | std::vector<StringPiecePair> orderedStringPieces; |
| 417 | |
| 418 | for (CStringInputSection *isec : inputs) { |
| 419 | for (const auto &[stringPieceIdx, piece] : llvm::enumerate(First&: isec->pieces)) { |
| 420 | if (!piece.live) |
| 421 | continue; |
| 422 | |
| 423 | std::optional<int> priority = getSymbolOrCStringPriority( |
| 424 | key: std::to_string(val: piece.hash), f: isec->getFile()); |
| 425 | if (!priority) |
| 426 | coldStringPieces.emplace_back(args&: isec, args&: stringPieceIdx); |
| 427 | else |
| 428 | hotStringPrioritiesAndStringPieces.emplace_back( |
| 429 | args&: *priority, args: std::make_pair(x&: isec, y&: stringPieceIdx)); |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | // Order hot set for perf |
| 434 | llvm::stable_sort(Range&: hotStringPrioritiesAndStringPieces); |
| 435 | for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces) |
| 436 | orderedStringPieces.push_back(x: stringPiecePair); |
| 437 | |
| 438 | // TODO: Order cold set for compression |
| 439 | |
| 440 | orderedStringPieces.insert(position: orderedStringPieces.end(), |
| 441 | first: coldStringPieces.begin(), last: coldStringPieces.end()); |
| 442 | |
| 443 | return orderedStringPieces; |
| 444 | } |
| 445 | |