| 1 | //===- MapFile.cpp --------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the -map option, which maps address ranges to their |
| 10 | // respective contents, plus the input file these contents were originally from. |
| 11 | // The contents (typically symbols) are listed in address order. Dead-stripped |
| 12 | // contents are included as well. |
| 13 | // |
| 14 | // # Path: test |
| 15 | // # Arch: x86_64 |
| 16 | // # Object files: |
| 17 | // [ 0] linker synthesized |
| 18 | // [ 1] a.o |
| 19 | // # Sections: |
| 20 | // # Address Size Segment Section |
| 21 | // 0x1000005C0 0x0000004C __TEXT __text |
| 22 | // # Symbols: |
| 23 | // # Address Size File Name |
| 24 | // 0x1000005C0 0x00000001 [ 1] _main |
| 25 | // # Dead Stripped Symbols: |
| 26 | // # Size File Name |
| 27 | // <<dead>> 0x00000001 [ 1] _foo |
| 28 | // |
| 29 | //===----------------------------------------------------------------------===// |
| 30 | |
| 31 | #include "MapFile.h" |
| 32 | #include "ConcatOutputSection.h" |
| 33 | #include "Config.h" |
| 34 | #include "InputFiles.h" |
| 35 | #include "InputSection.h" |
| 36 | #include "OutputSegment.h" |
| 37 | #include "Symbols.h" |
| 38 | #include "SyntheticSections.h" |
| 39 | #include "Target.h" |
| 40 | #include "lld/Common/ErrorHandler.h" |
| 41 | #include "llvm/ADT/DenseMap.h" |
| 42 | #include "llvm/Support/Parallel.h" |
| 43 | #include "llvm/Support/TimeProfiler.h" |
| 44 | |
| 45 | using namespace llvm; |
| 46 | using namespace llvm::sys; |
| 47 | using namespace lld; |
| 48 | using namespace lld::macho; |
| 49 | |
| 50 | struct CStringInfo { |
| 51 | uint32_t fileIndex; |
| 52 | StringRef str; |
| 53 | }; |
| 54 | |
| 55 | struct MapInfo { |
| 56 | SmallVector<InputFile *> files; |
| 57 | SmallVector<Defined *> deadSymbols; |
| 58 | DenseMap<const OutputSection *, |
| 59 | SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>> |
| 60 | liveCStringsForSection; |
| 61 | SmallVector<CStringInfo> deadCStrings; |
| 62 | }; |
| 63 | |
| 64 | static MapInfo gatherMapInfo() { |
| 65 | MapInfo info; |
| 66 | for (InputFile *file : inputFiles) { |
| 67 | bool isReferencedFile = false; |
| 68 | |
| 69 | if (isa<ObjFile>(Val: file) || isa<BitcodeFile>(Val: file)) { |
| 70 | uint32_t fileIndex = info.files.size() + 1; |
| 71 | |
| 72 | // Gather the dead symbols. We don't have to bother with the live ones |
| 73 | // because we will pick them up as we iterate over the OutputSections |
| 74 | // later. |
| 75 | for (Symbol *sym : file->symbols) { |
| 76 | if (auto *d = dyn_cast_or_null<Defined>(Val: sym)) |
| 77 | // Only emit the prevailing definition of a symbol. Also, don't emit |
| 78 | // the symbol if it is part of a cstring section (we use the literal |
| 79 | // value instead, similar to ld64) |
| 80 | if (d->isec() && d->getFile() == file && |
| 81 | !isa<CStringInputSection>(Val: d->isec())) { |
| 82 | isReferencedFile = true; |
| 83 | if (!d->isLive()) |
| 84 | info.deadSymbols.push_back(Elt: d); |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | // Gather all the cstrings (both live and dead). A CString(Output)Section |
| 89 | // doesn't provide us a way of figuring out which InputSections its |
| 90 | // cstring contents came from, so we need to build up that mapping here. |
| 91 | for (const Section *sec : file->sections) { |
| 92 | for (const Subsection &subsec : sec->subsections) { |
| 93 | if (auto isec = dyn_cast<CStringInputSection>(Val: subsec.isec)) { |
| 94 | auto &liveCStrings = info.liveCStringsForSection[isec->parent]; |
| 95 | for (const auto &[i, piece] : llvm::enumerate(First&: isec->pieces)) { |
| 96 | if (piece.live) |
| 97 | liveCStrings.push_back(Elt: {isec->parent->addr + piece.outSecOff, |
| 98 | {.fileIndex: fileIndex, .str: isec->getStringRef(i)}}); |
| 99 | else |
| 100 | info.deadCStrings.push_back(Elt: {.fileIndex: fileIndex, .str: isec->getStringRef(i)}); |
| 101 | isReferencedFile = true; |
| 102 | } |
| 103 | } else { |
| 104 | break; |
| 105 | } |
| 106 | } |
| 107 | } |
| 108 | } else if (const auto *dylibFile = dyn_cast<DylibFile>(Val: file)) { |
| 109 | isReferencedFile = dylibFile->isReferenced(); |
| 110 | } |
| 111 | |
| 112 | if (isReferencedFile) |
| 113 | info.files.push_back(Elt: file); |
| 114 | } |
| 115 | |
| 116 | // cstrings are not stored in sorted order in their OutputSections, so we sort |
| 117 | // them here. |
| 118 | for (auto &liveCStrings : info.liveCStringsForSection) |
| 119 | parallelSort(R&: liveCStrings.second, Comp: llvm::less_first()); |
| 120 | return info; |
| 121 | } |
| 122 | |
| 123 | // We use this instead of `toString(const InputFile *)` as we don't want to |
| 124 | // include the dylib install name in our output. |
| 125 | static void printFileName(raw_fd_ostream &os, const InputFile *f) { |
| 126 | if (f->archiveName.empty()) |
| 127 | os << f->getName(); |
| 128 | else |
| 129 | os << f->archiveName << "(" << path::filename(path: f->getName()) + ")" ; |
| 130 | } |
| 131 | |
| 132 | // For printing the contents of the __stubs and __la_symbol_ptr sections. |
| 133 | static void printStubsEntries( |
| 134 | raw_fd_ostream &os, |
| 135 | const DenseMap<lld::macho::InputFile *, uint32_t> &readerToFileOrdinal, |
| 136 | const OutputSection *osec, size_t entrySize) { |
| 137 | for (const Symbol *sym : in.stubs->getEntries()) |
| 138 | os << format(Fmt: "0x%08llX\t0x%08zX\t[%3u] %s\n" , |
| 139 | Vals: osec->addr + sym->stubsIndex * entrySize, Vals: entrySize, |
| 140 | Vals: readerToFileOrdinal.lookup(Val: sym->getFile()), |
| 141 | Vals: sym->getName().str().data()); |
| 142 | } |
| 143 | |
| 144 | static void printNonLazyPointerSection(raw_fd_ostream &os, |
| 145 | NonLazyPointerSectionBase *osec) { |
| 146 | // ld64 considers stubs to belong to particular files, but considers GOT |
| 147 | // entries to be linker-synthesized. Not sure why they made that decision, but |
| 148 | // I think we can follow suit unless there's demand for better symbol-to-file |
| 149 | // associations. |
| 150 | for (const Symbol *sym : osec->getEntries()) |
| 151 | os << format(Fmt: "0x%08llX\t0x%08zX\t[ 0] non-lazy-pointer-to-local: %s\n" , |
| 152 | Vals: osec->addr + sym->gotIndex * target->wordSize, |
| 153 | Vals: target->wordSize, Vals: sym->getName().str().data()); |
| 154 | } |
| 155 | |
| 156 | static uint64_t getSymSizeForMap(Defined *sym) { |
| 157 | if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body) |
| 158 | return 0; |
| 159 | return sym->size; |
| 160 | } |
| 161 | |
| 162 | void macho::writeMapFile() { |
| 163 | if (config->mapFile.empty()) |
| 164 | return; |
| 165 | |
| 166 | TimeTraceScope timeScope("Write map file" ); |
| 167 | |
| 168 | // Open a map file for writing. |
| 169 | std::error_code ec; |
| 170 | raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None); |
| 171 | if (ec) { |
| 172 | error(msg: "cannot open " + config->mapFile + ": " + ec.message()); |
| 173 | return; |
| 174 | } |
| 175 | |
| 176 | os << format(Fmt: "# Path: %s\n" , Vals: config->outputFile.str().c_str()); |
| 177 | os << format(Fmt: "# Arch: %s\n" , |
| 178 | Vals: getArchitectureName(Arch: config->arch()).str().c_str()); |
| 179 | |
| 180 | MapInfo info = gatherMapInfo(); |
| 181 | |
| 182 | os << "# Object files:\n" ; |
| 183 | os << format(Fmt: "[%3u] %s\n" , Vals: 0, Vals: (const char *)"linker synthesized" ); |
| 184 | uint32_t fileIndex = 1; |
| 185 | DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal; |
| 186 | for (InputFile *file : info.files) { |
| 187 | os << format(Fmt: "[%3u] " , Vals: fileIndex); |
| 188 | printFileName(os, f: file); |
| 189 | os << "\n" ; |
| 190 | readerToFileOrdinal[file] = fileIndex++; |
| 191 | } |
| 192 | |
| 193 | os << "# Sections:\n" ; |
| 194 | os << "# Address\tSize \tSegment\tSection\n" ; |
| 195 | for (OutputSegment *seg : outputSegments) |
| 196 | for (OutputSection *osec : seg->getSections()) { |
| 197 | if (osec->isHidden()) |
| 198 | continue; |
| 199 | |
| 200 | os << format(Fmt: "0x%08llX\t0x%08llX\t%s\t%s\n" , Vals: osec->addr, Vals: osec->getSize(), |
| 201 | Vals: seg->name.str().c_str(), Vals: osec->name.str().c_str()); |
| 202 | } |
| 203 | |
| 204 | // Helper lambda that prints all symbols from one ConcatInputSection. |
| 205 | auto printOne = [&](const ConcatInputSection *isec) { |
| 206 | for (Defined *sym : isec->symbols) { |
| 207 | if (!(isPrivateLabel(name: sym->getName()) && getSymSizeForMap(sym) == 0)) { |
| 208 | os << format(Fmt: "0x%08llX\t0x%08llX\t[%3u] %s\n" , Vals: sym->getVA(), |
| 209 | Vals: getSymSizeForMap(sym), |
| 210 | Vals: readerToFileOrdinal.lookup(Val: sym->getFile()), |
| 211 | Vals: sym->getName().str().data()); |
| 212 | } |
| 213 | } |
| 214 | }; |
| 215 | // Shared function to print one or two arrays of ConcatInputSection in |
| 216 | // ascending outSecOff order. The second array is optional; if provided, we |
| 217 | // interleave the printing in sorted order without allocating a merged temp |
| 218 | // array. |
| 219 | auto printIsecArrSyms = [&](ArrayRef<ConcatInputSection *> arr1, |
| 220 | ArrayRef<ConcatInputSection *> arr2 = {}) { |
| 221 | // Print both arrays in sorted order, interleaving as necessary. |
| 222 | while (!arr1.empty() || !arr2.empty()) { |
| 223 | if (!arr1.empty() && (arr2.empty() || arr1.front()->outSecOff <= |
| 224 | arr2.front()->outSecOff)) { |
| 225 | printOne(arr1.front()); |
| 226 | arr1 = arr1.drop_front(); |
| 227 | } else if (!arr2.empty()) { |
| 228 | printOne(arr2.front()); |
| 229 | arr2 = arr2.drop_front(); |
| 230 | } |
| 231 | } |
| 232 | }; |
| 233 | |
| 234 | os << "# Symbols:\n" ; |
| 235 | os << "# Address\tSize \tFile Name\n" ; |
| 236 | for (const OutputSegment *seg : outputSegments) { |
| 237 | for (const OutputSection *osec : seg->getSections()) { |
| 238 | if (auto *textOsec = dyn_cast<TextOutputSection>(Val: osec)) { |
| 239 | printIsecArrSyms(textOsec->inputs, textOsec->getThunks()); |
| 240 | } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(Val: osec)) { |
| 241 | printIsecArrSyms(concatOsec->inputs); |
| 242 | } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { |
| 243 | const auto &liveCStrings = info.liveCStringsForSection.lookup(Val: osec); |
| 244 | uint64_t lastAddr = 0; // strings will never start at address 0, so this |
| 245 | // is a sentinel value |
| 246 | for (const auto &[addr, info] : liveCStrings) { |
| 247 | uint64_t size = 0; |
| 248 | if (addr != lastAddr) |
| 249 | size = info.str.size() + 1; // include null terminator |
| 250 | lastAddr = addr; |
| 251 | os << format(Fmt: "0x%08llX\t0x%08llX\t[%3u] literal string: " , Vals: addr, Vals: size, |
| 252 | Vals: info.fileIndex); |
| 253 | os.write_escaped(Str: info.str) << "\n" ; |
| 254 | } |
| 255 | } else if (osec == (void *)in.unwindInfo) { |
| 256 | os << format(Fmt: "0x%08llX\t0x%08llX\t[ 0] compact unwind info\n" , |
| 257 | Vals: osec->addr, Vals: osec->getSize()); |
| 258 | } else if (osec == in.stubs) { |
| 259 | printStubsEntries(os, readerToFileOrdinal, osec, entrySize: target->stubSize); |
| 260 | } else if (osec == in.lazyPointers) { |
| 261 | printStubsEntries(os, readerToFileOrdinal, osec, entrySize: target->wordSize); |
| 262 | } else if (osec == in.stubHelper) { |
| 263 | // yes, ld64 calls it "helper helper"... |
| 264 | os << format(Fmt: "0x%08llX\t0x%08llX\t[ 0] helper helper\n" , Vals: osec->addr, |
| 265 | Vals: osec->getSize()); |
| 266 | } else if (osec == in.got) { |
| 267 | printNonLazyPointerSection(os, osec: in.got); |
| 268 | } else if (osec == in.tlvPointers) { |
| 269 | printNonLazyPointerSection(os, osec: in.tlvPointers); |
| 270 | } else if (osec == in.objcMethList) { |
| 271 | printIsecArrSyms(in.objcMethList->getInputs()); |
| 272 | } |
| 273 | // TODO print other synthetic sections |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | if (config->deadStrip) { |
| 278 | os << "# Dead Stripped Symbols:\n" ; |
| 279 | os << "# \tSize \tFile Name\n" ; |
| 280 | for (Defined *sym : info.deadSymbols) { |
| 281 | assert(!sym->isLive()); |
| 282 | os << format(Fmt: "<<dead>>\t0x%08llX\t[%3u] %s\n" , Vals: getSymSizeForMap(sym), |
| 283 | Vals: readerToFileOrdinal[sym->getFile()], |
| 284 | Vals: sym->getName().str().data()); |
| 285 | } |
| 286 | for (CStringInfo &cstrInfo : info.deadCStrings) { |
| 287 | os << format(Fmt: "<<dead>>\t0x%08zX\t[%3u] literal string: " , |
| 288 | Vals: cstrInfo.str.size() + 1, Vals: cstrInfo.fileIndex); |
| 289 | os.write_escaped(Str: cstrInfo.str) << "\n" ; |
| 290 | } |
| 291 | } |
| 292 | } |
| 293 | |