//===- InputSection.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
| 8 | |
| 9 | #ifndef LLD_MACHO_INPUT_SECTION_H |
| 10 | #define LLD_MACHO_INPUT_SECTION_H |
| 11 | |
| 12 | #include "Config.h" |
| 13 | #include "Relocations.h" |
| 14 | #include "Symbols.h" |
| 15 | |
| 16 | #include "lld/Common/LLVM.h" |
| 17 | #include "lld/Common/Memory.h" |
| 18 | #include "llvm/ADT/ArrayRef.h" |
| 19 | #include "llvm/ADT/BitVector.h" |
| 20 | #include "llvm/ADT/CachedHashString.h" |
| 21 | #include "llvm/ADT/TinyPtrVector.h" |
| 22 | #include "llvm/BinaryFormat/MachO.h" |
| 23 | |
| 24 | namespace lld { |
| 25 | namespace macho { |
| 26 | |
// Forward declarations; only pointers to these types are used in this header.
class InputFile;
class OutputSection;
| 29 | |
| 30 | class InputSection { |
| 31 | public: |
| 32 | enum Kind : uint8_t { |
| 33 | ConcatKind, |
| 34 | CStringLiteralKind, |
| 35 | WordLiteralKind, |
| 36 | }; |
| 37 | |
| 38 | Kind kind() const { return sectionKind; } |
| 39 | virtual ~InputSection() = default; |
| 40 | virtual uint64_t getSize() const { return data.size(); } |
| 41 | virtual bool empty() const { return data.empty(); } |
| 42 | InputFile *getFile() const { return section.file; } |
| 43 | StringRef getName() const { return section.name; } |
| 44 | StringRef getSegName() const { return section.segname; } |
| 45 | uint32_t getFlags() const { return section.flags; } |
| 46 | uint64_t getFileSize() const; |
| 47 | // Translates \p off -- an offset relative to this InputSection -- into an |
| 48 | // offset from the beginning of its parent OutputSection. |
| 49 | virtual uint64_t getOffset(uint64_t off) const = 0; |
| 50 | // The offset from the beginning of the file. |
| 51 | uint64_t getVA(uint64_t off) const; |
| 52 | // Return a user-friendly string for use in diagnostics. |
| 53 | // Format: /path/to/object.o:(symbol _func+0x123) |
| 54 | std::string getLocation(uint64_t off) const; |
| 55 | // Return the source line corresponding to an address, or the empty string. |
| 56 | // Format: Source.cpp:123 (/path/to/Source.cpp:123) |
| 57 | std::string getSourceLocation(uint64_t off) const; |
| 58 | // Return the relocation at \p off, if it exists. This does a linear search. |
| 59 | const Reloc *getRelocAt(uint32_t off) const; |
| 60 | // Whether the data at \p off in this InputSection is live. |
| 61 | virtual bool isLive(uint64_t off) const = 0; |
| 62 | virtual void markLive(uint64_t off) = 0; |
| 63 | virtual InputSection *canonical() { return this; } |
| 64 | virtual const InputSection *canonical() const { return this; } |
| 65 | |
| 66 | protected: |
| 67 | InputSection(Kind kind, const Section §ion, ArrayRef<uint8_t> data, |
| 68 | uint32_t align) |
| 69 | : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align), |
| 70 | data(data), section(section) {} |
| 71 | |
| 72 | InputSection(const InputSection &rhs) |
| 73 | : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false), |
| 74 | align(rhs.align), data(rhs.data), section(rhs.section) {} |
| 75 | |
| 76 | Kind sectionKind; |
| 77 | |
| 78 | public: |
| 79 | // is address assigned? |
| 80 | bool isFinal = false; |
| 81 | // keep the address of the symbol(s) in this section unique in the final |
| 82 | // binary ? |
| 83 | bool keepUnique : 1; |
| 84 | // Does this section have symbols at offsets other than zero? (NOTE: only |
| 85 | // applies to ConcatInputSections.) |
| 86 | bool hasAltEntry : 1; |
| 87 | uint32_t align = 1; |
| 88 | |
| 89 | OutputSection *parent = nullptr; |
| 90 | ArrayRef<uint8_t> data; |
| 91 | std::vector<Reloc> relocs; |
| 92 | // The symbols that belong to this InputSection, sorted by value. With |
| 93 | // .subsections_via_symbols, there is typically only one element here. |
| 94 | llvm::TinyPtrVector<Defined *> symbols; |
| 95 | |
| 96 | const Section §ion; |
| 97 | |
| 98 | protected: |
| 99 | const Defined *getContainingSymbol(uint64_t off) const; |
| 100 | }; |
| 101 | |
| 102 | // ConcatInputSections are combined into (Concat)OutputSections through simple |
| 103 | // concatenation, in contrast with literal sections which may have their |
| 104 | // contents merged before output. |
| 105 | class ConcatInputSection final : public InputSection { |
| 106 | public: |
| 107 | ConcatInputSection(const Section §ion, ArrayRef<uint8_t> data, |
| 108 | uint32_t align = 1) |
| 109 | : InputSection(ConcatKind, section, data, align) {} |
| 110 | |
| 111 | uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } |
| 112 | uint64_t getVA() const { return InputSection::getVA(off: 0); } |
| 113 | // ConcatInputSections are entirely live or dead, so the offset is irrelevant. |
| 114 | bool isLive(uint64_t off) const override { return live; } |
| 115 | void markLive(uint64_t off) override { live = true; } |
| 116 | bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); } |
| 117 | bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } |
| 118 | void writeTo(uint8_t *buf); |
| 119 | |
| 120 | void foldIdentical(ConcatInputSection *redundant, |
| 121 | Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body); |
| 122 | ConcatInputSection *canonical() override { |
| 123 | return replacement ? replacement : this; |
| 124 | } |
| 125 | const InputSection *canonical() const override { |
| 126 | return replacement ? replacement : this; |
| 127 | } |
| 128 | |
| 129 | static bool classof(const InputSection *isec) { |
| 130 | return isec->kind() == ConcatKind; |
| 131 | } |
| 132 | |
| 133 | // Points to the surviving section after this one is folded by ICF |
| 134 | ConcatInputSection *replacement = nullptr; |
| 135 | // Equivalence-class ID for ICF |
| 136 | uint32_t icfEqClass[2] = {0, 0}; |
| 137 | |
| 138 | // With subsections_via_symbols, most symbols have their own InputSection, |
| 139 | // and for weak symbols (e.g. from inline functions), only the |
| 140 | // InputSection from one translation unit will make it to the output, |
| 141 | // while all copies in other translation units are coalesced into the |
| 142 | // first and not copied to the output. |
| 143 | bool wasCoalesced = false; |
| 144 | bool live = !config->deadStrip; |
| 145 | bool hasCallSites = false; |
| 146 | // This variable has two usages. Initially, it represents the input order. |
| 147 | // After assignAddresses is called, it represents the offset from the |
| 148 | // beginning of the output section this section was assigned to. |
| 149 | uint64_t outSecOff = 0; |
| 150 | }; |
| 151 | |
| 152 | // Initialize a fake InputSection that does not belong to any InputFile. |
| 153 | // The created ConcatInputSection will always have 'live=true' |
| 154 | ConcatInputSection *makeSyntheticInputSection(StringRef segName, |
| 155 | StringRef sectName, |
| 156 | uint32_t flags = 0, |
| 157 | ArrayRef<uint8_t> data = {}, |
| 158 | uint32_t align = 1); |
| 159 | |
| 160 | // Helper functions to make it easy to sprinkle asserts. |
| 161 | |
| 162 | inline bool shouldOmitFromOutput(InputSection *isec) { |
| 163 | return isa<ConcatInputSection>(Val: isec) && |
| 164 | cast<ConcatInputSection>(Val: isec)->shouldOmitFromOutput(); |
| 165 | } |
| 166 | |
| 167 | inline bool isCoalescedWeak(InputSection *isec) { |
| 168 | return isa<ConcatInputSection>(Val: isec) && |
| 169 | cast<ConcatInputSection>(Val: isec)->isCoalescedWeak(); |
| 170 | } |
| 171 | |
| 172 | // We allocate a lot of these and binary search on them, so they should be as |
| 173 | // compact as possible. Hence the use of 31 rather than 64 bits for the hash. |
| 174 | struct StringPiece { |
| 175 | // Offset from the start of the containing input section. |
| 176 | uint32_t inSecOff; |
| 177 | uint32_t live : 1; |
| 178 | // Only set if deduplicating literals |
| 179 | uint32_t hash : 31; |
| 180 | // Offset from the start of the containing output section. |
| 181 | uint64_t outSecOff = 0; |
| 182 | |
| 183 | StringPiece(uint64_t off, uint32_t hash) |
| 184 | : inSecOff(off), live(!config->deadStrip), hash(hash) {} |
| 185 | }; |
| 186 | |
| 187 | static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!" ); |
| 188 | |
| 189 | // CStringInputSections are composed of multiple null-terminated string |
| 190 | // literals, which we represent using StringPieces. These literals can be |
| 191 | // deduplicated and tail-merged, so translating offsets between the input and |
| 192 | // outputs sections is more complicated. |
| 193 | // |
| 194 | // NOTE: One significant difference between LLD and ld64 is that we merge all |
| 195 | // cstring literals, even those referenced directly by non-private symbols. |
| 196 | // ld64 is more conservative and does not do that. This was mostly done for |
| 197 | // implementation simplicity; if we find programs that need the more |
| 198 | // conservative behavior we can certainly implement that. |
| 199 | class CStringInputSection final : public InputSection { |
| 200 | public: |
| 201 | CStringInputSection(const Section §ion, ArrayRef<uint8_t> data, |
| 202 | uint32_t align, bool dedupLiterals) |
| 203 | : InputSection(CStringLiteralKind, section, data, align), |
| 204 | deduplicateLiterals(dedupLiterals) {} |
| 205 | |
| 206 | uint64_t getOffset(uint64_t off) const override; |
| 207 | bool isLive(uint64_t off) const override { return getStringPiece(off).live; } |
| 208 | void markLive(uint64_t off) override { getStringPiece(off).live = true; } |
| 209 | // Find the StringPiece that contains this offset. |
| 210 | StringPiece &getStringPiece(uint64_t off); |
| 211 | const StringPiece &getStringPiece(uint64_t off) const; |
| 212 | // Split at each null byte. |
| 213 | void splitIntoPieces(); |
| 214 | |
| 215 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
| 216 | StringRef getStringRef(size_t i) const { |
| 217 | size_t begin = pieces[i].inSecOff; |
| 218 | // The endpoint should be *at* the null terminator, not after. This matches |
| 219 | // the behavior of StringRef(const char *Str). |
| 220 | size_t end = |
| 221 | ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1; |
| 222 | return toStringRef(Input: data.slice(N: begin, M: end - begin)); |
| 223 | } |
| 224 | |
| 225 | StringRef getStringRefAtOffset(uint64_t off) const { |
| 226 | return getStringRef(i: getStringPieceIndex(off)); |
| 227 | } |
| 228 | |
| 229 | // Returns i'th piece as a CachedHashStringRef. This function is very hot when |
| 230 | // string merging is enabled, so we want to inline. |
| 231 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
| 232 | llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { |
| 233 | assert(deduplicateLiterals); |
| 234 | return {getStringRef(i), pieces[i].hash}; |
| 235 | } |
| 236 | |
| 237 | static bool classof(const InputSection *isec) { |
| 238 | return isec->kind() == CStringLiteralKind; |
| 239 | } |
| 240 | |
| 241 | bool deduplicateLiterals = false; |
| 242 | std::vector<StringPiece> pieces; |
| 243 | |
| 244 | private: |
| 245 | size_t getStringPieceIndex(uint64_t off) const; |
| 246 | }; |
| 247 | |
| 248 | class WordLiteralInputSection final : public InputSection { |
| 249 | public: |
| 250 | WordLiteralInputSection(const Section §ion, ArrayRef<uint8_t> data, |
| 251 | uint32_t align); |
| 252 | uint64_t getOffset(uint64_t off) const override; |
| 253 | bool isLive(uint64_t off) const override { |
| 254 | return live[off >> power2LiteralSize]; |
| 255 | } |
| 256 | void markLive(uint64_t off) override { |
| 257 | live[off >> power2LiteralSize] = true; |
| 258 | } |
| 259 | |
| 260 | static bool classof(const InputSection *isec) { |
| 261 | return isec->kind() == WordLiteralKind; |
| 262 | } |
| 263 | |
| 264 | private: |
| 265 | unsigned power2LiteralSize; |
| 266 | // The liveness of data[off] is tracked by live[off >> power2LiteralSize]. |
| 267 | llvm::BitVector live; |
| 268 | }; |
| 269 | |
| 270 | inline uint8_t sectionType(uint32_t flags) { |
| 271 | return flags & llvm::MachO::SECTION_TYPE; |
| 272 | } |
| 273 | |
| 274 | inline bool isZeroFill(uint32_t flags) { |
| 275 | return llvm::MachO::isVirtualSection(type: sectionType(flags)); |
| 276 | } |
| 277 | |
| 278 | inline bool isThreadLocalVariables(uint32_t flags) { |
| 279 | return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES; |
| 280 | } |
| 281 | |
| 282 | // These sections contain the data for initializing thread-local variables. |
| 283 | inline bool isThreadLocalData(uint32_t flags) { |
| 284 | return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR || |
| 285 | sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL; |
| 286 | } |
| 287 | |
| 288 | inline bool isDebugSection(uint32_t flags) { |
| 289 | return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) == |
| 290 | llvm::MachO::S_ATTR_DEBUG; |
| 291 | } |
| 292 | |
| 293 | inline bool isWordLiteralSection(uint32_t flags) { |
| 294 | return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS || |
| 295 | sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS || |
| 296 | sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS; |
| 297 | } |
| 298 | |
| 299 | bool isCodeSection(const InputSection *); |
| 300 | bool isCfStringSection(const InputSection *); |
| 301 | bool isClassRefsSection(const InputSection *); |
| 302 | bool isSelRefsSection(const InputSection *); |
| 303 | bool isEhFrameSection(const InputSection *); |
| 304 | bool isGccExceptTabSection(const InputSection *); |
| 305 | |
| 306 | extern std::vector<ConcatInputSection *> inputSections; |
| 307 | // This is used as a counter for specyfing input order for input sections |
| 308 | extern int inputSectionsOrder; |
| 309 | |
// Names of the Mach-O sections that the linker refers to by name.
namespace section_names {

constexpr const char authGot[] = "__auth_got";
constexpr const char authPtr[] = "__auth_ptr";
constexpr const char binding[] = "__binding";
constexpr const char bitcodeBundle[] = "__bundle";
constexpr const char cString[] = "__cstring";
constexpr const char cfString[] = "__cfstring";
constexpr const char cgProfile[] = "__cg_profile";
constexpr const char chainFixups[] = "__chainfixups";
constexpr const char codeSignature[] = "__code_signature";
constexpr const char common[] = "__common";
constexpr const char compactUnwind[] = "__compact_unwind";
constexpr const char data[] = "__data";
constexpr const char debugAbbrev[] = "__debug_abbrev";
constexpr const char debugInfo[] = "__debug_info";
constexpr const char debugLine[] = "__debug_line";
constexpr const char debugStr[] = "__debug_str";
constexpr const char debugStrOffs[] = "__debug_str_offs";
constexpr const char ehFrame[] = "__eh_frame";
constexpr const char gccExceptTab[] = "__gcc_except_tab";
constexpr const char export_[] = "__export";
constexpr const char dataInCode[] = "__data_in_code";
constexpr const char functionStarts[] = "__func_starts";
constexpr const char got[] = "__got";
constexpr const char header[] = "__mach_header";
constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
constexpr const char initOffsets[] = "__init_offsets";
constexpr const char const_[] = "__const";
constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr const char lazyBinding[] = "__lazy_binding";
constexpr const char literals[] = "__literals";
constexpr const char functionMap[] = "__llvm_merge";
constexpr const char moduleInitFunc[] = "__mod_init_func";
constexpr const char moduleTermFunc[] = "__mod_term_func";
constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr const char objcCatList[] = "__objc_catlist";
constexpr const char objcClassList[] = "__objc_classlist";
constexpr const char objcMethList[] = "__objc_methlist";
constexpr const char objcClassRefs[] = "__objc_classrefs";
constexpr const char objcConst[] = "__objc_const";
constexpr const char objCImageInfo[] = "__objc_imageinfo";
constexpr const char objcStubs[] = "__objc_stubs";
constexpr const char objcSelrefs[] = "__objc_selrefs";
constexpr const char objcMethname[] = "__objc_methname";
constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr const char objcProtoList[] = "__objc_protolist";
constexpr const char outlinedHashTree[] = "__llvm_outline";
constexpr const char pageZero[] = "__pagezero";
constexpr const char pointers[] = "__pointers";
constexpr const char rebase[] = "__rebase";
constexpr const char staticInit[] = "__StaticInit";
constexpr const char stringTable[] = "__string_table";
constexpr const char stubHelper[] = "__stub_helper";
constexpr const char stubs[] = "__stubs";
constexpr const char swift[] = "__swift";
constexpr const char symbolTable[] = "__symbol_table";
constexpr const char textCoalNt[] = "__textcoal_nt";
constexpr const char text[] = "__text";
constexpr const char threadPtrs[] = "__thread_ptrs";
constexpr const char threadVars[] = "__thread_vars";
constexpr const char unwindInfo[] = "__unwind_info";
constexpr const char weakBinding[] = "__weak_binding";
constexpr const char zeroFill[] = "__zerofill";
constexpr const char addrSig[] = "__llvm_addrsig";

} // namespace section_names
| 378 | |
| 379 | void addInputSection(InputSection *inputSection); |
| 380 | } // namespace macho |
| 381 | |
| 382 | std::string toString(const macho::InputSection *); |
| 383 | |
| 384 | } // namespace lld |
| 385 | |
| 386 | #endif |
| 387 | |