| 1 | //===- InputFiles.h ---------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLD_ELF_INPUT_FILES_H |
| 10 | #define LLD_ELF_INPUT_FILES_H |
| 11 | |
| 12 | #include "Config.h" |
| 13 | #include "Symbols.h" |
| 14 | #include "lld/Common/ErrorHandler.h" |
| 15 | #include "lld/Common/LLVM.h" |
| 16 | #include "lld/Common/Reproduce.h" |
| 17 | #include "llvm/ADT/DenseSet.h" |
| 18 | #include "llvm/BinaryFormat/Magic.h" |
| 19 | #include "llvm/Object/ELF.h" |
| 20 | #include "llvm/Support/MemoryBufferRef.h" |
| 21 | #include "llvm/Support/Threading.h" |
| 22 | |
// Forward declarations of LLVM types, so that their defining headers do not
// have to be included from this header.
namespace llvm {
struct DILineInfo;
class TarWriter;
namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm
| 30 | |
| 31 | namespace lld { |
| 32 | class DWARFCache; |
| 33 | |
| 34 | namespace elf { |
| 35 | class InputSection; |
| 36 | class Symbol; |
| 37 | |
| 38 | // Returns "<internal>", "foo.a(bar.o)" or "baz.o". |
| 39 | std::string toStr(Ctx &, const InputFile *f); |
| 40 | const ELFSyncStream &operator<<(const ELFSyncStream &, const InputFile *); |
| 41 | |
| 42 | // Opens a given file. |
| 43 | std::optional<MemoryBufferRef> readFile(Ctx &, StringRef path); |
| 44 | |
| 45 | // Add symbols in File to the symbol table. |
| 46 | void parseFile(Ctx &, InputFile *file); |
| 47 | void parseFiles(Ctx &, const SmallVector<std::unique_ptr<InputFile>, 0> &); |
| 48 | |
| 49 | // The root class of input files. |
| 50 | class InputFile { |
| 51 | public: |
| 52 | Ctx &ctx; |
| 53 | |
| 54 | protected: |
| 55 | std::unique_ptr<Symbol *[]> symbols; |
| 56 | size_t numSymbols = 0; |
| 57 | SmallVector<InputSectionBase *, 0> sections; |
| 58 | |
| 59 | public: |
| 60 | enum Kind : uint8_t { |
| 61 | ObjKind, |
| 62 | SharedKind, |
| 63 | BitcodeKind, |
| 64 | BinaryKind, |
| 65 | InternalKind, |
| 66 | }; |
| 67 | |
| 68 | InputFile(Ctx &, Kind k, MemoryBufferRef m); |
| 69 | virtual ~InputFile(); |
| 70 | Kind kind() const { return fileKind; } |
| 71 | |
| 72 | bool isElf() const { |
| 73 | Kind k = kind(); |
| 74 | return k == ObjKind || k == SharedKind; |
| 75 | } |
| 76 | bool isInternal() const { return kind() == InternalKind; } |
| 77 | |
| 78 | StringRef getName() const { return mb.getBufferIdentifier(); } |
| 79 | MemoryBufferRef mb; |
| 80 | |
| 81 | // Returns sections. It is a runtime error to call this function |
| 82 | // on files that don't have the notion of sections. |
| 83 | ArrayRef<InputSectionBase *> getSections() const { |
| 84 | assert(fileKind == ObjKind || fileKind == BinaryKind); |
| 85 | return sections; |
| 86 | } |
| 87 | void cacheDecodedCrel(size_t i, InputSectionBase *s) { sections[i] = s; } |
| 88 | |
| 89 | // Returns object file symbols. It is a runtime error to call this |
| 90 | // function on files of other types. |
| 91 | ArrayRef<Symbol *> getSymbols() const { |
| 92 | assert(fileKind == BinaryKind || fileKind == ObjKind || |
| 93 | fileKind == BitcodeKind); |
| 94 | return {symbols.get(), numSymbols}; |
| 95 | } |
| 96 | |
| 97 | MutableArrayRef<Symbol *> getMutableSymbols() { |
| 98 | assert(fileKind == BinaryKind || fileKind == ObjKind || |
| 99 | fileKind == BitcodeKind); |
| 100 | return {symbols.get(), numSymbols}; |
| 101 | } |
| 102 | |
| 103 | Symbol &getSymbol(uint32_t symbolIndex) const { |
| 104 | assert(fileKind == ObjKind); |
| 105 | if (symbolIndex >= numSymbols) |
| 106 | Fatal(ctx) << this << ": invalid symbol index" ; |
| 107 | return *this->symbols[symbolIndex]; |
| 108 | } |
| 109 | |
| 110 | template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { |
| 111 | uint32_t symIndex = rel.getSymbol(ctx.arg.isMips64EL); |
| 112 | return getSymbol(symbolIndex: symIndex); |
| 113 | } |
| 114 | |
| 115 | // Get filename to use for linker script processing. |
| 116 | StringRef getNameForScript() const; |
| 117 | |
| 118 | // Check if a non-common symbol should be extracted to override a common |
| 119 | // definition. |
| 120 | bool (StringRef name) const; |
| 121 | |
| 122 | // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute |
| 123 | // offsets in PLT call stubs. |
| 124 | InputSection *ppc32Got2 = nullptr; |
| 125 | |
| 126 | // Index of MIPS GOT built for this file. |
| 127 | uint32_t mipsGotIndex = -1; |
| 128 | |
| 129 | // groupId is used for --warn-backrefs which is an optional error |
| 130 | // checking feature. All files within the same --{start,end}-group or |
| 131 | // --{start,end}-lib get the same group ID. Otherwise, each file gets a new |
| 132 | // group ID. For more info, see checkDependency() in SymbolTable.cpp. |
| 133 | uint32_t groupId; |
| 134 | |
| 135 | // If this is an architecture-specific file, the following members |
| 136 | // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. |
| 137 | uint16_t emachine = llvm::ELF::EM_NONE; |
| 138 | const Kind fileKind; |
| 139 | ELFKind ekind = ELFNoneKind; |
| 140 | uint8_t osabi = 0; |
| 141 | uint8_t abiVersion = 0; |
| 142 | |
| 143 | // True if this is a relocatable object file/bitcode file in an ar archive |
| 144 | // or between --start-lib and --end-lib. |
| 145 | bool lazy = false; |
| 146 | |
| 147 | // True if this is an argument for --just-symbols. Usually false. |
| 148 | bool justSymbols = false; |
| 149 | |
| 150 | // On PPC64 we need to keep track of which files contain small code model |
| 151 | // relocations that access the .toc section. To minimize the chance of a |
| 152 | // relocation overflow, files that do contain said relocations should have |
| 153 | // their .toc sections sorted closer to the .got section than files that do |
| 154 | // not contain any small code model relocations. Thats because the toc-pointer |
| 155 | // is defined to point at .got + 0x8000 and the instructions used with small |
| 156 | // code model relocations support immediates in the range [-0x8000, 0x7FFC], |
| 157 | // making the addressable range relative to the toc pointer |
| 158 | // [.got, .got + 0xFFFC]. |
| 159 | bool ppc64SmallCodeModelTocRelocs = false; |
| 160 | |
| 161 | // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or |
| 162 | // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation. |
| 163 | bool ppc64DisableTLSRelax = false; |
| 164 | |
| 165 | public: |
| 166 | // If not empty, this stores the name of the archive containing this file. |
| 167 | // We use this string for creating error messages. |
| 168 | SmallString<0> archiveName; |
| 169 | // Cache for toStr(Ctx &, const InputFile *). Only toStr should use this |
| 170 | // member. |
| 171 | mutable SmallString<0> toStringCache; |
| 172 | |
| 173 | private: |
| 174 | // Cache for getNameForScript(). |
| 175 | mutable SmallString<0> nameForScriptCache; |
| 176 | }; |
| 177 | |
| 178 | class ELFFileBase : public InputFile { |
| 179 | public: |
| 180 | ELFFileBase(Ctx &ctx, Kind k, ELFKind ekind, MemoryBufferRef m); |
| 181 | ~ELFFileBase(); |
| 182 | static bool classof(const InputFile *f) { return f->isElf(); } |
| 183 | |
| 184 | void init(); |
| 185 | template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { |
| 186 | return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); |
| 187 | } |
| 188 | |
| 189 | StringRef getStringTable() const { return stringTable; } |
| 190 | |
| 191 | ArrayRef<Symbol *> getLocalSymbols() { |
| 192 | if (numSymbols == 0) |
| 193 | return {}; |
| 194 | return llvm::ArrayRef(symbols.get() + 1, firstGlobal - 1); |
| 195 | } |
| 196 | ArrayRef<Symbol *> getGlobalSymbols() { |
| 197 | return llvm::ArrayRef(symbols.get() + firstGlobal, |
| 198 | numSymbols - firstGlobal); |
| 199 | } |
| 200 | MutableArrayRef<Symbol *> getMutableGlobalSymbols() { |
| 201 | return llvm::MutableArrayRef(symbols.get() + firstGlobal, |
| 202 | numSymbols - firstGlobal); |
| 203 | } |
| 204 | |
| 205 | template <typename ELFT> typename ELFT::ShdrRange getELFShdrs() const { |
| 206 | return typename ELFT::ShdrRange( |
| 207 | reinterpret_cast<const typename ELFT::Shdr *>(elfShdrs), numELFShdrs); |
| 208 | } |
| 209 | template <typename ELFT> typename ELFT::SymRange getELFSyms() const { |
| 210 | return typename ELFT::SymRange( |
| 211 | reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numSymbols); |
| 212 | } |
| 213 | template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { |
| 214 | return getELFSyms<ELFT>().slice(firstGlobal); |
| 215 | } |
| 216 | |
| 217 | // Get cached DWARF information. |
| 218 | DWARFCache *getDwarf(); |
| 219 | |
| 220 | protected: |
| 221 | // Initializes this class's member variables. |
| 222 | template <typename ELFT> void init(InputFile::Kind k); |
| 223 | |
| 224 | StringRef stringTable; |
| 225 | const void *elfShdrs = nullptr; |
| 226 | const void *elfSyms = nullptr; |
| 227 | uint32_t numELFShdrs = 0; |
| 228 | uint32_t firstGlobal = 0; |
| 229 | |
| 230 | // Below are ObjFile specific members. |
| 231 | |
| 232 | // Debugging information to retrieve source file and line for error |
| 233 | // reporting. Linker may find reasonable number of errors in a |
| 234 | // single object file, so we cache debugging information in order to |
| 235 | // parse it only once for each object file we link. |
| 236 | llvm::once_flag initDwarf; |
| 237 | std::unique_ptr<DWARFCache> dwarf; |
| 238 | |
| 239 | public: |
| 240 | // Name of source file obtained from STT_FILE, if present. |
| 241 | StringRef sourceFile; |
| 242 | uint32_t andFeatures = 0; |
| 243 | bool hasCommonSyms = false; |
| 244 | std::optional<AArch64PauthAbiCoreInfo> aarch64PauthAbiCoreInfo; |
| 245 | }; |
| 246 | |
| 247 | // .o file. |
| 248 | template <class ELFT> class ObjFile : public ELFFileBase { |
| 249 | LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) |
| 250 | |
| 251 | public: |
| 252 | static bool classof(const InputFile *f) { return f->kind() == ObjKind; } |
| 253 | |
| 254 | llvm::object::ELFFile<ELFT> getObj() const { |
| 255 | return this->ELFFileBase::getObj<ELFT>(); |
| 256 | } |
| 257 | |
| 258 | ObjFile(Ctx &ctx, ELFKind ekind, MemoryBufferRef m, StringRef archiveName) |
| 259 | : ELFFileBase(ctx, ObjKind, ekind, m) { |
| 260 | this->archiveName = archiveName; |
| 261 | } |
| 262 | |
| 263 | void parse(bool ignoreComdats = false); |
| 264 | void parseLazy(); |
| 265 | |
| 266 | StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, |
| 267 | const Elf_Shdr &sec); |
| 268 | |
| 269 | uint32_t getSectionIndex(const Elf_Sym &sym) const; |
| 270 | |
| 271 | |
| 272 | // Pointer to this input file's .llvm_addrsig section, if it has one. |
| 273 | const Elf_Shdr *addrsigSec = nullptr; |
| 274 | |
| 275 | // SHT_LLVM_CALL_GRAPH_PROFILE section index. |
| 276 | uint32_t cgProfileSectionIndex = 0; |
| 277 | |
| 278 | // MIPS GP0 value defined by this file. This value represents the gp value |
| 279 | // used to create the relocatable object and required to support |
| 280 | // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. |
| 281 | uint32_t mipsGp0 = 0; |
| 282 | |
| 283 | // True if the file defines functions compiled with |
| 284 | // -fsplit-stack. Usually false. |
| 285 | bool splitStack = false; |
| 286 | |
| 287 | // True if the file defines functions compiled with -fsplit-stack, |
| 288 | // but had one or more functions with the no_split_stack attribute. |
| 289 | bool someNoSplitStack = false; |
| 290 | |
| 291 | void initDwarf(); |
| 292 | |
| 293 | void initSectionsAndLocalSyms(bool ignoreComdats); |
| 294 | void postParse(); |
| 295 | void importCmseSymbols(); |
| 296 | |
| 297 | private: |
| 298 | void initializeSections(bool ignoreComdats, |
| 299 | const llvm::object::ELFFile<ELFT> &obj); |
| 300 | void initializeSymbols(const llvm::object::ELFFile<ELFT> &obj); |
| 301 | void initializeJustSymbols(); |
| 302 | |
| 303 | InputSectionBase *getRelocTarget(uint32_t idx, uint32_t info); |
| 304 | InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec, |
| 305 | StringRef name); |
| 306 | |
| 307 | bool shouldMerge(const Elf_Shdr &sec, StringRef name); |
| 308 | |
| 309 | // Each ELF symbol contains a section index which the symbol belongs to. |
| 310 | // However, because the number of bits dedicated for that is limited, a |
| 311 | // symbol can directly point to a section only when the section index is |
| 312 | // equal to or smaller than 65280. |
| 313 | // |
| 314 | // If an object file contains more than 65280 sections, the file must |
| 315 | // contain .symtab_shndx section. The section contains an array of |
| 316 | // 32-bit integers whose size is the same as the number of symbols. |
| 317 | // Nth symbol's section index is in the Nth entry of .symtab_shndx. |
| 318 | // |
| 319 | // The following variable contains the contents of .symtab_shndx. |
| 320 | // If the section does not exist (which is common), the array is empty. |
| 321 | ArrayRef<Elf_Word> shndxTable; |
| 322 | }; |
| 323 | |
| 324 | class BitcodeFile : public InputFile { |
| 325 | public: |
| 326 | BitcodeFile(Ctx &, MemoryBufferRef m, StringRef archiveName, |
| 327 | uint64_t offsetInArchive, bool lazy); |
| 328 | static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } |
| 329 | void parse(); |
| 330 | void parseLazy(); |
| 331 | void postParse(); |
| 332 | std::unique_ptr<llvm::lto::InputFile> obj; |
| 333 | std::vector<bool> keptComdats; |
| 334 | }; |
| 335 | |
| 336 | // .so file. |
| 337 | class SharedFile : public ELFFileBase { |
| 338 | public: |
| 339 | SharedFile(Ctx &, MemoryBufferRef m, StringRef defaultSoName); |
| 340 | |
| 341 | // This is actually a vector of Elf_Verdef pointers. |
| 342 | SmallVector<const void *, 0> verdefs; |
| 343 | |
| 344 | // If the output file needs Elf_Verneed data structures for this file, this is |
| 345 | // a vector of Elf_Vernaux version identifiers that map onto the entries in |
| 346 | // Verdefs, otherwise it is empty. |
| 347 | SmallVector<uint32_t, 0> vernauxs; |
| 348 | |
| 349 | SmallVector<StringRef, 0> dtNeeded; |
| 350 | StringRef soName; |
| 351 | |
| 352 | static bool classof(const InputFile *f) { return f->kind() == SharedKind; } |
| 353 | |
| 354 | template <typename ELFT> void parse(); |
| 355 | |
| 356 | // Used for --as-needed |
| 357 | bool isNeeded; |
| 358 | |
| 359 | // Non-weak undefined symbols which are not yet resolved when the SO is |
| 360 | // parsed. Only filled for `--no-allow-shlib-undefined`. |
| 361 | SmallVector<Symbol *, 0> requiredSymbols; |
| 362 | |
| 363 | private: |
| 364 | template <typename ELFT> |
| 365 | std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj, |
| 366 | const typename ELFT::Shdr *sec); |
| 367 | template <typename ELFT> |
| 368 | void parseGnuAndFeatures(const llvm::object::ELFFile<ELFT> &obj); |
| 369 | }; |
| 370 | |
| 371 | class BinaryFile : public InputFile { |
| 372 | public: |
| 373 | explicit BinaryFile(Ctx &ctx, MemoryBufferRef m) |
| 374 | : InputFile(ctx, BinaryKind, m) {} |
| 375 | static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } |
| 376 | void parse(); |
| 377 | }; |
| 378 | |
| 379 | InputFile *createInternalFile(Ctx &, StringRef name); |
| 380 | std::unique_ptr<ELFFileBase> createObjFile(Ctx &, MemoryBufferRef mb, |
| 381 | StringRef archiveName = "" , |
| 382 | bool lazy = false); |
| 383 | |
| 384 | std::string replaceThinLTOSuffix(Ctx &, StringRef path); |
| 385 | |
| 386 | } // namespace elf |
| 387 | } // namespace lld |
| 388 | |
| 389 | #endif |
| 390 | |