| 1 | //===- LinkerScript.h -------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLD_ELF_LINKER_SCRIPT_H |
| 10 | #define LLD_ELF_LINKER_SCRIPT_H |
| 11 | |
#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
| 27 | |
| 28 | namespace lld::elf { |
| 29 | |
| 30 | class Defined; |
| 31 | class InputFile; |
| 32 | class InputSection; |
| 33 | class InputSectionBase; |
| 34 | class OutputSection; |
| 35 | class SectionBase; |
| 36 | class ThunkSection; |
| 37 | struct OutputDesc; |
| 38 | struct SectionClass; |
| 39 | struct SectionClassDesc; |
| 40 | |
| 41 | // This represents an r-value in the linker script. |
| 42 | struct ExprValue { |
| 43 | ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, |
| 44 | const Twine &loc) |
| 45 | : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {} |
| 46 | |
| 47 | ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "" ) {} |
| 48 | |
| 49 | bool isAbsolute() const { return forceAbsolute || sec == nullptr; } |
| 50 | uint64_t getValue() const; |
| 51 | uint64_t getSecAddr() const; |
| 52 | uint64_t getSectionOffset() const; |
| 53 | |
| 54 | // If a value is relative to a section, it has a non-null Sec. |
| 55 | SectionBase *sec; |
| 56 | |
| 57 | uint64_t val; |
| 58 | uint64_t alignment = 1; |
| 59 | |
| 60 | // The original st_type if the expression represents a symbol. Any operation |
| 61 | // resets type to STT_NOTYPE. |
| 62 | uint8_t type = llvm::ELF::STT_NOTYPE; |
| 63 | |
| 64 | // True if this expression is enclosed in ABSOLUTE(). |
| 65 | // This flag affects the return value of getValue(). |
| 66 | bool forceAbsolute; |
| 67 | |
| 68 | // Original source location. Used for error messages. |
| 69 | std::string loc; |
| 70 | }; |
| 71 | |
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function; every call
// takes no arguments and yields an ExprValue.
using Expr = std::function<ExprValue()>;
| 76 | |
// This enum is used to implement linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
// The values are stored in SectionCommand::kind and compared by the classof()
// helpers of the command types.
enum SectionsCommandKind {
  AssignmentKind, // . = expr or <sym> = expr
  OutputSectionKind, // presumably an output section description; its command
                     // type is not defined in this header
  InputSectionKind, // tag used by InputSectionDescription
  ByteKind, // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ClassKind, // CLASS(class_name)
};
| 86 | |
// Base of the command types tagged by SectionsCommandKind. `kind` records
// which concrete command this is, and is what the derived types' classof()
// helpers inspect.
struct SectionCommand {
  // `k` is expected to be a SectionsCommandKind value.
  SectionCommand(int k) : kind(k) {}
  int kind;
};
| 91 | |
| 92 | // This represents ". = <expr>" or "<symbol> = <expr>". |
| 93 | struct SymbolAssignment : SectionCommand { |
| 94 | SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc) |
| 95 | : SectionCommand(AssignmentKind), name(name), expression(e), |
| 96 | symOrder(symOrder), location(loc) {} |
| 97 | |
| 98 | static bool classof(const SectionCommand *c) { |
| 99 | return c->kind == AssignmentKind; |
| 100 | } |
| 101 | |
| 102 | // The LHS of an expression. Name is either a symbol name or ".". |
| 103 | StringRef name; |
| 104 | Defined *sym = nullptr; |
| 105 | |
| 106 | // The RHS of an expression. |
| 107 | Expr expression; |
| 108 | |
| 109 | // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. |
| 110 | bool provide = false; |
| 111 | bool hidden = false; |
| 112 | |
| 113 | // This assignment references DATA_SEGMENT_RELRO_END. |
| 114 | bool dataSegmentRelroEnd = false; |
| 115 | |
| 116 | unsigned symOrder; |
| 117 | |
| 118 | // Holds file name and line number for error reporting. |
| 119 | std::string location; |
| 120 | |
| 121 | // A string representation of this command. We use this for -Map. |
| 122 | std::string commandString; |
| 123 | |
| 124 | // Address of this assignment command. |
| 125 | uint64_t addr; |
| 126 | |
| 127 | // Size of this assignment command. This is usually 0, but if |
| 128 | // you move '.' this may be greater than 0. |
| 129 | uint64_t size; |
| 130 | }; |
| 131 | |
// Linker scripts allow additional constraints to be put on output sections.
// If an output section is marked as ONLY_IF_RO, the section is created
// only if its input sections are read-only. Likewise, an output section
// with ONLY_IF_RW is created if all input sections are RW.
// NoConstraint is the value for an output section carrying neither marker;
// ReadOnly and ReadWrite presumably correspond to ONLY_IF_RO and ONLY_IF_RW.
enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite };
| 137 | |
| 138 | // This struct is used to represent the location and size of regions of |
| 139 | // target memory. Instances of the struct are created by parsing the |
| 140 | // MEMORY command. |
| 141 | struct MemoryRegion { |
| 142 | MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, |
| 143 | uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags) |
| 144 | : name(std::string(name)), origin(origin), length(length), flags(flags), |
| 145 | invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {} |
| 146 | |
| 147 | std::string name; |
| 148 | Expr origin; |
| 149 | Expr length; |
| 150 | // A section can be assigned to the region if any of these ELF section flags |
| 151 | // are set... |
| 152 | uint32_t flags; |
| 153 | // ... or any of these flags are not set. |
| 154 | // For example, the memory region attribute "r" maps to SHF_WRITE. |
| 155 | uint32_t invFlags; |
| 156 | // A section cannot be assigned to the region if any of these ELF section |
| 157 | // flags are set... |
| 158 | uint32_t negFlags; |
| 159 | // ... or any of these flags are not set. |
| 160 | // For example, the memory region attribute "!r" maps to SHF_WRITE. |
| 161 | uint32_t negInvFlags; |
| 162 | uint64_t curPos = 0; |
| 163 | |
| 164 | uint64_t getOrigin() const { return origin().getValue(); } |
| 165 | uint64_t getLength() const { return length().getValue(); } |
| 166 | |
| 167 | bool compatibleWith(uint32_t secFlags) const { |
| 168 | if ((secFlags & negFlags) || (~secFlags & negInvFlags)) |
| 169 | return false; |
| 170 | return (secFlags & flags) || (~secFlags & invFlags); |
| 171 | } |
| 172 | }; |
| 173 | |
| 174 | // This struct represents one section match pattern in SECTIONS() command. |
| 175 | // It can optionally have negative match pattern for EXCLUDED_FILE command. |
| 176 | // Also it may be surrounded with SORT() command, so contains sorting rules. |
| 177 | class SectionPattern { |
| 178 | StringMatcher excludedFilePat; |
| 179 | |
| 180 | // Cache of the most recent input argument and result of excludesFile(). |
| 181 | mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache; |
| 182 | |
| 183 | public: |
| 184 | SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) |
| 185 | : excludedFilePat(pat1), sectionPat(pat2), |
| 186 | sortOuter(SortSectionPolicy::Default), |
| 187 | sortInner(SortSectionPolicy::Default) {} |
| 188 | |
| 189 | bool excludesFile(const InputFile &file) const; |
| 190 | |
| 191 | StringMatcher sectionPat; |
| 192 | SortSectionPolicy sortOuter; |
| 193 | SortSectionPolicy sortInner; |
| 194 | }; |
| 195 | |
| 196 | class InputSectionDescription : public SectionCommand { |
| 197 | enum class MatchType { Trivial, WholeArchive, ArchivesExcluded } matchType; |
| 198 | SingleStringMatcher filePat; |
| 199 | |
| 200 | // Cache of the most recent input argument and result of matchesFile(). |
| 201 | mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache; |
| 202 | |
| 203 | public: |
| 204 | InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, |
| 205 | uint64_t withoutFlags = 0, StringRef classRef = {}) |
| 206 | : SectionCommand(InputSectionKind), matchType(MatchType::Trivial), |
| 207 | filePat(filePattern), classRef(classRef), withFlags(withFlags), |
| 208 | withoutFlags(withoutFlags) { |
| 209 | assert((filePattern.empty() || classRef.empty()) && |
| 210 | "file pattern and class reference are mutually exclusive" ); |
| 211 | |
| 212 | // The matching syntax for whole archives and files outside of an archive |
| 213 | // can't be handled by SingleStringMatcher, and instead are handled |
| 214 | // manually within matchesFile() |
| 215 | if (!filePattern.empty()) { |
| 216 | if (filePattern.back() == ':') { |
| 217 | matchType = MatchType::WholeArchive; |
| 218 | filePat = filePattern.drop_back(); |
| 219 | } else if (filePattern.front() == ':') { |
| 220 | matchType = MatchType::ArchivesExcluded; |
| 221 | filePat = filePattern.drop_front(); |
| 222 | } |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | static bool classof(const SectionCommand *c) { |
| 227 | return c->kind == InputSectionKind; |
| 228 | } |
| 229 | |
| 230 | bool matchesFile(const InputFile &file) const; |
| 231 | |
| 232 | // Input sections that matches at least one of SectionPatterns |
| 233 | // will be associated with this InputSectionDescription. |
| 234 | SmallVector<SectionPattern, 0> sectionPatterns; |
| 235 | |
| 236 | // If present, input section matching uses class membership instead of file |
| 237 | // and section patterns (mutually exclusive). |
| 238 | StringRef classRef; |
| 239 | |
| 240 | // Includes InputSections and MergeInputSections. Used temporarily during |
| 241 | // assignment of input sections to output sections. |
| 242 | SmallVector<InputSectionBase *, 0> sectionBases; |
| 243 | |
| 244 | // Used after the finalizeInputSections() pass. MergeInputSections have been |
| 245 | // merged into MergeSyntheticSections. |
| 246 | SmallVector<InputSection *, 0> sections; |
| 247 | |
| 248 | // Temporary record of synthetic ThunkSection instances and the pass that |
| 249 | // they were created in. This is used to insert newly created ThunkSections |
| 250 | // into Sections at the end of a createThunks() pass. |
| 251 | SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections; |
| 252 | |
| 253 | // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. |
| 254 | uint64_t withFlags; |
| 255 | uint64_t withoutFlags; |
| 256 | }; |
| 257 | |
| 258 | // Represents BYTE(), SHORT(), LONG(), or QUAD(). |
| 259 | struct ByteCommand : SectionCommand { |
| 260 | ByteCommand(Expr e, unsigned size, std::string commandString) |
| 261 | : SectionCommand(ByteKind), commandString(commandString), expression(e), |
| 262 | size(size) {} |
| 263 | |
| 264 | static bool classof(const SectionCommand *c) { return c->kind == ByteKind; } |
| 265 | |
| 266 | // Keeps string representing the command. Used for -Map" is perhaps better. |
| 267 | std::string commandString; |
| 268 | |
| 269 | Expr expression; |
| 270 | |
| 271 | // This is just an offset of this assignment command in the output section. |
| 272 | unsigned offset; |
| 273 | |
| 274 | // Size of this data command. |
| 275 | unsigned size; |
| 276 | }; |
| 277 | |
// One recorded INSERT command.
// NOTE(review): field meanings below are inferred from the names; confirm
// against the parser.
struct InsertCommand {
  // Presumably the names of the output sections to be moved.
  SmallVector<StringRef, 0> names;
  // Presumably true for INSERT AFTER and false for INSERT BEFORE.
  bool isAfter;
  // Presumably the name of the output section used as the anchor.
  StringRef where;
};
| 283 | |
// A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
// certain output sections.
struct NoCrossRefCommand {
  // Names of the output sections the command applies to.
  SmallVector<StringRef, 0> outputSections;

  // When true, this describes a NOCROSSREFS_TO command that prohibits
  // references to the first output section from any of the other sections.
  bool toFirst = false;
};
| 293 | |
// One entry of the PHDRS command list.
// NOTE(review): field meanings below are inferred from the names; confirm
// against the parser.
struct PhdrsCommand {
  // Name of the entry.
  StringRef name;
  // Program header type; PT_NULL unless set otherwise.
  unsigned type = llvm::ELF::PT_NULL;
  // Presumably set by the FILEHDR and PHDRS keywords respectively.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Empty when no flags were given.
  std::optional<unsigned> flags;
  // Null (an empty std::function) when no LMA expression was given.
  Expr lmaExpr = nullptr;
};
| 302 | |
| 303 | class LinkerScript final { |
| 304 | // Temporary state used in processSectionCommands() and assignAddresses() |
| 305 | // that must be reinitialized for each call to the above functions, and must |
| 306 | // not be used outside of the scope of a call to the above functions. |
| 307 | struct AddressState { |
| 308 | AddressState(const LinkerScript &); |
| 309 | OutputSection *outSec = nullptr; |
| 310 | MemoryRegion *memRegion = nullptr; |
| 311 | MemoryRegion *lmaRegion = nullptr; |
| 312 | uint64_t lmaOffset = 0; |
| 313 | uint64_t tbssAddr = 0; |
| 314 | uint64_t overlaySize; |
| 315 | }; |
| 316 | |
| 317 | Ctx &ctx; |
| 318 | SmallVector<std::unique_ptr<OutputDesc>, 0> descPool; |
| 319 | llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection; |
| 320 | |
| 321 | StringRef getOutputSectionName(const InputSectionBase *s) const; |
| 322 | void addSymbol(SymbolAssignment *cmd); |
| 323 | void declareSymbol(SymbolAssignment *cmd); |
| 324 | void assignSymbol(SymbolAssignment *cmd, bool inSec); |
| 325 | void setDot(Expr e, const Twine &loc, bool inSec); |
| 326 | void expandOutputSection(uint64_t size); |
| 327 | void expandMemoryRegions(uint64_t size); |
| 328 | |
| 329 | SmallVector<InputSectionBase *, 0> |
| 330 | computeInputSections(const InputSectionDescription *, |
| 331 | ArrayRef<InputSectionBase *>, const SectionBase &outCmd); |
| 332 | |
| 333 | SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd); |
| 334 | |
| 335 | void discardSynthetic(OutputSection &); |
| 336 | |
| 337 | SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec); |
| 338 | |
| 339 | std::pair<MemoryRegion *, MemoryRegion *> |
| 340 | findMemoryRegion(OutputSection *sec, MemoryRegion *hint); |
| 341 | |
| 342 | bool assignOffsets(OutputSection *sec); |
| 343 | |
| 344 | // This captures the local AddressState and makes it accessible |
| 345 | // deliberately. This is needed as there are some cases where we cannot just |
| 346 | // thread the current state through to a lambda function created by the |
| 347 | // script parser. |
| 348 | // This should remain a plain pointer as its lifetime is smaller than |
| 349 | // LinkerScript. |
| 350 | AddressState *state = nullptr; |
| 351 | |
| 352 | std::unique_ptr<OutputSection> aether; |
| 353 | |
| 354 | uint64_t dot = 0; |
| 355 | |
| 356 | public: |
| 357 | // OutputSection may be incomplete. Avoid inline ctor/dtor. |
| 358 | LinkerScript(Ctx &ctx); |
| 359 | ~LinkerScript(); |
| 360 | |
| 361 | OutputDesc *createOutputSection(StringRef name, StringRef location); |
| 362 | OutputDesc *getOrCreateOutputSection(StringRef name); |
| 363 | |
| 364 | bool hasPhdrsCommands() { return !phdrsCommands.empty(); } |
| 365 | uint64_t getDot() { return dot; } |
| 366 | void discard(InputSectionBase &s); |
| 367 | |
| 368 | ExprValue getSymbolValue(StringRef name, const Twine &loc); |
| 369 | |
| 370 | void addOrphanSections(); |
| 371 | void diagnoseOrphanHandling() const; |
| 372 | void diagnoseMissingSGSectionAddress() const; |
| 373 | void adjustOutputSections(); |
| 374 | void adjustSectionsAfterSorting(); |
| 375 | |
| 376 | SmallVector<std::unique_ptr<PhdrEntry>, 0> createPhdrs(); |
| 377 | bool needsInterpSection(); |
| 378 | |
| 379 | bool shouldKeep(InputSectionBase *s); |
| 380 | std::pair<const OutputSection *, const Defined *> assignAddresses(); |
| 381 | bool spillSections(); |
| 382 | void erasePotentialSpillSections(); |
| 383 | void (SmallVector<std::unique_ptr<PhdrEntry>, 0> &phdrs); |
| 384 | void processSectionCommands(); |
| 385 | void processSymbolAssignments(); |
| 386 | void declareSymbols(); |
| 387 | |
| 388 | // Used to handle INSERT AFTER statements. |
| 389 | void processInsertCommands(); |
| 390 | |
| 391 | // Describe memory region usage. |
| 392 | void printMemoryUsage(raw_ostream &os); |
| 393 | |
| 394 | // Record a pending error during an assignAddresses invocation. |
| 395 | // assignAddresses is executed more than once. Therefore, lld::error should be |
| 396 | // avoided to not report duplicate errors. |
| 397 | void recordError(const Twine &msg); |
| 398 | |
| 399 | // Check backward location counter assignment and memory region/LMA overflows. |
| 400 | void checkFinalScriptConditions() const; |
| 401 | |
| 402 | // Add symbols that are referenced in the linker script to the symbol table. |
| 403 | // Symbols referenced in a PROVIDE command are only added to the symbol table |
| 404 | // if the PROVIDE command actually provides the symbol. |
| 405 | // It also adds the symbols referenced by the used PROVIDE symbols to the |
| 406 | // linker script referenced symbols list. |
| 407 | void addScriptReferencedSymbolsToSymTable(); |
| 408 | |
| 409 | // Returns true if the PROVIDE symbol should be added to the link. |
| 410 | // A PROVIDE symbol is added to the link only if it satisfies an |
| 411 | // undefined reference. |
| 412 | bool shouldAddProvideSym(StringRef symName); |
| 413 | |
| 414 | // SECTIONS command list. |
| 415 | SmallVector<SectionCommand *, 0> sectionCommands; |
| 416 | |
| 417 | // PHDRS command list. |
| 418 | SmallVector<PhdrsCommand, 0> phdrsCommands; |
| 419 | |
| 420 | bool hasSectionsCommand = false; |
| 421 | bool seenDataAlign = false; |
| 422 | bool seenRelroEnd = false; |
| 423 | bool errorOnMissingSection = false; |
| 424 | SmallVector<SmallString<0>, 0> recordedErrors; |
| 425 | |
| 426 | // List of section patterns specified with KEEP commands. They will |
| 427 | // be kept even if they are unused and --gc-sections is specified. |
| 428 | SmallVector<InputSectionDescription *, 0> keptSections; |
| 429 | |
| 430 | // A map from memory region name to a memory region descriptor. |
| 431 | llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; |
| 432 | |
| 433 | // A list of symbols referenced by the script. |
| 434 | SmallVector<llvm::StringRef, 0> referencedSymbols; |
| 435 | |
| 436 | // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need |
| 437 | // to be reordered. |
| 438 | SmallVector<InsertCommand, 0> insertCommands; |
| 439 | |
| 440 | // OutputSections specified by OVERWRITE_SECTIONS. |
| 441 | SmallVector<OutputDesc *, 0> overwriteSections; |
| 442 | |
| 443 | // NOCROSSREFS(_TO) commands. |
| 444 | SmallVector<NoCrossRefCommand, 0> noCrossRefs; |
| 445 | |
| 446 | // Sections that will be warned/errored by --orphan-handling. |
| 447 | SmallVector<const InputSectionBase *, 0> orphanSections; |
| 448 | |
| 449 | // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE |
| 450 | // expression. For example, if the PROVIDE command is: |
| 451 | // |
| 452 | // PROVIDE(v = a + b + c); |
| 453 | // |
| 454 | // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c'] |
| 455 | llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap; |
| 456 | // Store defined symbols that should ignore PROVIDE commands. |
| 457 | llvm::DenseSet<Symbol *> unusedProvideSyms; |
| 458 | |
| 459 | // List of potential spill locations (PotentialSpillSection) for an input |
| 460 | // section. |
| 461 | struct PotentialSpillList { |
| 462 | // Never nullptr. |
| 463 | PotentialSpillSection *head; |
| 464 | PotentialSpillSection *tail; |
| 465 | }; |
| 466 | llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists; |
| 467 | |
| 468 | // Named lists of input sections that can be collectively referenced in output |
| 469 | // section descriptions. Multiple references allow for sections to spill from |
| 470 | // one output section to another. |
| 471 | llvm::DenseMap<llvm::CachedHashStringRef, SectionClassDesc *> sectionClasses; |
| 472 | }; |
| 473 | |
| 474 | } // end namespace lld::elf |
| 475 | |
| 476 | #endif // LLD_ELF_LINKER_SCRIPT_H |
| 477 | |