1 | //===- LinkerScript.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLD_ELF_LINKER_SCRIPT_H |
10 | #define LLD_ELF_LINKER_SCRIPT_H |
11 | |
#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
27 | |
28 | namespace lld::elf { |
29 | |
30 | class Defined; |
31 | class InputFile; |
32 | class InputSection; |
33 | class InputSectionBase; |
34 | class OutputSection; |
35 | class SectionBase; |
36 | class ThunkSection; |
37 | struct OutputDesc; |
38 | struct SectionClass; |
39 | struct SectionClassDesc; |
40 | |
41 | // This represents an r-value in the linker script. |
42 | struct ExprValue { |
43 | ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, |
44 | const Twine &loc) |
45 | : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {} |
46 | |
47 | ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "" ) {} |
48 | |
49 | bool isAbsolute() const { return forceAbsolute || sec == nullptr; } |
50 | uint64_t getValue() const; |
51 | uint64_t getSecAddr() const; |
52 | uint64_t getSectionOffset() const; |
53 | |
54 | // If a value is relative to a section, it has a non-null Sec. |
55 | SectionBase *sec; |
56 | |
57 | uint64_t val; |
58 | uint64_t alignment = 1; |
59 | |
60 | // The original st_type if the expression represents a symbol. Any operation |
61 | // resets type to STT_NOTYPE. |
62 | uint8_t type = llvm::ELF::STT_NOTYPE; |
63 | |
64 | // True if this expression is enclosed in ABSOLUTE(). |
65 | // This flag affects the return value of getValue(). |
66 | bool forceAbsolute; |
67 | |
68 | // Original source location. Used for error messages. |
69 | std::string loc; |
70 | }; |
71 | |
// This represents an expression in the linker script as a nullary callable.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function, which yields an
// ExprValue.
using Expr = std::function<ExprValue()>;
76 | |
// Tags for the commands that make up a linker script SECTIONS command. The
// classof() functions of the command types compare SectionCommand::kind
// against these values.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind,
  // CLASS(class_name).
  ClassKind,
};
86 | |
// Base of the SECTIONS command types declared in this file. `kind` is the
// tag that the derived types' classof() functions test.
struct SectionCommand {
  SectionCommand(int k) : kind{k} {}
  int kind;
};
91 | |
92 | // This represents ". = <expr>" or "<symbol> = <expr>". |
93 | struct SymbolAssignment : SectionCommand { |
94 | SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc) |
95 | : SectionCommand(AssignmentKind), name(name), expression(e), |
96 | symOrder(symOrder), location(loc) {} |
97 | |
98 | static bool classof(const SectionCommand *c) { |
99 | return c->kind == AssignmentKind; |
100 | } |
101 | |
102 | // The LHS of an expression. Name is either a symbol name or ".". |
103 | StringRef name; |
104 | Defined *sym = nullptr; |
105 | |
106 | // The RHS of an expression. |
107 | Expr expression; |
108 | |
109 | // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. |
110 | bool provide = false; |
111 | bool hidden = false; |
112 | |
113 | // This assignment references DATA_SEGMENT_RELRO_END. |
114 | bool dataSegmentRelroEnd = false; |
115 | |
116 | unsigned symOrder; |
117 | |
118 | // Holds file name and line number for error reporting. |
119 | std::string location; |
120 | |
121 | // A string representation of this command. We use this for -Map. |
122 | std::string commandString; |
123 | |
124 | // Address of this assignment command. |
125 | uint64_t addr; |
126 | |
127 | // Size of this assignment command. This is usually 0, but if |
128 | // you move '.' this may be greater than 0. |
129 | uint64_t size; |
130 | }; |
131 | |
// Output sections in a linker script may carry an extra constraint.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
137 | |
138 | // This struct is used to represent the location and size of regions of |
139 | // target memory. Instances of the struct are created by parsing the |
140 | // MEMORY command. |
141 | struct MemoryRegion { |
142 | MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, |
143 | uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags) |
144 | : name(std::string(name)), origin(origin), length(length), flags(flags), |
145 | invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {} |
146 | |
147 | std::string name; |
148 | Expr origin; |
149 | Expr length; |
150 | // A section can be assigned to the region if any of these ELF section flags |
151 | // are set... |
152 | uint32_t flags; |
153 | // ... or any of these flags are not set. |
154 | // For example, the memory region attribute "r" maps to SHF_WRITE. |
155 | uint32_t invFlags; |
156 | // A section cannot be assigned to the region if any of these ELF section |
157 | // flags are set... |
158 | uint32_t negFlags; |
159 | // ... or any of these flags are not set. |
160 | // For example, the memory region attribute "!r" maps to SHF_WRITE. |
161 | uint32_t negInvFlags; |
162 | uint64_t curPos = 0; |
163 | |
164 | uint64_t getOrigin() const { return origin().getValue(); } |
165 | uint64_t getLength() const { return length().getValue(); } |
166 | |
167 | bool compatibleWith(uint32_t secFlags) const { |
168 | if ((secFlags & negFlags) || (~secFlags & negInvFlags)) |
169 | return false; |
170 | return (secFlags & flags) || (~secFlags & invFlags); |
171 | } |
172 | }; |
173 | |
174 | // This struct represents one section match pattern in SECTIONS() command. |
175 | // It can optionally have negative match pattern for EXCLUDED_FILE command. |
176 | // Also it may be surrounded with SORT() command, so contains sorting rules. |
177 | class SectionPattern { |
178 | StringMatcher excludedFilePat; |
179 | |
180 | // Cache of the most recent input argument and result of excludesFile(). |
181 | mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache; |
182 | |
183 | public: |
184 | SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) |
185 | : excludedFilePat(pat1), sectionPat(pat2), |
186 | sortOuter(SortSectionPolicy::Default), |
187 | sortInner(SortSectionPolicy::Default) {} |
188 | |
189 | bool excludesFile(const InputFile &file) const; |
190 | |
191 | StringMatcher sectionPat; |
192 | SortSectionPolicy sortOuter; |
193 | SortSectionPolicy sortInner; |
194 | }; |
195 | |
196 | class InputSectionDescription : public SectionCommand { |
197 | enum class MatchType { Trivial, WholeArchive, ArchivesExcluded } matchType; |
198 | SingleStringMatcher filePat; |
199 | |
200 | // Cache of the most recent input argument and result of matchesFile(). |
201 | mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache; |
202 | |
203 | public: |
204 | InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, |
205 | uint64_t withoutFlags = 0, StringRef classRef = {}) |
206 | : SectionCommand(InputSectionKind), matchType(MatchType::Trivial), |
207 | filePat(filePattern), classRef(classRef), withFlags(withFlags), |
208 | withoutFlags(withoutFlags) { |
209 | assert((filePattern.empty() || classRef.empty()) && |
210 | "file pattern and class reference are mutually exclusive" ); |
211 | |
212 | // The matching syntax for whole archives and files outside of an archive |
213 | // can't be handled by SingleStringMatcher, and instead are handled |
214 | // manually within matchesFile() |
215 | if (!filePattern.empty()) { |
216 | if (filePattern.back() == ':') { |
217 | matchType = MatchType::WholeArchive; |
218 | filePat = filePattern.drop_back(); |
219 | } else if (filePattern.front() == ':') { |
220 | matchType = MatchType::ArchivesExcluded; |
221 | filePat = filePattern.drop_front(); |
222 | } |
223 | } |
224 | } |
225 | |
226 | static bool classof(const SectionCommand *c) { |
227 | return c->kind == InputSectionKind; |
228 | } |
229 | |
230 | bool matchesFile(const InputFile &file) const; |
231 | |
232 | // Input sections that matches at least one of SectionPatterns |
233 | // will be associated with this InputSectionDescription. |
234 | SmallVector<SectionPattern, 0> sectionPatterns; |
235 | |
236 | // If present, input section matching uses class membership instead of file |
237 | // and section patterns (mutually exclusive). |
238 | StringRef classRef; |
239 | |
240 | // Includes InputSections and MergeInputSections. Used temporarily during |
241 | // assignment of input sections to output sections. |
242 | SmallVector<InputSectionBase *, 0> sectionBases; |
243 | |
244 | // Used after the finalizeInputSections() pass. MergeInputSections have been |
245 | // merged into MergeSyntheticSections. |
246 | SmallVector<InputSection *, 0> sections; |
247 | |
248 | // Temporary record of synthetic ThunkSection instances and the pass that |
249 | // they were created in. This is used to insert newly created ThunkSections |
250 | // into Sections at the end of a createThunks() pass. |
251 | SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections; |
252 | |
253 | // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. |
254 | uint64_t withFlags; |
255 | uint64_t withoutFlags; |
256 | }; |
257 | |
258 | // Represents BYTE(), SHORT(), LONG(), or QUAD(). |
259 | struct ByteCommand : SectionCommand { |
260 | ByteCommand(Expr e, unsigned size, std::string commandString) |
261 | : SectionCommand(ByteKind), commandString(commandString), expression(e), |
262 | size(size) {} |
263 | |
264 | static bool classof(const SectionCommand *c) { return c->kind == ByteKind; } |
265 | |
266 | // Keeps string representing the command. Used for -Map" is perhaps better. |
267 | std::string commandString; |
268 | |
269 | Expr expression; |
270 | |
271 | // This is just an offset of this assignment command in the output section. |
272 | unsigned offset; |
273 | |
274 | // Size of this data command. |
275 | unsigned size; |
276 | }; |
277 | |
278 | struct InsertCommand { |
279 | SmallVector<StringRef, 0> names; |
280 | bool isAfter; |
281 | StringRef where; |
282 | }; |
283 | |
// A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
// certain output sections.
struct NoCrossRefCommand {
  SmallVector<StringRef, 0> outputSections;

  // When true, this describes a NOCROSSREFS_TO command that prohibits
  // references to the first output section from any of the other sections.
  bool toFirst = false;
};
293 | |
// Element type of the PHDRS command list (LinkerScript::phdrsCommands).
struct PhdrsCommand {
  StringRef name;
  // Defaults to PT_NULL.
  unsigned type = llvm::ELF::PT_NULL;
  // NOTE(review): presumably set by the FILEHDR and PHDRS keywords of the
  // entry -- confirm against the script parser.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Has no value by default.
  std::optional<unsigned> flags;
  // Defaults to an empty (null) Expr.
  Expr lmaExpr = nullptr;
};
302 | |
303 | class LinkerScript final { |
304 | // Temporary state used in processSectionCommands() and assignAddresses() |
305 | // that must be reinitialized for each call to the above functions, and must |
306 | // not be used outside of the scope of a call to the above functions. |
307 | struct AddressState { |
308 | AddressState(const LinkerScript &); |
309 | OutputSection *outSec = nullptr; |
310 | MemoryRegion *memRegion = nullptr; |
311 | MemoryRegion *lmaRegion = nullptr; |
312 | uint64_t lmaOffset = 0; |
313 | uint64_t tbssAddr = 0; |
314 | uint64_t overlaySize; |
315 | }; |
316 | |
317 | Ctx &ctx; |
318 | SmallVector<std::unique_ptr<OutputDesc>, 0> descPool; |
319 | llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection; |
320 | |
321 | StringRef getOutputSectionName(const InputSectionBase *s) const; |
322 | void addSymbol(SymbolAssignment *cmd); |
323 | void declareSymbol(SymbolAssignment *cmd); |
324 | void assignSymbol(SymbolAssignment *cmd, bool inSec); |
325 | void setDot(Expr e, const Twine &loc, bool inSec); |
326 | void expandOutputSection(uint64_t size); |
327 | void expandMemoryRegions(uint64_t size); |
328 | |
329 | SmallVector<InputSectionBase *, 0> |
330 | computeInputSections(const InputSectionDescription *, |
331 | ArrayRef<InputSectionBase *>, const SectionBase &outCmd); |
332 | |
333 | SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd); |
334 | |
335 | void discardSynthetic(OutputSection &); |
336 | |
337 | SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec); |
338 | |
339 | std::pair<MemoryRegion *, MemoryRegion *> |
340 | findMemoryRegion(OutputSection *sec, MemoryRegion *hint); |
341 | |
342 | bool assignOffsets(OutputSection *sec); |
343 | |
344 | // This captures the local AddressState and makes it accessible |
345 | // deliberately. This is needed as there are some cases where we cannot just |
346 | // thread the current state through to a lambda function created by the |
347 | // script parser. |
348 | // This should remain a plain pointer as its lifetime is smaller than |
349 | // LinkerScript. |
350 | AddressState *state = nullptr; |
351 | |
352 | std::unique_ptr<OutputSection> aether; |
353 | |
354 | uint64_t dot = 0; |
355 | |
356 | public: |
357 | // OutputSection may be incomplete. Avoid inline ctor/dtor. |
358 | LinkerScript(Ctx &ctx); |
359 | ~LinkerScript(); |
360 | |
361 | OutputDesc *createOutputSection(StringRef name, StringRef location); |
362 | OutputDesc *getOrCreateOutputSection(StringRef name); |
363 | |
364 | bool hasPhdrsCommands() { return !phdrsCommands.empty(); } |
365 | uint64_t getDot() { return dot; } |
366 | void discard(InputSectionBase &s); |
367 | |
368 | ExprValue getSymbolValue(StringRef name, const Twine &loc); |
369 | |
370 | void addOrphanSections(); |
371 | void diagnoseOrphanHandling() const; |
372 | void diagnoseMissingSGSectionAddress() const; |
373 | void adjustOutputSections(); |
374 | void adjustSectionsAfterSorting(); |
375 | |
376 | SmallVector<std::unique_ptr<PhdrEntry>, 0> createPhdrs(); |
377 | bool needsInterpSection(); |
378 | |
379 | bool shouldKeep(InputSectionBase *s); |
380 | std::pair<const OutputSection *, const Defined *> assignAddresses(); |
381 | bool spillSections(); |
382 | void erasePotentialSpillSections(); |
383 | void (SmallVector<std::unique_ptr<PhdrEntry>, 0> &phdrs); |
384 | void processSectionCommands(); |
385 | void processSymbolAssignments(); |
386 | void declareSymbols(); |
387 | |
388 | // Used to handle INSERT AFTER statements. |
389 | void processInsertCommands(); |
390 | |
391 | // Describe memory region usage. |
392 | void printMemoryUsage(raw_ostream &os); |
393 | |
394 | // Record a pending error during an assignAddresses invocation. |
395 | // assignAddresses is executed more than once. Therefore, lld::error should be |
396 | // avoided to not report duplicate errors. |
397 | void recordError(const Twine &msg); |
398 | |
399 | // Check backward location counter assignment and memory region/LMA overflows. |
400 | void checkFinalScriptConditions() const; |
401 | |
402 | // Add symbols that are referenced in the linker script to the symbol table. |
403 | // Symbols referenced in a PROVIDE command are only added to the symbol table |
404 | // if the PROVIDE command actually provides the symbol. |
405 | // It also adds the symbols referenced by the used PROVIDE symbols to the |
406 | // linker script referenced symbols list. |
407 | void addScriptReferencedSymbolsToSymTable(); |
408 | |
409 | // Returns true if the PROVIDE symbol should be added to the link. |
410 | // A PROVIDE symbol is added to the link only if it satisfies an |
411 | // undefined reference. |
412 | bool shouldAddProvideSym(StringRef symName); |
413 | |
414 | // SECTIONS command list. |
415 | SmallVector<SectionCommand *, 0> sectionCommands; |
416 | |
417 | // PHDRS command list. |
418 | SmallVector<PhdrsCommand, 0> phdrsCommands; |
419 | |
420 | bool hasSectionsCommand = false; |
421 | bool seenDataAlign = false; |
422 | bool seenRelroEnd = false; |
423 | bool errorOnMissingSection = false; |
424 | SmallVector<SmallString<0>, 0> recordedErrors; |
425 | |
426 | // List of section patterns specified with KEEP commands. They will |
427 | // be kept even if they are unused and --gc-sections is specified. |
428 | SmallVector<InputSectionDescription *, 0> keptSections; |
429 | |
430 | // A map from memory region name to a memory region descriptor. |
431 | llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; |
432 | |
433 | // A list of symbols referenced by the script. |
434 | SmallVector<llvm::StringRef, 0> referencedSymbols; |
435 | |
436 | // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need |
437 | // to be reordered. |
438 | SmallVector<InsertCommand, 0> insertCommands; |
439 | |
440 | // OutputSections specified by OVERWRITE_SECTIONS. |
441 | SmallVector<OutputDesc *, 0> overwriteSections; |
442 | |
443 | // NOCROSSREFS(_TO) commands. |
444 | SmallVector<NoCrossRefCommand, 0> noCrossRefs; |
445 | |
446 | // Sections that will be warned/errored by --orphan-handling. |
447 | SmallVector<const InputSectionBase *, 0> orphanSections; |
448 | |
449 | // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE |
450 | // expression. For example, if the PROVIDE command is: |
451 | // |
452 | // PROVIDE(v = a + b + c); |
453 | // |
454 | // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c'] |
455 | llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap; |
456 | // Store defined symbols that should ignore PROVIDE commands. |
457 | llvm::DenseSet<Symbol *> unusedProvideSyms; |
458 | |
459 | // List of potential spill locations (PotentialSpillSection) for an input |
460 | // section. |
461 | struct PotentialSpillList { |
462 | // Never nullptr. |
463 | PotentialSpillSection *head; |
464 | PotentialSpillSection *tail; |
465 | }; |
466 | llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists; |
467 | |
468 | // Named lists of input sections that can be collectively referenced in output |
469 | // section descriptions. Multiple references allow for sections to spill from |
470 | // one output section to another. |
471 | llvm::DenseMap<llvm::CachedHashStringRef, SectionClassDesc *> sectionClasses; |
472 | }; |
473 | |
474 | } // end namespace lld::elf |
475 | |
476 | #endif // LLD_ELF_LINKER_SCRIPT_H |
477 | |