1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_LINKER_SCRIPT_H
10#define LLD_ELF_LINKER_SCRIPT_H
11
#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
27
28namespace lld::elf {
29
30class Defined;
31class InputFile;
32class InputSection;
33class InputSectionBase;
34class OutputSection;
35class SectionBase;
36class ThunkSection;
37struct OutputDesc;
38struct SectionClass;
39struct SectionClassDesc;
40
41// This represents an r-value in the linker script.
42struct ExprValue {
43 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
44 const Twine &loc)
45 : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
46
47 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
48
49 bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
50 uint64_t getValue() const;
51 uint64_t getSecAddr() const;
52 uint64_t getSectionOffset() const;
53
54 // If a value is relative to a section, it has a non-null Sec.
55 SectionBase *sec;
56
57 uint64_t val;
58 uint64_t alignment = 1;
59
60 // The original st_type if the expression represents a symbol. Any operation
61 // resets type to STT_NOTYPE.
62 uint8_t type = llvm::ELF::STT_NOTYPE;
63
64 // True if this expression is enclosed in ABSOLUTE().
65 // This flag affects the return value of getValue().
66 bool forceAbsolute;
67
68 // Original source location. Used for error messages.
69 std::string loc;
70};
71
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function.
//
// An Expr is a callable taking no arguments and returning an ExprValue; each
// call evaluates the expression again. Being a std::function, an Expr may
// also be empty (e.g. initialized from nullptr) to mean "no expression".
using Expr = std::function<ExprValue()>;
76
// This enum is used to implement linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
//
// The enumerators are stored in SectionCommand::kind (an int) and compared in
// the classof() functions of the derived command types.
enum SectionsCommandKind {
  AssignmentKind, // . = expr or <sym> = expr
  OutputSectionKind, // presumably an output section description - confirm
  InputSectionKind, // tag used by InputSectionDescription
  ByteKind,  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ClassKind, // CLASS(class_name)
};
86
// Base type shared by the commands declared in this header. The only state it
// carries is a kind tag, which the derived types' classof() functions compare
// against a SectionsCommandKind enumerator.
struct SectionCommand {
  // Kind tag identifying the concrete command type.
  int kind;

  // Stores the given kind tag verbatim.
  SectionCommand(int commandKind) : kind(commandKind) {}
};
91
92// This represents ". = <expr>" or "<symbol> = <expr>".
93struct SymbolAssignment : SectionCommand {
94 SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
95 : SectionCommand(AssignmentKind), name(name), expression(e),
96 symOrder(symOrder), location(loc) {}
97
98 static bool classof(const SectionCommand *c) {
99 return c->kind == AssignmentKind;
100 }
101
102 // The LHS of an expression. Name is either a symbol name or ".".
103 StringRef name;
104 Defined *sym = nullptr;
105
106 // The RHS of an expression.
107 Expr expression;
108
109 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
110 bool provide = false;
111 bool hidden = false;
112
113 // This assignment references DATA_SEGMENT_RELRO_END.
114 bool dataSegmentRelroEnd = false;
115
116 unsigned symOrder;
117
118 // Holds file name and line number for error reporting.
119 std::string location;
120
121 // A string representation of this command. We use this for -Map.
122 std::string commandString;
123
124 // Address of this assignment command.
125 uint64_t addr;
126
127 // Size of this assignment command. This is usually 0, but if
128 // you move '.' this may be greater than 0.
129 uint64_t size;
130};
131
// Linker scripts allow additional constraints to be put on output sections.
// If an output section is marked as ONLY_IF_RO, the section is created
// only if its input sections are read-only. Likewise, an output section
// with ONLY_IF_RW is created if all input sections are RW.
// NoConstraint is the first enumerator; ReadOnly and ReadWrite presumably
// correspond to ONLY_IF_RO and ONLY_IF_RW respectively - confirm in the parser.
enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite };
137
138// This struct is used to represent the location and size of regions of
139// target memory. Instances of the struct are created by parsing the
140// MEMORY command.
141struct MemoryRegion {
142 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
143 uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
144 : name(std::string(name)), origin(origin), length(length), flags(flags),
145 invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
146
147 std::string name;
148 Expr origin;
149 Expr length;
150 // A section can be assigned to the region if any of these ELF section flags
151 // are set...
152 uint32_t flags;
153 // ... or any of these flags are not set.
154 // For example, the memory region attribute "r" maps to SHF_WRITE.
155 uint32_t invFlags;
156 // A section cannot be assigned to the region if any of these ELF section
157 // flags are set...
158 uint32_t negFlags;
159 // ... or any of these flags are not set.
160 // For example, the memory region attribute "!r" maps to SHF_WRITE.
161 uint32_t negInvFlags;
162 uint64_t curPos = 0;
163
164 uint64_t getOrigin() const { return origin().getValue(); }
165 uint64_t getLength() const { return length().getValue(); }
166
167 bool compatibleWith(uint32_t secFlags) const {
168 if ((secFlags & negFlags) || (~secFlags & negInvFlags))
169 return false;
170 return (secFlags & flags) || (~secFlags & invFlags);
171 }
172};
173
174// This struct represents one section match pattern in SECTIONS() command.
175// It can optionally have negative match pattern for EXCLUDED_FILE command.
176// Also it may be surrounded with SORT() command, so contains sorting rules.
177class SectionPattern {
178 StringMatcher excludedFilePat;
179
180 // Cache of the most recent input argument and result of excludesFile().
181 mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
182
183public:
184 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
185 : excludedFilePat(pat1), sectionPat(pat2),
186 sortOuter(SortSectionPolicy::Default),
187 sortInner(SortSectionPolicy::Default) {}
188
189 bool excludesFile(const InputFile &file) const;
190
191 StringMatcher sectionPat;
192 SortSectionPolicy sortOuter;
193 SortSectionPolicy sortInner;
194};
195
196class InputSectionDescription : public SectionCommand {
197 enum class MatchType { Trivial, WholeArchive, ArchivesExcluded } matchType;
198 SingleStringMatcher filePat;
199
200 // Cache of the most recent input argument and result of matchesFile().
201 mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
202
203public:
204 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
205 uint64_t withoutFlags = 0, StringRef classRef = {})
206 : SectionCommand(InputSectionKind), matchType(MatchType::Trivial),
207 filePat(filePattern), classRef(classRef), withFlags(withFlags),
208 withoutFlags(withoutFlags) {
209 assert((filePattern.empty() || classRef.empty()) &&
210 "file pattern and class reference are mutually exclusive");
211
212 // The matching syntax for whole archives and files outside of an archive
213 // can't be handled by SingleStringMatcher, and instead are handled
214 // manually within matchesFile()
215 if (!filePattern.empty()) {
216 if (filePattern.back() == ':') {
217 matchType = MatchType::WholeArchive;
218 filePat = filePattern.drop_back();
219 } else if (filePattern.front() == ':') {
220 matchType = MatchType::ArchivesExcluded;
221 filePat = filePattern.drop_front();
222 }
223 }
224 }
225
226 static bool classof(const SectionCommand *c) {
227 return c->kind == InputSectionKind;
228 }
229
230 bool matchesFile(const InputFile &file) const;
231
232 // Input sections that matches at least one of SectionPatterns
233 // will be associated with this InputSectionDescription.
234 SmallVector<SectionPattern, 0> sectionPatterns;
235
236 // If present, input section matching uses class membership instead of file
237 // and section patterns (mutually exclusive).
238 StringRef classRef;
239
240 // Includes InputSections and MergeInputSections. Used temporarily during
241 // assignment of input sections to output sections.
242 SmallVector<InputSectionBase *, 0> sectionBases;
243
244 // Used after the finalizeInputSections() pass. MergeInputSections have been
245 // merged into MergeSyntheticSections.
246 SmallVector<InputSection *, 0> sections;
247
248 // Temporary record of synthetic ThunkSection instances and the pass that
249 // they were created in. This is used to insert newly created ThunkSections
250 // into Sections at the end of a createThunks() pass.
251 SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
252
253 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
254 uint64_t withFlags;
255 uint64_t withoutFlags;
256};
257
258// Represents BYTE(), SHORT(), LONG(), or QUAD().
259struct ByteCommand : SectionCommand {
260 ByteCommand(Expr e, unsigned size, std::string commandString)
261 : SectionCommand(ByteKind), commandString(commandString), expression(e),
262 size(size) {}
263
264 static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
265
266 // Keeps string representing the command. Used for -Map" is perhaps better.
267 std::string commandString;
268
269 Expr expression;
270
271 // This is just an offset of this assignment command in the output section.
272 unsigned offset;
273
274 // Size of this data command.
275 unsigned size;
276};
277
278struct InsertCommand {
279 SmallVector<StringRef, 0> names;
280 bool isAfter;
281 StringRef where;
282};
283
// A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
// certain output sections.
struct NoCrossRefCommand {
  // Output section names the command applies to.
  SmallVector<StringRef, 0> outputSections;

  // When true, this describes a NOCROSSREFS_TO command that prohibits
  // references to the first output section from any of the other sections.
  bool toFirst = false;
};
293
// One entry of the PHDRS command list (see LinkerScript::phdrsCommands).
struct PhdrsCommand {
  StringRef name;
  // Program header type; PT_NULL unless set otherwise.
  unsigned type = llvm::ELF::PT_NULL;
  // NOTE(review): presumably set by the FILEHDR and PHDRS keywords of a PHDRS
  // entry - confirm against the parser.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Empty unless flags were given explicitly.
  std::optional<unsigned> flags;
  // Empty (null) Expr by default. NOTE(review): presumably the AT(address)
  // expression of the entry - confirm against the parser.
  Expr lmaExpr = nullptr;
};
302
303class LinkerScript final {
304 // Temporary state used in processSectionCommands() and assignAddresses()
305 // that must be reinitialized for each call to the above functions, and must
306 // not be used outside of the scope of a call to the above functions.
307 struct AddressState {
308 AddressState(const LinkerScript &);
309 OutputSection *outSec = nullptr;
310 MemoryRegion *memRegion = nullptr;
311 MemoryRegion *lmaRegion = nullptr;
312 uint64_t lmaOffset = 0;
313 uint64_t tbssAddr = 0;
314 uint64_t overlaySize;
315 };
316
317 Ctx &ctx;
318 SmallVector<std::unique_ptr<OutputDesc>, 0> descPool;
319 llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
320
321 StringRef getOutputSectionName(const InputSectionBase *s) const;
322 void addSymbol(SymbolAssignment *cmd);
323 void declareSymbol(SymbolAssignment *cmd);
324 void assignSymbol(SymbolAssignment *cmd, bool inSec);
325 void setDot(Expr e, const Twine &loc, bool inSec);
326 void expandOutputSection(uint64_t size);
327 void expandMemoryRegions(uint64_t size);
328
329 SmallVector<InputSectionBase *, 0>
330 computeInputSections(const InputSectionDescription *,
331 ArrayRef<InputSectionBase *>, const SectionBase &outCmd);
332
333 SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
334
335 void discardSynthetic(OutputSection &);
336
337 SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
338
339 std::pair<MemoryRegion *, MemoryRegion *>
340 findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
341
342 bool assignOffsets(OutputSection *sec);
343
344 // This captures the local AddressState and makes it accessible
345 // deliberately. This is needed as there are some cases where we cannot just
346 // thread the current state through to a lambda function created by the
347 // script parser.
348 // This should remain a plain pointer as its lifetime is smaller than
349 // LinkerScript.
350 AddressState *state = nullptr;
351
352 std::unique_ptr<OutputSection> aether;
353
354 uint64_t dot = 0;
355
356public:
357 // OutputSection may be incomplete. Avoid inline ctor/dtor.
358 LinkerScript(Ctx &ctx);
359 ~LinkerScript();
360
361 OutputDesc *createOutputSection(StringRef name, StringRef location);
362 OutputDesc *getOrCreateOutputSection(StringRef name);
363
364 bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
365 uint64_t getDot() { return dot; }
366 void discard(InputSectionBase &s);
367
368 ExprValue getSymbolValue(StringRef name, const Twine &loc);
369
370 void addOrphanSections();
371 void diagnoseOrphanHandling() const;
372 void diagnoseMissingSGSectionAddress() const;
373 void adjustOutputSections();
374 void adjustSectionsAfterSorting();
375
376 SmallVector<std::unique_ptr<PhdrEntry>, 0> createPhdrs();
377 bool needsInterpSection();
378
379 bool shouldKeep(InputSectionBase *s);
380 std::pair<const OutputSection *, const Defined *> assignAddresses();
381 bool spillSections();
382 void erasePotentialSpillSections();
383 void allocateHeaders(SmallVector<std::unique_ptr<PhdrEntry>, 0> &phdrs);
384 void processSectionCommands();
385 void processSymbolAssignments();
386 void declareSymbols();
387
388 // Used to handle INSERT AFTER statements.
389 void processInsertCommands();
390
391 // Describe memory region usage.
392 void printMemoryUsage(raw_ostream &os);
393
394 // Record a pending error during an assignAddresses invocation.
395 // assignAddresses is executed more than once. Therefore, lld::error should be
396 // avoided to not report duplicate errors.
397 void recordError(const Twine &msg);
398
399 // Check backward location counter assignment and memory region/LMA overflows.
400 void checkFinalScriptConditions() const;
401
402 // Add symbols that are referenced in the linker script to the symbol table.
403 // Symbols referenced in a PROVIDE command are only added to the symbol table
404 // if the PROVIDE command actually provides the symbol.
405 // It also adds the symbols referenced by the used PROVIDE symbols to the
406 // linker script referenced symbols list.
407 void addScriptReferencedSymbolsToSymTable();
408
409 // Returns true if the PROVIDE symbol should be added to the link.
410 // A PROVIDE symbol is added to the link only if it satisfies an
411 // undefined reference.
412 bool shouldAddProvideSym(StringRef symName);
413
414 // SECTIONS command list.
415 SmallVector<SectionCommand *, 0> sectionCommands;
416
417 // PHDRS command list.
418 SmallVector<PhdrsCommand, 0> phdrsCommands;
419
420 bool hasSectionsCommand = false;
421 bool seenDataAlign = false;
422 bool seenRelroEnd = false;
423 bool errorOnMissingSection = false;
424 SmallVector<SmallString<0>, 0> recordedErrors;
425
426 // List of section patterns specified with KEEP commands. They will
427 // be kept even if they are unused and --gc-sections is specified.
428 SmallVector<InputSectionDescription *, 0> keptSections;
429
430 // A map from memory region name to a memory region descriptor.
431 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
432
433 // A list of symbols referenced by the script.
434 SmallVector<llvm::StringRef, 0> referencedSymbols;
435
436 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
437 // to be reordered.
438 SmallVector<InsertCommand, 0> insertCommands;
439
440 // OutputSections specified by OVERWRITE_SECTIONS.
441 SmallVector<OutputDesc *, 0> overwriteSections;
442
443 // NOCROSSREFS(_TO) commands.
444 SmallVector<NoCrossRefCommand, 0> noCrossRefs;
445
446 // Sections that will be warned/errored by --orphan-handling.
447 SmallVector<const InputSectionBase *, 0> orphanSections;
448
449 // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
450 // expression. For example, if the PROVIDE command is:
451 //
452 // PROVIDE(v = a + b + c);
453 //
454 // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
455 llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
456 // Store defined symbols that should ignore PROVIDE commands.
457 llvm::DenseSet<Symbol *> unusedProvideSyms;
458
459 // List of potential spill locations (PotentialSpillSection) for an input
460 // section.
461 struct PotentialSpillList {
462 // Never nullptr.
463 PotentialSpillSection *head;
464 PotentialSpillSection *tail;
465 };
466 llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;
467
468 // Named lists of input sections that can be collectively referenced in output
469 // section descriptions. Multiple references allow for sections to spill from
470 // one output section to another.
471 llvm::DenseMap<llvm::CachedHashStringRef, SectionClassDesc *> sectionClasses;
472};
473
474} // end namespace lld::elf
475
476#endif // LLD_ELF_LINKER_SCRIPT_H
477