1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_LINKER_SCRIPT_H
10#define LLD_ELF_LINKER_SCRIPT_H
11
#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
27
28namespace lld::elf {
29
// Forward declarations. Within this header these types are only used through
// pointers or references.
class Defined;
class InputFile;
class InputSection;
class InputSectionBase;
struct OutputDesc;
class OutputSection;
class SectionBase;
class ThunkSection;
38
39// This represents an r-value in the linker script.
40struct ExprValue {
41 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42 const Twine &loc)
43 : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
44
45 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46
47 bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48 uint64_t getValue() const;
49 uint64_t getSecAddr() const;
50 uint64_t getSectionOffset() const;
51
52 // If a value is relative to a section, it has a non-null Sec.
53 SectionBase *sec;
54
55 uint64_t val;
56 uint64_t alignment = 1;
57
58 // The original st_type if the expression represents a symbol. Any operation
59 // resets type to STT_NOTYPE.
60 uint8_t type = llvm::ELF::STT_NOTYPE;
61
62 // True if this expression is enclosed in ABSOLUTE().
63 // This flag affects the return value of getValue().
64 bool forceAbsolute;
65
66 // Original source location. Used for error messages.
67 std::string loc;
68};
69
70// This represents an expression in the linker script.
71// ScriptParser::readExpr reads an expression and returns an Expr.
72// Later, we evaluate the expression by calling the function.
73using Expr = std::function<ExprValue()>;
74
// Tags for the commands that make up a linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  AssignmentKind = 0, // . = expr or <sym> = expr
  OutputSectionKind = 1,
  InputSectionKind = 2,
  ByteKind = 3, // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
};
83
// Base of the SECTIONS command types. `kind` is the tag that the derived
// types' classof() functions compare against a SectionsCommandKind value.
struct SectionCommand {
  int kind;

  SectionCommand(int k) : kind{k} {}
};
88
89// This represents ". = <expr>" or "<symbol> = <expr>".
90struct SymbolAssignment : SectionCommand {
91 SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
92 : SectionCommand(AssignmentKind), name(name), expression(e),
93 symOrder(symOrder), location(loc) {}
94
95 static bool classof(const SectionCommand *c) {
96 return c->kind == AssignmentKind;
97 }
98
99 // The LHS of an expression. Name is either a symbol name or ".".
100 StringRef name;
101 Defined *sym = nullptr;
102
103 // The RHS of an expression.
104 Expr expression;
105
106 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
107 bool provide = false;
108 bool hidden = false;
109
110 // This assignment references DATA_SEGMENT_RELRO_END.
111 bool dataSegmentRelroEnd = false;
112
113 unsigned symOrder;
114
115 // Holds file name and line number for error reporting.
116 std::string location;
117
118 // A string representation of this command. We use this for -Map.
119 std::string commandString;
120
121 // Address of this assignment command.
122 uint64_t addr;
123
124 // Size of this assignment command. This is usually 0, but if
125 // you move '.' this may be greater than 0.
126 uint64_t size;
127};
128
// Optional constraint that a linker script may attach to an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all of its
// input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
134
135// This struct is used to represent the location and size of regions of
136// target memory. Instances of the struct are created by parsing the
137// MEMORY command.
138struct MemoryRegion {
139 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
140 uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
141 : name(std::string(name)), origin(origin), length(length), flags(flags),
142 invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
143
144 std::string name;
145 Expr origin;
146 Expr length;
147 // A section can be assigned to the region if any of these ELF section flags
148 // are set...
149 uint32_t flags;
150 // ... or any of these flags are not set.
151 // For example, the memory region attribute "r" maps to SHF_WRITE.
152 uint32_t invFlags;
153 // A section cannot be assigned to the region if any of these ELF section
154 // flags are set...
155 uint32_t negFlags;
156 // ... or any of these flags are not set.
157 // For example, the memory region attribute "!r" maps to SHF_WRITE.
158 uint32_t negInvFlags;
159 uint64_t curPos = 0;
160
161 uint64_t getOrigin() const { return origin().getValue(); }
162 uint64_t getLength() const { return length().getValue(); }
163
164 bool compatibleWith(uint32_t secFlags) const {
165 if ((secFlags & negFlags) || (~secFlags & negInvFlags))
166 return false;
167 return (secFlags & flags) || (~secFlags & invFlags);
168 }
169};
170
171// This struct represents one section match pattern in SECTIONS() command.
172// It can optionally have negative match pattern for EXCLUDED_FILE command.
173// Also it may be surrounded with SORT() command, so contains sorting rules.
174class SectionPattern {
175 StringMatcher excludedFilePat;
176
177 // Cache of the most recent input argument and result of excludesFile().
178 mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
179
180public:
181 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
182 : excludedFilePat(pat1), sectionPat(pat2),
183 sortOuter(SortSectionPolicy::Default),
184 sortInner(SortSectionPolicy::Default) {}
185
186 bool excludesFile(const InputFile *file) const;
187
188 StringMatcher sectionPat;
189 SortSectionPolicy sortOuter;
190 SortSectionPolicy sortInner;
191};
192
193class InputSectionDescription : public SectionCommand {
194 SingleStringMatcher filePat;
195
196 // Cache of the most recent input argument and result of matchesFile().
197 mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
198
199public:
200 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
201 uint64_t withoutFlags = 0)
202 : SectionCommand(InputSectionKind), filePat(filePattern),
203 withFlags(withFlags), withoutFlags(withoutFlags) {}
204
205 static bool classof(const SectionCommand *c) {
206 return c->kind == InputSectionKind;
207 }
208
209 bool matchesFile(const InputFile *file) const;
210
211 // Input sections that matches at least one of SectionPatterns
212 // will be associated with this InputSectionDescription.
213 SmallVector<SectionPattern, 0> sectionPatterns;
214
215 // Includes InputSections and MergeInputSections. Used temporarily during
216 // assignment of input sections to output sections.
217 SmallVector<InputSectionBase *, 0> sectionBases;
218
219 // Used after the finalizeInputSections() pass. MergeInputSections have been
220 // merged into MergeSyntheticSections.
221 SmallVector<InputSection *, 0> sections;
222
223 // Temporary record of synthetic ThunkSection instances and the pass that
224 // they were created in. This is used to insert newly created ThunkSections
225 // into Sections at the end of a createThunks() pass.
226 SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
227
228 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
229 uint64_t withFlags;
230 uint64_t withoutFlags;
231};
232
233// Represents BYTE(), SHORT(), LONG(), or QUAD().
234struct ByteCommand : SectionCommand {
235 ByteCommand(Expr e, unsigned size, std::string commandString)
236 : SectionCommand(ByteKind), commandString(commandString), expression(e),
237 size(size) {}
238
239 static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
240
241 // Keeps string representing the command. Used for -Map" is perhaps better.
242 std::string commandString;
243
244 Expr expression;
245
246 // This is just an offset of this assignment command in the output section.
247 unsigned offset;
248
249 // Size of this data command.
250 unsigned size;
251};
252
253struct InsertCommand {
254 SmallVector<StringRef, 0> names;
255 bool isAfter;
256 StringRef where;
257};
258
259// A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
260// certain output sections.
261struct NoCrossRefCommand {
262 SmallVector<StringRef, 0> outputSections;
263
264 // When true, this describes a NOCROSSREFS_TO command that probits references
265 // to the first output section from any of the other sections.
266 bool toFirst = false;
267};
268
269struct PhdrsCommand {
270 StringRef name;
271 unsigned type = llvm::ELF::PT_NULL;
272 bool hasFilehdr = false;
273 bool hasPhdrs = false;
274 std::optional<unsigned> flags;
275 Expr lmaExpr = nullptr;
276};
277
278class LinkerScript final {
279 // Temporary state used in processSectionCommands() and assignAddresses()
280 // that must be reinitialized for each call to the above functions, and must
281 // not be used outside of the scope of a call to the above functions.
282 struct AddressState {
283 AddressState();
284 OutputSection *outSec = nullptr;
285 MemoryRegion *memRegion = nullptr;
286 MemoryRegion *lmaRegion = nullptr;
287 uint64_t lmaOffset = 0;
288 uint64_t tbssAddr = 0;
289 };
290
291 llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
292
293 void addSymbol(SymbolAssignment *cmd);
294 void assignSymbol(SymbolAssignment *cmd, bool inSec);
295 void setDot(Expr e, const Twine &loc, bool inSec);
296 void expandOutputSection(uint64_t size);
297 void expandMemoryRegions(uint64_t size);
298
299 SmallVector<InputSectionBase *, 0>
300 computeInputSections(const InputSectionDescription *,
301 ArrayRef<InputSectionBase *>,
302 const OutputSection &outCmd);
303
304 SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
305
306 void discardSynthetic(OutputSection &);
307
308 SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
309
310 std::pair<MemoryRegion *, MemoryRegion *>
311 findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
312
313 bool assignOffsets(OutputSection *sec);
314
315 // This captures the local AddressState and makes it accessible
316 // deliberately. This is needed as there are some cases where we cannot just
317 // thread the current state through to a lambda function created by the
318 // script parser.
319 // This should remain a plain pointer as its lifetime is smaller than
320 // LinkerScript.
321 AddressState *state = nullptr;
322
323 OutputSection *aether;
324
325 uint64_t dot;
326
327public:
328 OutputDesc *createOutputSection(StringRef name, StringRef location);
329 OutputDesc *getOrCreateOutputSection(StringRef name);
330
331 bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
332 uint64_t getDot() { return dot; }
333 void discard(InputSectionBase &s);
334
335 ExprValue getSymbolValue(StringRef name, const Twine &loc);
336
337 void addOrphanSections();
338 void diagnoseOrphanHandling() const;
339 void diagnoseMissingSGSectionAddress() const;
340 void adjustOutputSections();
341 void adjustSectionsAfterSorting();
342
343 SmallVector<PhdrEntry *, 0> createPhdrs();
344 bool needsInterpSection();
345
346 bool shouldKeep(InputSectionBase *s);
347 std::pair<const OutputSection *, const Defined *> assignAddresses();
348 bool spillSections();
349 void erasePotentialSpillSections();
350 void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
351 void processSectionCommands();
352 void processSymbolAssignments();
353 void declareSymbols();
354
355 // Used to handle INSERT AFTER statements.
356 void processInsertCommands();
357
358 // Describe memory region usage.
359 void printMemoryUsage(raw_ostream &os);
360
361 // Record a pending error during an assignAddresses invocation.
362 // assignAddresses is executed more than once. Therefore, lld::error should be
363 // avoided to not report duplicate errors.
364 void recordError(const Twine &msg);
365
366 // Check backward location counter assignment and memory region/LMA overflows.
367 void checkFinalScriptConditions() const;
368
369 // Add symbols that are referenced in the linker script to the symbol table.
370 // Symbols referenced in a PROVIDE command are only added to the symbol table
371 // if the PROVIDE command actually provides the symbol.
372 // It also adds the symbols referenced by the used PROVIDE symbols to the
373 // linker script referenced symbols list.
374 void addScriptReferencedSymbolsToSymTable();
375
376 // Returns true if the PROVIDE symbol should be added to the link.
377 // A PROVIDE symbol is added to the link only if it satisfies an
378 // undefined reference.
379 static bool shouldAddProvideSym(StringRef symName);
380
381 // SECTIONS command list.
382 SmallVector<SectionCommand *, 0> sectionCommands;
383
384 // PHDRS command list.
385 SmallVector<PhdrsCommand, 0> phdrsCommands;
386
387 bool hasSectionsCommand = false;
388 bool seenDataAlign = false;
389 bool seenRelroEnd = false;
390 bool errorOnMissingSection = false;
391 SmallVector<SmallString<0>, 0> recordedErrors;
392
393 // List of section patterns specified with KEEP commands. They will
394 // be kept even if they are unused and --gc-sections is specified.
395 SmallVector<InputSectionDescription *, 0> keptSections;
396
397 // A map from memory region name to a memory region descriptor.
398 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
399
400 // A list of symbols referenced by the script.
401 SmallVector<llvm::StringRef, 0> referencedSymbols;
402
403 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
404 // to be reordered.
405 SmallVector<InsertCommand, 0> insertCommands;
406
407 // OutputSections specified by OVERWRITE_SECTIONS.
408 SmallVector<OutputDesc *, 0> overwriteSections;
409
410 // NOCROSSREFS(_TO) commands.
411 SmallVector<NoCrossRefCommand, 0> noCrossRefs;
412
413 // Sections that will be warned/errored by --orphan-handling.
414 SmallVector<const InputSectionBase *, 0> orphanSections;
415
416 // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
417 // expression. For example, if the PROVIDE command is:
418 //
419 // PROVIDE(v = a + b + c);
420 //
421 // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
422 llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
423
424 // List of potential spill locations (PotentialSpillSection) for an input
425 // section.
426 struct PotentialSpillList {
427 // Never nullptr.
428 PotentialSpillSection *head;
429 PotentialSpillSection *tail;
430 };
431 llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;
432};
433
434struct ScriptWrapper {
435 LinkerScript s;
436 LinkerScript *operator->() { return &s; }
437};
438
439LLVM_LIBRARY_VISIBILITY extern ScriptWrapper script;
440
441} // end namespace lld::elf
442
443#endif // LLD_ELF_LINKER_SCRIPT_H
444