1//===- ScriptParser.cpp ---------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file contains a recursive-descent parser for linker scripts.
10// Parsed results are stored to Config and Script global objects.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ScriptParser.h"
15#include "Config.h"
16#include "Driver.h"
17#include "InputFiles.h"
18#include "LinkerScript.h"
19#include "OutputSections.h"
20#include "ScriptLexer.h"
21#include "SymbolTable.h"
22#include "Symbols.h"
23#include "Target.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/ADT/StringSwitch.h"
27#include "llvm/BinaryFormat/ELF.h"
28#include "llvm/Support/Casting.h"
29#include "llvm/Support/ErrorHandling.h"
30#include "llvm/Support/FileSystem.h"
31#include "llvm/Support/MathExtras.h"
32#include "llvm/Support/Path.h"
33#include "llvm/Support/SaveAndRestore.h"
34#include "llvm/Support/TimeProfiler.h"
35#include <cassert>
36#include <optional>
37#include <vector>
38
39using namespace llvm;
40using namespace llvm::ELF;
41using namespace llvm::support::endian;
42using namespace lld;
43using namespace lld::elf;
44
45namespace {
46class ScriptParser final : ScriptLexer {
47public:
48 ScriptParser(Ctx &ctx, MemoryBufferRef mb) : ScriptLexer(ctx, mb), ctx(ctx) {}
49
50 void readLinkerScript();
51 void readVersionScript();
52 void readDynamicList();
53 void readDefsym();
54
55private:
56 void addFile(StringRef path);
57
58 void readAsNeeded();
59 void readEntry();
60 void readExtern();
61 void readGroup();
62 void readInclude(llvm::function_ref<void()> parse);
63 void readInput();
64 void readLinkerScriptStmt(StringRef tok);
65 void readMemory();
66 void readMemoryStmt(StringRef tok);
67 void readOutput();
68 void readOutputArch();
69 void readOutputFormat();
70 void readOverwriteSections();
71 void readPhdrs();
72 void readRegionAlias();
73 void readSearchDir();
74 void readSections();
75 void readSectionsStmt(SmallVectorImpl<SectionCommand *> &v, StringRef tok);
76 void readOutputSectionStmt(OutputSection &osec, StringRef tok);
77 void readStmts(llvm::function_ref<void(StringRef)> readStmt);
78 void readTarget();
79 void readVersion();
80 void readVersionScriptCommand();
81 void readNoCrossRefs(bool to);
82
83 StringRef readName();
84 SymbolAssignment *readSymbolAssignment(StringRef name);
85 ByteCommand *readByteCommand(StringRef tok);
86 std::array<uint8_t, 4> readFill();
87 bool readSectionDirective(OutputSection *cmd, StringRef tok);
88 void readSectionAddressType(OutputSection *cmd);
89 OutputDesc *readOverlaySectionDescription();
90 OutputDesc *readOutputSectionDescription(StringRef outSec);
91 SmallVector<SectionCommand *, 0> readOverlay();
92 SectionClassDesc *readSectionClassDescription();
93 StringRef readSectionClassName();
94 SmallVector<StringRef, 0> readOutputSectionPhdrs();
95 std::pair<uint64_t, uint64_t> readInputSectionFlags();
96 InputSectionDescription *readInputSectionDescription(StringRef tok);
97 StringMatcher readFilePatterns();
98 SmallVector<SectionPattern, 0> readInputSectionsList();
99 InputSectionDescription *readInputSectionRules(StringRef filePattern,
100 uint64_t withFlags,
101 uint64_t withoutFlags);
102 unsigned readPhdrType();
103 SortSectionPolicy peekSortKind();
104 SortSectionPolicy readSortKind();
105 SymbolAssignment *readProvideHidden(bool provide, bool hidden);
106 SymbolAssignment *readAssignment(StringRef tok);
107 void readSort();
108 Expr readAssert();
109 Expr readConstant();
110 Expr getPageSize();
111
112 Expr readMemoryAssignment(StringRef, StringRef, StringRef);
113 void readMemoryAttributes(uint32_t &flags, uint32_t &invFlags,
114 uint32_t &negFlags, uint32_t &negInvFlags);
115
116 Expr combine(StringRef op, Expr l, Expr r);
117 Expr readExpr();
118 Expr readExpr1(Expr lhs, int minPrec);
119 StringRef readParenName();
120 Expr readPrimary();
121 Expr readTernary(Expr cond);
122 Expr readParenExpr();
123
124 // For parsing version script.
125 SmallVector<SymbolVersion, 0> readVersionExtern();
126 void readAnonymousDeclaration();
127 void readVersionDeclaration(StringRef verStr);
128
129 std::pair<SmallVector<SymbolVersion, 0>, SmallVector<SymbolVersion, 0>>
130 readSymbols();
131
132 Ctx &ctx;
133
134 // If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command,
135 // then this member is set to the PROVIDE symbol name.
136 std::optional<llvm::StringRef> activeProvideSym;
137};
138} // namespace
139
140static StringRef unquote(StringRef s) {
141 if (s.starts_with(Prefix: "\""))
142 return s.substr(Start: 1, N: s.size() - 2);
143 return s;
144}
145
146// Some operations only support one non absolute value. Move the
147// absolute one to the right hand side for convenience.
148static void moveAbsRight(LinkerScript &s, ExprValue &a, ExprValue &b) {
149 if (a.sec == nullptr || (a.forceAbsolute && !b.isAbsolute()))
150 std::swap(a&: a, b&: b);
151 if (!b.isAbsolute())
152 s.recordError(msg: a.loc +
153 ": at least one side of the expression must be absolute");
154}
155
156static ExprValue add(LinkerScript &s, ExprValue a, ExprValue b) {
157 moveAbsRight(s, a, b);
158 return {a.sec, a.forceAbsolute, a.getSectionOffset() + b.getValue(), a.loc};
159}
160
161static ExprValue sub(ExprValue a, ExprValue b) {
162 // The distance between two symbols in sections is absolute.
163 if (!a.isAbsolute() && !b.isAbsolute())
164 return a.getValue() - b.getValue();
165 return {a.sec, false, a.getSectionOffset() - b.getValue(), a.loc};
166}
167
168static ExprValue bitAnd(LinkerScript &s, ExprValue a, ExprValue b) {
169 moveAbsRight(s, a, b);
170 return {a.sec, a.forceAbsolute,
171 (a.getValue() & b.getValue()) - a.getSecAddr(), a.loc};
172}
173
174static ExprValue bitXor(LinkerScript &s, ExprValue a, ExprValue b) {
175 moveAbsRight(s, a, b);
176 return {a.sec, a.forceAbsolute,
177 (a.getValue() ^ b.getValue()) - a.getSecAddr(), a.loc};
178}
179
180static ExprValue bitOr(LinkerScript &s, ExprValue a, ExprValue b) {
181 moveAbsRight(s, a, b);
182 return {a.sec, a.forceAbsolute,
183 (a.getValue() | b.getValue()) - a.getSecAddr(), a.loc};
184}
185
186void ScriptParser::readDynamicList() {
187 SaveAndRestore saved(lexState, State::VersionNode);
188 expect(expect: "{");
189 SmallVector<SymbolVersion, 0> locals;
190 SmallVector<SymbolVersion, 0> globals;
191 std::tie(args&: locals, args&: globals) = readSymbols();
192 expect(expect: ";");
193
194 StringRef tok = peek();
195 if (tok.size()) {
196 setError("EOF expected, but got " + tok);
197 return;
198 }
199 if (!locals.empty()) {
200 setError("\"local:\" scope not supported in --dynamic-list");
201 return;
202 }
203
204 for (SymbolVersion v : globals)
205 ctx.arg.dynamicList.push_back(Elt: v);
206}
207
208void ScriptParser::readVersionScript() {
209 readVersionScriptCommand();
210 StringRef tok = peek();
211 if (tok.size())
212 setError("EOF expected, but got " + tok);
213}
214
215void ScriptParser::readVersionScriptCommand() {
216 SaveAndRestore saved(lexState, State::VersionNode);
217 if (consume(tok: "{")) {
218 readAnonymousDeclaration();
219 return;
220 }
221
222 if (atEOF())
223 setError("unexpected EOF");
224 while (peek() != "}" && !atEOF()) {
225 StringRef verStr = next();
226 if (verStr == "{") {
227 setError("anonymous version definition is used in "
228 "combination with other version definitions");
229 return;
230 }
231 expect(expect: "{");
232 readVersionDeclaration(verStr);
233 }
234}
235
236void ScriptParser::readVersion() {
237 expect(expect: "{");
238 readVersionScriptCommand();
239 expect(expect: "}");
240}
241
242void ScriptParser::readLinkerScript() {
243 readStmts(readStmt: [&](StringRef t) { readLinkerScriptStmt(tok: t); });
244}
245
246void ScriptParser::readLinkerScriptStmt(StringRef tok) {
247 if (tok == ";")
248 return;
249
250 if (tok == "ENTRY") {
251 readEntry();
252 } else if (tok == "EXTERN") {
253 readExtern();
254 } else if (tok == "GROUP") {
255 readGroup();
256 } else if (tok == "INCLUDE") {
257 readInclude(
258 parse: [&] { readStmts(readStmt: [&](StringRef t) { readLinkerScriptStmt(tok: t); }); });
259 } else if (tok == "INPUT") {
260 readInput();
261 } else if (tok == "MEMORY") {
262 readMemory();
263 } else if (tok == "OUTPUT") {
264 readOutput();
265 } else if (tok == "OUTPUT_ARCH") {
266 readOutputArch();
267 } else if (tok == "OUTPUT_FORMAT") {
268 readOutputFormat();
269 } else if (tok == "OVERWRITE_SECTIONS") {
270 readOverwriteSections();
271 } else if (tok == "PHDRS") {
272 readPhdrs();
273 } else if (tok == "REGION_ALIAS") {
274 readRegionAlias();
275 } else if (tok == "SEARCH_DIR") {
276 readSearchDir();
277 } else if (tok == "SECTIONS") {
278 readSections();
279 } else if (tok == "TARGET") {
280 readTarget();
281 } else if (tok == "VERSION") {
282 readVersion();
283 } else if (tok == "NOCROSSREFS") {
284 readNoCrossRefs(/*to=*/false);
285 } else if (tok == "NOCROSSREFS_TO") {
286 readNoCrossRefs(/*to=*/true);
287 } else if (SymbolAssignment *cmd = readAssignment(tok)) {
288 ctx.script->sectionCommands.push_back(Elt: cmd);
289 } else {
290 setError("unknown directive: " + tok);
291 }
292}
293
294void ScriptParser::readDefsym() {
295 if (errCount(ctx))
296 return;
297 SaveAndRestore saved(lexState, State::Expr);
298 StringRef name = readName();
299 expect(expect: "=");
300 Expr e = readExpr();
301 if (!atEOF())
302 setError("EOF expected, but got " + next());
303 auto *cmd = make<SymbolAssignment>(args&: name, args&: e, args: 0, args: curBuf.filename.str());
304 ctx.script->sectionCommands.push_back(Elt: cmd);
305}
306
307void ScriptParser::readNoCrossRefs(bool to) {
308 expect(expect: "(");
309 NoCrossRefCommand cmd{.outputSections: {}, .toFirst: to};
310 while (auto tok = till(tok: ")"))
311 cmd.outputSections.push_back(Elt: unquote(s: tok));
312 if (cmd.outputSections.size() < 2)
313 Warn(ctx) << getCurrentLocation()
314 << ": ignored with fewer than 2 output sections";
315 else
316 ctx.script->noCrossRefs.push_back(Elt: std::move(cmd));
317}
318
319void ScriptParser::addFile(StringRef s) {
320 if (curBuf.isUnderSysroot && s.starts_with(Prefix: "/")) {
321 SmallString<128> pathData;
322 StringRef path = (ctx.arg.sysroot + s).toStringRef(Out&: pathData);
323 if (sys::fs::exists(Path: path))
324 ctx.driver.addFile(path: ctx.saver.save(S: path), /*withLOption=*/false);
325 else
326 setError("cannot find " + s + " inside " + ctx.arg.sysroot);
327 return;
328 }
329
330 if (s.starts_with(Prefix: "/")) {
331 // Case 1: s is an absolute path. Just open it.
332 ctx.driver.addFile(path: s, /*withLOption=*/false);
333 } else if (s.starts_with(Prefix: "=")) {
334 // Case 2: relative to the sysroot.
335 if (ctx.arg.sysroot.empty())
336 ctx.driver.addFile(path: s.substr(Start: 1), /*withLOption=*/false);
337 else
338 ctx.driver.addFile(path: ctx.saver.save(S: ctx.arg.sysroot + "/" + s.substr(Start: 1)),
339 /*withLOption=*/false);
340 } else if (s.starts_with(Prefix: "-l")) {
341 // Case 3: search in the list of library paths.
342 ctx.driver.addLibrary(name: s.substr(Start: 2));
343 } else {
344 // Case 4: s is a relative path. Search in the directory of the script file.
345 StringRef directory = sys::path::parent_path(path: curBuf.filename);
346 if (!directory.empty()) {
347 SmallString<0> path(directory);
348 sys::path::append(path, a: s);
349 if (sys::fs::exists(Path: path)) {
350 ctx.driver.addFile(path: ctx.saver.save(S: path.str()), /*withLOption=*/false);
351 return;
352 }
353 }
354 // Then search in the current working directory.
355 if (sys::fs::exists(Path: s)) {
356 ctx.driver.addFile(path: s, /*withLOption=*/false);
357 } else {
358 // Finally, search in the list of library paths.
359 if (std::optional<std::string> path = findFromSearchPaths(ctx, path: s))
360 ctx.driver.addFile(path: ctx.saver.save(S: *path), /*withLOption=*/true);
361 else
362 setError("unable to find " + s);
363 }
364 }
365}
366
367void ScriptParser::readAsNeeded() {
368 expect(expect: "(");
369 bool orig = ctx.arg.asNeeded;
370 ctx.arg.asNeeded = true;
371 while (auto tok = till(tok: ")"))
372 addFile(s: unquote(s: tok));
373 ctx.arg.asNeeded = orig;
374}
375
376void ScriptParser::readEntry() {
377 // -e <symbol> takes predecence over ENTRY(<symbol>).
378 expect(expect: "(");
379 StringRef name = readName();
380 if (ctx.arg.entry.empty())
381 ctx.arg.entry = name;
382 expect(expect: ")");
383}
384
385void ScriptParser::readExtern() {
386 expect(expect: "(");
387 while (auto tok = till(tok: ")"))
388 ctx.arg.undefined.push_back(Elt: unquote(s: tok));
389}
390
391void ScriptParser::readGroup() {
392 SaveAndRestore saved(ctx.driver.isInGroup, true);
393 readInput();
394 if (!saved.get())
395 ++ctx.driver.nextGroupId;
396}
397
398void ScriptParser::readInclude(llvm::function_ref<void()> parse) {
399 StringRef name = readName();
400 if (!activeFilenames.insert(V: name).second) {
401 setError("there is a cycle in linker script INCLUDEs");
402 return;
403 }
404
405 std::optional<std::string> path = searchScript(ctx, path: name);
406 if (!path) {
407 setError("cannot find linker script " + name);
408 return;
409 }
410 std::optional<MemoryBufferRef> mb = readFile(ctx, path: *path);
411 if (!mb)
412 return;
413
414 SaveAndRestore savedBuf(curBuf, Buffer(ctx, *mb));
415 SaveAndRestore savedPrevTok(prevTok, StringRef());
416 SaveAndRestore savedPrevTokLine(prevTokLine, size_t(1));
417 parse();
418
419 // parse() leaves `eof` true on normal completion; reset so the parent
420 // buffer continues to be lexed.
421 eof = false;
422 activeFilenames.erase(V: name);
423}
424
425// Drive `readStmt` on each token until EOF of the current buffer.
426void ScriptParser::readStmts(llvm::function_ref<void(StringRef)> readStmt) {
427 while (!atEOF()) {
428 StringRef tok = next();
429 if (atEOF())
430 return;
431 readStmt(tok);
432 }
433}
434
435void ScriptParser::readInput() {
436 expect(expect: "(");
437 while (auto tok = till(tok: ")")) {
438 if (tok == "AS_NEEDED")
439 readAsNeeded();
440 else
441 addFile(s: unquote(s: tok));
442 }
443}
444
445void ScriptParser::readOutput() {
446 // -o <file> takes predecence over OUTPUT(<file>).
447 expect(expect: "(");
448 StringRef name = readName();
449 if (ctx.arg.outputFile.empty())
450 ctx.arg.outputFile = name;
451 expect(expect: ")");
452}
453
454void ScriptParser::readOutputArch() {
455 // OUTPUT_ARCH is ignored for now.
456 expect(expect: "(");
457 while (till(tok: ")"))
458 ;
459}
460
461static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) {
462 return StringSwitch<std::pair<ELFKind, uint16_t>>(s)
463 .Case(S: "elf32-i386", Value: {ELF32LEKind, EM_386})
464 .Case(S: "elf32-avr", Value: {ELF32LEKind, EM_AVR})
465 .Case(S: "elf32-iamcu", Value: {ELF32LEKind, EM_IAMCU})
466 .Case(S: "elf32-littlearm", Value: {ELF32LEKind, EM_ARM})
467 .Case(S: "elf32-bigarm", Value: {ELF32BEKind, EM_ARM})
468 .Case(S: "elf32-x86-64", Value: {ELF32LEKind, EM_X86_64})
469 .Case(S: "elf64-aarch64", Value: {ELF64LEKind, EM_AARCH64})
470 .Case(S: "elf64-littleaarch64", Value: {ELF64LEKind, EM_AARCH64})
471 .Case(S: "elf64-bigaarch64", Value: {ELF64BEKind, EM_AARCH64})
472 .Case(S: "elf32-powerpc", Value: {ELF32BEKind, EM_PPC})
473 .Case(S: "elf32-powerpcle", Value: {ELF32LEKind, EM_PPC})
474 .Case(S: "elf64-powerpc", Value: {ELF64BEKind, EM_PPC64})
475 .Case(S: "elf64-powerpcle", Value: {ELF64LEKind, EM_PPC64})
476 .Case(S: "elf64-x86-64", Value: {ELF64LEKind, EM_X86_64})
477 .Cases(CaseStrings: {"elf32-tradbigmips", "elf32-bigmips"}, Value: {ELF32BEKind, EM_MIPS})
478 .Case(S: "elf32-ntradbigmips", Value: {ELF32BEKind, EM_MIPS})
479 .Case(S: "elf32-tradlittlemips", Value: {ELF32LEKind, EM_MIPS})
480 .Case(S: "elf32-ntradlittlemips", Value: {ELF32LEKind, EM_MIPS})
481 .Case(S: "elf64-tradbigmips", Value: {ELF64BEKind, EM_MIPS})
482 .Case(S: "elf64-tradlittlemips", Value: {ELF64LEKind, EM_MIPS})
483 .Case(S: "elf32-littleriscv", Value: {ELF32LEKind, EM_RISCV})
484 .Case(S: "elf64-littleriscv", Value: {ELF64LEKind, EM_RISCV})
485 .Case(S: "elf64-sparc", Value: {ELF64BEKind, EM_SPARCV9})
486 .Case(S: "elf32-msp430", Value: {ELF32LEKind, EM_MSP430})
487 .Case(S: "elf32-loongarch", Value: {ELF32LEKind, EM_LOONGARCH})
488 .Case(S: "elf64-loongarch", Value: {ELF64LEKind, EM_LOONGARCH})
489 .Case(S: "elf64-s390", Value: {ELF64BEKind, EM_S390})
490 .Cases(CaseStrings: {"elf32-hexagon", "elf32-littlehexagon"},
491 Value: {ELF32LEKind, EM_HEXAGON})
492 .Default(Value: {ELFNoneKind, EM_NONE});
493}
494
495// Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(default, big, little). Choose
496// big if -EB is specified, little if -EL is specified, or default if neither is
497// specified.
498void ScriptParser::readOutputFormat() {
499 expect(expect: "(");
500
501 StringRef s = readName();
502 if (!consume(tok: ")")) {
503 expect(expect: ",");
504 StringRef tmp = readName();
505 if (ctx.arg.optEB)
506 s = tmp;
507 expect(expect: ",");
508 tmp = readName();
509 if (ctx.arg.optEL)
510 s = tmp;
511 consume(tok: ")");
512 }
513 // If more than one OUTPUT_FORMAT is specified, only the first is checked.
514 if (!ctx.arg.bfdname.empty())
515 return;
516 ctx.arg.bfdname = s;
517
518 if (s == "binary") {
519 ctx.arg.oFormatBinary = true;
520 return;
521 }
522
523 if (s.consume_back(Suffix: "-freebsd"))
524 ctx.arg.osabi = ELFOSABI_FREEBSD;
525
526 std::tie(args&: ctx.arg.ekind, args&: ctx.arg.emachine) = parseBfdName(s);
527 if (ctx.arg.emachine == EM_NONE)
528 setError("unknown output format name: " + ctx.arg.bfdname);
529 if (s == "elf32-ntradlittlemips" || s == "elf32-ntradbigmips")
530 ctx.arg.mipsN32Abi = true;
531 if (ctx.arg.emachine == EM_MSP430)
532 ctx.arg.osabi = ELFOSABI_STANDALONE;
533}
534
535void ScriptParser::readPhdrs() {
536 expect(expect: "{");
537 while (auto tok = till(tok: "}")) {
538 PhdrsCommand cmd;
539 cmd.name = tok;
540 cmd.type = readPhdrType();
541
542 while (!errCount(ctx) && !consume(tok: ";")) {
543 if (consume(tok: "FILEHDR"))
544 cmd.hasFilehdr = true;
545 else if (consume(tok: "PHDRS"))
546 cmd.hasPhdrs = true;
547 else if (consume(tok: "AT"))
548 cmd.lmaExpr = readParenExpr();
549 else if (consume(tok: "FLAGS"))
550 cmd.flags = readParenExpr()().getValue();
551 else
552 setError("unexpected header attribute: " + next());
553 }
554
555 ctx.script->phdrsCommands.push_back(Elt: cmd);
556 }
557}
558
559void ScriptParser::readRegionAlias() {
560 expect(expect: "(");
561 StringRef alias = readName();
562 expect(expect: ",");
563 StringRef name = readName();
564 expect(expect: ")");
565
566 if (ctx.script->memoryRegions.contains(Key: alias))
567 setError("redefinition of memory region '" + alias + "'");
568 if (!ctx.script->memoryRegions.contains(Key: name))
569 setError("memory region '" + name + "' is not defined");
570 ctx.script->memoryRegions.insert(KV: {alias, ctx.script->memoryRegions[name]});
571}
572
573void ScriptParser::readSearchDir() {
574 expect(expect: "(");
575 StringRef name = readName();
576 if (!ctx.arg.nostdlib)
577 ctx.arg.searchPaths.push_back(Elt: name);
578 expect(expect: ")");
579}
580
581// This reads an overlay description. Overlays are used to describe output
582// sections that use the same virtual memory range and normally would trigger
583// linker's sections sanity check failures.
584// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
585SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() {
586 Expr addrExpr;
587 if (!consume(tok: ":")) {
588 addrExpr = readExpr();
589 expect(expect: ":");
590 }
591 bool noCrossRefs = consume(tok: "NOCROSSREFS");
592 Expr lmaExpr = consume(tok: "AT") ? readParenExpr() : Expr{};
593 expect(expect: "{");
594
595 SmallVector<SectionCommand *, 0> v;
596 OutputSection *prev = nullptr;
597 while (!errCount(ctx) && !consume(tok: "}")) {
598 // VA is the same for all sections. The LMAs are consecutive in memory
599 // starting from the base load address.
600 OutputDesc *osd = readOverlaySectionDescription();
601 osd->osec.addrExpr = addrExpr;
602 if (prev) {
603 osd->osec.lmaExpr = [=] { return prev->getLMA() + prev->size; };
604 } else {
605 osd->osec.lmaExpr = lmaExpr;
606 // Use first section address for subsequent sections. Ensure the first
607 // section, even if empty, is not discarded.
608 osd->osec.usedInExpression = true;
609 addrExpr = [=]() -> ExprValue { return {&osd->osec, false, 0, ""}; };
610 }
611 v.push_back(Elt: osd);
612 prev = &osd->osec;
613 }
614 if (!v.empty())
615 static_cast<OutputDesc *>(v.front())->osec.firstInOverlay = true;
616 if (consume(tok: ">")) {
617 StringRef regionName = readName();
618 for (SectionCommand *od : v)
619 static_cast<OutputDesc *>(od)->osec.memoryRegionName =
620 std::string(regionName);
621 }
622 if (noCrossRefs) {
623 NoCrossRefCommand cmd;
624 for (SectionCommand *od : v)
625 cmd.outputSections.push_back(Elt: static_cast<OutputDesc *>(od)->osec.name);
626 ctx.script->noCrossRefs.push_back(Elt: std::move(cmd));
627 }
628
629 // According to the specification, at the end of the overlay, the location
630 // counter should be equal to the overlay base address plus size of the
631 // largest section seen in the overlay.
632 // Here we want to create the Dot assignment command to achieve that.
633 Expr moveDot = [=] {
634 uint64_t max = 0;
635 for (SectionCommand *cmd : v)
636 max = std::max(a: max, b: cast<OutputDesc>(Val: cmd)->osec.size);
637 return addrExpr().getValue() + max;
638 };
639 v.push_back(Elt: make<SymbolAssignment>(args: ".", args&: moveDot, args: 0, args: getCurrentLocation()));
640 return v;
641}
642
643SectionClassDesc *ScriptParser::readSectionClassDescription() {
644 StringRef name = readSectionClassName();
645 SectionClassDesc *desc = make<SectionClassDesc>(args&: name);
646 if (!ctx.script->sectionClasses.insert(KV: {CachedHashStringRef(name), desc})
647 .second)
648 setError("section class '" + name + "' already defined");
649 expect(expect: "{");
650 while (auto tok = till(tok: "}")) {
651 if (tok == "(" || tok == ")") {
652 setError("expected filename pattern");
653 } else if (peek() == "(") {
654 InputSectionDescription *isd = readInputSectionDescription(tok);
655 if (!isd->classRef.empty())
656 setError("section class '" + name + "' references class '" +
657 isd->classRef + "'");
658 desc->sc.commands.push_back(Elt: isd);
659 }
660 }
661 return desc;
662}
663
664StringRef ScriptParser::readSectionClassName() {
665 expect(expect: "(");
666 StringRef name = unquote(s: next());
667 expect(expect: ")");
668 return name;
669}
670
671void ScriptParser::readOverwriteSections() {
672 expect(expect: "{");
673 while (auto tok = till(tok: "}"))
674 ctx.script->overwriteSections.push_back(Elt: readOutputSectionDescription(outSec: tok));
675}
676
677void ScriptParser::readSections() {
678 expect(expect: "{");
679 SmallVector<SectionCommand *, 0> v;
680 while (auto tok = till(tok: "}"))
681 readSectionsStmt(v, tok);
682
683 // If DATA_SEGMENT_RELRO_END is absent, for sections after DATA_SEGMENT_ALIGN,
684 // the relro fields should be cleared.
685 if (!ctx.script->seenRelroEnd)
686 for (SectionCommand *cmd : v)
687 if (auto *osd = dyn_cast<OutputDesc>(Val: cmd))
688 osd->osec.relro = false;
689
690 ctx.script->sectionCommands.insert(I: ctx.script->sectionCommands.end(),
691 From: v.begin(), To: v.end());
692
693 if (atEOF() || !consume(tok: "INSERT")) {
694 ctx.script->hasSectionsCommand = true;
695 return;
696 }
697
698 bool isAfter = false;
699 if (consume(tok: "AFTER"))
700 isAfter = true;
701 else if (!consume(tok: "BEFORE"))
702 setError("expected AFTER/BEFORE, but got '" + next() + "'");
703 StringRef where = readName();
704 SmallVector<StringRef, 0> names;
705 for (SectionCommand *cmd : v)
706 if (auto *os = dyn_cast<OutputDesc>(Val: cmd))
707 names.push_back(Elt: os->osec.name);
708 if (!names.empty())
709 ctx.script->insertCommands.push_back(Elt: {.names: std::move(names), .isAfter: isAfter, .where: where});
710}
711
712void ScriptParser::readSectionsStmt(SmallVectorImpl<SectionCommand *> &v,
713 StringRef tok) {
714 if (tok == "OVERLAY") {
715 for (SectionCommand *cmd : readOverlay())
716 v.push_back(Elt: cmd);
717 return;
718 }
719 if (tok == "CLASS") {
720 v.push_back(Elt: readSectionClassDescription());
721 return;
722 }
723 if (tok == "INCLUDE") {
724 readInclude(
725 parse: [&] { readStmts(readStmt: [&](StringRef t) { readSectionsStmt(v, tok: t); }); });
726 return;
727 }
728
729 if (SectionCommand *cmd = readAssignment(tok))
730 v.push_back(Elt: cmd);
731 else
732 v.push_back(Elt: readOutputSectionDescription(outSec: tok));
733}
734
735void ScriptParser::readTarget() {
736 // TARGET(foo) is an alias for "--format foo". Unlike GNU linkers,
737 // we accept only a limited set of BFD names (i.e. "elf" or "binary")
738 // for --format. We recognize only /^elf/ and "binary" in the linker
739 // script as well.
740 expect(expect: "(");
741 StringRef tok = readName();
742 expect(expect: ")");
743
744 if (tok.starts_with(Prefix: "elf"))
745 ctx.arg.formatBinary = false;
746 else if (tok == "binary")
747 ctx.arg.formatBinary = true;
748 else
749 setError("unknown target: " + tok);
750}
751
752static int precedence(StringRef op) {
753 return StringSwitch<int>(op)
754 .Cases(CaseStrings: {"*", "/", "%"}, Value: 11)
755 .Cases(CaseStrings: {"+", "-"}, Value: 10)
756 .Cases(CaseStrings: {"<<", ">>"}, Value: 9)
757 .Cases(CaseStrings: {"<", "<=", ">", ">="}, Value: 8)
758 .Cases(CaseStrings: {"==", "!="}, Value: 7)
759 .Case(S: "&", Value: 6)
760 .Case(S: "^", Value: 5)
761 .Case(S: "|", Value: 4)
762 .Case(S: "&&", Value: 3)
763 .Case(S: "||", Value: 2)
764 .Case(S: "?", Value: 1)
765 .Default(Value: -1);
766}
767
768StringMatcher ScriptParser::readFilePatterns() {
769 StringMatcher Matcher;
770 while (auto tok = till(tok: ")"))
771 Matcher.addPattern(Matcher: SingleStringMatcher(tok));
772 return Matcher;
773}
774
775SortSectionPolicy ScriptParser::peekSortKind() {
776 return StringSwitch<SortSectionPolicy>(peek())
777 .Case(S: "REVERSE", Value: SortSectionPolicy::Reverse)
778 .Cases(CaseStrings: {"SORT", "SORT_BY_NAME"}, Value: SortSectionPolicy::Name)
779 .Case(S: "SORT_BY_ALIGNMENT", Value: SortSectionPolicy::Alignment)
780 .Case(S: "SORT_BY_INIT_PRIORITY", Value: SortSectionPolicy::Priority)
781 .Case(S: "SORT_NONE", Value: SortSectionPolicy::None)
782 .Default(Value: SortSectionPolicy::Default);
783}
784
785SortSectionPolicy ScriptParser::readSortKind() {
786 SortSectionPolicy ret = peekSortKind();
787 if (ret != SortSectionPolicy::Default)
788 skip();
789 return ret;
790}
791
792// Reads SECTIONS command contents in the following form:
793//
794// <contents> ::= <elem>*
795// <elem> ::= <exclude>? <glob-pattern>
796// <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")"
797//
798// For example,
799//
800// *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz)
801//
802// is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o".
803// The semantics of that is section .foo in any file, section .bar in
804// any file but a.o, and section .baz in any file but b.o.
805SmallVector<SectionPattern, 0> ScriptParser::readInputSectionsList() {
806 SmallVector<SectionPattern, 0> ret;
807 while (!errCount(ctx) && peek() != ")") {
808 StringMatcher excludeFilePat;
809 if (consume(tok: "EXCLUDE_FILE")) {
810 expect(expect: "(");
811 excludeFilePat = readFilePatterns();
812 }
813
814 StringMatcher SectionMatcher;
815 // Break if the next token is ), EXCLUDE_FILE, or SORT*.
816 while (!errCount(ctx) && peekSortKind() == SortSectionPolicy::Default) {
817 StringRef s = peek();
818 if (s == ")" || s == "EXCLUDE_FILE")
819 break;
820 // Detect common mistakes when certain non-wildcard meta characters are
821 // used without a closing ')'.
822 if (!s.empty() && strchr(s: "(){}", c: s[0])) {
823 skip();
824 setError("section pattern is expected");
825 break;
826 }
827 SectionMatcher.addPattern(Matcher: readName());
828 }
829
830 if (!SectionMatcher.empty())
831 ret.push_back(Elt: {std::move(excludeFilePat), std::move(SectionMatcher)});
832 else if (excludeFilePat.empty())
833 break;
834 else
835 setError("section pattern is expected");
836 }
837 return ret;
838}
839
840// Reads contents of "SECTIONS" directive. That directive contains a
841// list of glob patterns for input sections. The grammar is as follows.
842//
843// <patterns> ::= <section-list>
844// | <sort> "(" <section-list> ")"
845// | <sort> "(" <sort> "(" <section-list> ")" ")"
846//
847// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT"
848// | "SORT_BY_INIT_PRIORITY" | "SORT_NONE"
849//
850// <section-list> is parsed by readInputSectionsList().
851InputSectionDescription *
852ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags,
853 uint64_t withoutFlags) {
854 auto *cmd =
855 make<InputSectionDescription>(args&: filePattern, args&: withFlags, args&: withoutFlags);
856 expect(expect: "(");
857
858 while (peek() != ")" && !atEOF()) {
859 SortSectionPolicy outer = readSortKind();
860 SortSectionPolicy inner = SortSectionPolicy::Default;
861 SmallVector<SectionPattern, 0> v;
862 if (outer != SortSectionPolicy::Default) {
863 expect(expect: "(");
864 inner = readSortKind();
865 if (inner != SortSectionPolicy::Default) {
866 expect(expect: "(");
867 v = readInputSectionsList();
868 expect(expect: ")");
869 } else {
870 v = readInputSectionsList();
871 }
872 expect(expect: ")");
873 } else {
874 v = readInputSectionsList();
875 }
876
877 for (SectionPattern &pat : v) {
878 pat.sortInner = inner;
879 pat.sortOuter = outer;
880 }
881
882 std::move(first: v.begin(), last: v.end(), result: std::back_inserter(x&: cmd->sectionPatterns));
883 }
884 expect(expect: ")");
885 return cmd;
886}
887
888InputSectionDescription *
889ScriptParser::readInputSectionDescription(StringRef tok) {
890 // Input section wildcard can be surrounded by KEEP.
891 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
892 uint64_t withFlags = 0;
893 uint64_t withoutFlags = 0;
894 if (tok == "KEEP") {
895 expect(expect: "(");
896 if (consume(tok: "INPUT_SECTION_FLAGS"))
897 std::tie(args&: withFlags, args&: withoutFlags) = readInputSectionFlags();
898
899 tok = next();
900 InputSectionDescription *cmd;
901 if (tok == "CLASS")
902 cmd = make<InputSectionDescription>(args: StringRef{}, args&: withFlags, args&: withoutFlags,
903 args: readSectionClassName());
904 else
905 cmd = readInputSectionRules(filePattern: tok, withFlags, withoutFlags);
906 expect(expect: ")");
907 ctx.script->keptSections.push_back(Elt: cmd);
908 return cmd;
909 }
910 if (tok == "INPUT_SECTION_FLAGS") {
911 std::tie(args&: withFlags, args&: withoutFlags) = readInputSectionFlags();
912 tok = next();
913 }
914 if (tok == "CLASS")
915 return make<InputSectionDescription>(args: StringRef{}, args&: withFlags, args&: withoutFlags,
916 args: readSectionClassName());
917 return readInputSectionRules(filePattern: tok, withFlags, withoutFlags);
918}
919
920void ScriptParser::readSort() {
921 expect(expect: "(");
922 expect(expect: "CONSTRUCTORS");
923 expect(expect: ")");
924}
925
926Expr ScriptParser::readAssert() {
927 expect(expect: "(");
928 Expr e = readExpr();
929 expect(expect: ",");
930 StringRef msg = readName();
931 expect(expect: ")");
932
933 return [=, s = ctx.script]() -> ExprValue {
934 if (!e().getValue())
935 s->recordError(msg);
936 return s->getDot();
937 };
938}
939
940#define ECase(X) \
941 { #X, X }
942constexpr std::pair<const char *, unsigned> typeMap[] = {
943 ECase(SHT_PROGBITS), ECase(SHT_NOTE), ECase(SHT_NOBITS),
944 ECase(SHT_INIT_ARRAY), ECase(SHT_FINI_ARRAY), ECase(SHT_PREINIT_ARRAY),
945};
946#undef ECase
947
948// Tries to read the special directive for an output section definition which
949// can be one of following: "(NOLOAD)", "(COPY)", "(INFO)", "(OVERLAY)", and
950// "(TYPE=<value>)".
951bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok) {
952 if (tok != "NOLOAD" && tok != "COPY" && tok != "INFO" && tok != "OVERLAY" &&
953 tok != "TYPE")
954 return false;
955
956 if (consume(tok: "NOLOAD")) {
957 cmd->type = SHT_NOBITS;
958 cmd->typeIsSet = true;
959 } else if (consume(tok: "TYPE")) {
960 expect(expect: "=");
961 StringRef value = peek();
962 auto it = llvm::find_if(Range: typeMap, P: [=](auto e) { return e.first == value; });
963 if (it != std::end(arr: typeMap)) {
964 // The value is a recognized literal SHT_*.
965 cmd->type = it->second;
966 skip();
967 } else if (value.starts_with(Prefix: "SHT_")) {
968 setError("unknown section type " + value);
969 } else {
970 // Otherwise, read an expression.
971 cmd->type = readExpr()().getValue();
972 }
973 cmd->typeIsSet = true;
974 } else {
975 skip(); // This is "COPY", "INFO" or "OVERLAY".
976 cmd->nonAlloc = true;
977 }
978 expect(expect: ")");
979 return true;
980}
981
982// Reads an expression and/or the special directive for an output
983// section definition. Directive is one of following: "(NOLOAD)",
984// "(COPY)", "(INFO)" or "(OVERLAY)".
985//
986// An output section name can be followed by an address expression
987// and/or directive. This grammar is not LL(1) because "(" can be
988// interpreted as either the beginning of some expression or beginning
989// of directive.
990//
991// https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
992// https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
993void ScriptParser::readSectionAddressType(OutputSection *cmd) {
994 if (consume(tok: "(")) {
995 // Temporarily set lexState to support TYPE=<value> without spaces.
996 SaveAndRestore saved(lexState, State::Expr);
997 if (readSectionDirective(cmd, tok: peek()))
998 return;
999 cmd->addrExpr = readExpr();
1000 expect(expect: ")");
1001 } else {
1002 cmd->addrExpr = readExpr();
1003 }
1004
1005 if (consume(tok: "(")) {
1006 SaveAndRestore saved(lexState, State::Expr);
1007 StringRef tok = peek();
1008 if (!readSectionDirective(cmd, tok))
1009 setError("unknown section directive: " + tok);
1010 }
1011}
1012
1013static Expr checkAlignment(Ctx &ctx, Expr e, std::string &loc) {
1014 return [=, &ctx] {
1015 uint64_t alignment = std::max(a: (uint64_t)1, b: e().getValue());
1016 if (!isPowerOf2_64(Value: alignment)) {
1017 ErrAlways(ctx) << loc << ": alignment must be power of 2";
1018 return (uint64_t)1; // Return a dummy value.
1019 }
1020 return alignment;
1021 };
1022}
1023
1024OutputDesc *ScriptParser::readOverlaySectionDescription() {
1025 OutputDesc *osd =
1026 ctx.script->createOutputSection(name: readName(), location: getCurrentLocation());
1027 osd->osec.inOverlay = true;
1028 expect(expect: "{");
1029 while (auto tok = till(tok: "}"))
1030 readOutputSectionStmt(osec&: osd->osec, tok);
1031 osd->osec.phdrs = readOutputSectionPhdrs();
1032 return osd;
1033}
1034
1035OutputDesc *ScriptParser::readOutputSectionDescription(StringRef outSec) {
1036 OutputDesc *cmd =
1037 ctx.script->createOutputSection(name: unquote(s: outSec), location: getCurrentLocation());
1038 OutputSection *osec = &cmd->osec;
1039 // Maybe relro. Will reset to false if DATA_SEGMENT_RELRO_END is absent.
1040 osec->relro = ctx.script->seenDataAlign && !ctx.script->seenRelroEnd;
1041
1042 size_t symbolsReferenced = ctx.script->referencedSymbols.size();
1043
1044 if (peek() != ":")
1045 readSectionAddressType(cmd: osec);
1046 expect(expect: ":");
1047
1048 std::string location = getCurrentLocation();
1049 if (consume(tok: "AT"))
1050 osec->lmaExpr = readParenExpr();
1051 if (consume(tok: "ALIGN"))
1052 osec->alignExpr = checkAlignment(ctx, e: readParenExpr(), loc&: location);
1053 if (consume(tok: "SUBALIGN"))
1054 osec->subalignExpr = checkAlignment(ctx, e: readParenExpr(), loc&: location);
1055
1056 // Parse constraints.
1057 if (consume(tok: "ONLY_IF_RO"))
1058 osec->constraint = ConstraintKind::ReadOnly;
1059 if (consume(tok: "ONLY_IF_RW"))
1060 osec->constraint = ConstraintKind::ReadWrite;
1061 expect(expect: "{");
1062
1063 while (auto tok = till(tok: "}"))
1064 readOutputSectionStmt(osec&: *osec, tok);
1065
1066 if (consume(tok: ">"))
1067 osec->memoryRegionName = std::string(readName());
1068
1069 if (consume(tok: "AT")) {
1070 expect(expect: ">");
1071 osec->lmaRegionName = std::string(readName());
1072 }
1073
1074 if (osec->lmaExpr && !osec->lmaRegionName.empty())
1075 ErrAlways(ctx) << "section can't have both LMA and a load region";
1076
1077 osec->phdrs = readOutputSectionPhdrs();
1078
1079 if (peek() == "=" || peek().starts_with(Prefix: "=")) {
1080 lexState = State::Expr;
1081 consume(tok: "=");
1082 osec->filler = readFill();
1083 lexState = State::Script;
1084 }
1085
1086 // Consume optional comma following output section command.
1087 consume(tok: ",");
1088
1089 if (ctx.script->referencedSymbols.size() > symbolsReferenced)
1090 osec->expressionsUseSymbols = true;
1091 return cmd;
1092}
1093
1094void ScriptParser::readOutputSectionStmt(OutputSection &osec, StringRef tok) {
1095 if (tok == ";") {
1096 // Empty commands are allowed. Do nothing here.
1097 } else if (SymbolAssignment *assign = readAssignment(tok)) {
1098 osec.commands.push_back(Elt: assign);
1099 } else if (ByteCommand *data = readByteCommand(tok)) {
1100 osec.commands.push_back(Elt: data);
1101 } else if (tok == "CONSTRUCTORS") {
1102 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
1103 // by name. This is for very old file formats such as ECOFF/XCOFF.
1104 // For ELF, we should ignore.
1105 } else if (tok == "FILL") {
1106 // We handle the FILL command as an alias for =fillexp section attribute,
1107 // which is different from what GNU linkers do.
1108 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html
1109 if (peek() != "(")
1110 setError("( expected, but got " + peek());
1111 osec.filler = readFill();
1112 } else if (tok == "SORT") {
1113 readSort();
1114 } else if (tok == "INCLUDE") {
1115 readInclude(parse: [&] {
1116 readStmts(readStmt: [&](StringRef t) { readOutputSectionStmt(osec, tok: t); });
1117 });
1118 } else if (tok == "(" || tok == ")") {
1119 setError("expected filename pattern");
1120 } else if (peek() == "(") {
1121 osec.commands.push_back(Elt: readInputSectionDescription(tok));
1122 } else {
1123 // We have a file name and no input sections description. It is not a
1124 // commonly used syntax, but still acceptable. In that case, all sections
1125 // from the file will be included.
1126 // FIXME: GNU ld permits INPUT_SECTION_FLAGS to be used here. We do not
1127 // handle this case here as it will already have been matched by the
1128 // case above.
1129 auto *isd = make<InputSectionDescription>(args&: tok);
1130 isd->sectionPatterns.push_back(Elt: {{}, StringMatcher("*")});
1131 osec.commands.push_back(Elt: isd);
1132 }
1133}
1134
1135// Reads a `=<fillexp>` expression and returns its value as a big-endian number.
1136// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html
1137// We do not support using symbols in such expressions.
1138//
1139// When reading a hexstring, ld.bfd handles it as a blob of arbitrary
1140// size, while ld.gold always handles it as a 32-bit big-endian number.
1141// We are compatible with ld.gold because it's easier to implement.
1142// Also, we require that expressions with operators must be wrapped into
1143// round brackets. We did it to resolve the ambiguity when parsing scripts like:
1144// SECTIONS { .foo : { ... } =120+3 /DISCARD/ : { ... } }
1145std::array<uint8_t, 4> ScriptParser::readFill() {
1146 uint64_t value = readPrimary()().val;
1147 if (value > UINT32_MAX)
1148 setError("filler expression result does not fit 32-bit: 0x" +
1149 Twine::utohexstr(Val: value));
1150
1151 std::array<uint8_t, 4> buf;
1152 write32be(P: buf.data(), V: (uint32_t)value);
1153 return buf;
1154}
1155
1156SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) {
1157 expect(expect: "(");
1158 StringRef name = readName(), eq = peek();
1159 if (eq != "=") {
1160 setError("= expected, but got " + next());
1161 while (till(tok: ")"))
1162 ;
1163 return nullptr;
1164 }
1165 llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym);
1166 if (provide)
1167 activeProvideSym = name;
1168 SymbolAssignment *cmd = readSymbolAssignment(name);
1169 cmd->provide = provide;
1170 cmd->hidden = hidden;
1171 expect(expect: ")");
1172 return cmd;
1173}
1174
1175// Replace whitespace sequence (including \n) with one single space. The output
1176// is used by -Map.
1177static void squeezeSpaces(std::string &str) {
1178 char prev = '\0';
1179 auto it = str.begin();
1180 for (char c : str)
1181 if (!isSpace(C: c) || (c = ' ') != prev)
1182 *it++ = prev = c;
1183 str.erase(first: it, last: str.end());
1184}
1185
1186SymbolAssignment *ScriptParser::readAssignment(StringRef tok) {
1187 // Assert expression returns Dot, so this is equal to ".=."
1188 if (tok == "ASSERT")
1189 return make<SymbolAssignment>(args: ".", args: readAssert(), args: 0, args: getCurrentLocation());
1190
1191 const char *oldS = prevTok.data();
1192 SymbolAssignment *cmd = nullptr;
1193 bool savedSeenRelroEnd = ctx.script->seenRelroEnd;
1194 const StringRef op = peek();
1195 {
1196 SaveAndRestore saved(lexState, State::Expr);
1197 if (op.starts_with(Prefix: "=")) {
1198 // Support = followed by an expression without whitespace.
1199 cmd = readSymbolAssignment(name: unquote(s: tok));
1200 } else if ((op.size() == 2 && op[1] == '=' && strchr(s: "+-*/&^|", c: op[0])) ||
1201 op == "<<=" || op == ">>=") {
1202 cmd = readSymbolAssignment(name: unquote(s: tok));
1203 } else if (tok == "PROVIDE") {
1204 cmd = readProvideHidden(provide: true, hidden: false);
1205 } else if (tok == "HIDDEN") {
1206 cmd = readProvideHidden(provide: false, hidden: true);
1207 } else if (tok == "PROVIDE_HIDDEN") {
1208 cmd = readProvideHidden(provide: true, hidden: true);
1209 }
1210 }
1211
1212 if (cmd) {
1213 cmd->dataSegmentRelroEnd = !savedSeenRelroEnd && ctx.script->seenRelroEnd;
1214 cmd->commandString = StringRef(oldS, curTok.data() - oldS).str();
1215 squeezeSpaces(str&: cmd->commandString);
1216 expect(expect: ";");
1217 }
1218 return cmd;
1219}
1220
1221StringRef ScriptParser::readName() { return unquote(s: next()); }
1222
1223SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
1224 StringRef op = next();
1225 assert(op == "=" || op == "*=" || op == "/=" || op == "+=" || op == "-=" ||
1226 op == "&=" || op == "^=" || op == "|=" || op == "<<=" || op == ">>=");
1227 // Note: GNU ld does not support %=.
1228 Expr e = readExpr();
1229 if (op != "=") {
1230 std::string loc = getCurrentLocation();
1231 e = [=, s = ctx.script, c = op[0], &ctx = ctx]() -> ExprValue {
1232 ExprValue lhs = s->getSymbolValue(name, loc);
1233 switch (c) {
1234 case '*':
1235 return lhs.getValue() * e().getValue();
1236 case '/':
1237 if (uint64_t rv = e().getValue())
1238 return lhs.getValue() / rv;
1239 ErrAlways(ctx) << loc << ": division by zero";
1240 return 0;
1241 case '+':
1242 return add(s&: *s, a: lhs, b: e());
1243 case '-':
1244 return sub(a: lhs, b: e());
1245 case '<':
1246 return lhs.getValue() << e().getValue() % 64;
1247 case '>':
1248 return lhs.getValue() >> e().getValue() % 64;
1249 case '&':
1250 return lhs.getValue() & e().getValue();
1251 case '^':
1252 return lhs.getValue() ^ e().getValue();
1253 case '|':
1254 return lhs.getValue() | e().getValue();
1255 default:
1256 llvm_unreachable("");
1257 }
1258 };
1259 }
1260 return make<SymbolAssignment>(args&: name, args&: e, args: ctx.scriptSymOrderCounter++,
1261 args: getCurrentLocation());
1262}
1263
1264// This is an operator-precedence parser to parse a linker
1265// script expression.
1266Expr ScriptParser::readExpr() {
1267 if (atEOF())
1268 return []() { return 0; };
1269 // Our lexer is context-aware. Set the in-expression bit so that
1270 // they apply different tokenization rules.
1271 SaveAndRestore saved(lexState, State::Expr);
1272 Expr e = readExpr1(lhs: readPrimary(), minPrec: 0);
1273 return e;
1274}
1275
1276Expr ScriptParser::combine(StringRef op, Expr l, Expr r) {
1277 if (op == "+")
1278 return [=, s = ctx.script] { return add(s&: *s, a: l(), b: r()); };
1279 if (op == "-")
1280 return [=] { return sub(a: l(), b: r()); };
1281 if (op == "*")
1282 return [=] { return l().getValue() * r().getValue(); };
1283 if (op == "/") {
1284 std::string loc = getCurrentLocation();
1285 return [=, &ctx = ctx]() -> uint64_t {
1286 if (uint64_t rv = r().getValue())
1287 return l().getValue() / rv;
1288 ErrAlways(ctx) << loc << ": division by zero";
1289 return 0;
1290 };
1291 }
1292 if (op == "%") {
1293 std::string loc = getCurrentLocation();
1294 return [=, &ctx = ctx]() -> uint64_t {
1295 if (uint64_t rv = r().getValue())
1296 return l().getValue() % rv;
1297 ErrAlways(ctx) << loc << ": modulo by zero";
1298 return 0;
1299 };
1300 }
1301 if (op == "<<")
1302 return [=] { return l().getValue() << r().getValue() % 64; };
1303 if (op == ">>")
1304 return [=] { return l().getValue() >> r().getValue() % 64; };
1305 if (op == "<")
1306 return [=] { return l().getValue() < r().getValue(); };
1307 if (op == ">")
1308 return [=] { return l().getValue() > r().getValue(); };
1309 if (op == ">=")
1310 return [=] { return l().getValue() >= r().getValue(); };
1311 if (op == "<=")
1312 return [=] { return l().getValue() <= r().getValue(); };
1313 if (op == "==")
1314 return [=] { return l().getValue() == r().getValue(); };
1315 if (op == "!=")
1316 return [=] { return l().getValue() != r().getValue(); };
1317 if (op == "||")
1318 return [=] { return l().getValue() || r().getValue(); };
1319 if (op == "&&")
1320 return [=] { return l().getValue() && r().getValue(); };
1321 if (op == "&")
1322 return [=, s = ctx.script] { return bitAnd(s&: *s, a: l(), b: r()); };
1323 if (op == "^")
1324 return [=, s = ctx.script] { return bitXor(s&: *s, a: l(), b: r()); };
1325 if (op == "|")
1326 return [=, s = ctx.script] { return bitOr(s&: *s, a: l(), b: r()); };
1327 llvm_unreachable("invalid operator");
1328}
1329
1330// This is a part of the operator-precedence parser. This function
1331// assumes that the remaining token stream starts with an operator.
1332Expr ScriptParser::readExpr1(Expr lhs, int minPrec) {
1333 while (!atEOF() && !errCount(ctx)) {
1334 // Read an operator and an expression.
1335 StringRef op1 = peek();
1336 if (precedence(op: op1) < minPrec)
1337 break;
1338 skip();
1339 if (op1 == "?")
1340 return readTernary(cond: lhs);
1341 Expr rhs = readPrimary();
1342
1343 // Evaluate the remaining part of the expression first if the
1344 // next operator has greater precedence than the previous one.
1345 // For example, if we have read "+" and "3", and if the next
1346 // operator is "*", then we'll evaluate 3 * ... part first.
1347 while (!atEOF()) {
1348 StringRef op2 = peek();
1349 if (precedence(op: op2) <= precedence(op: op1))
1350 break;
1351 rhs = readExpr1(lhs: rhs, minPrec: precedence(op: op2));
1352 }
1353
1354 lhs = combine(op: op1, l: lhs, r: rhs);
1355 }
1356 return lhs;
1357}
1358
1359Expr ScriptParser::getPageSize() {
1360 std::string location = getCurrentLocation();
1361 return [=, &ctx = this->ctx]() -> uint64_t {
1362 if (ctx.target)
1363 return ctx.arg.commonPageSize;
1364 ErrAlways(ctx) << location << ": unable to calculate page size";
1365 return 4096; // Return a dummy value.
1366 };
1367}
1368
1369Expr ScriptParser::readConstant() {
1370 StringRef s = readParenName();
1371 if (s == "COMMONPAGESIZE")
1372 return getPageSize();
1373 if (s == "MAXPAGESIZE")
1374 return [&ctx = this->ctx] { return ctx.arg.maxPageSize; };
1375 setError("unknown constant: " + s);
1376 return [] { return 0; };
1377}
1378
1379// Parses Tok as an integer. It recognizes hexadecimal (prefixed with
1380// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may
1381// have "K" (Ki) or "M" (Mi) suffixes.
1382static std::optional<uint64_t> parseInt(StringRef tok) {
1383 // Hexadecimal
1384 uint64_t val;
1385 if (tok.starts_with_insensitive(Prefix: "0x")) {
1386 if (!to_integer(S: tok.substr(Start: 2), Num&: val, Base: 16))
1387 return std::nullopt;
1388 return val;
1389 }
1390 if (tok.ends_with_insensitive(Suffix: "H")) {
1391 if (!to_integer(S: tok.drop_back(), Num&: val, Base: 16))
1392 return std::nullopt;
1393 return val;
1394 }
1395
1396 // Decimal
1397 if (tok.ends_with_insensitive(Suffix: "K")) {
1398 if (!to_integer(S: tok.drop_back(), Num&: val, Base: 10))
1399 return std::nullopt;
1400 return val * 1024;
1401 }
1402 if (tok.ends_with_insensitive(Suffix: "M")) {
1403 if (!to_integer(S: tok.drop_back(), Num&: val, Base: 10))
1404 return std::nullopt;
1405 return val * 1024 * 1024;
1406 }
1407 if (!to_integer(S: tok, Num&: val, Base: 10))
1408 return std::nullopt;
1409 return val;
1410}
1411
1412ByteCommand *ScriptParser::readByteCommand(StringRef tok) {
1413 int size = StringSwitch<int>(tok)
1414 .Case(S: "BYTE", Value: 1)
1415 .Case(S: "SHORT", Value: 2)
1416 .Case(S: "LONG", Value: 4)
1417 .Case(S: "QUAD", Value: 8)
1418 .Default(Value: -1);
1419 if (size == -1)
1420 return nullptr;
1421
1422 const char *oldS = prevTok.data();
1423 Expr e = readParenExpr();
1424 std::string commandString = StringRef(oldS, curBuf.s.data() - oldS).str();
1425 squeezeSpaces(str&: commandString);
1426 return make<ByteCommand>(args&: e, args&: size, args: std::move(commandString));
1427}
1428
1429static std::optional<uint64_t> parseFlag(StringRef tok) {
1430 if (std::optional<uint64_t> asInt = parseInt(tok))
1431 return asInt;
1432#define CASE_ENT(enum) #enum, ELF::enum
1433 return StringSwitch<std::optional<uint64_t>>(tok)
1434 .Case(CASE_ENT(SHF_WRITE))
1435 .Case(CASE_ENT(SHF_ALLOC))
1436 .Case(CASE_ENT(SHF_EXECINSTR))
1437 .Case(CASE_ENT(SHF_MERGE))
1438 .Case(CASE_ENT(SHF_STRINGS))
1439 .Case(CASE_ENT(SHF_INFO_LINK))
1440 .Case(CASE_ENT(SHF_LINK_ORDER))
1441 .Case(CASE_ENT(SHF_OS_NONCONFORMING))
1442 .Case(CASE_ENT(SHF_GROUP))
1443 .Case(CASE_ENT(SHF_TLS))
1444 .Case(CASE_ENT(SHF_COMPRESSED))
1445 .Case(CASE_ENT(SHF_EXCLUDE))
1446 .Case(CASE_ENT(SHF_ARM_PURECODE))
1447 .Case(CASE_ENT(SHF_AARCH64_PURECODE))
1448 .Default(Value: std::nullopt);
1449#undef CASE_ENT
1450}
1451
1452// Reads the '(' <flags> ')' list of section flags in
1453// INPUT_SECTION_FLAGS '(' <flags> ')' in the
1454// following form:
1455// <flags> ::= <flag>
1456// | <flags> & flag
1457// <flag> ::= Recognized Flag Name, or Integer value of flag.
1458// If the first character of <flag> is a ! then this means without flag,
1459// otherwise with flag.
1460// Example: SHF_EXECINSTR & !SHF_WRITE means with flag SHF_EXECINSTR and
1461// without flag SHF_WRITE.
1462std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
1463 uint64_t withFlags = 0;
1464 uint64_t withoutFlags = 0;
1465 expect(expect: "(");
1466 while (!errCount(ctx)) {
1467 StringRef tok = readName();
1468 bool without = tok.consume_front(Prefix: "!");
1469 if (std::optional<uint64_t> flag = parseFlag(tok)) {
1470 if (without)
1471 withoutFlags |= *flag;
1472 else
1473 withFlags |= *flag;
1474 } else {
1475 setError("unrecognised flag: " + tok);
1476 }
1477 if (consume(tok: ")"))
1478 break;
1479 if (!consume(tok: "&")) {
1480 next();
1481 setError("expected & or )");
1482 }
1483 }
1484 return std::make_pair(x&: withFlags, y&: withoutFlags);
1485}
1486
1487StringRef ScriptParser::readParenName() {
1488 expect(expect: "(");
1489 auto saved = std::exchange(obj&: lexState, new_val: State::Script);
1490 StringRef name = readName();
1491 lexState = saved;
1492 expect(expect: ")");
1493 return name;
1494}
1495
1496static void checkIfExists(LinkerScript &script, const OutputSection &osec,
1497 StringRef location) {
1498 if (osec.location.empty() && script.errorOnMissingSection)
1499 script.recordError(msg: location + ": undefined section " + osec.name);
1500}
1501
1502static bool isValidSymbolName(StringRef s) {
1503 auto valid = [](char c) {
1504 return isAlnum(C: c) || c == '$' || c == '.' || c == '_';
1505 };
1506 return !s.empty() && !isDigit(C: s[0]) && llvm::all_of(Range&: s, P: valid);
1507}
1508
1509Expr ScriptParser::readPrimary() {
1510 if (peek() == "(")
1511 return readParenExpr();
1512
1513 if (consume(tok: "~")) {
1514 Expr e = readPrimary();
1515 return [=] { return ~e().getValue(); };
1516 }
1517 if (consume(tok: "!")) {
1518 Expr e = readPrimary();
1519 return [=] { return !e().getValue(); };
1520 }
1521 if (consume(tok: "-")) {
1522 Expr e = readPrimary();
1523 return [=] { return -e().getValue(); };
1524 }
1525 if (consume(tok: "+"))
1526 return readPrimary();
1527
1528 StringRef tok = next();
1529 std::string location = getCurrentLocation();
1530
1531 // Built-in functions are parsed here.
1532 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
1533 if (tok == "ABSOLUTE") {
1534 Expr inner = readParenExpr();
1535 return [=] {
1536 ExprValue i = inner();
1537 i.forceAbsolute = true;
1538 return i;
1539 };
1540 }
1541 if (tok == "ADDR") {
1542 StringRef name = readParenName();
1543 OutputSection *osec = &ctx.script->getOrCreateOutputSection(name)->osec;
1544 osec->usedInExpression = true;
1545 return [=, s = ctx.script]() -> ExprValue {
1546 checkIfExists(script&: *s, osec: *osec, location);
1547 return {osec, false, 0, location};
1548 };
1549 }
1550 if (tok == "ALIGN") {
1551 expect(expect: "(");
1552 Expr e = readExpr();
1553 if (consume(tok: ")")) {
1554 e = checkAlignment(ctx, e, loc&: location);
1555 return [=, s = ctx.script] {
1556 return alignToPowerOf2(Value: s->getDot(), Align: e().getValue());
1557 };
1558 }
1559 expect(expect: ",");
1560 Expr e2 = checkAlignment(ctx, e: readExpr(), loc&: location);
1561 expect(expect: ")");
1562 return [=] {
1563 ExprValue v = e();
1564 v.alignment = e2().getValue();
1565 return v;
1566 };
1567 }
1568 if (tok == "ALIGNOF") {
1569 StringRef name = readParenName();
1570 OutputSection *osec = &ctx.script->getOrCreateOutputSection(name)->osec;
1571 return [=, s = ctx.script] {
1572 checkIfExists(script&: *s, osec: *osec, location);
1573 return osec->addralign;
1574 };
1575 }
1576 if (tok == "ASSERT")
1577 return readAssert();
1578 if (tok == "CONSTANT")
1579 return readConstant();
1580 if (tok == "DATA_SEGMENT_ALIGN") {
1581 expect(expect: "(");
1582 Expr e = readExpr();
1583 expect(expect: ",");
1584 readExpr();
1585 expect(expect: ")");
1586 ctx.script->seenDataAlign = true;
1587 return [=, s = ctx.script] {
1588 uint64_t align = std::max(a: uint64_t(1), b: e().getValue());
1589 return (s->getDot() + align - 1) & -align;
1590 };
1591 }
1592 if (tok == "DATA_SEGMENT_END") {
1593 expect(expect: "(");
1594 expect(expect: ".");
1595 expect(expect: ")");
1596 return [s = ctx.script] { return s->getDot(); };
1597 }
1598 if (tok == "DATA_SEGMENT_RELRO_END") {
1599 // GNU linkers implements more complicated logic to handle
1600 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and
1601 // just align to the next page boundary for simplicity.
1602 expect(expect: "(");
1603 readExpr();
1604 expect(expect: ",");
1605 readExpr();
1606 expect(expect: ")");
1607 ctx.script->seenRelroEnd = true;
1608 return [&ctx = this->ctx] {
1609 return alignToPowerOf2(Value: ctx.script->getDot(), Align: ctx.arg.maxPageSize);
1610 };
1611 }
1612 if (tok == "DEFINED") {
1613 StringRef name = readParenName();
1614 // Return 1 if s is defined. If the definition is only found in a linker
1615 // script, it must happen before this DEFINED.
1616 auto order = ctx.scriptSymOrderCounter++;
1617 return [=, &ctx = this->ctx] {
1618 Symbol *s = ctx.symtab->find(name);
1619 return s && s->isDefined() && ctx.scriptSymOrder.lookup(Val: s) < order ? 1
1620 : 0;
1621 };
1622 }
1623 if (tok == "LENGTH") {
1624 StringRef name = readParenName();
1625 if (!ctx.script->memoryRegions.contains(Key: name)) {
1626 setError("memory region not defined: " + name);
1627 return [] { return 0; };
1628 }
1629 return ctx.script->memoryRegions[name]->length;
1630 }
1631 if (tok == "LOADADDR") {
1632 StringRef name = readParenName();
1633 OutputSection *osec = &ctx.script->getOrCreateOutputSection(name)->osec;
1634 osec->usedInExpression = true;
1635 return [=, s = ctx.script] {
1636 checkIfExists(script&: *s, osec: *osec, location);
1637 return osec->getLMA();
1638 };
1639 }
1640 if (tok == "LOG2CEIL") {
1641 expect(expect: "(");
1642 Expr a = readExpr();
1643 expect(expect: ")");
1644 return [=] {
1645 // LOG2CEIL(0) is defined to be 0.
1646 return llvm::Log2_64_Ceil(Value: std::max(a: a().getValue(), UINT64_C(1)));
1647 };
1648 }
1649 if (tok == "MAX" || tok == "MIN") {
1650 expect(expect: "(");
1651 Expr a = readExpr();
1652 expect(expect: ",");
1653 Expr b = readExpr();
1654 expect(expect: ")");
1655 if (tok == "MIN")
1656 return [=] { return std::min(a: a().getValue(), b: b().getValue()); };
1657 return [=] { return std::max(a: a().getValue(), b: b().getValue()); };
1658 }
1659 if (tok == "ORIGIN") {
1660 StringRef name = readParenName();
1661 if (!ctx.script->memoryRegions.contains(Key: name)) {
1662 setError("memory region not defined: " + name);
1663 return [] { return 0; };
1664 }
1665 return ctx.script->memoryRegions[name]->origin;
1666 }
1667 if (tok == "SEGMENT_START") {
1668 expect(expect: "(");
1669 skip();
1670 expect(expect: ",");
1671 Expr e = readExpr();
1672 expect(expect: ")");
1673 return [=] { return e(); };
1674 }
1675 if (tok == "SIZEOF") {
1676 StringRef name = readParenName();
1677 OutputSection *cmd = &ctx.script->getOrCreateOutputSection(name)->osec;
1678 // Linker script does not create an output section if its content is empty.
1679 // We want to allow SIZEOF(.foo) where .foo is a section which happened to
1680 // be empty.
1681 return [=] { return cmd->size; };
1682 }
1683 if (tok == "SIZEOF_HEADERS")
1684 return [=, &ctx = ctx] { return elf::getHeaderSize(ctx); };
1685
1686 // Tok is the dot.
1687 if (tok == ".")
1688 return [=, s = ctx.script] { return s->getSymbolValue(name: tok, loc: location); };
1689
1690 // Tok is a literal number.
1691 if (std::optional<uint64_t> val = parseInt(tok))
1692 return [=] { return *val; };
1693
1694 // Tok is a symbol name.
1695 if (tok.starts_with(Prefix: "\""))
1696 tok = unquote(s: tok);
1697 else if (!isValidSymbolName(s: tok))
1698 setError("malformed number: " + tok);
1699 if (activeProvideSym)
1700 ctx.script->provideMap[*activeProvideSym].push_back(Elt: tok);
1701 else
1702 ctx.script->referencedSymbols.push_back(Elt: tok);
1703 return [=, s = ctx.script] { return s->getSymbolValue(name: tok, loc: location); };
1704}
1705
1706Expr ScriptParser::readTernary(Expr cond) {
1707 Expr l = readExpr();
1708 expect(expect: ":");
1709 Expr r = readExpr();
1710 return [=] { return cond().getValue() ? l() : r(); };
1711}
1712
1713Expr ScriptParser::readParenExpr() {
1714 expect(expect: "(");
1715 Expr e = readExpr();
1716 expect(expect: ")");
1717 return e;
1718}
1719
1720SmallVector<StringRef, 0> ScriptParser::readOutputSectionPhdrs() {
1721 SmallVector<StringRef, 0> phdrs;
1722 while (!errCount(ctx) && peek().starts_with(Prefix: ":")) {
1723 StringRef tok = next();
1724 phdrs.push_back(Elt: (tok.size() == 1) ? readName() : tok.substr(Start: 1));
1725 }
1726 return phdrs;
1727}
1728
1729// Read a program header type name. The next token must be a
1730// name of a program header type or a constant (e.g. "0x3").
1731unsigned ScriptParser::readPhdrType() {
1732 StringRef tok = next();
1733 if (std::optional<uint64_t> val = parseInt(tok))
1734 return *val;
1735
1736 unsigned ret = StringSwitch<unsigned>(tok)
1737 .Case(S: "PT_NULL", Value: PT_NULL)
1738 .Case(S: "PT_LOAD", Value: PT_LOAD)
1739 .Case(S: "PT_DYNAMIC", Value: PT_DYNAMIC)
1740 .Case(S: "PT_INTERP", Value: PT_INTERP)
1741 .Case(S: "PT_NOTE", Value: PT_NOTE)
1742 .Case(S: "PT_SHLIB", Value: PT_SHLIB)
1743 .Case(S: "PT_PHDR", Value: PT_PHDR)
1744 .Case(S: "PT_TLS", Value: PT_TLS)
1745 .Case(S: "PT_GNU_EH_FRAME", Value: PT_GNU_EH_FRAME)
1746 .Case(S: "PT_GNU_STACK", Value: PT_GNU_STACK)
1747 .Case(S: "PT_GNU_RELRO", Value: PT_GNU_RELRO)
1748 .Case(S: "PT_OPENBSD_MUTABLE", Value: PT_OPENBSD_MUTABLE)
1749 .Case(S: "PT_OPENBSD_RANDOMIZE", Value: PT_OPENBSD_RANDOMIZE)
1750 .Case(S: "PT_OPENBSD_SYSCALLS", Value: PT_OPENBSD_SYSCALLS)
1751 .Case(S: "PT_OPENBSD_WXNEEDED", Value: PT_OPENBSD_WXNEEDED)
1752 .Case(S: "PT_OPENBSD_BOOTDATA", Value: PT_OPENBSD_BOOTDATA)
1753 .Default(Value: -1);
1754
1755 if (ret == (unsigned)-1) {
1756 setError("invalid program header type: " + tok);
1757 return PT_NULL;
1758 }
1759 return ret;
1760}
1761
1762// Reads an anonymous version declaration.
1763void ScriptParser::readAnonymousDeclaration() {
1764 SmallVector<SymbolVersion, 0> locals;
1765 SmallVector<SymbolVersion, 0> globals;
1766 std::tie(args&: locals, args&: globals) = readSymbols();
1767 for (const SymbolVersion &pat : locals)
1768 ctx.arg.versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(Elt: pat);
1769 for (const SymbolVersion &pat : globals)
1770 ctx.arg.versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(Elt: pat);
1771
1772 expect(expect: ";");
1773}
1774
1775// Reads a non-anonymous version definition,
1776// e.g. "VerStr { global: foo; bar; local: *; };".
1777void ScriptParser::readVersionDeclaration(StringRef verStr) {
1778 // Read a symbol list.
1779 SmallVector<SymbolVersion, 0> locals;
1780 SmallVector<SymbolVersion, 0> globals;
1781 std::tie(args&: locals, args&: globals) = readSymbols();
1782
1783 // Create a new version definition and add that to the global symbols.
1784 VersionDefinition ver;
1785 ver.name = verStr;
1786 ver.nonLocalPatterns = std::move(globals);
1787 ver.localPatterns = std::move(locals);
1788 ver.id = ctx.arg.versionDefinitions.size();
1789 ctx.arg.versionDefinitions.push_back(Elt: ver);
1790
1791 // Each version may have a parent version. For example, "Ver2"
1792 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1"
1793 // as a parent. This version hierarchy is, probably against your
1794 // instinct, purely for hint; the runtime doesn't care about it
1795 // at all. In LLD, we simply ignore it.
1796 if (next() != ";")
1797 expect(expect: ";");
1798}
1799
1800bool elf::hasWildcard(StringRef s) {
1801 return s.find_first_of(Chars: "?*[") != StringRef::npos;
1802}
1803
1804// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };".
1805std::pair<SmallVector<SymbolVersion, 0>, SmallVector<SymbolVersion, 0>>
1806ScriptParser::readSymbols() {
1807 SmallVector<SymbolVersion, 0> locals;
1808 SmallVector<SymbolVersion, 0> globals;
1809 SmallVector<SymbolVersion, 0> *v = &globals;
1810
1811 while (auto tok = till(tok: "}")) {
1812 if (tok == "extern") {
1813 SmallVector<SymbolVersion, 0> ext = readVersionExtern();
1814 v->insert(I: v->end(), From: ext.begin(), To: ext.end());
1815 } else {
1816 if (tok == "local" && consume(tok: ":")) {
1817 v = &locals;
1818 continue;
1819 }
1820 if (tok == "global" && consume(tok: ":")) {
1821 v = &globals;
1822 continue;
1823 }
1824 v->push_back(Elt: {.name: unquote(s: tok), .isExternCpp: false, .hasWildcard: hasWildcard(s: tok)});
1825 }
1826 expect(expect: ";");
1827 }
1828 return {locals, globals};
1829}
1830
1831// Reads an "extern C++" directive, e.g.,
1832// "extern "C++" { ns::*; "f(int, double)"; };"
1833//
1834// The last semicolon is optional. E.g. this is OK:
1835// "extern "C++" { ns::*; "f(int, double)" };"
1836SmallVector<SymbolVersion, 0> ScriptParser::readVersionExtern() {
1837 StringRef tok = next();
1838 bool isCXX = tok == "\"C++\"";
1839 if (!isCXX && tok != "\"C\"")
1840 setError("Unknown language");
1841 expect(expect: "{");
1842
1843 SmallVector<SymbolVersion, 0> ret;
1844 while (auto tok = till(tok: "}")) {
1845 ret.push_back(
1846 Elt: {.name: unquote(s: tok), .isExternCpp: isCXX, .hasWildcard: !tok.str.starts_with(Prefix: "\"") && hasWildcard(s: tok)});
1847 if (consume(tok: "}"))
1848 return ret;
1849 expect(expect: ";");
1850 }
1851 return ret;
1852}
1853
1854Expr ScriptParser::readMemoryAssignment(StringRef s1, StringRef s2,
1855 StringRef s3) {
1856 if (!consume(tok: s1) && !consume(tok: s2) && !consume(tok: s3)) {
1857 setError("expected one of: " + s1 + ", " + s2 + ", or " + s3);
1858 return [] { return 0; };
1859 }
1860 expect(expect: "=");
1861 return readExpr();
1862}
1863
1864// Parse the MEMORY command as specified in:
1865// https://sourceware.org/binutils/docs/ld/MEMORY.html
1866//
1867// MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... }
1868void ScriptParser::readMemory() {
1869 expect(expect: "{");
1870 while (auto tok = till(tok: "}"))
1871 readMemoryStmt(tok);
1872}
1873
1874void ScriptParser::readMemoryStmt(StringRef tok) {
1875 if (tok == "INCLUDE") {
1876 readInclude(parse: [&] { readStmts(readStmt: [&](StringRef t) { readMemoryStmt(tok: t); }); });
1877 return;
1878 }
1879
1880 uint32_t flags = 0;
1881 uint32_t invFlags = 0;
1882 uint32_t negFlags = 0;
1883 uint32_t negInvFlags = 0;
1884 if (consume(tok: "(")) {
1885 readMemoryAttributes(flags, invFlags, negFlags, negInvFlags);
1886 expect(expect: ")");
1887 }
1888 expect(expect: ":");
1889
1890 Expr origin = readMemoryAssignment(s1: "ORIGIN", s2: "org", s3: "o");
1891 expect(expect: ",");
1892 Expr length = readMemoryAssignment(s1: "LENGTH", s2: "len", s3: "l");
1893
1894 // Add the memory region to the region map.
1895 MemoryRegion *mr = make<MemoryRegion>(args&: tok, args&: origin, args&: length, args&: flags, args&: invFlags,
1896 args&: negFlags, args&: negInvFlags);
1897 if (!ctx.script->memoryRegions.insert(KV: {tok, mr}).second)
1898 setError("region '" + tok + "' already defined");
1899}
1900
1901// This function parses the attributes used to match against section
1902// flags when placing output sections in a memory region. These flags
1903// are only used when an explicit memory region name is not used.
1904void ScriptParser::readMemoryAttributes(uint32_t &flags, uint32_t &invFlags,
1905 uint32_t &negFlags,
1906 uint32_t &negInvFlags) {
1907 bool invert = false;
1908
1909 for (char c : next().lower()) {
1910 if (c == '!') {
1911 invert = !invert;
1912 std::swap(a&: flags, b&: negFlags);
1913 std::swap(a&: invFlags, b&: negInvFlags);
1914 continue;
1915 }
1916 if (c == 'w')
1917 flags |= SHF_WRITE;
1918 else if (c == 'x')
1919 flags |= SHF_EXECINSTR;
1920 else if (c == 'a')
1921 flags |= SHF_ALLOC;
1922 else if (c == 'r')
1923 invFlags |= SHF_WRITE;
1924 else
1925 setError("invalid memory region attribute");
1926 }
1927
1928 if (invert) {
1929 std::swap(a&: flags, b&: negFlags);
1930 std::swap(a&: invFlags, b&: negInvFlags);
1931 }
1932}
1933
1934void elf::readLinkerScript(Ctx &ctx, MemoryBufferRef mb) {
1935 llvm::TimeTraceScope timeScope("Read linker script",
1936 mb.getBufferIdentifier());
1937 ScriptParser(ctx, mb).readLinkerScript();
1938}
1939
1940void elf::readVersionScript(Ctx &ctx, MemoryBufferRef mb) {
1941 llvm::TimeTraceScope timeScope("Read version script",
1942 mb.getBufferIdentifier());
1943 ScriptParser(ctx, mb).readVersionScript();
1944}
1945
1946void elf::readDynamicList(Ctx &ctx, MemoryBufferRef mb) {
1947 llvm::TimeTraceScope timeScope("Read dynamic list", mb.getBufferIdentifier());
1948 ScriptParser(ctx, mb).readDynamicList();
1949}
1950
1951void elf::readDefsym(Ctx &ctx, MemoryBufferRef mb) {
1952 ScriptParser(ctx, mb).readDefsym();
1953}
1954