1 | //===- LinkerScript.cpp ---------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the parser/evaluator of the linker script. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "LinkerScript.h" |
14 | #include "Config.h" |
15 | #include "InputFiles.h" |
16 | #include "InputSection.h" |
17 | #include "OutputSections.h" |
18 | #include "SymbolTable.h" |
19 | #include "Symbols.h" |
20 | #include "SyntheticSections.h" |
21 | #include "Target.h" |
22 | #include "Writer.h" |
23 | #include "lld/Common/CommonLinkerContext.h" |
24 | #include "lld/Common/Strings.h" |
25 | #include "llvm/ADT/STLExtras.h" |
26 | #include "llvm/ADT/StringRef.h" |
27 | #include "llvm/BinaryFormat/ELF.h" |
28 | #include "llvm/Support/Casting.h" |
29 | #include "llvm/Support/Endian.h" |
30 | #include "llvm/Support/ErrorHandling.h" |
31 | #include "llvm/Support/TimeProfiler.h" |
32 | #include <algorithm> |
33 | #include <cassert> |
34 | #include <cstddef> |
35 | #include <cstdint> |
36 | #include <limits> |
37 | #include <string> |
38 | #include <vector> |
39 | |
40 | using namespace llvm; |
41 | using namespace llvm::ELF; |
42 | using namespace llvm::object; |
43 | using namespace llvm::support::endian; |
44 | using namespace lld; |
45 | using namespace lld::elf; |
46 | |
// Definition of the global `elf::script` object. Code in this file reaches
// linker-script state through it (e.g. `script->hasSectionsCommand`).
ScriptWrapper elf::script;
48 | |
49 | static bool isSectionPrefix(StringRef prefix, StringRef name) { |
50 | return name.consume_front(Prefix: prefix) && (name.empty() || name[0] == '.'); |
51 | } |
52 | |
53 | static StringRef getOutputSectionName(const InputSectionBase *s) { |
54 | // This is for --emit-relocs and -r. If .text.foo is emitted as .text.bar, we |
55 | // want to emit .rela.text.foo as .rela.text.bar for consistency (this is not |
56 | // technically required, but not doing it is odd). This code guarantees that. |
57 | if (auto *isec = dyn_cast<InputSection>(Val: s)) { |
58 | if (InputSectionBase *rel = isec->getRelocatedSection()) { |
59 | OutputSection *out = rel->getOutputSection(); |
60 | if (!out) { |
61 | assert(config->relocatable && (rel->flags & SHF_LINK_ORDER)); |
62 | return s->name; |
63 | } |
64 | if (s->type == SHT_CREL) |
65 | return saver().save(S: ".crel" + out->name); |
66 | if (s->type == SHT_RELA) |
67 | return saver().save(S: ".rela" + out->name); |
68 | return saver().save(S: ".rel" + out->name); |
69 | } |
70 | } |
71 | |
72 | if (config->relocatable) |
73 | return s->name; |
74 | |
75 | // A BssSection created for a common symbol is identified as "COMMON" in |
76 | // linker scripts. It should go to .bss section. |
77 | if (s->name == "COMMON" ) |
78 | return ".bss" ; |
79 | |
80 | if (script->hasSectionsCommand) |
81 | return s->name; |
82 | |
83 | // When no SECTIONS is specified, emulate GNU ld's internal linker scripts |
84 | // by grouping sections with certain prefixes. |
85 | |
86 | // GNU ld places text sections with prefix ".text.hot.", ".text.unknown.", |
87 | // ".text.unlikely.", ".text.startup." or ".text.exit." before others. |
88 | // We provide an option -z keep-text-section-prefix to group such sections |
89 | // into separate output sections. This is more flexible. See also |
90 | // sortISDBySectionOrder(). |
91 | // ".text.unknown" means the hotness of the section is unknown. When |
92 | // SampleFDO is used, if a function doesn't have sample, it could be very |
93 | // cold or it could be a new function never being sampled. Those functions |
94 | // will be kept in the ".text.unknown" section. |
95 | // ".text.split." holds symbols which are split out from functions in other |
96 | // input sections. For example, with -fsplit-machine-functions, placing the |
97 | // cold parts in .text.split instead of .text.unlikely mitigates against poor |
98 | // profile inaccuracy. Techniques such as hugepage remapping can make |
99 | // conservative decisions at the section granularity. |
100 | if (isSectionPrefix(prefix: ".text" , name: s->name)) { |
101 | if (config->zKeepTextSectionPrefix) |
102 | for (StringRef v : {".text.hot" , ".text.unknown" , ".text.unlikely" , |
103 | ".text.startup" , ".text.exit" , ".text.split" }) |
104 | if (isSectionPrefix(prefix: v.substr(Start: 5), name: s->name.substr(Start: 5))) |
105 | return v; |
106 | return ".text" ; |
107 | } |
108 | |
109 | for (StringRef v : |
110 | {".data.rel.ro" , ".data" , ".rodata" , ".bss.rel.ro" , ".bss" , ".ldata" , |
111 | ".lrodata" , ".lbss" , ".gcc_except_table" , ".init_array" , ".fini_array" , |
112 | ".tbss" , ".tdata" , ".ARM.exidx" , ".ARM.extab" , ".ctors" , ".dtors" }) |
113 | if (isSectionPrefix(prefix: v, name: s->name)) |
114 | return v; |
115 | |
116 | return s->name; |
117 | } |
118 | |
119 | uint64_t ExprValue::getValue() const { |
120 | if (sec) |
121 | return alignToPowerOf2(Value: sec->getOutputSection()->addr + sec->getOffset(offset: val), |
122 | Align: alignment); |
123 | return alignToPowerOf2(Value: val, Align: alignment); |
124 | } |
125 | |
126 | uint64_t ExprValue::getSecAddr() const { |
127 | return sec ? sec->getOutputSection()->addr + sec->getOffset(offset: 0) : 0; |
128 | } |
129 | |
130 | uint64_t ExprValue::getSectionOffset() const { |
131 | return getValue() - getSecAddr(); |
132 | } |
133 | |
134 | OutputDesc *LinkerScript::createOutputSection(StringRef name, |
135 | StringRef location) { |
136 | OutputDesc *&secRef = nameToOutputSection[CachedHashStringRef(name)]; |
137 | OutputDesc *sec; |
138 | if (secRef && secRef->osec.location.empty()) { |
139 | // There was a forward reference. |
140 | sec = secRef; |
141 | } else { |
142 | sec = make<OutputDesc>(args&: name, args: SHT_PROGBITS, args: 0); |
143 | if (!secRef) |
144 | secRef = sec; |
145 | } |
146 | sec->osec.location = std::string(location); |
147 | return sec; |
148 | } |
149 | |
150 | OutputDesc *LinkerScript::getOrCreateOutputSection(StringRef name) { |
151 | OutputDesc *&cmdRef = nameToOutputSection[CachedHashStringRef(name)]; |
152 | if (!cmdRef) |
153 | cmdRef = make<OutputDesc>(args&: name, args: SHT_PROGBITS, args: 0); |
154 | return cmdRef; |
155 | } |
156 | |
// Expands the memory region by the specified size.
//
// Advances `memRegion->curPos` by `size`. `secName` is accepted but is not
// read by this function.
static void expandMemoryRegion(MemoryRegion *memRegion, uint64_t size,
                               StringRef secName) {
  memRegion->curPos += size;
}
162 | |
163 | void LinkerScript::expandMemoryRegions(uint64_t size) { |
164 | if (state->memRegion) |
165 | expandMemoryRegion(memRegion: state->memRegion, size, secName: state->outSec->name); |
166 | // Only expand the LMARegion if it is different from memRegion. |
167 | if (state->lmaRegion && state->memRegion != state->lmaRegion) |
168 | expandMemoryRegion(memRegion: state->lmaRegion, size, secName: state->outSec->name); |
169 | } |
170 | |
171 | void LinkerScript::expandOutputSection(uint64_t size) { |
172 | state->outSec->size += size; |
173 | expandMemoryRegions(size); |
174 | } |
175 | |
176 | void LinkerScript::setDot(Expr e, const Twine &loc, bool inSec) { |
177 | uint64_t val = e().getValue(); |
178 | // If val is smaller and we are in an output section, record the error and |
179 | // report it if this is the last assignAddresses iteration. dot may be smaller |
180 | // if there is another assignAddresses iteration. |
181 | if (val < dot && inSec) { |
182 | recordError(msg: loc + ": unable to move location counter (0x" + |
183 | Twine::utohexstr(Val: dot) + ") backward to 0x" + |
184 | Twine::utohexstr(Val: val) + " for section '" + state->outSec->name + |
185 | "'" ); |
186 | } |
187 | |
188 | // Update to location counter means update to section size. |
189 | if (inSec) |
190 | expandOutputSection(size: val - dot); |
191 | |
192 | dot = val; |
193 | } |
194 | |
195 | // Used for handling linker symbol assignments, for both finalizing |
196 | // their values and doing early declarations. Returns true if symbol |
197 | // should be defined from linker script. |
198 | static bool shouldDefineSym(SymbolAssignment *cmd) { |
199 | if (cmd->name == "." ) |
200 | return false; |
201 | |
202 | return !cmd->provide || LinkerScript::shouldAddProvideSym(symName: cmd->name); |
203 | } |
204 | |
205 | // Called by processSymbolAssignments() to assign definitions to |
206 | // linker-script-defined symbols. |
207 | void LinkerScript::addSymbol(SymbolAssignment *cmd) { |
208 | if (!shouldDefineSym(cmd)) |
209 | return; |
210 | |
211 | // Define a symbol. |
212 | ExprValue value = cmd->expression(); |
213 | SectionBase *sec = value.isAbsolute() ? nullptr : value.sec; |
214 | uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT; |
215 | |
216 | // When this function is called, section addresses have not been |
217 | // fixed yet. So, we may or may not know the value of the RHS |
218 | // expression. |
219 | // |
220 | // For example, if an expression is `x = 42`, we know x is always 42. |
221 | // However, if an expression is `x = .`, there's no way to know its |
222 | // value at the moment. |
223 | // |
224 | // We want to set symbol values early if we can. This allows us to |
225 | // use symbols as variables in linker scripts. Doing so allows us to |
226 | // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`. |
227 | uint64_t symValue = value.sec ? 0 : value.getValue(); |
228 | |
229 | Defined newSym(createInternalFile(name: cmd->location), cmd->name, STB_GLOBAL, |
230 | visibility, value.type, symValue, 0, sec); |
231 | |
232 | Symbol *sym = symtab.insert(name: cmd->name); |
233 | sym->mergeProperties(other: newSym); |
234 | newSym.overwrite(sym&: *sym); |
235 | sym->isUsedInRegularObj = true; |
236 | cmd->sym = cast<Defined>(Val: sym); |
237 | } |
238 | |
239 | // This function is called from LinkerScript::declareSymbols. |
240 | // It creates a placeholder symbol if needed. |
241 | static void declareSymbol(SymbolAssignment *cmd) { |
242 | if (!shouldDefineSym(cmd)) |
243 | return; |
244 | |
245 | uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT; |
246 | Defined newSym(ctx.internalFile, cmd->name, STB_GLOBAL, visibility, |
247 | STT_NOTYPE, 0, 0, nullptr); |
248 | |
249 | // If the symbol is already defined, its order is 0 (with absence indicating |
250 | // 0); otherwise it's assigned the order of the SymbolAssignment. |
251 | Symbol *sym = symtab.insert(name: cmd->name); |
252 | if (!sym->isDefined()) |
253 | ctx.scriptSymOrder.insert(KV: {sym, cmd->symOrder}); |
254 | |
255 | // We can't calculate final value right now. |
256 | sym->mergeProperties(other: newSym); |
257 | newSym.overwrite(sym&: *sym); |
258 | |
259 | cmd->sym = cast<Defined>(Val: sym); |
260 | cmd->provide = false; |
261 | sym->isUsedInRegularObj = true; |
262 | sym->scriptDefined = true; |
263 | } |
264 | |
265 | using SymbolAssignmentMap = |
266 | DenseMap<const Defined *, std::pair<SectionBase *, uint64_t>>; |
267 | |
268 | // Collect section/value pairs of linker-script-defined symbols. This is used to |
269 | // check whether symbol values converge. |
270 | static SymbolAssignmentMap |
271 | getSymbolAssignmentValues(ArrayRef<SectionCommand *> sectionCommands) { |
272 | SymbolAssignmentMap ret; |
273 | for (SectionCommand *cmd : sectionCommands) { |
274 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
275 | if (assign->sym) // sym is nullptr for dot. |
276 | ret.try_emplace(Key: assign->sym, Args: std::make_pair(x&: assign->sym->section, |
277 | y&: assign->sym->value)); |
278 | continue; |
279 | } |
280 | for (SectionCommand *subCmd : cast<OutputDesc>(Val: cmd)->osec.commands) |
281 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: subCmd)) |
282 | if (assign->sym) |
283 | ret.try_emplace(Key: assign->sym, Args: std::make_pair(x&: assign->sym->section, |
284 | y&: assign->sym->value)); |
285 | } |
286 | return ret; |
287 | } |
288 | |
289 | // Returns the lexicographical smallest (for determinism) Defined whose |
290 | // section/value has changed. |
291 | static const Defined * |
292 | getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) { |
293 | const Defined *changed = nullptr; |
294 | for (auto &it : oldValues) { |
295 | const Defined *sym = it.first; |
296 | if (std::make_pair(x: sym->section, y: sym->value) != it.second && |
297 | (!changed || sym->getName() < changed->getName())) |
298 | changed = sym; |
299 | } |
300 | return changed; |
301 | } |
302 | |
303 | // Process INSERT [AFTER|BEFORE] commands. For each command, we move the |
304 | // specified output section to the designated place. |
305 | void LinkerScript::processInsertCommands() { |
306 | SmallVector<OutputDesc *, 0> moves; |
307 | for (const InsertCommand &cmd : insertCommands) { |
308 | if (config->enableNonContiguousRegions) |
309 | error(msg: "INSERT cannot be used with --enable-non-contiguous-regions" ); |
310 | |
311 | for (StringRef name : cmd.names) { |
312 | // If base is empty, it may have been discarded by |
313 | // adjustOutputSections(). We do not handle such output sections. |
314 | auto from = llvm::find_if(Range&: sectionCommands, P: [&](SectionCommand *subCmd) { |
315 | return isa<OutputDesc>(Val: subCmd) && |
316 | cast<OutputDesc>(Val: subCmd)->osec.name == name; |
317 | }); |
318 | if (from == sectionCommands.end()) |
319 | continue; |
320 | moves.push_back(Elt: cast<OutputDesc>(Val: *from)); |
321 | sectionCommands.erase(CI: from); |
322 | } |
323 | |
324 | auto insertPos = |
325 | llvm::find_if(Range&: sectionCommands, P: [&cmd](SectionCommand *subCmd) { |
326 | auto *to = dyn_cast<OutputDesc>(Val: subCmd); |
327 | return to != nullptr && to->osec.name == cmd.where; |
328 | }); |
329 | if (insertPos == sectionCommands.end()) { |
330 | error(msg: "unable to insert " + cmd.names[0] + |
331 | (cmd.isAfter ? " after " : " before " ) + cmd.where); |
332 | } else { |
333 | if (cmd.isAfter) |
334 | ++insertPos; |
335 | sectionCommands.insert(I: insertPos, From: moves.begin(), To: moves.end()); |
336 | } |
337 | moves.clear(); |
338 | } |
339 | } |
340 | |
341 | // Symbols defined in script should not be inlined by LTO. At the same time |
342 | // we don't know their final values until late stages of link. Here we scan |
343 | // over symbol assignment commands and create placeholder symbols if needed. |
344 | void LinkerScript::declareSymbols() { |
345 | assert(!state); |
346 | for (SectionCommand *cmd : sectionCommands) { |
347 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
348 | declareSymbol(cmd: assign); |
349 | continue; |
350 | } |
351 | |
352 | // If the output section directive has constraints, |
353 | // we can't say for sure if it is going to be included or not. |
354 | // Skip such sections for now. Improve the checks if we ever |
355 | // need symbols from that sections to be declared early. |
356 | const OutputSection &sec = cast<OutputDesc>(Val: cmd)->osec; |
357 | if (sec.constraint != ConstraintKind::NoConstraint) |
358 | continue; |
359 | for (SectionCommand *cmd : sec.commands) |
360 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) |
361 | declareSymbol(cmd: assign); |
362 | } |
363 | } |
364 | |
365 | // This function is called from assignAddresses, while we are |
366 | // fixing the output section addresses. This function is supposed |
367 | // to set the final value for a given symbol assignment. |
368 | void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) { |
369 | if (cmd->name == "." ) { |
370 | setDot(e: cmd->expression, loc: cmd->location, inSec); |
371 | return; |
372 | } |
373 | |
374 | if (!cmd->sym) |
375 | return; |
376 | |
377 | ExprValue v = cmd->expression(); |
378 | if (v.isAbsolute()) { |
379 | cmd->sym->section = nullptr; |
380 | cmd->sym->value = v.getValue(); |
381 | } else { |
382 | cmd->sym->section = v.sec; |
383 | cmd->sym->value = v.getSectionOffset(); |
384 | } |
385 | cmd->sym->type = v.type; |
386 | } |
387 | |
388 | static inline StringRef getFilename(const InputFile *file) { |
389 | return file ? file->getNameForScript() : StringRef(); |
390 | } |
391 | |
392 | bool InputSectionDescription::matchesFile(const InputFile *file) const { |
393 | if (filePat.isTrivialMatchAll()) |
394 | return true; |
395 | |
396 | if (!matchesFileCache || matchesFileCache->first != file) |
397 | matchesFileCache.emplace(args&: file, args: filePat.match(s: getFilename(file))); |
398 | |
399 | return matchesFileCache->second; |
400 | } |
401 | |
402 | bool SectionPattern::excludesFile(const InputFile *file) const { |
403 | if (excludedFilePat.empty()) |
404 | return false; |
405 | |
406 | if (!excludesFileCache || excludesFileCache->first != file) |
407 | excludesFileCache.emplace(args&: file, args: excludedFilePat.match(s: getFilename(file))); |
408 | |
409 | return excludesFileCache->second; |
410 | } |
411 | |
412 | bool LinkerScript::shouldKeep(InputSectionBase *s) { |
413 | for (InputSectionDescription *id : keptSections) |
414 | if (id->matchesFile(file: s->file)) |
415 | for (SectionPattern &p : id->sectionPatterns) |
416 | if (p.sectionPat.match(s: s->name) && |
417 | (s->flags & id->withFlags) == id->withFlags && |
418 | (s->flags & id->withoutFlags) == 0) |
419 | return true; |
420 | return false; |
421 | } |
422 | |
423 | // A helper function for the SORT() command. |
424 | static bool matchConstraints(ArrayRef<InputSectionBase *> sections, |
425 | ConstraintKind kind) { |
426 | if (kind == ConstraintKind::NoConstraint) |
427 | return true; |
428 | |
429 | bool isRW = llvm::any_of( |
430 | Range&: sections, P: [](InputSectionBase *sec) { return sec->flags & SHF_WRITE; }); |
431 | |
432 | return (isRW && kind == ConstraintKind::ReadWrite) || |
433 | (!isRW && kind == ConstraintKind::ReadOnly); |
434 | } |
435 | |
436 | static void sortSections(MutableArrayRef<InputSectionBase *> vec, |
437 | SortSectionPolicy k) { |
438 | auto alignmentComparator = [](InputSectionBase *a, InputSectionBase *b) { |
439 | // ">" is not a mistake. Sections with larger alignments are placed |
440 | // before sections with smaller alignments in order to reduce the |
441 | // amount of padding necessary. This is compatible with GNU. |
442 | return a->addralign > b->addralign; |
443 | }; |
444 | auto nameComparator = [](InputSectionBase *a, InputSectionBase *b) { |
445 | return a->name < b->name; |
446 | }; |
447 | auto priorityComparator = [](InputSectionBase *a, InputSectionBase *b) { |
448 | return getPriority(s: a->name) < getPriority(s: b->name); |
449 | }; |
450 | |
451 | switch (k) { |
452 | case SortSectionPolicy::Default: |
453 | case SortSectionPolicy::None: |
454 | return; |
455 | case SortSectionPolicy::Alignment: |
456 | return llvm::stable_sort(Range&: vec, C: alignmentComparator); |
457 | case SortSectionPolicy::Name: |
458 | return llvm::stable_sort(Range&: vec, C: nameComparator); |
459 | case SortSectionPolicy::Priority: |
460 | return llvm::stable_sort(Range&: vec, C: priorityComparator); |
461 | case SortSectionPolicy::Reverse: |
462 | return std::reverse(first: vec.begin(), last: vec.end()); |
463 | } |
464 | } |
465 | |
466 | // Sort sections as instructed by SORT-family commands and --sort-section |
467 | // option. Because SORT-family commands can be nested at most two depth |
468 | // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command |
469 | // line option is respected even if a SORT command is given, the exact |
470 | // behavior we have here is a bit complicated. Here are the rules. |
471 | // |
472 | // 1. If two SORT commands are given, --sort-section is ignored. |
473 | // 2. If one SORT command is given, and if it is not SORT_NONE, |
474 | // --sort-section is handled as an inner SORT command. |
475 | // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. |
476 | // 4. If no SORT command is given, sort according to --sort-section. |
477 | static void sortInputSections(MutableArrayRef<InputSectionBase *> vec, |
478 | SortSectionPolicy outer, |
479 | SortSectionPolicy inner) { |
480 | if (outer == SortSectionPolicy::None) |
481 | return; |
482 | |
483 | if (inner == SortSectionPolicy::Default) |
484 | sortSections(vec, k: config->sortSection); |
485 | else |
486 | sortSections(vec, k: inner); |
487 | sortSections(vec, k: outer); |
488 | } |
489 | |
490 | // Compute and remember which sections the InputSectionDescription matches. |
491 | SmallVector<InputSectionBase *, 0> |
492 | LinkerScript::computeInputSections(const InputSectionDescription *cmd, |
493 | ArrayRef<InputSectionBase *> sections, |
494 | const OutputSection &outCmd) { |
495 | SmallVector<InputSectionBase *, 0> ret; |
496 | SmallVector<size_t, 0> indexes; |
497 | DenseSet<size_t> seen; |
498 | DenseSet<InputSectionBase *> spills; |
499 | auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) { |
500 | llvm::sort(C: MutableArrayRef<size_t>(indexes).slice(N: begin, M: end - begin)); |
501 | for (size_t i = begin; i != end; ++i) |
502 | ret[i] = sections[indexes[i]]; |
503 | sortInputSections( |
504 | vec: MutableArrayRef<InputSectionBase *>(ret).slice(N: begin, M: end - begin), |
505 | outer: config->sortSection, inner: SortSectionPolicy::None); |
506 | }; |
507 | |
508 | // Collects all sections that satisfy constraints of Cmd. |
509 | size_t sizeAfterPrevSort = 0; |
510 | for (const SectionPattern &pat : cmd->sectionPatterns) { |
511 | size_t sizeBeforeCurrPat = ret.size(); |
512 | |
513 | for (size_t i = 0, e = sections.size(); i != e; ++i) { |
514 | // Skip if the section is dead or has been matched by a previous pattern |
515 | // in this input section description. |
516 | InputSectionBase *sec = sections[i]; |
517 | if (!sec->isLive() || seen.contains(V: i)) |
518 | continue; |
519 | |
520 | // For --emit-relocs we have to ignore entries like |
521 | // .rela.dyn : { *(.rela.data) } |
522 | // which are common because they are in the default bfd script. |
523 | // We do not ignore SHT_REL[A] linker-synthesized sections here because |
524 | // want to support scripts that do custom layout for them. |
525 | if (isa<InputSection>(Val: sec) && |
526 | cast<InputSection>(Val: sec)->getRelocatedSection()) |
527 | continue; |
528 | |
529 | // Check the name early to improve performance in the common case. |
530 | if (!pat.sectionPat.match(s: sec->name)) |
531 | continue; |
532 | |
533 | if (!cmd->matchesFile(file: sec->file) || pat.excludesFile(file: sec->file) || |
534 | (sec->flags & cmd->withFlags) != cmd->withFlags || |
535 | (sec->flags & cmd->withoutFlags) != 0) |
536 | continue; |
537 | |
538 | if (sec->parent) { |
539 | // Skip if not allowing multiple matches. |
540 | if (!config->enableNonContiguousRegions) |
541 | continue; |
542 | |
543 | // Disallow spilling into /DISCARD/; special handling would be needed |
544 | // for this in address assignment, and the semantics are nebulous. |
545 | if (outCmd.name == "/DISCARD/" ) |
546 | continue; |
547 | |
548 | // Skip if the section's first match was /DISCARD/; such sections are |
549 | // always discarded. |
550 | if (sec->parent->name == "/DISCARD/" ) |
551 | continue; |
552 | |
553 | // Skip if the section was already matched by a different input section |
554 | // description within this output section. |
555 | if (sec->parent == &outCmd) |
556 | continue; |
557 | |
558 | spills.insert(V: sec); |
559 | } |
560 | |
561 | ret.push_back(Elt: sec); |
562 | indexes.push_back(Elt: i); |
563 | seen.insert(V: i); |
564 | } |
565 | |
566 | if (pat.sortOuter == SortSectionPolicy::Default) |
567 | continue; |
568 | |
569 | // Matched sections are ordered by radix sort with the keys being (SORT*, |
570 | // --sort-section, input order), where SORT* (if present) is most |
571 | // significant. |
572 | // |
573 | // Matched sections between the previous SORT* and this SORT* are sorted by |
574 | // (--sort-alignment, input order). |
575 | sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat); |
576 | // Matched sections by this SORT* pattern are sorted using all 3 keys. |
577 | // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we |
578 | // just sort by sortOuter and sortInner. |
579 | sortInputSections( |
580 | vec: MutableArrayRef<InputSectionBase *>(ret).slice(N: sizeBeforeCurrPat), |
581 | outer: pat.sortOuter, inner: pat.sortInner); |
582 | sizeAfterPrevSort = ret.size(); |
583 | } |
584 | // Matched sections after the last SORT* are sorted by (--sort-alignment, |
585 | // input order). |
586 | sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size()); |
587 | |
588 | // The flag --enable-non-contiguous-regions may cause sections to match an |
589 | // InputSectionDescription in more than one OutputSection. Matches after the |
590 | // first were collected in the spills set, so replace these with potential |
591 | // spill sections. |
592 | if (!spills.empty()) { |
593 | for (InputSectionBase *&sec : ret) { |
594 | if (!spills.contains(V: sec)) |
595 | continue; |
596 | |
597 | // Append the spill input section to the list for the input section, |
598 | // creating it if necessary. |
599 | PotentialSpillSection *pss = make<PotentialSpillSection>( |
600 | args&: *sec, args&: const_cast<InputSectionDescription &>(*cmd)); |
601 | auto [it, inserted] = |
602 | potentialSpillLists.try_emplace(Key: sec, Args: PotentialSpillList{.head: pss, .tail: pss}); |
603 | if (!inserted) { |
604 | PotentialSpillSection *&tail = it->second.tail; |
605 | tail = tail->next = pss; |
606 | } |
607 | sec = pss; |
608 | } |
609 | } |
610 | |
611 | return ret; |
612 | } |
613 | |
614 | void LinkerScript::discard(InputSectionBase &s) { |
615 | if (&s == in.shStrTab.get()) |
616 | error(msg: "discarding " + s.name + " section is not allowed" ); |
617 | |
618 | s.markDead(); |
619 | s.parent = nullptr; |
620 | for (InputSection *sec : s.dependentSections) |
621 | discard(s&: *sec); |
622 | } |
623 | |
624 | void LinkerScript::discardSynthetic(OutputSection &outCmd) { |
625 | for (Partition &part : partitions) { |
626 | if (!part.armExidx || !part.armExidx->isLive()) |
627 | continue; |
628 | SmallVector<InputSectionBase *, 0> secs( |
629 | part.armExidx->exidxSections.begin(), |
630 | part.armExidx->exidxSections.end()); |
631 | for (SectionCommand *cmd : outCmd.commands) |
632 | if (auto *isd = dyn_cast<InputSectionDescription>(Val: cmd)) |
633 | for (InputSectionBase *s : computeInputSections(cmd: isd, sections: secs, outCmd)) |
634 | discard(s&: *s); |
635 | } |
636 | } |
637 | |
638 | SmallVector<InputSectionBase *, 0> |
639 | LinkerScript::createInputSectionList(OutputSection &outCmd) { |
640 | SmallVector<InputSectionBase *, 0> ret; |
641 | |
642 | for (SectionCommand *cmd : outCmd.commands) { |
643 | if (auto *isd = dyn_cast<InputSectionDescription>(Val: cmd)) { |
644 | isd->sectionBases = computeInputSections(cmd: isd, sections: ctx.inputSections, outCmd); |
645 | for (InputSectionBase *s : isd->sectionBases) |
646 | s->parent = &outCmd; |
647 | ret.insert(I: ret.end(), From: isd->sectionBases.begin(), To: isd->sectionBases.end()); |
648 | } |
649 | } |
650 | return ret; |
651 | } |
652 | |
653 | // Create output sections described by SECTIONS commands. |
654 | void LinkerScript::processSectionCommands() { |
655 | auto process = [this](OutputSection *osec) { |
656 | SmallVector<InputSectionBase *, 0> v = createInputSectionList(outCmd&: *osec); |
657 | |
658 | // The output section name `/DISCARD/' is special. |
659 | // Any input section assigned to it is discarded. |
660 | if (osec->name == "/DISCARD/" ) { |
661 | for (InputSectionBase *s : v) |
662 | discard(s&: *s); |
663 | discardSynthetic(outCmd&: *osec); |
664 | osec->commands.clear(); |
665 | return false; |
666 | } |
667 | |
668 | // This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive |
669 | // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input |
670 | // sections satisfy a given constraint. If not, a directive is handled |
671 | // as if it wasn't present from the beginning. |
672 | // |
673 | // Because we'll iterate over SectionCommands many more times, the easy |
674 | // way to "make it as if it wasn't present" is to make it empty. |
675 | if (!matchConstraints(sections: v, kind: osec->constraint)) { |
676 | for (InputSectionBase *s : v) |
677 | s->parent = nullptr; |
678 | osec->commands.clear(); |
679 | return false; |
680 | } |
681 | |
682 | // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign |
683 | // is given, input sections are aligned to that value, whether the |
684 | // given value is larger or smaller than the original section alignment. |
685 | if (osec->subalignExpr) { |
686 | uint32_t subalign = osec->subalignExpr().getValue(); |
687 | for (InputSectionBase *s : v) |
688 | s->addralign = subalign; |
689 | } |
690 | |
691 | // Set the partition field the same way OutputSection::recordSection() |
692 | // does. Partitions cannot be used with the SECTIONS command, so this is |
693 | // always 1. |
694 | osec->partition = 1; |
695 | return true; |
696 | }; |
697 | |
698 | // Process OVERWRITE_SECTIONS first so that it can overwrite the main script |
699 | // or orphans. |
700 | if (config->enableNonContiguousRegions && !overwriteSections.empty()) |
701 | error(msg: "OVERWRITE_SECTIONS cannot be used with " |
702 | "--enable-non-contiguous-regions" ); |
703 | DenseMap<CachedHashStringRef, OutputDesc *> map; |
704 | size_t i = 0; |
705 | for (OutputDesc *osd : overwriteSections) { |
706 | OutputSection *osec = &osd->osec; |
707 | if (process(osec) && |
708 | !map.try_emplace(Key: CachedHashStringRef(osec->name), Args&: osd).second) |
709 | warn(msg: "OVERWRITE_SECTIONS specifies duplicate " + osec->name); |
710 | } |
711 | for (SectionCommand *&base : sectionCommands) |
712 | if (auto *osd = dyn_cast<OutputDesc>(Val: base)) { |
713 | OutputSection *osec = &osd->osec; |
714 | if (OutputDesc *overwrite = map.lookup(Val: CachedHashStringRef(osec->name))) { |
715 | log(msg: overwrite->osec.location + " overwrites " + osec->name); |
716 | overwrite->osec.sectionIndex = i++; |
717 | base = overwrite; |
718 | } else if (process(osec)) { |
719 | osec->sectionIndex = i++; |
720 | } |
721 | } |
722 | |
723 | // If an OVERWRITE_SECTIONS specified output section is not in |
724 | // sectionCommands, append it to the end. The section will be inserted by |
725 | // orphan placement. |
726 | for (OutputDesc *osd : overwriteSections) |
727 | if (osd->osec.partition == 1 && osd->osec.sectionIndex == UINT32_MAX) |
728 | sectionCommands.push_back(Elt: osd); |
729 | } |
730 | |
731 | void LinkerScript::processSymbolAssignments() { |
732 | // Dot outside an output section still represents a relative address, whose |
733 | // sh_shndx should not be SHN_UNDEF or SHN_ABS. Create a dummy aether section |
734 | // that fills the void outside a section. It has an index of one, which is |
735 | // indistinguishable from any other regular section index. |
736 | aether = make<OutputSection>(args: "" , args: 0, args: SHF_ALLOC); |
737 | aether->sectionIndex = 1; |
738 | |
739 | // `st` captures the local AddressState and makes it accessible deliberately. |
740 | // This is needed as there are some cases where we cannot just thread the |
741 | // current state through to a lambda function created by the script parser. |
742 | AddressState st; |
743 | state = &st; |
744 | st.outSec = aether; |
745 | |
746 | for (SectionCommand *cmd : sectionCommands) { |
747 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) |
748 | addSymbol(cmd: assign); |
749 | else |
750 | for (SectionCommand *subCmd : cast<OutputDesc>(Val: cmd)->osec.commands) |
751 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: subCmd)) |
752 | addSymbol(cmd: assign); |
753 | } |
754 | |
755 | state = nullptr; |
756 | } |
757 | |
758 | static OutputSection *findByName(ArrayRef<SectionCommand *> vec, |
759 | StringRef name) { |
760 | for (SectionCommand *cmd : vec) |
761 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) |
762 | if (osd->osec.name == name) |
763 | return &osd->osec; |
764 | return nullptr; |
765 | } |
766 | |
767 | static OutputDesc *createSection(InputSectionBase *isec, StringRef outsecName) { |
768 | OutputDesc *osd = script->createOutputSection(name: outsecName, location: "<internal>" ); |
769 | osd->osec.recordSection(isec); |
770 | return osd; |
771 | } |
772 | |
773 | static OutputDesc *addInputSec(StringMap<TinyPtrVector<OutputSection *>> &map, |
774 | InputSectionBase *isec, StringRef outsecName) { |
775 | // Sections with SHT_GROUP or SHF_GROUP attributes reach here only when the -r |
776 | // option is given. A section with SHT_GROUP defines a "section group", and |
777 | // its members have SHF_GROUP attribute. Usually these flags have already been |
778 | // stripped by InputFiles.cpp as section groups are processed and uniquified. |
779 | // However, for the -r option, we want to pass through all section groups |
780 | // as-is because adding/removing members or merging them with other groups |
781 | // change their semantics. |
782 | if (isec->type == SHT_GROUP || (isec->flags & SHF_GROUP)) |
783 | return createSection(isec, outsecName); |
784 | |
785 | // Imagine .zed : { *(.foo) *(.bar) } script. Both foo and bar may have |
786 | // relocation sections .rela.foo and .rela.bar for example. Most tools do |
787 | // not allow multiple REL[A] sections for output section. Hence we |
788 | // should combine these relocation sections into single output. |
789 | // We skip synthetic sections because it can be .rela.dyn/.rela.plt or any |
790 | // other REL[A] sections created by linker itself. |
791 | if (!isa<SyntheticSection>(Val: isec) && isStaticRelSecType(type: isec->type)) { |
792 | auto *sec = cast<InputSection>(Val: isec); |
793 | OutputSection *out = sec->getRelocatedSection()->getOutputSection(); |
794 | |
795 | if (auto *relSec = out->relocationSection) { |
796 | relSec->recordSection(isec: sec); |
797 | return nullptr; |
798 | } |
799 | |
800 | OutputDesc *osd = createSection(isec, outsecName); |
801 | out->relocationSection = &osd->osec; |
802 | return osd; |
803 | } |
804 | |
805 | // The ELF spec just says |
806 | // ---------------------------------------------------------------- |
807 | // In the first phase, input sections that match in name, type and |
808 | // attribute flags should be concatenated into single sections. |
809 | // ---------------------------------------------------------------- |
810 | // |
811 | // However, it is clear that at least some flags have to be ignored for |
812 | // section merging. At the very least SHF_GROUP and SHF_COMPRESSED have to be |
813 | // ignored. We should not have two output .text sections just because one was |
814 | // in a group and another was not for example. |
815 | // |
816 | // It also seems that wording was a late addition and didn't get the |
817 | // necessary scrutiny. |
818 | // |
819 | // Merging sections with different flags is expected by some users. One |
820 | // reason is that if one file has |
821 | // |
822 | // int *const bar __attribute__((section(".foo"))) = (int *)0; |
823 | // |
824 | // gcc with -fPIC will produce a read only .foo section. But if another |
825 | // file has |
826 | // |
827 | // int zed; |
828 | // int *const bar __attribute__((section(".foo"))) = (int *)&zed; |
829 | // |
830 | // gcc with -fPIC will produce a read write section. |
831 | // |
832 | // Last but not least, when using linker script the merge rules are forced by |
833 | // the script. Unfortunately, linker scripts are name based. This means that |
834 | // expressions like *(.foo*) can refer to multiple input sections with |
835 | // different flags. We cannot put them in different output sections or we |
836 | // would produce wrong results for |
837 | // |
838 | // start = .; *(.foo.*) end = .; *(.bar) |
839 | // |
840 | // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to |
841 | // another. The problem is that there is no way to layout those output |
842 | // sections such that the .foo sections are the only thing between the start |
843 | // and end symbols. |
844 | // |
845 | // Given the above issues, we instead merge sections by name and error on |
846 | // incompatible types and flags. |
847 | TinyPtrVector<OutputSection *> &v = map[outsecName]; |
848 | for (OutputSection *sec : v) { |
849 | if (sec->partition != isec->partition) |
850 | continue; |
851 | |
852 | if (config->relocatable && (isec->flags & SHF_LINK_ORDER)) { |
853 | // Merging two SHF_LINK_ORDER sections with different sh_link fields will |
854 | // change their semantics, so we only merge them in -r links if they will |
855 | // end up being linked to the same output section. The casts are fine |
856 | // because everything in the map was created by the orphan placement code. |
857 | auto *firstIsec = cast<InputSectionBase>( |
858 | Val: cast<InputSectionDescription>(Val: sec->commands[0])->sectionBases[0]); |
859 | OutputSection *firstIsecOut = |
860 | (firstIsec->flags & SHF_LINK_ORDER) |
861 | ? firstIsec->getLinkOrderDep()->getOutputSection() |
862 | : nullptr; |
863 | if (firstIsecOut != isec->getLinkOrderDep()->getOutputSection()) |
864 | continue; |
865 | } |
866 | |
867 | sec->recordSection(isec); |
868 | return nullptr; |
869 | } |
870 | |
871 | OutputDesc *osd = createSection(isec, outsecName); |
872 | v.push_back(NewVal: &osd->osec); |
873 | return osd; |
874 | } |
875 | |
876 | // Add sections that didn't match any sections command. |
877 | void LinkerScript::addOrphanSections() { |
878 | StringMap<TinyPtrVector<OutputSection *>> map; |
879 | SmallVector<OutputDesc *, 0> v; |
880 | |
881 | auto add = [&](InputSectionBase *s) { |
882 | if (s->isLive() && !s->parent) { |
883 | orphanSections.push_back(Elt: s); |
884 | |
885 | StringRef name = getOutputSectionName(s); |
886 | if (config->unique) { |
887 | v.push_back(Elt: createSection(isec: s, outsecName: name)); |
888 | } else if (OutputSection *sec = findByName(vec: sectionCommands, name)) { |
889 | sec->recordSection(isec: s); |
890 | } else { |
891 | if (OutputDesc *osd = addInputSec(map, isec: s, outsecName: name)) |
892 | v.push_back(Elt: osd); |
893 | assert(isa<MergeInputSection>(s) || |
894 | s->getOutputSection()->sectionIndex == UINT32_MAX); |
895 | } |
896 | } |
897 | }; |
898 | |
899 | // For further --emit-reloc handling code we need target output section |
900 | // to be created before we create relocation output section, so we want |
901 | // to create target sections first. We do not want priority handling |
902 | // for synthetic sections because them are special. |
903 | size_t n = 0; |
904 | for (InputSectionBase *isec : ctx.inputSections) { |
905 | // Process InputSection and MergeInputSection. |
906 | if (LLVM_LIKELY(isa<InputSection>(isec))) |
907 | ctx.inputSections[n++] = isec; |
908 | |
909 | // In -r links, SHF_LINK_ORDER sections are added while adding their parent |
910 | // sections because we need to know the parent's output section before we |
911 | // can select an output section for the SHF_LINK_ORDER section. |
912 | if (config->relocatable && (isec->flags & SHF_LINK_ORDER)) |
913 | continue; |
914 | |
915 | if (auto *sec = dyn_cast<InputSection>(Val: isec)) |
916 | if (InputSectionBase *rel = sec->getRelocatedSection()) |
917 | if (auto *relIS = dyn_cast_or_null<InputSectionBase>(Val: rel->parent)) |
918 | add(relIS); |
919 | add(isec); |
920 | if (config->relocatable) |
921 | for (InputSectionBase *depSec : isec->dependentSections) |
922 | if (depSec->flags & SHF_LINK_ORDER) |
923 | add(depSec); |
924 | } |
925 | // Keep just InputSection. |
926 | ctx.inputSections.resize(N: n); |
927 | |
928 | // If no SECTIONS command was given, we should insert sections commands |
929 | // before others, so that we can handle scripts which refers them, |
930 | // for example: "foo = ABSOLUTE(ADDR(.text)));". |
931 | // When SECTIONS command is present we just add all orphans to the end. |
932 | if (hasSectionsCommand) |
933 | sectionCommands.insert(I: sectionCommands.end(), From: v.begin(), To: v.end()); |
934 | else |
935 | sectionCommands.insert(I: sectionCommands.begin(), From: v.begin(), To: v.end()); |
936 | } |
937 | |
938 | void LinkerScript::diagnoseOrphanHandling() const { |
939 | llvm::TimeTraceScope timeScope("Diagnose orphan sections" ); |
940 | if (config->orphanHandling == OrphanHandlingPolicy::Place || |
941 | !hasSectionsCommand) |
942 | return; |
943 | for (const InputSectionBase *sec : orphanSections) { |
944 | // .relro_padding is inserted before DATA_SEGMENT_RELRO_END, if present, |
945 | // automatically. The section is not supposed to be specified by scripts. |
946 | if (sec == in.relroPadding.get()) |
947 | continue; |
948 | // Input SHT_REL[A] retained by --emit-relocs are ignored by |
949 | // computeInputSections(). Don't warn/error. |
950 | if (isa<InputSection>(Val: sec) && |
951 | cast<InputSection>(Val: sec)->getRelocatedSection()) |
952 | continue; |
953 | |
954 | StringRef name = getOutputSectionName(s: sec); |
955 | if (config->orphanHandling == OrphanHandlingPolicy::Error) |
956 | error(msg: toString(sec) + " is being placed in '" + name + "'" ); |
957 | else |
958 | warn(msg: toString(sec) + " is being placed in '" + name + "'" ); |
959 | } |
960 | } |
961 | |
962 | void LinkerScript::diagnoseMissingSGSectionAddress() const { |
963 | if (!config->cmseImplib || !in.armCmseSGSection->isNeeded()) |
964 | return; |
965 | |
966 | OutputSection *sec = findByName(vec: sectionCommands, name: ".gnu.sgstubs" ); |
967 | if (sec && !sec->addrExpr && !config->sectionStartMap.count(Key: ".gnu.sgstubs" )) |
968 | error(msg: "no address assigned to the veneers output section " + sec->name); |
969 | } |
970 | |
971 | // This function searches for a memory region to place the given output |
972 | // section in. If found, a pointer to the appropriate memory region is |
973 | // returned in the first member of the pair. Otherwise, a nullptr is returned. |
974 | // The second member of the pair is a hint that should be passed to the |
975 | // subsequent call of this method. |
976 | std::pair<MemoryRegion *, MemoryRegion *> |
977 | LinkerScript::findMemoryRegion(OutputSection *sec, MemoryRegion *hint) { |
978 | // Non-allocatable sections are not part of the process image. |
979 | if (!(sec->flags & SHF_ALLOC)) { |
980 | bool hasInputOrByteCommand = |
981 | sec->hasInputSections || |
982 | llvm::any_of(Range&: sec->commands, P: [](SectionCommand *comm) { |
983 | return ByteCommand::classof(c: comm); |
984 | }); |
985 | if (!sec->memoryRegionName.empty() && hasInputOrByteCommand) |
986 | warn(msg: "ignoring memory region assignment for non-allocatable section '" + |
987 | sec->name + "'" ); |
988 | return {nullptr, nullptr}; |
989 | } |
990 | |
991 | // If a memory region name was specified in the output section command, |
992 | // then try to find that region first. |
993 | if (!sec->memoryRegionName.empty()) { |
994 | if (MemoryRegion *m = memoryRegions.lookup(Key: sec->memoryRegionName)) |
995 | return {m, m}; |
996 | error(msg: "memory region '" + sec->memoryRegionName + "' not declared" ); |
997 | return {nullptr, nullptr}; |
998 | } |
999 | |
1000 | // If at least one memory region is defined, all sections must |
1001 | // belong to some memory region. Otherwise, we don't need to do |
1002 | // anything for memory regions. |
1003 | if (memoryRegions.empty()) |
1004 | return {nullptr, nullptr}; |
1005 | |
1006 | // An orphan section should continue the previous memory region. |
1007 | if (sec->sectionIndex == UINT32_MAX && hint) |
1008 | return {hint, hint}; |
1009 | |
1010 | // See if a region can be found by matching section flags. |
1011 | for (auto &pair : memoryRegions) { |
1012 | MemoryRegion *m = pair.second; |
1013 | if (m->compatibleWith(secFlags: sec->flags)) |
1014 | return {m, nullptr}; |
1015 | } |
1016 | |
1017 | // Otherwise, no suitable region was found. |
1018 | error(msg: "no memory region specified for section '" + sec->name + "'" ); |
1019 | return {nullptr, nullptr}; |
1020 | } |
1021 | |
1022 | static OutputSection *findFirstSection(PhdrEntry *load) { |
1023 | for (OutputSection *sec : outputSections) |
1024 | if (sec->ptLoad == load) |
1025 | return sec; |
1026 | return nullptr; |
1027 | } |
1028 | |
1029 | // Assign addresses to an output section and offsets to its input sections and |
1030 | // symbol assignments. Return true if the output section's address has changed. |
1031 | bool LinkerScript::assignOffsets(OutputSection *sec) { |
1032 | const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS; |
1033 | const bool sameMemRegion = state->memRegion == sec->memRegion; |
1034 | const bool prevLMARegionIsDefault = state->lmaRegion == nullptr; |
1035 | const uint64_t savedDot = dot; |
1036 | bool addressChanged = false; |
1037 | state->memRegion = sec->memRegion; |
1038 | state->lmaRegion = sec->lmaRegion; |
1039 | |
1040 | if (!(sec->flags & SHF_ALLOC)) { |
1041 | // Non-SHF_ALLOC sections have zero addresses. |
1042 | dot = 0; |
1043 | } else if (isTbss) { |
1044 | // Allow consecutive SHF_TLS SHT_NOBITS output sections. The address range |
1045 | // starts from the end address of the previous tbss section. |
1046 | if (state->tbssAddr == 0) |
1047 | state->tbssAddr = dot; |
1048 | else |
1049 | dot = state->tbssAddr; |
1050 | } else { |
1051 | if (state->memRegion) |
1052 | dot = state->memRegion->curPos; |
1053 | if (sec->addrExpr) |
1054 | setDot(e: sec->addrExpr, loc: sec->location, inSec: false); |
1055 | |
1056 | // If the address of the section has been moved forward by an explicit |
1057 | // expression so that it now starts past the current curPos of the enclosing |
1058 | // region, we need to expand the current region to account for the space |
1059 | // between the previous section, if any, and the start of this section. |
1060 | if (state->memRegion && state->memRegion->curPos < dot) |
1061 | expandMemoryRegion(memRegion: state->memRegion, size: dot - state->memRegion->curPos, |
1062 | secName: sec->name); |
1063 | } |
1064 | |
1065 | state->outSec = sec; |
1066 | if (!(sec->addrExpr && script->hasSectionsCommand)) { |
1067 | // ALIGN is respected. sec->alignment is the max of ALIGN and the maximum of |
1068 | // input section alignments. |
1069 | const uint64_t pos = dot; |
1070 | dot = alignToPowerOf2(Value: dot, Align: sec->addralign); |
1071 | expandMemoryRegions(size: dot - pos); |
1072 | } |
1073 | addressChanged = sec->addr != dot; |
1074 | sec->addr = dot; |
1075 | |
1076 | // state->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT() |
1077 | // or AT>, recompute state->lmaOffset; otherwise, if both previous/current LMA |
1078 | // region is the default, and the two sections are in the same memory region, |
1079 | // reuse previous lmaOffset; otherwise, reset lmaOffset to 0. This emulates |
1080 | // heuristics described in |
1081 | // https://sourceware.org/binutils/docs/ld/Output-Section-LMA.html |
1082 | if (sec->lmaExpr) { |
1083 | state->lmaOffset = sec->lmaExpr().getValue() - dot; |
1084 | } else if (MemoryRegion *mr = sec->lmaRegion) { |
1085 | uint64_t lmaStart = alignToPowerOf2(Value: mr->curPos, Align: sec->addralign); |
1086 | if (mr->curPos < lmaStart) |
1087 | expandMemoryRegion(memRegion: mr, size: lmaStart - mr->curPos, secName: sec->name); |
1088 | state->lmaOffset = lmaStart - dot; |
1089 | } else if (!sameMemRegion || !prevLMARegionIsDefault) { |
1090 | state->lmaOffset = 0; |
1091 | } |
1092 | |
1093 | // Propagate state->lmaOffset to the first "non-header" section. |
1094 | if (PhdrEntry *l = sec->ptLoad) |
1095 | if (sec == findFirstSection(load: l)) |
1096 | l->lmaOffset = state->lmaOffset; |
1097 | |
1098 | // We can call this method multiple times during the creation of |
1099 | // thunks and want to start over calculation each time. |
1100 | sec->size = 0; |
1101 | |
1102 | // We visited SectionsCommands from processSectionCommands to |
1103 | // layout sections. Now, we visit SectionsCommands again to fix |
1104 | // section offsets. |
1105 | for (SectionCommand *cmd : sec->commands) { |
1106 | // This handles the assignments to symbol or to the dot. |
1107 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
1108 | assign->addr = dot; |
1109 | assignSymbol(cmd: assign, inSec: true); |
1110 | assign->size = dot - assign->addr; |
1111 | continue; |
1112 | } |
1113 | |
1114 | // Handle BYTE(), SHORT(), LONG(), or QUAD(). |
1115 | if (auto *data = dyn_cast<ByteCommand>(Val: cmd)) { |
1116 | data->offset = dot - sec->addr; |
1117 | dot += data->size; |
1118 | expandOutputSection(size: data->size); |
1119 | continue; |
1120 | } |
1121 | |
1122 | // Handle a single input section description command. |
1123 | // It calculates and assigns the offsets for each section and also |
1124 | // updates the output section size. |
1125 | |
1126 | auto §ions = cast<InputSectionDescription>(Val: cmd)->sections; |
1127 | for (InputSection *isec : sections) { |
1128 | assert(isec->getParent() == sec); |
1129 | if (isa<PotentialSpillSection>(Val: isec)) |
1130 | continue; |
1131 | const uint64_t pos = dot; |
1132 | dot = alignToPowerOf2(Value: dot, Align: isec->addralign); |
1133 | isec->outSecOff = dot - sec->addr; |
1134 | dot += isec->getSize(); |
1135 | |
1136 | // Update output section size after adding each section. This is so that |
1137 | // SIZEOF works correctly in the case below: |
1138 | // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } |
1139 | expandOutputSection(size: dot - pos); |
1140 | } |
1141 | } |
1142 | |
1143 | // If .relro_padding is present, round up the end to a common-page-size |
1144 | // boundary to protect the last page. |
1145 | if (in.relroPadding && sec == in.relroPadding->getParent()) |
1146 | expandOutputSection(size: alignToPowerOf2(Value: dot, Align: config->commonPageSize) - dot); |
1147 | |
1148 | // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections |
1149 | // as they are not part of the process image. |
1150 | if (!(sec->flags & SHF_ALLOC)) { |
1151 | dot = savedDot; |
1152 | } else if (isTbss) { |
1153 | // NOBITS TLS sections are similar. Additionally save the end address. |
1154 | state->tbssAddr = dot; |
1155 | dot = savedDot; |
1156 | } |
1157 | return addressChanged; |
1158 | } |
1159 | |
1160 | static bool isDiscardable(const OutputSection &sec) { |
1161 | if (sec.name == "/DISCARD/" ) |
1162 | return true; |
1163 | |
1164 | // We do not want to remove OutputSections with expressions that reference |
1165 | // symbols even if the OutputSection is empty. We want to ensure that the |
1166 | // expressions can be evaluated and report an error if they cannot. |
1167 | if (sec.expressionsUseSymbols) |
1168 | return false; |
1169 | |
1170 | // OutputSections may be referenced by name in ADDR and LOADADDR expressions, |
1171 | // as an empty Section can has a valid VMA and LMA we keep the OutputSection |
1172 | // to maintain the integrity of the other Expression. |
1173 | if (sec.usedInExpression) |
1174 | return false; |
1175 | |
1176 | for (SectionCommand *cmd : sec.commands) { |
1177 | if (auto assign = dyn_cast<SymbolAssignment>(Val: cmd)) |
1178 | // Don't create empty output sections just for unreferenced PROVIDE |
1179 | // symbols. |
1180 | if (assign->name != "." && !assign->sym) |
1181 | continue; |
1182 | |
1183 | if (!isa<InputSectionDescription>(Val: *cmd)) |
1184 | return false; |
1185 | } |
1186 | return true; |
1187 | } |
1188 | |
1189 | static void maybePropagatePhdrs(OutputSection &sec, |
1190 | SmallVector<StringRef, 0> &phdrs) { |
1191 | if (sec.phdrs.empty()) { |
1192 | // To match the bfd linker script behaviour, only propagate program |
1193 | // headers to sections that are allocated. |
1194 | if (sec.flags & SHF_ALLOC) |
1195 | sec.phdrs = phdrs; |
1196 | } else { |
1197 | phdrs = sec.phdrs; |
1198 | } |
1199 | } |
1200 | |
1201 | void LinkerScript::adjustOutputSections() { |
1202 | // If the output section contains only symbol assignments, create a |
1203 | // corresponding output section. The issue is what to do with linker script |
1204 | // like ".foo : { symbol = 42; }". One option would be to convert it to |
1205 | // "symbol = 42;". That is, move the symbol out of the empty section |
1206 | // description. That seems to be what bfd does for this simple case. The |
1207 | // problem is that this is not completely general. bfd will give up and |
1208 | // create a dummy section too if there is a ". = . + 1" inside the section |
1209 | // for example. |
1210 | // Given that we want to create the section, we have to worry what impact |
1211 | // it will have on the link. For example, if we just create a section with |
1212 | // 0 for flags, it would change which PT_LOADs are created. |
1213 | // We could remember that particular section is dummy and ignore it in |
1214 | // other parts of the linker, but unfortunately there are quite a few places |
1215 | // that would need to change: |
1216 | // * The program header creation. |
1217 | // * The orphan section placement. |
1218 | // * The address assignment. |
1219 | // The other option is to pick flags that minimize the impact the section |
1220 | // will have on the rest of the linker. That is why we copy the flags from |
1221 | // the previous sections. We copy just SHF_ALLOC and SHF_WRITE to keep the |
1222 | // impact low. We do not propagate SHF_EXECINSTR as in some cases this can |
1223 | // lead to executable writeable section. |
1224 | uint64_t flags = SHF_ALLOC; |
1225 | |
1226 | SmallVector<StringRef, 0> defPhdrs; |
1227 | bool seenRelro = false; |
1228 | for (SectionCommand *&cmd : sectionCommands) { |
1229 | if (!isa<OutputDesc>(Val: cmd)) |
1230 | continue; |
1231 | auto *sec = &cast<OutputDesc>(Val: cmd)->osec; |
1232 | |
1233 | // Handle align (e.g. ".foo : ALIGN(16) { ... }"). |
1234 | if (sec->alignExpr) |
1235 | sec->addralign = |
1236 | std::max<uint32_t>(a: sec->addralign, b: sec->alignExpr().getValue()); |
1237 | |
1238 | bool isEmpty = (getFirstInputSection(os: sec) == nullptr); |
1239 | bool discardable = isEmpty && isDiscardable(sec: *sec); |
1240 | // If sec has at least one input section and not discarded, remember its |
1241 | // flags to be inherited by subsequent output sections. (sec may contain |
1242 | // just one empty synthetic section.) |
1243 | if (sec->hasInputSections && !discardable) |
1244 | flags = sec->flags; |
1245 | |
1246 | // We do not want to keep any special flags for output section |
1247 | // in case it is empty. |
1248 | if (isEmpty) { |
1249 | sec->flags = |
1250 | flags & ((sec->nonAlloc ? 0 : (uint64_t)SHF_ALLOC) | SHF_WRITE); |
1251 | sec->sortRank = getSectionRank(osec&: *sec); |
1252 | } |
1253 | |
1254 | // The code below may remove empty output sections. We should save the |
1255 | // specified program headers (if exist) and propagate them to subsequent |
1256 | // sections which do not specify program headers. |
1257 | // An example of such a linker script is: |
1258 | // SECTIONS { .empty : { *(.empty) } :rw |
1259 | // .foo : { *(.foo) } } |
1260 | // Note: at this point the order of output sections has not been finalized, |
1261 | // because orphans have not been inserted into their expected positions. We |
1262 | // will handle them in adjustSectionsAfterSorting(). |
1263 | if (sec->sectionIndex != UINT32_MAX) |
1264 | maybePropagatePhdrs(sec&: *sec, phdrs&: defPhdrs); |
1265 | |
1266 | // Discard .relro_padding if we have not seen one RELRO section. Note: when |
1267 | // .tbss is the only RELRO section, there is no associated PT_LOAD segment |
1268 | // (needsPtLoad), so we don't append .relro_padding in the case. |
1269 | if (in.relroPadding && in.relroPadding->getParent() == sec && !seenRelro) |
1270 | discardable = true; |
1271 | if (discardable) { |
1272 | sec->markDead(); |
1273 | cmd = nullptr; |
1274 | } else { |
1275 | seenRelro |= |
1276 | sec->relro && !(sec->type == SHT_NOBITS && (sec->flags & SHF_TLS)); |
1277 | } |
1278 | } |
1279 | |
1280 | // It is common practice to use very generic linker scripts. So for any |
1281 | // given run some of the output sections in the script will be empty. |
1282 | // We could create corresponding empty output sections, but that would |
1283 | // clutter the output. |
1284 | // We instead remove trivially empty sections. The bfd linker seems even |
1285 | // more aggressive at removing them. |
1286 | llvm::erase_if(C&: sectionCommands, P: [&](SectionCommand *cmd) { return !cmd; }); |
1287 | } |
1288 | |
1289 | void LinkerScript::adjustSectionsAfterSorting() { |
1290 | // Try and find an appropriate memory region to assign offsets in. |
1291 | MemoryRegion *hint = nullptr; |
1292 | for (SectionCommand *cmd : sectionCommands) { |
1293 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) { |
1294 | OutputSection *sec = &osd->osec; |
1295 | if (!sec->lmaRegionName.empty()) { |
1296 | if (MemoryRegion *m = memoryRegions.lookup(Key: sec->lmaRegionName)) |
1297 | sec->lmaRegion = m; |
1298 | else |
1299 | error(msg: "memory region '" + sec->lmaRegionName + "' not declared" ); |
1300 | } |
1301 | std::tie(args&: sec->memRegion, args&: hint) = findMemoryRegion(sec, hint); |
1302 | } |
1303 | } |
1304 | |
1305 | // If output section command doesn't specify any segments, |
1306 | // and we haven't previously assigned any section to segment, |
1307 | // then we simply assign section to the very first load segment. |
1308 | // Below is an example of such linker script: |
1309 | // PHDRS { seg PT_LOAD; } |
1310 | // SECTIONS { .aaa : { *(.aaa) } } |
1311 | SmallVector<StringRef, 0> defPhdrs; |
1312 | auto firstPtLoad = llvm::find_if(Range&: phdrsCommands, P: [](const PhdrsCommand &cmd) { |
1313 | return cmd.type == PT_LOAD; |
1314 | }); |
1315 | if (firstPtLoad != phdrsCommands.end()) |
1316 | defPhdrs.push_back(Elt: firstPtLoad->name); |
1317 | |
1318 | // Walk the commands and propagate the program headers to commands that don't |
1319 | // explicitly specify them. |
1320 | for (SectionCommand *cmd : sectionCommands) |
1321 | if (auto *osd = dyn_cast<OutputDesc>(Val: cmd)) |
1322 | maybePropagatePhdrs(sec&: osd->osec, phdrs&: defPhdrs); |
1323 | } |
1324 | |
1325 | static uint64_t computeBase(uint64_t min, bool ) { |
1326 | // If there is no SECTIONS or if the linkerscript is explicit about program |
1327 | // headers, do our best to allocate them. |
1328 | if (!script->hasSectionsCommand || allocateHeaders) |
1329 | return 0; |
1330 | // Otherwise only allocate program headers if that would not add a page. |
1331 | return alignDown(Value: min, Align: config->maxPageSize); |
1332 | } |
1333 | |
1334 | // When the SECTIONS command is used, try to find an address for the file and |
1335 | // program headers output sections, which can be added to the first PT_LOAD |
1336 | // segment when program headers are created. |
1337 | // |
1338 | // We check if the headers fit below the first allocated section. If there isn't |
1339 | // enough space for these sections, we'll remove them from the PT_LOAD segment, |
1340 | // and we'll also remove the PT_PHDR segment. |
1341 | void LinkerScript::(SmallVector<PhdrEntry *, 0> &phdrs) { |
1342 | uint64_t min = std::numeric_limits<uint64_t>::max(); |
1343 | for (OutputSection *sec : outputSections) |
1344 | if (sec->flags & SHF_ALLOC) |
1345 | min = std::min<uint64_t>(a: min, b: sec->addr); |
1346 | |
1347 | auto it = llvm::find_if( |
1348 | Range&: phdrs, P: [](const PhdrEntry *e) { return e->p_type == PT_LOAD; }); |
1349 | if (it == phdrs.end()) |
1350 | return; |
1351 | PhdrEntry *firstPTLoad = *it; |
1352 | |
1353 | bool = |
1354 | llvm::any_of(Range&: phdrsCommands, P: [](const PhdrsCommand &cmd) { |
1355 | return cmd.hasPhdrs || cmd.hasFilehdr; |
1356 | }); |
1357 | bool paged = !config->omagic && !config->nmagic; |
1358 | uint64_t = getHeaderSize(); |
1359 | if ((paged || hasExplicitHeaders) && |
1360 | headerSize <= min - computeBase(min, allocateHeaders: hasExplicitHeaders)) { |
1361 | min = alignDown(Value: min - headerSize, Align: config->maxPageSize); |
1362 | Out::elfHeader->addr = min; |
1363 | Out::programHeaders->addr = min + Out::elfHeader->size; |
1364 | return; |
1365 | } |
1366 | |
1367 | // Error if we were explicitly asked to allocate headers. |
1368 | if (hasExplicitHeaders) |
1369 | error(msg: "could not allocate headers" ); |
1370 | |
1371 | Out::elfHeader->ptLoad = nullptr; |
1372 | Out::programHeaders->ptLoad = nullptr; |
1373 | firstPTLoad->firstSec = findFirstSection(load: firstPTLoad); |
1374 | |
1375 | llvm::erase_if(C&: phdrs, |
1376 | P: [](const PhdrEntry *e) { return e->p_type == PT_PHDR; }); |
1377 | } |
1378 | |
1379 | LinkerScript::AddressState::AddressState() { |
1380 | for (auto &mri : script->memoryRegions) { |
1381 | MemoryRegion *mr = mri.second; |
1382 | mr->curPos = (mr->origin)().getValue(); |
1383 | } |
1384 | } |
1385 | |
1386 | // Here we assign addresses as instructed by linker script SECTIONS |
1387 | // sub-commands. Doing that allows us to use final VA values, so here |
1388 | // we also handle rest commands like symbol assignments and ASSERTs. |
1389 | // Return an output section that has changed its address or null, and a symbol |
1390 | // that has changed its section or value (or nullptr if no symbol has changed). |
1391 | std::pair<const OutputSection *, const Defined *> |
1392 | LinkerScript::assignAddresses() { |
1393 | if (script->hasSectionsCommand) { |
1394 | // With a linker script, assignment of addresses to headers is covered by |
1395 | // allocateHeaders(). |
1396 | dot = config->imageBase.value_or(u: 0); |
1397 | } else { |
1398 | // Assign addresses to headers right now. |
1399 | dot = target->getImageBase(); |
1400 | Out::elfHeader->addr = dot; |
1401 | Out::programHeaders->addr = dot + Out::elfHeader->size; |
1402 | dot += getHeaderSize(); |
1403 | } |
1404 | |
1405 | OutputSection *changedOsec = nullptr; |
1406 | AddressState st; |
1407 | state = &st; |
1408 | errorOnMissingSection = true; |
1409 | st.outSec = aether; |
1410 | recordedErrors.clear(); |
1411 | |
1412 | SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands); |
1413 | for (SectionCommand *cmd : sectionCommands) { |
1414 | if (auto *assign = dyn_cast<SymbolAssignment>(Val: cmd)) { |
1415 | assign->addr = dot; |
1416 | assignSymbol(cmd: assign, inSec: false); |
1417 | assign->size = dot - assign->addr; |
1418 | continue; |
1419 | } |
1420 | if (assignOffsets(sec: &cast<OutputDesc>(Val: cmd)->osec) && !changedOsec) |
1421 | changedOsec = &cast<OutputDesc>(Val: cmd)->osec; |
1422 | } |
1423 | |
1424 | state = nullptr; |
1425 | return {changedOsec, getChangedSymbolAssignment(oldValues)}; |
1426 | } |
1427 | |
1428 | static bool hasRegionOverflowed(MemoryRegion *mr) { |
1429 | if (!mr) |
1430 | return false; |
1431 | return mr->curPos - mr->getOrigin() > mr->getLength(); |
1432 | } |
1433 | |
1434 | // Spill input sections in reverse order of address assignment to (potentially) |
1435 | // bring memory regions out of overflow. The size savings of a spill can only be |
1436 | // estimated, since general linker script arithmetic may occur afterwards. |
1437 | // Under-estimates may cause unnecessary spills, but over-estimates can always |
1438 | // be corrected on the next pass. |
1439 | bool LinkerScript::spillSections() { |
1440 | if (!config->enableNonContiguousRegions) |
1441 | return false; |
1442 | |
1443 | bool spilled = false; |
1444 | for (SectionCommand *cmd : reverse(C&: sectionCommands)) { |
1445 | auto *od = dyn_cast<OutputDesc>(Val: cmd); |
1446 | if (!od) |
1447 | continue; |
1448 | OutputSection *osec = &od->osec; |
1449 | if (!osec->memRegion) |
1450 | continue; |
1451 | |
1452 | // Input sections that have replaced a potential spill and should be removed |
1453 | // from their input section description. |
1454 | DenseSet<InputSection *> spilledInputSections; |
1455 | |
1456 | for (SectionCommand *cmd : reverse(C&: osec->commands)) { |
1457 | if (!hasRegionOverflowed(mr: osec->memRegion) && |
1458 | !hasRegionOverflowed(mr: osec->lmaRegion)) |
1459 | break; |
1460 | |
1461 | auto *isd = dyn_cast<InputSectionDescription>(Val: cmd); |
1462 | if (!isd) |
1463 | continue; |
1464 | for (InputSection *isec : reverse(C&: isd->sections)) { |
1465 | // Potential spill locations cannot be spilled. |
1466 | if (isa<PotentialSpillSection>(Val: isec)) |
1467 | continue; |
1468 | |
1469 | // Find the next potential spill location and remove it from the list. |
1470 | auto it = potentialSpillLists.find(Val: isec); |
1471 | if (it == potentialSpillLists.end()) |
1472 | continue; |
1473 | PotentialSpillList &list = it->second; |
1474 | PotentialSpillSection *spill = list.head; |
1475 | if (spill->next) |
1476 | list.head = spill->next; |
1477 | else |
1478 | potentialSpillLists.erase(Val: isec); |
1479 | |
1480 | // Replace the next spill location with the spilled section and adjust |
1481 | // its properties to match the new location. Note that the alignment of |
1482 | // the spill section may have diverged from the original due to e.g. a |
1483 | // SUBALIGN. Correct assignment requires the spill's alignment to be |
1484 | // used, not the original. |
1485 | spilledInputSections.insert(V: isec); |
1486 | *llvm::find(Range&: spill->isd->sections, Val: spill) = isec; |
1487 | isec->parent = spill->parent; |
1488 | isec->addralign = spill->addralign; |
1489 | |
1490 | // Record the (potential) reduction in the region's end position. |
1491 | osec->memRegion->curPos -= isec->getSize(); |
1492 | if (osec->lmaRegion) |
1493 | osec->lmaRegion->curPos -= isec->getSize(); |
1494 | |
1495 | // Spilling continues until the end position no longer overflows the |
1496 | // region. Then, another round of address assignment will either confirm |
1497 | // the spill's success or lead to yet more spilling. |
1498 | if (!hasRegionOverflowed(mr: osec->memRegion) && |
1499 | !hasRegionOverflowed(mr: osec->lmaRegion)) |
1500 | break; |
1501 | } |
1502 | |
1503 | // Remove any spilled input sections to complete their move. |
1504 | if (!spilledInputSections.empty()) { |
1505 | spilled = true; |
1506 | llvm::erase_if(C&: isd->sections, P: [&](InputSection *isec) { |
1507 | return spilledInputSections.contains(V: isec); |
1508 | }); |
1509 | } |
1510 | } |
1511 | } |
1512 | |
1513 | return spilled; |
1514 | } |
1515 | |
1516 | // Erase any potential spill sections that were not used. |
1517 | void LinkerScript::erasePotentialSpillSections() { |
1518 | if (potentialSpillLists.empty()) |
1519 | return; |
1520 | |
1521 | // Collect the set of input section descriptions that contain potential |
1522 | // spills. |
1523 | DenseSet<InputSectionDescription *> isds; |
1524 | for (const auto &[_, list] : potentialSpillLists) |
1525 | for (PotentialSpillSection *s = list.head; s; s = s->next) |
1526 | isds.insert(V: s->isd); |
1527 | |
1528 | for (InputSectionDescription *isd : isds) |
1529 | llvm::erase_if(C&: isd->sections, P: [](InputSection *s) { |
1530 | return isa<PotentialSpillSection>(Val: s); |
1531 | }); |
1532 | |
1533 | potentialSpillLists.clear(); |
1534 | } |
1535 | |
1536 | // Creates program headers as instructed by PHDRS linker script command. |
1537 | SmallVector<PhdrEntry *, 0> LinkerScript::createPhdrs() { |
1538 | SmallVector<PhdrEntry *, 0> ret; |
1539 | |
1540 | // Process PHDRS and FILEHDR keywords because they are not |
1541 | // real output sections and cannot be added in the following loop. |
1542 | for (const PhdrsCommand &cmd : phdrsCommands) { |
1543 | PhdrEntry *phdr = make<PhdrEntry>(args: cmd.type, args: cmd.flags.value_or(u: PF_R)); |
1544 | |
1545 | if (cmd.hasFilehdr) |
1546 | phdr->add(sec: Out::elfHeader); |
1547 | if (cmd.hasPhdrs) |
1548 | phdr->add(sec: Out::programHeaders); |
1549 | |
1550 | if (cmd.lmaExpr) { |
1551 | phdr->p_paddr = cmd.lmaExpr().getValue(); |
1552 | phdr->hasLMA = true; |
1553 | } |
1554 | ret.push_back(Elt: phdr); |
1555 | } |
1556 | |
1557 | // Add output sections to program headers. |
1558 | for (OutputSection *sec : outputSections) { |
1559 | // Assign headers specified by linker script |
1560 | for (size_t id : getPhdrIndices(sec)) { |
1561 | ret[id]->add(sec); |
1562 | if (!phdrsCommands[id].flags) |
1563 | ret[id]->p_flags |= sec->getPhdrFlags(); |
1564 | } |
1565 | } |
1566 | return ret; |
1567 | } |
1568 | |
1569 | // Returns true if we should emit an .interp section. |
1570 | // |
1571 | // We usually do. But if PHDRS commands are given, and |
1572 | // no PT_INTERP is there, there's no place to emit an |
1573 | // .interp, so we don't do that in that case. |
1574 | bool LinkerScript::needsInterpSection() { |
1575 | if (phdrsCommands.empty()) |
1576 | return true; |
1577 | for (PhdrsCommand &cmd : phdrsCommands) |
1578 | if (cmd.type == PT_INTERP) |
1579 | return true; |
1580 | return false; |
1581 | } |
1582 | |
1583 | ExprValue LinkerScript::getSymbolValue(StringRef name, const Twine &loc) { |
1584 | if (name == "." ) { |
1585 | if (state) |
1586 | return {state->outSec, false, dot - state->outSec->addr, loc}; |
1587 | error(msg: loc + ": unable to get location counter value" ); |
1588 | return 0; |
1589 | } |
1590 | |
1591 | if (Symbol *sym = symtab.find(name)) { |
1592 | if (auto *ds = dyn_cast<Defined>(Val: sym)) { |
1593 | ExprValue v{ds->section, false, ds->value, loc}; |
1594 | // Retain the original st_type, so that the alias will get the same |
1595 | // behavior in relocation processing. Any operation will reset st_type to |
1596 | // STT_NOTYPE. |
1597 | v.type = ds->type; |
1598 | return v; |
1599 | } |
1600 | if (isa<SharedSymbol>(Val: sym)) |
1601 | if (!errorOnMissingSection) |
1602 | return {nullptr, false, 0, loc}; |
1603 | } |
1604 | |
1605 | error(msg: loc + ": symbol not found: " + name); |
1606 | return 0; |
1607 | } |
1608 | |
1609 | // Returns the index of the segment named Name. |
1610 | static std::optional<size_t> getPhdrIndex(ArrayRef<PhdrsCommand> vec, |
1611 | StringRef name) { |
1612 | for (size_t i = 0; i < vec.size(); ++i) |
1613 | if (vec[i].name == name) |
1614 | return i; |
1615 | return std::nullopt; |
1616 | } |
1617 | |
1618 | // Returns indices of ELF headers containing specific section. Each index is a |
1619 | // zero based number of ELF header listed within PHDRS {} script block. |
1620 | SmallVector<size_t, 0> LinkerScript::getPhdrIndices(OutputSection *cmd) { |
1621 | SmallVector<size_t, 0> ret; |
1622 | |
1623 | for (StringRef s : cmd->phdrs) { |
1624 | if (std::optional<size_t> idx = getPhdrIndex(vec: phdrsCommands, name: s)) |
1625 | ret.push_back(Elt: *idx); |
1626 | else if (s != "NONE" ) |
1627 | error(msg: cmd->location + ": program header '" + s + |
1628 | "' is not listed in PHDRS" ); |
1629 | } |
1630 | return ret; |
1631 | } |
1632 | |
1633 | void LinkerScript::printMemoryUsage(raw_ostream& os) { |
1634 | auto printSize = [&](uint64_t size) { |
1635 | if ((size & 0x3fffffff) == 0) |
1636 | os << format_decimal(N: size >> 30, Width: 10) << " GB" ; |
1637 | else if ((size & 0xfffff) == 0) |
1638 | os << format_decimal(N: size >> 20, Width: 10) << " MB" ; |
1639 | else if ((size & 0x3ff) == 0) |
1640 | os << format_decimal(N: size >> 10, Width: 10) << " KB" ; |
1641 | else |
1642 | os << " " << format_decimal(N: size, Width: 10) << " B" ; |
1643 | }; |
1644 | os << "Memory region Used Size Region Size %age Used\n" ; |
1645 | for (auto &pair : memoryRegions) { |
1646 | MemoryRegion *m = pair.second; |
1647 | uint64_t usedLength = m->curPos - m->getOrigin(); |
1648 | os << right_justify(Str: m->name, Width: 16) << ": " ; |
1649 | printSize(usedLength); |
1650 | uint64_t length = m->getLength(); |
1651 | if (length != 0) { |
1652 | printSize(length); |
1653 | double percent = usedLength * 100.0 / length; |
1654 | os << " " << format(Fmt: "%6.2f%%" , Vals: percent); |
1655 | } |
1656 | os << '\n'; |
1657 | } |
1658 | } |
1659 | |
1660 | void LinkerScript::recordError(const Twine &msg) { |
1661 | auto &str = recordedErrors.emplace_back(); |
1662 | msg.toVector(Out&: str); |
1663 | } |
1664 | |
1665 | static void checkMemoryRegion(const MemoryRegion *region, |
1666 | const OutputSection *osec, uint64_t addr) { |
1667 | uint64_t osecEnd = addr + osec->size; |
1668 | uint64_t regionEnd = region->getOrigin() + region->getLength(); |
1669 | if (osecEnd > regionEnd) { |
1670 | error(msg: "section '" + osec->name + "' will not fit in region '" + |
1671 | region->name + "': overflowed by " + Twine(osecEnd - regionEnd) + |
1672 | " bytes" ); |
1673 | } |
1674 | } |
1675 | |
1676 | void LinkerScript::checkFinalScriptConditions() const { |
1677 | for (StringRef err : recordedErrors) |
1678 | errorOrWarn(msg: err); |
1679 | for (const OutputSection *sec : outputSections) { |
1680 | if (const MemoryRegion *memoryRegion = sec->memRegion) |
1681 | checkMemoryRegion(region: memoryRegion, osec: sec, addr: sec->addr); |
1682 | if (const MemoryRegion *lmaRegion = sec->lmaRegion) |
1683 | checkMemoryRegion(region: lmaRegion, osec: sec, addr: sec->getLMA()); |
1684 | } |
1685 | } |
1686 | |
1687 | void LinkerScript::addScriptReferencedSymbolsToSymTable() { |
1688 | // Some symbols (such as __ehdr_start) are defined lazily only when there |
1689 | // are undefined symbols for them, so we add these to trigger that logic. |
1690 | auto reference = [](StringRef name) { |
1691 | Symbol *sym = symtab.addUnusedUndefined(name); |
1692 | sym->isUsedInRegularObj = true; |
1693 | sym->referenced = true; |
1694 | }; |
1695 | for (StringRef name : referencedSymbols) |
1696 | reference(name); |
1697 | |
1698 | // Keeps track of references from which PROVIDE symbols have been added to the |
1699 | // symbol table. |
1700 | DenseSet<StringRef> added; |
1701 | SmallVector<const SmallVector<StringRef, 0> *, 0> symRefsVec; |
1702 | for (const auto &[name, symRefs] : provideMap) |
1703 | if (LinkerScript::shouldAddProvideSym(symName: name) && added.insert(V: name).second) |
1704 | symRefsVec.push_back(Elt: &symRefs); |
1705 | while (symRefsVec.size()) { |
1706 | for (StringRef name : *symRefsVec.pop_back_val()) { |
1707 | reference(name); |
1708 | // Prevent the symbol from being discarded by --gc-sections. |
1709 | script->referencedSymbols.push_back(Elt: name); |
1710 | auto it = script->provideMap.find(Key: name); |
1711 | if (it != script->provideMap.end() && |
1712 | LinkerScript::shouldAddProvideSym(symName: name) && |
1713 | added.insert(V: name).second) { |
1714 | symRefsVec.push_back(Elt: &it->second); |
1715 | } |
1716 | } |
1717 | } |
1718 | } |
1719 | |
1720 | bool LinkerScript::shouldAddProvideSym(StringRef symName) { |
1721 | Symbol *sym = symtab.find(name: symName); |
1722 | return sym && !sym->isDefined() && !sym->isCommon(); |
1723 | } |
1724 | |