InputFiles.cpp source code [llvm_projects/lld/ELF/InputFiles.cpp]

1	//===- InputFiles.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "Config.h"
11	#include "DWARF.h"
12	#include "Driver.h"
13	#include "InputSection.h"
14	#include "LinkerScript.h"
15	#include "SymbolTable.h"
16	#include "Symbols.h"
17	#include "SyntheticSections.h"
18	#include "Target.h"
19	#include "lld/Common/DWARF.h"
20	#include "llvm/ADT/CachedHashString.h"
21	#include "llvm/ADT/STLExtras.h"
22	#include "llvm/LTO/LTO.h"
23	#include "llvm/Object/IRObjectFile.h"
24	#include "llvm/Support/ARMAttributeParser.h"
25	#include "llvm/Support/ARMBuildAttributes.h"
26	#include "llvm/Support/Endian.h"
27	#include "llvm/Support/FileSystem.h"
28	#include "llvm/Support/Path.h"
29	#include "llvm/Support/TimeProfiler.h"
30	#include "llvm/Support/raw_ostream.h"
31	#include <optional>
32
33	using namespace llvm;
34	using namespace llvm::ELF;
35	using namespace llvm::object;
36	using namespace llvm::sys;
37	using namespace llvm::sys::fs;
38	using namespace llvm::support::endian;
39	using namespace lld;
40	using namespace lld::elf;
41
42	// This function is explicitly instantiated in ARM.cpp, don't do it here to
43	// avoid warnings with MSVC.
// The `extern template` declarations below cover the four ELFT variants
// (32/64-bit, little/big endian) and suppress implicit instantiation of
// importCmseSymbols() in this translation unit.
44	extern template void ObjFile<ELF32LE>::importCmseSymbols();
45	extern template void ObjFile<ELF32BE>::importCmseSymbols();
46	extern template void ObjFile<ELF64LE>::importCmseSymbols();
47	extern template void ObjFile<ELF64BE>::importCmseSymbols();
48
49	// Returns "<internal>", "foo.a(bar.o)" or "baz.o".
50	std::string elf::toStr(Ctx &ctx, const InputFile *f) {
51	static std::mutex mu;
52	if (!f)
53	return "<internal>";
54
55	{
56	std::lock_guard<std::mutex> lock(mu);
57	if (f->toStringCache.empty()) {
58	if (f->archiveName.empty())
59	f->toStringCache = f->getName();
60	else
61	(f->archiveName + "(" + f->getName() + ")").toVector(Out&: f->toStringCache);
62	}
63	}
64	return std::string(f->toStringCache);
65	}
66
67	const ELFSyncStream &elf::operator<<(const ELFSyncStream &s,
68	const InputFile *f) {
69	return s << toStr(ctx&: s.ctx, f);
70	}
71
72	static ELFKind getELFKind(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName) {
73	unsigned char size;
74	unsigned char endian;
75	std::tie(args&: size, args&: endian) = getElfArchType(Object: mb.getBuffer());
76
77	auto report = [&](StringRef msg) {
78	StringRef filename = mb.getBufferIdentifier();
79	if (archiveName.empty())
80	Fatal(ctx) << filename << ": " << msg;
81	else
82	Fatal(ctx) << archiveName << "(" << filename << "): " << msg;
83	};
84
85	if (!mb.getBuffer().starts_with(Prefix: ElfMagic))
86	report ("not an ELF file");
87	if (endian != ELFDATA2LSB && endian != ELFDATA2MSB)
88	report ("corrupted ELF file: invalid data encoding");
89	if (size != ELFCLASS32 && size != ELFCLASS64)
90	report ("corrupted ELF file: invalid file class");
91
92	size_t bufSize = mb.getBuffer().size();
93	if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) \|\|
94	(size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr)))
95	report ("corrupted ELF file: file is too short");
96
97	if (size == ELFCLASS32)
98	return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
99	return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
100	}
101
102	// For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
103	// flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
104	// the input objects have been compiled.
105	static void updateARMVFPArgs(Ctx &ctx, const ARMAttributeParser &attributes,
106	const InputFile *f) {
107	std::optional<unsigned> attr =
108	attributes.getAttributeValue(tag: ARMBuildAttrs::ABI_VFP_args);
109	if (!attr)
110	// If an ABI tag isn't present then it is implicitly given the value of 0
111	// which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files,
112	// including some in glibc that don't use FP args (and should have value 3)
113	// don't have the attribute so we do not consider an implicit value of 0
114	// as a clash.
115	return;
116
117	unsigned vfpArgs = *attr;
118	ARMVFPArgKind arg;
119	switch (vfpArgs) {
120	case ARMBuildAttrs::BaseAAPCS:
121	arg = ARMVFPArgKind::Base;
122	break;
123	case ARMBuildAttrs::HardFPAAPCS:
124	arg = ARMVFPArgKind::VFP;
125	break;
126	case ARMBuildAttrs::ToolChainFPPCS:
127	// Tool chain specific convention that conforms to neither AAPCS variant.
128	arg = ARMVFPArgKind::ToolChain;
129	break;
130	case ARMBuildAttrs::CompatibleFPAAPCS:
131	// Object compatible with all conventions.
132	return;
133	default:
134	ErrAlways(ctx) << f << ": unknown Tag_ABI_VFP_args value: " << vfpArgs;
135	return;
136	}
137	// Follow ld.bfd and error if there is a mix of calling conventions.
138	if (ctx.arg.armVFPArgs != arg && ctx.arg.armVFPArgs != ARMVFPArgKind::Default)
139	ErrAlways(ctx) << f << ": incompatible Tag_ABI_VFP_args";
140	else
141	ctx.arg.armVFPArgs = arg;
142	}
143
144	// The ARM support in lld makes some use of instructions that are not available
145	// on all ARM architectures. Namely:
146	// - Use of BLX instruction for interworking between ARM and Thumb state.
147	// - Use of the extended Thumb branch encoding in relocation.
148	// - Use of the MOVT/MOVW instructions in Thumb Thunks.
149	// The ARM Attributes section contains information about the architecture chosen
150	// at compile time. We follow the convention that if at least one input object
151	// is compiled with an architecture that supports these features then lld is
152	// permitted to use them.
153	static void updateSupportedARMFeatures(Ctx &ctx,
154	const ARMAttributeParser &attributes) {
155	std::optional<unsigned> attr =
156	attributes.getAttributeValue(tag: ARMBuildAttrs::CPU_arch);
157	if (!attr)
158	return;
159	auto arch = *attr;
160	switch (arch) {
161	case ARMBuildAttrs::Pre_v4:
162	case ARMBuildAttrs::v4:
163	case ARMBuildAttrs::v4T:
164	// Architectures prior to v5 do not support BLX instruction
165	break;
166	case ARMBuildAttrs::v5T:
167	case ARMBuildAttrs::v5TE:
168	case ARMBuildAttrs::v5TEJ:
169	case ARMBuildAttrs::v6:
170	case ARMBuildAttrs::v6KZ:
171	case ARMBuildAttrs::v6K:
172	ctx.arg.armHasBlx = true;
173	// Architectures used in pre-Cortex processors do not support
174	// The J1 = 1 J2 = 1 Thumb branch range extension, with the exception
175	// of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do.
176	break;
177	default:
178	// All other Architectures have BLX and extended branch encoding
179	ctx.arg.armHasBlx = true;
180	ctx.arg.armJ1J2BranchEncoding = true;
181	if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M)
182	// All Architectures used in Cortex processors with the exception
183	// of v6-M and v6S-M have the MOVT and MOVW instructions.
184	ctx.arg.armHasMovtMovw = true;
185	break;
186	}
187
188	// Only ARMv8-M or later architectures have CMSE support.
189	std::optional<unsigned> profile =
190	attributes.getAttributeValue(tag: ARMBuildAttrs::CPU_arch_profile);
191	if (!profile)
192	return;
193	if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
194	profile == ARMBuildAttrs::MicroControllerProfile)
195	ctx.arg.armCMSESupport = true;
196
197	// The thumb PLT entries require Thumb2 which can be used on multiple archs.
198	// For now, let's limit it to ones where ARM isn't available and we know have
199	// Thumb2.
200	std::optional<unsigned> armISA =
201	attributes.getAttributeValue(tag: ARMBuildAttrs::ARM_ISA_use);
202	std::optional<unsigned> thumb =
203	attributes.getAttributeValue(tag: ARMBuildAttrs::THUMB_ISA_use);
204	ctx.arg.armHasArmISA \|= armISA && *armISA >= ARMBuildAttrs::Allowed;
205	ctx.arg.armHasThumb2ISA \|= thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
206	}
207
208	InputFile::InputFile(Ctx &ctx, Kind k, MemoryBufferRef m)
209	: ctx(ctx), mb (m), groupId(ctx.driver.nextGroupId), fileKind(k) {
210	// All files within the same --{start,end}-group get the same group ID.
211	// Otherwise, a new file will get a new group ID.
212	if (!ctx.driver.isInGroup)
213	++ctx.driver.nextGroupId;
214	}
215
216	InputFile::~InputFile() {}
217
218	std::optional<MemoryBufferRef> elf::readFile(Ctx &ctx, StringRef path) {
219	llvm::TimeTraceScope timeScope("Load input files", path);
220
221	// The --chroot option changes our virtual root directory.
222	// This is useful when you are dealing with files created by --reproduce.
223	if (!ctx.arg.chroot.empty() && path.starts_with(Prefix: "/"))
224	path = ctx.saver.save(S: ctx.arg.chroot + path);
225
226	bool remapped = false;
227	auto it = ctx.arg.remapInputs.find(Val: path);
228	if (it != ctx.arg.remapInputs.end()) {
229	path = it ->second;
230	remapped = true;
231	} else {
232	for (const auto &[pat, toFile] : ctx.arg.remapInputsWildcards) {
233	if (pat.match(S: path)) {
234	path = toFile;
235	remapped = true;
236	break;
237	}
238	}
239	}
240	if (remapped) {
241	// Use /dev/null to indicate an input file that should be ignored. Change
242	// the path to NUL on Windows.
243	#ifdef _WIN32
244	if (path == "/dev/null")
245	path = "NUL";
246	#endif
247	}
248
249	Log(ctx) << path;
250	ctx.arg.dependencyFiles.insert(X: llvm::CachedHashString (path));
251
252	auto mbOrErr = MemoryBuffer::getFile(Filename: path, /IsText=/false,
253	/RequiresNullTerminator=/false);
254	if (auto ec = mbOrErr.getError()) {
255	ErrAlways(ctx) << "cannot open " << path << ": " << ec.message();
256	return std::nullopt;
257	}
258
259	MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef();
260	ctx.memoryBuffers.push_back(Elt: std::move(mbOrErr)); // take MB ownership*
261
262	if (ctx.tar)
263	ctx.tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
264	return mbref;
265	}
266
267	// All input object files must be for the same architecture
268	// (e.g. it does not make sense to link x86 object files with
269	// MIPS object files.) This function checks for that error.
270	static bool isCompatible(Ctx &ctx, InputFile *file) {
271	if (!file->isElf() && !isa<BitcodeFile>(Val: file))
272	return true;
273
274	if (file->ekind == ctx.arg.ekind && file->emachine == ctx.arg.emachine) {
275	if (ctx.arg.emachine != EM_MIPS)
276	return true;
277	if (isMipsN32Abi(ctx, f: *file) == ctx.arg.mipsN32Abi)
278	return true;
279	}
280
281	StringRef target =
282	!ctx.arg.bfdname.empty() ? ctx.arg.bfdname : ctx.arg.emulation;
283	if (!target.empty()) {
284	Err(ctx) << file << " is incompatible with " << target;
285	return false;
286	}
287
288	InputFile existing = nullptr*;
289	if (!ctx.objectFiles.empty())
290	existing = ctx.objectFiles [`0`];
291	else if (!ctx.sharedFiles.empty())
292	existing = ctx.sharedFiles [`0`];
293	else if (!ctx.bitcodeFiles.empty())
294	existing = ctx.bitcodeFiles [`0`];
295	auto diag = Err(ctx);
296	diag << file << " is incompatible";
297	if (existing)
298	diag << " with " << existing;
299	return false;
300	}
301
302	template <class ELFT> static void doParseFile(Ctx &ctx, InputFile *file) {
303	if (!isCompatible(ctx, file))
304	return;
305
306	// Lazy object file
307	if (file->lazy) {
308	if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
309	ctx.lazyBitcodeFiles.push_back(Elt: f);
310	f->parseLazy();
311	} else {
312	cast<ObjFile<ELFT>>(file)->parseLazy();
313	}
314	return;
315	}
316
317	if (ctx.arg.trace)
318	Msg(ctx) << file;
319
320	if (file->kind() == InputFile::ObjKind) {
321	ctx.objectFiles.push_back(Elt: cast<ELFFileBase>(Val: file));
322	cast<ObjFile<ELFT>>(file)->parse();
323	} else if (auto *f = dyn_cast<SharedFile>(Val: file)) {
324	f->parse<ELFT>();
325	} else if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
326	ctx.bitcodeFiles.push_back(Elt: f);
327	f->parse();
328	} else {
329	ctx.binaryFiles.push_back(Elt: cast<BinaryFile>(Val: file));
330	cast<BinaryFile>(Val: file)->parse();
331	}
332	}
333
334	// Add symbols in File to the symbol table.
335	void elf::parseFile(Ctx &ctx, InputFile *file) {
336	invokeELFT(doParseFile, ctx, file);
337	}
338
339	// This function is explicitly instantiated in ARM.cpp. Mark it extern here,
340	// to avoid warnings when building with MSVC.
// doParseFiles() below calls importCmseSymbols(), so the declarations must be
// visible at this point.
341	extern template void ObjFile<ELF32LE>::importCmseSymbols();
342	extern template void ObjFile<ELF32BE>::importCmseSymbols();
343	extern template void ObjFile<ELF64LE>::importCmseSymbols();
344	extern template void ObjFile<ELF64BE>::importCmseSymbols();
345
346	template <class ELFT>
347	static void
348	doParseFiles(Ctx &ctx,
349	const SmallVector<std::unique_ptr<InputFile>, `0`> &files) {
350	// Add all files to the symbol table. This will add almost all symbols that we
351	// need to the symbol table. This process might add files to the link due to
352	// addDependentLibrary.
353	for (size_t i = `0`; i < files.size(); ++i) {
354	llvm::TimeTraceScope timeScope("Parse input files", files [i]->getName());
355	doParseFile<ELFT>(ctx, files [i].get());
356	}
357	if (ctx.driver.armCmseImpLib)
358	cast<ObjFile<ELFT>>(*ctx.driver.armCmseImpLib).importCmseSymbols();
359	}
360
361	void elf::parseFiles(Ctx &ctx,
362	const SmallVector<std::unique_ptr<InputFile>, `0`> &files) {
363	llvm::TimeTraceScope timeScope("Parse input files");
364	invokeELFT(doParseFiles, ctx, files);
365	}
366
367	// Concatenates arguments to construct a string representing an error location.
368	StringRef InputFile::getNameForScript() const {
369	if (archiveName.empty())
370	return getName();
371
372	if (nameForScriptCache.empty())
373	nameForScriptCache = (archiveName + Twine(`':'`) + getName()).str();
374
375	return nameForScriptCache;
376	}
377
378	// An ELF object file may contain a `.deplibs` section. If it exists, the
379	// section contains a list of library specifiers such as `m` for libm. This
380	// function resolves a given name by finding the first matching library checking
381	// the various ways that a library can be specified to LLD. This ELF extension
382	// is a form of autolinking and is called `dependent libraries`. It is currently
383	// unique to LLVM and lld.
384	static void addDependentLibrary(Ctx &ctx, StringRef specifier,
385	const InputFile *f) {
386	if (!ctx.arg.dependentLibraries)
387	return;
388	if (std::optional<std::string> s = searchLibraryBaseName(ctx, path: specifier))
389	ctx.driver.addFile(path: ctx.saver.save(S: s), /withLOption=/*true);
390	else if (std::optional<std::string> s = findFromSearchPaths(ctx, path: specifier))
391	ctx.driver.addFile(path: ctx.saver.save(S: s), /withLOption=/*true);
392	else if (fs::exists(Path: specifier))
393	ctx.driver.addFile(path: specifier, /withLOption=/false);
394	else
395	ErrAlways(ctx)
396	<< f << ": unable to find library from dependent library specifier: "
397	<< specifier;
398	}
399
400	// Record the membership of a section group so that in the garbage collection
401	// pass, section group members are kept or discarded as a unit.
402	template <class ELFT>
403	static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
404	ArrayRef<typename ELFT::Word> entries) {
405	bool hasAlloc = false;
406	for (uint32_t index : entries.slice(`1`)) {
407	if (index >= sections.size())
408	return;
409	if (InputSectionBase *s = sections [index])
410	if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
411	hasAlloc = true;
412	}
413
414	// If any member has the SHF_ALLOC flag, the whole group is subject to garbage
415	// collection. See the comment in markLive(). This rule retains .debug_types
416	// and .rela.debug_types.
417	if (!hasAlloc)
418	return;
419
420	// Connect the members in a circular doubly-linked list via
421	// nextInSectionGroup.
422	InputSectionBase *head;
423	InputSectionBase prev = nullptr*;
424	for (uint32_t index : entries.slice(`1`)) {
425	InputSectionBase *s = sections [index];
426	if (!s \|\| s == &InputSection::discarded)
427	continue;
428	if (prev)
429	prev->nextInSectionGroup = s;
430	else
431	head = s;
432	prev = s;
433	}
434	if (prev)
435	prev->nextInSectionGroup = head;
436	}
437
438	template <class ELFT> void ObjFile<ELFT>::initDwarf() {
439	dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
440	std::make_unique<LLDDwarfObj<ELFT>>(this), "",
441	[&](Error err) { Warn(ctx) << getName() + ": " << std::move(err); },
442	[&](Error warning) {
443	Warn(ctx) << getName() << ": " << std::move(warning);
444	}));
445	}
446
447	DWARFCache *ELFFileBase::getDwarf() {
448	assert(fileKind == ObjKind);
449	llvm::call_once(flag&: initDwarf, F: [this]() {
450	switch (ekind) {
451	default:
452	llvm_unreachable("");
453	case ELF32LEKind:
454	return cast<ObjFile<ELF32LE>>(Val: this)->initDwarf();
455	case ELF32BEKind:
456	return cast<ObjFile<ELF32BE>>(Val: this)->initDwarf();
457	case ELF64LEKind:
458	return cast<ObjFile<ELF64LE>>(Val: this)->initDwarf();
459	case ELF64BEKind:
460	return cast<ObjFile<ELF64BE>>(Val: this)->initDwarf();
461	}
462	});
463	return dwarf.get();
464	}
465
466	ELFFileBase::ELFFileBase(Ctx &ctx, Kind k, ELFKind ekind, MemoryBufferRef mb)
467	: InputFile (ctx, k, mb) {
468	this->ekind = ekind;
469	}
470
471	ELFFileBase::~ELFFileBase() {}
472
473	template <typename Elf_Shdr>
474	static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
475	for (const Elf_Shdr &sec : sections)
476	if (sec.sh_type == type)
477	return &sec;
478	return nullptr;
479	}
480
481	void ELFFileBase::init() {
482	switch (ekind) {
483	case ELF32LEKind:
484	init<ELF32LE>(k: fileKind);
485	break;
486	case ELF32BEKind:
487	init<ELF32BE>(k: fileKind);
488	break;
489	case ELF64LEKind:
490	init<ELF64LE>(k: fileKind);
491	break;
492	case ELF64BEKind:
493	init<ELF64BE>(k: fileKind);
494	break;
495	default:
496	llvm_unreachable("getELFKind");
497	}
498	}
499
500	template <class ELFT> void ELFFileBase::init(InputFile::Kind k) {
501	using Elf_Shdr = typename ELFT::Shdr;
502	using Elf_Sym = typename ELFT::Sym;
503
504	// Initialize trivial attributes.
505	const ELFFile<ELFT> &obj = getObj<ELFT>();
506	emachine = obj.getHeader().e_machine;
507	osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI];
508	abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION];
509
510	ArrayRef<Elf_Shdr> sections = CHECK2(obj.sections(), this);
511	elfShdrs = sections.data();
512	numELFShdrs = sections.size();
513
514	// Find a symbol table.
515	const Elf_Shdr *symtabSec =
516	findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB);
517
518	if (!symtabSec)
519	return;
520
521	// Initialize members corresponding to a symbol table.
522	firstGlobal = symtabSec->sh_info;
523
524	ArrayRef<Elf_Sym> eSyms = CHECK2(obj.symbols(symtabSec), this);
525	if (firstGlobal == `0` \|\| firstGlobal > eSyms.size())
526	Fatal(ctx) << this << ": invalid sh_info in symbol table";
527
528	elfSyms = reinterpret_cast<const void *>(eSyms.data());
529	numSymbols = eSyms.size();
530	stringTable = CHECK2(obj.getStringTableForSymtab(symtabSec, sections), this*);
531	}
532
533	template <class ELFT>
534	uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
535	return CHECK2(
536	this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
537	this);
538	}
539
540	template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
541	object::ELFFile<ELFT> obj = this->getObj();
542	// Read a section table. justSymbols is usually false.
543	if (this->justSymbols) {
544	initializeJustSymbols();
545	initializeSymbols(obj);
546	return;
547	}
548
549	// Handle dependent libraries and selection of section groups as these are not
550	// done in parallel.
551	ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
552	StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this);
553	uint64_t size = objSections.size();
554	sections.resize(size);
555	for (size_t i = `0`; i != size; ++i) {
556	const Elf_Shdr &sec = objSections[i];
557	if (LLVM_LIKELY(sec.sh_type == SHT_PROGBITS))
558	continue;
559	if (LLVM_LIKELY(sec.sh_type == SHT_GROUP)) {
560	StringRef signature = getShtGroupSignature(sections: objSections, sec);
561	ArrayRef<Elf_Word> entries =
562	CHECK2(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
563	if (entries.empty())
564	Fatal(ctx) << this << ": empty SHT_GROUP";
565
566	Elf_Word flag = entries[`0`];
567	if (flag && flag != GRP_COMDAT)
568	Fatal(ctx) << this << ": unsupported SHT_GROUP format";
569
570	bool keepGroup = !flag \|\| ignoreComdats \|\|
571	ctx.symtab ->comdatGroups
572	.try_emplace(CachedHashStringRef (signature), this)
573	.second;
574	if (keepGroup) {
575	if (!ctx.arg.resolveGroups)
576	sections[i] = createInputSection(
577	idx: i, sec, name: check(obj.getSectionName(sec, shstrtab)));
578	} else {
579	// Otherwise, discard group members.
580	for (uint32_t secIndex : entries.slice(`1`)) {
581	if (secIndex >= size)
582	Fatal(ctx) << this
583	<< ": invalid section index in group: " << secIndex;
584	sections[secIndex] = &InputSection::discarded;
585	}
586	}
587	continue;
588	}
589
590	if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !ctx.arg.relocatable) {
591	StringRef name = check(obj.getSectionName(sec, shstrtab));
592	ArrayRef<char> data = CHECK2(
593	this->getObj().template getSectionContentsAsArray<char>(sec), this);
594	if (!data.empty() && data.back() != `'\0'`) {
595	Err(ctx)
596	<< this
597	<< ": corrupted dependent libraries section (unterminated string): "
598	<< name;
599	} else {
600	for (const char d = data.begin(), e = data.end(); d < e;) {
601	StringRef s(d);
602	addDependentLibrary(ctx, s, this);
603	d += s.size() + `1`;
604	}
605	}
606	sections[i] = &InputSection::discarded;
607	continue;
608	}
609
610	switch (ctx.arg.emachine) {
611	case EM_ARM:
612	if (sec.sh_type == SHT_ARM_ATTRIBUTES) {
613	ARMAttributeParser attributes;
614	ArrayRef<uint8_t> contents =
615	check(this->getObj().getSectionContents(sec));
616	StringRef name = check(obj.getSectionName(sec, shstrtab));
617	sections[i] = &InputSection::discarded;
618	if (Error e = attributes.parse(section: contents, endian: ekind == ELF32LEKind
619	? llvm::endianness::little
620	: llvm::endianness::big)) {
621	InputSection isec(*this, sec, name);
622	Warn(ctx) << &isec << ": " << std::move(e);
623	} else {
624	updateSupportedARMFeatures(ctx, attributes);
625	updateARMVFPArgs(ctx, attributes, this);
626
627	// FIXME: Retain the first attribute section we see. The eglibc ARM
628	// dynamic loaders require the presence of an attribute section for
629	// dlopen to work. In a full implementation we would merge all
630	// attribute sections.
631	if (ctx.in.attributes == nullptr) {
632	ctx.in.attributes =
633	std::make_unique<InputSection>(*this, sec, name);
634	sections[i] = ctx.in.attributes.get();
635	}
636	}
637	}
638	break;
639	case EM_AARCH64:
640	// FIXME: BuildAttributes have been implemented in llvm, but not yet in
641	// lld. Remove the section so that it does not accumulate in the output
642	// file. When support is implemented we expect not to output a build
643	// attributes section in files of type ET_EXEC or ET_SHARED, but ld -r
644	// ouptut will need a single merged attributes section.
645	if (sec.sh_type == SHT_AARCH64_ATTRIBUTES)
646	sections[i] = &InputSection::discarded;
647	// Producing a static binary with MTE globals is not currently supported,
648	// remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused
649	// medatada, and we don't want them to end up in the output file for
650	// static executables.
651	if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC &&
652	!canHaveMemtagGlobals(ctx))
653	sections[i] = &InputSection::discarded;
654	break;
655	}
656	}
657
658	// Read a symbol table.
659	initializeSymbols(obj);
660	}
661
662	// Sections with SHT_GROUP and comdat bits define comdat section groups.
663	// They are identified and deduplicated by group name. This function
664	// returns a group name.
665	template <class ELFT>
666	StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
667	const Elf_Shdr &sec) {
668	typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
669	if (sec.sh_info >= symbols.size())
670	Fatal(ctx) << this << ": invalid symbol index";
671	const typename ELFT::Sym &sym = symbols[sec.sh_info];
672	return CHECK2(sym.getName(this->stringTable), this);
673	}
674
675	template <class ELFT>
676	bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
677	// On a regular link we don't merge sections if -O0 (default is -O1). This
678	// sometimes makes the linker significantly faster, although the output will
679	// be bigger.
680	//
681	// Doing the same for -r would create a problem as it would combine sections
682	// with different sh_entsize. One option would be to just copy every SHF_MERGE
683	// section as is to the output. While this would produce a valid ELF file with
684	// usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
685	// they see two .debug_str. We could have separate logic for combining
686	// SHF_MERGE sections based both on their name and sh_entsize, but that seems
687	// to be more trouble than it is worth. Instead, we just use the regular (-O1)
688	// logic for -r.
689	if (ctx.arg.optimize == `0` && !ctx.arg.relocatable)
690	return false;
691
692	// A mergeable section with size 0 is useless because they don't have
693	// any data to merge. A mergeable string section with size 0 can be
694	// argued as invalid because it doesn't end with a null character.
695	// We'll avoid a mess by handling them as if they were non-mergeable.
696	if (sec.sh_size == `0`)
697	return false;
698
699	// Check for sh_entsize. The ELF spec is not clear about the zero
700	// sh_entsize. It says that "the member [sh_entsize] contains 0 if
701	// the section does not hold a table of fixed-size entries". We know
702	// that Rust 1.13 produces a string mergeable section with a zero
703	// sh_entsize. Here we just accept it rather than being picky about it.
704	uint64_t entSize = sec.sh_entsize;
705	if (entSize == `0`)
706	return false;
707	if (sec.sh_size % entSize)
708	ErrAlways(ctx) << this << ":(" << name << "): SHF_MERGE section size ("
709	<< uint64_t(sec.sh_size)
710	<< ") must be a multiple of sh_entsize (" << entSize << ")";
711	if (sec.sh_flags & SHF_WRITE)
712	Err(ctx) << this << ":(" << name
713	<< "): writable SHF_MERGE section is not supported";
714
715	return true;
716	}
717
718	// This is for --just-symbols.
719	//
720	// --just-symbols is a very minor feature that allows you to link your
721	// output against other existing program, so that if you load both your
722	// program and the other program into memory, your output can refer the
723	// other program's symbols.
724	//
725	// When the option is given, we link "just symbols". The section table is
726	// initialized with null pointers.
727	template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
728	sections.resize(numELFShdrs);
729	}
730
731	static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) {
732	if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC))
733	return true;
734	if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING))
735	return true;
736	// Allow all processor-specific types. This is different from GNU ld.
737	return SHT_LOPROC <= t && t <= SHT_HIPROC;
738	}
739
740	template <class ELFT>
741	void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
742	const llvm::object::ELFFile<ELFT> &obj) {
743	ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
744	StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this);
745	uint64_t size = objSections.size();
746	SmallVector<ArrayRef<Elf_Word>, `0`> selectedGroups;
747	for (size_t i = `0`; i != size; ++i) {
748	if (this->sections[i] == &InputSection::discarded)
749	continue;
750	const Elf_Shdr &sec = objSections[i];
751	const uint32_t type = sec.sh_type;
752
753	// SHF_EXCLUDE'ed sections are discarded by the linker. However,
754	// if -r is given, we'll let the final link discard such sections.
755	// This is compatible with GNU.
756	if ((sec.sh_flags & SHF_EXCLUDE) && !ctx.arg.relocatable) {
757	if (type == SHT_LLVM_CALL_GRAPH_PROFILE)
758	cgProfileSectionIndex = i;
759	if (type == SHT_LLVM_ADDRSIG) {
760	// We ignore the address-significance table if we know that the object
761	// file was created by objcopy or ld -r. This is because these tools
762	// will reorder the symbols in the symbol table, invalidating the data
763	// in the address-significance table, which refers to symbols by index.
764	if (sec.sh_link != `0`)
765	this->addrsigSec = &sec;
766	else if (ctx.arg.icf == ICFLevel::Safe)
767	Warn(ctx) << this
768	<< ": --icf=safe conservatively ignores "
769	"SHT_LLVM_ADDRSIG [index "
770	<< i
771	<< "] with sh_link=0 "
772	"(likely created using objcopy or ld -r)";
773	}
774	this->sections[i] = &InputSection::discarded;
775	continue;
776	}
777
778	switch (type) {
779	case SHT_GROUP: {
780	if (!ctx.arg.relocatable)
781	sections[i] = &InputSection::discarded;
782	StringRef signature =
783	cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable));
784	ArrayRef<Elf_Word> entries =
785	cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec));
786	if ((entries[`0`] & GRP_COMDAT) == `0` \|\| ignoreComdats \|\|
787	ctx.symtab ->comdatGroups.find(Val: CachedHashStringRef (signature))
788	->second == this)
789	selectedGroups.push_back(entries);
790	break;
791	}
792	case SHT_SYMTAB_SHNDX:
793	shndxTable = CHECK2(obj.getSHNDXTable(sec, objSections), this);
794	break;
795	case SHT_SYMTAB:
796	case SHT_STRTAB:
797	case SHT_REL:
798	case SHT_RELA:
799	case SHT_CREL:
800	case SHT_NULL:
801	break;
802	case SHT_PROGBITS:
803	case SHT_NOTE:
804	case SHT_NOBITS:
805	case SHT_INIT_ARRAY:
806	case SHT_FINI_ARRAY:
807	case SHT_PREINIT_ARRAY:
808	this->sections[i] =
809	createInputSection(idx: i, sec, name: check(obj.getSectionName(sec, shstrtab)));
810	break;
811	case SHT_LLVM_LTO:
812	// Discard .llvm.lto in a relocatable link that does not use the bitcode.
813	// The concatenated output does not properly reflect the linking
814	// semantics. In addition, since we do not use the bitcode wrapper format,
815	// the concatenated raw bitcode would be invalid.
816	if (ctx.arg.relocatable && !ctx.arg.fatLTOObjects) {
817	sections[i] = &InputSection::discarded;
818	break;
819	}
820	[[fallthrough]];
821	default:
822	this->sections[i] =
823	createInputSection(idx: i, sec, name: check(obj.getSectionName(sec, shstrtab)));
824	if (type == SHT_LLVM_SYMPART)
825	ctx.hasSympart.store(true, std::memory_order_relaxed);
826	else if (ctx.arg.rejectMismatch &&
827	!isKnownSpecificSectionType(type, sec.sh_flags))
828	Err(ctx) << this->sections[i] << ": unknown section type 0x"
829	<< Twine::utohexstr(Val: type);
830	break;
831	}
832	}
833
834	// We have a second loop. It is used to:
835	// 1) handle SHF_LINK_ORDER sections.
836	// 2) create relocation sections. In some cases the section header index of a
837	// relocation section may be smaller than that of the relocated section. In
838	// such cases, the relocation section would attempt to reference a target
839	// section that has not yet been created. For simplicity, delay creation of
840	// relocation sections until now.
841	for (size_t i = `0`; i != size; ++i) {
842	if (this->sections[i] == &InputSection::discarded)
843	continue;
844	const Elf_Shdr &sec = objSections[i];
845
846	if (isStaticRelSecType(sec.sh_type)) {
847	// Find a relocation target section and associate this section with that.
848	// Target may have been discarded if it is in a different section group
849	// and the group is discarded, even though it's a violation of the spec.
850	// We handle that situation gracefully by discarding dangling relocation
851	// sections.
852	const uint32_t info = sec.sh_info;
853	InputSectionBase *s = getRelocTarget(idx: i, info);
854	if (!s)
855	continue;
856
857	// ELF spec allows mergeable sections with relocations, but they are rare,
858	// and it is in practice hard to merge such sections by contents, because
859	// applying relocations at end of linking changes section contents. So, we
860	// simply handle such sections as non-mergeable ones. Degrading like this
861	// is acceptable because section merging is optional.
862	if (auto *ms = dyn_cast<MergeInputSection>(Val: s)) {
863	s = makeThreadLocal<InputSection>(args&: ms->file, args&: ms->name, args&: ms->type,
864	args&: ms->flags, args&: ms->addralign, args&: ms->entsize,
865	args: ms->contentMaybeDecompress());
866	sections[info] = s;
867	}
868
869	if (s->relSecIdx != `0`)
870	ErrAlways(ctx) << s
871	<< ": multiple relocation sections to one section are "
872	"not supported";
873	s->relSecIdx = i;
874
875	// Relocation sections are usually removed from the output, so return
876	// `nullptr` for the normal case. However, if -r or --emit-relocs is
877	// specified, we need to copy them to the output. (Some post link analysis
878	// tools specify --emit-relocs to obtain the information.)
879	if (ctx.arg.copyRelocs) {
880	auto *isec = makeThreadLocal<InputSection>(
881	*this, sec, check(obj.getSectionName(sec, shstrtab)));
882	// If the relocated section is discarded (due to /DISCARD/ or
883	// --gc-sections), the relocation section should be discarded as well.
884	s->dependentSections.push_back(NewVal: isec);
885	sections[i] = isec;
886	}
887	continue;
888	}
889
890	// A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
891	// the flag.
892	if (!sec.sh_link \|\| !(sec.sh_flags & SHF_LINK_ORDER))
893	continue;
894
895	InputSectionBase linkSec = nullptr*;
896	if (sec.sh_link < size)
897	linkSec = this->sections[sec.sh_link];
898	if (!linkSec) {
899	ErrAlways(ctx) << this
900	<< ": invalid sh_link index: " << uint32_t(sec.sh_link);
901	continue;
902	}
903
904	// A SHF_LINK_ORDER section is discarded if its linked-to section is
905	// discarded.
906	InputSection isec = cast<InputSection>(this*->sections[i]);
907	linkSec->dependentSections.push_back(NewVal: isec);
908	if (!isa<InputSection>(Val: linkSec))
909	ErrAlways(ctx)
910	<< "a section " << isec->name
911	<< " with SHF_LINK_ORDER should not refer a non-regular section: "
912	<< linkSec;
913	}
914
915	for (ArrayRef<Elf_Word> entries : selectedGroups)
916	handleSectionGroup<ELFT>(this->sections, entries);
917	}
918
919	template <typename ELFT>
920	static void parseGnuPropertyNote(Ctx &ctx, ELFFileBase &f,
921	uint32_t featureAndType,
922	ArrayRef<uint8_t> &desc, const uint8_t *base,
923	ArrayRef<uint8_t> data = nullptr*) {
924	auto err = [&](const uint8_t *place) -> ELFSyncStream {
925	auto diag = Err(ctx);
926	diag << &f << ":(" << ".note.gnu.property+0x"
927	<< Twine::utohexstr(Val: place - base) << "): ";
928	return diag;
929	};
930
931	while (!desc.empty()) {
932	const uint8_t *place = desc.data();
933	if (desc.size() < `8`)
934	return void(err(place) << "program property is too short");
935	uint32_t type = read32<ELFT::Endianness>(desc.data());
936	uint32_t size = read32<ELFT::Endianness>(desc.data() + `4`);
937	desc = desc.slice(N: `8`);
938	if (desc.size() < size)
939	return void(err(place) << "program property is too short");
940
941	if (type == featureAndType) {
942	// We found a FEATURE_1_AND field. There may be more than one of these
943	// in a .note.gnu.property section, for a relocatable object we
944	// accumulate the bits set.
945	if (size < `4`)
946	return void(err(place) << "FEATURE_1_AND entry is too short");
947	f.andFeatures \|= read32<ELFT::Endianness>(desc.data());
948	} else if (ctx.arg.emachine == EM_AARCH64 &&
949	type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) {
950	ArrayRef<uint8_t> contents = data ? *data : desc;
951	if (f.aarch64PauthAbiCoreInfo) {
952	return void(
953	err(contents.data())
954	<< "multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are "
955	"not supported");
956	} else if (size != `16`) {
957	return void(err(contents.data())
958	<< "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry "
959	"is invalid: expected 16 bytes, but got "
960	<< size);
961	}
962	f.aarch64PauthAbiCoreInfo = {
963	support::endian::read64<ELFT::Endianness>(&desc [`0`]),
964	support::endian::read64<ELFT::Endianness>(&desc [`8`])};
965	}
966
967	// Padding is present in the note descriptor, if necessary.
968	desc = desc.slice(alignTo<(ELFT::Is64Bits ? `8` : `4`)>(size));
969	}
970	}
971	// Read the following info from the .note.gnu.property section and write it to
972	// the corresponding fields in `ObjFile`:
973	// - Feature flags (32 bits) representing x86, AArch64 or RISC-V features for
974	// hardware-assisted call flow control;
975	// - AArch64 PAuth ABI core info (16 bytes).
976	template <class ELFT>
977	static void readGnuProperty(Ctx &ctx, const InputSection &sec,
978	ObjFile<ELFT> &f) {
979	using Elf_Nhdr = typename ELFT::Nhdr;
980	using Elf_Note = typename ELFT::Note;
981
982	uint32_t featureAndType;
983	switch (ctx.arg.emachine) {
984	case EM_386:
985	case EM_X86_64:
986	featureAndType = GNU_PROPERTY_X86_FEATURE_1_AND;
987	break;
988	case EM_AARCH64:
989	featureAndType = GNU_PROPERTY_AARCH64_FEATURE_1_AND;
990	break;
991	case EM_RISCV:
992	featureAndType = GNU_PROPERTY_RISCV_FEATURE_1_AND;
993	break;
994	default:
995	return;
996	}
997
998	ArrayRef<uint8_t> data = sec.content();
999	auto err = [&](const uint8_t *place) -> ELFSyncStream {
1000	auto diag = Err(ctx);
1001	diag << sec.file << ":(" << sec.name << "+0x"
1002	<< Twine::utohexstr(Val: place - sec.content().data()) << "): ";
1003	return diag;
1004	};
1005	while (!data.empty()) {
1006	// Read one NOTE record.
1007	auto nhdr = reinterpret_cast<const* Elf_Nhdr *>(data.data());
1008	if (data.size() < sizeof(Elf_Nhdr) \|\|
1009	data.size() < nhdr->getSize(sec.addralign))
1010	return void(err(data.data()) << "data is too short");
1011
1012	Elf_Note note(*nhdr);
1013	if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 \|\| note.getName() != "GNU") {
1014	data = data.slice(nhdr->getSize(sec.addralign));
1015	continue;
1016	}
1017
1018	// Read a body of a NOTE record, which consists of type-length-value fields.
1019	ArrayRef<uint8_t> desc = note.getDesc(sec.addralign);
1020	const uint8_t *base = sec.content().data();
1021	parseGnuPropertyNote<ELFT>(ctx, f, featureAndType, desc, base, &data);
1022
1023	// Go to next NOTE record to look for more FEATURE_1_AND descriptions.
1024	data = data.slice(nhdr->getSize(sec.addralign));
1025	}
1026	}
1027
1028	template <class ELFT>
1029	InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, uint32_t info) {
1030	if (info < this->sections.size()) {
1031	InputSectionBase target = this*->sections[info];
1032
1033	// Strictly speaking, a relocation section must be included in the
1034	// group of the section it relocates. However, LLVM 3.3 and earlier
1035	// would fail to do so, so we gracefully handle that case.
1036	if (target == &InputSection::discarded)
1037	return nullptr;
1038
1039	if (target != nullptr)
1040	return target;
1041	}
1042
1043	Err(ctx) << this << ": relocation section (index " << idx
1044	<< ") has invalid sh_info (" << info << `')'`;
1045	return nullptr;
1046	}
1047
1048	// The function may be called concurrently for different input files. For
1049	// allocation, prefer makeThreadLocal which does not require holding a lock.
1050	template <class ELFT>
1051	InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
1052	const Elf_Shdr &sec,
1053	StringRef name) {
1054	if (name.starts_with(Prefix: ".n")) {
1055	// The GNU linker uses .note.GNU-stack section as a marker indicating
1056	// that the code in the object file does not expect that the stack is
1057	// executable (in terms of NX bit). If all input files have the marker,
1058	// the GNU linker adds a PT_GNU_STACK segment to tells the loader to
1059	// make the stack non-executable. Most object files have this section as
1060	// of 2017.
1061	//
1062	// But making the stack non-executable is a norm today for security
1063	// reasons. Failure to do so may result in a serious security issue.
1064	// Therefore, we make LLD always add PT_GNU_STACK unless it is
1065	// explicitly told to do otherwise (by -z execstack). Because the stack
1066	// executable-ness is controlled solely by command line options,
1067	// .note.GNU-stack sections are, with one exception, ignored. Report
1068	// an error if we encounter an executable .note.GNU-stack to force the
1069	// user to explicitly request an executable stack.
1070	if (name == ".note.GNU-stack") {
1071	if ((sec.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable &&
1072	ctx.arg.zGnustack != GnuStackKind::Exec) {
1073	Err(ctx) << this
1074	<< ": requires an executable stack, but -z execstack is not "
1075	"specified";
1076	}
1077	return &InputSection::discarded;
1078	}
1079
1080	// Object files that use processor features such as Intel Control-Flow
1081	// Enforcement (CET), AArch64 Branch Target Identification BTI or RISC-V
1082	// Zicfilp/Zicfiss extensions, use a .note.gnu.property section containing
1083	// a bitfield of feature bits like the GNU_PROPERTY_X86_FEATURE_1_IBT flag.
1084	//
1085	// Since we merge bitmaps from multiple object files to create a new
1086	// .note.gnu.property containing a single AND'ed bitmap, we discard an input
1087	// file's .note.gnu.property section.
1088	if (name == ".note.gnu.property") {
1089	readGnuProperty<ELFT>(ctx, InputSection(*this, sec, name), *this);
1090	return &InputSection::discarded;
1091	}
1092
1093	// Split stacks is a feature to support a discontiguous stack,
1094	// commonly used in the programming language Go. For the details,
1095	// see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
1096	// for split stack will include a .note.GNU-split-stack section.
1097	if (name == ".note.GNU-split-stack") {
1098	if (ctx.arg.relocatable) {
1099	ErrAlways(ctx) << "cannot mix split-stack and non-split-stack in a "
1100	"relocatable link";
1101	return &InputSection::discarded;
1102	}
1103	this->splitStack = true;
1104	return &InputSection::discarded;
1105	}
1106
1107	// An object file compiled for split stack, but where some of the
1108	// functions were compiled with the no_split_stack_attribute will
1109	// include a .note.GNU-no-split-stack section.
1110	if (name == ".note.GNU-no-split-stack") {
1111	this->someNoSplitStack = true;
1112	return &InputSection::discarded;
1113	}
1114
1115	// Strip existing .note.gnu.build-id sections so that the output won't have
1116	// more than one build-id. This is not usually a problem because input
1117	// object files normally don't have .build-id sections, but you can create
1118	// such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard
1119	// against it.
1120	if (name == ".note.gnu.build-id")
1121	return &InputSection::discarded;
1122	}
1123
1124	// The linker merges EH (exception handling) frames and creates a
1125	// .eh_frame_hdr section for runtime. So we handle them with a special
1126	// class. For relocatable outputs, they are just passed through.
1127	if (name == ".eh_frame" && !ctx.arg.relocatable)
1128	return makeThreadLocal<EhInputSection>(*this, sec, name);
1129
1130	if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name))
1131	return makeThreadLocal<MergeInputSection>(*this, sec, name);
1132	return makeThreadLocal<InputSection>(*this, sec, name);
1133	}
1134
1135	// Initialize symbols. symbols is a parallel array to the corresponding ELF
1136	// symbol table.
1137	template <class ELFT>
1138	void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
1139	ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1140	if (!symbols)
1141	symbols = std::make_unique<Symbol *[]>(numSymbols);
1142
1143	// Some entries have been filled by LazyObjFile.
1144	auto *symtab = ctx.symtab.get();
1145	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
1146	if (!symbols[i])
1147	symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this));
1148
1149	// Perform symbol resolution on non-local symbols.
1150	SmallVector<unsigned, `32`> undefineds;
1151	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1152	const Elf_Sym &eSym = eSyms[i];
1153	uint32_t secIdx = eSym.st_shndx;
1154	if (secIdx == SHN_UNDEF) {
1155	undefineds.push_back(Elt: i);
1156	continue;
1157	}
1158
1159	uint8_t binding = eSym.getBinding();
1160	uint8_t stOther = eSym.st_other;
1161	uint8_t type = eSym.getType();
1162	uint64_t value = eSym.st_value;
1163	uint64_t size = eSym.st_size;
1164
1165	Symbol *sym = symbols[i];
1166	sym->isUsedInRegularObj = true;
1167	if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) {
1168	if (value == `0` \|\| value >= UINT32_MAX)
1169	Err(ctx) << this << ": common symbol '" << sym->getName()
1170	<< "' has invalid alignment: " << value;
1171	hasCommonSyms = true;
1172	sym->resolve(ctx, CommonSymbol{ctx, this, StringRef(), binding, stOther,
1173	type, value, size});
1174	continue;
1175	}
1176
1177	// Handle global defined symbols. Defined::section will be set in postParse.
1178	sym->resolve(ctx, Defined{ctx, this, StringRef(), binding, stOther, type,
1179	value, size, nullptr});
1180	}
1181
1182	// Undefined symbols (excluding those defined relative to non-prevailing
1183	// sections) can trigger recursive extract. Process defined symbols first so
1184	// that the relative order between a defined symbol and an undefined symbol
1185	// does not change the symbol resolution behavior. In addition, a set of
1186	// interconnected symbols will all be resolved to the same file, instead of
1187	// being resolved to different files.
1188	for (unsigned i : undefineds) {
1189	const Elf_Sym &eSym = eSyms[i];
1190	Symbol *sym = symbols[i];
1191	sym->resolve(ctx, Undefined{this, StringRef(), eSym.getBinding(),
1192	eSym.st_other, eSym.getType()});
1193	sym->isUsedInRegularObj = true;
1194	sym->referenced = true;
1195	}
1196	}
1197
1198	template <class ELFT>
1199	void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) {
1200	if (!justSymbols)
1201	initializeSections(ignoreComdats, obj: getObj());
1202
1203	if (!firstGlobal)
1204	return;
1205	SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal);
1206	memset(locals, `0`, sizeof(SymbolUnion) * firstGlobal);
1207
1208	ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1209	for (size_t i = `0`, end = firstGlobal; i != end; ++i) {
1210	const Elf_Sym &eSym = eSyms[i];
1211	uint32_t secIdx = eSym.st_shndx;
1212	if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1213	secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1214	else if (secIdx >= SHN_LORESERVE)
1215	secIdx = `0`;
1216	if (LLVM_UNLIKELY(secIdx >= sections.size())) {
1217	Err(ctx) << this << ": invalid section index: " << secIdx;
1218	secIdx = `0`;
1219	}
1220	if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
1221	ErrAlways(ctx) << this << ": non-local symbol (" << i
1222	<< ") found at index < .symtab's sh_info (" << end << ")";
1223
1224	InputSectionBase *sec = sections[secIdx];
1225	uint8_t type = eSym.getType();
1226	if (type == STT_FILE)
1227	sourceFile = CHECK2(eSym.getName(stringTable), this);
1228	unsigned stName = eSym.st_name;
1229	if (LLVM_UNLIKELY(stringTable.size() <= stName)) {
1230	Err(ctx) << this << ": invalid symbol name offset";
1231	stName = `0`;
1232	}
1233	StringRef name(stringTable.data() + stName);
1234
1235	symbols[i] = reinterpret_cast<Symbol *>(locals + i);
1236	if (eSym.st_shndx == SHN_UNDEF \|\| sec == &InputSection::discarded)
1237	new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
1238	/discardedSecIdx=/secIdx);
1239	else
1240	new (symbols[i]) Defined(ctx, this, name, STB_LOCAL, eSym.st_other, type,
1241	eSym.st_value, eSym.st_size, sec);
1242	symbols[i]->partition = `1`;
1243	symbols[i]->isUsedInRegularObj = true;
1244	}
1245	}
1246
1247	// Called after all ObjFile::parse is called for all ObjFiles. This checks
1248	// duplicate symbols and may do symbol property merge in the future.
1249	template <class ELFT> void ObjFile<ELFT>::postParse() {
1250	static std::mutex mu;
1251	ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1252	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1253	const Elf_Sym &eSym = eSyms[i];
1254	Symbol &sym = *symbols[i];
1255	uint32_t secIdx = eSym.st_shndx;
1256	uint8_t binding = eSym.getBinding();
1257	if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK &&
1258	binding != STB_GNU_UNIQUE))
1259	Err(ctx) << this << ": symbol (" << i
1260	<< ") has invalid binding: " << (int)binding;
1261
1262	// st_value of STT_TLS represents the assigned offset, not the actual
1263	// address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
1264	// only be referenced by special TLS relocations. It is usually an error if
1265	// a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa.
1266	if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS &&
1267	eSym.getType() != STT_NOTYPE)
1268	Err(ctx) << "TLS attribute mismatch: " << &sym << "\n>>> in " << sym.file
1269	<< "\n>>> in " << this;
1270
1271	// Handle non-COMMON defined symbol below. !sym.file allows a symbol
1272	// assignment to redefine a symbol without an error.
1273	if (!sym.isDefined() \|\| secIdx == SHN_UNDEF)
1274	continue;
1275	if (LLVM_UNLIKELY(secIdx >= SHN_LORESERVE)) {
1276	if (secIdx == SHN_COMMON)
1277	continue;
1278	if (secIdx == SHN_XINDEX)
1279	secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1280	else
1281	secIdx = `0`;
1282	}
1283
1284	if (LLVM_UNLIKELY(secIdx >= sections.size())) {
1285	Err(ctx) << this << ": invalid section index: " << secIdx;
1286	continue;
1287	}
1288	InputSectionBase *sec = sections[secIdx];
1289	if (sec == &InputSection::discarded) {
1290	if (sym.traced) {
1291	printTraceSymbol(sym: Undefined{this, sym.getName(), sym.binding,
1292	sym.stOther, sym.type, secIdx},
1293	name: sym.getName());
1294	}
1295	if (sym.file == this) {
1296	std::lock_guard<std::mutex> lock(mu);
1297	ctx.nonPrevailingSyms.emplace_back(&sym, secIdx);
1298	}
1299	continue;
1300	}
1301
1302	if (sym.file == this) {
1303	cast<Defined>(Val&: sym).section = sec;
1304	continue;
1305	}
1306
1307	if (sym.binding == STB_WEAK \|\| binding == STB_WEAK)
1308	continue;
1309	std::lock_guard<std::mutex> lock(mu);
1310	ctx.duplicates.push_back(Elt: {&sym, this, sec, eSym.st_value});
1311	}
1312	}
1313
1314	// The handling of tentative definitions (COMMON symbols) in archives is murky.
1315	// A tentative definition will be promoted to a global definition if there are
1316	// no non-tentative definitions to dominate it. When we hold a tentative
1317	// definition to a symbol and are inspecting archive members for inclusion
1318	// there are 2 ways we can proceed:
1319	//
1320	// 1) Consider the tentative definition a 'real' definition (ie promotion from
1321	// tentative to real definition has already happened) and not inspect
1322	// archive members for Global/Weak definitions to replace the tentative
1323	// definition. An archive member would only be included if it satisfies some
1324	// other undefined symbol. This is the behavior Gold uses.
1325	//
1326	// 2) Consider the tentative definition as still undefined (ie the promotion to
1327	// a real definition happens only after all symbol resolution is done).
1328	// The linker searches archive members for STB_GLOBAL definitions to
1329	// replace the tentative definition with. This is the behavior used by
1330	// GNU ld.
1331	//
1332	// The second behavior is inherited from SysVR4, which based it on the FORTRAN
1333	// COMMON BLOCK model. This behavior is needed for proper initialization in old
1334	// (pre F90) FORTRAN code that is packaged into an archive.
1335	//
1336	// The following functions search archive members for definitions to replace
1337	// tentative definitions (implementing behavior 2).
1338	static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
1339	StringRef archiveName) {
1340	IRSymtabFile symtabFile = check(e: readIRSymtab(MBRef: mb));
1341	for (const irsymtab::Reader::SymbolRef &sym :
1342	symtabFile.TheReader.symbols()) {
1343	if (sym.isGlobal() && sym.getName() == symName)
1344	return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon();
1345	}
1346	return false;
1347	}
1348
1349	template <class ELFT>
1350	static bool isNonCommonDef(Ctx &ctx, ELFKind ekind, MemoryBufferRef mb,
1351	StringRef symName, StringRef archiveName) {
1352	ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ctx, ekind, mb, archiveName);
1353	obj->init();
1354	StringRef stringtable = obj->getStringTable();
1355
1356	for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
1357	Expected<StringRef> name = sym.getName(stringtable);
1358	if (name && name.get() == symName)
1359	return sym.isDefined() && sym.getBinding() == STB_GLOBAL &&
1360	!sym.isCommon();
1361	}
1362	return false;
1363	}
1364
1365	static bool isNonCommonDef(Ctx &ctx, MemoryBufferRef mb, StringRef symName,
1366	StringRef archiveName) {
1367	switch (getELFKind(ctx, mb, archiveName)) {
1368	case ELF32LEKind:
1369	return isNonCommonDef<ELF32LE>(ctx, ekind: ELF32LEKind, mb, symName, archiveName);
1370	case ELF32BEKind:
1371	return isNonCommonDef<ELF32BE>(ctx, ekind: ELF32BEKind, mb, symName, archiveName);
1372	case ELF64LEKind:
1373	return isNonCommonDef<ELF64LE>(ctx, ekind: ELF64LEKind, mb, symName, archiveName);
1374	case ELF64BEKind:
1375	return isNonCommonDef<ELF64BE>(ctx, ekind: ELF64BEKind, mb, symName, archiveName);
1376	default:
1377	llvm_unreachable("getELFKind");
1378	}
1379	}
1380
1381	SharedFile::SharedFile(Ctx &ctx, MemoryBufferRef m, StringRef defaultSoName)
1382	: ELFFileBase (ctx, SharedKind, getELFKind(ctx, mb: m, archiveName: ""), m),
1383	soName (defaultSoName), isNeeded(!ctx.arg.asNeeded) {}
1384
1385	// Parse the version definitions in the object file if present, and return a
1386	// vector whose nth element contains a pointer to the Elf_Verdef for version
1387	// identifier n. Version identifiers that are not definitions map to nullptr.
1388	template <typename ELFT>
1389	static SmallVector<const void *, `0`>
1390	parseVerdefs(const uint8_t base, const* typename ELFT::Shdr *sec) {
1391	if (!sec)
1392	return {};
1393
1394	// Build the Verdefs array by following the chain of Elf_Verdef objects
1395	// from the start of the .gnu.version_d section.
1396	SmallVector<const void *, `0`> verdefs;
1397	const uint8_t *verdef = base + sec->sh_offset;
1398	for (unsigned i = `0`, e = sec->sh_info; i != e; ++i) {
1399	auto curVerdef = reinterpret_cast<const* typename ELFT::Verdef *>(verdef);
1400	verdef += curVerdef->vd_next;
1401	unsigned verdefIndex = curVerdef->vd_ndx;
1402	if (verdefIndex >= verdefs.size())
1403	verdefs.resize(N: verdefIndex + `1`);
1404	verdefs [verdefIndex] = curVerdef;
1405	}
1406	return verdefs;
1407	}
1408
1409	// Parse SHT_GNU_verneed to properly set the name of a versioned undefined
1410	// symbol. We detect fatal issues which would cause vulnerabilities, but do not
1411	// implement sophisticated error checking like in llvm-readobj because the value
1412	// of such diagnostics is low.
1413	template <typename ELFT>
1414	std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj,
1415	const typename ELFT::Shdr *sec) {
1416	if (!sec)
1417	return {};
1418	std::vector<uint32_t> verneeds;
1419	ArrayRef<uint8_t> data = CHECK2(obj.getSectionContents(sec), this*);
1420	const uint8_t *verneedBuf = data.begin();
1421	for (unsigned i = `0`; i != sec->sh_info; ++i) {
1422	if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) {
1423	Err(ctx) << this << " has an invalid Verneed";
1424	break;
1425	}
1426	auto vn = reinterpret_cast<const* typename ELFT::Verneed *>(verneedBuf);
1427	const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux;
1428	for (unsigned j = `0`; j != vn->vn_cnt; ++j) {
1429	if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) {
1430	Err(ctx) << this << " has an invalid Vernaux";
1431	break;
1432	}
1433	auto aux = reinterpret_cast<const* typename ELFT::Vernaux *>(vernauxBuf);
1434	if (aux->vna_name >= this->stringTable.size()) {
1435	Err(ctx) << this << " has a Vernaux with an invalid vna_name";
1436	break;
1437	}
1438	uint16_t version = aux->vna_other & VERSYM_VERSION;
1439	if (version >= verneeds.size())
1440	verneeds.resize(new_size: version + `1`);
1441	verneeds [version] = aux->vna_name;
1442	vernauxBuf += aux->vna_next;
1443	}
1444	verneedBuf += vn->vn_next;
1445	}
1446	return verneeds;
1447	}
1448
1449	// Parse PT_GNU_PROPERTY segments in DSO. The process is similar to
1450	// readGnuProperty, but we don't have the InputSection information.
1451	template <typename ELFT>
1452	void SharedFile::parseGnuAndFeatures(const ELFFile<ELFT> &obj) {
1453	if (ctx.arg.emachine != EM_AARCH64)
1454	return;
1455	const uint8_t *base = obj.base();
1456	auto phdrs = CHECK2(obj.program_headers(), this);
1457	for (auto phdr : phdrs) {
1458	if (phdr.p_type != PT_GNU_PROPERTY)
1459	continue;
1460	typename ELFT::Note note(
1461	*reinterpret_cast<const typename ELFT::Nhdr *>(base + phdr.p_offset));
1462	if (note.getType() != NT_GNU_PROPERTY_TYPE_0 \|\| note.getName() != "GNU")
1463	continue;
1464
1465	ArrayRef<uint8_t> desc = note.getDesc(phdr.p_align);
1466	parseGnuPropertyNote<ELFT>(ctx, *this, GNU_PROPERTY_AARCH64_FEATURE_1_AND,
1467	desc, base);
1468	}
1469	}
1470
1471	// We do not usually care about alignments of data in shared object
1472	// files because the loader takes care of it. However, if we promote a
1473	// DSO symbol to point to .bss due to copy relocation, we need to keep
1474	// the original alignment requirements. We infer it in this function.
1475	template <typename ELFT>
1476	static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections,
1477	const typename ELFT::Sym &sym) {
1478	uint64_t ret = UINT64_MAX;
1479	if (sym.st_value)
1480	ret = `1ULL` << llvm::countr_zero(Val: (uint64_t)sym.st_value);
1481	if (`0` < sym.st_shndx && sym.st_shndx < sections.size())
1482	ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign);
1483	return (ret > UINT32_MAX) ? `0` : ret;
1484	}
1485
1486	// Fully parse the shared object file.
1487	//
1488	// This function parses symbol versions. If a DSO has version information,
1489	// the file has a ".gnu.version_d" section which contains symbol version
1490	// definitions. Each symbol is associated to one version through a table in
1491	// ".gnu.version" section. That table is a parallel array for the symbol
1492	// table, and each table entry contains an index in ".gnu.version_d".
1493	//
1494	// The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for
1495	// VER_NDX_GLOBAL. There's no table entry for these special versions in
1496	// ".gnu.version_d".
1497	//
1498	// The file format for symbol versioning is perhaps a bit more complicated
1499	// than necessary, but you can easily understand the code if you wrap your
1500	// head around the data structure described above.
1501	template <class ELFT> void SharedFile::parse() {
1502	using Elf_Dyn = typename ELFT::Dyn;
1503	using Elf_Shdr = typename ELFT::Shdr;
1504	using Elf_Sym = typename ELFT::Sym;
1505	using Elf_Verdef = typename ELFT::Verdef;
1506	using Elf_Versym = typename ELFT::Versym;
1507
1508	ArrayRef<Elf_Dyn> dynamicTags;
1509	const ELFFile<ELFT> obj = this->getObj<ELFT>();
1510	ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>();
1511
1512	const Elf_Shdr versymSec = nullptr*;
1513	const Elf_Shdr verdefSec = nullptr*;
1514	const Elf_Shdr verneedSec = nullptr*;
1515	symbols = std::make_unique<Symbol *[]>(num: numSymbols);
1516
1517	// Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
1518	for (const Elf_Shdr &sec : sections) {
1519	switch (sec.sh_type) {
1520	default:
1521	continue;
1522	case SHT_DYNAMIC:
1523	dynamicTags =
1524	CHECK2(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this);
1525	break;
1526	case SHT_GNU_versym:
1527	versymSec = &sec;
1528	break;
1529	case SHT_GNU_verdef:
1530	verdefSec = &sec;
1531	break;
1532	case SHT_GNU_verneed:
1533	verneedSec = &sec;
1534	break;
1535	}
1536	}
1537
1538	if (versymSec && numSymbols == `0`) {
1539	ErrAlways(ctx) << "SHT_GNU_versym should be associated with symbol table";
1540	return;
1541	}
1542
1543	// Search for a DT_SONAME tag to initialize this->soName.
1544	for (const Elf_Dyn &dyn : dynamicTags) {
1545	if (dyn.d_tag == DT_NEEDED) {
1546	uint64_t val = dyn.getVal();
1547	if (val >= this->stringTable.size()) {
1548	Err(ctx) << this << ": invalid DT_NEEDED entry";
1549	return;
1550	}
1551	dtNeeded.push_back(Elt: this->stringTable.data() + val);
1552	} else if (dyn.d_tag == DT_SONAME) {
1553	uint64_t val = dyn.getVal();
1554	if (val >= this->stringTable.size()) {
1555	Err(ctx) << this << ": invalid DT_SONAME entry";
1556	return;
1557	}
1558	soName = this->stringTable.data() + val;
1559	}
1560	}
1561
1562	// DSOs are uniquified not by filename but by soname.
1563	StringSaver &ss = ctx.saver;
1564	DenseMap<CachedHashStringRef, SharedFile *>::iterator it;
1565	bool wasInserted;
1566	std::tie(args&: it, args&: wasInserted) =
1567	ctx.symtab ->soNames.try_emplace(Key: CachedHashStringRef (soName), Args: this);
1568
1569	// If a DSO appears more than once on the command line with and without
1570	// --as-needed, --no-as-needed takes precedence over --as-needed because a
1571	// user can add an extra DSO with --no-as-needed to force it to be added to
1572	// the dependency list.
1573	it ->second->isNeeded \|= isNeeded;
1574	if (!wasInserted)
1575	return;
1576
1577	ctx.sharedFiles.push_back(Elt: this);
1578
1579	verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec);
1580	std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec);
1581	parseGnuAndFeatures<ELFT>(obj);
1582
1583	// Parse ".gnu.version" section which is a parallel array for the symbol
1584	// table. If a given file doesn't have a ".gnu.version" section, we use
1585	// VER_NDX_GLOBAL.
1586	size_t size = numSymbols - firstGlobal;
1587	std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL);
1588	if (versymSec) {
1589	ArrayRef<Elf_Versym> versym =
1590	CHECK2(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),
1591	this)
1592	.slice(firstGlobal);
1593	for (size_t i = `0`; i < size; ++i)
1594	versyms [i] = versym[i].vs_index;
1595	}
1596
1597	// System libraries can have a lot of symbols with versions. Using a
1598	// fixed buffer for computing the versions name (foo@ver) can save a
1599	// lot of allocations.
1600	SmallString<`0`> versionedNameBuffer;
1601
1602	// Add symbols to the symbol table.
1603	ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>();
1604	for (size_t i = `0`, e = syms.size(); i != e; ++i) {
1605	const Elf_Sym &sym = syms[i];
1606
1607	// ELF spec requires that all local symbols precede weak or global
1608	// symbols in each symbol table, and the index of first non-local symbol
1609	// is stored to sh_info. If a local symbol appears after some non-local
1610	// symbol, that's a violation of the spec.
1611	StringRef name = CHECK2(sym.getName(stringTable), this);
1612	if (sym.getBinding() == STB_LOCAL) {
1613	Err(ctx) << this << ": invalid local symbol '" << name
1614	<< "' in global part of symbol table";
1615	continue;
1616	}
1617
1618	const uint16_t ver = versyms [i], idx = ver & ~VERSYM_HIDDEN;
1619	if (sym.isUndefined()) {
1620	// For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but
1621	// as of binutils 2.34, GNU ld produces VER_NDX_LOCAL.
1622	if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) {
1623	if (idx >= verneeds.size()) {
1624	ErrAlways(ctx) << "corrupt input file: version need index " << idx
1625	<< " for symbol " << name
1626	<< " is out of bounds\n>>> defined in " << this;
1627	continue;
1628	}
1629	StringRef verName = stringTable.data() + verneeds [idx];
1630	versionedNameBuffer.clear();
1631	name = ss.save(S: (name + "@" + verName).toStringRef(Out&: versionedNameBuffer));
1632	}
1633	Symbol *s = ctx.symtab ->addSymbol(
1634	newSym: Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
1635	s->isExported = true;
1636	if (sym.getBinding() != STB_WEAK &&
1637	ctx.arg.unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore)
1638	requiredSymbols.push_back(Elt: s);
1639	continue;
1640	}
1641
1642	if (ver == VER_NDX_LOCAL \|\|
1643	(ver != VER_NDX_GLOBAL && idx >= verdefs.size())) {
1644	// In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the
1645	// MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns
1646	// VER_NDX_LOCAL. Workaround this bug.
1647	if (ctx.arg.emachine == EM_MIPS && name == "_gp_disp")
1648	continue;
1649	ErrAlways(ctx) << "corrupt input file: version definition index " << idx
1650	<< " for symbol " << name
1651	<< " is out of bounds\n>>> defined in " << this;
1652	continue;
1653	}
1654
1655	uint32_t alignment = getAlignment<ELFT>(sections, sym);
1656	if (ver == idx) {
1657	auto *s = ctx.symtab ->addSymbol(
1658	newSym: SharedSymbol{*this, name, sym.getBinding(), sym.st_other,
1659	sym.getType(), sym.st_value, sym.st_size, alignment});
1660	s->dsoDefined = true;
1661	if (s->file == this)
1662	s->versionId = ver;
1663	}
1664
1665	// Also add the symbol with the versioned name to handle undefined symbols
1666	// with explicit versions.
1667	if (ver == VER_NDX_GLOBAL)
1668	continue;
1669
1670	StringRef verName =
1671	stringTable.data() +
1672	reinterpret_cast<const Elf_Verdef *>(verdefs [idx])->getAux()->vda_name;
1673	versionedNameBuffer.clear();
1674	name = (name + "@" + verName).toStringRef(Out&: versionedNameBuffer);
1675	auto *s = ctx.symtab ->addSymbol(
1676	newSym: SharedSymbol{*this, ss.save(S: name), sym.getBinding(), sym.st_other,
1677	sym.getType(), sym.st_value, sym.st_size, alignment});
1678	s->dsoDefined = true;
1679	if (s->file == this)
1680	s->versionId = idx;
1681	}
1682	}
1683
1684	static ELFKind getBitcodeELFKind(const Triple &t) {
1685	if (t.isLittleEndian())
1686	return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
1687	return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
1688	}
1689
1690	static uint16_t getBitcodeMachineKind(Ctx &ctx, StringRef path,
1691	const Triple &t) {
1692	switch (t.getArch()) {
1693	case Triple::aarch64:
1694	case Triple::aarch64_be:
1695	return EM_AARCH64;
1696	case Triple::amdgcn:
1697	case Triple::r600:
1698	return EM_AMDGPU;
1699	case Triple::arm:
1700	case Triple::armeb:
1701	case Triple::thumb:
1702	case Triple::thumbeb:
1703	return EM_ARM;
1704	case Triple::avr:
1705	return EM_AVR;
1706	case Triple::hexagon:
1707	return EM_HEXAGON;
1708	case Triple::loongarch32:
1709	case Triple::loongarch64:
1710	return EM_LOONGARCH;
1711	case Triple::mips:
1712	case Triple::mipsel:
1713	case Triple::mips64:
1714	case Triple::mips64el:
1715	return EM_MIPS;
1716	case Triple::msp430:
1717	return EM_MSP430;
1718	case Triple::ppc:
1719	case Triple::ppcle:
1720	return EM_PPC;
1721	case Triple::ppc64:
1722	case Triple::ppc64le:
1723	return EM_PPC64;
1724	case Triple::riscv32:
1725	case Triple::riscv64:
1726	return EM_RISCV;
1727	case Triple::sparcv9:
1728	return EM_SPARCV9;
1729	case Triple::systemz:
1730	return EM_S390;
1731	case Triple::x86:
1732	return t.isOSIAMCU() ? EM_IAMCU : EM_386;
1733	case Triple::x86_64:
1734	return EM_X86_64;
1735	default:
1736	ErrAlways(ctx) << path
1737	<< ": could not infer e_machine from bitcode target triple "
1738	<< t.str();
1739	return EM_NONE;
1740	}
1741	}
1742
1743	static uint8_t getOsAbi(const Triple &t) {
1744	switch (t.getOS()) {
1745	case Triple::AMDHSA:
1746	return ELF::ELFOSABI_AMDGPU_HSA;
1747	case Triple::AMDPAL:
1748	return ELF::ELFOSABI_AMDGPU_PAL;
1749	case Triple::Mesa3D:
1750	return ELF::ELFOSABI_AMDGPU_MESA3D;
1751	default:
1752	return ELF::ELFOSABI_NONE;
1753	}
1754	}
1755
1756	BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
1757	uint64_t offsetInArchive, bool lazy)
1758	: InputFile (ctx, BitcodeKind, mb) {
1759	this->archiveName = archiveName;
1760	this->lazy = lazy;
1761
1762	std::string path = mb.getBufferIdentifier().str();
1763	if (ctx.arg.thinLTOIndexOnly)
1764	path = replaceThinLTOSuffix(ctx, path: mb.getBufferIdentifier());
1765
1766	// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1767	// name. If two archives define two members with the same name, this
1768	// causes a collision which result in only one of the objects being taken
1769	// into consideration at LTO time (which very likely causes undefined
1770	// symbols later in the link stage). So we append file offset to make
1771	// filename unique.
1772	StringSaver &ss = ctx.saver;
1773	StringRef name = archiveName.empty()
1774	? ss.save(S: path)
1775	: ss.save(S: archiveName + "(" + path::filename(path) +
1776	" at " + utostr(X: offsetInArchive) + ")");
1777	MemoryBufferRef mbref(mb.getBuffer(), name);
1778
1779	obj = CHECK2(lto::InputFile::create(mbref), this);
1780
1781	Triple t(obj ->getTargetTriple());
1782	ekind = getBitcodeELFKind(t);
1783	emachine = getBitcodeMachineKind(ctx, path: mb.getBufferIdentifier(), t);
1784	osabi = getOsAbi(t);
1785	}
1786
1787	static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
1788	switch (gvVisibility) {
1789	case GlobalValue::DefaultVisibility:
1790	return STV_DEFAULT;
1791	case GlobalValue::HiddenVisibility:
1792	return STV_HIDDEN;
1793	case GlobalValue::ProtectedVisibility:
1794	return STV_PROTECTED;
1795	}
1796	llvm_unreachable("unknown visibility");
1797	}
1798
1799	static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym,
1800	const lto::InputFile::Symbol &objSym,
1801	BitcodeFile &f) {
1802	uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL;
1803	uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
1804	uint8_t visibility = mapVisibility(gvVisibility: objSym.getVisibility());
1805
1806	if (!sym) {
1807	// Symbols can be duplicated in bitcode files because of '#include' and
1808	// linkonce_odr. Use uniqueSaver to save symbol names for de-duplication.
1809	// Update objSym.Name to reference (via StringRef) the string saver's copy;
1810	// this way LTO can reference the same string saver's copy rather than
1811	// keeping copies of its own.
1812	objSym.Name = ctx.uniqueSaver.save(S: objSym.getName());
1813	sym = ctx.symtab ->insert(name: objSym.getName());
1814	}
1815
1816	if (objSym.isUndefined()) {
1817	Undefined newSym(&f, StringRef (), binding, visibility, type);
1818	sym->resolve(ctx, other: newSym);
1819	sym->referenced = true;
1820	return;
1821	}
1822
1823	if (objSym.isCommon()) {
1824	sym->resolve(ctx, other: CommonSymbol {ctx, &f, StringRef (), binding, visibility,
1825	STT_OBJECT, objSym.getCommonAlignment(),
1826	objSym.getCommonSize()});
1827	} else {
1828	Defined newSym(ctx, &f, StringRef (), binding, visibility, type, `0`, `0`,
1829	nullptr);
1830	// The definition can be omitted if all bitcode definitions satisfy
1831	// `canBeOmittedFromSymbolTable()` and isUsedInRegularObj is false.
1832	// The latter condition is tested in parseVersionAndComputeIsPreemptible.
1833	sym->ltoCanOmit = objSym.canBeOmittedFromSymbolTable() &&
1834	(!sym->isDefined() \|\| sym->ltoCanOmit);
1835	sym->resolve(ctx, other: newSym);
1836	}
1837	}
1838
1839	void BitcodeFile::parse() {
1840	for (std::pair<StringRef, Comdat::SelectionKind> s : obj ->getComdatTable()) {
1841	keptComdats.push_back(
1842	x: s.second == Comdat::NoDeduplicate \|\|
1843	ctx.symtab ->comdatGroups.try_emplace(Key: CachedHashStringRef (s.first), Args: this)
1844	.second);
1845	}
1846
1847	if (numSymbols == `0`) {
1848	numSymbols = obj ->symbols().size();
1849	symbols = std::make_unique<Symbol *[]>(num: numSymbols);
1850	}
1851	// Process defined symbols first. See the comment in
1852	// ObjFile<ELFT>::initializeSymbols.
1853	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols()))
1854	if (!irSym.isUndefined())
1855	createBitcodeSymbol(ctx, sym&: symbols [i], objSym: irSym, f&: *this);
1856	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols()))
1857	if (irSym.isUndefined())
1858	createBitcodeSymbol(ctx, sym&: symbols [i], objSym: irSym, f&: *this);
1859
1860	for (auto l : obj ->getDependentLibraries())
1861	addDependentLibrary(ctx, specifier: l, f: this);
1862	}
1863
1864	void BitcodeFile::parseLazy() {
1865	numSymbols = obj ->symbols().size();
1866	symbols = std::make_unique<Symbol *[]>(num: numSymbols);
1867	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols())) {
1868	// Symbols can be duplicated in bitcode files because of '#include' and
1869	// linkonce_odr. Use uniqueSaver to save symbol names for de-duplication.
1870	// Update objSym.Name to reference (via StringRef) the string saver's copy;
1871	// this way LTO can reference the same string saver's copy rather than
1872	// keeping copies of its own.
1873	irSym.Name = ctx.uniqueSaver.save(S: irSym.getName());
1874	if (!irSym.isUndefined()) {
1875	auto *sym = ctx.symtab ->insert(name: irSym.getName());
1876	sym->resolve(ctx, other: LazySymbol {*this});
1877	symbols [i] = sym;
1878	}
1879	}
1880	}
1881
1882	void BitcodeFile::postParse() {
1883	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols())) {
1884	const Symbol &sym = *symbols [i];
1885	if (sym.file == this \|\| !sym.isDefined() \|\| irSym.isUndefined() \|\|
1886	irSym.isCommon() \|\| irSym.isWeak())
1887	continue;
1888	int c = irSym.getComdatIndex();
1889	if (c != -`1` && !keptComdats [c])
1890	continue;
1891	reportDuplicate(ctx, sym, newFile: this, errSec: nullptr, errOffset: `0`);
1892	}
1893	}
1894
1895	void BinaryFile::parse() {
1896	ArrayRef<uint8_t> data = arrayRefFromStringRef(Input: mb.getBuffer());
1897	auto *section =
1898	make<InputSection>(args: this, args: ".data", args: SHT_PROGBITS, args: SHF_ALLOC \| SHF_WRITE,
1899	/addralign=/args: `8`, /entsize=/args: `0`, args&: data);
1900	sections.push_back(Elt: section);
1901
1902	// For each input file foo that is embedded to a result as a binary
1903	// blob, we define _binary_foo_{start,end,size} symbols, so that
1904	// user programs can access blobs by name. Non-alphanumeric
1905	// characters in a filename are replaced with underscore.
1906	std::string s = "_binary_" + mb.getBufferIdentifier().str();
1907	for (char &c : s)
1908	if (!isAlnum(C: c))
1909	c = `'_'`;
1910
1911	llvm::StringSaver &ss = ctx.saver;
1912	ctx.symtab ->addAndCheckDuplicate(
1913	ctx, newSym: Defined {ctx, this, ss.save(S: s + "_start"), STB_GLOBAL, STV_DEFAULT,
1914	STT_OBJECT, `0`, `0`, section});
1915	ctx.symtab ->addAndCheckDuplicate(
1916	ctx, newSym: Defined {ctx, this, ss.save(S: s + "_end"), STB_GLOBAL, STV_DEFAULT,
1917	STT_OBJECT, data.size(), `0`, section});
1918	ctx.symtab ->addAndCheckDuplicate(
1919	ctx, newSym: Defined {ctx, this, ss.save(S: s + "_size"), STB_GLOBAL, STV_DEFAULT,
1920	STT_OBJECT, data.size(), `0`, nullptr});
1921	}
1922
1923	InputFile *elf::createInternalFile(Ctx &ctx, StringRef name) {
1924	auto *file =
1925	make<InputFile>(args&: ctx, args: InputFile::InternalKind, args: MemoryBufferRef("", name));
1926	// References from an internal file do not lead to --warn-backrefs
1927	// diagnostics.
1928	file->groupId = `0`;
1929	return file;
1930	}
1931
1932	std::unique_ptr<ELFFileBase> elf::createObjFile(Ctx &ctx, MemoryBufferRef mb,
1933	StringRef archiveName,
1934	bool lazy) {
1935	std::unique_ptr<ELFFileBase> f;
1936	switch (getELFKind(ctx, mb, archiveName)) {
1937	case ELF32LEKind:
1938	f = std::make_unique<ObjFile<ELF32LE>>(args&: ctx, args: ELF32LEKind, args&: mb, args&: archiveName);
1939	break;
1940	case ELF32BEKind:
1941	f = std::make_unique<ObjFile<ELF32BE>>(args&: ctx, args: ELF32BEKind, args&: mb, args&: archiveName);
1942	break;
1943	case ELF64LEKind:
1944	f = std::make_unique<ObjFile<ELF64LE>>(args&: ctx, args: ELF64LEKind, args&: mb, args&: archiveName);
1945	break;
1946	case ELF64BEKind:
1947	f = std::make_unique<ObjFile<ELF64BE>>(args&: ctx, args: ELF64BEKind, args&: mb, args&: archiveName);
1948	break;
1949	default:
1950	llvm_unreachable("getELFKind");
1951	}
1952	f ->init();
1953	f ->lazy = lazy;
1954	return f;
1955	}
1956
1957	template <class ELFT> void ObjFile<ELFT>::parseLazy() {
1958	const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>();
1959	numSymbols = eSyms.size();
1960	symbols = std::make_unique<Symbol *[]>(numSymbols);
1961
1962	// resolve() may trigger this->extract() if an existing symbol is an undefined
1963	// symbol. If that happens, this function has served its purpose, and we can
1964	// exit from the loop early.
1965	auto *symtab = ctx.symtab.get();
1966	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1967	if (eSyms[i].st_shndx == SHN_UNDEF)
1968	continue;
1969	symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this));
1970	symbols[i]->resolve(ctx, LazySymbol{*this});
1971	if (!lazy)
1972	break;
1973	}
1974	}
1975
1976	bool InputFile::shouldExtractForCommon(StringRef name) const {
1977	if (isa<BitcodeFile>(Val: this))
1978	return isBitcodeNonCommonDef(mb, symName: name, archiveName);
1979
1980	return isNonCommonDef(ctx, mb, symName: name, archiveName);
1981	}
1982
1983	std::string elf::replaceThinLTOSuffix(Ctx &ctx, StringRef path) {
1984	auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace;
1985	if (path.consume_back(Suffix: suffix))
1986	return (path + repl).str();
1987	return std::string (path);
1988	}
1989
1990	template class elf::ObjFile<ELF32LE>;
1991	template class elf::ObjFile<ELF32BE>;
1992	template class elf::ObjFile<ELF64LE>;
1993	template class elf::ObjFile<ELF64BE>;
1994
1995	template void SharedFile::parse<ELF32LE>();
1996	template void SharedFile::parse<ELF32BE>();
1997	template void SharedFile::parse<ELF64LE>();
1998	template void SharedFile::parse<ELF64BE>();
1999

Browse the source code of llvm_projects/lld/ELF/InputFiles.cpp