1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_INPUT_FILES_H
10#define LLD_MACHO_INPUT_FILES_H
11
12#include "MachOStructs.h"
13#include "Target.h"
14
15#include "lld/Common/DWARF.h"
16#include "lld/Common/LLVM.h"
17#include "lld/Common/Memory.h"
18#include "llvm/ADT/CachedHashString.h"
19#include "llvm/ADT/DenseSet.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/BinaryFormat/MachO.h"
22#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23#include "llvm/Object/Archive.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/Threading.h"
26#include "llvm/TextAPI/TextAPIReader.h"
27
28#include <vector>
29
30namespace llvm {
31namespace lto {
32class InputFile;
33} // namespace lto
34namespace MachO {
35class InterfaceFile;
36} // namespace MachO
37class TarWriter;
38} // namespace llvm
39
40namespace lld {
41namespace macho {
42
43struct PlatformInfo;
44class ConcatInputSection;
45class Symbol;
46class Defined;
47class AliasSymbol;
48struct Reloc;
49enum class RefState : uint8_t;
50
51// If --reproduce option is given, all input files are written
52// to this tar archive.
53extern std::unique_ptr<llvm::TarWriter> tar;
54
55// If .subsections_via_symbols is set, each InputSection will be split along
56// symbol boundaries. The field offset represents the offset of the subsection
57// from the start of the original pre-split InputSection.
58struct Subsection {
59 uint64_t offset = 0;
60 InputSection *isec = nullptr;
61};
62
63using Subsections = std::vector<Subsection>;
64class InputFile;
65
66class Section {
67public:
68 InputFile *file;
69 StringRef segname;
70 StringRef name;
71 uint32_t flags;
72 uint64_t addr;
73 Subsections subsections;
74
75 Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
76 uint64_t addr)
77 : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
78 // Ensure pointers to Sections are never invalidated.
79 Section(const Section &) = delete;
80 Section &operator=(const Section &) = delete;
81 Section(Section &&) = delete;
82 Section &operator=(Section &&) = delete;
83
84private:
85 // Whether we have already split this section into individual subsections.
86 // For sections that cannot be split (e.g. literal sections), this is always
87 // false.
88 bool doneSplitting = false;
89 friend class ObjFile;
90};
91
// Represents a call graph profile edge: `count` calls going from the symbol
// at `fromIndex` (the caller) to the symbol at `toIndex` (the callee).
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
104
105class InputFile {
106public:
107 enum Kind {
108 ObjKind,
109 OpaqueKind,
110 DylibKind,
111 ArchiveKind,
112 BitcodeKind,
113 };
114
115 virtual ~InputFile() = default;
116 Kind kind() const { return fileKind; }
117 StringRef getName() const { return name; }
118 static void resetIdCount() { idCount = 0; }
119
120 MemoryBufferRef mb;
121
122 std::vector<Symbol *> symbols;
123 std::vector<Section *> sections;
124 ArrayRef<uint8_t> objCImageInfo;
125
126 // If not empty, this stores the name of the archive containing this file.
127 // We use this string for creating error messages.
128 std::string archiveName;
129
130 // Provides an easy way to sort InputFiles deterministically.
131 const int id;
132
133 // True if this is a lazy ObjFile or BitcodeFile.
134 bool lazy = false;
135
136protected:
137 InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
138 : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
139 name(mb.getBufferIdentifier()) {}
140
141 InputFile(Kind, const llvm::MachO::InterfaceFile &);
142
143 // If true, this input's arch is compatible with target.
144 bool compatArch = true;
145
146private:
147 const Kind fileKind;
148 const StringRef name;
149
150 static int idCount;
151};
152
153struct FDE {
154 uint32_t funcLength;
155 Symbol *personality;
156 InputSection *lsda;
157};
158
159// .o file
160class ObjFile final : public InputFile {
161public:
162 ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
163 bool lazy = false, bool forceHidden = false, bool compatArch = true,
164 bool builtFromBitcode = false);
165 ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
166 ArrayRef<uint8_t> getOptimizationHints() const;
167 template <class LP> void parse();
168 template <class LP>
169 void parseLinkerOptions(llvm::SmallVectorImpl<StringRef> &LinkerOptions);
170
171 static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
172
173 std::string sourceFile() const;
174 // Parses line table information for diagnostics. compileUnit should be used
175 // for other purposes.
176 lld::DWARFCache *getDwarf();
177
178 llvm::DWARFUnit *compileUnit = nullptr;
179 std::unique_ptr<lld::DWARFCache> dwarfCache;
180 Section *addrSigSection = nullptr;
181 const uint32_t modTime;
182 bool forceHidden;
183 bool builtFromBitcode;
184 std::vector<ConcatInputSection *> debugSections;
185 std::vector<CallGraphEntry> callGraph;
186 llvm::DenseMap<ConcatInputSection *, FDE> fdes;
187 std::vector<AliasSymbol *> aliases;
188
189private:
190 llvm::once_flag initDwarf;
191 template <class LP> void parseLazy();
192 template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
193 template <class LP>
194 void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
195 ArrayRef<typename LP::nlist> nList, const char *strtab,
196 bool subsectionsViaSymbols);
197 template <class NList>
198 Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
199 template <class SectionHeader>
200 void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
201 const SectionHeader &, Section &);
202 void parseDebugInfo();
203 void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
204 void registerCompactUnwind(Section &compactUnwindSection);
205 void registerEhFrames(Section &ehFrameSection);
206};
207
208// command-line -sectcreate file
209class OpaqueFile final : public InputFile {
210public:
211 OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
212 static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
213};
214
215// .dylib or .tbd file
216class DylibFile final : public InputFile {
217public:
218 // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
219 // symbols in those sub-libraries will be available under the umbrella
220 // library's namespace. Those sub-libraries can also have their own
221 // re-exports. When loading a re-exported dylib, `umbrella` should be set to
222 // the root dylib to ensure symbols in the child library are correctly bound
223 // to the root. On the other hand, if a dylib is being directly loaded
224 // (through an -lfoo flag), then `umbrella` should be a nullptr.
225 explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
226 bool isBundleLoader, bool explicitlyLinked);
227 explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
228 DylibFile *umbrella, bool isBundleLoader,
229 bool explicitlyLinked);
230 explicit DylibFile(DylibFile *umbrella);
231
232 void parseLoadCommands(MemoryBufferRef mb);
233 void parseReexports(const llvm::MachO::InterfaceFile &interface);
234 bool isReferenced() const { return numReferencedSymbols > 0; }
235 bool isExplicitlyLinked() const;
236 void setExplicitlyLinked() { explicitlyLinked = true; }
237
238 static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
239
240 StringRef installName;
241 DylibFile *exportingFile = nullptr;
242 DylibFile *umbrella;
243 SmallVector<StringRef, 2> rpaths;
244 uint32_t compatibilityVersion = 0;
245 uint32_t currentVersion = 0;
246 int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
247 unsigned numReferencedSymbols = 0;
248 RefState refState;
249 bool reexport = false;
250 bool forceNeeded = false;
251 bool forceWeakImport = false;
252 bool deadStrippable = false;
253
254private:
255 bool explicitlyLinked = false; // Access via isExplicitlyLinked().
256
257public:
258 // An executable can be used as a bundle loader that will load the output
259 // file being linked, and that contains symbols referenced, but not
260 // implemented in the bundle. When used like this, it is very similar
261 // to a dylib, so we've used the same class to represent it.
262 bool isBundleLoader;
263
264 // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
265 // Usually empty. These synthetic dylibs won't have synthetic dylibs
266 // themselves.
267 SmallVector<DylibFile *, 2> extraDylibs;
268
269private:
270 DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
271 uint32_t compatVersion);
272
273 bool handleLDSymbol(StringRef originalName);
274 void handleLDPreviousSymbol(StringRef name, StringRef originalName);
275 void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
276 void handleLDHideSymbol(StringRef name, StringRef originalName);
277 void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
278 void parseExportedSymbols(uint32_t offset, uint32_t size);
279 void loadReexport(StringRef path, DylibFile *umbrella,
280 const llvm::MachO::InterfaceFile *currentTopLevelTapi);
281
282 llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
283};
284
285// .a file
286class ArchiveFile final : public InputFile {
287public:
288 explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
289 bool forceHidden);
290 void addLazySymbols();
291 void fetch(const llvm::object::Archive::Symbol &);
292 // LLD normally doesn't use Error for error-handling, but the underlying
293 // Archive library does, so this is the cleanest way to wrap it.
294 Error fetch(const llvm::object::Archive::Child &, StringRef reason);
295 const llvm::object::Archive &getArchive() const { return *file; };
296 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
297
298private:
299 std::unique_ptr<llvm::object::Archive> file;
300 // Keep track of children fetched from the archive by tracking
301 // which address offsets have been fetched already.
302 llvm::DenseSet<uint64_t> seen;
303 // Load all symbols with hidden visibility (-load_hidden).
304 bool forceHidden;
305};
306
307class BitcodeFile final : public InputFile {
308public:
309 explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
310 uint64_t offsetInArchive, bool lazy = false,
311 bool forceHidden = false, bool compatArch = true);
312 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
313 void parse();
314
315 std::unique_ptr<llvm::lto::InputFile> obj;
316 bool forceHidden;
317
318private:
319 void parseLazy();
320};
321
// Global collection of input files. A SetVector iterates in insertion order
// and holds each pointer at most once.
extern llvm::SetVector<InputFile *> inputFiles;
// Buffers keyed by a hashed string.
// NOTE(review): presumably the key is the path given to readFile() -- confirm
// against the definition.
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
// NOTE(review): presumably linker options gathered from LC_LINKER_OPTION load
// commands that still await processing -- confirm against the users.
extern llvm::SmallVector<StringRef> unprocessedLCLinkerOptions;

// Yields the buffer for `path`, or std::nullopt when none is available.
std::optional<MemoryBufferRef> readFile(StringRef path);

// NOTE(review): `reason` is presumably a human-readable explanation of why
// `file` is being extracted -- confirm against the definition.
void extract(InputFile &file, StringRef reason);
329
330namespace detail {
331
332template <class CommandType, class... Types>
333std::vector<const CommandType *>
334findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
335 std::vector<const CommandType *> cmds;
336 std::initializer_list<uint32_t> typesList{types...};
337 const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
338 const uint8_t *p =
339 reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
340 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
341 auto *cmd = reinterpret_cast<const CommandType *>(p);
342 if (llvm::is_contained(typesList, cmd->cmd)) {
343 cmds.push_back(cmd);
344 if (cmds.size() == maxCommands)
345 return cmds;
346 }
347 p += cmd->cmdsize;
348 }
349 return cmds;
350}
351
352} // namespace detail
353
354// anyHdr should be a pointer to either mach_header or mach_header_64
355template <class CommandType = llvm::MachO::load_command, class... Types>
356const CommandType *findCommand(const void *anyHdr, Types... types) {
357 std::vector<const CommandType *> cmds =
358 detail::findCommands<CommandType>(anyHdr, 1, types...);
359 return cmds.size() ? cmds[0] : nullptr;
360}
361
362template <class CommandType = llvm::MachO::load_command, class... Types>
363std::vector<const CommandType *> findCommands(const void *anyHdr,
364 Types... types) {
365 return detail::findCommands<CommandType>(anyHdr, 0, types...);
366}
367
368std::string replaceThinLTOSuffix(StringRef path);
369} // namespace macho
370
371std::string toString(const macho::InputFile *file);
372std::string toString(const macho::Section &);
373} // namespace lld
374
375#endif
376