1//===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// Implements the TAPI Reader for Mach-O dynamic libraries.
10///
11//===----------------------------------------------------------------------===//
12
13#include "llvm/TextAPI/DylibReader.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
16#include "llvm/DebugInfo/DWARF/DWARFContext.h"
17#include "llvm/Object/Binary.h"
18#include "llvm/Object/MachOUniversal.h"
19#include "llvm/Support/Endian.h"
20#include "llvm/TargetParser/Triple.h"
21#include "llvm/TextAPI/InterfaceFile.h"
22#include "llvm/TextAPI/RecordsSlice.h"
23#include "llvm/TextAPI/TextAPIError.h"
24#include <iomanip>
25#include <sstream>
26#include <string>
27#include <tuple>
28
29using namespace llvm;
30using namespace llvm::object;
31using namespace llvm::MachO;
32using namespace llvm::MachO::DylibReader;
33
34using TripleVec = std::vector<Triple>;
35static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {
36 auto I = partition_point(Range&: Container, P: [=](const Triple &CT) {
37 return std::forward_as_tuple(args: CT.getArch(), args: CT.getOS(),
38 args: CT.getEnvironment()) <
39 std::forward_as_tuple(args: T.getArch(), args: T.getOS(), args: T.getEnvironment());
40 });
41
42 if (I != Container.end() && *I == T)
43 return I;
44 return Container.emplace(position: I, args&: T);
45}
46
47static TripleVec constructTriples(MachOObjectFile *Obj,
48 const Architecture ArchT) {
49 auto getOSVersionStr = [](uint32_t V) {
50 PackedVersion OSVersion(V);
51 std::string Vers;
52 raw_string_ostream VStream(Vers);
53 VStream << OSVersion;
54 return VStream.str();
55 };
56 auto getOSVersion = [&](const MachOObjectFile::LoadCommandInfo &cmd) {
57 auto Vers = Obj->getVersionMinLoadCommand(L: cmd);
58 return getOSVersionStr(Vers.version);
59 };
60
61 TripleVec Triples;
62 bool IsIntel = ArchitectureSet(ArchT).hasX86();
63 auto Arch = getArchitectureName(Arch: ArchT);
64
65 for (const auto &cmd : Obj->load_commands()) {
66 std::string OSVersion;
67 switch (cmd.C.cmd) {
68 case MachO::LC_VERSION_MIN_MACOSX:
69 OSVersion = getOSVersion(cmd);
70 emplace(Container&: Triples, T: {Arch, "apple", "macos" + OSVersion});
71 break;
72 case MachO::LC_VERSION_MIN_IPHONEOS:
73 OSVersion = getOSVersion(cmd);
74 if (IsIntel)
75 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion, "simulator"});
76 else
77 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion});
78 break;
79 case MachO::LC_VERSION_MIN_TVOS:
80 OSVersion = getOSVersion(cmd);
81 if (IsIntel)
82 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion, "simulator"});
83 else
84 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion});
85 break;
86 case MachO::LC_VERSION_MIN_WATCHOS:
87 OSVersion = getOSVersion(cmd);
88 if (IsIntel)
89 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion, "simulator"});
90 else
91 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion});
92 break;
93 case MachO::LC_BUILD_VERSION: {
94 OSVersion = getOSVersionStr(Obj->getBuildVersionLoadCommand(L: cmd).minos);
95 switch (Obj->getBuildVersionLoadCommand(L: cmd).platform) {
96 case MachO::PLATFORM_MACOS:
97 emplace(Container&: Triples, T: {Arch, "apple", "macos" + OSVersion});
98 break;
99 case MachO::PLATFORM_IOS:
100 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion});
101 break;
102 case MachO::PLATFORM_TVOS:
103 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion});
104 break;
105 case MachO::PLATFORM_WATCHOS:
106 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion});
107 break;
108 case MachO::PLATFORM_BRIDGEOS:
109 emplace(Container&: Triples, T: {Arch, "apple", "bridgeos" + OSVersion});
110 break;
111 case MachO::PLATFORM_MACCATALYST:
112 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion, "macabi"});
113 break;
114 case MachO::PLATFORM_IOSSIMULATOR:
115 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion, "simulator"});
116 break;
117 case MachO::PLATFORM_TVOSSIMULATOR:
118 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion, "simulator"});
119 break;
120 case MachO::PLATFORM_WATCHOSSIMULATOR:
121 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion, "simulator"});
122 break;
123 case MachO::PLATFORM_DRIVERKIT:
124 emplace(Container&: Triples, T: {Arch, "apple", "driverkit" + OSVersion});
125 break;
126 default:
127 break; // Skip any others.
128 }
129 break;
130 }
131 default:
132 break;
133 }
134 }
135
136 // Record unknown platform for older binaries that don't enforce platform
137 // load commands.
138 if (Triples.empty())
139 emplace(Container&: Triples, T: {Arch, "apple", "unknown"});
140
141 return Triples;
142}
143
144static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) {
145 auto H = Obj->getHeader();
146 auto &BA = Slice.getBinaryAttrs();
147
148 switch (H.filetype) {
149 default:
150 llvm_unreachable("unsupported binary type");
151 case MachO::MH_DYLIB:
152 BA.File = FileType::MachO_DynamicLibrary;
153 break;
154 case MachO::MH_DYLIB_STUB:
155 BA.File = FileType::MachO_DynamicLibrary_Stub;
156 break;
157 case MachO::MH_BUNDLE:
158 BA.File = FileType::MachO_Bundle;
159 break;
160 }
161
162 if (H.flags & MachO::MH_TWOLEVEL)
163 BA.TwoLevelNamespace = true;
164 if (H.flags & MachO::MH_APP_EXTENSION_SAFE)
165 BA.AppExtensionSafe = true;
166
167 for (const auto &LCI : Obj->load_commands()) {
168 switch (LCI.C.cmd) {
169 case MachO::LC_ID_DYLIB: {
170 auto DLLC = Obj->getDylibIDLoadCommand(L: LCI);
171 BA.InstallName = Slice.copyString(String: LCI.Ptr + DLLC.dylib.name);
172 BA.CurrentVersion = DLLC.dylib.current_version;
173 BA.CompatVersion = DLLC.dylib.compatibility_version;
174 break;
175 }
176 case MachO::LC_REEXPORT_DYLIB: {
177 auto DLLC = Obj->getDylibIDLoadCommand(L: LCI);
178 BA.RexportedLibraries.emplace_back(
179 args: Slice.copyString(String: LCI.Ptr + DLLC.dylib.name));
180 break;
181 }
182 case MachO::LC_SUB_FRAMEWORK: {
183 auto SFC = Obj->getSubFrameworkCommand(L: LCI);
184 BA.ParentUmbrella = Slice.copyString(String: LCI.Ptr + SFC.umbrella);
185 break;
186 }
187 case MachO::LC_SUB_CLIENT: {
188 auto SCLC = Obj->getSubClientCommand(L: LCI);
189 BA.AllowableClients.emplace_back(args: Slice.copyString(String: LCI.Ptr + SCLC.client));
190 break;
191 }
192 case MachO::LC_UUID: {
193 auto UUIDLC = Obj->getUuidCommand(L: LCI);
194 std::stringstream Stream;
195 for (unsigned I = 0; I < 16; ++I) {
196 if (I == 4 || I == 6 || I == 8 || I == 10)
197 Stream << '-';
198 Stream << std::setfill('0') << std::setw(2) << std::uppercase
199 << std::hex << static_cast<int>(UUIDLC.uuid[I]);
200 }
201 BA.UUID = Slice.copyString(String: Stream.str());
202 break;
203 }
204 case MachO::LC_RPATH: {
205 auto RPLC = Obj->getRpathCommand(L: LCI);
206 BA.RPaths.emplace_back(args: Slice.copyString(String: LCI.Ptr + RPLC.path));
207 break;
208 }
209 case MachO::LC_SEGMENT_SPLIT_INFO: {
210 auto SSILC = Obj->getLinkeditDataLoadCommand(L: LCI);
211 if (SSILC.datasize == 0)
212 BA.OSLibNotForSharedCache = true;
213 break;
214 }
215 default:
216 break;
217 }
218 }
219
220 for (auto &Sect : Obj->sections()) {
221 auto SectName = Sect.getName();
222 if (!SectName)
223 return SectName.takeError();
224 if (*SectName != "__objc_imageinfo" && *SectName != "__image_info")
225 continue;
226
227 auto Content = Sect.getContents();
228 if (!Content)
229 return Content.takeError();
230
231 if ((Content->size() >= 8) && (Content->front() == 0)) {
232 uint32_t Flags;
233 if (Obj->isLittleEndian()) {
234 auto *p =
235 reinterpret_cast<const support::ulittle32_t *>(Content->data() + 4);
236 Flags = *p;
237 } else {
238 auto *p =
239 reinterpret_cast<const support::ubig32_t *>(Content->data() + 4);
240 Flags = *p;
241 }
242 BA.SwiftABI = (Flags >> 8) & 0xFF;
243 }
244 }
245 return Error::success();
246}
247
248static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice,
249 const ParseOption &Opt) {
250
251 auto parseExport = [](const auto ExportFlags,
252 auto Addr) -> std::tuple<SymbolFlags, RecordLinkage> {
253 SymbolFlags Flags = SymbolFlags::None;
254 switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) {
255 case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
256 if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION)
257 Flags |= SymbolFlags::WeakDefined;
258 break;
259 case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
260 Flags |= SymbolFlags::ThreadLocalValue;
261 break;
262 }
263
264 RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
265 ? RecordLinkage::Rexported
266 : RecordLinkage::Exported;
267 return {Flags, Linkage};
268 };
269
270 Error Err = Error::success();
271
272 StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports;
273 // Collect symbols from export trie first. Sometimes, there are more exports
274 // in the trie than in n-list due to stripping. This is common for swift
275 // mangled symbols.
276 for (auto &Sym : Obj->exports(Err)) {
277 auto [Flags, Linkage] = parseExport(Sym.flags(), Sym.address());
278 Slice.addRecord(Name: Sym.name(), Flags, GV: GlobalRecord::Kind::Unknown, Linkage);
279 Exports[Sym.name()] = {Flags, Linkage};
280 }
281
282 for (const auto &Sym : Obj->symbols()) {
283 auto FlagsOrErr = Sym.getFlags();
284 if (!FlagsOrErr)
285 return FlagsOrErr.takeError();
286 auto Flags = *FlagsOrErr;
287
288 auto NameOrErr = Sym.getName();
289 if (!NameOrErr)
290 return NameOrErr.takeError();
291 auto Name = *NameOrErr;
292
293 RecordLinkage Linkage = RecordLinkage::Unknown;
294 SymbolFlags RecordFlags = SymbolFlags::None;
295
296 if (Flags & SymbolRef::SF_Undefined) {
297 if (Opt.Undefineds)
298 Linkage = RecordLinkage::Undefined;
299 else
300 continue;
301 if (Flags & SymbolRef::SF_Weak)
302 RecordFlags |= SymbolFlags::WeakReferenced;
303 } else if (Flags & SymbolRef::SF_Exported) {
304 auto Exp = Exports.find(Key: Name);
305 // This should never be possible when binaries are produced with Apple
306 // linkers. However it is possible to craft dylibs where the export trie
307 // is either malformed or has conflicting symbols compared to n_list.
308 if (Exp != Exports.end())
309 std::tie(args&: RecordFlags, args&: Linkage) = Exp->second;
310 else
311 Linkage = RecordLinkage::Exported;
312 } else if (Flags & SymbolRef::SF_Hidden) {
313 Linkage = RecordLinkage::Internal;
314 } else
315 continue;
316
317 auto TypeOrErr = Sym.getType();
318 if (!TypeOrErr)
319 return TypeOrErr.takeError();
320 auto Type = *TypeOrErr;
321
322 GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function)
323 ? GlobalRecord::Kind::Function
324 : GlobalRecord::Kind::Variable;
325
326 if (GV == GlobalRecord::Kind::Function)
327 RecordFlags |= SymbolFlags::Text;
328 else
329 RecordFlags |= SymbolFlags::Data;
330
331 Slice.addRecord(Name, Flags: RecordFlags, GV, Linkage);
332 }
333 return Err;
334}
335
336static Error load(MachOObjectFile *Obj, RecordsSlice &Slice,
337 const ParseOption &Opt, const Architecture Arch) {
338 if (Arch == AK_unknown)
339 return make_error<TextAPIError>(Args: TextAPIErrorCode::UnsupportedTarget);
340
341 if (Opt.MachOHeader)
342 if (auto Err = readMachOHeader(Obj, Slice))
343 return Err;
344
345 if (Opt.SymbolTable)
346 if (auto Err = readSymbols(Obj, Slice, Opt))
347 return Err;
348
349 return Error::success();
350}
351
352Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer,
353 const ParseOption &Opt) {
354 Records Results;
355
356 auto BinOrErr = createBinary(Source: Buffer);
357 if (!BinOrErr)
358 return BinOrErr.takeError();
359
360 Binary &Bin = *BinOrErr.get();
361 if (auto *Obj = dyn_cast<MachOObjectFile>(Val: &Bin)) {
362 const auto Arch = getArchitectureFromCpuType(CPUType: Obj->getHeader().cputype,
363 CPUSubType: Obj->getHeader().cpusubtype);
364 if (!Opt.Archs.has(Arch))
365 return make_error<TextAPIError>(Args: TextAPIErrorCode::NoSuchArchitecture);
366
367 auto Triples = constructTriples(Obj, ArchT: Arch);
368 for (const auto &T : Triples) {
369 if (mapToPlatformType(Target: T) == PLATFORM_UNKNOWN)
370 return make_error<TextAPIError>(Args: TextAPIErrorCode::UnsupportedTarget);
371 Results.emplace_back(Args: std::make_shared<RecordsSlice>(args: RecordsSlice({T})));
372 if (auto Err = load(Obj, Slice&: *Results.back(), Opt, Arch))
373 return std::move(Err);
374 Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
375 }
376 return Results;
377 }
378
379 // Only expect MachO universal binaries at this point.
380 assert(isa<MachOUniversalBinary>(&Bin) &&
381 "Expected a MachO universal binary.");
382 auto *UB = cast<MachOUniversalBinary>(Val: &Bin);
383
384 for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) {
385 // Skip architecture if not requested.
386 auto Arch =
387 getArchitectureFromCpuType(CPUType: OI->getCPUType(), CPUSubType: OI->getCPUSubType());
388 if (!Opt.Archs.has(Arch))
389 continue;
390
391 // Skip unknown architectures.
392 if (Arch == AK_unknown)
393 continue;
394
395 // This can fail if the object is an archive.
396 auto ObjOrErr = OI->getAsObjectFile();
397
398 // Skip the archive and consume the error.
399 if (!ObjOrErr) {
400 consumeError(Err: ObjOrErr.takeError());
401 continue;
402 }
403
404 auto &Obj = *ObjOrErr.get();
405 switch (Obj.getHeader().filetype) {
406 default:
407 break;
408 case MachO::MH_BUNDLE:
409 case MachO::MH_DYLIB:
410 case MachO::MH_DYLIB_STUB:
411 for (const auto &T : constructTriples(Obj: &Obj, ArchT: Arch)) {
412 Results.emplace_back(Args: std::make_shared<RecordsSlice>(args: RecordsSlice({T})));
413 if (auto Err = load(Obj: &Obj, Slice&: *Results.back(), Opt, Arch))
414 return std::move(Err);
415 Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
416 }
417 break;
418 }
419 }
420
421 if (Results.empty())
422 return make_error<TextAPIError>(Args: TextAPIErrorCode::EmptyResults);
423 return Results;
424}
425
426Expected<std::unique_ptr<InterfaceFile>>
427DylibReader::get(MemoryBufferRef Buffer) {
428 ParseOption Options;
429 auto SlicesOrErr = readFile(Buffer, Opt: Options);
430 if (!SlicesOrErr)
431 return SlicesOrErr.takeError();
432
433 return convertToInterfaceFile(Slices: *SlicesOrErr);
434}
435
436static void DWARFErrorHandler(Error Err) { /**/ }
437
438static SymbolToSourceLocMap
439accumulateLocs(MachOObjectFile &Obj,
440 const std::unique_ptr<DWARFContext> &DiCtx) {
441 SymbolToSourceLocMap LocMap;
442 for (const auto &Symbol : Obj.symbols()) {
443 Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
444 if (!FlagsOrErr) {
445 consumeError(Err: FlagsOrErr.takeError());
446 continue;
447 }
448
449 if (!(*FlagsOrErr & SymbolRef::SF_Exported))
450 continue;
451
452 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
453 if (!AddressOrErr) {
454 consumeError(Err: AddressOrErr.takeError());
455 continue;
456 }
457 const uint64_t Address = *AddressOrErr;
458
459 auto TypeOrErr = Symbol.getType();
460 if (!TypeOrErr) {
461 consumeError(Err: TypeOrErr.takeError());
462 continue;
463 }
464 const bool IsCode = (*TypeOrErr & SymbolRef::ST_Function);
465
466 auto *DWARFCU = IsCode ? DiCtx->getCompileUnitForCodeAddress(Address)
467 : DiCtx->getCompileUnitForDataAddress(Address);
468 if (!DWARFCU)
469 continue;
470
471 const DWARFDie &DIE = IsCode ? DWARFCU->getSubroutineForAddress(Address)
472 : DWARFCU->getVariableForAddress(Address);
473 const std::string File = DIE.getDeclFile(
474 Kind: llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
475 const uint64_t Line = DIE.getDeclLine();
476
477 auto NameOrErr = Symbol.getName();
478 if (!NameOrErr) {
479 consumeError(Err: NameOrErr.takeError());
480 continue;
481 }
482 auto Name = *NameOrErr;
483 auto Sym = parseSymbol(SymName: Name);
484
485 if (!File.empty() && Line != 0)
486 LocMap.insert(KV: {Sym.Name, RecordLoc(File, Line)});
487 }
488
489 return LocMap;
490}
491
492SymbolToSourceLocMap
493DylibReader::accumulateSourceLocFromDSYM(const StringRef DSYM,
494 const Target &T) {
495 // Find sidecar file.
496 auto DSYMsOrErr = MachOObjectFile::findDsymObjectMembers(Path: DSYM);
497 if (!DSYMsOrErr) {
498 consumeError(Err: DSYMsOrErr.takeError());
499 return SymbolToSourceLocMap();
500 }
501 if (DSYMsOrErr->empty())
502 return SymbolToSourceLocMap();
503
504 const StringRef Path = DSYMsOrErr->front();
505 auto BufOrErr = MemoryBuffer::getFile(Filename: Path);
506 if (auto Err = BufOrErr.getError())
507 return SymbolToSourceLocMap();
508
509 auto BinOrErr = createBinary(Source: *BufOrErr.get());
510 if (!BinOrErr) {
511 consumeError(Err: BinOrErr.takeError());
512 return SymbolToSourceLocMap();
513 }
514 // Handle single arch.
515 if (auto *Single = dyn_cast<MachOObjectFile>(Val: BinOrErr->get())) {
516 auto DiCtx = DWARFContext::create(
517 Obj: *Single, RelocAction: DWARFContext::ProcessDebugRelocations::Process, L: nullptr, DWPName: "",
518 RecoverableErrorHandler: DWARFErrorHandler, WarningHandler: DWARFErrorHandler);
519
520 return accumulateLocs(Obj&: *Single, DiCtx);
521 }
522 // Handle universal companion file.
523 if (auto *Fat = dyn_cast<MachOUniversalBinary>(Val: BinOrErr->get())) {
524 auto ObjForArch = Fat->getObjectForArch(ArchName: getArchitectureName(Arch: T.Arch));
525 if (!ObjForArch) {
526 consumeError(Err: ObjForArch.takeError());
527 return SymbolToSourceLocMap();
528 }
529 auto MachOOrErr = ObjForArch->getAsObjectFile();
530 if (!MachOOrErr) {
531 consumeError(Err: MachOOrErr.takeError());
532 return SymbolToSourceLocMap();
533 }
534 auto &Obj = **MachOOrErr;
535 auto DiCtx = DWARFContext::create(
536 Obj, RelocAction: DWARFContext::ProcessDebugRelocations::Process, L: nullptr, DWPName: "",
537 RecoverableErrorHandler: DWARFErrorHandler, WarningHandler: DWARFErrorHandler);
538
539 return accumulateLocs(Obj, DiCtx);
540 }
541 return SymbolToSourceLocMap();
542}
543