1//===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// Implements the TAPI Reader for Mach-O dynamic libraries.
10///
11//===----------------------------------------------------------------------===//
12
13#include "llvm/TextAPI/DylibReader.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
16#include "llvm/DebugInfo/DWARF/DWARFContext.h"
17#include "llvm/Object/Binary.h"
18#include "llvm/Object/MachOUniversal.h"
19#include "llvm/Support/Endian.h"
20#include "llvm/TargetParser/Triple.h"
21#include "llvm/TextAPI/InterfaceFile.h"
22#include "llvm/TextAPI/RecordsSlice.h"
23#include "llvm/TextAPI/TextAPIError.h"
24#include <iomanip>
25#include <set>
26#include <sstream>
27#include <string>
28#include <tuple>
29
30using namespace llvm;
31using namespace llvm::object;
32using namespace llvm::MachO;
33using namespace llvm::MachO::DylibReader;
34
35using TripleVec = std::vector<Triple>;
36static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {
37 auto I = partition_point(Range&: Container, P: [=](const Triple &CT) {
38 return std::forward_as_tuple(args: CT.getArch(), args: CT.getOS(),
39 args: CT.getEnvironment()) <
40 std::forward_as_tuple(args: T.getArch(), args: T.getOS(), args: T.getEnvironment());
41 });
42
43 if (I != Container.end() && *I == T)
44 return I;
45 return Container.emplace(position: I, args&: T);
46}
47
48static TripleVec constructTriples(MachOObjectFile *Obj,
49 const Architecture ArchT) {
50 auto getOSVersionStr = [](uint32_t V) {
51 PackedVersion OSVersion(V);
52 std::string Vers;
53 raw_string_ostream VStream(Vers);
54 VStream << OSVersion;
55 return VStream.str();
56 };
57 auto getOSVersion = [&](const MachOObjectFile::LoadCommandInfo &cmd) {
58 auto Vers = Obj->getVersionMinLoadCommand(L: cmd);
59 return getOSVersionStr(Vers.version);
60 };
61
62 TripleVec Triples;
63 bool IsIntel = ArchitectureSet(ArchT).hasX86();
64 auto Arch = getArchitectureName(Arch: ArchT);
65
66 for (const auto &cmd : Obj->load_commands()) {
67 std::string OSVersion;
68 switch (cmd.C.cmd) {
69 case MachO::LC_VERSION_MIN_MACOSX:
70 OSVersion = getOSVersion(cmd);
71 emplace(Container&: Triples, T: {Arch, "apple", "macos" + OSVersion});
72 break;
73 case MachO::LC_VERSION_MIN_IPHONEOS:
74 OSVersion = getOSVersion(cmd);
75 if (IsIntel)
76 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion, "simulator"});
77 else
78 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion});
79 break;
80 case MachO::LC_VERSION_MIN_TVOS:
81 OSVersion = getOSVersion(cmd);
82 if (IsIntel)
83 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion, "simulator"});
84 else
85 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion});
86 break;
87 case MachO::LC_VERSION_MIN_WATCHOS:
88 OSVersion = getOSVersion(cmd);
89 if (IsIntel)
90 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion, "simulator"});
91 else
92 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion});
93 break;
94 case MachO::LC_BUILD_VERSION: {
95 OSVersion = getOSVersionStr(Obj->getBuildVersionLoadCommand(L: cmd).minos);
96 switch (Obj->getBuildVersionLoadCommand(L: cmd).platform) {
97 case MachO::PLATFORM_MACOS:
98 emplace(Container&: Triples, T: {Arch, "apple", "macos" + OSVersion});
99 break;
100 case MachO::PLATFORM_IOS:
101 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion});
102 break;
103 case MachO::PLATFORM_TVOS:
104 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion});
105 break;
106 case MachO::PLATFORM_WATCHOS:
107 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion});
108 break;
109 case MachO::PLATFORM_BRIDGEOS:
110 emplace(Container&: Triples, T: {Arch, "apple", "bridgeos" + OSVersion});
111 break;
112 case MachO::PLATFORM_MACCATALYST:
113 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion, "macabi"});
114 break;
115 case MachO::PLATFORM_IOSSIMULATOR:
116 emplace(Container&: Triples, T: {Arch, "apple", "ios" + OSVersion, "simulator"});
117 break;
118 case MachO::PLATFORM_TVOSSIMULATOR:
119 emplace(Container&: Triples, T: {Arch, "apple", "tvos" + OSVersion, "simulator"});
120 break;
121 case MachO::PLATFORM_WATCHOSSIMULATOR:
122 emplace(Container&: Triples, T: {Arch, "apple", "watchos" + OSVersion, "simulator"});
123 break;
124 case MachO::PLATFORM_DRIVERKIT:
125 emplace(Container&: Triples, T: {Arch, "apple", "driverkit" + OSVersion});
126 break;
127 default:
128 break; // Skip any others.
129 }
130 break;
131 }
132 default:
133 break;
134 }
135 }
136
137 // Record unknown platform for older binaries that don't enforce platform
138 // load commands.
139 if (Triples.empty())
140 emplace(Container&: Triples, T: {Arch, "apple", "unknown"});
141
142 return Triples;
143}
144
145static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) {
146 auto H = Obj->getHeader();
147 auto &BA = Slice.getBinaryAttrs();
148
149 switch (H.filetype) {
150 default:
151 llvm_unreachable("unsupported binary type");
152 case MachO::MH_DYLIB:
153 BA.File = FileType::MachO_DynamicLibrary;
154 break;
155 case MachO::MH_DYLIB_STUB:
156 BA.File = FileType::MachO_DynamicLibrary_Stub;
157 break;
158 case MachO::MH_BUNDLE:
159 BA.File = FileType::MachO_Bundle;
160 break;
161 }
162
163 if (H.flags & MachO::MH_TWOLEVEL)
164 BA.TwoLevelNamespace = true;
165 if (H.flags & MachO::MH_APP_EXTENSION_SAFE)
166 BA.AppExtensionSafe = true;
167
168 for (const auto &LCI : Obj->load_commands()) {
169 switch (LCI.C.cmd) {
170 case MachO::LC_ID_DYLIB: {
171 auto DLLC = Obj->getDylibIDLoadCommand(L: LCI);
172 BA.InstallName = Slice.copyString(String: LCI.Ptr + DLLC.dylib.name);
173 BA.CurrentVersion = DLLC.dylib.current_version;
174 BA.CompatVersion = DLLC.dylib.compatibility_version;
175 break;
176 }
177 case MachO::LC_REEXPORT_DYLIB: {
178 auto DLLC = Obj->getDylibIDLoadCommand(L: LCI);
179 BA.RexportedLibraries.emplace_back(
180 args: Slice.copyString(String: LCI.Ptr + DLLC.dylib.name));
181 break;
182 }
183 case MachO::LC_SUB_FRAMEWORK: {
184 auto SFC = Obj->getSubFrameworkCommand(L: LCI);
185 BA.ParentUmbrella = Slice.copyString(String: LCI.Ptr + SFC.umbrella);
186 break;
187 }
188 case MachO::LC_SUB_CLIENT: {
189 auto SCLC = Obj->getSubClientCommand(L: LCI);
190 BA.AllowableClients.emplace_back(args: Slice.copyString(String: LCI.Ptr + SCLC.client));
191 break;
192 }
193 case MachO::LC_UUID: {
194 auto UUIDLC = Obj->getUuidCommand(L: LCI);
195 std::stringstream Stream;
196 for (unsigned I = 0; I < 16; ++I) {
197 if (I == 4 || I == 6 || I == 8 || I == 10)
198 Stream << '-';
199 Stream << std::setfill('0') << std::setw(2) << std::uppercase
200 << std::hex << static_cast<int>(UUIDLC.uuid[I]);
201 }
202 BA.UUID = Slice.copyString(String: Stream.str());
203 break;
204 }
205 case MachO::LC_RPATH: {
206 auto RPLC = Obj->getRpathCommand(L: LCI);
207 BA.RPaths.emplace_back(args: Slice.copyString(String: LCI.Ptr + RPLC.path));
208 break;
209 }
210 case MachO::LC_SEGMENT_SPLIT_INFO: {
211 auto SSILC = Obj->getLinkeditDataLoadCommand(L: LCI);
212 if (SSILC.datasize == 0)
213 BA.OSLibNotForSharedCache = true;
214 break;
215 }
216 default:
217 break;
218 }
219 }
220
221 for (auto &Sect : Obj->sections()) {
222 auto SectName = Sect.getName();
223 if (!SectName)
224 return SectName.takeError();
225 if (*SectName != "__objc_imageinfo" && *SectName != "__image_info")
226 continue;
227
228 auto Content = Sect.getContents();
229 if (!Content)
230 return Content.takeError();
231
232 if ((Content->size() >= 8) && (Content->front() == 0)) {
233 uint32_t Flags;
234 if (Obj->isLittleEndian()) {
235 auto *p =
236 reinterpret_cast<const support::ulittle32_t *>(Content->data() + 4);
237 Flags = *p;
238 } else {
239 auto *p =
240 reinterpret_cast<const support::ubig32_t *>(Content->data() + 4);
241 Flags = *p;
242 }
243 BA.SwiftABI = (Flags >> 8) & 0xFF;
244 }
245 }
246 return Error::success();
247}
248
249static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice,
250 const ParseOption &Opt) {
251
252 auto parseExport = [](const auto ExportFlags,
253 auto Addr) -> std::tuple<SymbolFlags, RecordLinkage> {
254 SymbolFlags Flags = SymbolFlags::None;
255 switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) {
256 case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
257 if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION)
258 Flags |= SymbolFlags::WeakDefined;
259 break;
260 case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
261 Flags |= SymbolFlags::ThreadLocalValue;
262 break;
263 }
264
265 RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
266 ? RecordLinkage::Rexported
267 : RecordLinkage::Exported;
268 return {Flags, Linkage};
269 };
270
271 Error Err = Error::success();
272
273 StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports;
274 // Collect symbols from export trie first. Sometimes, there are more exports
275 // in the trie than in n-list due to stripping. This is common for swift
276 // mangled symbols.
277 for (auto &Sym : Obj->exports(Err)) {
278 auto [Flags, Linkage] = parseExport(Sym.flags(), Sym.address());
279 Slice.addRecord(Name: Sym.name(), Flags, GV: GlobalRecord::Kind::Unknown, Linkage);
280 Exports[Sym.name()] = {Flags, Linkage};
281 }
282
283 for (const auto &Sym : Obj->symbols()) {
284 auto FlagsOrErr = Sym.getFlags();
285 if (!FlagsOrErr)
286 return FlagsOrErr.takeError();
287 auto Flags = *FlagsOrErr;
288
289 auto NameOrErr = Sym.getName();
290 if (!NameOrErr)
291 return NameOrErr.takeError();
292 auto Name = *NameOrErr;
293
294 RecordLinkage Linkage = RecordLinkage::Unknown;
295 SymbolFlags RecordFlags = SymbolFlags::None;
296
297 if (Flags & SymbolRef::SF_Undefined) {
298 if (Opt.Undefineds)
299 Linkage = RecordLinkage::Undefined;
300 else
301 continue;
302 if (Flags & SymbolRef::SF_Weak)
303 RecordFlags |= SymbolFlags::WeakReferenced;
304 } else if (Flags & SymbolRef::SF_Exported) {
305 auto Exp = Exports.find(Key: Name);
306 // This should never be possible when binaries are produced with Apple
307 // linkers. However it is possible to craft dylibs where the export trie
308 // is either malformed or has conflicting symbols compared to n_list.
309 if (Exp != Exports.end())
310 std::tie(args&: RecordFlags, args&: Linkage) = Exp->second;
311 else
312 Linkage = RecordLinkage::Exported;
313 } else if (Flags & SymbolRef::SF_Hidden) {
314 Linkage = RecordLinkage::Internal;
315 } else
316 continue;
317
318 auto TypeOrErr = Sym.getType();
319 if (!TypeOrErr)
320 return TypeOrErr.takeError();
321 auto Type = *TypeOrErr;
322
323 GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function)
324 ? GlobalRecord::Kind::Function
325 : GlobalRecord::Kind::Variable;
326
327 if (GV == GlobalRecord::Kind::Function)
328 RecordFlags |= SymbolFlags::Text;
329 else
330 RecordFlags |= SymbolFlags::Data;
331
332 Slice.addRecord(Name, Flags: RecordFlags, GV, Linkage);
333 }
334 return Err;
335}
336
337static Error load(MachOObjectFile *Obj, RecordsSlice &Slice,
338 const ParseOption &Opt, const Architecture Arch) {
339 if (Arch == AK_unknown)
340 return make_error<TextAPIError>(Args: TextAPIErrorCode::UnsupportedTarget);
341
342 if (Opt.MachOHeader)
343 if (auto Err = readMachOHeader(Obj, Slice))
344 return Err;
345
346 if (Opt.SymbolTable)
347 if (auto Err = readSymbols(Obj, Slice, Opt))
348 return Err;
349
350 return Error::success();
351}
352
353Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer,
354 const ParseOption &Opt) {
355 Records Results;
356
357 auto BinOrErr = createBinary(Source: Buffer);
358 if (!BinOrErr)
359 return BinOrErr.takeError();
360
361 Binary &Bin = *BinOrErr.get();
362 if (auto *Obj = dyn_cast<MachOObjectFile>(Val: &Bin)) {
363 const auto Arch = getArchitectureFromCpuType(CPUType: Obj->getHeader().cputype,
364 CPUSubType: Obj->getHeader().cpusubtype);
365 if (!Opt.Archs.has(Arch))
366 return make_error<TextAPIError>(Args: TextAPIErrorCode::NoSuchArchitecture);
367
368 auto Triples = constructTriples(Obj, ArchT: Arch);
369 for (const auto &T : Triples) {
370 if (mapToPlatformType(Target: T) == PLATFORM_UNKNOWN)
371 return make_error<TextAPIError>(Args: TextAPIErrorCode::UnsupportedTarget);
372 Results.emplace_back(Args: std::make_shared<RecordsSlice>(args: RecordsSlice({T})));
373 if (auto Err = load(Obj, Slice&: *Results.back(), Opt, Arch))
374 return std::move(Err);
375 Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
376 }
377 return Results;
378 }
379
380 // Only expect MachO universal binaries at this point.
381 assert(isa<MachOUniversalBinary>(&Bin) &&
382 "Expected a MachO universal binary.");
383 auto *UB = cast<MachOUniversalBinary>(Val: &Bin);
384
385 for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) {
386 // Skip architecture if not requested.
387 auto Arch =
388 getArchitectureFromCpuType(CPUType: OI->getCPUType(), CPUSubType: OI->getCPUSubType());
389 if (!Opt.Archs.has(Arch))
390 continue;
391
392 // Skip unknown architectures.
393 if (Arch == AK_unknown)
394 continue;
395
396 // This can fail if the object is an archive.
397 auto ObjOrErr = OI->getAsObjectFile();
398
399 // Skip the archive and consume the error.
400 if (!ObjOrErr) {
401 consumeError(Err: ObjOrErr.takeError());
402 continue;
403 }
404
405 auto &Obj = *ObjOrErr.get();
406 switch (Obj.getHeader().filetype) {
407 default:
408 break;
409 case MachO::MH_BUNDLE:
410 case MachO::MH_DYLIB:
411 case MachO::MH_DYLIB_STUB:
412 for (const auto &T : constructTriples(Obj: &Obj, ArchT: Arch)) {
413 Results.emplace_back(Args: std::make_shared<RecordsSlice>(args: RecordsSlice({T})));
414 if (auto Err = load(Obj: &Obj, Slice&: *Results.back(), Opt, Arch))
415 return std::move(Err);
416 Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
417 }
418 break;
419 }
420 }
421
422 if (Results.empty())
423 return make_error<TextAPIError>(Args: TextAPIErrorCode::EmptyResults);
424 return Results;
425}
426
427Expected<std::unique_ptr<InterfaceFile>>
428DylibReader::get(MemoryBufferRef Buffer) {
429 ParseOption Options;
430 auto SlicesOrErr = readFile(Buffer, Opt: Options);
431 if (!SlicesOrErr)
432 return SlicesOrErr.takeError();
433
434 return convertToInterfaceFile(Slices: *SlicesOrErr);
435}
436
437static void DWARFErrorHandler(Error Err) { /**/ }
438
439static SymbolToSourceLocMap
440accumulateLocs(MachOObjectFile &Obj,
441 const std::unique_ptr<DWARFContext> &DiCtx) {
442 SymbolToSourceLocMap LocMap;
443 for (const auto &Symbol : Obj.symbols()) {
444 Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
445 if (!FlagsOrErr) {
446 consumeError(Err: FlagsOrErr.takeError());
447 continue;
448 }
449
450 if (!(*FlagsOrErr & SymbolRef::SF_Exported))
451 continue;
452
453 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
454 if (!AddressOrErr) {
455 consumeError(Err: AddressOrErr.takeError());
456 continue;
457 }
458 const uint64_t Address = *AddressOrErr;
459
460 auto TypeOrErr = Symbol.getType();
461 if (!TypeOrErr) {
462 consumeError(Err: TypeOrErr.takeError());
463 continue;
464 }
465 const bool IsCode = (*TypeOrErr & SymbolRef::ST_Function);
466
467 auto *DWARFCU = IsCode ? DiCtx->getCompileUnitForCodeAddress(Address)
468 : DiCtx->getCompileUnitForDataAddress(Address);
469 if (!DWARFCU)
470 continue;
471
472 const DWARFDie &DIE = IsCode ? DWARFCU->getSubroutineForAddress(Address)
473 : DWARFCU->getVariableForAddress(Address);
474 const std::string File = DIE.getDeclFile(
475 Kind: llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
476 const uint64_t Line = DIE.getDeclLine();
477
478 auto NameOrErr = Symbol.getName();
479 if (!NameOrErr) {
480 consumeError(Err: NameOrErr.takeError());
481 continue;
482 }
483 auto Name = *NameOrErr;
484 auto Sym = parseSymbol(SymName: Name);
485
486 if (!File.empty() && Line != 0)
487 LocMap.insert(KV: {Sym.Name, RecordLoc(File, Line)});
488 }
489
490 return LocMap;
491}
492
493SymbolToSourceLocMap
494DylibReader::accumulateSourceLocFromDSYM(const StringRef DSYM,
495 const Target &T) {
496 // Find sidecar file.
497 auto DSYMsOrErr = MachOObjectFile::findDsymObjectMembers(Path: DSYM);
498 if (!DSYMsOrErr) {
499 consumeError(Err: DSYMsOrErr.takeError());
500 return SymbolToSourceLocMap();
501 }
502 if (DSYMsOrErr->empty())
503 return SymbolToSourceLocMap();
504
505 const StringRef Path = DSYMsOrErr->front();
506 auto BufOrErr = MemoryBuffer::getFile(Filename: Path);
507 if (auto Err = BufOrErr.getError())
508 return SymbolToSourceLocMap();
509
510 auto BinOrErr = createBinary(Source: *BufOrErr.get());
511 if (!BinOrErr) {
512 consumeError(Err: BinOrErr.takeError());
513 return SymbolToSourceLocMap();
514 }
515 // Handle single arch.
516 if (auto *Single = dyn_cast<MachOObjectFile>(Val: BinOrErr->get())) {
517 auto DiCtx = DWARFContext::create(
518 Obj: *Single, RelocAction: DWARFContext::ProcessDebugRelocations::Process, L: nullptr, DWPName: "",
519 RecoverableErrorHandler: DWARFErrorHandler, WarningHandler: DWARFErrorHandler);
520
521 return accumulateLocs(Obj&: *Single, DiCtx);
522 }
523 // Handle universal companion file.
524 if (auto *Fat = dyn_cast<MachOUniversalBinary>(Val: BinOrErr->get())) {
525 auto ObjForArch = Fat->getObjectForArch(ArchName: getArchitectureName(Arch: T.Arch));
526 if (!ObjForArch) {
527 consumeError(Err: ObjForArch.takeError());
528 return SymbolToSourceLocMap();
529 }
530 auto MachOOrErr = ObjForArch->getAsObjectFile();
531 if (!MachOOrErr) {
532 consumeError(Err: MachOOrErr.takeError());
533 return SymbolToSourceLocMap();
534 }
535 auto &Obj = **MachOOrErr;
536 auto DiCtx = DWARFContext::create(
537 Obj, RelocAction: DWARFContext::ProcessDebugRelocations::Process, L: nullptr, DWPName: "",
538 RecoverableErrorHandler: DWARFErrorHandler, WarningHandler: DWARFErrorHandler);
539
540 return accumulateLocs(Obj, DiCtx);
541 }
542 return SymbolToSourceLocMap();
543}
544