1//===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/STLExtras.h"
10#include "llvm/DebugInfo/DIContext.h"
11#include "llvm/DebugInfo/DWARF/DWARFContext.h"
12#include "llvm/Object/Archive.h"
13#include "llvm/Object/ELFObjectFile.h"
14#include "llvm/Object/MachOUniversal.h"
15#include "llvm/Object/ObjectFile.h"
16#include "llvm/Option/ArgList.h"
17#include "llvm/Option/Option.h"
18#include "llvm/Support/CommandLine.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/Support/Format.h"
21#include "llvm/Support/JSON.h"
22#include "llvm/Support/LLVMDriver.h"
23#include "llvm/Support/ManagedStatic.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/PrettyStackTrace.h"
26#include "llvm/Support/Regex.h"
27#include "llvm/Support/Signals.h"
28#include "llvm/Support/TargetSelect.h"
29#include "llvm/Support/raw_ostream.h"
30#include "llvm/TargetParser/Triple.h"
31#include <algorithm>
32#include <cstring>
33#include <inttypes.h>
34#include <iostream>
35#include <optional>
36#include <string>
37#include <system_error>
38#include <vector>
39
40#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
41#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
42#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
43#include "llvm/DebugInfo/GSYM/GsymCreator.h"
44#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
45#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
46#include "llvm/DebugInfo/GSYM/GsymReader.h"
47#include "llvm/DebugInfo/GSYM/Header.h"
48#include "llvm/DebugInfo/GSYM/HeaderV2.h"
49#include "llvm/DebugInfo/GSYM/InlineInfo.h"
50#include "llvm/DebugInfo/GSYM/LookupResult.h"
51#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
52#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
53
54using namespace llvm;
55using namespace gsym;
56using namespace object;
57
/// Command line options.
/// @{
61
62using namespace llvm::opt;
63enum ID {
64 OPT_INVALID = 0, // This is not an option ID.
65#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
66#include "Opts.inc"
67#undef OPTION
68};
69
70#define OPTTABLE_STR_TABLE_CODE
71#include "Opts.inc"
72#undef OPTTABLE_STR_TABLE_CODE
73
74#define OPTTABLE_PREFIXES_TABLE_CODE
75#include "Opts.inc"
76#undef OPTTABLE_PREFIXES_TABLE_CODE
77
78const opt::OptTable::Info InfoTable[] = {
79#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
80#include "Opts.inc"
81#undef OPTION
82};
83
84class GSYMUtilOptTable : public llvm::opt::GenericOptTable {
85public:
86 GSYMUtilOptTable()
87 : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {
88 setGroupedShortOptions(true);
89 }
90};
91
92static bool Verbose;
93static std::vector<std::string> InputFilenames;
94static std::string ConvertFilename;
95static std::string SymtabFilename;
96static std::vector<std::string> ArchFilters;
97static std::string OutputFilename;
98static std::string JsonSummaryFile;
99static bool Verify;
100static bool BenchmarkReader;
101static uint32_t BenchmarkStart;
102static uint32_t BenchmarkStride;
103static unsigned NumThreads;
104static uint64_t SegmentSize;
105static bool Quiet;
106static std::vector<uint64_t> LookupAddresses;
107static bool LookupAddressesFromStdin;
108static bool UseMergedFunctions = false;
109static bool LoadDwarfCallSites = false;
110static std::string CallSiteYamlPath;
111static std::vector<std::string> MergedFunctionsFilters;
112// Default output version. Can be overridden by --output-version.
113static uint32_t OutputVersion = Header::getVersion();
114
115static void parseArgs(int argc, char **argv) {
116 GSYMUtilOptTable Tbl;
117 llvm::StringRef ToolName = argv[0];
118 llvm::BumpPtrAllocator A;
119 llvm::StringSaver Saver{A};
120 llvm::opt::InputArgList Args =
121 Tbl.parseArgs(Argc: argc, Argv: argv, Unknown: OPT_UNKNOWN, Saver, ErrorFn: [&](StringRef Msg) {
122 llvm::errs() << Msg << '\n';
123 std::exit(status: 1);
124 });
125 if (Args.hasArg(Ids: OPT_help)) {
126 const char *Overview =
127 "A tool for dumping, searching and creating GSYM files.\n\n"
128 "Specify one or more GSYM paths as arguments to dump all of the "
129 "information in each GSYM file.\n"
130 "Specify a single GSYM file along with one or more --lookup options to "
131 "lookup addresses within that GSYM file.\n"
132 "Use the --convert option to specify a file with option --out-file "
133 "option to convert to GSYM format.\n";
134
135 Tbl.printHelp(OS&: llvm::outs(), Usage: "llvm-gsymutil [options] <input GSYM files>",
136 Title: Overview);
137 std::exit(status: 0);
138 }
139 if (Args.hasArg(Ids: OPT_version)) {
140 llvm::outs() << ToolName << '\n';
141 cl::PrintVersionMessage();
142 std::exit(status: 0);
143 }
144
145 Verbose = Args.hasArg(Ids: OPT_verbose);
146
147 for (const llvm::opt::Arg *A : Args.filtered(Ids: OPT_INPUT))
148 InputFilenames.emplace_back(args: A->getValue());
149
150 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_convert_EQ))
151 ConvertFilename = A->getValue();
152
153 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_symtab_file_EQ))
154 SymtabFilename = A->getValue();
155
156 for (const llvm::opt::Arg *A : Args.filtered(Ids: OPT_arch_EQ))
157 ArchFilters.emplace_back(args: A->getValue());
158
159 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_out_file_EQ))
160 OutputFilename = A->getValue();
161
162 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_json_summary_file_EQ))
163 JsonSummaryFile = A->getValue();
164
165 Verify = Args.hasArg(Ids: OPT_verify);
166 BenchmarkStart = 0;
167 BenchmarkStride = 1;
168 if (Args.hasArg(Ids: OPT_benchmark_reader_all)) {
169 BenchmarkReader = true;
170 } else if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_benchmark_reader)) {
171 BenchmarkReader = true;
172 StringRef S{A->getValue()};
173 if (!S.empty()) {
174 auto [StartStr, StrideStr] = S.split(Separator: ',');
175 if (!llvm::to_integer(S: StartStr, Num&: BenchmarkStart, Base: 0)) {
176 llvm::errs() << ToolName
177 << ": for the --benchmark-reader option: invalid start '"
178 << StartStr << "'\n";
179 std::exit(status: 1);
180 }
181 if (!StrideStr.empty() &&
182 !llvm::to_integer(S: StrideStr, Num&: BenchmarkStride, Base: 0)) {
183 llvm::errs() << ToolName
184 << ": for the --benchmark-reader option: invalid stride '"
185 << StrideStr << "'\n";
186 std::exit(status: 1);
187 }
188 if (BenchmarkStride == 0) {
189 llvm::errs() << ToolName
190 << ": for the --benchmark-reader option: stride must be "
191 "positive\n";
192 std::exit(status: 1);
193 }
194 }
195 }
196
197 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_num_threads_EQ)) {
198 StringRef S{A->getValue()};
199 if (!llvm::to_integer(S, Num&: NumThreads, Base: 0)) {
200 llvm::errs() << ToolName << ": for the --num-threads option: '" << S
201 << "' value invalid for uint argument!\n";
202 std::exit(status: 1);
203 }
204 }
205
206 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_segment_size_EQ)) {
207 StringRef S{A->getValue()};
208 if (!llvm::to_integer(S, Num&: SegmentSize, Base: 0)) {
209 llvm::errs() << ToolName << ": for the --segment-size option: '" << S
210 << "' value invalid for uint argument!\n";
211 std::exit(status: 1);
212 }
213 }
214
215 Quiet = Args.hasArg(Ids: OPT_quiet);
216
217 for (const llvm::opt::Arg *A : Args.filtered(Ids: OPT_address_EQ)) {
218 StringRef S{A->getValue()};
219 if (!llvm::to_integer(S, Num&: LookupAddresses.emplace_back(), Base: 0)) {
220 llvm::errs() << ToolName << ": for the --address option: '" << S
221 << "' value invalid for uint argument!\n";
222 std::exit(status: 1);
223 }
224 }
225
226 LookupAddressesFromStdin = Args.hasArg(Ids: OPT_addresses_from_stdin);
227 UseMergedFunctions = Args.hasArg(Ids: OPT_merged_functions);
228
229 if (Args.hasArg(Ids: OPT_callsites_yaml_file_EQ)) {
230 CallSiteYamlPath = Args.getLastArgValue(Id: OPT_callsites_yaml_file_EQ);
231 if (CallSiteYamlPath.empty()) {
232 llvm::errs()
233 << ToolName
234 << ": --callsites-yaml-file option requires a non-empty argument.\n";
235 std::exit(status: 1);
236 }
237 }
238
239 LoadDwarfCallSites = Args.hasArg(Ids: OPT_dwarf_callsites);
240
241 for (const llvm::opt::Arg *A :
242 Args.filtered(Ids: OPT_merged_functions_filter_EQ)) {
243 MergedFunctionsFilters.push_back(x: A->getValue());
244 // Validate the filter is only used with correct flags
245 if (LookupAddresses.empty() && !LookupAddressesFromStdin) {
246 llvm::errs() << ToolName
247 << ": --merged-functions-filter can only be used with "
248 "--address/--addresses-from-stdin\n";
249 std::exit(status: 1);
250 }
251 if (!UseMergedFunctions) {
252 llvm::errs()
253 << ToolName
254 << ": --merged-functions-filter requires --merged-functions\n";
255 std::exit(status: 1);
256 }
257 }
258
259 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_output_version_EQ)) {
260 StringRef Val = A->getValue();
261 uint32_t Version;
262 if (Val.getAsInteger(Radix: 10, Result&: Version) || (Version != Header::getVersion() &&
263 Version != HeaderV2::getVersion())) {
264 llvm::errs() << ToolName << ": for the --output-version option: '" << Val
265 << "' is invalid. Use '1' or '2'.\n";
266 std::exit(status: 1);
267 }
268 OutputVersion = Version;
269 }
270}
271
272/// @}
273//===----------------------------------------------------------------------===//
274
275static void error(Error Err) {
276 if (!Err)
277 return;
278 WithColor::error() << toString(E: std::move(Err)) << "\n";
279 exit(status: 1);
280}
281
282static void error(StringRef Prefix, llvm::Error Err) {
283 if (!Err)
284 return;
285 errs() << Prefix << ": " << Err << "\n";
286 consumeError(Err: std::move(Err));
287 exit(status: 1);
288}
289
290static void error(StringRef Prefix, std::error_code EC) {
291 if (!EC)
292 return;
293 errs() << Prefix << ": " << EC.message() << "\n";
294 exit(status: 1);
295}
296
297static uint32_t getCPUType(MachOObjectFile &MachO) {
298 if (MachO.is64Bit())
299 return MachO.getHeader64().cputype;
300 else
301 return MachO.getHeader().cputype;
302}
303
304static std::string getArchitectureName(const ObjectFile &Obj) {
305 if (const auto *MachO = dyn_cast<object::MachOObjectFile>(Val: &Obj)) {
306 Triple ObjTriple(MachO->getArchTriple());
307 return ObjTriple.getArchName().str();
308 }
309
310 Triple ObjTriple(Obj.makeTriple());
311 return ObjTriple.getArchName().str();
312}
313
314/// Return true if the object file has not been filtered by an --arch option.
315static bool filterArch(MachOObjectFile &Obj) {
316 if (ArchFilters.empty())
317 return true;
318
319 Triple ObjTriple(Obj.getArchTriple());
320 StringRef ObjArch = ObjTriple.getArchName();
321
322 for (StringRef Arch : ArchFilters) {
323 // Match name.
324 if (Arch == ObjArch)
325 return true;
326
327 // Match architecture number.
328 unsigned Value;
329 if (!Arch.getAsInteger(Radix: 0, Result&: Value))
330 if (Value == getCPUType(MachO&: Obj))
331 return true;
332 }
333 return false;
334}
335
336/// Determine the virtual address that is considered the base address of an ELF
337/// object file.
338///
339/// The base address of an ELF file is the "p_vaddr" of the first program
340/// header whose "p_type" is PT_LOAD.
341///
342/// \param ELFFile An ELF object file we will search.
343///
344/// \returns A valid image base address if we are able to extract one.
345template <class ELFT>
346static std::optional<uint64_t>
347getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
348 auto PhdrRangeOrErr = ELFFile.program_headers();
349 if (!PhdrRangeOrErr) {
350 consumeError(PhdrRangeOrErr.takeError());
351 return std::nullopt;
352 }
353 for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
354 if (Phdr.p_type == ELF::PT_LOAD)
355 return (uint64_t)Phdr.p_vaddr;
356 return std::nullopt;
357}
358
359/// Determine the virtual address that is considered the base address of mach-o
360/// object file.
361///
362/// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
363///
364/// \param MachO A mach-o object file we will search.
365///
366/// \returns A valid image base address if we are able to extract one.
367static std::optional<uint64_t>
368getImageBaseAddress(const object::MachOObjectFile *MachO) {
369 for (const auto &Command : MachO->load_commands()) {
370 if (Command.C.cmd == MachO::LC_SEGMENT) {
371 MachO::segment_command SLC = MachO->getSegmentLoadCommand(L: Command);
372 StringRef SegName = SLC.segname;
373 if (SegName == "__TEXT")
374 return SLC.vmaddr;
375 } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
376 MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(L: Command);
377 StringRef SegName = SLC.segname;
378 if (SegName == "__TEXT")
379 return SLC.vmaddr;
380 }
381 }
382 return std::nullopt;
383}
384
385/// Determine the virtual address that is considered the base address of an
386/// object file.
387///
388/// Since GSYM files are used for symbolication, many clients will need to
389/// easily adjust addresses they find in stack traces so the lookups happen
390/// on unslid addresses from the original object file. If the base address of
391/// a GSYM file is set to the base address of the image, then this address
392/// adjusting is much easier.
393///
394/// \param Obj An object file we will search.
395///
396/// \returns A valid image base address if we are able to extract one.
397static std::optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
398 if (const auto *MachO = dyn_cast<object::MachOObjectFile>(Val: &Obj))
399 return getImageBaseAddress(MachO);
400 else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(Val: &Obj))
401 return getImageBaseAddress(ELFFile: ELFObj->getELFFile());
402 else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(Val: &Obj))
403 return getImageBaseAddress(ELFFile: ELFObj->getELFFile());
404 else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(Val: &Obj))
405 return getImageBaseAddress(ELFFile: ELFObj->getELFFile());
406 else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(Val: &Obj))
407 return getImageBaseAddress(ELFFile: ELFObj->getELFFile());
408 return std::nullopt;
409}
410
411static Expected<ObjectFile *>
412resolveSymtabObject(StringRef ArchName, Binary *SymtabBinary,
413 StringRef SymtabPath,
414 std::unique_ptr<ObjectFile> &OwnedSymtabObj) {
415 if (!SymtabBinary)
416 return nullptr;
417
418 if (auto *SymtabObj = dyn_cast<ObjectFile>(Val: SymtabBinary)) {
419 std::string SymtabArchName = getArchitectureName(Obj: *SymtabObj);
420 if (SymtabArchName != ArchName)
421 return createStringError(EC: std::errc::invalid_argument,
422 Fmt: "architecture mismatch: input file is %s but "
423 "symbol table file '%s' is %s",
424 Vals: ArchName.str().c_str(), Vals: SymtabPath.str().c_str(),
425 Vals: SymtabArchName.c_str());
426
427 return SymtabObj;
428 }
429
430 if (auto *SymtabFat = dyn_cast<MachOUniversalBinary>(Val: SymtabBinary)) {
431 auto SymtabObjOrErr = SymtabFat->getMachOObjectForArch(ArchName);
432 if (!SymtabObjOrErr) {
433 consumeError(Err: SymtabObjOrErr.takeError());
434 return createStringError(
435 EC: std::errc::invalid_argument,
436 Fmt: "symbol table file '%s' does not contain architecture '%s'",
437 Vals: SymtabPath.str().c_str(), Vals: ArchName.str().c_str());
438 }
439
440 OwnedSymtabObj = std::move(*SymtabObjOrErr);
441 return OwnedSymtabObj.get();
442 }
443
444 return createStringError(EC: std::errc::invalid_argument,
445 Fmt: "symbol table file '%s' is not a valid object file",
446 Vals: SymtabPath.str().c_str());
447}
448
449static llvm::Error handleObjectFile(ObjectFile &Obj, ObjectFile *SymtabObj,
450 StringRef SymtabPath,
451 const std::string &OutFile,
452 OutputAggregator &Out) {
453 auto ThreadCount =
454 NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
455
456 std::unique_ptr<GsymCreator> GsymPtr;
457 switch (OutputVersion) {
458 case Header::getVersion():
459 GsymPtr = std::make_unique<GsymCreatorV1>(args&: Quiet);
460 break;
461 case HeaderV2::getVersion():
462 GsymPtr = std::make_unique<GsymCreatorV2>(args&: Quiet);
463 break;
464 default:
465 return createStringError(EC: std::errc::invalid_argument,
466 Fmt: "invalid --output-version option");
467 }
468 GsymCreator &Gsym = *GsymPtr;
469
470 // See if we can figure out the base address for a given object file, and if
471 // we can, then set the base address to use to this value. This will ease
472 // symbolication since clients can slide the GSYM lookup addresses by using
473 // the load bias of the shared library.
474 if (auto ImageBaseAddr = getImageBaseAddress(Obj))
475 Gsym.setBaseAddress(*ImageBaseAddr);
476
477 // We need to know where the valid sections are that contain instructions.
478 // See header documentation for DWARFTransformer::SetValidTextRanges() for
479 // defails.
480 AddressRanges TextRanges;
481 for (const object::SectionRef &Sect : Obj.sections()) {
482 if (!Sect.isText())
483 continue;
484 const uint64_t Size = Sect.getSize();
485 if (Size == 0)
486 continue;
487 const uint64_t StartAddr = Sect.getAddress();
488 TextRanges.insert(Range: AddressRange(StartAddr, StartAddr + Size));
489 }
490
491 // Make sure there is DWARF to convert first.
492 std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
493 Obj,
494 /*RelocAction=*/DWARFContext::ProcessDebugRelocations::Process,
495 L: nullptr,
496 /*DWPName=*/"",
497 /*RecoverableErrorHandler=*/WithColor::defaultErrorHandler,
498 /*WarningHandler=*/WithColor::defaultWarningHandler,
499 /*ThreadSafe*/true);
500 if (!DICtx)
501 return createStringError(EC: std::errc::invalid_argument,
502 Fmt: "unable to create DWARF context");
503
504 // Make a DWARF transformer object and populate the ranges of the code
505 // so we don't end up adding invalid functions to GSYM data.
506 bool IsMachO = dyn_cast<object::MachOObjectFile>(Val: &Obj) != nullptr;
507
508 DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites, IsMachO);
509 if (!TextRanges.empty())
510 Gsym.SetValidTextRanges(TextRanges);
511
512 // Convert all DWARF to GSYM.
513 if (auto Err = DT.convert(NumThreads: ThreadCount, OS&: Out))
514 return Err;
515
516 // If enabled, merge functions with identical address ranges as merged
517 // functions in the first FunctionInfo with that address range. Do this right
518 // after loading the DWARF data so we don't have to deal with functions from
519 // the symbol table.
520 if (UseMergedFunctions)
521 Gsym.prepareMergedFunctions(Out);
522
523 // Get the UUID and convert symbol table to GSYM.
524 if (SymtabObj) {
525 Out << "Using symbol table file: " << SymtabPath << "\n";
526 if (auto Err = ObjectFileTransformer::convert(Obj: *SymtabObj, Output&: Out, Gsym))
527 return Err;
528 } else if (auto Err = ObjectFileTransformer::convert(Obj, Output&: Out, Gsym)) {
529 return Err;
530 }
531
532 // If any call site YAML files were specified, load them now.
533 if (!CallSiteYamlPath.empty())
534 if (auto Err = Gsym.loadCallSitesFromYAML(YAMLFile: CallSiteYamlPath))
535 return Err;
536
537 // Finalize the GSYM to make it ready to save to disk. This will remove
538 // duplicate FunctionInfo entries where we might have found an entry from
539 // debug info and also a symbol table entry from the object file.
540 if (auto Err = Gsym.finalize(OS&: Out))
541 return Err;
542
543 // Save the GSYM file to disk.
544 llvm::endianness Endian = Obj.makeTriple().isLittleEndian()
545 ? llvm::endianness::little
546 : llvm::endianness::big;
547
548 std::optional<uint64_t> OptSegmentSize;
549 if (SegmentSize > 0)
550 OptSegmentSize = SegmentSize;
551 if (auto Err = Gsym.save(Path: OutFile, ByteOrder: Endian, SegmentSize: OptSegmentSize))
552 return Err;
553
554 // Verify the DWARF if requested. This will ensure all the info in the DWARF
555 // can be looked up in the GSYM and that all lookups get matching data.
556 if (Verify) {
557 if (auto Err = DT.verify(GsymPath: OutFile, OS&: Out))
558 return Err;
559 }
560
561 return Error::success();
562}
563
564static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
565 Binary *SymtabBinary, StringRef SymtabPath,
566 const std::string &OutFile,
567 OutputAggregator &Out) {
568 Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Source: Buffer);
569 error(Prefix: Filename, EC: errorToErrorCode(Err: BinOrErr.takeError()));
570
571 if (auto *Obj = dyn_cast<ObjectFile>(Val: BinOrErr->get())) {
572 std::string ArchName = getArchitectureName(Obj: *Obj);
573 std::unique_ptr<ObjectFile> OwnedSymtabObj;
574 auto SymtabObjOrErr =
575 resolveSymtabObject(ArchName, SymtabBinary, SymtabPath, OwnedSymtabObj);
576 if (!SymtabObjOrErr)
577 return SymtabObjOrErr.takeError();
578
579 outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
580 if (auto Err =
581 handleObjectFile(Obj&: *Obj, SymtabObj: *SymtabObjOrErr, SymtabPath, OutFile, Out))
582 return Err;
583 } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(Val: BinOrErr->get())) {
584 // Iterate over all contained architectures and filter out any that were
585 // not specified with the "--arch <arch>" option. If the --arch option was
586 // not specified on the command line, we will process all architectures.
587 std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
588 for (auto &ObjForArch : Fat->objects()) {
589 auto MachOOrErr = ObjForArch.getAsObjectFile();
590 if (!MachOOrErr) {
591 error(Prefix: Filename, Err: MachOOrErr.takeError());
592 continue;
593 }
594
595 std::unique_ptr<MachOObjectFile> Obj = std::move(*MachOOrErr);
596 if (filterArch(Obj&: *Obj))
597 FilterObjs.emplace_back(args: std::move(Obj));
598 }
599 if (FilterObjs.empty())
600 error(Prefix: Filename, Err: createStringError(EC: std::errc::invalid_argument,
601 Fmt: "no matching architectures found"));
602
603 // Now handle each architecture we need to convert.
604 bool MultipleArchitecturesSelected = FilterObjs.size() > 1;
605 if (MultipleArchitecturesSelected && SymtabBinary &&
606 isa<ObjectFile>(Val: SymtabBinary))
607 return createStringError(
608 EC: std::errc::invalid_argument,
609 Fmt: "symbol table file '%s' is not a universal binary, but the input "
610 "contains multiple architectures; use --arch to select a single "
611 "architecture",
612 Vals: SymtabPath.str().c_str());
613
614 for (auto &Obj : FilterObjs) {
615 std::string ArchName = getArchitectureName(Obj: *Obj);
616 std::unique_ptr<ObjectFile> OwnedSymtabObj;
617 auto SymtabObjOrErr = resolveSymtabObject(ArchName, SymtabBinary,
618 SymtabPath, OwnedSymtabObj);
619 if (!SymtabObjOrErr)
620 return SymtabObjOrErr.takeError();
621
622 std::string ArchOutFile(OutFile);
623 // If we are only handling a single architecture, then we will use the
624 // normal output file. If we are handling multiple architectures append
625 // the architecture name to the end of the out file path so that we
626 // don't overwrite the previous architecture's gsym file.
627 if (MultipleArchitecturesSelected) {
628 ArchOutFile.append(n: 1, c: '.');
629 ArchOutFile.append(str: ArchName);
630 }
631 outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
632 if (auto Err = handleObjectFile(Obj&: *Obj, SymtabObj: *SymtabObjOrErr, SymtabPath,
633 OutFile: ArchOutFile, Out))
634 return Err;
635 }
636 }
637 return Error::success();
638}
639
640static llvm::Error handleFileConversionToGSYM(StringRef Filename,
641 const std::string &OutFile,
642 OutputAggregator &Out) {
643 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
644 MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
645 error(Prefix: Filename, EC: BuffOrErr.getError());
646 std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
647
648 std::unique_ptr<MemoryBuffer> SymtabBuffer;
649 std::unique_ptr<Binary> SymtabBinary;
650 if (!SymtabFilename.empty()) {
651 auto SymtabBufOrErr =
652 MemoryBuffer::getFile(Filename: SymtabFilename, /*IsText=*/true);
653 if (!SymtabBufOrErr)
654 return createStringError(EC: SymtabBufOrErr.getError(),
655 Fmt: "failed to open symbol table file '%s'",
656 Vals: SymtabFilename.c_str());
657
658 SymtabBuffer = std::move(*SymtabBufOrErr);
659 auto SymtabBinOrErr = object::createBinary(Source: *SymtabBuffer);
660 if (!SymtabBinOrErr)
661 return SymtabBinOrErr.takeError();
662 SymtabBinary = std::move(*SymtabBinOrErr);
663 }
664
665 return handleBuffer(Filename, Buffer: *Buffer, SymtabBinary: SymtabBinary.get(), SymtabPath: SymtabFilename,
666 OutFile, Out);
667}
668
669static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
670 // Expand any .dSYM bundles to the individual object files contained therein.
671 std::vector<std::string> Objects;
672 std::string OutFile = OutputFilename;
673 if (OutFile.empty()) {
674 OutFile = ConvertFilename;
675 OutFile += ".gsym";
676 }
677
678 Out << "Input file: " << ConvertFilename << "\n";
679
680 if (auto DsymObjectsOrErr =
681 MachOObjectFile::findDsymObjectMembers(Path: ConvertFilename)) {
682 if (DsymObjectsOrErr->empty())
683 Objects.push_back(x: ConvertFilename);
684 else
685 llvm::append_range(C&: Objects, R&: *DsymObjectsOrErr);
686 } else {
687 error(Err: DsymObjectsOrErr.takeError());
688 }
689
690 for (StringRef Object : Objects)
691 if (Error Err = handleFileConversionToGSYM(Filename: Object, OutFile, Out))
692 return Err;
693 return Error::success();
694}
695
696static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
697 if (UseMergedFunctions) {
698 if (auto Results = Gsym.lookupAll(Addr)) {
699 // If we have filters, count matching results first
700 size_t NumMatching = Results->size();
701 if (!MergedFunctionsFilters.empty()) {
702 NumMatching = 0;
703 for (const auto &Result : *Results) {
704 bool Matches = false;
705 for (const auto &Filter : MergedFunctionsFilters) {
706 Regex Pattern(Filter);
707 if (Pattern.match(String: Result.FuncName)) {
708 Matches = true;
709 break;
710 }
711 }
712 if (Matches)
713 NumMatching++;
714 }
715 }
716
717 OS << "Found " << NumMatching << " function"
718 << (NumMatching != 1 ? "s" : "") << " at address " << HEX64(Addr)
719 << ":\n";
720
721 for (size_t i = 0; i < Results->size(); ++i) {
722 // Skip if doesn't match any filter
723 if (!MergedFunctionsFilters.empty()) {
724 bool Matches = false;
725 for (const auto &Filter : MergedFunctionsFilters) {
726 Regex Pattern(Filter);
727 if (Pattern.match(String: Results->at(n: i).FuncName)) {
728 Matches = true;
729 break;
730 }
731 }
732 if (!Matches)
733 continue;
734 }
735
736 OS << " " << Results->at(n: i);
737
738 if (i != Results->size() - 1)
739 OS << "\n";
740 }
741 }
742 } else { /* UseMergedFunctions == false */
743 if (auto Result = Gsym.lookup(Addr)) {
744 // If verbose is enabled dump the full function info for the address.
745 if (Verbose) {
746 if (auto FI = Gsym.getFunctionInfo(Addr)) {
747 OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
748 Gsym.dump(OS, FI: *FI);
749 OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
750 }
751 }
752 // Don't print call site info if --merged-functions is not specified.
753 Result->CallSiteFuncRegex.clear();
754 OS << Result.get();
755 } else {
756 if (Verbose)
757 OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
758 OS << HEX64(Addr) << ": ";
759 logAllUnhandledErrors(E: Result.takeError(), OS, ErrorBanner: "error: ");
760 }
761 if (Verbose)
762 OS << "\n";
763 }
764}
765
766static llvm::Error benchmarkReader(StringRef GSYMPath, uint32_t Start,
767 uint32_t Stride) {
768 auto Gsym = GsymReader::openFile(Path: GSYMPath);
769 if (!Gsym)
770 return Gsym.takeError();
771 uint32_t N = (*Gsym)->getNumAddresses();
772 uint32_t NumLookups = 0;
773 for (uint32_t I = Start; I < N; I += Stride) {
774 auto Addr = (*Gsym)->getAddress(Index: I);
775 if (!Addr)
776 return createStringError(EC: std::errc::invalid_argument,
777 Fmt: "failed to extract address[%u]", Vals: I);
778 auto LR = (*Gsym)->lookup(Addr: *Addr);
779 if (!LR)
780 return LR.takeError();
781 ++NumLookups;
782 }
783 outs() << "Benchmarked " << NumLookups << " lookups (out of " << N
784 << " addresses) in \"" << GSYMPath << "\"\n";
785 return Error::success();
786}
787
788int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
789 // Print a stack trace if we signal out.
790 sys::PrintStackTraceOnErrorSignal(Argv0: argv[0]);
791 PrettyStackTraceProgram X(argc, argv);
792 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
793
794 llvm::InitializeAllTargets();
795
796 parseArgs(argc, argv);
797
798 raw_ostream &OS = outs();
799
800 if (BenchmarkReader) {
801 for (const auto &GSYMPath : InputFilenames)
802 if (auto Err = benchmarkReader(GSYMPath, Start: BenchmarkStart, Stride: BenchmarkStride))
803 error(Prefix: "Benchmark failed: ", Err: std::move(Err));
804 return EXIT_SUCCESS;
805 }
806
807 OutputAggregator Aggregation(&OS);
808 if (!ConvertFilename.empty()) {
809 // Convert DWARF to GSYM
810 if (!InputFilenames.empty()) {
811 OS << "error: no input files can be specified when using the --convert "
812 "option.\n";
813 return 1;
814 }
815 // Call error() if we have an error and it will exit with a status of 1
816 if (auto Err = convertFileToGSYM(Out&: Aggregation))
817 error(Prefix: "DWARF conversion failed: ", Err: std::move(Err));
818
819 // Report the errors from aggregator:
820 Aggregation.EnumerateResults(handleCounts: [&](StringRef category, unsigned count) {
821 OS << category << " occurred " << count << " time(s)\n";
822 });
823 if (!JsonSummaryFile.empty()) {
824 std::error_code EC;
825 raw_fd_ostream JsonStream(JsonSummaryFile, EC, sys::fs::OF_Text);
826 if (EC) {
827 OS << "error opening aggregate error json file '" << JsonSummaryFile
828 << "' for writing: " << EC.message() << '\n';
829 return 1;
830 }
831
832 llvm::json::Object Categories;
833 uint64_t ErrorCount = 0;
834 Aggregation.EnumerateResults(handleCounts: [&](StringRef Category, unsigned Count) {
835 llvm::json::Object Val;
836 Val.try_emplace(K: "count", Args&: Count);
837 Categories.try_emplace(K: Category, Args: std::move(Val));
838 ErrorCount += Count;
839 });
840 llvm::json::Object RootNode;
841 RootNode.try_emplace(K: "error-categories", Args: std::move(Categories));
842 RootNode.try_emplace(K: "error-count", Args&: ErrorCount);
843
844 JsonStream << llvm::json::Value(std::move(RootNode));
845 }
846 return 0;
847 }
848
849 if (LookupAddressesFromStdin) {
850 if (!LookupAddresses.empty() || !InputFilenames.empty()) {
851 OS << "error: no input files or addresses can be specified when using "
852 "the --addresses-from-stdin "
853 "option.\n";
854 return 1;
855 }
856
857 std::string InputLine;
858 std::string CurrentGSYMPath;
859 std::unique_ptr<GsymReader> CurrentGsym;
860
861 while (std::getline(is&: std::cin, str&: InputLine)) {
862 // Strip newline characters.
863 std::string StrippedInputLine(InputLine);
864 llvm::erase_if(C&: StrippedInputLine,
865 P: [](char c) { return c == '\r' || c == '\n'; });
866
867 StringRef AddrStr, GSYMPath;
868 std::tie(args&: AddrStr, args&: GSYMPath) =
869 llvm::StringRef{StrippedInputLine}.split(Separator: ' ');
870
871 if (GSYMPath != CurrentGSYMPath) {
872 auto GsymOrErr = GsymReader::openFile(Path: GSYMPath);
873 if (!GsymOrErr)
874 error(Prefix: GSYMPath, Err: GsymOrErr.takeError());
875 CurrentGsym = std::move(*GsymOrErr);
876 CurrentGSYMPath = GSYMPath;
877 }
878
879 uint64_t Addr;
880 if (AddrStr.getAsInteger(Radix: 0, Result&: Addr)) {
881 OS << "error: invalid address " << AddrStr
882 << ", expected: Address GsymFile.\n";
883 return 1;
884 }
885
886 doLookup(Gsym&: *CurrentGsym, Addr, OS);
887
888 OS << "\n";
889 OS.flush();
890 }
891
892 return EXIT_SUCCESS;
893 }
894
895 // Dump or access data inside GSYM files
896 for (const auto &GSYMPath : InputFilenames) {
897 auto Gsym = GsymReader::openFile(Path: GSYMPath);
898 if (!Gsym)
899 error(Prefix: GSYMPath, Err: Gsym.takeError());
900
901 if (LookupAddresses.empty()) {
902 (*Gsym)->dump(OS&: outs());
903 continue;
904 }
905
906 // Lookup an address in a GSYM file and print any matches.
907 OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
908 for (auto Addr : LookupAddresses) {
909 doLookup(Gsym&: **Gsym, Addr, OS);
910 }
911 }
912 return EXIT_SUCCESS;
913}
914