1//===- GsymReader.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/GSYM/GsymReader.h"
10
11#include <assert.h>
12#include <inttypes.h>
13#include <stdio.h>
14#include <stdlib.h>
15
16#include "llvm/DebugInfo/GSYM/InlineInfo.h"
17#include "llvm/DebugInfo/GSYM/LineTable.h"
18#include "llvm/Support/BinaryStreamReader.h"
19#include "llvm/Support/DataExtractor.h"
20#include "llvm/Support/MemoryBuffer.h"
21
22using namespace llvm;
23using namespace gsym;
24
25GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
26 : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
27
28GsymReader::GsymReader(GsymReader &&RHS) = default;
29
30GsymReader::~GsymReader() = default;
31
32llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
33 // Open the input file and return an appropriate error if needed.
34 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
35 MemoryBuffer::getFileOrSTDIN(Filename);
36 auto Err = BuffOrErr.getError();
37 if (Err)
38 return llvm::errorCodeToError(EC: Err);
39 return create(MemBuffer&: BuffOrErr.get());
40}
41
42llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
43 auto MemBuffer = MemoryBuffer::getMemBufferCopy(InputData: Bytes, BufferName: "GSYM bytes");
44 return create(MemBuffer);
45}
46
47llvm::Expected<llvm::gsym::GsymReader>
48GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
49 if (!MemBuffer)
50 return createStringError(EC: std::errc::invalid_argument,
51 Fmt: "invalid memory buffer");
52 GsymReader GR(std::move(MemBuffer));
53 llvm::Error Err = GR.parse();
54 if (Err)
55 return std::move(Err);
56 return std::move(GR);
57}
58
59llvm::Error
60GsymReader::parse() {
61 BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
62 // Check for the magic bytes. This file format is designed to be mmap'ed
63 // into a process and accessed as read only. This is done for performance
64 // and efficiency for symbolicating and parsing GSYM data.
65 if (FileData.readObject(Dest&: Hdr))
66 return createStringError(EC: std::errc::invalid_argument,
67 Fmt: "not enough data for a GSYM header");
68
69 const auto HostByteOrder = llvm::endianness::native;
70 switch (Hdr->Magic) {
71 case GSYM_MAGIC:
72 Endian = HostByteOrder;
73 break;
74 case GSYM_CIGAM:
75 // This is a GSYM file, but not native endianness.
76 Endian = sys::IsBigEndianHost ? llvm::endianness::little
77 : llvm::endianness::big;
78 Swap.reset(p: new SwappedData);
79 break;
80 default:
81 return createStringError(EC: std::errc::invalid_argument,
82 Fmt: "not a GSYM file");
83 }
84
85 bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
86 // Read a correctly byte swapped header if we need to.
87 if (Swap) {
88 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
89 if (auto ExpectedHdr = Header::decode(Data))
90 Swap->Hdr = ExpectedHdr.get();
91 else
92 return ExpectedHdr.takeError();
93 Hdr = &Swap->Hdr;
94 }
95
96 // Detect errors in the header and report any that are found. If we make it
97 // past this without errors, we know we have a good magic value, a supported
98 // version number, verified address offset size and a valid UUID size.
99 if (Error Err = Hdr->checkForError())
100 return Err;
101
102 if (!Swap) {
103 // This is the native endianness case that is most common and optimized for
104 // efficient lookups. Here we just grab pointers to the native data and
105 // use ArrayRef objects to allow efficient read only access.
106
107 // Read the address offsets.
108 if (FileData.padToAlignment(Align: Hdr->AddrOffSize) ||
109 FileData.readArray(Array&: AddrOffsets,
110 NumElements: Hdr->NumAddresses * Hdr->AddrOffSize))
111 return createStringError(EC: std::errc::invalid_argument,
112 Fmt: "failed to read address table");
113
114 // Read the address info offsets.
115 if (FileData.padToAlignment(Align: 4) ||
116 FileData.readArray(Array&: AddrInfoOffsets, NumElements: Hdr->NumAddresses))
117 return createStringError(EC: std::errc::invalid_argument,
118 Fmt: "failed to read address info offsets table");
119
120 // Read the file table.
121 uint32_t NumFiles = 0;
122 if (FileData.readInteger(Dest&: NumFiles) || FileData.readArray(Array&: Files, NumElements: NumFiles))
123 return createStringError(EC: std::errc::invalid_argument,
124 Fmt: "failed to read file table");
125
126 // Get the string table.
127 FileData.setOffset(Hdr->StrtabOffset);
128 if (FileData.readFixedString(Dest&: StrTab.Data, Length: Hdr->StrtabSize))
129 return createStringError(EC: std::errc::invalid_argument,
130 Fmt: "failed to read string table");
131} else {
132 // This is the non native endianness case that is not common and not
133 // optimized for lookups. Here we decode the important tables into local
134 // storage and then set the ArrayRef objects to point to these swapped
135 // copies of the read only data so lookups can be as efficient as possible.
136 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
137
138 // Read the address offsets.
139 uint64_t Offset = alignTo(Value: sizeof(Header), Align: Hdr->AddrOffSize);
140 Swap->AddrOffsets.resize(new_size: Hdr->NumAddresses * Hdr->AddrOffSize);
141 switch (Hdr->AddrOffSize) {
142 case 1:
143 if (!Data.getU8(offset_ptr: &Offset, dst: Swap->AddrOffsets.data(), count: Hdr->NumAddresses))
144 return createStringError(EC: std::errc::invalid_argument,
145 Fmt: "failed to read address table");
146 break;
147 case 2:
148 if (!Data.getU16(offset_ptr: &Offset,
149 dst: reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
150 count: Hdr->NumAddresses))
151 return createStringError(EC: std::errc::invalid_argument,
152 Fmt: "failed to read address table");
153 break;
154 case 4:
155 if (!Data.getU32(offset_ptr: &Offset,
156 dst: reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
157 count: Hdr->NumAddresses))
158 return createStringError(EC: std::errc::invalid_argument,
159 Fmt: "failed to read address table");
160 break;
161 case 8:
162 if (!Data.getU64(offset_ptr: &Offset,
163 dst: reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
164 count: Hdr->NumAddresses))
165 return createStringError(EC: std::errc::invalid_argument,
166 Fmt: "failed to read address table");
167 }
168 AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
169
170 // Read the address info offsets.
171 Offset = alignTo(Value: Offset, Align: 4);
172 Swap->AddrInfoOffsets.resize(new_size: Hdr->NumAddresses);
173 if (Data.getU32(offset_ptr: &Offset, dst: Swap->AddrInfoOffsets.data(), count: Hdr->NumAddresses))
174 AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
175 else
176 return createStringError(EC: std::errc::invalid_argument,
177 Fmt: "failed to read address table");
178 // Read the file table.
179 const uint32_t NumFiles = Data.getU32(offset_ptr: &Offset);
180 if (NumFiles > 0) {
181 Swap->Files.resize(new_size: NumFiles);
182 if (Data.getU32(offset_ptr: &Offset, dst: &Swap->Files[0].Dir, count: NumFiles*2))
183 Files = ArrayRef<FileEntry>(Swap->Files);
184 else
185 return createStringError(EC: std::errc::invalid_argument,
186 Fmt: "failed to read file table");
187 }
188 // Get the string table.
189 StrTab.Data = MemBuffer->getBuffer().substr(Start: Hdr->StrtabOffset,
190 N: Hdr->StrtabSize);
191 if (StrTab.Data.empty())
192 return createStringError(EC: std::errc::invalid_argument,
193 Fmt: "failed to read string table");
194 }
195 return Error::success();
196
197}
198
199const Header &GsymReader::getHeader() const {
200 // The only way to get a GsymReader is from GsymReader::openFile(...) or
201 // GsymReader::copyBuffer() and the header must be valid and initialized to
202 // a valid pointer value, so the assert below should not trigger.
203 assert(Hdr);
204 return *Hdr;
205}
206
207std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
208 switch (Hdr->AddrOffSize) {
209 case 1: return addressForIndex<uint8_t>(Index);
210 case 2: return addressForIndex<uint16_t>(Index);
211 case 4: return addressForIndex<uint32_t>(Index);
212 case 8: return addressForIndex<uint64_t>(Index);
213 }
214 return std::nullopt;
215}
216
217std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
218 const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
219 if (Index < NumAddrInfoOffsets)
220 return AddrInfoOffsets[Index];
221 return std::nullopt;
222}
223
224Expected<uint64_t>
225GsymReader::getAddressIndex(const uint64_t Addr) const {
226 if (Addr >= Hdr->BaseAddress) {
227 const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
228 std::optional<uint64_t> AddrOffsetIndex;
229 switch (Hdr->AddrOffSize) {
230 case 1:
231 AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
232 break;
233 case 2:
234 AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
235 break;
236 case 4:
237 AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
238 break;
239 case 8:
240 AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
241 break;
242 default:
243 return createStringError(EC: std::errc::invalid_argument,
244 Fmt: "unsupported address offset size %u",
245 Vals: Hdr->AddrOffSize);
246 }
247 if (AddrOffsetIndex)
248 return *AddrOffsetIndex;
249 }
250 return createStringError(EC: std::errc::invalid_argument,
251 Fmt: "address 0x%" PRIx64 " is not in GSYM", Vals: Addr);
252
253}
254
255llvm::Expected<DataExtractor>
256GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
257 uint64_t &FuncStartAddr) const {
258 Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
259 if (!ExpectedAddrIdx)
260 return ExpectedAddrIdx.takeError();
261 const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
262 // The AddrIdx is the first index of the function info entries that match
263 // \a Addr. We need to iterate over all function info objects that start with
264 // the same address until we find a range that contains \a Addr.
265 std::optional<uint64_t> FirstFuncStartAddr;
266 const size_t NumAddresses = getNumAddresses();
267 for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
268 auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
269 // If there was an error, return the error.
270 if (!ExpextedData)
271 return ExpextedData;
272
273 // Remember the first function start address if it hasn't already been set.
274 // If it is already valid, check to see if it matches the first function
275 // start address and only continue if it matches.
276 if (FirstFuncStartAddr.has_value()) {
277 if (*FirstFuncStartAddr != FuncStartAddr)
278 break; // Done with consecutive function entries with same address.
279 } else {
280 FirstFuncStartAddr = FuncStartAddr;
281 }
282 // Make sure the current function address ranges contains \a Addr.
283 // Some symbols on Darwin don't have valid sizes, so if we run into a
284 // symbol with zero size, then we have found a match for our address.
285
286 // The first thing the encoding of a FunctionInfo object is the function
287 // size.
288 uint64_t Offset = 0;
289 uint32_t FuncSize = ExpextedData->getU32(offset_ptr: &Offset);
290 if (FuncSize == 0 ||
291 AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
292 return ExpextedData;
293 }
294 return createStringError(EC: std::errc::invalid_argument,
295 Fmt: "address 0x%" PRIx64 " is not in GSYM", Vals: Addr);
296}
297
298llvm::Expected<DataExtractor>
299GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
300 uint64_t &FuncStartAddr) const {
301 if (AddrIdx >= getNumAddresses())
302 return createStringError(EC: std::errc::invalid_argument,
303 Fmt: "invalid address index %" PRIu64, Vals: AddrIdx);
304 const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
305 assert((Endian == endianness::big || Endian == endianness::little) &&
306 "Endian must be either big or little");
307 StringRef Bytes = MemBuffer->getBuffer().substr(Start: AddrInfoOffset);
308 if (Bytes.empty())
309 return createStringError(EC: std::errc::invalid_argument,
310 Fmt: "invalid address info offset 0x%" PRIx32,
311 Vals: AddrInfoOffset);
312 std::optional<uint64_t> OptFuncStartAddr = getAddress(Index: AddrIdx);
313 if (!OptFuncStartAddr)
314 return createStringError(EC: std::errc::invalid_argument,
315 Fmt: "failed to extract address[%" PRIu64 "]", Vals: AddrIdx);
316 FuncStartAddr = *OptFuncStartAddr;
317 return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
318}
319
320llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
321 uint64_t FuncStartAddr = 0;
322 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
323 return FunctionInfo::decode(Data&: *ExpectedData, BaseAddr: FuncStartAddr);
324 else
325 return ExpectedData.takeError();
326}
327
328llvm::Expected<FunctionInfo>
329GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
330 uint64_t FuncStartAddr = 0;
331 if (auto ExpectedData = getFunctionInfoDataAtIndex(AddrIdx: Idx, FuncStartAddr))
332 return FunctionInfo::decode(Data&: *ExpectedData, BaseAddr: FuncStartAddr);
333 else
334 return ExpectedData.takeError();
335}
336
337llvm::Expected<LookupResult>
338GsymReader::lookup(uint64_t Addr,
339 std::optional<DataExtractor> *MergedFunctionsData) const {
340 uint64_t FuncStartAddr = 0;
341 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
342 return FunctionInfo::lookup(Data&: *ExpectedData, GR: *this, FuncAddr: FuncStartAddr, Addr,
343 MergedFuncsData: MergedFunctionsData);
344 else
345 return ExpectedData.takeError();
346}
347
348llvm::Expected<std::vector<LookupResult>>
349GsymReader::lookupAll(uint64_t Addr) const {
350 std::vector<LookupResult> Results;
351 std::optional<DataExtractor> MergedFunctionsData;
352
353 // First perform a lookup to get the primary function info result.
354 auto MainResult = lookup(Addr, MergedFunctionsData: &MergedFunctionsData);
355 if (!MainResult)
356 return MainResult.takeError();
357
358 // Add the main result as the first entry.
359 Results.push_back(x: std::move(*MainResult));
360
361 // Now process any merged functions data that was found during the lookup.
362 if (MergedFunctionsData) {
363 // Get data extractors for each merged function.
364 auto ExpectedMergedFuncExtractors =
365 MergedFunctionsInfo::getFuncsDataExtractors(Data&: *MergedFunctionsData);
366 if (!ExpectedMergedFuncExtractors)
367 return ExpectedMergedFuncExtractors.takeError();
368
369 // Process each merged function data.
370 for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
371 if (auto FI = FunctionInfo::lookup(Data&: MergedData, GR: *this,
372 FuncAddr: MainResult->FuncRange.start(), Addr)) {
373 Results.push_back(x: std::move(*FI));
374 } else {
375 return FI.takeError();
376 }
377 }
378 }
379
380 return Results;
381}
382
383void GsymReader::dump(raw_ostream &OS) {
384 const auto &Header = getHeader();
385 // Dump the GSYM header.
386 OS << Header << "\n";
387 // Dump the address table.
388 OS << "Address Table:\n";
389 OS << "INDEX OFFSET";
390
391 switch (Hdr->AddrOffSize) {
392 case 1: OS << "8 "; break;
393 case 2: OS << "16"; break;
394 case 4: OS << "32"; break;
395 case 8: OS << "64"; break;
396 default: OS << "??"; break;
397 }
398 OS << " (ADDRESS)\n";
399 OS << "====== =============================== \n";
400 for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
401 OS << format(Fmt: "[%4u] ", Vals: I);
402 switch (Hdr->AddrOffSize) {
403 case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
404 case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
405 case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
406 case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
407 default: break;
408 }
409 OS << " (" << HEX64(*getAddress(I)) << ")\n";
410 }
411 // Dump the address info offsets table.
412 OS << "\nAddress Info Offsets:\n";
413 OS << "INDEX Offset\n";
414 OS << "====== ==========\n";
415 for (uint32_t I = 0; I < Header.NumAddresses; ++I)
416 OS << format(Fmt: "[%4u] ", Vals: I) << HEX32(AddrInfoOffsets[I]) << "\n";
417 // Dump the file table.
418 OS << "\nFiles:\n";
419 OS << "INDEX DIRECTORY BASENAME PATH\n";
420 OS << "====== ========== ========== ==============================\n";
421 for (uint32_t I = 0; I < Files.size(); ++I) {
422 OS << format(Fmt: "[%4u] ", Vals: I) << HEX32(Files[I].Dir) << ' '
423 << HEX32(Files[I].Base) << ' ';
424 dump(OS, FE: getFile(Index: I));
425 OS << "\n";
426 }
427 OS << "\n" << StrTab << "\n";
428
429 for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
430 OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
431 if (auto FI = getFunctionInfoAtIndex(Idx: I))
432 dump(OS, FI: *FI);
433 else
434 logAllUnhandledErrors(E: FI.takeError(), OS, ErrorBanner: "FunctionInfo:");
435 }
436}
437
438void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
439 uint32_t Indent) {
440 OS.indent(NumSpaces: Indent);
441 OS << FI.Range << " \"" << getString(Offset: FI.Name) << "\"\n";
442 if (FI.OptLineTable)
443 dump(OS, LT: *FI.OptLineTable, Indent);
444 if (FI.Inline)
445 dump(OS, II: *FI.Inline, Indent);
446
447 if (FI.CallSites)
448 dump(OS, CSIC: *FI.CallSites, Indent);
449
450 if (FI.MergedFunctions) {
451 assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
452 dump(OS, MFI: *FI.MergedFunctions);
453 }
454}
455
456void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
457 for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
458 OS << "++ Merged FunctionInfos[" << inx << "]:\n";
459 dump(OS, FI: MFI.MergedFunctions[inx], Indent: 4);
460 }
461}
462
463void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
464 OS << HEX16(CSI.ReturnOffset);
465
466 std::string Flags;
467 auto addFlag = [&](const char *Flag) {
468 if (!Flags.empty())
469 Flags += " | ";
470 Flags += Flag;
471 };
472
473 if (CSI.Flags == CallSiteInfo::Flags::None)
474 Flags = "None";
475 else {
476 if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
477 addFlag("InternalCall");
478
479 if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
480 addFlag("ExternalCall");
481 }
482 OS << " Flags[" << Flags << "]";
483
484 if (!CSI.MatchRegex.empty()) {
485 OS << " MatchRegex[";
486 for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
487 if (i > 0)
488 OS << ";";
489 OS << getString(Offset: CSI.MatchRegex[i]);
490 }
491 OS << "]";
492 }
493}
494
495void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
496 uint32_t Indent) {
497 OS.indent(NumSpaces: Indent);
498 OS << "CallSites (by relative return offset):\n";
499 for (const auto &CS : CSIC.CallSites) {
500 OS.indent(NumSpaces: Indent);
501 OS << " ";
502 dump(OS, CSI: CS);
503 OS << "\n";
504 }
505}
506
507void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
508 OS.indent(NumSpaces: Indent);
509 OS << "LineTable:\n";
510 for (auto &LE: LT) {
511 OS.indent(NumSpaces: Indent);
512 OS << " " << HEX64(LE.Addr) << ' ';
513 if (LE.File)
514 dump(OS, FE: getFile(Index: LE.File));
515 OS << ':' << LE.Line << '\n';
516 }
517}
518
519void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
520 if (Indent == 0)
521 OS << "InlineInfo:\n";
522 else
523 OS.indent(NumSpaces: Indent);
524 OS << II.Ranges << ' ' << getString(Offset: II.Name);
525 if (II.CallFile != 0) {
526 if (auto File = getFile(Index: II.CallFile)) {
527 OS << " called from ";
528 dump(OS, FE: File);
529 OS << ':' << II.CallLine;
530 }
531 }
532 OS << '\n';
533 for (const auto &ChildII: II.Children)
534 dump(OS, II: ChildII, Indent: Indent + 2);
535}
536
537void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
538 if (FE) {
539 // IF we have the file from index 0, then don't print anything
540 if (FE->Dir == 0 && FE->Base == 0)
541 return;
542 StringRef Dir = getString(Offset: FE->Dir);
543 StringRef Base = getString(Offset: FE->Base);
544 if (!Dir.empty()) {
545 OS << Dir;
546 if (Dir.contains(C: '\\') && !Dir.contains(C: '/'))
547 OS << '\\';
548 else
549 OS << '/';
550 }
551 if (!Base.empty()) {
552 OS << Base;
553 }
554 if (!Dir.empty() || !Base.empty())
555 return;
556 }
557 OS << "<invalid-file>";
558}
559