1 | //===- GsymReader.cpp -----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/DebugInfo/GSYM/GsymReader.h" |
10 | |
11 | #include <assert.h> |
12 | #include <inttypes.h> |
13 | #include <stdio.h> |
14 | #include <stdlib.h> |
15 | |
16 | #include "llvm/DebugInfo/GSYM/GsymCreator.h" |
17 | #include "llvm/DebugInfo/GSYM/InlineInfo.h" |
18 | #include "llvm/DebugInfo/GSYM/LineTable.h" |
19 | #include "llvm/Support/BinaryStreamReader.h" |
20 | #include "llvm/Support/DataExtractor.h" |
21 | #include "llvm/Support/MemoryBuffer.h" |
22 | |
23 | using namespace llvm; |
24 | using namespace gsym; |
25 | |
26 | GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) |
27 | : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {} |
28 | |
29 | GsymReader::GsymReader(GsymReader &&RHS) = default; |
30 | |
31 | GsymReader::~GsymReader() = default; |
32 | |
33 | llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) { |
34 | // Open the input file and return an appropriate error if needed. |
35 | ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = |
36 | MemoryBuffer::getFileOrSTDIN(Filename); |
37 | auto Err = BuffOrErr.getError(); |
38 | if (Err) |
39 | return llvm::errorCodeToError(EC: Err); |
40 | return create(MemBuffer&: BuffOrErr.get()); |
41 | } |
42 | |
43 | llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) { |
44 | auto MemBuffer = MemoryBuffer::getMemBufferCopy(InputData: Bytes, BufferName: "GSYM bytes" ); |
45 | return create(MemBuffer); |
46 | } |
47 | |
48 | llvm::Expected<llvm::gsym::GsymReader> |
49 | GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) { |
50 | if (!MemBuffer) |
51 | return createStringError(EC: std::errc::invalid_argument, |
52 | Fmt: "invalid memory buffer" ); |
53 | GsymReader GR(std::move(MemBuffer)); |
54 | llvm::Error Err = GR.parse(); |
55 | if (Err) |
56 | return std::move(Err); |
57 | return std::move(GR); |
58 | } |
59 | |
60 | llvm::Error |
61 | GsymReader::parse() { |
62 | BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native); |
63 | // Check for the magic bytes. This file format is designed to be mmap'ed |
64 | // into a process and accessed as read only. This is done for performance |
65 | // and efficiency for symbolicating and parsing GSYM data. |
66 | if (FileData.readObject(Dest&: Hdr)) |
67 | return createStringError(EC: std::errc::invalid_argument, |
68 | Fmt: "not enough data for a GSYM header" ); |
69 | |
70 | const auto HostByteOrder = llvm::endianness::native; |
71 | switch (Hdr->Magic) { |
72 | case GSYM_MAGIC: |
73 | Endian = HostByteOrder; |
74 | break; |
75 | case GSYM_CIGAM: |
76 | // This is a GSYM file, but not native endianness. |
77 | Endian = sys::IsBigEndianHost ? llvm::endianness::little |
78 | : llvm::endianness::big; |
79 | Swap.reset(p: new SwappedData); |
80 | break; |
81 | default: |
82 | return createStringError(EC: std::errc::invalid_argument, |
83 | Fmt: "not a GSYM file" ); |
84 | } |
85 | |
86 | bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little; |
87 | // Read a correctly byte swapped header if we need to. |
88 | if (Swap) { |
89 | DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); |
90 | if (auto ExpectedHdr = Header::decode(Data)) |
91 | Swap->Hdr = ExpectedHdr.get(); |
92 | else |
93 | return ExpectedHdr.takeError(); |
94 | Hdr = &Swap->Hdr; |
95 | } |
96 | |
97 | // Detect errors in the header and report any that are found. If we make it |
98 | // past this without errors, we know we have a good magic value, a supported |
99 | // version number, verified address offset size and a valid UUID size. |
100 | if (Error Err = Hdr->checkForError()) |
101 | return Err; |
102 | |
103 | if (!Swap) { |
104 | // This is the native endianness case that is most common and optimized for |
105 | // efficient lookups. Here we just grab pointers to the native data and |
106 | // use ArrayRef objects to allow efficient read only access. |
107 | |
108 | // Read the address offsets. |
109 | if (FileData.padToAlignment(Align: Hdr->AddrOffSize) || |
110 | FileData.readArray(Array&: AddrOffsets, |
111 | NumElements: Hdr->NumAddresses * Hdr->AddrOffSize)) |
112 | return createStringError(EC: std::errc::invalid_argument, |
113 | Fmt: "failed to read address table" ); |
114 | |
115 | // Read the address info offsets. |
116 | if (FileData.padToAlignment(Align: 4) || |
117 | FileData.readArray(Array&: AddrInfoOffsets, NumElements: Hdr->NumAddresses)) |
118 | return createStringError(EC: std::errc::invalid_argument, |
119 | Fmt: "failed to read address info offsets table" ); |
120 | |
121 | // Read the file table. |
122 | uint32_t NumFiles = 0; |
123 | if (FileData.readInteger(Dest&: NumFiles) || FileData.readArray(Array&: Files, NumElements: NumFiles)) |
124 | return createStringError(EC: std::errc::invalid_argument, |
125 | Fmt: "failed to read file table" ); |
126 | |
127 | // Get the string table. |
128 | FileData.setOffset(Hdr->StrtabOffset); |
129 | if (FileData.readFixedString(Dest&: StrTab.Data, Length: Hdr->StrtabSize)) |
130 | return createStringError(EC: std::errc::invalid_argument, |
131 | Fmt: "failed to read string table" ); |
132 | } else { |
133 | // This is the non native endianness case that is not common and not |
134 | // optimized for lookups. Here we decode the important tables into local |
135 | // storage and then set the ArrayRef objects to point to these swapped |
136 | // copies of the read only data so lookups can be as efficient as possible. |
137 | DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); |
138 | |
139 | // Read the address offsets. |
140 | uint64_t Offset = alignTo(Value: sizeof(Header), Align: Hdr->AddrOffSize); |
141 | Swap->AddrOffsets.resize(new_size: Hdr->NumAddresses * Hdr->AddrOffSize); |
142 | switch (Hdr->AddrOffSize) { |
143 | case 1: |
144 | if (!Data.getU8(offset_ptr: &Offset, dst: Swap->AddrOffsets.data(), count: Hdr->NumAddresses)) |
145 | return createStringError(EC: std::errc::invalid_argument, |
146 | Fmt: "failed to read address table" ); |
147 | break; |
148 | case 2: |
149 | if (!Data.getU16(offset_ptr: &Offset, |
150 | dst: reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()), |
151 | count: Hdr->NumAddresses)) |
152 | return createStringError(EC: std::errc::invalid_argument, |
153 | Fmt: "failed to read address table" ); |
154 | break; |
155 | case 4: |
156 | if (!Data.getU32(offset_ptr: &Offset, |
157 | dst: reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()), |
158 | count: Hdr->NumAddresses)) |
159 | return createStringError(EC: std::errc::invalid_argument, |
160 | Fmt: "failed to read address table" ); |
161 | break; |
162 | case 8: |
163 | if (!Data.getU64(offset_ptr: &Offset, |
164 | dst: reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()), |
165 | count: Hdr->NumAddresses)) |
166 | return createStringError(EC: std::errc::invalid_argument, |
167 | Fmt: "failed to read address table" ); |
168 | } |
169 | AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets); |
170 | |
171 | // Read the address info offsets. |
172 | Offset = alignTo(Value: Offset, Align: 4); |
173 | Swap->AddrInfoOffsets.resize(new_size: Hdr->NumAddresses); |
174 | if (Data.getU32(offset_ptr: &Offset, dst: Swap->AddrInfoOffsets.data(), count: Hdr->NumAddresses)) |
175 | AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets); |
176 | else |
177 | return createStringError(EC: std::errc::invalid_argument, |
178 | Fmt: "failed to read address table" ); |
179 | // Read the file table. |
180 | const uint32_t NumFiles = Data.getU32(offset_ptr: &Offset); |
181 | if (NumFiles > 0) { |
182 | Swap->Files.resize(new_size: NumFiles); |
183 | if (Data.getU32(offset_ptr: &Offset, dst: &Swap->Files[0].Dir, count: NumFiles*2)) |
184 | Files = ArrayRef<FileEntry>(Swap->Files); |
185 | else |
186 | return createStringError(EC: std::errc::invalid_argument, |
187 | Fmt: "failed to read file table" ); |
188 | } |
189 | // Get the string table. |
190 | StrTab.Data = MemBuffer->getBuffer().substr(Start: Hdr->StrtabOffset, |
191 | N: Hdr->StrtabSize); |
192 | if (StrTab.Data.empty()) |
193 | return createStringError(EC: std::errc::invalid_argument, |
194 | Fmt: "failed to read string table" ); |
195 | } |
196 | return Error::success(); |
197 | |
198 | } |
199 | |
200 | const Header &GsymReader::() const { |
201 | // The only way to get a GsymReader is from GsymReader::openFile(...) or |
202 | // GsymReader::copyBuffer() and the header must be valid and initialized to |
203 | // a valid pointer value, so the assert below should not trigger. |
204 | assert(Hdr); |
205 | return *Hdr; |
206 | } |
207 | |
208 | std::optional<uint64_t> GsymReader::getAddress(size_t Index) const { |
209 | switch (Hdr->AddrOffSize) { |
210 | case 1: return addressForIndex<uint8_t>(Index); |
211 | case 2: return addressForIndex<uint16_t>(Index); |
212 | case 4: return addressForIndex<uint32_t>(Index); |
213 | case 8: return addressForIndex<uint64_t>(Index); |
214 | } |
215 | return std::nullopt; |
216 | } |
217 | |
218 | std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const { |
219 | const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); |
220 | if (Index < NumAddrInfoOffsets) |
221 | return AddrInfoOffsets[Index]; |
222 | return std::nullopt; |
223 | } |
224 | |
225 | Expected<uint64_t> |
226 | GsymReader::getAddressIndex(const uint64_t Addr) const { |
227 | if (Addr >= Hdr->BaseAddress) { |
228 | const uint64_t AddrOffset = Addr - Hdr->BaseAddress; |
229 | std::optional<uint64_t> AddrOffsetIndex; |
230 | switch (Hdr->AddrOffSize) { |
231 | case 1: |
232 | AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset); |
233 | break; |
234 | case 2: |
235 | AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset); |
236 | break; |
237 | case 4: |
238 | AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset); |
239 | break; |
240 | case 8: |
241 | AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset); |
242 | break; |
243 | default: |
244 | return createStringError(EC: std::errc::invalid_argument, |
245 | Fmt: "unsupported address offset size %u" , |
246 | Vals: Hdr->AddrOffSize); |
247 | } |
248 | if (AddrOffsetIndex) |
249 | return *AddrOffsetIndex; |
250 | } |
251 | return createStringError(EC: std::errc::invalid_argument, |
252 | Fmt: "address 0x%" PRIx64 " is not in GSYM" , Vals: Addr); |
253 | |
254 | } |
255 | |
256 | llvm::Expected<DataExtractor> |
257 | GsymReader::getFunctionInfoDataForAddress(uint64_t Addr, |
258 | uint64_t &FuncStartAddr) const { |
259 | Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr); |
260 | if (!ExpectedAddrIdx) |
261 | return ExpectedAddrIdx.takeError(); |
262 | const uint64_t FirstAddrIdx = *ExpectedAddrIdx; |
263 | // The AddrIdx is the first index of the function info entries that match |
264 | // \a Addr. We need to iterate over all function info objects that start with |
265 | // the same address until we find a range that contains \a Addr. |
266 | std::optional<uint64_t> FirstFuncStartAddr; |
267 | const size_t NumAddresses = getNumAddresses(); |
268 | for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) { |
269 | auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr); |
270 | // If there was an error, return the error. |
271 | if (!ExpextedData) |
272 | return ExpextedData; |
273 | |
274 | // Remember the first function start address if it hasn't already been set. |
275 | // If it is already valid, check to see if it matches the first function |
276 | // start address and only continue if it matches. |
277 | if (FirstFuncStartAddr.has_value()) { |
278 | if (*FirstFuncStartAddr != FuncStartAddr) |
279 | break; // Done with consecutive function entries with same address. |
280 | } else { |
281 | FirstFuncStartAddr = FuncStartAddr; |
282 | } |
283 | // Make sure the current function address ranges contains \a Addr. |
284 | // Some symbols on Darwin don't have valid sizes, so if we run into a |
285 | // symbol with zero size, then we have found a match for our address. |
286 | |
287 | // The first thing the encoding of a FunctionInfo object is the function |
288 | // size. |
289 | uint64_t Offset = 0; |
290 | uint32_t FuncSize = ExpextedData->getU32(offset_ptr: &Offset); |
291 | if (FuncSize == 0 || |
292 | AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr)) |
293 | return ExpextedData; |
294 | } |
295 | return createStringError(EC: std::errc::invalid_argument, |
296 | Fmt: "address 0x%" PRIx64 " is not in GSYM" , Vals: Addr); |
297 | } |
298 | |
299 | llvm::Expected<DataExtractor> |
300 | GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx, |
301 | uint64_t &FuncStartAddr) const { |
302 | if (AddrIdx >= getNumAddresses()) |
303 | return createStringError(EC: std::errc::invalid_argument, |
304 | Fmt: "invalid address index %" PRIu64, Vals: AddrIdx); |
305 | const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx]; |
306 | assert((Endian == endianness::big || Endian == endianness::little) && |
307 | "Endian must be either big or little" ); |
308 | StringRef Bytes = MemBuffer->getBuffer().substr(Start: AddrInfoOffset); |
309 | if (Bytes.empty()) |
310 | return createStringError(EC: std::errc::invalid_argument, |
311 | Fmt: "invalid address info offset 0x%" PRIx32, |
312 | Vals: AddrInfoOffset); |
313 | std::optional<uint64_t> OptFuncStartAddr = getAddress(Index: AddrIdx); |
314 | if (!OptFuncStartAddr) |
315 | return createStringError(EC: std::errc::invalid_argument, |
316 | Fmt: "failed to extract address[%" PRIu64 "]" , Vals: AddrIdx); |
317 | FuncStartAddr = *OptFuncStartAddr; |
318 | return DataExtractor(Bytes, Endian == llvm::endianness::little, 4); |
319 | } |
320 | |
321 | llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const { |
322 | uint64_t FuncStartAddr = 0; |
323 | if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr)) |
324 | return FunctionInfo::decode(Data&: *ExpectedData, BaseAddr: FuncStartAddr); |
325 | else |
326 | return ExpectedData.takeError(); |
327 | } |
328 | |
329 | llvm::Expected<FunctionInfo> |
330 | GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const { |
331 | uint64_t FuncStartAddr = 0; |
332 | if (auto ExpectedData = getFunctionInfoDataAtIndex(AddrIdx: Idx, FuncStartAddr)) |
333 | return FunctionInfo::decode(Data&: *ExpectedData, BaseAddr: FuncStartAddr); |
334 | else |
335 | return ExpectedData.takeError(); |
336 | } |
337 | |
338 | llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const { |
339 | uint64_t FuncStartAddr = 0; |
340 | if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr)) |
341 | return FunctionInfo::lookup(Data&: *ExpectedData, GR: *this, FuncAddr: FuncStartAddr, Addr); |
342 | else |
343 | return ExpectedData.takeError(); |
344 | } |
345 | |
346 | void GsymReader::dump(raw_ostream &OS) { |
347 | const auto & = getHeader(); |
348 | // Dump the GSYM header. |
349 | OS << Header << "\n" ; |
350 | // Dump the address table. |
351 | OS << "Address Table:\n" ; |
352 | OS << "INDEX OFFSET" ; |
353 | |
354 | switch (Hdr->AddrOffSize) { |
355 | case 1: OS << "8 " ; break; |
356 | case 2: OS << "16" ; break; |
357 | case 4: OS << "32" ; break; |
358 | case 8: OS << "64" ; break; |
359 | default: OS << "??" ; break; |
360 | } |
361 | OS << " (ADDRESS)\n" ; |
362 | OS << "====== =============================== \n" ; |
363 | for (uint32_t I = 0; I < Header.NumAddresses; ++I) { |
364 | OS << format(Fmt: "[%4u] " , Vals: I); |
365 | switch (Hdr->AddrOffSize) { |
366 | case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break; |
367 | case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break; |
368 | case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break; |
369 | case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break; |
370 | default: break; |
371 | } |
372 | OS << " (" << HEX64(*getAddress(I)) << ")\n" ; |
373 | } |
374 | // Dump the address info offsets table. |
375 | OS << "\nAddress Info Offsets:\n" ; |
376 | OS << "INDEX Offset\n" ; |
377 | OS << "====== ==========\n" ; |
378 | for (uint32_t I = 0; I < Header.NumAddresses; ++I) |
379 | OS << format(Fmt: "[%4u] " , Vals: I) << HEX32(AddrInfoOffsets[I]) << "\n" ; |
380 | // Dump the file table. |
381 | OS << "\nFiles:\n" ; |
382 | OS << "INDEX DIRECTORY BASENAME PATH\n" ; |
383 | OS << "====== ========== ========== ==============================\n" ; |
384 | for (uint32_t I = 0; I < Files.size(); ++I) { |
385 | OS << format(Fmt: "[%4u] " , Vals: I) << HEX32(Files[I].Dir) << ' ' |
386 | << HEX32(Files[I].Base) << ' '; |
387 | dump(OS, FE: getFile(Index: I)); |
388 | OS << "\n" ; |
389 | } |
390 | OS << "\n" << StrTab << "\n" ; |
391 | |
392 | for (uint32_t I = 0; I < Header.NumAddresses; ++I) { |
393 | OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": " ; |
394 | if (auto FI = getFunctionInfoAtIndex(Idx: I)) |
395 | dump(OS, FI: *FI); |
396 | else |
397 | logAllUnhandledErrors(E: FI.takeError(), OS, ErrorBanner: "FunctionInfo:" ); |
398 | } |
399 | } |
400 | |
401 | void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) { |
402 | OS << FI.Range << " \"" << getString(Offset: FI.Name) << "\"\n" ; |
403 | if (FI.OptLineTable) |
404 | dump(OS, LT: *FI.OptLineTable); |
405 | if (FI.Inline) |
406 | dump(OS, II: *FI.Inline); |
407 | } |
408 | |
409 | void GsymReader::dump(raw_ostream &OS, const LineTable <) { |
410 | OS << "LineTable:\n" ; |
411 | for (auto &LE: LT) { |
412 | OS << " " << HEX64(LE.Addr) << ' '; |
413 | if (LE.File) |
414 | dump(OS, FE: getFile(Index: LE.File)); |
415 | OS << ':' << LE.Line << '\n'; |
416 | } |
417 | } |
418 | |
419 | void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) { |
420 | if (Indent == 0) |
421 | OS << "InlineInfo:\n" ; |
422 | else |
423 | OS.indent(NumSpaces: Indent); |
424 | OS << II.Ranges << ' ' << getString(Offset: II.Name); |
425 | if (II.CallFile != 0) { |
426 | if (auto File = getFile(Index: II.CallFile)) { |
427 | OS << " called from " ; |
428 | dump(OS, FE: File); |
429 | OS << ':' << II.CallLine; |
430 | } |
431 | } |
432 | OS << '\n'; |
433 | for (const auto &ChildII: II.Children) |
434 | dump(OS, II: ChildII, Indent: Indent + 2); |
435 | } |
436 | |
437 | void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) { |
438 | if (FE) { |
439 | // IF we have the file from index 0, then don't print anything |
440 | if (FE->Dir == 0 && FE->Base == 0) |
441 | return; |
442 | StringRef Dir = getString(Offset: FE->Dir); |
443 | StringRef Base = getString(Offset: FE->Base); |
444 | if (!Dir.empty()) { |
445 | OS << Dir; |
446 | if (Dir.contains(C: '\\') && !Dir.contains(C: '/')) |
447 | OS << '\\'; |
448 | else |
449 | OS << '/'; |
450 | } |
451 | if (!Base.empty()) { |
452 | OS << Base; |
453 | } |
454 | if (!Dir.empty() || !Base.empty()) |
455 | return; |
456 | } |
457 | OS << "<invalid-file>" ; |
458 | } |
459 | |