1//===------ dwarf2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/BinaryFormat/Dwarf.h"
10#include "llvm/DebugInfo/DWARF/DWARFContext.h"
11#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
12#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
13#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
14#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
15#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
16#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17#include "llvm/DebugInfo/DWARF/DWARFSection.h"
18#include "llvm/ObjectYAML/DWARFYAML.h"
19
20#include <algorithm>
21#include <optional>
22
23using namespace llvm;
24
25Error dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
26 auto AbbrevSetPtr = DCtx.getDebugAbbrev();
27 if (AbbrevSetPtr) {
28 uint64_t AbbrevTableID = 0;
29 if (Error Err = AbbrevSetPtr->parse())
30 return Err;
31 for (const auto &AbbrvDeclSet : *AbbrevSetPtr) {
32 Y.DebugAbbrev.emplace_back();
33 Y.DebugAbbrev.back().ID = AbbrevTableID++;
34 for (const DWARFAbbreviationDeclaration &AbbrvDecl :
35 AbbrvDeclSet.second) {
36 DWARFYAML::Abbrev Abbrv;
37 Abbrv.Code = AbbrvDecl.getCode();
38 Abbrv.Tag = AbbrvDecl.getTag();
39 Abbrv.Children = AbbrvDecl.hasChildren() ? dwarf::DW_CHILDREN_yes
40 : dwarf::DW_CHILDREN_no;
41 Abbrv.Attributes.reserve(n: AbbrvDecl.getNumAttributes());
42 for (auto Attribute : AbbrvDecl.attributes()) {
43 DWARFYAML::AttributeAbbrev AttAbrv;
44 AttAbrv.Attribute = Attribute.Attr;
45 AttAbrv.Form = Attribute.Form;
46 if (AttAbrv.Form == dwarf::DW_FORM_implicit_const)
47 AttAbrv.Value = Attribute.getImplicitConstValue();
48 Abbrv.Attributes.push_back(x: AttAbrv);
49 }
50 Y.DebugAbbrev.back().Table.push_back(x: std::move(Abbrv));
51 }
52 }
53 }
54 return Error::success();
55}
56
57Error dumpDebugAddr(DWARFContext &DCtx, DWARFYAML::Data &Y) {
58 DWARFDebugAddrTable AddrTable;
59 DWARFDataExtractor AddrData(DCtx.getDWARFObj(),
60 DCtx.getDWARFObj().getAddrSection(),
61 DCtx.isLittleEndian(), /*AddressSize=*/0);
62 std::vector<DWARFYAML::AddrTableEntry> AddrTables;
63 uint64_t Offset = 0;
64 while (AddrData.isValidOffset(offset: Offset)) {
65 // We ignore any errors that don't prevent parsing the section, since we can
66 // still represent such sections.
67 if (Error Err = AddrTable.extractV5(Data: AddrData, OffsetPtr: &Offset, /*CUAddrSize=*/0,
68 WarnCallback: consumeError))
69 return Err;
70 AddrTables.emplace_back();
71 AddrTables.back().SegAddrPairs.reserve(
72 n: AddrTable.getAddressEntries().size());
73 for (uint64_t Addr : AddrTable.getAddressEntries()) {
74 // Currently, the parser doesn't support parsing an address table with non
75 // linear addresses (segment_selector_size != 0). The segment selectors
76 // are specified to be zero.
77 AddrTables.back().SegAddrPairs.push_back(
78 x: {/*SegmentSelector=*/.Segment: 0, /*Address=*/Addr});
79 }
80
81 AddrTables.back().Format = AddrTable.getFormat();
82 AddrTables.back().Length = AddrTable.getLength();
83 AddrTables.back().Version = AddrTable.getVersion();
84 AddrTables.back().AddrSize = AddrTable.getAddressSize();
85 AddrTables.back().SegSelectorSize = AddrTable.getSegmentSelectorSize();
86 }
87 Y.DebugAddr = std::move(AddrTables);
88 return Error::success();
89}
90
91Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
92 DataExtractor StrData = DCtx.getStringExtractor();
93 uint64_t Offset = 0;
94 std::vector<StringRef> DebugStr;
95 Error Err = Error::success();
96 while (StrData.isValidOffset(offset: Offset)) {
97 const char *CStr = StrData.getCStr(OffsetPtr: &Offset, Err: &Err);
98 if (Err)
99 return Err;
100 DebugStr.push_back(x: CStr);
101 }
102
103 Y.DebugStrings = std::move(DebugStr);
104 return Err;
105}
106
107Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
108 DWARFDataExtractor ArangesData(DCtx.getDWARFObj().getArangesSection(),
109 DCtx.isLittleEndian(), 0);
110 uint64_t Offset = 0;
111 DWARFDebugArangeSet Set;
112 std::vector<DWARFYAML::ARange> DebugAranges;
113
114 // We ignore any errors that don't prevent parsing the section, since we can
115 // still represent such sections. These errors are recorded via the
116 // WarningHandler parameter of Set.extract().
117 auto DiscardError = [](Error Err) { consumeError(Err: std::move(Err)); };
118
119 while (ArangesData.isValidOffset(offset: Offset)) {
120 if (Error E = Set.extract(data: ArangesData, offset_ptr: &Offset, WarningHandler: DiscardError))
121 return E;
122 DWARFYAML::ARange Range;
123 Range.Format = Set.getHeader().Format;
124 Range.Length = Set.getHeader().Length;
125 Range.Version = Set.getHeader().Version;
126 Range.CuOffset = Set.getHeader().CuOffset;
127 Range.AddrSize = Set.getHeader().AddrSize;
128 Range.SegSize = Set.getHeader().SegSize;
129
130 Range.Descriptors.reserve(n: Set.descriptors().end() -
131 Set.descriptors().begin());
132 for (auto Descriptor : Set.descriptors()) {
133 DWARFYAML::ARangeDescriptor Desc;
134 Desc.Address = Descriptor.Address;
135 Desc.Length = Descriptor.Length;
136 Range.Descriptors.push_back(x: Desc);
137 }
138 DebugAranges.push_back(x: std::move(Range));
139 }
140
141 Y.DebugAranges = std::move(DebugAranges);
142 return ErrorSuccess();
143}
144
145Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
146 // We are assuming all address byte sizes will be consistent across all
147 // compile units.
148 uint8_t AddrSize = 0;
149 for (const auto &CU : DCtx.compile_units()) {
150 const uint8_t CUAddrSize = CU->getAddressByteSize();
151 if (AddrSize == 0)
152 AddrSize = CUAddrSize;
153 else if (CUAddrSize != AddrSize)
154 return createStringError(EC: std::errc::invalid_argument,
155 Fmt: "address sizes vary in different compile units");
156 }
157
158 DWARFDataExtractor Data(DCtx.getDWARFObj().getRangesSection().Data,
159 DCtx.isLittleEndian(), AddrSize);
160 uint64_t Offset = 0;
161 DWARFDebugRangeList DwarfRanges;
162 std::vector<DWARFYAML::Ranges> DebugRanges;
163
164 while (Data.isValidOffset(offset: Offset)) {
165 DWARFYAML::Ranges YamlRanges;
166 YamlRanges.Offset = Offset;
167 YamlRanges.AddrSize = AddrSize;
168 if (Error E = DwarfRanges.extract(data: Data, offset_ptr: &Offset))
169 return E;
170 YamlRanges.Entries.reserve(n: DwarfRanges.getEntries().size());
171 for (const auto &RLE : DwarfRanges.getEntries())
172 YamlRanges.Entries.push_back(x: {.LowOffset: RLE.StartAddress, .HighOffset: RLE.EndAddress});
173 DebugRanges.push_back(x: std::move(YamlRanges));
174 }
175
176 Y.DebugRanges = std::move(DebugRanges);
177 return ErrorSuccess();
178}
179
180static std::optional<DWARFYAML::PubSection>
181dumpPubSection(const DWARFContext &DCtx, const DWARFSection &Section,
182 bool IsGNUStyle) {
183 DWARFYAML::PubSection Y;
184 DWARFDataExtractor PubSectionData(DCtx.getDWARFObj(), Section,
185 DCtx.isLittleEndian(), 0);
186 DWARFDebugPubTable Table;
187 // We ignore any errors that don't prevent parsing the section, since we can
188 // still represent such sections.
189 Table.extract(Data: PubSectionData, GnuStyle: IsGNUStyle,
190 RecoverableErrorHandler: [](Error Err) { consumeError(Err: std::move(Err)); });
191 ArrayRef<DWARFDebugPubTable::Set> Sets = Table.getData();
192 if (Sets.empty())
193 return std::nullopt;
194
195 // FIXME: Currently, obj2yaml only supports dumping the first pubtable.
196 Y.Format = Sets[0].Format;
197 Y.Length = Sets[0].Length;
198 Y.Version = Sets[0].Version;
199 Y.UnitOffset = Sets[0].Offset;
200 Y.UnitSize = Sets[0].Size;
201
202 Y.Entries.reserve(n: Sets[0].Entries.size());
203 for (const DWARFDebugPubTable::Entry &E : Sets[0].Entries)
204 Y.Entries.push_back(x: DWARFYAML::PubEntry{.DieOffset: (uint32_t)E.SecOffset,
205 .Descriptor: E.Descriptor.toBits(), .Name: E.Name});
206
207 return Y;
208}
209
210void dumpDebugPubSections(DWARFContext &DCtx, DWARFYAML::Data &Y) {
211 const DWARFObject &D = DCtx.getDWARFObj();
212
213 Y.PubNames =
214 dumpPubSection(DCtx, Section: D.getPubnamesSection(), /*IsGNUStyle=*/false);
215 Y.PubTypes =
216 dumpPubSection(DCtx, Section: D.getPubtypesSection(), /*IsGNUStyle=*/false);
217 // TODO: Test dumping .debug_gnu_pubnames section.
218 Y.GNUPubNames =
219 dumpPubSection(DCtx, Section: D.getGnuPubnamesSection(), /*IsGNUStyle=*/true);
220 // TODO: Test dumping .debug_gnu_pubtypes section.
221 Y.GNUPubTypes =
222 dumpPubSection(DCtx, Section: D.getGnuPubtypesSection(), /*IsGNUStyle=*/true);
223}
224
225void dumpDebugInfo(DWARFContext &DCtx, DWARFYAML::Data &Y) {
226 Y.Units.reserve(n: DCtx.getNumCompileUnits());
227 for (const auto &CU : DCtx.compile_units()) {
228 DWARFYAML::Unit NewUnit;
229 NewUnit.Format = CU->getFormat();
230 NewUnit.Length = CU->getLength();
231 NewUnit.Version = CU->getVersion();
232 if (NewUnit.Version >= 5)
233 NewUnit.Type = (dwarf::UnitType)CU->getUnitType();
234 const DWARFDebugAbbrev *DebugAbbrev = DCtx.getDebugAbbrev();
235 // FIXME: Ideally we would propagate this error upwards, but that would
236 // prevent us from displaying any debug info at all. For now we just consume
237 // the error and display everything that was parsed successfully.
238 if (Error Err = DebugAbbrev->parse())
239 llvm::consumeError(Err: std::move(Err));
240
241 NewUnit.AbbrevTableID = std::distance(
242 first: DebugAbbrev->begin(),
243 last: llvm::find_if(
244 Range: *DebugAbbrev,
245 P: [&](const std::pair<uint64_t, DWARFAbbreviationDeclarationSet> &P) {
246 return P.first == CU->getAbbreviations()->getOffset();
247 }));
248 NewUnit.AbbrOffset = CU->getAbbreviations()->getOffset();
249 NewUnit.AddrSize = CU->getAddressByteSize();
250 NewUnit.Entries.reserve(n: CU->getNumDIEs());
251 for (auto DIE : CU->dies()) {
252 DWARFYAML::Entry NewEntry;
253 DataExtractor EntryData = CU->getDebugInfoExtractor();
254 uint64_t offset = DIE.getOffset();
255
256 assert(EntryData.isValidOffset(offset) && "Invalid DIE Offset");
257 if (!EntryData.isValidOffset(offset))
258 continue;
259
260 NewEntry.AbbrCode = EntryData.getULEB128(offset_ptr: &offset);
261
262 auto AbbrevDecl = DIE.getAbbreviationDeclarationPtr();
263 if (AbbrevDecl) {
264 // This reserve doesn't account for DW_FORM_indirect values, which would
265 // result in more entries in NewEntry.Values than getNumAttributes()
266 // implies. Not all binaries have these, and it'll reduce the number of
267 // allocations in any case.
268 NewEntry.Values.reserve(n: AbbrevDecl->getNumAttributes());
269 for (const auto &AttrSpec : AbbrevDecl->attributes()) {
270 DWARFYAML::FormValue NewValue;
271 NewValue.Value = 0xDEADBEEFDEADBEEF;
272 DWARFDie DIEWrapper(CU.get(), &DIE);
273 auto FormValue = DIEWrapper.find(Attr: AttrSpec.Attr);
274 if (!FormValue)
275 return;
276 auto Form = FormValue->getForm();
277 bool indirect = false;
278 do {
279 indirect = false;
280 switch (Form) {
281 case dwarf::DW_FORM_addr:
282 case dwarf::DW_FORM_GNU_addr_index:
283 if (auto Val = FormValue->getAsAddress())
284 NewValue.Value = *Val;
285 break;
286 case dwarf::DW_FORM_ref_addr:
287 case dwarf::DW_FORM_ref1:
288 case dwarf::DW_FORM_ref2:
289 case dwarf::DW_FORM_ref4:
290 case dwarf::DW_FORM_ref8:
291 case dwarf::DW_FORM_ref_udata:
292 case dwarf::DW_FORM_ref_sig8:
293 if (auto Val = FormValue->getAsReferenceUVal())
294 NewValue.Value = *Val;
295 break;
296 case dwarf::DW_FORM_exprloc:
297 case dwarf::DW_FORM_block:
298 case dwarf::DW_FORM_block1:
299 case dwarf::DW_FORM_block2:
300 case dwarf::DW_FORM_block4:
301 if (auto Val = FormValue->getAsBlock()) {
302 auto BlockData = *Val;
303 std::copy(first: BlockData.begin(), last: BlockData.end(),
304 result: std::back_inserter(x&: NewValue.BlockData));
305 }
306 NewValue.Value = NewValue.BlockData.size();
307 break;
308 case dwarf::DW_FORM_data1:
309 case dwarf::DW_FORM_flag:
310 case dwarf::DW_FORM_data2:
311 case dwarf::DW_FORM_data4:
312 case dwarf::DW_FORM_data8:
313 case dwarf::DW_FORM_sdata:
314 case dwarf::DW_FORM_udata:
315 case dwarf::DW_FORM_ref_sup4:
316 case dwarf::DW_FORM_ref_sup8:
317 if (auto Val = FormValue->getAsUnsignedConstant())
318 NewValue.Value = *Val;
319 break;
320 case dwarf::DW_FORM_string:
321 if (auto Val = dwarf::toString(V: FormValue))
322 NewValue.CStr = *Val;
323 break;
324 case dwarf::DW_FORM_indirect:
325 indirect = true;
326 if (auto Val = FormValue->getAsUnsignedConstant()) {
327 NewValue.Value = *Val;
328 NewEntry.Values.push_back(x: NewValue);
329 Form = static_cast<dwarf::Form>(*Val);
330 }
331 break;
332 case dwarf::DW_FORM_strp:
333 case dwarf::DW_FORM_sec_offset:
334 case dwarf::DW_FORM_GNU_ref_alt:
335 case dwarf::DW_FORM_GNU_strp_alt:
336 case dwarf::DW_FORM_line_strp:
337 case dwarf::DW_FORM_strp_sup:
338 case dwarf::DW_FORM_GNU_str_index:
339 case dwarf::DW_FORM_strx:
340 if (auto Val = FormValue->getAsCStringOffset())
341 NewValue.Value = *Val;
342 break;
343 case dwarf::DW_FORM_flag_present:
344 NewValue.Value = 1;
345 break;
346 default:
347 break;
348 }
349 } while (indirect);
350 NewEntry.Values.push_back(x: std::move(NewValue));
351 }
352 }
353
354 NewUnit.Entries.push_back(x: std::move(NewEntry));
355 }
356 Y.Units.push_back(x: std::move(NewUnit));
357 }
358}
359
360bool dumpFileEntry(DataExtractor &Data, uint64_t &Offset,
361 DWARFYAML::File &File) {
362 File.Name = Data.getCStr(OffsetPtr: &Offset);
363 if (File.Name.empty())
364 return false;
365 File.DirIdx = Data.getULEB128(offset_ptr: &Offset);
366 File.ModTime = Data.getULEB128(offset_ptr: &Offset);
367 File.Length = Data.getULEB128(offset_ptr: &Offset);
368 return true;
369}
370
371void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
372 for (const auto &CU : DCtx.compile_units()) {
373 auto CUDIE = CU->getUnitDIE();
374 if (!CUDIE)
375 continue;
376 if (auto StmtOffset =
377 dwarf::toSectionOffset(V: CUDIE.find(Attr: dwarf::DW_AT_stmt_list))) {
378 DWARFYAML::LineTable DebugLines;
379 DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
380 DCtx.isLittleEndian(), CU->getAddressByteSize());
381 uint64_t Offset = *StmtOffset;
382 uint64_t LengthOrDWARF64Prefix = LineData.getU32(offset_ptr: &Offset);
383 if (LengthOrDWARF64Prefix == dwarf::DW_LENGTH_DWARF64) {
384 DebugLines.Format = dwarf::DWARF64;
385 DebugLines.Length = LineData.getU64(offset_ptr: &Offset);
386 } else {
387 DebugLines.Format = dwarf::DWARF32;
388 DebugLines.Length = LengthOrDWARF64Prefix;
389 }
390 assert(DebugLines.Length);
391 uint64_t LineTableLength = *DebugLines.Length;
392 uint64_t SizeOfPrologueLength =
393 DebugLines.Format == dwarf::DWARF64 ? 8 : 4;
394 DebugLines.Version = LineData.getU16(offset_ptr: &Offset);
395 DebugLines.PrologueLength =
396 LineData.getUnsigned(offset_ptr: &Offset, byte_size: SizeOfPrologueLength);
397 assert(DebugLines.PrologueLength);
398 const uint64_t EndPrologue = *DebugLines.PrologueLength + Offset;
399
400 DebugLines.MinInstLength = LineData.getU8(offset_ptr: &Offset);
401 if (DebugLines.Version >= 4)
402 DebugLines.MaxOpsPerInst = LineData.getU8(offset_ptr: &Offset);
403 DebugLines.DefaultIsStmt = LineData.getU8(offset_ptr: &Offset);
404 DebugLines.LineBase = LineData.getU8(offset_ptr: &Offset);
405 DebugLines.LineRange = LineData.getU8(offset_ptr: &Offset);
406 DebugLines.OpcodeBase = LineData.getU8(offset_ptr: &Offset);
407
408 DebugLines.StandardOpcodeLengths.emplace();
409 for (uint8_t i = 1; i < DebugLines.OpcodeBase; ++i)
410 DebugLines.StandardOpcodeLengths->push_back(x: LineData.getU8(offset_ptr: &Offset));
411
412 while (Offset < EndPrologue) {
413 StringRef Dir = LineData.getCStr(OffsetPtr: &Offset);
414 if (!Dir.empty())
415 DebugLines.IncludeDirs.push_back(x: Dir);
416 else
417 break;
418 }
419
420 while (Offset < EndPrologue) {
421 DWARFYAML::File TmpFile;
422 if (dumpFileEntry(Data&: LineData, Offset, File&: TmpFile))
423 DebugLines.Files.push_back(x: TmpFile);
424 else
425 break;
426 }
427
428 const uint64_t LineEnd =
429 LineTableLength + *StmtOffset + SizeOfPrologueLength;
430 while (Offset < LineEnd) {
431 DWARFYAML::LineTableOpcode NewOp = {};
432 NewOp.Opcode = (dwarf::LineNumberOps)LineData.getU8(offset_ptr: &Offset);
433 if (NewOp.Opcode == 0) {
434 auto StartExt = Offset;
435 NewOp.ExtLen = LineData.getULEB128(offset_ptr: &Offset);
436 NewOp.SubOpcode =
437 (dwarf::LineNumberExtendedOps)LineData.getU8(offset_ptr: &Offset);
438 switch (NewOp.SubOpcode) {
439 case dwarf::DW_LNE_set_address:
440 case dwarf::DW_LNE_set_discriminator:
441 NewOp.Data = LineData.getAddress(offset_ptr: &Offset);
442 break;
443 case dwarf::DW_LNE_define_file:
444 dumpFileEntry(Data&: LineData, Offset, File&: NewOp.FileEntry);
445 break;
446 case dwarf::DW_LNE_end_sequence:
447 break;
448 default:
449 while (Offset < StartExt + *NewOp.ExtLen)
450 NewOp.UnknownOpcodeData.push_back(x: LineData.getU8(offset_ptr: &Offset));
451 }
452 } else if (NewOp.Opcode < *DebugLines.OpcodeBase) {
453 switch (NewOp.Opcode) {
454 case dwarf::DW_LNS_copy:
455 case dwarf::DW_LNS_negate_stmt:
456 case dwarf::DW_LNS_set_basic_block:
457 case dwarf::DW_LNS_const_add_pc:
458 case dwarf::DW_LNS_set_prologue_end:
459 case dwarf::DW_LNS_set_epilogue_begin:
460 break;
461
462 case dwarf::DW_LNS_advance_pc:
463 case dwarf::DW_LNS_set_file:
464 case dwarf::DW_LNS_set_column:
465 case dwarf::DW_LNS_set_isa:
466 NewOp.Data = LineData.getULEB128(offset_ptr: &Offset);
467 break;
468
469 case dwarf::DW_LNS_advance_line:
470 NewOp.SData = LineData.getSLEB128(OffsetPtr: &Offset);
471 break;
472
473 case dwarf::DW_LNS_fixed_advance_pc:
474 NewOp.Data = LineData.getU16(offset_ptr: &Offset);
475 break;
476
477 default:
478 for (uint8_t i = 0;
479 i < (*DebugLines.StandardOpcodeLengths)[NewOp.Opcode - 1]; ++i)
480 NewOp.StandardOpcodeData.push_back(x: LineData.getULEB128(offset_ptr: &Offset));
481 }
482 }
483 DebugLines.Opcodes.push_back(x: std::move(NewOp));
484 }
485 Y.DebugLines.push_back(x: std::move(DebugLines));
486 }
487 }
488}
489