1//===------ dwarf2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/BinaryFormat/Dwarf.h"
10#include "llvm/DebugInfo/DWARF/DWARFContext.h"
11#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
12#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
13#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
14#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
15#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
16#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17#include "llvm/DebugInfo/DWARF/DWARFSection.h"
18#include "llvm/ObjectYAML/DWARFYAML.h"
19
20#include <algorithm>
21#include <optional>
22
23using namespace llvm;
24
25Error dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
26 auto AbbrevSetPtr = DCtx.getDebugAbbrev();
27 if (AbbrevSetPtr) {
28 uint64_t AbbrevTableID = 0;
29 if (Error Err = AbbrevSetPtr->parse())
30 return Err;
31 for (const auto &AbbrvDeclSet : *AbbrevSetPtr) {
32 Y.DebugAbbrev.emplace_back();
33 Y.DebugAbbrev.back().ID = AbbrevTableID++;
34 for (const DWARFAbbreviationDeclaration &AbbrvDecl :
35 AbbrvDeclSet.second) {
36 DWARFYAML::Abbrev Abbrv;
37 Abbrv.Code = AbbrvDecl.getCode();
38 Abbrv.Tag = AbbrvDecl.getTag();
39 Abbrv.Children = AbbrvDecl.hasChildren() ? dwarf::DW_CHILDREN_yes
40 : dwarf::DW_CHILDREN_no;
41 for (auto Attribute : AbbrvDecl.attributes()) {
42 DWARFYAML::AttributeAbbrev AttAbrv;
43 AttAbrv.Attribute = Attribute.Attr;
44 AttAbrv.Form = Attribute.Form;
45 if (AttAbrv.Form == dwarf::DW_FORM_implicit_const)
46 AttAbrv.Value = Attribute.getImplicitConstValue();
47 Abbrv.Attributes.push_back(x: AttAbrv);
48 }
49 Y.DebugAbbrev.back().Table.push_back(x: Abbrv);
50 }
51 }
52 }
53 return Error::success();
54}
55
56Error dumpDebugAddr(DWARFContext &DCtx, DWARFYAML::Data &Y) {
57 DWARFDebugAddrTable AddrTable;
58 DWARFDataExtractor AddrData(DCtx.getDWARFObj(),
59 DCtx.getDWARFObj().getAddrSection(),
60 DCtx.isLittleEndian(), /*AddressSize=*/0);
61 std::vector<DWARFYAML::AddrTableEntry> AddrTables;
62 uint64_t Offset = 0;
63 while (AddrData.isValidOffset(offset: Offset)) {
64 // We ignore any errors that don't prevent parsing the section, since we can
65 // still represent such sections.
66 if (Error Err = AddrTable.extractV5(Data: AddrData, OffsetPtr: &Offset, /*CUAddrSize=*/0,
67 WarnCallback: consumeError))
68 return Err;
69 AddrTables.emplace_back();
70 for (uint64_t Addr : AddrTable.getAddressEntries()) {
71 // Currently, the parser doesn't support parsing an address table with non
72 // linear addresses (segment_selector_size != 0). The segment selectors
73 // are specified to be zero.
74 AddrTables.back().SegAddrPairs.push_back(
75 x: {/*SegmentSelector=*/.Segment: 0, /*Address=*/Addr});
76 }
77
78 AddrTables.back().Format = AddrTable.getFormat();
79 AddrTables.back().Length = AddrTable.getLength();
80 AddrTables.back().Version = AddrTable.getVersion();
81 AddrTables.back().AddrSize = AddrTable.getAddressSize();
82 AddrTables.back().SegSelectorSize = AddrTable.getSegmentSelectorSize();
83 }
84 Y.DebugAddr = std::move(AddrTables);
85 return Error::success();
86}
87
88Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
89 DataExtractor StrData = DCtx.getStringExtractor();
90 uint64_t Offset = 0;
91 std::vector<StringRef> DebugStr;
92 Error Err = Error::success();
93 while (StrData.isValidOffset(offset: Offset)) {
94 const char *CStr = StrData.getCStr(OffsetPtr: &Offset, Err: &Err);
95 if (Err)
96 return Err;
97 DebugStr.push_back(x: CStr);
98 }
99
100 Y.DebugStrings = DebugStr;
101 return Err;
102}
103
104Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
105 DWARFDataExtractor ArangesData(DCtx.getDWARFObj().getArangesSection(),
106 DCtx.isLittleEndian(), 0);
107 uint64_t Offset = 0;
108 DWARFDebugArangeSet Set;
109 std::vector<DWARFYAML::ARange> DebugAranges;
110
111 // We ignore any errors that don't prevent parsing the section, since we can
112 // still represent such sections. These errors are recorded via the
113 // WarningHandler parameter of Set.extract().
114 auto DiscardError = [](Error Err) { consumeError(Err: std::move(Err)); };
115
116 while (ArangesData.isValidOffset(offset: Offset)) {
117 if (Error E = Set.extract(data: ArangesData, offset_ptr: &Offset, WarningHandler: DiscardError))
118 return E;
119 DWARFYAML::ARange Range;
120 Range.Format = Set.getHeader().Format;
121 Range.Length = Set.getHeader().Length;
122 Range.Version = Set.getHeader().Version;
123 Range.CuOffset = Set.getHeader().CuOffset;
124 Range.AddrSize = Set.getHeader().AddrSize;
125 Range.SegSize = Set.getHeader().SegSize;
126 for (auto Descriptor : Set.descriptors()) {
127 DWARFYAML::ARangeDescriptor Desc;
128 Desc.Address = Descriptor.Address;
129 Desc.Length = Descriptor.Length;
130 Range.Descriptors.push_back(x: Desc);
131 }
132 DebugAranges.push_back(x: Range);
133 }
134
135 Y.DebugAranges = DebugAranges;
136 return ErrorSuccess();
137}
138
139Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
140 // We are assuming all address byte sizes will be consistent across all
141 // compile units.
142 uint8_t AddrSize = 0;
143 for (const auto &CU : DCtx.compile_units()) {
144 const uint8_t CUAddrSize = CU->getAddressByteSize();
145 if (AddrSize == 0)
146 AddrSize = CUAddrSize;
147 else if (CUAddrSize != AddrSize)
148 return createStringError(EC: std::errc::invalid_argument,
149 Fmt: "address sizes vary in different compile units");
150 }
151
152 DWARFDataExtractor Data(DCtx.getDWARFObj().getRangesSection().Data,
153 DCtx.isLittleEndian(), AddrSize);
154 uint64_t Offset = 0;
155 DWARFDebugRangeList DwarfRanges;
156 std::vector<DWARFYAML::Ranges> DebugRanges;
157
158 while (Data.isValidOffset(offset: Offset)) {
159 DWARFYAML::Ranges YamlRanges;
160 YamlRanges.Offset = Offset;
161 YamlRanges.AddrSize = AddrSize;
162 if (Error E = DwarfRanges.extract(data: Data, offset_ptr: &Offset))
163 return E;
164 for (const auto &RLE : DwarfRanges.getEntries())
165 YamlRanges.Entries.push_back(x: {.LowOffset: RLE.StartAddress, .HighOffset: RLE.EndAddress});
166 DebugRanges.push_back(x: std::move(YamlRanges));
167 }
168
169 Y.DebugRanges = DebugRanges;
170 return ErrorSuccess();
171}
172
173static std::optional<DWARFYAML::PubSection>
174dumpPubSection(const DWARFContext &DCtx, const DWARFSection &Section,
175 bool IsGNUStyle) {
176 DWARFYAML::PubSection Y;
177 DWARFDataExtractor PubSectionData(DCtx.getDWARFObj(), Section,
178 DCtx.isLittleEndian(), 0);
179 DWARFDebugPubTable Table;
180 // We ignore any errors that don't prevent parsing the section, since we can
181 // still represent such sections.
182 Table.extract(Data: PubSectionData, GnuStyle: IsGNUStyle,
183 RecoverableErrorHandler: [](Error Err) { consumeError(Err: std::move(Err)); });
184 ArrayRef<DWARFDebugPubTable::Set> Sets = Table.getData();
185 if (Sets.empty())
186 return std::nullopt;
187
188 // FIXME: Currently, obj2yaml only supports dumping the first pubtable.
189 Y.Format = Sets[0].Format;
190 Y.Length = Sets[0].Length;
191 Y.Version = Sets[0].Version;
192 Y.UnitOffset = Sets[0].Offset;
193 Y.UnitSize = Sets[0].Size;
194
195 for (const DWARFDebugPubTable::Entry &E : Sets[0].Entries)
196 Y.Entries.push_back(x: DWARFYAML::PubEntry{.DieOffset: (uint32_t)E.SecOffset,
197 .Descriptor: E.Descriptor.toBits(), .Name: E.Name});
198
199 return Y;
200}
201
202void dumpDebugPubSections(DWARFContext &DCtx, DWARFYAML::Data &Y) {
203 const DWARFObject &D = DCtx.getDWARFObj();
204
205 Y.PubNames =
206 dumpPubSection(DCtx, Section: D.getPubnamesSection(), /*IsGNUStyle=*/false);
207 Y.PubTypes =
208 dumpPubSection(DCtx, Section: D.getPubtypesSection(), /*IsGNUStyle=*/false);
209 // TODO: Test dumping .debug_gnu_pubnames section.
210 Y.GNUPubNames =
211 dumpPubSection(DCtx, Section: D.getGnuPubnamesSection(), /*IsGNUStyle=*/true);
212 // TODO: Test dumping .debug_gnu_pubtypes section.
213 Y.GNUPubTypes =
214 dumpPubSection(DCtx, Section: D.getGnuPubtypesSection(), /*IsGNUStyle=*/true);
215}
216
217void dumpDebugInfo(DWARFContext &DCtx, DWARFYAML::Data &Y) {
218 for (const auto &CU : DCtx.compile_units()) {
219 DWARFYAML::Unit NewUnit;
220 NewUnit.Format = CU->getFormat();
221 NewUnit.Length = CU->getLength();
222 NewUnit.Version = CU->getVersion();
223 if (NewUnit.Version >= 5)
224 NewUnit.Type = (dwarf::UnitType)CU->getUnitType();
225 const DWARFDebugAbbrev *DebugAbbrev = DCtx.getDebugAbbrev();
226 // FIXME: Ideally we would propagate this error upwards, but that would
227 // prevent us from displaying any debug info at all. For now we just consume
228 // the error and display everything that was parsed successfully.
229 if (Error Err = DebugAbbrev->parse())
230 llvm::consumeError(Err: std::move(Err));
231
232 NewUnit.AbbrevTableID = std::distance(
233 first: DebugAbbrev->begin(),
234 last: llvm::find_if(
235 Range: *DebugAbbrev,
236 P: [&](const std::pair<uint64_t, DWARFAbbreviationDeclarationSet> &P) {
237 return P.first == CU->getAbbreviations()->getOffset();
238 }));
239 NewUnit.AbbrOffset = CU->getAbbreviations()->getOffset();
240 NewUnit.AddrSize = CU->getAddressByteSize();
241 for (auto DIE : CU->dies()) {
242 DWARFYAML::Entry NewEntry;
243 DataExtractor EntryData = CU->getDebugInfoExtractor();
244 uint64_t offset = DIE.getOffset();
245
246 assert(EntryData.isValidOffset(offset) && "Invalid DIE Offset");
247 if (!EntryData.isValidOffset(offset))
248 continue;
249
250 NewEntry.AbbrCode = EntryData.getULEB128(offset_ptr: &offset);
251
252 auto AbbrevDecl = DIE.getAbbreviationDeclarationPtr();
253 if (AbbrevDecl) {
254 for (const auto &AttrSpec : AbbrevDecl->attributes()) {
255 DWARFYAML::FormValue NewValue;
256 NewValue.Value = 0xDEADBEEFDEADBEEF;
257 DWARFDie DIEWrapper(CU.get(), &DIE);
258 auto FormValue = DIEWrapper.find(Attr: AttrSpec.Attr);
259 if (!FormValue)
260 return;
261 auto Form = FormValue->getForm();
262 bool indirect = false;
263 do {
264 indirect = false;
265 switch (Form) {
266 case dwarf::DW_FORM_addr:
267 case dwarf::DW_FORM_GNU_addr_index:
268 if (auto Val = FormValue->getAsAddress())
269 NewValue.Value = *Val;
270 break;
271 case dwarf::DW_FORM_ref_addr:
272 case dwarf::DW_FORM_ref1:
273 case dwarf::DW_FORM_ref2:
274 case dwarf::DW_FORM_ref4:
275 case dwarf::DW_FORM_ref8:
276 case dwarf::DW_FORM_ref_udata:
277 case dwarf::DW_FORM_ref_sig8:
278 if (auto Val = FormValue->getAsReferenceUVal())
279 NewValue.Value = *Val;
280 break;
281 case dwarf::DW_FORM_exprloc:
282 case dwarf::DW_FORM_block:
283 case dwarf::DW_FORM_block1:
284 case dwarf::DW_FORM_block2:
285 case dwarf::DW_FORM_block4:
286 if (auto Val = FormValue->getAsBlock()) {
287 auto BlockData = *Val;
288 std::copy(BlockData.begin(), BlockData.end(),
289 std::back_inserter(x&: NewValue.BlockData));
290 }
291 NewValue.Value = NewValue.BlockData.size();
292 break;
293 case dwarf::DW_FORM_data1:
294 case dwarf::DW_FORM_flag:
295 case dwarf::DW_FORM_data2:
296 case dwarf::DW_FORM_data4:
297 case dwarf::DW_FORM_data8:
298 case dwarf::DW_FORM_sdata:
299 case dwarf::DW_FORM_udata:
300 case dwarf::DW_FORM_ref_sup4:
301 case dwarf::DW_FORM_ref_sup8:
302 if (auto Val = FormValue->getAsUnsignedConstant())
303 NewValue.Value = *Val;
304 break;
305 case dwarf::DW_FORM_string:
306 if (auto Val = dwarf::toString(V: FormValue))
307 NewValue.CStr = *Val;
308 break;
309 case dwarf::DW_FORM_indirect:
310 indirect = true;
311 if (auto Val = FormValue->getAsUnsignedConstant()) {
312 NewValue.Value = *Val;
313 NewEntry.Values.push_back(x: NewValue);
314 Form = static_cast<dwarf::Form>(*Val);
315 }
316 break;
317 case dwarf::DW_FORM_strp:
318 case dwarf::DW_FORM_sec_offset:
319 case dwarf::DW_FORM_GNU_ref_alt:
320 case dwarf::DW_FORM_GNU_strp_alt:
321 case dwarf::DW_FORM_line_strp:
322 case dwarf::DW_FORM_strp_sup:
323 case dwarf::DW_FORM_GNU_str_index:
324 case dwarf::DW_FORM_strx:
325 if (auto Val = FormValue->getAsCStringOffset())
326 NewValue.Value = *Val;
327 break;
328 case dwarf::DW_FORM_flag_present:
329 NewValue.Value = 1;
330 break;
331 default:
332 break;
333 }
334 } while (indirect);
335 NewEntry.Values.push_back(x: NewValue);
336 }
337 }
338
339 NewUnit.Entries.push_back(x: NewEntry);
340 }
341 Y.CompileUnits.push_back(x: NewUnit);
342 }
343}
344
345bool dumpFileEntry(DataExtractor &Data, uint64_t &Offset,
346 DWARFYAML::File &File) {
347 File.Name = Data.getCStr(OffsetPtr: &Offset);
348 if (File.Name.empty())
349 return false;
350 File.DirIdx = Data.getULEB128(offset_ptr: &Offset);
351 File.ModTime = Data.getULEB128(offset_ptr: &Offset);
352 File.Length = Data.getULEB128(offset_ptr: &Offset);
353 return true;
354}
355
356void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
357 for (const auto &CU : DCtx.compile_units()) {
358 auto CUDIE = CU->getUnitDIE();
359 if (!CUDIE)
360 continue;
361 if (auto StmtOffset =
362 dwarf::toSectionOffset(V: CUDIE.find(Attr: dwarf::DW_AT_stmt_list))) {
363 DWARFYAML::LineTable DebugLines;
364 DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
365 DCtx.isLittleEndian(), CU->getAddressByteSize());
366 uint64_t Offset = *StmtOffset;
367 uint64_t LengthOrDWARF64Prefix = LineData.getU32(offset_ptr: &Offset);
368 if (LengthOrDWARF64Prefix == dwarf::DW_LENGTH_DWARF64) {
369 DebugLines.Format = dwarf::DWARF64;
370 DebugLines.Length = LineData.getU64(offset_ptr: &Offset);
371 } else {
372 DebugLines.Format = dwarf::DWARF32;
373 DebugLines.Length = LengthOrDWARF64Prefix;
374 }
375 assert(DebugLines.Length);
376 uint64_t LineTableLength = *DebugLines.Length;
377 uint64_t SizeOfPrologueLength =
378 DebugLines.Format == dwarf::DWARF64 ? 8 : 4;
379 DebugLines.Version = LineData.getU16(offset_ptr: &Offset);
380 DebugLines.PrologueLength =
381 LineData.getUnsigned(offset_ptr: &Offset, byte_size: SizeOfPrologueLength);
382 assert(DebugLines.PrologueLength);
383 const uint64_t EndPrologue = *DebugLines.PrologueLength + Offset;
384
385 DebugLines.MinInstLength = LineData.getU8(offset_ptr: &Offset);
386 if (DebugLines.Version >= 4)
387 DebugLines.MaxOpsPerInst = LineData.getU8(offset_ptr: &Offset);
388 DebugLines.DefaultIsStmt = LineData.getU8(offset_ptr: &Offset);
389 DebugLines.LineBase = LineData.getU8(offset_ptr: &Offset);
390 DebugLines.LineRange = LineData.getU8(offset_ptr: &Offset);
391 DebugLines.OpcodeBase = LineData.getU8(offset_ptr: &Offset);
392
393 DebugLines.StandardOpcodeLengths.emplace();
394 for (uint8_t i = 1; i < DebugLines.OpcodeBase; ++i)
395 DebugLines.StandardOpcodeLengths->push_back(x: LineData.getU8(offset_ptr: &Offset));
396
397 while (Offset < EndPrologue) {
398 StringRef Dir = LineData.getCStr(OffsetPtr: &Offset);
399 if (!Dir.empty())
400 DebugLines.IncludeDirs.push_back(x: Dir);
401 else
402 break;
403 }
404
405 while (Offset < EndPrologue) {
406 DWARFYAML::File TmpFile;
407 if (dumpFileEntry(Data&: LineData, Offset, File&: TmpFile))
408 DebugLines.Files.push_back(x: TmpFile);
409 else
410 break;
411 }
412
413 const uint64_t LineEnd =
414 LineTableLength + *StmtOffset + SizeOfPrologueLength;
415 while (Offset < LineEnd) {
416 DWARFYAML::LineTableOpcode NewOp = {};
417 NewOp.Opcode = (dwarf::LineNumberOps)LineData.getU8(offset_ptr: &Offset);
418 if (NewOp.Opcode == 0) {
419 auto StartExt = Offset;
420 NewOp.ExtLen = LineData.getULEB128(offset_ptr: &Offset);
421 NewOp.SubOpcode =
422 (dwarf::LineNumberExtendedOps)LineData.getU8(offset_ptr: &Offset);
423 switch (NewOp.SubOpcode) {
424 case dwarf::DW_LNE_set_address:
425 case dwarf::DW_LNE_set_discriminator:
426 NewOp.Data = LineData.getAddress(offset_ptr: &Offset);
427 break;
428 case dwarf::DW_LNE_define_file:
429 dumpFileEntry(Data&: LineData, Offset, File&: NewOp.FileEntry);
430 break;
431 case dwarf::DW_LNE_end_sequence:
432 break;
433 default:
434 while (Offset < StartExt + *NewOp.ExtLen)
435 NewOp.UnknownOpcodeData.push_back(x: LineData.getU8(offset_ptr: &Offset));
436 }
437 } else if (NewOp.Opcode < *DebugLines.OpcodeBase) {
438 switch (NewOp.Opcode) {
439 case dwarf::DW_LNS_copy:
440 case dwarf::DW_LNS_negate_stmt:
441 case dwarf::DW_LNS_set_basic_block:
442 case dwarf::DW_LNS_const_add_pc:
443 case dwarf::DW_LNS_set_prologue_end:
444 case dwarf::DW_LNS_set_epilogue_begin:
445 break;
446
447 case dwarf::DW_LNS_advance_pc:
448 case dwarf::DW_LNS_set_file:
449 case dwarf::DW_LNS_set_column:
450 case dwarf::DW_LNS_set_isa:
451 NewOp.Data = LineData.getULEB128(offset_ptr: &Offset);
452 break;
453
454 case dwarf::DW_LNS_advance_line:
455 NewOp.SData = LineData.getSLEB128(OffsetPtr: &Offset);
456 break;
457
458 case dwarf::DW_LNS_fixed_advance_pc:
459 NewOp.Data = LineData.getU16(offset_ptr: &Offset);
460 break;
461
462 default:
463 for (uint8_t i = 0;
464 i < (*DebugLines.StandardOpcodeLengths)[NewOp.Opcode - 1]; ++i)
465 NewOp.StandardOpcodeData.push_back(x: LineData.getULEB128(offset_ptr: &Offset));
466 }
467 }
468 DebugLines.Opcodes.push_back(x: NewOp);
469 }
470 Y.DebugLines.push_back(x: DebugLines);
471 }
472 }
473}
474