1//===-- LVDWARFReader.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the LVDWARFReader class.
10// It supports ELF, Mach-O and Wasm binary formats.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h"
15#include "llvm/DebugInfo/DIContext.h"
16#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
17#include "llvm/DebugInfo/DWARF/DWARFExpressionPrinter.h"
18#include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h"
19#include "llvm/DebugInfo/LogicalView/Core/LVLine.h"
20#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
21#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h"
22#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
23#include "llvm/Object/MachO.h"
24#include "llvm/Support/FormatAdapters.h"
25#include "llvm/Support/FormatVariadic.h"
26
27using namespace llvm;
28using namespace llvm::object;
29using namespace llvm::logicalview;
30
31#define DEBUG_TYPE "DWARFReader"
32
33void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
34 LVOffset *OffsetPtr,
35 const AttributeSpec &AttrSpec) {
36 uint64_t OffsetOnEntry = *OffsetPtr;
37 DWARFUnit *U = Die.getDwarfUnit();
38 const DWARFFormValue &FormValue =
39 DWARFFormValue::createFromUnit(F: AttrSpec.Form, Unit: U, OffsetPtr);
40
41 // We are processing .debug_info section, implicit_const attribute
42 // values are not really stored here, but in .debug_abbrev section.
43 auto GetAsUnsignedConstant = [&]() -> int64_t {
44 if (AttrSpec.isImplicitConst())
45 return AttrSpec.getImplicitConstValue();
46 if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
47 return *Val;
48 return 0;
49 };
50
51 auto GetFlag = [](const DWARFFormValue &FormValue) -> bool {
52 return FormValue.isFormClass(FC: DWARFFormValue::FC_Flag);
53 };
54
55 auto GetBoundValue = [&AttrSpec](const DWARFFormValue &FormValue) -> int64_t {
56 switch (FormValue.getForm()) {
57 case dwarf::DW_FORM_ref_addr:
58 case dwarf::DW_FORM_ref1:
59 case dwarf::DW_FORM_ref2:
60 case dwarf::DW_FORM_ref4:
61 case dwarf::DW_FORM_ref8:
62 case dwarf::DW_FORM_ref_udata:
63 case dwarf::DW_FORM_ref_sig8:
64 return *FormValue.getAsReferenceUVal();
65 case dwarf::DW_FORM_data1:
66 case dwarf::DW_FORM_flag:
67 case dwarf::DW_FORM_data2:
68 case dwarf::DW_FORM_data4:
69 case dwarf::DW_FORM_data8:
70 case dwarf::DW_FORM_udata:
71 case dwarf::DW_FORM_ref_sup4:
72 case dwarf::DW_FORM_ref_sup8:
73 return *FormValue.getAsUnsignedConstant();
74 case dwarf::DW_FORM_sdata:
75 return *FormValue.getAsSignedConstant();
76 case dwarf::DW_FORM_implicit_const:
77 return AttrSpec.getImplicitConstValue();
78 default:
79 return 0;
80 }
81 };
82
83 LLVM_DEBUG({
84 dbgs() << " " << hexValue(OffsetOnEntry)
85 << formatv(" {0}", AttrSpec.Attr) << "\n";
86 });
87
88 switch (AttrSpec.Attr) {
89 case dwarf::DW_AT_accessibility:
90 CurrentElement->setAccessibilityCode(GetAsUnsignedConstant());
91 break;
92 case dwarf::DW_AT_artificial:
93 CurrentElement->setIsArtificial();
94 break;
95 case dwarf::DW_AT_bit_size:
96 CurrentElement->setBitSize(GetAsUnsignedConstant());
97 break;
98 case dwarf::DW_AT_byte_size:
99 CurrentElement->setBitSize(GetAsUnsignedConstant() * DWARF_CHAR_BIT);
100 break;
101 case dwarf::DW_AT_call_file:
102 CurrentElement->setCallFilenameIndex(IncrementFileIndex
103 ? GetAsUnsignedConstant() + 1
104 : GetAsUnsignedConstant());
105 break;
106 case dwarf::DW_AT_call_line:
107 CurrentElement->setCallLineNumber(GetAsUnsignedConstant());
108 break;
109 case dwarf::DW_AT_comp_dir:
110 CompileUnit->setCompilationDirectory(dwarf::toStringRef(V: FormValue));
111 break;
112 case dwarf::DW_AT_const_value:
113 if (FormValue.isFormClass(FC: DWARFFormValue::FC_Block)) {
114 ArrayRef<uint8_t> Expr = *FormValue.getAsBlock();
115 // Store the expression as a hexadecimal string.
116 CurrentElement->setValue(
117 llvm::toHex(Input: llvm::toStringRef(Input: Expr), /*LowerCase=*/true));
118 } else if (FormValue.isFormClass(FC: DWARFFormValue::FC_Constant)) {
119 // In the case of negative values, generate the string representation
120 // for a positive value prefixed with the negative sign.
121 if (FormValue.getForm() == dwarf::DW_FORM_sdata) {
122 std::stringstream Stream;
123 int64_t Value = *FormValue.getAsSignedConstant();
124 if (Value < 0) {
125 Stream << "-";
126 Value = std::abs(i: Value);
127 }
128 Stream << hexString(Value, Width: 2);
129 CurrentElement->setValue(Stream.str());
130 } else
131 CurrentElement->setValue(hexString(Value: GetAsUnsignedConstant(), Width: 2));
132 } else
133 CurrentElement->setValue(dwarf::toStringRef(V: FormValue));
134 break;
135 case dwarf::DW_AT_count:
136 CurrentElement->setCount(GetAsUnsignedConstant());
137 break;
138 case dwarf::DW_AT_decl_line:
139 CurrentElement->setLineNumber(GetAsUnsignedConstant());
140 break;
141 case dwarf::DW_AT_decl_file:
142 CurrentElement->setFilenameIndex(IncrementFileIndex
143 ? GetAsUnsignedConstant() + 1
144 : GetAsUnsignedConstant());
145 break;
146 case dwarf::DW_AT_enum_class:
147 if (GetFlag(FormValue))
148 CurrentElement->setIsEnumClass();
149 break;
150 case dwarf::DW_AT_external:
151 if (GetFlag(FormValue))
152 CurrentElement->setIsExternal();
153 break;
154 case dwarf::DW_AT_GNU_discriminator:
155 CurrentElement->setDiscriminator(GetAsUnsignedConstant());
156 break;
157 case dwarf::DW_AT_inline:
158 CurrentElement->setInlineCode(GetAsUnsignedConstant());
159 break;
160 case dwarf::DW_AT_lower_bound:
161 CurrentElement->setLowerBound(GetBoundValue(FormValue));
162 break;
163 case dwarf::DW_AT_name:
164 CurrentElement->setName(dwarf::toStringRef(V: FormValue));
165 break;
166 case dwarf::DW_AT_GNU_template_name:
167 CurrentElement->setValue(dwarf::toStringRef(V: FormValue));
168 break;
169 case dwarf::DW_AT_linkage_name:
170 case dwarf::DW_AT_MIPS_linkage_name:
171 CurrentElement->setLinkageName(dwarf::toStringRef(V: FormValue));
172 break;
173 case dwarf::DW_AT_producer:
174 if (options().getAttributeProducer())
175 CurrentElement->setProducer(dwarf::toStringRef(V: FormValue));
176 break;
177 case dwarf::DW_AT_language:
178 if (options().getAttributeLanguage())
179 CurrentElement->setSourceLanguage(LVSourceLanguage{
180 static_cast<llvm::dwarf::SourceLanguage>(GetAsUnsignedConstant())});
181 break;
182 case dwarf::DW_AT_upper_bound:
183 CurrentElement->setUpperBound(GetBoundValue(FormValue));
184 break;
185 case dwarf::DW_AT_virtuality:
186 CurrentElement->setVirtualityCode(GetAsUnsignedConstant());
187 break;
188
189 case dwarf::DW_AT_abstract_origin:
190 case dwarf::DW_AT_call_origin:
191 case dwarf::DW_AT_extension:
192 case dwarf::DW_AT_import:
193 case dwarf::DW_AT_specification:
194 case dwarf::DW_AT_type:
195 updateReference(Attr: AttrSpec.Attr, FormValue);
196 break;
197
198 case dwarf::DW_AT_low_pc:
199 if (options().getGeneralCollectRanges()) {
200 FoundLowPC = true;
201 // For toolchains that support the removal of unused code, the linker
202 // marks functions that have been removed, by setting the value for the
203 // low_pc to the max address.
204 if (std::optional<uint64_t> Value = FormValue.getAsAddress()) {
205 CurrentLowPC = *Value;
206 } else {
207 uint64_t UValue = FormValue.getRawUValue();
208 if (U->getAddrOffsetSectionItem(Index: UValue)) {
209 CurrentLowPC = *FormValue.getAsAddress();
210 } else {
211 FoundLowPC = false;
212 // We are dealing with an index into the .debug_addr section.
213 LLVM_DEBUG({
214 dbgs() << formatv("indexed ({0:x-8}) address = ", (uint32_t)UValue);
215 });
216 }
217 }
218 if (FoundLowPC) {
219 if (CurrentLowPC == getTombstoneAddress())
220 CurrentElement->setIsDiscarded();
221 else
222 // Consider the case of WebAssembly.
223 CurrentLowPC += WasmCodeSectionOffset;
224 if (CurrentElement->isCompileUnit())
225 setCUBaseAddress(CurrentLowPC);
226 }
227 }
228 break;
229
230 case dwarf::DW_AT_high_pc:
231 if (options().getGeneralCollectRanges()) {
232 FoundHighPC = true;
233 if (std::optional<uint64_t> Address = FormValue.getAsAddress())
234 // High PC is an address.
235 CurrentHighPC = *Address;
236 if (std::optional<uint64_t> Offset = FormValue.getAsUnsignedConstant())
237 // High PC is an offset from LowPC.
238 // Don't add the WebAssembly offset if we have seen a DW_AT_low_pc, as
239 // the CurrentLowPC has already that offset added. Basically, use the
240 // original DW_AT_loc_pc value.
241 CurrentHighPC =
242 (FoundLowPC ? CurrentLowPC - WasmCodeSectionOffset : CurrentLowPC) +
243 *Offset;
244 // Store the real upper limit for the address range.
245 if (UpdateHighAddress && CurrentHighPC > 0)
246 --CurrentHighPC;
247 // Consider the case of WebAssembly.
248 CurrentHighPC += WasmCodeSectionOffset;
249 if (CurrentElement->isCompileUnit())
250 setCUHighAddress(CurrentHighPC);
251 }
252 break;
253
254 case dwarf::DW_AT_ranges:
255 if (RangesDataAvailable && options().getGeneralCollectRanges()) {
256 auto GetRanges = [](const DWARFFormValue &FormValue,
257 DWARFUnit *U) -> Expected<DWARFAddressRangesVector> {
258 if (FormValue.getForm() == dwarf::DW_FORM_rnglistx)
259 return U->findRnglistFromIndex(Index: *FormValue.getAsSectionOffset());
260 return U->findRnglistFromOffset(Offset: *FormValue.getAsSectionOffset());
261 };
262 Expected<DWARFAddressRangesVector> RangesOrError =
263 GetRanges(FormValue, U);
264 if (!RangesOrError) {
265 LLVM_DEBUG({
266 dbgs() << formatv("error decoding address ranges = {0}",
267 fmt_consume(RangesOrError.takeError()));
268 });
269 consumeError(Err: RangesOrError.takeError());
270 break;
271 }
272 // The address ranges are absolute. There is no need to add any addend.
273 DWARFAddressRangesVector Ranges = RangesOrError.get();
274 for (DWARFAddressRange &Range : Ranges) {
275 // This seems to be a tombstone for empty ranges.
276 if ((Range.LowPC == Range.HighPC) ||
277 (Range.LowPC == getTombstoneAddress()))
278 continue;
279 // Store the real upper limit for the address range.
280 if (UpdateHighAddress && Range.HighPC > 0)
281 --Range.HighPC;
282 // Consider the case of WebAssembly.
283 Range.LowPC += WasmCodeSectionOffset;
284 Range.HighPC += WasmCodeSectionOffset;
285 // Add the pair of addresses.
286 CurrentScope->addObject(LowerAddress: Range.LowPC, UpperAddress: Range.HighPC);
287 // If the scope is the CU, do not update the ranges set.
288 if (!CurrentElement->isCompileUnit())
289 CurrentRanges.emplace_back(args&: Range.LowPC, args&: Range.HighPC);
290 }
291 }
292 break;
293
294 // Get the location list for the symbol.
295 case dwarf::DW_AT_data_member_location:
296 if (options().getAttributeAnyLocation())
297 processLocationMember(Attr: AttrSpec.Attr, FormValue, Die, OffsetOnEntry);
298 break;
299
300 // Get the location list for the symbol.
301 case dwarf::DW_AT_location:
302 case dwarf::DW_AT_string_length:
303 case dwarf::DW_AT_use_location:
304 if (options().getAttributeAnyLocation() && CurrentSymbol)
305 processLocationList(Attr: AttrSpec.Attr, FormValue, Die, OffsetOnEntry);
306 break;
307
308 case dwarf::DW_AT_call_data_value:
309 case dwarf::DW_AT_call_value:
310 case dwarf::DW_AT_GNU_call_site_data_value:
311 case dwarf::DW_AT_GNU_call_site_value:
312 if (options().getAttributeAnyLocation() && CurrentSymbol)
313 processLocationList(Attr: AttrSpec.Attr, FormValue, Die, OffsetOnEntry,
314 /*CallSiteLocation=*/true);
315 break;
316
317 default:
318 break;
319 }
320}
321
322LVScope *LVDWARFReader::processOneDie(const DWARFDie &InputDIE, LVScope *Parent,
323 DWARFDie &SkeletonDie) {
324 // If the input DIE corresponds to the compile unit, it can be:
325 // a) Simple DWARF: a standard DIE. Ignore the skeleton DIE (is empty).
326 // b) Split DWARF: the DIE for the split DWARF. The skeleton is the DIE
327 // for the skeleton DWARF. Process both DIEs.
328 const DWARFDie &DIE = SkeletonDie.isValid() ? SkeletonDie : InputDIE;
329 DWARFDataExtractor DebugInfoData =
330 DIE.getDwarfUnit()->getDebugInfoExtractor();
331 LVOffset Offset = DIE.getOffset();
332
333 // Reset values for the current DIE.
334 CurrentLowPC = 0;
335 CurrentHighPC = 0;
336 CurrentOffset = Offset;
337 CurrentEndOffset = 0;
338 FoundLowPC = false;
339 FoundHighPC = false;
340
341 // Process supported attributes.
342 if (DebugInfoData.isValidOffset(offset: Offset)) {
343
344 LLVM_DEBUG({
345 dbgs() << "DIE: " << hexValue(Offset) << formatv(" {0}", DIE.getTag())
346 << "\n";
347 });
348
349 // Create the logical view element for the current DIE.
350 dwarf::Tag Tag = DIE.getTag();
351 CurrentElement = createElement(Tag);
352 if (!CurrentElement)
353 return CurrentScope;
354
355 CurrentElement->setTag(Tag);
356 CurrentElement->setOffset(Offset);
357
358 if (options().getAttributeAnySource() && CurrentElement->isCompileUnit())
359 addCompileUnitOffset(Offset,
360 CompileUnit: static_cast<LVScopeCompileUnit *>(CurrentElement));
361
362 // Insert the newly created element into the element symbol table. If the
363 // element is in the list, it means there are previously created elements
364 // referencing this element.
365 auto [It, Inserted] = ElementTable.try_emplace(Key: Offset, Args&: CurrentElement);
366 if (!Inserted) {
367 // There are previous references to this element. We need to update the
368 // element and all the references pointing to this element.
369 LVElementEntry &Reference = ElementTable[Offset];
370 Reference.Element = CurrentElement;
371 // Traverse the element set and update the elements (backtracking).
372 for (LVElement *Target : Reference.References)
373 Target->setReference(CurrentElement);
374 for (LVElement *Target : Reference.Types)
375 Target->setType(CurrentElement);
376 // Clear the pending elements.
377 Reference.References.clear();
378 Reference.Types.clear();
379 }
380
381 // Add the current element to its parent as there are attributes
382 // (locations) that require the scope level.
383 if (CurrentScope)
384 Parent->addElement(Scope: CurrentScope);
385 else if (CurrentSymbol)
386 Parent->addElement(Symbol: CurrentSymbol);
387 else if (CurrentType)
388 Parent->addElement(Type: CurrentType);
389
390 // Process the attributes for the given DIE.
391 auto ProcessAttributes = [&](const DWARFDie &TheDIE,
392 DWARFDataExtractor &DebugData) {
393 CurrentEndOffset = Offset;
394 uint32_t abbrCode = DebugData.getULEB128(offset_ptr: &CurrentEndOffset);
395 if (abbrCode) {
396 if (const DWARFAbbreviationDeclaration *AbbrevDecl =
397 TheDIE.getAbbreviationDeclarationPtr())
398 for (const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec :
399 AbbrevDecl->attributes())
400 processOneAttribute(Die: TheDIE, OffsetPtr: &CurrentEndOffset, AttrSpec);
401 }
402 };
403
404 ProcessAttributes(DIE, DebugInfoData);
405
406 // If the input DIE is for a compile unit, process its attributes in
407 // the case of split DWARF, to override any common attribute values.
408 if (SkeletonDie.isValid()) {
409 DWARFDataExtractor DebugInfoData =
410 InputDIE.getDwarfUnit()->getDebugInfoExtractor();
411 LVOffset Offset = InputDIE.getOffset();
412 if (DebugInfoData.isValidOffset(offset: Offset))
413 ProcessAttributes(InputDIE, DebugInfoData);
414 }
415 }
416
417 if (CurrentScope) {
418 if (CurrentScope->getCanHaveRanges()) {
419 // If the scope has ranges, they are already added to the scope.
420 // Add any collected LowPC/HighPC values.
421 bool IsCompileUnit = CurrentScope->getIsCompileUnit();
422 if (FoundLowPC && FoundHighPC) {
423 CurrentScope->addObject(LowerAddress: CurrentLowPC, UpperAddress: CurrentHighPC);
424 if (!IsCompileUnit) {
425 // If the scope is a function, add it to the public names.
426 if ((options().getAttributePublics() ||
427 options().getPrintAnyLine()) &&
428 CurrentScope->getIsFunction() &&
429 !CurrentScope->getIsInlinedFunction())
430 CompileUnit->addPublicName(Scope: CurrentScope, LowPC: CurrentLowPC,
431 HighPC: CurrentHighPC);
432 }
433 }
434
435 // Look for scopes with ranges and no linkage name information that
436 // are referencing another scopes via DW_AT_specification. They are
437 // possible candidates for a comdat scope.
438 if (CurrentScope->getHasRanges() &&
439 !CurrentScope->getLinkageNameIndex() &&
440 CurrentScope->getHasReferenceSpecification()) {
441 // Get the linkage name in order to search for a possible comdat.
442 std::optional<DWARFFormValue> LinkageDIE =
443 DIE.findRecursively(Attrs: dwarf::DW_AT_linkage_name);
444 if (LinkageDIE.has_value()) {
445 StringRef Name(dwarf::toStringRef(V: LinkageDIE));
446 if (!Name.empty())
447 CurrentScope->setLinkageName(Name);
448 }
449 }
450
451 // If the current scope is in the 'LinkageNames' table, update its
452 // logical scope. For other scopes, always we will assume the default
453 // ".text" section index.
454 LVSectionIndex SectionIndex = updateSymbolTable(Function: CurrentScope);
455 if (CurrentScope->getIsComdat())
456 CompileUnit->setHasComdatScopes();
457
458 // Update section index contained ranges.
459 if (SectionIndex) {
460 if (!CurrentRanges.empty()) {
461 for (LVAddressRange &Range : CurrentRanges)
462 addSectionRange(SectionIndex, Scope: CurrentScope, LowerAddress: Range.first,
463 UpperAddress: Range.second > Range.first
464 ? Range.second - 1 // Make hi-pc exclusive
465 : Range.second);
466 CurrentRanges.clear();
467 }
468 // If the scope is the CU, do not update the ranges set.
469 if (FoundLowPC && FoundHighPC && !IsCompileUnit) {
470 addSectionRange(SectionIndex, Scope: CurrentScope, LowerAddress: CurrentLowPC,
471 UpperAddress: CurrentHighPC > CurrentLowPC
472 ? CurrentHighPC - 1 // Make hi-pc exclusive
473 : CurrentHighPC);
474 }
475 }
476 }
477 // Mark member functions.
478 if (Parent->getIsAggregate())
479 CurrentScope->setIsMember();
480 }
481
482 // Keep track of symbols with locations.
483 if (options().getAttributeAnyLocation() && CurrentSymbol &&
484 CurrentSymbol->getHasLocation())
485 SymbolsWithLocations.push_back(Elt: CurrentSymbol);
486
487 // If we have template parameters, mark the parent as template.
488 if (CurrentType && CurrentType->getIsTemplateParam())
489 Parent->setIsTemplate();
490
491 return CurrentScope;
492}
493
494void LVDWARFReader::traverseDieAndChildren(DWARFDie &DIE, LVScope *Parent,
495 DWARFDie &SkeletonDie) {
496 // Process the current DIE.
497 LVScope *Scope = processOneDie(InputDIE: DIE, Parent, SkeletonDie);
498 if (Scope) {
499 LVOffset Lower = DIE.getOffset();
500 LVOffset Upper = CurrentEndOffset;
501 DWARFDie DummyDie;
502 // Traverse the children chain.
503 DWARFDie Child = DIE.getFirstChild();
504 while (Child) {
505 traverseDieAndChildren(DIE&: Child, Parent: Scope, SkeletonDie&: DummyDie);
506 Upper = Child.getOffset();
507 Child = Child.getSibling();
508 }
509 // Calculate contributions to the debug info section.
510 if (options().getPrintSizes() && Upper)
511 CompileUnit->addSize(Scope, Lower, Upper);
512 }
513}
514
515void LVDWARFReader::processLocationGaps() {
516 if (options().getAttributeAnyLocation())
517 for (LVSymbol *Symbol : SymbolsWithLocations)
518 Symbol->fillLocationGaps();
519}
520
521void LVDWARFReader::createLineAndFileRecords(
522 const DWARFDebugLine::LineTable *Lines) {
523 if (!Lines)
524 return;
525
526 // Get the source filenames.
527 if (!Lines->Prologue.FileNames.empty())
528 for (const DWARFDebugLine::FileNameEntry &Entry :
529 Lines->Prologue.FileNames) {
530 std::string Directory;
531 if (Lines->getDirectoryForEntry(Entry, Directory))
532 Directory = transformPath(Path: Directory);
533 if (Directory.empty())
534 Directory = std::string(CompileUnit->getCompilationDirectory());
535 std::string File = transformPath(Path: dwarf::toStringRef(V: Entry.Name));
536 std::string String;
537 raw_string_ostream(String) << Directory << "/" << File;
538 CompileUnit->addFilename(Name: String);
539 }
540
541 // In DWARF5 the file indexes start at 0;
542 bool IncrementIndex = Lines->Prologue.getVersion() >= 5;
543
544 // Get the source lines if requested by command line option.
545 if (options().getPrintLines() && Lines->Rows.size())
546 for (const DWARFDebugLine::Row &Row : Lines->Rows) {
547 // Here we collect logical debug lines in CULines. Later on,
548 // the 'processLines()' function will move each created logical line
549 // to its enclosing logical scope, using the debug ranges information
550 // and they will be released when its scope parent is deleted.
551 LVLineDebug *Line = createLineDebug();
552 CULines.push_back(Elt: Line);
553 // Consider the case of WebAssembly.
554 Line->setAddress(Row.Address.Address + WasmCodeSectionOffset);
555 Line->setFilename(
556 CompileUnit->getFilename(Index: IncrementIndex ? Row.File + 1 : Row.File));
557 Line->setLineNumber(Row.Line);
558 if (Row.Discriminator)
559 Line->setDiscriminator(Row.Discriminator);
560 if (Row.IsStmt)
561 Line->setIsNewStatement();
562 if (Row.BasicBlock)
563 Line->setIsBasicBlock();
564 if (Row.EndSequence)
565 Line->setIsEndSequence();
566 if (Row.EpilogueBegin)
567 Line->setIsEpilogueBegin();
568 if (Row.PrologueEnd)
569 Line->setIsPrologueEnd();
570 LLVM_DEBUG({
571 dbgs() << "Address: " << hexValue(Line->getAddress())
572 << " Line: " << Line->lineNumberAsString(/*ShowZero=*/true)
573 << "\n";
574 });
575 }
576}
577
578std::string LVDWARFReader::getRegisterName(LVSmall Opcode,
579 ArrayRef<uint64_t> Operands) {
580 // The 'prettyPrintRegisterOp' function uses the DWARFUnit to support
581 // DW_OP_regval_type. At this point we are operating on a logical view
582 // item, with no access to the underlying DWARF data used by LLVM.
583 // We do not support DW_OP_regval_type here.
584 if (Opcode == dwarf::DW_OP_regval_type)
585 return {};
586
587 std::string string;
588 raw_string_ostream Stream(string);
589 DIDumpOptions DumpOpts;
590 auto *MCRegInfo = MRI.get();
591 auto GetRegName = [&MCRegInfo](uint64_t DwarfRegNum, bool IsEH) -> StringRef {
592 if (!MCRegInfo)
593 return {};
594 if (std::optional<MCRegister> LLVMRegNum =
595 MCRegInfo->getLLVMRegNum(RegNum: DwarfRegNum, isEH: IsEH))
596 if (const char *RegName = MCRegInfo->getName(RegNo: *LLVMRegNum))
597 return StringRef(RegName);
598 return {};
599 };
600 DumpOpts.GetNameForDWARFReg = GetRegName;
601 prettyPrintRegisterOp(/*U=*/nullptr, OS&: Stream, DumpOpts, Opcode, Operands);
602 return Stream.str();
603}
604
605Error LVDWARFReader::createScopes() {
606 LLVM_DEBUG({
607 W.startLine() << "\n";
608 W.printString("File", Obj.getFileName().str());
609 W.printString("Format", FileFormatName);
610 });
611
612 if (Error Err = LVReader::createScopes())
613 return Err;
614
615 // As the DwarfContext object is valid only during the scopes creation,
616 // we need to create our own Target information, to be used during the
617 // logical view printing, in the case of instructions being requested.
618 std::unique_ptr<DWARFContext> DwarfContext = DWARFContext::create(Obj);
619 if (!DwarfContext)
620 return createStringError(EC: errc::invalid_argument,
621 Fmt: "Could not create DWARF information: %s",
622 Vals: getFilename().str().c_str());
623
624 if (Error Err = loadTargetInfo(Obj))
625 return Err;
626
627 // Create a mapping for virtual addresses.
628 mapVirtualAddress(Obj);
629
630 // Select the correct compile unit range, depending if we are dealing with
631 // a standard or split DWARF object.
632 DWARFContext::compile_unit_range CompileUnits =
633 DwarfContext->getNumCompileUnits() ? DwarfContext->compile_units()
634 : DwarfContext->dwo_compile_units();
635 for (const std::unique_ptr<DWARFUnit> &CU : CompileUnits) {
636
637 // Take into account the address byte size for a correct 'tombstone'
638 // value identification.
639 setTombstoneAddress(
640 dwarf::computeTombstoneAddress(AddressByteSize: CU->getAddressByteSize()));
641
642 // Deduction of index used for the line records.
643 //
644 // For the following test case: test.cpp
645 // void foo(void ParamPtr) { }
646
647 // Both GCC and Clang generate DWARF-5 .debug_line layout.
648
649 // * GCC (GNU C++17 11.3.0) - All DW_AT_decl_file use index 1.
650 //
651 // .debug_info:
652 // format = DWARF32, version = 0x0005
653 // DW_TAG_compile_unit
654 // DW_AT_name ("test.cpp")
655 // DW_TAG_subprogram ("foo")
656 // DW_AT_decl_file (1)
657 // DW_TAG_formal_parameter ("ParamPtr")
658 // DW_AT_decl_file (1)
659 // .debug_line:
660 // Line table prologue: format (DWARF32), version (5)
661 // include_directories[0] = "..."
662 // file_names[0]: name ("test.cpp"), dir_index (0)
663 // file_names[1]: name ("test.cpp"), dir_index (0)
664
665 // * Clang (14.0.6) - All DW_AT_decl_file use index 0.
666 //
667 // .debug_info:
668 // format = DWARF32, version = 0x0005
669 // DW_AT_producer ("clang version 14.0.6")
670 // DW_AT_name ("test.cpp")
671 //
672 // DW_TAG_subprogram ("foo")
673 // DW_AT_decl_file (0)
674 // DW_TAG_formal_parameter ("ParamPtr")
675 // DW_AT_decl_file (0)
676 // .debug_line:
677 // Line table prologue: format (DWARF32), version (5)
678 // include_directories[0] = "..."
679 // file_names[0]: name ("test.cpp"), dir_index (0)
680
681 // From DWARFDebugLine::getFileNameByIndex documentation:
682 // In Dwarf 4, the files are 1-indexed.
683 // In Dwarf 5, the files are 0-indexed.
684 // Additional discussions here:
685 // https://www.mail-archive.com/dwarf-discuss@lists.dwarfstd.org/msg00883.html
686
687 // The DWARF reader is expecting the files are 1-indexed, so using
688 // the .debug_line header information decide if the indexed require
689 // an internal adjustment.
690
691 // For the case of GCC (DWARF5), if the entries[0] and [1] are the
692 // same, do not perform any adjustment.
693 auto DeduceIncrementFileIndex = [&]() -> bool {
694 if (CU->getVersion() < 5)
695 // DWARF-4 or earlier -> Don't increment index.
696 return false;
697
698 if (const DWARFDebugLine::LineTable *LT =
699 CU->getContext().getLineTableForUnit(U: CU.get())) {
700 // Check if there are at least 2 entries and if they are the same.
701 if (LT->hasFileAtIndex(FileIndex: 0) && LT->hasFileAtIndex(FileIndex: 1)) {
702 const DWARFDebugLine::FileNameEntry &EntryZero =
703 LT->Prologue.getFileNameEntry(Index: 0);
704 const DWARFDebugLine::FileNameEntry &EntryOne =
705 LT->Prologue.getFileNameEntry(Index: 1);
706 // Check directory indexes.
707 if (EntryZero.DirIdx != EntryOne.DirIdx)
708 // DWARF-5 -> Increment index.
709 return true;
710 // Check filename.
711 std::string FileZero;
712 std::string FileOne;
713 StringRef None;
714 LT->getFileNameByIndex(
715 FileIndex: 0, CompDir: None, Kind: DILineInfoSpecifier::FileLineInfoKind::RawValue,
716 Result&: FileZero);
717 LT->getFileNameByIndex(
718 FileIndex: 1, CompDir: None, Kind: DILineInfoSpecifier::FileLineInfoKind::RawValue,
719 Result&: FileOne);
720 return FileZero != FileOne;
721 }
722 }
723
724 // DWARF-5 -> Increment index.
725 return true;
726 };
727 // The DWARF reader expects the indexes as 1-indexed.
728 IncrementFileIndex = DeduceIncrementFileIndex();
729
730 DWARFDie UnitDie = CU->getUnitDIE();
731 SmallString<16> DWOAlternativeLocation;
732 if (UnitDie) {
733 std::optional<const char *> DWOFileName =
734 CU->getVersion() >= 5
735 ? dwarf::toString(V: UnitDie.find(Attr: dwarf::DW_AT_dwo_name))
736 : dwarf::toString(V: UnitDie.find(Attr: dwarf::DW_AT_GNU_dwo_name));
737 StringRef From(DWOFileName.value_or(u: ""));
738 DWOAlternativeLocation = createAlternativePath(From);
739 }
740
741 // The current CU can be a normal compile unit (standard) or a skeleton
742 // compile unit (split). For both cases, the returned die, will be used
743 // to create the logical scopes.
744 DWARFDie CUDie = CU->getNonSkeletonUnitDIE(
745 /*ExtractUnitDIEOnly=*/false,
746 /*DWOAlternativeLocation=*/DWOAlternativeLocation);
747 if (!CUDie.isValid())
748 continue;
749
750 // The current unit corresponds to the .dwo file. We need to get the
751 // skeleton unit and query for any ranges that will enclose any ranges
752 // in the non-skeleton unit.
753 DWARFDie DummyDie;
754 DWARFDie SkeletonDie =
755 CUDie.getDwarfUnit()->isDWOUnit() ? CU->getUnitDIE(ExtractUnitDIEOnly: false) : DummyDie;
756 // Disable the ranges processing if we have just a single .dwo object,
757 // as any DW_AT_ranges will access not available range information.
758 RangesDataAvailable =
759 (!CUDie.getDwarfUnit()->isDWOUnit() ||
760 (SkeletonDie.isValid() ? !SkeletonDie.getDwarfUnit()->isDWOUnit()
761 : true));
762
763 traverseDieAndChildren(DIE&: CUDie, Parent: Root, SkeletonDie);
764
765 createLineAndFileRecords(Lines: DwarfContext->getLineTableForUnit(U: CU.get()));
766 if (Error Err = createInstructions())
767 return Err;
768
769 // Process the compilation unit, as there are cases where enclosed
770 // functions have the same ranges values. Insert the compilation unit
771 // ranges at the end, to allow enclosing ranges to be first in the list.
772 LVSectionIndex SectionIndex = getSectionIndex(Scope: CompileUnit);
773 addSectionRange(SectionIndex, Scope: CompileUnit);
774 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
775 ScopesWithRanges->sort();
776
777 processLines(DebugLines: &CULines, SectionIndex);
778 processLocationGaps();
779
780 // These are per compile unit.
781 ScopesWithRanges->clear();
782 SymbolsWithLocations.clear();
783 CULines.clear();
784 }
785
786 return Error::success();
787}
788
789// Get the location information for the associated attribute.
790void LVDWARFReader::processLocationList(dwarf::Attribute Attr,
791 const DWARFFormValue &FormValue,
792 const DWARFDie &Die,
793 uint64_t OffsetOnEntry,
794 bool CallSiteLocation) {
795
796 auto ProcessLocationExpression = [&](const DWARFExpression &Expression) {
797 for (const DWARFExpression::Operation &Op : Expression)
798 CurrentSymbol->addLocationOperands(Opcode: Op.getCode(), Operands: Op.getRawOperands());
799 };
800
801 DWARFUnit *U = Die.getDwarfUnit();
802 DWARFContext &DwarfContext = U->getContext();
803 bool IsLittleEndian = DwarfContext.isLittleEndian();
804 if (FormValue.isFormClass(FC: DWARFFormValue::FC_Block) ||
805 (DWARFAttribute::mayHaveLocationExpr(Attr) &&
806 FormValue.isFormClass(FC: DWARFFormValue::FC_Exprloc))) {
807 ArrayRef<uint8_t> Expr = *FormValue.getAsBlock();
808 DataExtractor Data(Expr, IsLittleEndian);
809 DWARFExpression Expression(Data, U->getAddressByteSize(),
810 U->getFormParams().Format);
811
812 // Add location and operation entries.
813 CurrentSymbol->addLocation(Attr, /*LowPC=*/0, /*HighPC=*/-1,
814 /*SectionOffset=*/0, LocDescOffset: OffsetOnEntry,
815 CallSiteLocation);
816 ProcessLocationExpression(Expression);
817 return;
818 }
819
820 if (DWARFAttribute::mayHaveLocationList(Attr) &&
821 FormValue.isFormClass(FC: DWARFFormValue::FC_SectionOffset)) {
822 uint64_t Offset = *FormValue.getAsSectionOffset();
823 if (FormValue.getForm() == dwarf::DW_FORM_loclistx) {
824 std::optional<uint64_t> LoclistOffset = U->getLoclistOffset(Index: Offset);
825 if (!LoclistOffset)
826 return;
827 Offset = *LoclistOffset;
828 }
829 uint64_t BaseAddr = 0;
830 if (std::optional<SectionedAddress> BA = U->getBaseAddress())
831 BaseAddr = BA->Address;
832 LVAddress LowPC = 0;
833 LVAddress HighPC = 0;
834
835 auto ProcessLocationEntry = [&](const DWARFLocationEntry &Entry) {
836 if (Entry.Kind == dwarf::DW_LLE_base_address) {
837 BaseAddr = Entry.Value0;
838 return;
839 }
840 if (Entry.Kind == dwarf::DW_LLE_offset_pair) {
841 LowPC = BaseAddr + Entry.Value0;
842 HighPC = BaseAddr + Entry.Value1;
843 DWARFAddressRange Range{LowPC, HighPC, Entry.SectionIndex};
844 if (Range.SectionIndex == SectionedAddress::UndefSection)
845 Range.SectionIndex = Entry.SectionIndex;
846 DWARFLocationExpression Loc{.Range: Range, .Expr: Entry.Loc};
847 DWARFDataExtractor Data(Loc.Expr, IsLittleEndian,
848 U->getAddressByteSize());
849 DWARFExpression Expression(Data, U->getAddressByteSize());
850
851 // Store the real upper limit for the address range.
852 if (UpdateHighAddress && HighPC > 0)
853 --HighPC;
854 // Add location and operation entries.
855 CurrentSymbol->addLocation(Attr, LowPC, HighPC, SectionOffset: Offset, LocDescOffset: OffsetOnEntry,
856 CallSiteLocation);
857 ProcessLocationExpression(Expression);
858 }
859 };
860 Error E = U->getLocationTable().visitLocationList(
861 Offset: &Offset, Callback: [&](const DWARFLocationEntry &E) {
862 ProcessLocationEntry(E);
863 return true;
864 });
865 if (E)
866 consumeError(Err: std::move(E));
867 }
868}
869
870void LVDWARFReader::processLocationMember(dwarf::Attribute Attr,
871 const DWARFFormValue &FormValue,
872 const DWARFDie &Die,
873 uint64_t OffsetOnEntry) {
874 // Check if the value is an integer constant.
875 if (FormValue.isFormClass(FC: DWARFFormValue::FC_Constant))
876 // Add a record to hold a constant as location.
877 CurrentSymbol->addLocationConstant(Attr, Constant: *FormValue.getAsUnsignedConstant(),
878 LocDescOffset: OffsetOnEntry);
879 else
880 // This is a location description, or a reference to one.
881 processLocationList(Attr, FormValue, Die, OffsetOnEntry);
882}
883
884// Update the current element with the reference.
885void LVDWARFReader::updateReference(dwarf::Attribute Attr,
886 const DWARFFormValue &FormValue) {
887 // FIXME: We are assuming that at most one Reference (DW_AT_specification,
888 // DW_AT_abstract_origin, ...) and at most one Type (DW_AT_import, DW_AT_type)
889 // appear in any single DIE, but this may not be true.
890 uint64_t Offset;
891 if (std::optional<uint64_t> Off = FormValue.getAsRelativeReference())
892 Offset = FormValue.getUnit()->getOffset() + *Off;
893 else if (Off = FormValue.getAsDebugInfoReference(); Off)
894 Offset = *Off;
895 else
896 llvm_unreachable("Unsupported reference type");
897
898 // Get target for the given reference, if already created.
899 LVElement *Target = getElementForOffset(
900 offset: Offset, Element: CurrentElement,
901 /*IsType=*/Attr == dwarf::DW_AT_import || Attr == dwarf::DW_AT_type);
902 // Check if we are dealing with cross CU references.
903 if (FormValue.getForm() == dwarf::DW_FORM_ref_addr) {
904 if (Target) {
905 // The global reference is ready. Mark it as global.
906 Target->setIsGlobalReference();
907 // Remove global reference from the unseen list.
908 removeGlobalOffset(Offset);
909 } else
910 // Record the unseen cross CU reference.
911 addGlobalOffset(Offset);
912 }
913
914 // At this point, 'Target' can be null, in the case of the target element
915 // not being seen. But the correct bit is set, to indicate that the target
916 // is being referenced by (abstract_origin, extension, specification) or
917 // (import, type).
918 // We must differentiate between the kind of reference. This is needed to
919 // complete inlined function instances with dropped abstract references,
920 // in order to facilitate a logical comparison.
921 switch (Attr) {
922 case dwarf::DW_AT_abstract_origin:
923 case dwarf::DW_AT_call_origin:
924 CurrentElement->setReference(Target);
925 CurrentElement->setHasReferenceAbstract();
926 break;
927 case dwarf::DW_AT_extension:
928 CurrentElement->setReference(Target);
929 CurrentElement->setHasReferenceExtension();
930 break;
931 case dwarf::DW_AT_specification:
932 CurrentElement->setReference(Target);
933 CurrentElement->setHasReferenceSpecification();
934 break;
935 case dwarf::DW_AT_import:
936 case dwarf::DW_AT_type:
937 CurrentElement->setType(Target);
938 break;
939 default:
940 break;
941 }
942}
943
944// Get an element given the DIE offset.
945LVElement *LVDWARFReader::getElementForOffset(LVOffset Offset,
946 LVElement *Element, bool IsType) {
947 // Update the element and all the references pointing to this element.
948 LVElementEntry &Entry = ElementTable[Offset];
949 if (!Entry.Element) {
950 if (IsType)
951 Entry.Types.insert(Ptr: Element);
952 else
953 Entry.References.insert(Ptr: Element);
954 }
955 return Entry.Element;
956}
957
958Error LVDWARFReader::loadTargetInfo(const ObjectFile &Obj) {
959 // Detect the architecture from the object file. We usually don't need OS
960 // info to lookup a target and create register info.
961 Triple TT = Obj.makeTriple();
962
963 // Features to be passed to target/subtarget
964 SubtargetFeatures FeaturesValue;
965 if (Expected<SubtargetFeatures> Features = Obj.getFeatures())
966 FeaturesValue = std::move(*Features);
967 else
968 consumeError(Err: Features.takeError());
969
970 StringRef CPU;
971 if (auto OptCPU = Obj.tryGetCPUName())
972 CPU = *OptCPU;
973
974 return loadGenericTargetInfo(TheTriple: TT.str(), TheFeatures: FeaturesValue.getString(), TheCPU: CPU);
975}
976
977void LVDWARFReader::mapRangeAddress(const ObjectFile &Obj) {
978 for (auto Iter = Obj.symbol_begin(); Iter != Obj.symbol_end(); ++Iter) {
979 const SymbolRef &Symbol = *Iter;
980
981 Expected<SymbolRef::Type> TypeOrErr = Symbol.getType();
982 if (!TypeOrErr) {
983 consumeError(Err: TypeOrErr.takeError());
984 continue;
985 }
986
987 // Process only symbols that represent a function.
988 SymbolRef::Type Type = *TypeOrErr;
989 if (Type != SymbolRef::ST_Function)
990 continue;
991
992 // In the case of a Mach-O STAB symbol, get its section only if
993 // the STAB symbol's section field refers to a valid section index.
994 // Otherwise the symbol may error trying to load a section that
995 // does not exist.
996 const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Val: &Obj);
997 bool IsSTAB = false;
998 if (MachO) {
999 DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
1000 uint8_t NType =
1001 (MachO->is64Bit() ? MachO->getSymbol64TableEntry(DRI: SymDRI).n_type
1002 : MachO->getSymbolTableEntry(DRI: SymDRI).n_type);
1003 if (NType & MachO::N_STAB)
1004 IsSTAB = true;
1005 }
1006
1007 Expected<section_iterator> IterOrErr = Symbol.getSection();
1008 if (!IterOrErr) {
1009 consumeError(Err: IterOrErr.takeError());
1010 continue;
1011 }
1012 section_iterator Section = IsSTAB ? Obj.section_end() : *IterOrErr;
1013 if (Section == Obj.section_end())
1014 continue;
1015
1016 // Get the symbol value.
1017 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
1018 if (!AddressOrErr) {
1019 consumeError(Err: AddressOrErr.takeError());
1020 continue;
1021 }
1022 uint64_t Address = *AddressOrErr;
1023
1024 // Get symbol name.
1025 StringRef Name;
1026 Expected<StringRef> NameOrErr = Symbol.getName();
1027 if (!NameOrErr) {
1028 consumeError(Err: NameOrErr.takeError());
1029 continue;
1030 }
1031 Name = *NameOrErr;
1032
1033 // Check if the symbol is Comdat.
1034 Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
1035 if (!FlagsOrErr) {
1036 consumeError(Err: FlagsOrErr.takeError());
1037 continue;
1038 }
1039 uint32_t Flags = *FlagsOrErr;
1040
1041 // Mark the symbol as 'comdat' in any of the following cases:
1042 // - Symbol has the SF_Weak flag or
1043 // - Symbol section index different from the DotTextSectionIndex.
1044 LVSectionIndex SectionIndex = Section->getIndex();
1045 bool IsComdat =
1046 (Flags & SymbolRef::SF_Weak) || (SectionIndex != DotTextSectionIndex);
1047
1048 // Record the symbol name (linkage) and its loading address.
1049 addToSymbolTable(Name, Address, SectionIndex, IsComdat);
1050 }
1051}
1052
1053void LVDWARFReader::sortScopes() { Root->sort(); }
1054
1055void LVDWARFReader::print(raw_ostream &OS) const {
1056 OS << "LVType\n";
1057 LLVM_DEBUG(dbgs() << "CreateReaders\n");
1058}
1059