1//===-- LVDWARFReader.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the LVDWARFReader class.
10// It supports ELF, Mach-O and Wasm binary formats.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h"
15#include "llvm/DebugInfo/DIContext.h"
16#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
17#include "llvm/DebugInfo/DWARF/DWARFExpressionPrinter.h"
18#include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h"
19#include "llvm/DebugInfo/LogicalView/Core/LVLine.h"
20#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
21#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h"
22#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
23#include "llvm/Object/MachO.h"
24#include "llvm/Support/FormatVariadic.h"
25
26using namespace llvm;
27using namespace llvm::object;
28using namespace llvm::logicalview;
29
30#define DEBUG_TYPE "DWARFReader"
31
32void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
33 LVOffset *OffsetPtr,
34 const AttributeSpec &AttrSpec) {
35 uint64_t OffsetOnEntry = *OffsetPtr;
36 DWARFUnit *U = Die.getDwarfUnit();
37 const DWARFFormValue &FormValue =
38 DWARFFormValue::createFromUnit(F: AttrSpec.Form, Unit: U, OffsetPtr);
39
40 // We are processing .debug_info section, implicit_const attribute
41 // values are not really stored here, but in .debug_abbrev section.
42 auto GetAsUnsignedConstant = [&]() -> int64_t {
43 if (AttrSpec.isImplicitConst())
44 return AttrSpec.getImplicitConstValue();
45 if (std::optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
46 return *Val;
47 return 0;
48 };
49
50 auto GetFlag = [](const DWARFFormValue &FormValue) -> bool {
51 return FormValue.isFormClass(FC: DWARFFormValue::FC_Flag);
52 };
53
54 auto GetBoundValue = [&AttrSpec](const DWARFFormValue &FormValue) -> int64_t {
55 switch (FormValue.getForm()) {
56 case dwarf::DW_FORM_ref_addr:
57 case dwarf::DW_FORM_ref1:
58 case dwarf::DW_FORM_ref2:
59 case dwarf::DW_FORM_ref4:
60 case dwarf::DW_FORM_ref8:
61 case dwarf::DW_FORM_ref_udata:
62 case dwarf::DW_FORM_ref_sig8:
63 return *FormValue.getAsReferenceUVal();
64 case dwarf::DW_FORM_data1:
65 case dwarf::DW_FORM_flag:
66 case dwarf::DW_FORM_data2:
67 case dwarf::DW_FORM_data4:
68 case dwarf::DW_FORM_data8:
69 case dwarf::DW_FORM_udata:
70 case dwarf::DW_FORM_ref_sup4:
71 case dwarf::DW_FORM_ref_sup8:
72 return *FormValue.getAsUnsignedConstant();
73 case dwarf::DW_FORM_sdata:
74 return *FormValue.getAsSignedConstant();
75 case dwarf::DW_FORM_implicit_const:
76 return AttrSpec.getImplicitConstValue();
77 default:
78 return 0;
79 }
80 };
81
82 LLVM_DEBUG({
83 dbgs() << " " << hexValue(OffsetOnEntry)
84 << formatv(" {0}", AttrSpec.Attr) << "\n";
85 });
86
87 switch (AttrSpec.Attr) {
88 case dwarf::DW_AT_accessibility:
89 CurrentElement->setAccessibilityCode(GetAsUnsignedConstant());
90 break;
91 case dwarf::DW_AT_artificial:
92 CurrentElement->setIsArtificial();
93 break;
94 case dwarf::DW_AT_bit_size:
95 CurrentElement->setBitSize(GetAsUnsignedConstant());
96 break;
97 case dwarf::DW_AT_byte_size:
98 CurrentElement->setBitSize(GetAsUnsignedConstant() * DWARF_CHAR_BIT);
99 break;
100 case dwarf::DW_AT_call_file:
101 CurrentElement->setCallFilenameIndex(IncrementFileIndex
102 ? GetAsUnsignedConstant() + 1
103 : GetAsUnsignedConstant());
104 break;
105 case dwarf::DW_AT_call_line:
106 CurrentElement->setCallLineNumber(GetAsUnsignedConstant());
107 break;
108 case dwarf::DW_AT_comp_dir:
109 CompileUnit->setCompilationDirectory(dwarf::toStringRef(V: FormValue));
110 break;
111 case dwarf::DW_AT_const_value:
112 if (FormValue.isFormClass(FC: DWARFFormValue::FC_Block)) {
113 ArrayRef<uint8_t> Expr = *FormValue.getAsBlock();
114 // Store the expression as a hexadecimal string.
115 CurrentElement->setValue(
116 llvm::toHex(Input: llvm::toStringRef(Input: Expr), /*LowerCase=*/true));
117 } else if (FormValue.isFormClass(FC: DWARFFormValue::FC_Constant)) {
118 // In the case of negative values, generate the string representation
119 // for a positive value prefixed with the negative sign.
120 if (FormValue.getForm() == dwarf::DW_FORM_sdata) {
121 std::stringstream Stream;
122 int64_t Value = *FormValue.getAsSignedConstant();
123 if (Value < 0) {
124 Stream << "-";
125 Value = std::abs(i: Value);
126 }
127 Stream << hexString(Value, Width: 2);
128 CurrentElement->setValue(Stream.str());
129 } else
130 CurrentElement->setValue(hexString(Value: GetAsUnsignedConstant(), Width: 2));
131 } else
132 CurrentElement->setValue(dwarf::toStringRef(V: FormValue));
133 break;
134 case dwarf::DW_AT_count:
135 CurrentElement->setCount(GetAsUnsignedConstant());
136 break;
137 case dwarf::DW_AT_decl_line:
138 CurrentElement->setLineNumber(GetAsUnsignedConstant());
139 break;
140 case dwarf::DW_AT_decl_file:
141 CurrentElement->setFilenameIndex(IncrementFileIndex
142 ? GetAsUnsignedConstant() + 1
143 : GetAsUnsignedConstant());
144 break;
145 case dwarf::DW_AT_enum_class:
146 if (GetFlag(FormValue))
147 CurrentElement->setIsEnumClass();
148 break;
149 case dwarf::DW_AT_external:
150 if (GetFlag(FormValue))
151 CurrentElement->setIsExternal();
152 break;
153 case dwarf::DW_AT_GNU_discriminator:
154 CurrentElement->setDiscriminator(GetAsUnsignedConstant());
155 break;
156 case dwarf::DW_AT_inline:
157 CurrentElement->setInlineCode(GetAsUnsignedConstant());
158 break;
159 case dwarf::DW_AT_lower_bound:
160 CurrentElement->setLowerBound(GetBoundValue(FormValue));
161 break;
162 case dwarf::DW_AT_name:
163 CurrentElement->setName(dwarf::toStringRef(V: FormValue));
164 break;
165 case dwarf::DW_AT_GNU_template_name:
166 CurrentElement->setValue(dwarf::toStringRef(V: FormValue));
167 break;
168 case dwarf::DW_AT_linkage_name:
169 case dwarf::DW_AT_MIPS_linkage_name:
170 CurrentElement->setLinkageName(dwarf::toStringRef(V: FormValue));
171 break;
172 case dwarf::DW_AT_producer:
173 if (options().getAttributeProducer())
174 CurrentElement->setProducer(dwarf::toStringRef(V: FormValue));
175 break;
176 case dwarf::DW_AT_language:
177 if (options().getAttributeLanguage())
178 CurrentElement->setSourceLanguage(LVSourceLanguage{
179 static_cast<llvm::dwarf::SourceLanguage>(GetAsUnsignedConstant())});
180 break;
181 case dwarf::DW_AT_upper_bound:
182 CurrentElement->setUpperBound(GetBoundValue(FormValue));
183 break;
184 case dwarf::DW_AT_virtuality:
185 CurrentElement->setVirtualityCode(GetAsUnsignedConstant());
186 break;
187
188 case dwarf::DW_AT_abstract_origin:
189 case dwarf::DW_AT_call_origin:
190 case dwarf::DW_AT_extension:
191 case dwarf::DW_AT_import:
192 case dwarf::DW_AT_specification:
193 case dwarf::DW_AT_type:
194 updateReference(Attr: AttrSpec.Attr, FormValue);
195 break;
196
197 case dwarf::DW_AT_low_pc:
198 if (options().getGeneralCollectRanges()) {
199 FoundLowPC = true;
200 // For toolchains that support the removal of unused code, the linker
201 // marks functions that have been removed, by setting the value for the
202 // low_pc to the max address.
203 if (std::optional<uint64_t> Value = FormValue.getAsAddress()) {
204 CurrentLowPC = *Value;
205 } else {
206 uint64_t UValue = FormValue.getRawUValue();
207 if (U->getAddrOffsetSectionItem(Index: UValue)) {
208 CurrentLowPC = *FormValue.getAsAddress();
209 } else {
210 FoundLowPC = false;
211 // We are dealing with an index into the .debug_addr section.
212 LLVM_DEBUG({
213 dbgs() << format("indexed (%8.8x) address = ", (uint32_t)UValue);
214 });
215 }
216 }
217 if (FoundLowPC) {
218 if (CurrentLowPC == getTombstoneAddress())
219 CurrentElement->setIsDiscarded();
220 else
221 // Consider the case of WebAssembly.
222 CurrentLowPC += WasmCodeSectionOffset;
223 if (CurrentElement->isCompileUnit())
224 setCUBaseAddress(CurrentLowPC);
225 }
226 }
227 break;
228
229 case dwarf::DW_AT_high_pc:
230 if (options().getGeneralCollectRanges()) {
231 FoundHighPC = true;
232 if (std::optional<uint64_t> Address = FormValue.getAsAddress())
233 // High PC is an address.
234 CurrentHighPC = *Address;
235 if (std::optional<uint64_t> Offset = FormValue.getAsUnsignedConstant())
236 // High PC is an offset from LowPC.
237 // Don't add the WebAssembly offset if we have seen a DW_AT_low_pc, as
238 // the CurrentLowPC has already that offset added. Basically, use the
239 // original DW_AT_loc_pc value.
240 CurrentHighPC =
241 (FoundLowPC ? CurrentLowPC - WasmCodeSectionOffset : CurrentLowPC) +
242 *Offset;
243 // Store the real upper limit for the address range.
244 if (UpdateHighAddress && CurrentHighPC > 0)
245 --CurrentHighPC;
246 // Consider the case of WebAssembly.
247 CurrentHighPC += WasmCodeSectionOffset;
248 if (CurrentElement->isCompileUnit())
249 setCUHighAddress(CurrentHighPC);
250 }
251 break;
252
253 case dwarf::DW_AT_ranges:
254 if (RangesDataAvailable && options().getGeneralCollectRanges()) {
255 auto GetRanges = [](const DWARFFormValue &FormValue,
256 DWARFUnit *U) -> Expected<DWARFAddressRangesVector> {
257 if (FormValue.getForm() == dwarf::DW_FORM_rnglistx)
258 return U->findRnglistFromIndex(Index: *FormValue.getAsSectionOffset());
259 return U->findRnglistFromOffset(Offset: *FormValue.getAsSectionOffset());
260 };
261 Expected<DWARFAddressRangesVector> RangesOrError =
262 GetRanges(FormValue, U);
263 if (!RangesOrError) {
264 LLVM_DEBUG({
265 std::string TheError(toString(RangesOrError.takeError()));
266 dbgs() << format("error decoding address ranges = ",
267 TheError.c_str());
268 });
269 consumeError(Err: RangesOrError.takeError());
270 break;
271 }
272 // The address ranges are absolute. There is no need to add any addend.
273 DWARFAddressRangesVector Ranges = RangesOrError.get();
274 for (DWARFAddressRange &Range : Ranges) {
275 // This seems to be a tombstone for empty ranges.
276 if ((Range.LowPC == Range.HighPC) ||
277 (Range.LowPC = getTombstoneAddress()))
278 continue;
279 // Store the real upper limit for the address range.
280 if (UpdateHighAddress && Range.HighPC > 0)
281 --Range.HighPC;
282 // Consider the case of WebAssembly.
283 Range.LowPC += WasmCodeSectionOffset;
284 Range.HighPC += WasmCodeSectionOffset;
285 // Add the pair of addresses.
286 CurrentScope->addObject(LowerAddress: Range.LowPC, UpperAddress: Range.HighPC);
287 // If the scope is the CU, do not update the ranges set.
288 if (!CurrentElement->isCompileUnit())
289 CurrentRanges.emplace_back(args&: Range.LowPC, args&: Range.HighPC);
290 }
291 }
292 break;
293
294 // Get the location list for the symbol.
295 case dwarf::DW_AT_data_member_location:
296 if (options().getAttributeAnyLocation())
297 processLocationMember(Attr: AttrSpec.Attr, FormValue, Die, OffsetOnEntry);
298 break;
299
300 // Get the location list for the symbol.
301 case dwarf::DW_AT_location:
302 case dwarf::DW_AT_string_length:
303 case dwarf::DW_AT_use_location:
304 if (options().getAttributeAnyLocation() && CurrentSymbol)
305 processLocationList(Attr: AttrSpec.Attr, FormValue, Die, OffsetOnEntry);
306 break;
307
308 case dwarf::DW_AT_call_data_value:
309 case dwarf::DW_AT_call_value:
310 case dwarf::DW_AT_GNU_call_site_data_value:
311 case dwarf::DW_AT_GNU_call_site_value:
312 if (options().getAttributeAnyLocation() && CurrentSymbol)
313 processLocationList(Attr: AttrSpec.Attr, FormValue, Die, OffsetOnEntry,
314 /*CallSiteLocation=*/true);
315 break;
316
317 default:
318 break;
319 }
320}
321
322LVScope *LVDWARFReader::processOneDie(const DWARFDie &InputDIE, LVScope *Parent,
323 DWARFDie &SkeletonDie) {
324 // If the input DIE corresponds to the compile unit, it can be:
325 // a) Simple DWARF: a standard DIE. Ignore the skeleton DIE (is empty).
326 // b) Split DWARF: the DIE for the split DWARF. The skeleton is the DIE
327 // for the skeleton DWARF. Process both DIEs.
328 const DWARFDie &DIE = SkeletonDie.isValid() ? SkeletonDie : InputDIE;
329 DWARFDataExtractor DebugInfoData =
330 DIE.getDwarfUnit()->getDebugInfoExtractor();
331 LVOffset Offset = DIE.getOffset();
332
333 // Reset values for the current DIE.
334 CurrentLowPC = 0;
335 CurrentHighPC = 0;
336 CurrentOffset = Offset;
337 CurrentEndOffset = 0;
338 FoundLowPC = false;
339 FoundHighPC = false;
340
341 // Process supported attributes.
342 if (DebugInfoData.isValidOffset(offset: Offset)) {
343
344 LLVM_DEBUG({
345 dbgs() << "DIE: " << hexValue(Offset) << formatv(" {0}", DIE.getTag())
346 << "\n";
347 });
348
349 // Create the logical view element for the current DIE.
350 dwarf::Tag Tag = DIE.getTag();
351 CurrentElement = createElement(Tag);
352 if (!CurrentElement)
353 return CurrentScope;
354
355 CurrentElement->setTag(Tag);
356 CurrentElement->setOffset(Offset);
357
358 if (options().getAttributeAnySource() && CurrentElement->isCompileUnit())
359 addCompileUnitOffset(Offset,
360 CompileUnit: static_cast<LVScopeCompileUnit *>(CurrentElement));
361
362 // Insert the newly created element into the element symbol table. If the
363 // element is in the list, it means there are previously created elements
364 // referencing this element.
365 auto [It, Inserted] = ElementTable.try_emplace(k: Offset, args&: CurrentElement);
366 if (!Inserted) {
367 // There are previous references to this element. We need to update the
368 // element and all the references pointing to this element.
369 LVElementEntry &Reference = ElementTable[Offset];
370 Reference.Element = CurrentElement;
371 // Traverse the element set and update the elements (backtracking).
372 for (LVElement *Target : Reference.References)
373 Target->setReference(CurrentElement);
374 for (LVElement *Target : Reference.Types)
375 Target->setType(CurrentElement);
376 // Clear the pending elements.
377 Reference.References.clear();
378 Reference.Types.clear();
379 }
380
381 // Add the current element to its parent as there are attributes
382 // (locations) that require the scope level.
383 if (CurrentScope)
384 Parent->addElement(Scope: CurrentScope);
385 else if (CurrentSymbol)
386 Parent->addElement(Symbol: CurrentSymbol);
387 else if (CurrentType)
388 Parent->addElement(Type: CurrentType);
389
390 // Process the attributes for the given DIE.
391 auto ProcessAttributes = [&](const DWARFDie &TheDIE,
392 DWARFDataExtractor &DebugData) {
393 CurrentEndOffset = Offset;
394 uint32_t abbrCode = DebugData.getULEB128(offset_ptr: &CurrentEndOffset);
395 if (abbrCode) {
396 if (const DWARFAbbreviationDeclaration *AbbrevDecl =
397 TheDIE.getAbbreviationDeclarationPtr())
398 if (AbbrevDecl)
399 for (const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec :
400 AbbrevDecl->attributes())
401 processOneAttribute(Die: TheDIE, OffsetPtr: &CurrentEndOffset, AttrSpec);
402 }
403 };
404
405 ProcessAttributes(DIE, DebugInfoData);
406
407 // If the input DIE is for a compile unit, process its attributes in
408 // the case of split DWARF, to override any common attribute values.
409 if (SkeletonDie.isValid()) {
410 DWARFDataExtractor DebugInfoData =
411 InputDIE.getDwarfUnit()->getDebugInfoExtractor();
412 LVOffset Offset = InputDIE.getOffset();
413 if (DebugInfoData.isValidOffset(offset: Offset))
414 ProcessAttributes(InputDIE, DebugInfoData);
415 }
416 }
417
418 if (CurrentScope) {
419 if (CurrentScope->getCanHaveRanges()) {
420 // If the scope has ranges, they are already added to the scope.
421 // Add any collected LowPC/HighPC values.
422 bool IsCompileUnit = CurrentScope->getIsCompileUnit();
423 if (FoundLowPC && FoundHighPC) {
424 CurrentScope->addObject(LowerAddress: CurrentLowPC, UpperAddress: CurrentHighPC);
425 if (!IsCompileUnit) {
426 // If the scope is a function, add it to the public names.
427 if ((options().getAttributePublics() ||
428 options().getPrintAnyLine()) &&
429 CurrentScope->getIsFunction() &&
430 !CurrentScope->getIsInlinedFunction())
431 CompileUnit->addPublicName(Scope: CurrentScope, LowPC: CurrentLowPC,
432 HighPC: CurrentHighPC);
433 }
434 }
435
436 // Look for scopes with ranges and no linkage name information that
437 // are referencing another scopes via DW_AT_specification. They are
438 // possible candidates for a comdat scope.
439 if (CurrentScope->getHasRanges() &&
440 !CurrentScope->getLinkageNameIndex() &&
441 CurrentScope->getHasReferenceSpecification()) {
442 // Get the linkage name in order to search for a possible comdat.
443 std::optional<DWARFFormValue> LinkageDIE =
444 DIE.findRecursively(Attrs: dwarf::DW_AT_linkage_name);
445 if (LinkageDIE.has_value()) {
446 StringRef Name(dwarf::toStringRef(V: LinkageDIE));
447 if (!Name.empty())
448 CurrentScope->setLinkageName(Name);
449 }
450 }
451
452 // If the current scope is in the 'LinkageNames' table, update its
453 // logical scope. For other scopes, always we will assume the default
454 // ".text" section index.
455 LVSectionIndex SectionIndex = updateSymbolTable(Function: CurrentScope);
456 if (CurrentScope->getIsComdat())
457 CompileUnit->setHasComdatScopes();
458
459 // Update section index contained ranges.
460 if (SectionIndex) {
461 if (!CurrentRanges.empty()) {
462 for (LVAddressRange &Range : CurrentRanges)
463 addSectionRange(SectionIndex, Scope: CurrentScope, LowerAddress: Range.first,
464 UpperAddress: Range.second);
465 CurrentRanges.clear();
466 }
467 // If the scope is the CU, do not update the ranges set.
468 if (FoundLowPC && FoundHighPC && !IsCompileUnit) {
469 addSectionRange(SectionIndex, Scope: CurrentScope, LowerAddress: CurrentLowPC,
470 UpperAddress: CurrentHighPC);
471 }
472 }
473 }
474 // Mark member functions.
475 if (Parent->getIsAggregate())
476 CurrentScope->setIsMember();
477 }
478
479 // Keep track of symbols with locations.
480 if (options().getAttributeAnyLocation() && CurrentSymbol &&
481 CurrentSymbol->getHasLocation())
482 SymbolsWithLocations.push_back(Elt: CurrentSymbol);
483
484 // If we have template parameters, mark the parent as template.
485 if (CurrentType && CurrentType->getIsTemplateParam())
486 Parent->setIsTemplate();
487
488 return CurrentScope;
489}
490
491void LVDWARFReader::traverseDieAndChildren(DWARFDie &DIE, LVScope *Parent,
492 DWARFDie &SkeletonDie) {
493 // Process the current DIE.
494 LVScope *Scope = processOneDie(InputDIE: DIE, Parent, SkeletonDie);
495 if (Scope) {
496 LVOffset Lower = DIE.getOffset();
497 LVOffset Upper = CurrentEndOffset;
498 DWARFDie DummyDie;
499 // Traverse the children chain.
500 DWARFDie Child = DIE.getFirstChild();
501 while (Child) {
502 traverseDieAndChildren(DIE&: Child, Parent: Scope, SkeletonDie&: DummyDie);
503 Upper = Child.getOffset();
504 Child = Child.getSibling();
505 }
506 // Calculate contributions to the debug info section.
507 if (options().getPrintSizes() && Upper)
508 CompileUnit->addSize(Scope, Lower, Upper);
509 }
510}
511
512void LVDWARFReader::processLocationGaps() {
513 if (options().getAttributeAnyLocation())
514 for (LVSymbol *Symbol : SymbolsWithLocations)
515 Symbol->fillLocationGaps();
516}
517
518void LVDWARFReader::createLineAndFileRecords(
519 const DWARFDebugLine::LineTable *Lines) {
520 if (!Lines)
521 return;
522
523 // Get the source filenames.
524 if (!Lines->Prologue.FileNames.empty())
525 for (const DWARFDebugLine::FileNameEntry &Entry :
526 Lines->Prologue.FileNames) {
527 std::string Directory;
528 if (Lines->getDirectoryForEntry(Entry, Directory))
529 Directory = transformPath(Path: Directory);
530 if (Directory.empty())
531 Directory = std::string(CompileUnit->getCompilationDirectory());
532 std::string File = transformPath(Path: dwarf::toStringRef(V: Entry.Name));
533 std::string String;
534 raw_string_ostream(String) << Directory << "/" << File;
535 CompileUnit->addFilename(Name: String);
536 }
537
538 // In DWARF5 the file indexes start at 0;
539 bool IncrementIndex = Lines->Prologue.getVersion() >= 5;
540
541 // Get the source lines if requested by command line option.
542 if (options().getPrintLines() && Lines->Rows.size())
543 for (const DWARFDebugLine::Row &Row : Lines->Rows) {
544 // Here we collect logical debug lines in CULines. Later on,
545 // the 'processLines()' function will move each created logical line
546 // to its enclosing logical scope, using the debug ranges information
547 // and they will be released when its scope parent is deleted.
548 LVLineDebug *Line = createLineDebug();
549 CULines.push_back(Elt: Line);
550 // Consider the case of WebAssembly.
551 Line->setAddress(Row.Address.Address + WasmCodeSectionOffset);
552 Line->setFilename(
553 CompileUnit->getFilename(Index: IncrementIndex ? Row.File + 1 : Row.File));
554 Line->setLineNumber(Row.Line);
555 if (Row.Discriminator)
556 Line->setDiscriminator(Row.Discriminator);
557 if (Row.IsStmt)
558 Line->setIsNewStatement();
559 if (Row.BasicBlock)
560 Line->setIsBasicBlock();
561 if (Row.EndSequence)
562 Line->setIsEndSequence();
563 if (Row.EpilogueBegin)
564 Line->setIsEpilogueBegin();
565 if (Row.PrologueEnd)
566 Line->setIsPrologueEnd();
567 LLVM_DEBUG({
568 dbgs() << "Address: " << hexValue(Line->getAddress())
569 << " Line: " << Line->lineNumberAsString(/*ShowZero=*/true)
570 << "\n";
571 });
572 }
573}
574
575std::string LVDWARFReader::getRegisterName(LVSmall Opcode,
576 ArrayRef<uint64_t> Operands) {
577 // The 'prettyPrintRegisterOp' function uses the DWARFUnit to support
578 // DW_OP_regval_type. At this point we are operating on a logical view
579 // item, with no access to the underlying DWARF data used by LLVM.
580 // We do not support DW_OP_regval_type here.
581 if (Opcode == dwarf::DW_OP_regval_type)
582 return {};
583
584 std::string string;
585 raw_string_ostream Stream(string);
586 DIDumpOptions DumpOpts;
587 auto *MCRegInfo = MRI.get();
588 auto GetRegName = [&MCRegInfo](uint64_t DwarfRegNum, bool IsEH) -> StringRef {
589 if (!MCRegInfo)
590 return {};
591 if (std::optional<MCRegister> LLVMRegNum =
592 MCRegInfo->getLLVMRegNum(RegNum: DwarfRegNum, isEH: IsEH))
593 if (const char *RegName = MCRegInfo->getName(RegNo: *LLVMRegNum))
594 return StringRef(RegName);
595 return {};
596 };
597 DumpOpts.GetNameForDWARFReg = GetRegName;
598 prettyPrintRegisterOp(/*U=*/nullptr, OS&: Stream, DumpOpts, Opcode, Operands);
599 return Stream.str();
600}
601
602Error LVDWARFReader::createScopes() {
603 LLVM_DEBUG({
604 W.startLine() << "\n";
605 W.printString("File", Obj.getFileName().str());
606 W.printString("Format", FileFormatName);
607 });
608
609 if (Error Err = LVReader::createScopes())
610 return Err;
611
612 // As the DwarfContext object is valid only during the scopes creation,
613 // we need to create our own Target information, to be used during the
614 // logical view printing, in the case of instructions being requested.
615 std::unique_ptr<DWARFContext> DwarfContext = DWARFContext::create(Obj);
616 if (!DwarfContext)
617 return createStringError(EC: errc::invalid_argument,
618 Fmt: "Could not create DWARF information: %s",
619 Vals: getFilename().str().c_str());
620
621 if (Error Err = loadTargetInfo(Obj))
622 return Err;
623
624 // Create a mapping for virtual addresses.
625 mapVirtualAddress(Obj);
626
627 // Select the correct compile unit range, depending if we are dealing with
628 // a standard or split DWARF object.
629 DWARFContext::compile_unit_range CompileUnits =
630 DwarfContext->getNumCompileUnits() ? DwarfContext->compile_units()
631 : DwarfContext->dwo_compile_units();
632 for (const std::unique_ptr<DWARFUnit> &CU : CompileUnits) {
633
634 // Take into account the address byte size for a correct 'tombstone'
635 // value identification.
636 setTombstoneAddress(
637 dwarf::computeTombstoneAddress(AddressByteSize: CU->getAddressByteSize()));
638
639 // Deduction of index used for the line records.
640 //
641 // For the following test case: test.cpp
642 // void foo(void ParamPtr) { }
643
644 // Both GCC and Clang generate DWARF-5 .debug_line layout.
645
646 // * GCC (GNU C++17 11.3.0) - All DW_AT_decl_file use index 1.
647 //
648 // .debug_info:
649 // format = DWARF32, version = 0x0005
650 // DW_TAG_compile_unit
651 // DW_AT_name ("test.cpp")
652 // DW_TAG_subprogram ("foo")
653 // DW_AT_decl_file (1)
654 // DW_TAG_formal_parameter ("ParamPtr")
655 // DW_AT_decl_file (1)
656 // .debug_line:
657 // Line table prologue: format (DWARF32), version (5)
658 // include_directories[0] = "..."
659 // file_names[0]: name ("test.cpp"), dir_index (0)
660 // file_names[1]: name ("test.cpp"), dir_index (0)
661
662 // * Clang (14.0.6) - All DW_AT_decl_file use index 0.
663 //
664 // .debug_info:
665 // format = DWARF32, version = 0x0005
666 // DW_AT_producer ("clang version 14.0.6")
667 // DW_AT_name ("test.cpp")
668 //
669 // DW_TAG_subprogram ("foo")
670 // DW_AT_decl_file (0)
671 // DW_TAG_formal_parameter ("ParamPtr")
672 // DW_AT_decl_file (0)
673 // .debug_line:
674 // Line table prologue: format (DWARF32), version (5)
675 // include_directories[0] = "..."
676 // file_names[0]: name ("test.cpp"), dir_index (0)
677
678 // From DWARFDebugLine::getFileNameByIndex documentation:
679 // In Dwarf 4, the files are 1-indexed.
680 // In Dwarf 5, the files are 0-indexed.
681 // Additional discussions here:
682 // https://www.mail-archive.com/dwarf-discuss@lists.dwarfstd.org/msg00883.html
683
684 // The DWARF reader is expecting the files are 1-indexed, so using
685 // the .debug_line header information decide if the indexed require
686 // an internal adjustment.
687
688 // For the case of GCC (DWARF5), if the entries[0] and [1] are the
689 // same, do not perform any adjustment.
690 auto DeduceIncrementFileIndex = [&]() -> bool {
691 if (CU->getVersion() < 5)
692 // DWARF-4 or earlier -> Don't increment index.
693 return false;
694
695 if (const DWARFDebugLine::LineTable *LT =
696 CU->getContext().getLineTableForUnit(U: CU.get())) {
697 // Check if there are at least 2 entries and if they are the same.
698 if (LT->hasFileAtIndex(FileIndex: 0) && LT->hasFileAtIndex(FileIndex: 1)) {
699 const DWARFDebugLine::FileNameEntry &EntryZero =
700 LT->Prologue.getFileNameEntry(Index: 0);
701 const DWARFDebugLine::FileNameEntry &EntryOne =
702 LT->Prologue.getFileNameEntry(Index: 1);
703 // Check directory indexes.
704 if (EntryZero.DirIdx != EntryOne.DirIdx)
705 // DWARF-5 -> Increment index.
706 return true;
707 // Check filename.
708 std::string FileZero;
709 std::string FileOne;
710 StringRef None;
711 LT->getFileNameByIndex(
712 FileIndex: 0, CompDir: None, Kind: DILineInfoSpecifier::FileLineInfoKind::RawValue,
713 Result&: FileZero);
714 LT->getFileNameByIndex(
715 FileIndex: 1, CompDir: None, Kind: DILineInfoSpecifier::FileLineInfoKind::RawValue,
716 Result&: FileOne);
717 return FileZero != FileOne;
718 }
719 }
720
721 // DWARF-5 -> Increment index.
722 return true;
723 };
724 // The DWARF reader expects the indexes as 1-indexed.
725 IncrementFileIndex = DeduceIncrementFileIndex();
726
727 DWARFDie UnitDie = CU->getUnitDIE();
728 SmallString<16> DWOAlternativeLocation;
729 if (UnitDie) {
730 std::optional<const char *> DWOFileName =
731 CU->getVersion() >= 5
732 ? dwarf::toString(V: UnitDie.find(Attr: dwarf::DW_AT_dwo_name))
733 : dwarf::toString(V: UnitDie.find(Attr: dwarf::DW_AT_GNU_dwo_name));
734 StringRef From(DWOFileName.value_or(u: ""));
735 DWOAlternativeLocation = createAlternativePath(From);
736 }
737
738 // The current CU can be a normal compile unit (standard) or a skeleton
739 // compile unit (split). For both cases, the returned die, will be used
740 // to create the logical scopes.
741 DWARFDie CUDie = CU->getNonSkeletonUnitDIE(
742 /*ExtractUnitDIEOnly=*/false,
743 /*DWOAlternativeLocation=*/DWOAlternativeLocation);
744 if (!CUDie.isValid())
745 continue;
746
747 // The current unit corresponds to the .dwo file. We need to get the
748 // skeleton unit and query for any ranges that will enclose any ranges
749 // in the non-skeleton unit.
750 DWARFDie DummyDie;
751 DWARFDie SkeletonDie =
752 CUDie.getDwarfUnit()->isDWOUnit() ? CU->getUnitDIE(ExtractUnitDIEOnly: false) : DummyDie;
753 // Disable the ranges processing if we have just a single .dwo object,
754 // as any DW_AT_ranges will access not available range information.
755 RangesDataAvailable =
756 (!CUDie.getDwarfUnit()->isDWOUnit() ||
757 (SkeletonDie.isValid() ? !SkeletonDie.getDwarfUnit()->isDWOUnit()
758 : true));
759
760 traverseDieAndChildren(DIE&: CUDie, Parent: Root, SkeletonDie);
761
762 createLineAndFileRecords(Lines: DwarfContext->getLineTableForUnit(U: CU.get()));
763 if (Error Err = createInstructions())
764 return Err;
765
766 // Process the compilation unit, as there are cases where enclosed
767 // functions have the same ranges values. Insert the compilation unit
768 // ranges at the end, to allow enclosing ranges to be first in the list.
769 LVSectionIndex SectionIndex = getSectionIndex(Scope: CompileUnit);
770 addSectionRange(SectionIndex, Scope: CompileUnit);
771 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
772 ScopesWithRanges->sort();
773
774 processLines(DebugLines: &CULines, SectionIndex);
775 processLocationGaps();
776
777 // These are per compile unit.
778 ScopesWithRanges->clear();
779 SymbolsWithLocations.clear();
780 CULines.clear();
781 }
782
783 return Error::success();
784}
785
786// Get the location information for the associated attribute.
787void LVDWARFReader::processLocationList(dwarf::Attribute Attr,
788 const DWARFFormValue &FormValue,
789 const DWARFDie &Die,
790 uint64_t OffsetOnEntry,
791 bool CallSiteLocation) {
792
793 auto ProcessLocationExpression = [&](const DWARFExpression &Expression) {
794 for (const DWARFExpression::Operation &Op : Expression)
795 CurrentSymbol->addLocationOperands(Opcode: Op.getCode(), Operands: Op.getRawOperands());
796 };
797
798 DWARFUnit *U = Die.getDwarfUnit();
799 DWARFContext &DwarfContext = U->getContext();
800 bool IsLittleEndian = DwarfContext.isLittleEndian();
801 if (FormValue.isFormClass(FC: DWARFFormValue::FC_Block) ||
802 (DWARFAttribute::mayHaveLocationExpr(Attr) &&
803 FormValue.isFormClass(FC: DWARFFormValue::FC_Exprloc))) {
804 ArrayRef<uint8_t> Expr = *FormValue.getAsBlock();
805 DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()),
806 IsLittleEndian, 0);
807 DWARFExpression Expression(Data, U->getAddressByteSize(),
808 U->getFormParams().Format);
809
810 // Add location and operation entries.
811 CurrentSymbol->addLocation(Attr, /*LowPC=*/0, /*HighPC=*/-1,
812 /*SectionOffset=*/0, LocDescOffset: OffsetOnEntry,
813 CallSiteLocation);
814 ProcessLocationExpression(Expression);
815 return;
816 }
817
818 if (DWARFAttribute::mayHaveLocationList(Attr) &&
819 FormValue.isFormClass(FC: DWARFFormValue::FC_SectionOffset)) {
820 uint64_t Offset = *FormValue.getAsSectionOffset();
821 if (FormValue.getForm() == dwarf::DW_FORM_loclistx) {
822 std::optional<uint64_t> LoclistOffset = U->getLoclistOffset(Index: Offset);
823 if (!LoclistOffset)
824 return;
825 Offset = *LoclistOffset;
826 }
827 uint64_t BaseAddr = 0;
828 if (std::optional<SectionedAddress> BA = U->getBaseAddress())
829 BaseAddr = BA->Address;
830 LVAddress LowPC = 0;
831 LVAddress HighPC = 0;
832
833 auto ProcessLocationEntry = [&](const DWARFLocationEntry &Entry) {
834 if (Entry.Kind == dwarf::DW_LLE_base_address) {
835 BaseAddr = Entry.Value0;
836 return;
837 }
838 if (Entry.Kind == dwarf::DW_LLE_offset_pair) {
839 LowPC = BaseAddr + Entry.Value0;
840 HighPC = BaseAddr + Entry.Value1;
841 DWARFAddressRange Range{LowPC, HighPC, Entry.SectionIndex};
842 if (Range.SectionIndex == SectionedAddress::UndefSection)
843 Range.SectionIndex = Entry.SectionIndex;
844 DWARFLocationExpression Loc{.Range: Range, .Expr: Entry.Loc};
845 DWARFDataExtractor Data(Loc.Expr, IsLittleEndian,
846 U->getAddressByteSize());
847 DWARFExpression Expression(Data, U->getAddressByteSize());
848
849 // Store the real upper limit for the address range.
850 if (UpdateHighAddress && HighPC > 0)
851 --HighPC;
852 // Add location and operation entries.
853 CurrentSymbol->addLocation(Attr, LowPC, HighPC, SectionOffset: Offset, LocDescOffset: OffsetOnEntry,
854 CallSiteLocation);
855 ProcessLocationExpression(Expression);
856 }
857 };
858 Error E = U->getLocationTable().visitLocationList(
859 Offset: &Offset, Callback: [&](const DWARFLocationEntry &E) {
860 ProcessLocationEntry(E);
861 return true;
862 });
863 if (E)
864 consumeError(Err: std::move(E));
865 }
866}
867
868void LVDWARFReader::processLocationMember(dwarf::Attribute Attr,
869 const DWARFFormValue &FormValue,
870 const DWARFDie &Die,
871 uint64_t OffsetOnEntry) {
872 // Check if the value is an integer constant.
873 if (FormValue.isFormClass(FC: DWARFFormValue::FC_Constant))
874 // Add a record to hold a constant as location.
875 CurrentSymbol->addLocationConstant(Attr, Constant: *FormValue.getAsUnsignedConstant(),
876 LocDescOffset: OffsetOnEntry);
877 else
878 // This is a location description, or a reference to one.
879 processLocationList(Attr, FormValue, Die, OffsetOnEntry);
880}
881
882// Update the current element with the reference.
883void LVDWARFReader::updateReference(dwarf::Attribute Attr,
884 const DWARFFormValue &FormValue) {
885 // FIXME: We are assuming that at most one Reference (DW_AT_specification,
886 // DW_AT_abstract_origin, ...) and at most one Type (DW_AT_import, DW_AT_type)
887 // appear in any single DIE, but this may not be true.
888 uint64_t Offset;
889 if (std::optional<uint64_t> Off = FormValue.getAsRelativeReference())
890 Offset = FormValue.getUnit()->getOffset() + *Off;
891 else if (Off = FormValue.getAsDebugInfoReference(); Off)
892 Offset = *Off;
893 else
894 llvm_unreachable("Unsupported reference type");
895
896 // Get target for the given reference, if already created.
897 LVElement *Target = getElementForOffset(
898 offset: Offset, Element: CurrentElement,
899 /*IsType=*/Attr == dwarf::DW_AT_import || Attr == dwarf::DW_AT_type);
900 // Check if we are dealing with cross CU references.
901 if (FormValue.getForm() == dwarf::DW_FORM_ref_addr) {
902 if (Target) {
903 // The global reference is ready. Mark it as global.
904 Target->setIsGlobalReference();
905 // Remove global reference from the unseen list.
906 removeGlobalOffset(Offset);
907 } else
908 // Record the unseen cross CU reference.
909 addGlobalOffset(Offset);
910 }
911
912 // At this point, 'Target' can be null, in the case of the target element
913 // not being seen. But the correct bit is set, to indicate that the target
914 // is being referenced by (abstract_origin, extension, specification) or
915 // (import, type).
916 // We must differentiate between the kind of reference. This is needed to
917 // complete inlined function instances with dropped abstract references,
918 // in order to facilitate a logical comparison.
919 switch (Attr) {
920 case dwarf::DW_AT_abstract_origin:
921 case dwarf::DW_AT_call_origin:
922 CurrentElement->setReference(Target);
923 CurrentElement->setHasReferenceAbstract();
924 break;
925 case dwarf::DW_AT_extension:
926 CurrentElement->setReference(Target);
927 CurrentElement->setHasReferenceExtension();
928 break;
929 case dwarf::DW_AT_specification:
930 CurrentElement->setReference(Target);
931 CurrentElement->setHasReferenceSpecification();
932 break;
933 case dwarf::DW_AT_import:
934 case dwarf::DW_AT_type:
935 CurrentElement->setType(Target);
936 break;
937 default:
938 break;
939 }
940}
941
942// Get an element given the DIE offset.
943LVElement *LVDWARFReader::getElementForOffset(LVOffset Offset,
944 LVElement *Element, bool IsType) {
945 // Update the element and all the references pointing to this element.
946 LVElementEntry &Entry = ElementTable[Offset];
947 if (!Entry.Element) {
948 if (IsType)
949 Entry.Types.insert(x: Element);
950 else
951 Entry.References.insert(x: Element);
952 }
953 return Entry.Element;
954}
955
956Error LVDWARFReader::loadTargetInfo(const ObjectFile &Obj) {
957 // Detect the architecture from the object file. We usually don't need OS
958 // info to lookup a target and create register info.
959 Triple TT;
960 TT.setArch(Kind: Triple::ArchType(Obj.getArch()));
961 TT.setVendor(Triple::UnknownVendor);
962 TT.setOS(Triple::UnknownOS);
963
964 // Features to be passed to target/subtarget
965 Expected<SubtargetFeatures> Features = Obj.getFeatures();
966 SubtargetFeatures FeaturesValue;
967 if (!Features) {
968 consumeError(Err: Features.takeError());
969 FeaturesValue = SubtargetFeatures();
970 }
971 FeaturesValue = *Features;
972 return loadGenericTargetInfo(TheTriple: TT.str(), TheFeatures: FeaturesValue.getString());
973}
974
975void LVDWARFReader::mapRangeAddress(const ObjectFile &Obj) {
976 for (auto Iter = Obj.symbol_begin(); Iter != Obj.symbol_end(); ++Iter) {
977 const SymbolRef &Symbol = *Iter;
978
979 Expected<SymbolRef::Type> TypeOrErr = Symbol.getType();
980 if (!TypeOrErr) {
981 consumeError(Err: TypeOrErr.takeError());
982 continue;
983 }
984
985 // Process only symbols that represent a function.
986 SymbolRef::Type Type = *TypeOrErr;
987 if (Type != SymbolRef::ST_Function)
988 continue;
989
990 // In the case of a Mach-O STAB symbol, get its section only if
991 // the STAB symbol's section field refers to a valid section index.
992 // Otherwise the symbol may error trying to load a section that
993 // does not exist.
994 const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Val: &Obj);
995 bool IsSTAB = false;
996 if (MachO) {
997 DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
998 uint8_t NType =
999 (MachO->is64Bit() ? MachO->getSymbol64TableEntry(DRI: SymDRI).n_type
1000 : MachO->getSymbolTableEntry(DRI: SymDRI).n_type);
1001 if (NType & MachO::N_STAB)
1002 IsSTAB = true;
1003 }
1004
1005 Expected<section_iterator> IterOrErr = Symbol.getSection();
1006 if (!IterOrErr) {
1007 consumeError(Err: IterOrErr.takeError());
1008 continue;
1009 }
1010 section_iterator Section = IsSTAB ? Obj.section_end() : *IterOrErr;
1011 if (Section == Obj.section_end())
1012 continue;
1013
1014 // Get the symbol value.
1015 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
1016 if (!AddressOrErr) {
1017 consumeError(Err: AddressOrErr.takeError());
1018 continue;
1019 }
1020 uint64_t Address = *AddressOrErr;
1021
1022 // Get symbol name.
1023 StringRef Name;
1024 Expected<StringRef> NameOrErr = Symbol.getName();
1025 if (!NameOrErr) {
1026 consumeError(Err: NameOrErr.takeError());
1027 continue;
1028 }
1029 Name = *NameOrErr;
1030
1031 // Check if the symbol is Comdat.
1032 Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
1033 if (!FlagsOrErr) {
1034 consumeError(Err: FlagsOrErr.takeError());
1035 continue;
1036 }
1037 uint32_t Flags = *FlagsOrErr;
1038
1039 // Mark the symbol as 'comdat' in any of the following cases:
1040 // - Symbol has the SF_Weak flag or
1041 // - Symbol section index different from the DotTextSectionIndex.
1042 LVSectionIndex SectionIndex = Section->getIndex();
1043 bool IsComdat =
1044 (Flags & SymbolRef::SF_Weak) || (SectionIndex != DotTextSectionIndex);
1045
1046 // Record the symbol name (linkage) and its loading address.
1047 addToSymbolTable(Name, Address, SectionIndex, IsComdat);
1048 }
1049}
1050
1051void LVDWARFReader::sortScopes() { Root->sort(); }
1052
1053void LVDWARFReader::print(raw_ostream &OS) const {
1054 OS << "LVType\n";
1055 LLVM_DEBUG(dbgs() << "CreateReaders\n");
1056}
1057