1//===- DwarfTransformer.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/DIContext.h"
10#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
11#include "llvm/DebugInfo/DWARF/DWARFContext.h"
12#include "llvm/Support/Error.h"
13#include "llvm/Support/ThreadPool.h"
14#include "llvm/Support/raw_ostream.h"
15
16#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
17#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
18#include "llvm/DebugInfo/GSYM/GsymCreator.h"
19#include "llvm/DebugInfo/GSYM/GsymReader.h"
20#include "llvm/DebugInfo/GSYM/InlineInfo.h"
21#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
22
23#include <optional>
24
25using namespace llvm;
26using namespace gsym;
27
28struct llvm::gsym::CUInfo {
29 const DWARFDebugLine::LineTable *LineTable;
30 const char *CompDir;
31 std::vector<uint32_t> FileCache;
32 uint64_t Language = 0;
33 uint8_t AddrSize = 0;
34
35 CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
36 LineTable = DICtx.getLineTableForUnit(U: CU);
37 CompDir = CU->getCompilationDir();
38 FileCache.clear();
39 if (LineTable)
40 FileCache.assign(n: LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
41 DWARFDie Die = CU->getUnitDIE();
42 Language = dwarf::toUnsigned(V: Die.find(Attr: dwarf::DW_AT_language), Default: 0);
43 AddrSize = CU->getAddressByteSize();
44 }
45
46 /// Return true if Addr is the highest address for a given compile unit. The
47 /// highest address is encoded as -1, of all ones in the address. These high
48 /// addresses are used by some linkers to indicate that a function has been
49 /// dead stripped or didn't end up in the linked executable.
50 bool isHighestAddress(uint64_t Addr) const {
51 if (AddrSize == 4)
52 return Addr == UINT32_MAX;
53 else if (AddrSize == 8)
54 return Addr == UINT64_MAX;
55 return false;
56 }
57
58 /// Convert a DWARF compile unit file index into a GSYM global file index.
59 ///
60 /// Each compile unit in DWARF has its own file table in the line table
61 /// prologue. GSYM has a single large file table that applies to all files
62 /// from all of the info in a GSYM file. This function converts between the
63 /// two and caches and DWARF CU file index that has already been converted so
64 /// the first client that asks for a compile unit file index will end up
65 /// doing the conversion, and subsequent clients will get the cached GSYM
66 /// index.
67 std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
68 uint32_t DwarfFileIdx) {
69 if (!LineTable || DwarfFileIdx >= FileCache.size())
70 return std::nullopt;
71 uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
72 if (GsymFileIdx != UINT32_MAX)
73 return GsymFileIdx;
74 std::string File;
75 if (LineTable->getFileNameByIndex(
76 FileIndex: DwarfFileIdx, CompDir,
77 Kind: DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Result&: File))
78 GsymFileIdx = Gsym.insertFile(Path: File);
79 else
80 GsymFileIdx = 0;
81 return GsymFileIdx;
82 }
83};
84
85
86static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
87 if (DWARFDie SpecDie =
88 Die.getAttributeValueAsReferencedDie(Attr: dwarf::DW_AT_specification)) {
89 if (DWARFDie SpecParent = GetParentDeclContextDIE(Die&: SpecDie))
90 return SpecParent;
91 }
92 if (DWARFDie AbstDie =
93 Die.getAttributeValueAsReferencedDie(Attr: dwarf::DW_AT_abstract_origin)) {
94 if (DWARFDie AbstParent = GetParentDeclContextDIE(Die&: AbstDie))
95 return AbstParent;
96 }
97
98 // We never want to follow parent for inlined subroutine - that would
99 // give us information about where the function is inlined, not what
100 // function is inlined
101 if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
102 return DWARFDie();
103
104 DWARFDie ParentDie = Die.getParent();
105 if (!ParentDie)
106 return DWARFDie();
107
108 switch (ParentDie.getTag()) {
109 case dwarf::DW_TAG_namespace:
110 case dwarf::DW_TAG_structure_type:
111 case dwarf::DW_TAG_union_type:
112 case dwarf::DW_TAG_class_type:
113 case dwarf::DW_TAG_subprogram:
114 return ParentDie; // Found parent decl context DIE
115 case dwarf::DW_TAG_lexical_block:
116 return GetParentDeclContextDIE(Die&: ParentDie);
117 default:
118 break;
119 }
120
121 return DWARFDie();
122}
123
124/// Get the GsymCreator string table offset for the qualified name for the
125/// DIE passed in. This function will avoid making copies of any strings in
126/// the GsymCreator when possible. We don't need to copy a string when the
127/// string comes from our .debug_str section or is an inlined string in the
128/// .debug_info. If we create a qualified name string in this function by
129/// combining multiple strings in the DWARF string table or info, we will make
130/// a copy of the string when we add it to the string table.
131static std::optional<uint32_t>
132getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
133 // If the dwarf has mangled name, use mangled name
134 if (auto LinkageName = Die.getLinkageName()) {
135 // We have seen cases were linkage name is actually empty.
136 if (strlen(s: LinkageName) > 0)
137 return Gsym.insertString(S: LinkageName, /* Copy */ false);
138 }
139
140 StringRef ShortName(Die.getName(Kind: DINameKind::ShortName));
141 if (ShortName.empty())
142 return std::nullopt;
143
144 // For C++ and ObjC, prepend names of all parent declaration contexts
145 if (!(Language == dwarf::DW_LANG_C_plus_plus ||
146 Language == dwarf::DW_LANG_C_plus_plus_03 ||
147 Language == dwarf::DW_LANG_C_plus_plus_11 ||
148 Language == dwarf::DW_LANG_C_plus_plus_14 ||
149 Language == dwarf::DW_LANG_ObjC_plus_plus ||
150 // This should not be needed for C, but we see C++ code marked as C
151 // in some binaries. This should hurt, so let's do it for C as well
152 Language == dwarf::DW_LANG_C))
153 return Gsym.insertString(S: ShortName, /* Copy */ false);
154
155 // Some GCC optimizations create functions with names ending with .isra.<num>
156 // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
157 // If it looks like it could be the case, don't add any prefix
158 if (ShortName.starts_with(Prefix: "_Z") &&
159 (ShortName.contains(Other: ".isra.") || ShortName.contains(Other: ".part.")))
160 return Gsym.insertString(S: ShortName, /* Copy */ false);
161
162 DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
163 if (ParentDeclCtxDie) {
164 std::string Name = ShortName.str();
165 while (ParentDeclCtxDie) {
166 StringRef ParentName(ParentDeclCtxDie.getName(Kind: DINameKind::ShortName));
167 if (!ParentName.empty()) {
168 // "lambda" names are wrapped in < >. Replace with { }
169 // to be consistent with demangled names and not to confuse with
170 // templates
171 if (ParentName.front() == '<' && ParentName.back() == '>')
172 Name = "{" + ParentName.substr(Start: 1, N: ParentName.size() - 2).str() + "}" +
173 "::" + Name;
174 else
175 Name = ParentName.str() + "::" + Name;
176 }
177 ParentDeclCtxDie = GetParentDeclContextDIE(Die&: ParentDeclCtxDie);
178 }
179 // Copy the name since we created a new name in a std::string.
180 return Gsym.insertString(S: Name, /* Copy */ true);
181 }
182 // Don't copy the name since it exists in the DWARF object file.
183 return Gsym.insertString(S: ShortName, /* Copy */ false);
184}
185
186static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
187 bool CheckChildren = true;
188 switch (Die.getTag()) {
189 case dwarf::DW_TAG_subprogram:
190 // Don't look into functions within functions.
191 CheckChildren = Depth == 0;
192 break;
193 case dwarf::DW_TAG_inlined_subroutine:
194 return true;
195 default:
196 break;
197 }
198 if (!CheckChildren)
199 return false;
200 for (DWARFDie ChildDie : Die.children()) {
201 if (hasInlineInfo(Die: ChildDie, Depth: Depth + 1))
202 return true;
203 }
204 return false;
205}
206
207static AddressRanges
208ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
209 AddressRanges Ranges;
210 for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
211 if (DwarfRange.LowPC < DwarfRange.HighPC)
212 Ranges.insert(Range: {DwarfRange.LowPC, DwarfRange.HighPC});
213 }
214 return Ranges;
215}
216
217static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out,
218 CUInfo &CUI, DWARFDie Die, uint32_t Depth,
219 FunctionInfo &FI, InlineInfo &Parent,
220 const AddressRanges &AllParentRanges,
221 bool &WarnIfEmpty) {
222 if (!hasInlineInfo(Die, Depth))
223 return;
224
225 dwarf::Tag Tag = Die.getTag();
226 if (Tag == dwarf::DW_TAG_inlined_subroutine) {
227 // create new InlineInfo and append to parent.children
228 InlineInfo II;
229 AddressRanges AllInlineRanges;
230 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
231 if (RangesOrError) {
232 AllInlineRanges = ConvertDWARFRanges(DwarfRanges: RangesOrError.get());
233 uint32_t EmptyCount = 0;
234 for (const AddressRange &InlineRange : AllInlineRanges) {
235 // Check for empty inline range in case inline function was outlined
236 // or has not code
237 if (InlineRange.empty()) {
238 ++EmptyCount;
239 } else {
240 if (Parent.Ranges.contains(Range: InlineRange)) {
241 II.Ranges.insert(Range: InlineRange);
242 } else {
243 // Only warn if the current inline range is not within any of all
244 // of the parent ranges. If we have a DW_TAG_subpgram with multiple
245 // ranges we will emit a FunctionInfo for each range of that
246 // function that only emits information within the current range,
247 // so we only want to emit an error if the DWARF has issues, not
248 // when a range currently just isn't in the range we are currently
249 // parsing for.
250 if (AllParentRanges.contains(Range: InlineRange)) {
251 WarnIfEmpty = false;
252 } else
253 Out.Report(s: "Function DIE has uncontained address range",
254 detailCallback: [&](raw_ostream &OS) {
255 OS << "error: inlined function DIE at "
256 << HEX32(Die.getOffset()) << " has a range ["
257 << HEX64(InlineRange.start()) << " - "
258 << HEX64(InlineRange.end())
259 << ") that isn't contained in "
260 << "any parent address ranges, this inline range "
261 "will be "
262 "removed.\n";
263 });
264 }
265 }
266 }
267 // If we have all empty ranges for the inlines, then don't warn if we
268 // have an empty InlineInfo at the top level as all inline functions
269 // were elided.
270 if (EmptyCount == AllInlineRanges.size())
271 WarnIfEmpty = false;
272 }
273 if (II.Ranges.empty())
274 return;
275
276 if (auto NameIndex = getQualifiedNameIndex(Die, Language: CUI.Language, Gsym))
277 II.Name = *NameIndex;
278 const uint64_t DwarfFileIdx = dwarf::toUnsigned(
279 V: Die.findRecursively(Attrs: dwarf::DW_AT_call_file), UINT32_MAX);
280 std::optional<uint32_t> OptGSymFileIdx =
281 CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
282 if (OptGSymFileIdx) {
283 II.CallFile = OptGSymFileIdx.value();
284 II.CallLine = dwarf::toUnsigned(V: Die.find(Attr: dwarf::DW_AT_call_line), Default: 0);
285 // parse all children and append to parent
286 for (DWARFDie ChildDie : Die.children())
287 parseInlineInfo(Gsym, Out, CUI, Die: ChildDie, Depth: Depth + 1, FI, Parent&: II,
288 AllParentRanges: AllInlineRanges, WarnIfEmpty);
289 Parent.Children.emplace_back(args: std::move(II));
290 } else
291 Out.Report(
292 s: "Inlined function die has invlaid file index in DW_AT_call_file",
293 detailCallback: [&](raw_ostream &OS) {
294 OS << "error: inlined function DIE at " << HEX32(Die.getOffset())
295 << " has an invalid file index " << DwarfFileIdx
296 << " in its DW_AT_call_file attribute, this inline entry and "
297 "all "
298 << "children will be removed.\n";
299 });
300 return;
301 }
302 if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
303 // skip this Die and just recurse down
304 for (DWARFDie ChildDie : Die.children())
305 parseInlineInfo(Gsym, Out, CUI, Die: ChildDie, Depth: Depth + 1, FI, Parent,
306 AllParentRanges, WarnIfEmpty);
307 }
308}
309
310static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
311 DWARFDie Die, GsymCreator &Gsym,
312 FunctionInfo &FI) {
313 std::vector<uint32_t> RowVector;
314 const uint64_t StartAddress = FI.startAddress();
315 const uint64_t EndAddress = FI.endAddress();
316 const uint64_t RangeSize = EndAddress - StartAddress;
317 const object::SectionedAddress SecAddress{
318 .Address: StartAddress, .SectionIndex: object::SectionedAddress::UndefSection};
319
320 // Attempt to retrieve DW_AT_LLVM_stmt_sequence if present.
321 std::optional<uint64_t> StmtSeqOffset;
322 if (auto StmtSeqAttr = Die.find(Attr: llvm::dwarf::DW_AT_LLVM_stmt_sequence)) {
323 // The `DW_AT_LLVM_stmt_sequence` attribute might be set to an invalid
324 // sentinel value when it refers to an empty line sequence. In such cases,
325 // the DWARF linker will exclude the empty sequence from the final output
326 // and assign the sentinel value to the `DW_AT_LLVM_stmt_sequence`
327 // attribute. The sentinel value is UINT32_MAX for DWARF32 and UINT64_MAX
328 // for DWARF64.
329 const uint64_t InvalidOffset =
330 Die.getDwarfUnit()->getFormParams().getDwarfMaxOffset();
331 uint64_t StmtSeqVal = dwarf::toSectionOffset(V: StmtSeqAttr, Default: InvalidOffset);
332 if (StmtSeqVal != InvalidOffset)
333 StmtSeqOffset = StmtSeqVal;
334 }
335
336 if (!CUI.LineTable->lookupAddressRange(Address: SecAddress, Size: RangeSize, Result&: RowVector,
337 StmtSequenceOffset: StmtSeqOffset)) {
338 // If StmtSeqOffset had a value but the lookup failed, try again without it.
339 // If the second lookup succeeds, we know the DW_AT_LLVM_stmt_sequence value
340 // was invalid, but we still have valid line entries.
341 if (StmtSeqOffset &&
342 CUI.LineTable->lookupAddressRange(Address: SecAddress, Size: RangeSize, Result&: RowVector)) {
343 Out.Report(s: "Invalid DW_AT_LLVM_stmt_sequence value",
344 detailCallback: [&](raw_ostream &OS) {
345 OS << "error: function DIE at " << HEX32(Die.getOffset())
346 << " has a DW_AT_LLVM_stmt_sequence value "
347 << HEX32(*StmtSeqOffset)
348 << " which doesn't match any line table "
349 << "sequence offset but there are " << RowVector.size()
350 << " matching line entries in other sequences.\n";
351 });
352 } else {
353 // If we have a DW_TAG_subprogram but no line entries, fall back to using
354 // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
355 std::string FilePath = Die.getDeclFile(
356 Kind: DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
357 if (FilePath.empty()) {
358 // If we had a DW_AT_decl_file, but got no file then we need to emit a
359 // warning.
360 const uint64_t DwarfFileIdx = dwarf::toUnsigned(
361 V: Die.findRecursively(Attrs: dwarf::DW_AT_decl_file), UINT32_MAX);
362 // Check if there is no DW_AT_decl_line attribute, and don't report an
363 // error if it isn't there.
364 if (DwarfFileIdx == UINT32_MAX)
365 return;
366 Out.Report(s: "Invalid file index in DW_AT_decl_file", detailCallback: [&](raw_ostream
367 &OS) {
368 OS << "error: function DIE at " << HEX32(Die.getOffset())
369 << " has an invalid file index " << DwarfFileIdx
370 << " in its DW_AT_decl_file attribute, unable to create a single "
371 << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
372 << "attributes.\n";
373 });
374 return;
375 }
376 if (auto Line = dwarf::toUnsigned(
377 V: Die.findRecursively(Attrs: {dwarf::DW_AT_decl_line}))) {
378 LineEntry LE(StartAddress, Gsym.insertFile(Path: FilePath), *Line);
379 FI.OptLineTable = LineTable();
380 FI.OptLineTable->push(LE);
381 }
382 return;
383 }
384 }
385
386 FI.OptLineTable = LineTable();
387 DWARFDebugLine::Row PrevRow;
388 for (uint32_t RowIndex : RowVector) {
389 // Take file number and line/column from the row.
390 const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
391 std::optional<uint32_t> OptFileIdx =
392 CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx: Row.File);
393 if (!OptFileIdx) {
394 Out.Report(
395 s: "Invalid file index in DWARF line table", detailCallback: [&](raw_ostream &OS) {
396 OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
397 << "a line entry with invalid DWARF file index, this entry will "
398 << "be removed:\n";
399 Row.dumpTableHeader(OS, /*Indent=*/0);
400 Row.dump(OS);
401 OS << "\n";
402 });
403 continue;
404 }
405 const uint32_t FileIdx = OptFileIdx.value();
406 uint64_t RowAddress = Row.Address.Address;
407 // Watch out for a RowAddress that is in the middle of a line table entry
408 // in the DWARF. If we pass an address in between two line table entries
409 // we will get a RowIndex for the previous valid line table row which won't
410 // be contained in our function. This is usually a bug in the DWARF due to
411 // linker problems or LTO or other DWARF re-linking so it is worth emitting
412 // an error, but not worth stopping the creation of the GSYM.
413 if (!FI.Range.contains(Addr: RowAddress)) {
414 if (RowAddress < FI.Range.start()) {
415 Out.Report(s: "Start address lies between valid Row table entries",
416 detailCallback: [&](raw_ostream &OS) {
417 OS << "error: DIE has a start address whose LowPC is "
418 "between the "
419 "line table Row["
420 << RowIndex << "] with address " << HEX64(RowAddress)
421 << " and the next one.\n";
422 Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE());
423 });
424 RowAddress = FI.Range.start();
425 } else {
426 continue;
427 }
428 }
429
430 LineEntry LE(RowAddress, FileIdx, Row.Line);
431 if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
432 // We have seen full duplicate line tables for functions in some
433 // DWARF files. Watch for those here by checking the last
434 // row was the function's end address (HighPC) and that the
435 // current line table entry's address is the same as the first
436 // line entry we already have in our "function_info.Lines". If
437 // so break out after printing a warning.
438 auto FirstLE = FI.OptLineTable->first();
439 if (FirstLE && *FirstLE == LE)
440 // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird
441 Out.Report(s: "Duplicate line table detected", detailCallback: [&](raw_ostream &OS) {
442 OS << "warning: duplicate line table detected for DIE:\n";
443 Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE());
444 });
445 else
446 Out.Report(s: "Non-monotonically increasing addresses",
447 detailCallback: [&](raw_ostream &OS) {
448 OS << "error: line table has addresses that do not "
449 << "monotonically increase:\n";
450 for (uint32_t RowIndex2 : RowVector)
451 CUI.LineTable->Rows[RowIndex2].dump(OS);
452 Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE());
453 });
454 break;
455 }
456
457 // Skip multiple line entries for the same file and line.
458 auto LastLE = FI.OptLineTable->last();
459 if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
460 continue;
461 // Only push a row if it isn't an end sequence. End sequence markers are
462 // included for the last address in a function or the last contiguous
463 // address in a sequence.
464 if (Row.EndSequence) {
465 // End sequence means that the next line entry could have a lower address
466 // that the previous entries. So we clear the previous row so we don't
467 // trigger the line table error about address that do not monotonically
468 // increase.
469 PrevRow = DWARFDebugLine::Row();
470 } else {
471 FI.OptLineTable->push(LE);
472 PrevRow = Row;
473 }
474 }
475 // If not line table rows were added, clear the line table so we don't encode
476 // on in the GSYM file.
477 if (FI.OptLineTable->empty())
478 FI.OptLineTable = std::nullopt;
479}
480
481void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
482 DWARFDie Die) {
483 switch (Die.getTag()) {
484 case dwarf::DW_TAG_subprogram: {
485 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
486 if (!RangesOrError) {
487 consumeError(Err: RangesOrError.takeError());
488 break;
489 }
490 const DWARFAddressRangesVector &Ranges = RangesOrError.get();
491 if (Ranges.empty())
492 break;
493 auto NameIndex = getQualifiedNameIndex(Die, Language: CUI.Language, Gsym);
494 if (!NameIndex) {
495 Out.Report(s: "Function has no name", detailCallback: [&](raw_ostream &OS) {
496 OS << "error: function at " << HEX64(Die.getOffset())
497 << " has no name\n ";
498 Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE());
499 });
500 break;
501 }
502 // All ranges for the subprogram DIE in case it has multiple. We need to
503 // pass this down into parseInlineInfo so we don't warn about inline
504 // ranges that are not in the current subrange of a function when they
505 // actually are in another subgrange. We do this because when a function
506 // has discontiguos ranges, we create multiple function entries with only
507 // the info for that range contained inside of it.
508 AddressRanges AllSubprogramRanges = ConvertDWARFRanges(DwarfRanges: Ranges);
509
510 // Create a function_info for each range
511 for (const DWARFAddressRange &Range : Ranges) {
512 // The low PC must be less than the high PC. Many linkers don't remove
513 // DWARF for functions that don't get linked into the final executable.
514 // If both the high and low pc have relocations, linkers will often set
515 // the address values for both to the same value to indicate the function
516 // has been remove. Other linkers have been known to set the one or both
517 // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
518 // byte addresses to indicate the function isn't valid. The check below
519 // tries to watch for these cases and abort if it runs into them.
520 if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Addr: Range.LowPC))
521 break;
522
523 // Many linkers can't remove DWARF and might set the LowPC to zero. Since
524 // high PC can be an offset from the low PC in more recent DWARF versions
525 // we need to watch for a zero'ed low pc which we do using ValidTextRanges
526 // below.
527 if (!Gsym.IsValidTextAddress(Addr: Range.LowPC)) {
528 // We expect zero and -1 to be invalid addresses in DWARF depending
529 // on the linker of the DWARF. This indicates a function was stripped
530 // and the debug info wasn't able to be stripped from the DWARF. If
531 // the LowPC isn't zero or -1, then we should emit an error.
532 if (Range.LowPC != 0) {
533 if (!Gsym.isQuiet()) {
534 // Unexpected invalid address, emit a warning
535 Out.Report(s: "Address range starts outside executable section",
536 detailCallback: [&](raw_ostream &OS) {
537 OS << "warning: DIE has an address range whose "
538 "start address "
539 "is not in any executable sections ("
540 << *Gsym.GetValidTextRanges()
541 << ") and will not be processed:\n";
542 Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE());
543 });
544 }
545 }
546 break;
547 }
548
549 FunctionInfo FI;
550 FI.Range = {Range.LowPC, Range.HighPC};
551 FI.Name = *NameIndex;
552 if (CUI.LineTable)
553 convertFunctionLineTable(Out, CUI, Die, Gsym, FI);
554
555 if (hasInlineInfo(Die, Depth: 0)) {
556 FI.Inline = InlineInfo();
557 FI.Inline->Name = *NameIndex;
558 FI.Inline->Ranges.insert(Range: FI.Range);
559 bool WarnIfEmpty = true;
560 parseInlineInfo(Gsym, Out, CUI, Die, Depth: 0, FI, Parent&: *FI.Inline,
561 AllParentRanges: AllSubprogramRanges, WarnIfEmpty);
562 // Make sure we at least got some valid inline info other than just
563 // the top level function. If we didn't then remove the inline info
564 // from the function info. We have seen cases where LTO tries to modify
565 // the DWARF for functions and it messes up the address ranges for
566 // the inline functions so it is no longer valid.
567 //
568 // By checking if there are any valid children on the top level inline
569 // information object, we will know if we got anything valid from the
570 // debug info.
571 if (FI.Inline->Children.empty()) {
572 if (WarnIfEmpty && !Gsym.isQuiet())
573 Out.Report(s: "DIE contains inline functions with no valid ranges",
574 detailCallback: [&](raw_ostream &OS) {
575 OS << "warning: DIE contains inline function "
576 "information that has no valid ranges, removing "
577 "inline information:\n";
578 Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE());
579 });
580 FI.Inline = std::nullopt;
581 }
582 }
583
584 // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
585 if (LoadDwarfCallSites)
586 parseCallSiteInfoFromDwarf(CUI, Die, FI);
587
588 Gsym.addFunctionInfo(FI: std::move(FI));
589 }
590 } break;
591 default:
592 break;
593 }
594 for (DWARFDie ChildDie : Die.children())
595 handleDie(Out, CUI, Die: ChildDie);
596}
597
598void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
599 FunctionInfo &FI) {
600 // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE.
601 // DWARF specification:
602 // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset.
603 // - DW_AT_call_origin might point to a DIE of the function being called.
604 // For simplicity, we will just extract return_offset and possibly target name
605 // if available.
606
607 CallSiteInfoCollection CSIC;
608
609 for (DWARFDie Child : Die.children()) {
610 if (Child.getTag() != dwarf::DW_TAG_call_site)
611 continue;
612
613 CallSiteInfo CSI;
614 // DW_AT_call_return_pc: the return PC (address). We'll convert it to
615 // offset relative to FI's start.
616 auto ReturnPC =
617 dwarf::toAddress(V: Child.findRecursively(Attrs: dwarf::DW_AT_call_return_pc));
618 if (!ReturnPC || !FI.Range.contains(Addr: *ReturnPC))
619 continue;
620
621 CSI.ReturnOffset = *ReturnPC - FI.startAddress();
622
623 // Attempt to get function name from DW_AT_call_origin. If present, we can
624 // insert it as a match regex.
625 if (DWARFDie OriginDie =
626 Child.getAttributeValueAsReferencedDie(Attr: dwarf::DW_AT_call_origin)) {
627
628 // Include the full unmangled name if available, otherwise the short name.
629 if (const char *LinkName = OriginDie.getLinkageName()) {
630 uint32_t LinkNameOff = Gsym.insertString(S: LinkName, /*Copy=*/false);
631 CSI.MatchRegex.push_back(x: LinkNameOff);
632 } else if (const char *ShortName = OriginDie.getShortName()) {
633 uint32_t ShortNameOff = Gsym.insertString(S: ShortName, /*Copy=*/false);
634 CSI.MatchRegex.push_back(x: ShortNameOff);
635 }
636 }
637
638 // For now, we won't attempt to deduce InternalCall/ExternalCall flags
639 // from DWARF.
640 CSI.Flags = CallSiteInfo::Flags::None;
641
642 CSIC.CallSites.push_back(x: CSI);
643 }
644
645 if (!CSIC.CallSites.empty()) {
646 if (!FI.CallSites)
647 FI.CallSites = CallSiteInfoCollection();
648 // Append parsed DWARF callsites:
649 llvm::append_range(C&: FI.CallSites->CallSites, R&: CSIC.CallSites);
650 }
651}
652
653Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
654 size_t NumBefore = Gsym.getNumFunctionInfos();
655 auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
656 DWARFDie ReturnDie = DwarfUnit.getUnitDIE(ExtractUnitDIEOnly: false);
657 // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF.
658 if (IsMachO)
659 return ReturnDie;
660
661 if (DwarfUnit.getDWOId()) {
662 DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(ExtractUnitDIEOnly: false).getDwarfUnit();
663 if (!DWOCU->isDWOUnit())
664 Out.Report(
665 s: "warning: Unable to retrieve DWO .debug_info section for some "
666 "object files. (Remove the --quiet flag for full output)",
667 detailCallback: [&](raw_ostream &OS) {
668 std::string DWOName = dwarf::toString(
669 V: DwarfUnit.getUnitDIE().find(
670 Attrs: {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
671 Default: "");
672 OS << "warning: Unable to retrieve DWO .debug_info section for "
673 << DWOName << "\n";
674 });
675 else {
676 ReturnDie = DWOCU->getUnitDIE(ExtractUnitDIEOnly: false);
677 }
678 }
679 return ReturnDie;
680 };
681 if (NumThreads == 1) {
682 // Parse all DWARF data from this thread, use the same string/file table
683 // for everything
684 for (const auto &CU : DICtx.compile_units()) {
685 DWARFDie Die = getDie(*CU);
686 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(Val: CU.get()));
687 handleDie(Out, CUI, Die);
688 }
689 } else {
690 // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
691 // front before we start accessing any DIEs since there might be
692 // cross compile unit references in the DWARF. If we don't do this we can
693 // end up crashing.
694
695 // We need to call getAbbreviations sequentially first so that getUnitDIE()
696 // only works with its local data.
697 for (const auto &CU : DICtx.compile_units())
698 CU->getAbbreviations();
699
700 // Now parse all DIEs in case we have cross compile unit references in a
701 // thread pool.
702 DefaultThreadPool pool(hardware_concurrency(ThreadCount: NumThreads));
703 for (const auto &CU : DICtx.compile_units())
704 pool.async(F: [&CU]() { CU->getUnitDIE(ExtractUnitDIEOnly: false /*CUDieOnly*/); });
705 pool.wait();
706
707 // Now convert all DWARF to GSYM in a thread pool.
708 std::mutex LogMutex;
709 for (const auto &CU : DICtx.compile_units()) {
710 DWARFDie Die = getDie(*CU);
711 if (Die) {
712 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(Val: CU.get()));
713 pool.async(F: [this, CUI, &LogMutex, &Out, Die]() mutable {
714 std::string storage;
715 raw_string_ostream StrStream(storage);
716 OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
717 handleDie(Out&: ThreadOut, CUI, Die);
718 // Print ThreadLogStorage lines into an actual stream under a lock
719 std::lock_guard<std::mutex> guard(LogMutex);
720 if (Out.GetOS()) {
721 Out << storage;
722 }
723 Out.Merge(other: ThreadOut);
724 });
725 }
726 }
727 pool.wait();
728 }
729 size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
730 Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
731 return Error::success();
732}
733
734llvm::Error DwarfTransformer::verify(StringRef GsymPath,
735 OutputAggregator &Out) {
736 Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
737
738 auto Gsym = GsymReader::openFile(Path: GsymPath);
739 if (!Gsym)
740 return Gsym.takeError();
741
742 auto NumAddrs = Gsym->getNumAddresses();
743 DILineInfoSpecifier DLIS(
744 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
745 DILineInfoSpecifier::FunctionNameKind::LinkageName);
746 std::string gsymFilename;
747 for (uint32_t I = 0; I < NumAddrs; ++I) {
748 auto FuncAddr = Gsym->getAddress(Index: I);
749 if (!FuncAddr)
750 return createStringError(EC: std::errc::invalid_argument,
751 Fmt: "failed to extract address[%i]", Vals: I);
752
753 auto FI = Gsym->getFunctionInfo(Addr: *FuncAddr);
754 if (!FI)
755 return createStringError(
756 EC: std::errc::invalid_argument,
757 Fmt: "failed to extract function info for address 0x%" PRIu64, Vals: *FuncAddr);
758
759 for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
760 const object::SectionedAddress SectAddr{
761 .Address: Addr, .SectionIndex: object::SectionedAddress::UndefSection};
762 auto LR = Gsym->lookup(Addr);
763 if (!LR)
764 return LR.takeError();
765
766 auto DwarfInlineInfos =
767 DICtx.getInliningInfoForAddress(Address: SectAddr, Specifier: DLIS);
768 uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
769 if (NumDwarfInlineInfos == 0) {
770 DwarfInlineInfos.addFrame(
771 Frame: DICtx.getLineInfoForAddress(Address: SectAddr, Specifier: DLIS).value_or(u: DILineInfo()));
772 }
773
774 // Check for 1 entry that has no file and line info
775 if (NumDwarfInlineInfos == 1 &&
776 DwarfInlineInfos.getFrame(Index: 0).FileName == "<invalid>") {
777 DwarfInlineInfos = DIInliningInfo();
778 NumDwarfInlineInfos = 0;
779 }
780 if (NumDwarfInlineInfos > 0 &&
781 NumDwarfInlineInfos != LR->Locations.size()) {
782 if (Out.GetOS()) {
783 raw_ostream &Log = *Out.GetOS();
784 Log << "error: address " << HEX64(Addr) << " has "
785 << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
786 << LR->Locations.size() << "\n";
787 Log << " " << NumDwarfInlineInfos << " DWARF frames:\n";
788 for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
789 const auto &dii = DwarfInlineInfos.getFrame(Index: Idx);
790 Log << " [" << Idx << "]: " << dii.FunctionName << " @ "
791 << dii.FileName << ':' << dii.Line << '\n';
792 }
793 Log << " " << LR->Locations.size() << " GSYM frames:\n";
794 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
795 ++Idx) {
796 const auto &gii = LR->Locations[Idx];
797 Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
798 << '/' << gii.Base << ':' << gii.Line << '\n';
799 }
800 Gsym->dump(OS&: Log, FI: *FI);
801 }
802 continue;
803 }
804
805 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
806 ++Idx) {
807 const auto &gii = LR->Locations[Idx];
808 if (Idx < NumDwarfInlineInfos) {
809 const auto &dii = DwarfInlineInfos.getFrame(Index: Idx);
810 gsymFilename = LR->getSourceFile(Index: Idx);
811 // Verify function name
812 if (!StringRef(dii.FunctionName).starts_with(Prefix: gii.Name))
813 Out << "error: address " << HEX64(Addr) << " DWARF function \""
814 << dii.FunctionName.c_str()
815 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
816
817 // Verify source file path
818 if (dii.FileName != gsymFilename)
819 Out << "error: address " << HEX64(Addr) << " DWARF path \""
820 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
821 << gsymFilename.c_str() << "\"\n";
822 // Verify source file line
823 if (dii.Line != gii.Line)
824 Out << "error: address " << HEX64(Addr) << " DWARF line "
825 << dii.Line << " != GSYM line " << gii.Line << "\n";
826 }
827 }
828 }
829 }
830 return Error::success();
831}
832