1 | //===- DwarfTransformer.cpp -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include <thread> |
10 | #include <unordered_set> |
11 | |
12 | #include "llvm/DebugInfo/DIContext.h" |
13 | #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" |
14 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
15 | #include "llvm/Support/Error.h" |
16 | #include "llvm/Support/ThreadPool.h" |
17 | #include "llvm/Support/raw_ostream.h" |
18 | |
19 | #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" |
20 | #include "llvm/DebugInfo/GSYM/FunctionInfo.h" |
21 | #include "llvm/DebugInfo/GSYM/GsymCreator.h" |
22 | #include "llvm/DebugInfo/GSYM/GsymReader.h" |
23 | #include "llvm/DebugInfo/GSYM/InlineInfo.h" |
24 | #include "llvm/DebugInfo/GSYM/OutputAggregator.h" |
25 | |
26 | #include <optional> |
27 | |
28 | using namespace llvm; |
29 | using namespace gsym; |
30 | |
31 | struct llvm::gsym::CUInfo { |
32 | const DWARFDebugLine::LineTable *LineTable; |
33 | const char *CompDir; |
34 | std::vector<uint32_t> FileCache; |
35 | uint64_t Language = 0; |
36 | uint8_t AddrSize = 0; |
37 | |
38 | CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { |
39 | LineTable = DICtx.getLineTableForUnit(U: CU); |
40 | CompDir = CU->getCompilationDir(); |
41 | FileCache.clear(); |
42 | if (LineTable) |
43 | FileCache.assign(n: LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); |
44 | DWARFDie Die = CU->getUnitDIE(); |
45 | Language = dwarf::toUnsigned(V: Die.find(Attr: dwarf::DW_AT_language), Default: 0); |
46 | AddrSize = CU->getAddressByteSize(); |
47 | } |
48 | |
49 | /// Return true if Addr is the highest address for a given compile unit. The |
50 | /// highest address is encoded as -1, of all ones in the address. These high |
51 | /// addresses are used by some linkers to indicate that a function has been |
52 | /// dead stripped or didn't end up in the linked executable. |
53 | bool isHighestAddress(uint64_t Addr) const { |
54 | if (AddrSize == 4) |
55 | return Addr == UINT32_MAX; |
56 | else if (AddrSize == 8) |
57 | return Addr == UINT64_MAX; |
58 | return false; |
59 | } |
60 | |
61 | /// Convert a DWARF compile unit file index into a GSYM global file index. |
62 | /// |
63 | /// Each compile unit in DWARF has its own file table in the line table |
64 | /// prologue. GSYM has a single large file table that applies to all files |
65 | /// from all of the info in a GSYM file. This function converts between the |
66 | /// two and caches and DWARF CU file index that has already been converted so |
67 | /// the first client that asks for a compile unit file index will end up |
68 | /// doing the conversion, and subsequent clients will get the cached GSYM |
69 | /// index. |
70 | std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym, |
71 | uint32_t DwarfFileIdx) { |
72 | if (!LineTable || DwarfFileIdx >= FileCache.size()) |
73 | return std::nullopt; |
74 | uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; |
75 | if (GsymFileIdx != UINT32_MAX) |
76 | return GsymFileIdx; |
77 | std::string File; |
78 | if (LineTable->getFileNameByIndex( |
79 | FileIndex: DwarfFileIdx, CompDir, |
80 | Kind: DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Result&: File)) |
81 | GsymFileIdx = Gsym.insertFile(Path: File); |
82 | else |
83 | GsymFileIdx = 0; |
84 | return GsymFileIdx; |
85 | } |
86 | }; |
87 | |
88 | |
89 | static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { |
90 | if (DWARFDie SpecDie = |
91 | Die.getAttributeValueAsReferencedDie(Attr: dwarf::DW_AT_specification)) { |
92 | if (DWARFDie SpecParent = GetParentDeclContextDIE(Die&: SpecDie)) |
93 | return SpecParent; |
94 | } |
95 | if (DWARFDie AbstDie = |
96 | Die.getAttributeValueAsReferencedDie(Attr: dwarf::DW_AT_abstract_origin)) { |
97 | if (DWARFDie AbstParent = GetParentDeclContextDIE(Die&: AbstDie)) |
98 | return AbstParent; |
99 | } |
100 | |
101 | // We never want to follow parent for inlined subroutine - that would |
102 | // give us information about where the function is inlined, not what |
103 | // function is inlined |
104 | if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) |
105 | return DWARFDie(); |
106 | |
107 | DWARFDie ParentDie = Die.getParent(); |
108 | if (!ParentDie) |
109 | return DWARFDie(); |
110 | |
111 | switch (ParentDie.getTag()) { |
112 | case dwarf::DW_TAG_namespace: |
113 | case dwarf::DW_TAG_structure_type: |
114 | case dwarf::DW_TAG_union_type: |
115 | case dwarf::DW_TAG_class_type: |
116 | case dwarf::DW_TAG_subprogram: |
117 | return ParentDie; // Found parent decl context DIE |
118 | case dwarf::DW_TAG_lexical_block: |
119 | return GetParentDeclContextDIE(Die&: ParentDie); |
120 | default: |
121 | break; |
122 | } |
123 | |
124 | return DWARFDie(); |
125 | } |
126 | |
127 | /// Get the GsymCreator string table offset for the qualified name for the |
128 | /// DIE passed in. This function will avoid making copies of any strings in |
129 | /// the GsymCreator when possible. We don't need to copy a string when the |
130 | /// string comes from our .debug_str section or is an inlined string in the |
131 | /// .debug_info. If we create a qualified name string in this function by |
132 | /// combining multiple strings in the DWARF string table or info, we will make |
133 | /// a copy of the string when we add it to the string table. |
134 | static std::optional<uint32_t> |
135 | getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) { |
136 | // If the dwarf has mangled name, use mangled name |
137 | if (auto LinkageName = Die.getLinkageName()) { |
138 | // We have seen cases were linkage name is actually empty. |
139 | if (strlen(s: LinkageName) > 0) |
140 | return Gsym.insertString(S: LinkageName, /* Copy */ false); |
141 | } |
142 | |
143 | StringRef ShortName(Die.getName(Kind: DINameKind::ShortName)); |
144 | if (ShortName.empty()) |
145 | return std::nullopt; |
146 | |
147 | // For C++ and ObjC, prepend names of all parent declaration contexts |
148 | if (!(Language == dwarf::DW_LANG_C_plus_plus || |
149 | Language == dwarf::DW_LANG_C_plus_plus_03 || |
150 | Language == dwarf::DW_LANG_C_plus_plus_11 || |
151 | Language == dwarf::DW_LANG_C_plus_plus_14 || |
152 | Language == dwarf::DW_LANG_ObjC_plus_plus || |
153 | // This should not be needed for C, but we see C++ code marked as C |
154 | // in some binaries. This should hurt, so let's do it for C as well |
155 | Language == dwarf::DW_LANG_C)) |
156 | return Gsym.insertString(S: ShortName, /* Copy */ false); |
157 | |
158 | // Some GCC optimizations create functions with names ending with .isra.<num> |
159 | // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name |
160 | // If it looks like it could be the case, don't add any prefix |
161 | if (ShortName.starts_with(Prefix: "_Z" ) && |
162 | (ShortName.contains(Other: ".isra." ) || ShortName.contains(Other: ".part." ))) |
163 | return Gsym.insertString(S: ShortName, /* Copy */ false); |
164 | |
165 | DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); |
166 | if (ParentDeclCtxDie) { |
167 | std::string Name = ShortName.str(); |
168 | while (ParentDeclCtxDie) { |
169 | StringRef ParentName(ParentDeclCtxDie.getName(Kind: DINameKind::ShortName)); |
170 | if (!ParentName.empty()) { |
171 | // "lambda" names are wrapped in < >. Replace with { } |
172 | // to be consistent with demangled names and not to confuse with |
173 | // templates |
174 | if (ParentName.front() == '<' && ParentName.back() == '>') |
175 | Name = "{" + ParentName.substr(Start: 1, N: ParentName.size() - 2).str() + "}" + |
176 | "::" + Name; |
177 | else |
178 | Name = ParentName.str() + "::" + Name; |
179 | } |
180 | ParentDeclCtxDie = GetParentDeclContextDIE(Die&: ParentDeclCtxDie); |
181 | } |
182 | // Copy the name since we created a new name in a std::string. |
183 | return Gsym.insertString(S: Name, /* Copy */ true); |
184 | } |
185 | // Don't copy the name since it exists in the DWARF object file. |
186 | return Gsym.insertString(S: ShortName, /* Copy */ false); |
187 | } |
188 | |
189 | static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { |
190 | bool CheckChildren = true; |
191 | switch (Die.getTag()) { |
192 | case dwarf::DW_TAG_subprogram: |
193 | // Don't look into functions within functions. |
194 | CheckChildren = Depth == 0; |
195 | break; |
196 | case dwarf::DW_TAG_inlined_subroutine: |
197 | return true; |
198 | default: |
199 | break; |
200 | } |
201 | if (!CheckChildren) |
202 | return false; |
203 | for (DWARFDie ChildDie : Die.children()) { |
204 | if (hasInlineInfo(Die: ChildDie, Depth: Depth + 1)) |
205 | return true; |
206 | } |
207 | return false; |
208 | } |
209 | |
210 | static AddressRanges |
211 | ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) { |
212 | AddressRanges Ranges; |
213 | for (const DWARFAddressRange &DwarfRange : DwarfRanges) { |
214 | if (DwarfRange.LowPC < DwarfRange.HighPC) |
215 | Ranges.insert(Range: {DwarfRange.LowPC, DwarfRange.HighPC}); |
216 | } |
217 | return Ranges; |
218 | } |
219 | |
220 | static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out, |
221 | CUInfo &CUI, DWARFDie Die, uint32_t Depth, |
222 | FunctionInfo &FI, InlineInfo &Parent, |
223 | const AddressRanges &AllParentRanges, |
224 | bool &WarnIfEmpty) { |
225 | if (!hasInlineInfo(Die, Depth)) |
226 | return; |
227 | |
228 | dwarf::Tag Tag = Die.getTag(); |
229 | if (Tag == dwarf::DW_TAG_inlined_subroutine) { |
230 | // create new InlineInfo and append to parent.children |
231 | InlineInfo II; |
232 | AddressRanges AllInlineRanges; |
233 | Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); |
234 | if (RangesOrError) { |
235 | AllInlineRanges = ConvertDWARFRanges(DwarfRanges: RangesOrError.get()); |
236 | uint32_t EmptyCount = 0; |
237 | for (const AddressRange &InlineRange : AllInlineRanges) { |
238 | // Check for empty inline range in case inline function was outlined |
239 | // or has not code |
240 | if (InlineRange.empty()) { |
241 | ++EmptyCount; |
242 | } else { |
243 | if (Parent.Ranges.contains(Range: InlineRange)) { |
244 | II.Ranges.insert(Range: InlineRange); |
245 | } else { |
246 | // Only warn if the current inline range is not within any of all |
247 | // of the parent ranges. If we have a DW_TAG_subpgram with multiple |
248 | // ranges we will emit a FunctionInfo for each range of that |
249 | // function that only emits information within the current range, |
250 | // so we only want to emit an error if the DWARF has issues, not |
251 | // when a range currently just isn't in the range we are currently |
252 | // parsing for. |
253 | if (AllParentRanges.contains(Range: InlineRange)) { |
254 | WarnIfEmpty = false; |
255 | } else |
256 | Out.Report(s: "Function DIE has uncontained address range" , |
257 | detailCallback: [&](raw_ostream &OS) { |
258 | OS << "error: inlined function DIE at " |
259 | << HEX32(Die.getOffset()) << " has a range [" |
260 | << HEX64(InlineRange.start()) << " - " |
261 | << HEX64(InlineRange.end()) |
262 | << ") that isn't contained in " |
263 | << "any parent address ranges, this inline range " |
264 | "will be " |
265 | "removed.\n" ; |
266 | }); |
267 | } |
268 | } |
269 | } |
270 | // If we have all empty ranges for the inlines, then don't warn if we |
271 | // have an empty InlineInfo at the top level as all inline functions |
272 | // were elided. |
273 | if (EmptyCount == AllInlineRanges.size()) |
274 | WarnIfEmpty = false; |
275 | } |
276 | if (II.Ranges.empty()) |
277 | return; |
278 | |
279 | if (auto NameIndex = getQualifiedNameIndex(Die, Language: CUI.Language, Gsym)) |
280 | II.Name = *NameIndex; |
281 | const uint64_t DwarfFileIdx = dwarf::toUnsigned( |
282 | V: Die.findRecursively(Attrs: dwarf::DW_AT_call_file), UINT32_MAX); |
283 | std::optional<uint32_t> OptGSymFileIdx = |
284 | CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx); |
285 | if (OptGSymFileIdx) { |
286 | II.CallFile = OptGSymFileIdx.value(); |
287 | II.CallLine = dwarf::toUnsigned(V: Die.find(Attr: dwarf::DW_AT_call_line), Default: 0); |
288 | // parse all children and append to parent |
289 | for (DWARFDie ChildDie : Die.children()) |
290 | parseInlineInfo(Gsym, Out, CUI, Die: ChildDie, Depth: Depth + 1, FI, Parent&: II, |
291 | AllParentRanges: AllInlineRanges, WarnIfEmpty); |
292 | Parent.Children.emplace_back(args: std::move(II)); |
293 | } else |
294 | Out.Report( |
295 | s: "Inlined function die has invlaid file index in DW_AT_call_file" , |
296 | detailCallback: [&](raw_ostream &OS) { |
297 | OS << "error: inlined function DIE at " << HEX32(Die.getOffset()) |
298 | << " has an invalid file index " << DwarfFileIdx |
299 | << " in its DW_AT_call_file attribute, this inline entry and " |
300 | "all " |
301 | << "children will be removed.\n" ; |
302 | }); |
303 | return; |
304 | } |
305 | if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { |
306 | // skip this Die and just recurse down |
307 | for (DWARFDie ChildDie : Die.children()) |
308 | parseInlineInfo(Gsym, Out, CUI, Die: ChildDie, Depth: Depth + 1, FI, Parent, |
309 | AllParentRanges, WarnIfEmpty); |
310 | } |
311 | } |
312 | |
313 | static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, |
314 | DWARFDie Die, GsymCreator &Gsym, |
315 | FunctionInfo &FI) { |
316 | std::vector<uint32_t> RowVector; |
317 | const uint64_t StartAddress = FI.startAddress(); |
318 | const uint64_t EndAddress = FI.endAddress(); |
319 | const uint64_t RangeSize = EndAddress - StartAddress; |
320 | const object::SectionedAddress SecAddress{ |
321 | .Address: StartAddress, .SectionIndex: object::SectionedAddress::UndefSection}; |
322 | |
323 | |
324 | if (!CUI.LineTable->lookupAddressRange(Address: SecAddress, Size: RangeSize, Result&: RowVector)) { |
325 | // If we have a DW_TAG_subprogram but no line entries, fall back to using |
326 | // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. |
327 | std::string FilePath = Die.getDeclFile( |
328 | Kind: DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); |
329 | if (FilePath.empty()) { |
330 | // If we had a DW_AT_decl_file, but got no file then we need to emit a |
331 | // warning. |
332 | Out.Report(s: "Invalid file index in DW_AT_decl_file" , detailCallback: [&](raw_ostream &OS) { |
333 | const uint64_t DwarfFileIdx = dwarf::toUnsigned( |
334 | V: Die.findRecursively(Attrs: dwarf::DW_AT_decl_file), UINT32_MAX); |
335 | OS << "error: function DIE at " << HEX32(Die.getOffset()) |
336 | << " has an invalid file index " << DwarfFileIdx |
337 | << " in its DW_AT_decl_file attribute, unable to create a single " |
338 | << "line entry from the DW_AT_decl_file/DW_AT_decl_line " |
339 | << "attributes.\n" ; |
340 | }); |
341 | return; |
342 | } |
343 | if (auto Line = |
344 | dwarf::toUnsigned(V: Die.findRecursively(Attrs: {dwarf::DW_AT_decl_line}))) { |
345 | LineEntry LE(StartAddress, Gsym.insertFile(Path: FilePath), *Line); |
346 | FI.OptLineTable = LineTable(); |
347 | FI.OptLineTable->push(LE); |
348 | } |
349 | return; |
350 | } |
351 | |
352 | FI.OptLineTable = LineTable(); |
353 | DWARFDebugLine::Row PrevRow; |
354 | for (uint32_t RowIndex : RowVector) { |
355 | // Take file number and line/column from the row. |
356 | const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; |
357 | std::optional<uint32_t> OptFileIdx = |
358 | CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx: Row.File); |
359 | if (!OptFileIdx) { |
360 | Out.Report( |
361 | s: "Invalid file index in DWARF line table" , detailCallback: [&](raw_ostream &OS) { |
362 | OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has " |
363 | << "a line entry with invalid DWARF file index, this entry will " |
364 | << "be removed:\n" ; |
365 | Row.dumpTableHeader(OS, /*Indent=*/0); |
366 | Row.dump(OS); |
367 | OS << "\n" ; |
368 | }); |
369 | continue; |
370 | } |
371 | const uint32_t FileIdx = OptFileIdx.value(); |
372 | uint64_t RowAddress = Row.Address.Address; |
373 | // Watch out for a RowAddress that is in the middle of a line table entry |
374 | // in the DWARF. If we pass an address in between two line table entries |
375 | // we will get a RowIndex for the previous valid line table row which won't |
376 | // be contained in our function. This is usually a bug in the DWARF due to |
377 | // linker problems or LTO or other DWARF re-linking so it is worth emitting |
378 | // an error, but not worth stopping the creation of the GSYM. |
379 | if (!FI.Range.contains(Addr: RowAddress)) { |
380 | if (RowAddress < FI.Range.start()) { |
381 | Out.Report(s: "Start address lies between valid Row table entries" , |
382 | detailCallback: [&](raw_ostream &OS) { |
383 | OS << "error: DIE has a start address whose LowPC is " |
384 | "between the " |
385 | "line table Row[" |
386 | << RowIndex << "] with address " << HEX64(RowAddress) |
387 | << " and the next one.\n" ; |
388 | Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE()); |
389 | }); |
390 | RowAddress = FI.Range.start(); |
391 | } else { |
392 | continue; |
393 | } |
394 | } |
395 | |
396 | LineEntry LE(RowAddress, FileIdx, Row.Line); |
397 | if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { |
398 | // We have seen full duplicate line tables for functions in some |
399 | // DWARF files. Watch for those here by checking the last |
400 | // row was the function's end address (HighPC) and that the |
401 | // current line table entry's address is the same as the first |
402 | // line entry we already have in our "function_info.Lines". If |
403 | // so break out after printing a warning. |
404 | auto FirstLE = FI.OptLineTable->first(); |
405 | if (FirstLE && *FirstLE == LE) |
406 | // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird |
407 | Out.Report(s: "Duplicate line table detected" , detailCallback: [&](raw_ostream &OS) { |
408 | OS << "warning: duplicate line table detected for DIE:\n" ; |
409 | Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE()); |
410 | }); |
411 | else |
412 | Out.Report(s: "Non-monotonically increasing addresses" , |
413 | detailCallback: [&](raw_ostream &OS) { |
414 | OS << "error: line table has addresses that do not " |
415 | << "monotonically increase:\n" ; |
416 | for (uint32_t RowIndex2 : RowVector) |
417 | CUI.LineTable->Rows[RowIndex2].dump(OS); |
418 | Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE()); |
419 | }); |
420 | break; |
421 | } |
422 | |
423 | // Skip multiple line entries for the same file and line. |
424 | auto LastLE = FI.OptLineTable->last(); |
425 | if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) |
426 | continue; |
427 | // Only push a row if it isn't an end sequence. End sequence markers are |
428 | // included for the last address in a function or the last contiguous |
429 | // address in a sequence. |
430 | if (Row.EndSequence) { |
431 | // End sequence means that the next line entry could have a lower address |
432 | // that the previous entries. So we clear the previous row so we don't |
433 | // trigger the line table error about address that do not monotonically |
434 | // increase. |
435 | PrevRow = DWARFDebugLine::Row(); |
436 | } else { |
437 | FI.OptLineTable->push(LE); |
438 | PrevRow = Row; |
439 | } |
440 | } |
441 | // If not line table rows were added, clear the line table so we don't encode |
442 | // on in the GSYM file. |
443 | if (FI.OptLineTable->empty()) |
444 | FI.OptLineTable = std::nullopt; |
445 | } |
446 | |
447 | void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI, |
448 | DWARFDie Die) { |
449 | switch (Die.getTag()) { |
450 | case dwarf::DW_TAG_subprogram: { |
451 | Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); |
452 | if (!RangesOrError) { |
453 | consumeError(Err: RangesOrError.takeError()); |
454 | break; |
455 | } |
456 | const DWARFAddressRangesVector &Ranges = RangesOrError.get(); |
457 | if (Ranges.empty()) |
458 | break; |
459 | auto NameIndex = getQualifiedNameIndex(Die, Language: CUI.Language, Gsym); |
460 | if (!NameIndex) { |
461 | Out.Report(s: "Function has no name" , detailCallback: [&](raw_ostream &OS) { |
462 | OS << "error: function at " << HEX64(Die.getOffset()) |
463 | << " has no name\n " ; |
464 | Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE()); |
465 | }); |
466 | break; |
467 | } |
468 | // All ranges for the subprogram DIE in case it has multiple. We need to |
469 | // pass this down into parseInlineInfo so we don't warn about inline |
470 | // ranges that are not in the current subrange of a function when they |
471 | // actually are in another subgrange. We do this because when a function |
472 | // has discontiguos ranges, we create multiple function entries with only |
473 | // the info for that range contained inside of it. |
474 | AddressRanges AllSubprogramRanges = ConvertDWARFRanges(DwarfRanges: Ranges); |
475 | |
476 | // Create a function_info for each range |
477 | for (const DWARFAddressRange &Range : Ranges) { |
478 | // The low PC must be less than the high PC. Many linkers don't remove |
479 | // DWARF for functions that don't get linked into the final executable. |
480 | // If both the high and low pc have relocations, linkers will often set |
481 | // the address values for both to the same value to indicate the function |
482 | // has been remove. Other linkers have been known to set the one or both |
483 | // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 |
484 | // byte addresses to indicate the function isn't valid. The check below |
485 | // tries to watch for these cases and abort if it runs into them. |
486 | if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Addr: Range.LowPC)) |
487 | break; |
488 | |
489 | // Many linkers can't remove DWARF and might set the LowPC to zero. Since |
490 | // high PC can be an offset from the low PC in more recent DWARF versions |
491 | // we need to watch for a zero'ed low pc which we do using ValidTextRanges |
492 | // below. |
493 | if (!Gsym.IsValidTextAddress(Addr: Range.LowPC)) { |
494 | // We expect zero and -1 to be invalid addresses in DWARF depending |
495 | // on the linker of the DWARF. This indicates a function was stripped |
496 | // and the debug info wasn't able to be stripped from the DWARF. If |
497 | // the LowPC isn't zero or -1, then we should emit an error. |
498 | if (Range.LowPC != 0) { |
499 | if (!Gsym.isQuiet()) { |
500 | // Unexpected invalid address, emit a warning |
501 | Out.Report(s: "Address range starts outside executable section" , |
502 | detailCallback: [&](raw_ostream &OS) { |
503 | OS << "warning: DIE has an address range whose " |
504 | "start address " |
505 | "is not in any executable sections (" |
506 | << *Gsym.GetValidTextRanges() |
507 | << ") and will not be processed:\n" ; |
508 | Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE()); |
509 | }); |
510 | } |
511 | } |
512 | break; |
513 | } |
514 | |
515 | FunctionInfo FI; |
516 | FI.Range = {Range.LowPC, Range.HighPC}; |
517 | FI.Name = *NameIndex; |
518 | if (CUI.LineTable) |
519 | convertFunctionLineTable(Out, CUI, Die, Gsym, FI); |
520 | |
521 | if (hasInlineInfo(Die, Depth: 0)) { |
522 | FI.Inline = InlineInfo(); |
523 | FI.Inline->Name = *NameIndex; |
524 | FI.Inline->Ranges.insert(Range: FI.Range); |
525 | bool WarnIfEmpty = true; |
526 | parseInlineInfo(Gsym, Out, CUI, Die, Depth: 0, FI, Parent&: *FI.Inline, |
527 | AllParentRanges: AllSubprogramRanges, WarnIfEmpty); |
528 | // Make sure we at least got some valid inline info other than just |
529 | // the top level function. If we didn't then remove the inline info |
530 | // from the function info. We have seen cases where LTO tries to modify |
531 | // the DWARF for functions and it messes up the address ranges for |
532 | // the inline functions so it is no longer valid. |
533 | // |
534 | // By checking if there are any valid children on the top level inline |
535 | // information object, we will know if we got anything valid from the |
536 | // debug info. |
537 | if (FI.Inline->Children.empty()) { |
538 | if (WarnIfEmpty && !Gsym.isQuiet()) |
539 | Out.Report(s: "DIE contains inline functions with no valid ranges" , |
540 | detailCallback: [&](raw_ostream &OS) { |
541 | OS << "warning: DIE contains inline function " |
542 | "information that has no valid ranges, removing " |
543 | "inline information:\n" ; |
544 | Die.dump(OS, indent: 0, DumpOpts: DIDumpOptions::getForSingleDIE()); |
545 | }); |
546 | FI.Inline = std::nullopt; |
547 | } |
548 | } |
549 | Gsym.addFunctionInfo(FI: std::move(FI)); |
550 | } |
551 | } break; |
552 | default: |
553 | break; |
554 | } |
555 | for (DWARFDie ChildDie : Die.children()) |
556 | handleDie(Out, CUI, Die: ChildDie); |
557 | } |
558 | |
559 | Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { |
560 | size_t NumBefore = Gsym.getNumFunctionInfos(); |
561 | auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { |
562 | DWARFDie ReturnDie = DwarfUnit.getUnitDIE(ExtractUnitDIEOnly: false); |
563 | if (DwarfUnit.getDWOId()) { |
564 | DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(ExtractUnitDIEOnly: false).getDwarfUnit(); |
565 | if (!DWOCU->isDWOUnit()) |
566 | Out.Report( |
567 | s: "warning: Unable to retrieve DWO .debug_info section for some " |
568 | "object files. (Remove the --quiet flag for full output)" , |
569 | detailCallback: [&](raw_ostream &OS) { |
570 | std::string DWOName = dwarf::toString( |
571 | V: DwarfUnit.getUnitDIE().find( |
572 | Attrs: {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), |
573 | Default: "" ); |
574 | OS << "warning: Unable to retrieve DWO .debug_info section for " |
575 | << DWOName << "\n" ; |
576 | }); |
577 | else { |
578 | ReturnDie = DWOCU->getUnitDIE(ExtractUnitDIEOnly: false); |
579 | } |
580 | } |
581 | return ReturnDie; |
582 | }; |
583 | if (NumThreads == 1) { |
584 | // Parse all DWARF data from this thread, use the same string/file table |
585 | // for everything |
586 | for (const auto &CU : DICtx.compile_units()) { |
587 | DWARFDie Die = getDie(*CU); |
588 | CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(Val: CU.get())); |
589 | handleDie(Out, CUI, Die); |
590 | } |
591 | } else { |
592 | // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up |
593 | // front before we start accessing any DIEs since there might be |
594 | // cross compile unit references in the DWARF. If we don't do this we can |
595 | // end up crashing. |
596 | |
597 | // We need to call getAbbreviations sequentially first so that getUnitDIE() |
598 | // only works with its local data. |
599 | for (const auto &CU : DICtx.compile_units()) |
600 | CU->getAbbreviations(); |
601 | |
602 | // Now parse all DIEs in case we have cross compile unit references in a |
603 | // thread pool. |
604 | DefaultThreadPool pool(hardware_concurrency(ThreadCount: NumThreads)); |
605 | for (const auto &CU : DICtx.compile_units()) |
606 | pool.async(F: [&CU]() { CU->getUnitDIE(ExtractUnitDIEOnly: false /*CUDieOnly*/); }); |
607 | pool.wait(); |
608 | |
609 | // Now convert all DWARF to GSYM in a thread pool. |
610 | std::mutex LogMutex; |
611 | for (const auto &CU : DICtx.compile_units()) { |
612 | DWARFDie Die = getDie(*CU); |
613 | if (Die) { |
614 | CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(Val: CU.get())); |
615 | pool.async(F: [this, CUI, &LogMutex, &Out, Die]() mutable { |
616 | std::string storage; |
617 | raw_string_ostream StrStream(storage); |
618 | OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr); |
619 | handleDie(Out&: ThreadOut, CUI, Die); |
620 | // Print ThreadLogStorage lines into an actual stream under a lock |
621 | std::lock_guard<std::mutex> guard(LogMutex); |
622 | if (Out.GetOS()) { |
623 | StrStream.flush(); |
624 | Out << storage; |
625 | } |
626 | Out.Merge(other: ThreadOut); |
627 | }); |
628 | } |
629 | } |
630 | pool.wait(); |
631 | } |
632 | size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; |
633 | Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n" ; |
634 | return Error::success(); |
635 | } |
636 | |
637 | llvm::Error DwarfTransformer::verify(StringRef GsymPath, |
638 | OutputAggregator &Out) { |
639 | Out << "Verifying GSYM file \"" << GsymPath << "\":\n" ; |
640 | |
641 | auto Gsym = GsymReader::openFile(Path: GsymPath); |
642 | if (!Gsym) |
643 | return Gsym.takeError(); |
644 | |
645 | auto NumAddrs = Gsym->getNumAddresses(); |
646 | DILineInfoSpecifier DLIS( |
647 | DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, |
648 | DILineInfoSpecifier::FunctionNameKind::LinkageName); |
649 | std::string gsymFilename; |
650 | for (uint32_t I = 0; I < NumAddrs; ++I) { |
651 | auto FuncAddr = Gsym->getAddress(Index: I); |
652 | if (!FuncAddr) |
653 | return createStringError(EC: std::errc::invalid_argument, |
654 | Fmt: "failed to extract address[%i]" , Vals: I); |
655 | |
656 | auto FI = Gsym->getFunctionInfo(Addr: *FuncAddr); |
657 | if (!FI) |
658 | return createStringError( |
659 | EC: std::errc::invalid_argument, |
660 | Fmt: "failed to extract function info for address 0x%" PRIu64, Vals: *FuncAddr); |
661 | |
662 | for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { |
663 | const object::SectionedAddress SectAddr{ |
664 | .Address: Addr, .SectionIndex: object::SectionedAddress::UndefSection}; |
665 | auto LR = Gsym->lookup(Addr); |
666 | if (!LR) |
667 | return LR.takeError(); |
668 | |
669 | auto DwarfInlineInfos = |
670 | DICtx.getInliningInfoForAddress(Address: SectAddr, Specifier: DLIS); |
671 | uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); |
672 | if (NumDwarfInlineInfos == 0) { |
673 | DwarfInlineInfos.addFrame( |
674 | Frame: DICtx.getLineInfoForAddress(Address: SectAddr, Specifier: DLIS)); |
675 | } |
676 | |
677 | // Check for 1 entry that has no file and line info |
678 | if (NumDwarfInlineInfos == 1 && |
679 | DwarfInlineInfos.getFrame(Index: 0).FileName == "<invalid>" ) { |
680 | DwarfInlineInfos = DIInliningInfo(); |
681 | NumDwarfInlineInfos = 0; |
682 | } |
683 | if (NumDwarfInlineInfos > 0 && |
684 | NumDwarfInlineInfos != LR->Locations.size()) { |
685 | if (Out.GetOS()) { |
686 | raw_ostream &Log = *Out.GetOS(); |
687 | Log << "error: address " << HEX64(Addr) << " has " |
688 | << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " |
689 | << LR->Locations.size() << "\n" ; |
690 | Log << " " << NumDwarfInlineInfos << " DWARF frames:\n" ; |
691 | for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { |
692 | const auto &dii = DwarfInlineInfos.getFrame(Index: Idx); |
693 | Log << " [" << Idx << "]: " << dii.FunctionName << " @ " |
694 | << dii.FileName << ':' << dii.Line << '\n'; |
695 | } |
696 | Log << " " << LR->Locations.size() << " GSYM frames:\n" ; |
697 | for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; |
698 | ++Idx) { |
699 | const auto &gii = LR->Locations[Idx]; |
700 | Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir |
701 | << '/' << gii.Base << ':' << gii.Line << '\n'; |
702 | } |
703 | DwarfInlineInfos = DICtx.getInliningInfoForAddress(Address: SectAddr, Specifier: DLIS); |
704 | Gsym->dump(OS&: Log, FI: *FI); |
705 | } |
706 | continue; |
707 | } |
708 | |
709 | for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; |
710 | ++Idx) { |
711 | const auto &gii = LR->Locations[Idx]; |
712 | if (Idx < NumDwarfInlineInfos) { |
713 | const auto &dii = DwarfInlineInfos.getFrame(Index: Idx); |
714 | gsymFilename = LR->getSourceFile(Index: Idx); |
715 | // Verify function name |
716 | if (dii.FunctionName.find(str: gii.Name.str()) != 0) |
717 | Out << "error: address " << HEX64(Addr) << " DWARF function \"" |
718 | << dii.FunctionName.c_str() |
719 | << "\" doesn't match GSYM function \"" << gii.Name << "\"\n" ; |
720 | |
721 | // Verify source file path |
722 | if (dii.FileName != gsymFilename) |
723 | Out << "error: address " << HEX64(Addr) << " DWARF path \"" |
724 | << dii.FileName.c_str() << "\" doesn't match GSYM path \"" |
725 | << gsymFilename.c_str() << "\"\n" ; |
726 | // Verify source file line |
727 | if (dii.Line != gii.Line) |
728 | Out << "error: address " << HEX64(Addr) << " DWARF line " |
729 | << dii.Line << " != GSYM line " << gii.Line << "\n" ; |
730 | } |
731 | } |
732 | } |
733 | } |
734 | return Error::success(); |
735 | } |
736 | |