1 | //===-- LVBinaryReader.cpp ------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements the LVBinaryReader class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h" |
14 | #include "llvm/Support/Errc.h" |
15 | #include "llvm/Support/FormatAdapters.h" |
16 | #include "llvm/Support/FormatVariadic.h" |
17 | |
18 | using namespace llvm; |
19 | using namespace llvm::logicalview; |
20 | |
21 | #define DEBUG_TYPE "BinaryReader" |
22 | |
23 | // Function names extracted from the object symbol table. |
24 | void LVSymbolTable::add(StringRef Name, LVScope *Function, |
25 | LVSectionIndex SectionIndex) { |
26 | std::string SymbolName(Name); |
27 | if (SymbolNames.find(x: SymbolName) == SymbolNames.end()) { |
28 | SymbolNames.emplace( |
29 | args: std::piecewise_construct, args: std::forward_as_tuple(args&: SymbolName), |
30 | args: std::forward_as_tuple(args&: Function, args: 0, args&: SectionIndex, args: false)); |
31 | } else { |
32 | // Update a recorded entry with its logical scope and section index. |
33 | SymbolNames[SymbolName].Scope = Function; |
34 | if (SectionIndex) |
35 | SymbolNames[SymbolName].SectionIndex = SectionIndex; |
36 | } |
37 | |
38 | if (Function && SymbolNames[SymbolName].IsComdat) |
39 | Function->setIsComdat(); |
40 | |
41 | LLVM_DEBUG({ print(dbgs()); }); |
42 | } |
43 | |
44 | void LVSymbolTable::add(StringRef Name, LVAddress Address, |
45 | LVSectionIndex SectionIndex, bool IsComdat) { |
46 | std::string SymbolName(Name); |
47 | if (SymbolNames.find(x: SymbolName) == SymbolNames.end()) |
48 | SymbolNames.emplace( |
49 | args: std::piecewise_construct, args: std::forward_as_tuple(args&: SymbolName), |
50 | args: std::forward_as_tuple(args: nullptr, args&: Address, args&: SectionIndex, args&: IsComdat)); |
51 | else |
52 | // Update a recorded symbol name with its logical scope. |
53 | SymbolNames[SymbolName].Address = Address; |
54 | |
55 | LVScope *Function = SymbolNames[SymbolName].Scope; |
56 | if (Function && IsComdat) |
57 | Function->setIsComdat(); |
58 | LLVM_DEBUG({ print(dbgs()); }); |
59 | } |
60 | |
61 | LVSectionIndex LVSymbolTable::update(LVScope *Function) { |
62 | LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex(); |
63 | StringRef Name = Function->getLinkageName(); |
64 | if (Name.empty()) |
65 | Name = Function->getName(); |
66 | std::string SymbolName(Name); |
67 | |
68 | if (SymbolName.empty() || (SymbolNames.find(x: SymbolName) == SymbolNames.end())) |
69 | return SectionIndex; |
70 | |
71 | // Update a recorded entry with its logical scope, only if the scope has |
72 | // ranges. That is the case when in DWARF there are 2 DIEs connected via |
73 | // the DW_AT_specification. |
74 | if (Function->getHasRanges()) { |
75 | SymbolNames[SymbolName].Scope = Function; |
76 | SectionIndex = SymbolNames[SymbolName].SectionIndex; |
77 | } else { |
78 | SectionIndex = UndefinedSectionIndex; |
79 | } |
80 | |
81 | if (SymbolNames[SymbolName].IsComdat) |
82 | Function->setIsComdat(); |
83 | |
84 | LLVM_DEBUG({ print(dbgs()); }); |
85 | return SectionIndex; |
86 | } |
87 | |
88 | const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) { |
89 | static LVSymbolTableEntry Empty = LVSymbolTableEntry(); |
90 | LVSymbolNames::iterator Iter = SymbolNames.find(x: std::string(Name)); |
91 | return Iter != SymbolNames.end() ? Iter->second : Empty; |
92 | } |
93 | LVAddress LVSymbolTable::getAddress(StringRef Name) { |
94 | LVSymbolNames::iterator Iter = SymbolNames.find(x: std::string(Name)); |
95 | return Iter != SymbolNames.end() ? Iter->second.Address : 0; |
96 | } |
97 | LVSectionIndex LVSymbolTable::getIndex(StringRef Name) { |
98 | LVSymbolNames::iterator Iter = SymbolNames.find(x: std::string(Name)); |
99 | return Iter != SymbolNames.end() ? Iter->second.SectionIndex |
100 | : getReader().getDotTextSectionIndex(); |
101 | } |
102 | bool LVSymbolTable::getIsComdat(StringRef Name) { |
103 | LVSymbolNames::iterator Iter = SymbolNames.find(x: std::string(Name)); |
104 | return Iter != SymbolNames.end() ? Iter->second.IsComdat : false; |
105 | } |
106 | |
107 | void LVSymbolTable::print(raw_ostream &OS) { |
108 | OS << "Symbol Table\n" ; |
109 | for (LVSymbolNames::reference Entry : SymbolNames) { |
110 | LVSymbolTableEntry &SymbolName = Entry.second; |
111 | LVScope *Scope = SymbolName.Scope; |
112 | LVOffset Offset = Scope ? Scope->getOffset() : 0; |
113 | OS << "Index: " << hexValue(N: SymbolName.SectionIndex, Width: 5) |
114 | << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N" ) |
115 | << " Scope: " << hexValue(N: Offset) |
116 | << " Address: " << hexValue(N: SymbolName.Address) |
117 | << " Name: " << Entry.first << "\n" ; |
118 | } |
119 | } |
120 | |
121 | void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function, |
122 | LVSectionIndex SectionIndex) { |
123 | SymbolTable.add(Name, Function, SectionIndex); |
124 | } |
125 | void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address, |
126 | LVSectionIndex SectionIndex, |
127 | bool IsComdat) { |
128 | SymbolTable.add(Name, Address, SectionIndex, IsComdat); |
129 | } |
130 | LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) { |
131 | return SymbolTable.update(Function); |
132 | } |
133 | |
134 | const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) { |
135 | return SymbolTable.getEntry(Name); |
136 | } |
137 | LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) { |
138 | return SymbolTable.getAddress(Name); |
139 | } |
140 | LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) { |
141 | return SymbolTable.getIndex(Name); |
142 | } |
143 | bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) { |
144 | return SymbolTable.getIsComdat(Name); |
145 | } |
146 | |
147 | void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) { |
148 | for (const object::SectionRef &Section : Obj.sections()) { |
149 | LLVM_DEBUG({ |
150 | Expected<StringRef> SectionNameOrErr = Section.getName(); |
151 | StringRef Name; |
152 | if (!SectionNameOrErr) |
153 | consumeError(SectionNameOrErr.takeError()); |
154 | else |
155 | Name = *SectionNameOrErr; |
156 | dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", " |
157 | << "Address: " << hexValue(Section.getAddress()) << ", " |
158 | << "Size: " << hexValue(Section.getSize()) << ", " |
159 | << "Name: " << Name << "\n" ; |
160 | dbgs() << "isCompressed: " << Section.isCompressed() << ", " |
161 | << "isText: " << Section.isText() << ", " |
162 | << "isData: " << Section.isData() << ", " |
163 | << "isBSS: " << Section.isBSS() << ", " |
164 | << "isVirtual: " << Section.isVirtual() << "\n" ; |
165 | dbgs() << "isBitcode: " << Section.isBitcode() << ", " |
166 | << "isStripped: " << Section.isStripped() << ", " |
167 | << "isBerkeleyText: " << Section.isBerkeleyText() << ", " |
168 | << "isBerkeleyData: " << Section.isBerkeleyData() << ", " |
169 | << "isDebugSection: " << Section.isDebugSection() << "\n" ; |
170 | dbgs() << "\n" ; |
171 | }); |
172 | |
173 | if (!Section.isText() || Section.isVirtual() || !Section.getSize()) |
174 | continue; |
175 | |
176 | // Record section information required for symbol resolution. |
177 | // Note: The section index returned by 'getIndex()' is one based. |
178 | Sections.emplace(args: Section.getIndex(), args: Section); |
179 | addSectionAddress(Section); |
180 | |
181 | // Identify the ".text" section. |
182 | Expected<StringRef> SectionNameOrErr = Section.getName(); |
183 | if (!SectionNameOrErr) { |
184 | consumeError(Err: SectionNameOrErr.takeError()); |
185 | continue; |
186 | } |
187 | if (*SectionNameOrErr == ".text" || *SectionNameOrErr == "CODE" || |
188 | *SectionNameOrErr == ".code" ) { |
189 | DotTextSectionIndex = Section.getIndex(); |
190 | // If the object is WebAssembly, update the address offset that |
191 | // will be added to DWARF DW_AT_* attributes. |
192 | if (Obj.isWasm()) |
193 | WasmCodeSectionOffset = Section.getAddress(); |
194 | } |
195 | } |
196 | |
197 | // Process the symbol table. |
198 | mapRangeAddress(Obj); |
199 | |
200 | LLVM_DEBUG({ |
201 | dbgs() << "\nSections Information:\n" ; |
202 | for (LVSections::reference Entry : Sections) { |
203 | LVSectionIndex SectionIndex = Entry.first; |
204 | const object::SectionRef Section = Entry.second; |
205 | Expected<StringRef> SectionNameOrErr = Section.getName(); |
206 | if (!SectionNameOrErr) |
207 | consumeError(SectionNameOrErr.takeError()); |
208 | dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) |
209 | << " Name: " << *SectionNameOrErr << "\n" |
210 | << "Size: " << hexValue(Section.getSize()) << "\n" |
211 | << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" |
212 | << "SectionAddress: " << hexValue(Section.getAddress()) << "\n" ; |
213 | } |
214 | dbgs() << "\nObject Section Information:\n" ; |
215 | for (LVSectionAddresses::const_reference Entry : SectionAddresses) |
216 | dbgs() << "[" << hexValue(Entry.first) << ":" |
217 | << hexValue(Entry.first + Entry.second.getSize()) |
218 | << "] Size: " << hexValue(Entry.second.getSize()) << "\n" ; |
219 | }); |
220 | } |
221 | |
222 | void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) { |
223 | ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase(); |
224 | if (ImageBase) |
225 | ImageBaseAddress = ImageBase.get(); |
226 | |
227 | LLVM_DEBUG({ |
228 | dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n" ; |
229 | }); |
230 | |
231 | uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT; |
232 | |
233 | for (const object::SectionRef &Section : COFFObj.sections()) { |
234 | if (!Section.isText() || Section.isVirtual() || !Section.getSize()) |
235 | continue; |
236 | |
237 | const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); |
238 | VirtualAddress = COFFSection->VirtualAddress; |
239 | bool IsComdat = (COFFSection->Characteristics & Flags) == Flags; |
240 | |
241 | // Record section information required for symbol resolution. |
242 | // Note: The section index returned by 'getIndex()' is zero based. |
243 | Sections.emplace(args: Section.getIndex() + 1, args: Section); |
244 | addSectionAddress(Section); |
245 | |
246 | // Additional initialization on the specific object format. |
247 | mapRangeAddress(Obj: COFFObj, Section, IsComdat); |
248 | } |
249 | |
250 | LLVM_DEBUG({ |
251 | dbgs() << "\nSections Information:\n" ; |
252 | for (LVSections::reference Entry : Sections) { |
253 | LVSectionIndex SectionIndex = Entry.first; |
254 | const object::SectionRef Section = Entry.second; |
255 | const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); |
256 | Expected<StringRef> SectionNameOrErr = Section.getName(); |
257 | if (!SectionNameOrErr) |
258 | consumeError(SectionNameOrErr.takeError()); |
259 | dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) |
260 | << " Name: " << *SectionNameOrErr << "\n" |
261 | << "Size: " << hexValue(Section.getSize()) << "\n" |
262 | << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" |
263 | << "SectionAddress: " << hexValue(Section.getAddress()) << "\n" |
264 | << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData) |
265 | << "\n" |
266 | << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData) |
267 | << "\n" ; |
268 | } |
269 | dbgs() << "\nObject Section Information:\n" ; |
270 | for (LVSectionAddresses::const_reference Entry : SectionAddresses) |
271 | dbgs() << "[" << hexValue(Entry.first) << ":" |
272 | << hexValue(Entry.first + Entry.second.getSize()) |
273 | << "] Size: " << hexValue(Entry.second.getSize()) << "\n" ; |
274 | }); |
275 | } |
276 | |
277 | Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, |
278 | StringRef TheFeatures) { |
279 | std::string TargetLookupError; |
280 | const Target *TheTarget = |
281 | TargetRegistry::lookupTarget(Triple: std::string(TheTriple), Error&: TargetLookupError); |
282 | if (!TheTarget) |
283 | return createStringError(EC: errc::invalid_argument, S: TargetLookupError.c_str()); |
284 | |
285 | // Register information. |
286 | MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TT: TheTriple); |
287 | if (!RegisterInfo) |
288 | return createStringError(EC: errc::invalid_argument, |
289 | S: "no register info for target " + TheTriple); |
290 | MRI.reset(p: RegisterInfo); |
291 | |
292 | // Assembler properties and features. |
293 | MCTargetOptions MCOptions; |
294 | MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple, Options: MCOptions)); |
295 | if (!AsmInfo) |
296 | return createStringError(EC: errc::invalid_argument, |
297 | S: "no assembly info for target " + TheTriple); |
298 | MAI.reset(p: AsmInfo); |
299 | |
300 | // Target subtargets. |
301 | StringRef CPU; |
302 | MCSubtargetInfo *SubtargetInfo( |
303 | TheTarget->createMCSubtargetInfo(TheTriple, CPU, Features: TheFeatures)); |
304 | if (!SubtargetInfo) |
305 | return createStringError(EC: errc::invalid_argument, |
306 | S: "no subtarget info for target " + TheTriple); |
307 | STI.reset(p: SubtargetInfo); |
308 | |
309 | // Instructions Info. |
310 | MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo()); |
311 | if (!InstructionInfo) |
312 | return createStringError(EC: errc::invalid_argument, |
313 | S: "no instruction info for target " + TheTriple); |
314 | MII.reset(p: InstructionInfo); |
315 | |
316 | MC = std::make_unique<MCContext>(args: Triple(TheTriple), args: MAI.get(), args: MRI.get(), |
317 | args: STI.get()); |
318 | |
319 | // Assembler. |
320 | MCDisassembler *DisAsm(TheTarget->createMCDisassembler(STI: *STI, Ctx&: *MC)); |
321 | if (!DisAsm) |
322 | return createStringError(EC: errc::invalid_argument, |
323 | S: "no disassembler for target " + TheTriple); |
324 | MD.reset(p: DisAsm); |
325 | |
326 | MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter( |
327 | T: Triple(TheTriple), SyntaxVariant: AsmInfo->getAssemblerDialect(), MAI: *MAI, MII: *MII, MRI: *MRI)); |
328 | if (!InstructionPrinter) |
329 | return createStringError(EC: errc::invalid_argument, |
330 | S: "no target assembly language printer for target " + |
331 | TheTriple); |
332 | MIP.reset(p: InstructionPrinter); |
333 | InstructionPrinter->setPrintImmHex(true); |
334 | |
335 | return Error::success(); |
336 | } |
337 | |
338 | Expected<std::pair<uint64_t, object::SectionRef>> |
339 | LVBinaryReader::getSection(LVScope *Scope, LVAddress Address, |
340 | LVSectionIndex SectionIndex) { |
341 | // Return the 'text' section with the code for this logical scope. |
342 | // COFF: SectionIndex is zero. Use 'SectionAddresses' data. |
343 | // ELF: SectionIndex is the section index in the file. |
344 | if (SectionIndex) { |
345 | LVSections::iterator Iter = Sections.find(x: SectionIndex); |
346 | if (Iter == Sections.end()) { |
347 | return createStringError(EC: errc::invalid_argument, |
348 | Fmt: "invalid section index for: '%s'" , |
349 | Vals: Scope->getName().str().c_str()); |
350 | } |
351 | const object::SectionRef Section = Iter->second; |
352 | return std::make_pair(x: Section.getAddress(), y: Section); |
353 | } |
354 | |
355 | // Ensure a valid starting address for the public names. |
356 | LVSectionAddresses::const_iterator Iter = |
357 | SectionAddresses.upper_bound(x: Address); |
358 | if (Iter == SectionAddresses.begin()) |
359 | return createStringError(EC: errc::invalid_argument, |
360 | Fmt: "invalid section address for: '%s'" , |
361 | Vals: Scope->getName().str().c_str()); |
362 | |
363 | // Get section that contains the code for this function. |
364 | Iter = SectionAddresses.lower_bound(x: Address); |
365 | if (Iter != SectionAddresses.begin()) |
366 | --Iter; |
367 | return std::make_pair(x: Iter->first, y: Iter->second); |
368 | } |
369 | |
370 | void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, |
371 | LVScope *Scope) { |
372 | LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); |
373 | ScopesWithRanges->addEntry(Scope); |
374 | } |
375 | |
376 | void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, |
377 | LVScope *Scope, LVAddress LowerAddress, |
378 | LVAddress UpperAddress) { |
379 | LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); |
380 | ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress); |
381 | } |
382 | |
383 | LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) { |
384 | // Check if we already have a mapping for this section index. |
385 | LVSectionRanges::iterator IterSection = SectionRanges.find(x: SectionIndex); |
386 | if (IterSection == SectionRanges.end()) |
387 | IterSection = |
388 | SectionRanges.emplace(args&: SectionIndex, args: std::make_unique<LVRange>()).first; |
389 | LVRange *Range = IterSection->second.get(); |
390 | assert(Range && "Range is null." ); |
391 | return Range; |
392 | } |
393 | |
394 | Error LVBinaryReader::createInstructions(LVScope *Scope, |
395 | LVSectionIndex SectionIndex, |
396 | const LVNameInfo &NameInfo) { |
397 | assert(Scope && "Scope is null." ); |
398 | |
399 | // Skip stripped functions. |
400 | if (Scope->getIsDiscarded()) |
401 | return Error::success(); |
402 | |
403 | // Find associated address and size for the given function entry point. |
404 | LVAddress Address = NameInfo.first; |
405 | uint64_t Size = NameInfo.second; |
406 | |
407 | LLVM_DEBUG({ |
408 | dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '" |
409 | << Scope->getLinkageName() << "'\n" |
410 | << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" |
411 | << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n" ; |
412 | }); |
413 | |
414 | Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr = |
415 | getSection(Scope, Address, SectionIndex); |
416 | if (!SectionOrErr) |
417 | return SectionOrErr.takeError(); |
418 | const object::SectionRef Section = (*SectionOrErr).second; |
419 | uint64_t SectionAddress = (*SectionOrErr).first; |
420 | |
421 | Expected<StringRef> SectionContentsOrErr = Section.getContents(); |
422 | if (!SectionContentsOrErr) |
423 | return SectionOrErr.takeError(); |
424 | |
425 | // There are cases where the section size is smaller than the [LowPC,HighPC] |
426 | // range; it causes us to decode invalid addresses. The recorded size in the |
427 | // logical scope is one less than the real size. |
428 | LLVM_DEBUG({ |
429 | dbgs() << " Size: " << hexValue(Size) |
430 | << ", Section Size: " << hexValue(Section.getSize()) << "\n" ; |
431 | }); |
432 | Size = std::min(a: Size + 1, b: Section.getSize()); |
433 | |
434 | ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Input: *SectionContentsOrErr); |
435 | uint64_t Offset = Address - SectionAddress; |
436 | uint8_t const *Begin = Bytes.data() + Offset; |
437 | uint8_t const *End = Bytes.data() + Offset + Size; |
438 | |
439 | LLVM_DEBUG({ |
440 | Expected<StringRef> SectionNameOrErr = Section.getName(); |
441 | if (!SectionNameOrErr) |
442 | consumeError(SectionNameOrErr.takeError()); |
443 | else |
444 | dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " [" |
445 | << hexValue((uint64_t)Section.getAddress()) << ":" |
446 | << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10) |
447 | << "] Name: '" << *SectionNameOrErr << "'\n" |
448 | << "Begin: " << hexValue((uint64_t)Begin) |
449 | << ", End: " << hexValue((uint64_t)End) << "\n" ; |
450 | }); |
451 | |
452 | // Address for first instruction line. |
453 | LVAddress FirstAddress = Address; |
454 | auto InstructionsSP = std::make_unique<LVLines>(); |
455 | LVLines &Instructions = *InstructionsSP; |
456 | DiscoveredLines.emplace_back(args: std::move(InstructionsSP)); |
457 | |
458 | while (Begin < End) { |
459 | MCInst Instruction; |
460 | uint64_t BytesConsumed = 0; |
461 | SmallVector<char, 64> InsnStr; |
462 | raw_svector_ostream Annotations(InsnStr); |
463 | MCDisassembler::DecodeStatus const S = |
464 | MD->getInstruction(Instr&: Instruction, Size&: BytesConsumed, |
465 | Bytes: ArrayRef<uint8_t>(Begin, End), Address, CStream&: outs()); |
466 | switch (S) { |
467 | case MCDisassembler::Fail: |
468 | LLVM_DEBUG({ dbgs() << "Invalid instruction\n" ; }); |
469 | if (BytesConsumed == 0) |
470 | // Skip invalid bytes |
471 | BytesConsumed = 1; |
472 | break; |
473 | case MCDisassembler::SoftFail: |
474 | LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:" ; }); |
475 | [[fallthrough]]; |
476 | case MCDisassembler::Success: { |
477 | std::string Buffer; |
478 | raw_string_ostream Stream(Buffer); |
479 | StringRef AnnotationsStr = Annotations.str(); |
480 | MIP->printInst(MI: &Instruction, Address, Annot: AnnotationsStr, STI: *STI, OS&: Stream); |
481 | LLVM_DEBUG({ |
482 | std::string BufferCodes; |
483 | raw_string_ostream StreamCodes(BufferCodes); |
484 | StreamCodes << format_bytes( |
485 | ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16, |
486 | 16); |
487 | dbgs() << "[" << hexValue((uint64_t)Begin) << "] " |
488 | << "Size: " << format_decimal(BytesConsumed, 2) << " (" |
489 | << formatv("{0}" , |
490 | fmt_align(StreamCodes.str(), AlignStyle::Left, 32)) |
491 | << ") " << hexValue((uint64_t)Address) << ": " << Stream.str() |
492 | << "\n" ; |
493 | }); |
494 | // Here we add logical lines to the Instructions. Later on, |
495 | // the 'processLines()' function will move each created logical line |
496 | // to its enclosing logical scope, using the debug ranges information |
497 | // and they will be released when its scope parent is deleted. |
498 | LVLineAssembler *Line = createLineAssembler(); |
499 | Line->setAddress(Address); |
500 | Line->setName(StringRef(Stream.str()).trim()); |
501 | Instructions.push_back(Elt: Line); |
502 | break; |
503 | } |
504 | } |
505 | Address += BytesConsumed; |
506 | Begin += BytesConsumed; |
507 | } |
508 | |
509 | LLVM_DEBUG({ |
510 | size_t Index = 0; |
511 | dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) |
512 | << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" |
513 | << "Address: " << hexValue(FirstAddress) |
514 | << format(" - Collected instructions lines: %d\n" , |
515 | Instructions.size()); |
516 | for (const LVLine *Line : Instructions) |
517 | dbgs() << format_decimal(++Index, 5) << ": " |
518 | << hexValue(Line->getOffset()) << ", (" << Line->getName() |
519 | << ")\n" ; |
520 | }); |
521 | |
522 | // The scope in the assembler names is linked to its own instructions. |
523 | ScopeInstructions.add(FirstKey: SectionIndex, SecondKey: Scope, Value: &Instructions); |
524 | AssemblerMappings.add(FirstKey: SectionIndex, SecondKey: FirstAddress, Value: Scope); |
525 | |
526 | return Error::success(); |
527 | } |
528 | |
529 | Error LVBinaryReader::createInstructions(LVScope *Function, |
530 | LVSectionIndex SectionIndex) { |
531 | if (!options().getPrintInstructions()) |
532 | return Error::success(); |
533 | |
534 | LVNameInfo Name = CompileUnit->findPublicName(Scope: Function); |
535 | if (Name.first != LVAddress(UINT64_MAX)) |
536 | return createInstructions(Scope: Function, SectionIndex, NameInfo: Name); |
537 | |
538 | return Error::success(); |
539 | } |
540 | |
541 | Error LVBinaryReader::createInstructions() { |
542 | if (!options().getPrintInstructions()) |
543 | return Error::success(); |
544 | |
545 | LLVM_DEBUG({ |
546 | size_t Index = 1; |
547 | dbgs() << "\nPublic Names (Scope):\n" ; |
548 | for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { |
549 | LVScope *Scope = Name.first; |
550 | const LVNameInfo &NameInfo = Name.second; |
551 | LVAddress Address = NameInfo.first; |
552 | uint64_t Size = NameInfo.second; |
553 | dbgs() << format_decimal(Index++, 5) << ": " |
554 | << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" |
555 | << hexValue(Address) << ":" << hexValue(Address + Size) << "] " |
556 | << "Name: '" << Scope->getName() << "' / '" |
557 | << Scope->getLinkageName() << "'\n" ; |
558 | } |
559 | }); |
560 | |
561 | // For each public name in the current compile unit, create the line |
562 | // records that represent the executable instructions. |
563 | for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { |
564 | LVScope *Scope = Name.first; |
565 | // The symbol table extracted from the object file always contains a |
566 | // non-empty name (linkage name). However, the logical scope does not |
567 | // guarantee to have a name for the linkage name (main is one case). |
568 | // For those cases, set the linkage name the same as the name. |
569 | if (!Scope->getLinkageNameIndex()) |
570 | Scope->setLinkageName(Scope->getName()); |
571 | LVSectionIndex SectionIndex = getSymbolTableIndex(Name: Scope->getLinkageName()); |
572 | if (Error Err = createInstructions(Scope, SectionIndex, NameInfo: Name.second)) |
573 | return Err; |
574 | } |
575 | |
576 | return Error::success(); |
577 | } |
578 | |
579 | // During the traversal of the debug information sections, we created the |
580 | // logical lines representing the disassembled instructions from the text |
581 | // section and the logical lines representing the line records from the |
582 | // debug line section. Using the ranges associated with the logical scopes, |
583 | // we will allocate those logical lines to their logical scopes. |
584 | void LVBinaryReader::processLines(LVLines *DebugLines, |
585 | LVSectionIndex SectionIndex, |
586 | LVScope *Function) { |
587 | assert(DebugLines && "DebugLines is null." ); |
588 | |
589 | // Just return if this compilation unit does not have any line records |
590 | // and no instruction lines were created. |
591 | if (DebugLines->empty() && !options().getPrintInstructions()) |
592 | return; |
593 | |
594 | // Merge the debug lines and instruction lines using their text address; |
595 | // the logical line representing the debug line record is followed by the |
596 | // line(s) representing the disassembled instructions, whose addresses are |
597 | // equal or greater that the line address and less than the address of the |
598 | // next debug line record. |
599 | LLVM_DEBUG({ |
600 | size_t Index = 1; |
601 | size_t PerLine = 4; |
602 | dbgs() << format("\nProcess debug lines: %d\n" , DebugLines->size()); |
603 | for (const LVLine *Line : *DebugLines) { |
604 | dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset()) |
605 | << ", (" << Line->getLineNumber() << ")" |
606 | << ((Index % PerLine) ? " " : "\n" ); |
607 | ++Index; |
608 | } |
609 | dbgs() << ((Index % PerLine) ? "\n" : "" ); |
610 | }); |
611 | |
612 | bool TraverseLines = true; |
613 | LVLines::iterator Iter = DebugLines->begin(); |
614 | while (TraverseLines && Iter != DebugLines->end()) { |
615 | uint64_t DebugAddress = (*Iter)->getAddress(); |
616 | |
617 | // Get the function with an entry point that matches this line and |
618 | // its associated assembler entries. In the case of COMDAT, the input |
619 | // 'Function' is not null. Use it to find its address ranges. |
620 | LVScope *Scope = Function; |
621 | if (!Function) { |
622 | Scope = AssemblerMappings.find(FirstKey: SectionIndex, SecondKey: DebugAddress); |
623 | if (!Scope) { |
624 | ++Iter; |
625 | continue; |
626 | } |
627 | } |
628 | |
629 | // Get the associated instructions for the found 'Scope'. |
630 | LVLines InstructionLines; |
631 | LVLines *Lines = ScopeInstructions.find(FirstKey: SectionIndex, SecondKey: Scope); |
632 | if (Lines) |
633 | InstructionLines = std::move(*Lines); |
634 | |
635 | LLVM_DEBUG({ |
636 | size_t Index = 0; |
637 | dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) |
638 | << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" |
639 | << format("Process instruction lines: %d\n" , |
640 | InstructionLines.size()); |
641 | for (const LVLine *Line : InstructionLines) |
642 | dbgs() << format_decimal(++Index, 5) << ": " |
643 | << hexValue(Line->getOffset()) << ", (" << Line->getName() |
644 | << ")\n" ; |
645 | }); |
646 | |
647 | // Continue with next debug line if there are not instructions lines. |
648 | if (InstructionLines.empty()) { |
649 | ++Iter; |
650 | continue; |
651 | } |
652 | |
653 | for (LVLine *InstructionLine : InstructionLines) { |
654 | uint64_t InstructionAddress = InstructionLine->getAddress(); |
655 | LLVM_DEBUG({ |
656 | dbgs() << "Instruction address: " << hexValue(InstructionAddress) |
657 | << "\n" ; |
658 | }); |
659 | if (TraverseLines) { |
660 | while (Iter != DebugLines->end()) { |
661 | DebugAddress = (*Iter)->getAddress(); |
662 | LLVM_DEBUG({ |
663 | bool IsDebug = (*Iter)->getIsLineDebug(); |
664 | dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:" ) << " [" |
665 | << hexValue(DebugAddress) << "]" ; |
666 | if (IsDebug) |
667 | dbgs() << format(" %d" , (*Iter)->getLineNumber()); |
668 | dbgs() << "\n" ; |
669 | }); |
670 | // Instruction address before debug line. |
671 | if (InstructionAddress < DebugAddress) { |
672 | LLVM_DEBUG({ |
673 | dbgs() << "Inserted instruction address: " |
674 | << hexValue(InstructionAddress) << " before line: " |
675 | << format("%d" , (*Iter)->getLineNumber()) << " [" |
676 | << hexValue(DebugAddress) << "]\n" ; |
677 | }); |
678 | Iter = DebugLines->insert(I: Iter, Elt: InstructionLine); |
679 | // The returned iterator points to the inserted instruction. |
680 | // Skip it and point to the line acting as reference. |
681 | ++Iter; |
682 | break; |
683 | } |
684 | ++Iter; |
685 | } |
686 | if (Iter == DebugLines->end()) { |
687 | // We have reached the end of the source lines and the current |
688 | // instruction line address is greater than the last source line. |
689 | TraverseLines = false; |
690 | DebugLines->push_back(Elt: InstructionLine); |
691 | } |
692 | } else { |
693 | DebugLines->push_back(Elt: InstructionLine); |
694 | } |
695 | } |
696 | } |
697 | |
698 | LLVM_DEBUG({ |
699 | dbgs() << format("Lines after merge: %d\n" , DebugLines->size()); |
700 | size_t Index = 0; |
701 | for (const LVLine *Line : *DebugLines) { |
702 | dbgs() << format_decimal(++Index, 5) << ": " |
703 | << hexValue(Line->getOffset()) << ", (" |
704 | << ((Line->getIsLineDebug()) |
705 | ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) |
706 | : Line->getName()) |
707 | << ")\n" ; |
708 | } |
709 | }); |
710 | |
711 | // If this compilation unit does not have line records, traverse its scopes |
712 | // and take any collected instruction lines as the working set in order |
713 | // to move them to their associated scope. |
714 | if (DebugLines->empty()) { |
715 | if (const LVScopes *Scopes = CompileUnit->getScopes()) |
716 | for (LVScope *Scope : *Scopes) { |
717 | LVLines *Lines = ScopeInstructions.find(SecondKey: Scope); |
718 | if (Lines) { |
719 | |
720 | LLVM_DEBUG({ |
721 | size_t Index = 0; |
722 | dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) |
723 | << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" |
724 | << format("Instruction lines: %d\n" , Lines->size()); |
725 | for (const LVLine *Line : *Lines) |
726 | dbgs() << format_decimal(++Index, 5) << ": " |
727 | << hexValue(Line->getOffset()) << ", (" << Line->getName() |
728 | << ")\n" ; |
729 | }); |
730 | |
731 | if (Scope->getIsArtificial()) { |
732 | // Add the instruction lines to their artificial scope. |
733 | for (LVLine *Line : *Lines) |
734 | Scope->addElement(Line); |
735 | } else { |
736 | DebugLines->append(RHS: *Lines); |
737 | } |
738 | Lines->clear(); |
739 | } |
740 | } |
741 | } |
742 | |
743 | LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); |
744 | ScopesWithRanges->startSearch(); |
745 | |
746 | // Process collected lines. |
747 | LVScope *Scope; |
748 | for (LVLine *Line : *DebugLines) { |
749 | // Using the current line address, get its associated lexical scope and |
750 | // add the line information to it. |
751 | Scope = ScopesWithRanges->getEntry(Address: Line->getAddress()); |
752 | if (!Scope) { |
753 | // If missing scope, use the compile unit. |
754 | Scope = CompileUnit; |
755 | LLVM_DEBUG({ |
756 | dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", (" |
757 | << ((Line->getIsLineDebug()) |
758 | ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) |
759 | : Line->getName()) |
760 | << ")\n" ; |
761 | }); |
762 | } |
763 | |
764 | // Add line object to scope. |
765 | Scope->addElement(Line); |
766 | |
767 | // Report any line zero. |
768 | if (options().getWarningLines() && Line->getIsLineDebug() && |
769 | !Line->getLineNumber()) |
770 | CompileUnit->addLineZero(Line); |
771 | |
772 | // Some compilers generate ranges in the compile unit; other compilers |
773 | // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global |
774 | // variables, we need to generate the map ranges for the compile unit. |
775 | // If we use the ranges stored at the scope level, there are cases where |
776 | // the address referenced by a symbol location, is not in the enclosing |
777 | // scope, but in an outer one. By using the ranges stored in the compile |
778 | // unit, we can catch all those addresses. |
779 | if (Line->getIsLineDebug()) |
780 | CompileUnit->addMapping(Line, SectionIndex); |
781 | |
782 | // Resolve any given pattern. |
783 | patterns().resolvePatternMatch(Line); |
784 | } |
785 | |
786 | ScopesWithRanges->endSearch(); |
787 | } |
788 | |
789 | void LVBinaryReader::processLines(LVLines *DebugLines, |
790 | LVSectionIndex SectionIndex) { |
791 | assert(DebugLines && "DebugLines is null." ); |
792 | if (DebugLines->empty() && !ScopeInstructions.findMap(FirstKey: SectionIndex)) |
793 | return; |
794 | |
795 | // If the Compile Unit does not contain comdat functions, use the whole |
796 | // set of debug lines, as the addresses don't have conflicts. |
797 | if (!CompileUnit->getHasComdatScopes()) { |
798 | processLines(DebugLines, SectionIndex, Function: nullptr); |
799 | return; |
800 | } |
801 | |
802 | // Find the indexes for the lines whose address is zero. |
803 | std::vector<size_t> AddressZero; |
804 | LVLines::iterator It = |
805 | std::find_if(first: std::begin(cont&: *DebugLines), last: std::end(cont&: *DebugLines), |
806 | pred: [](LVLine *Line) { return !Line->getAddress(); }); |
807 | while (It != std::end(cont&: *DebugLines)) { |
808 | AddressZero.emplace_back(args: std::distance(first: std::begin(cont&: *DebugLines), last: It)); |
809 | It = std::find_if(first: std::next(x: It), last: std::end(cont&: *DebugLines), |
810 | pred: [](LVLine *Line) { return !Line->getAddress(); }); |
811 | } |
812 | |
813 | // If the set of debug lines does not contain any line with address zero, |
814 | // use the whole set. It means we are dealing with an initialization |
815 | // section from a fully linked binary. |
816 | if (AddressZero.empty()) { |
817 | processLines(DebugLines, SectionIndex, Function: nullptr); |
818 | return; |
819 | } |
820 | |
821 | // The Compile unit contains comdat functions. Traverse the collected |
822 | // debug lines and identify logical groups based on their start and |
823 | // address. Each group starts with a zero address. |
824 | // Begin, End, Address, IsDone. |
825 | using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>; |
826 | std::vector<LVBucket> Buckets; |
827 | |
828 | LVAddress Address; |
829 | size_t Begin = 0; |
830 | size_t End = 0; |
831 | size_t Index = 0; |
832 | for (Index = 0; Index < AddressZero.size() - 1; ++Index) { |
833 | Begin = AddressZero[Index]; |
834 | End = AddressZero[Index + 1] - 1; |
835 | Address = (*DebugLines)[End]->getAddress(); |
836 | Buckets.emplace_back(args&: Begin, args&: End, args&: Address, args: false); |
837 | } |
838 | |
839 | // Add the last bucket. |
840 | if (Index) { |
841 | Begin = AddressZero[Index]; |
842 | End = DebugLines->size() - 1; |
843 | Address = (*DebugLines)[End]->getAddress(); |
844 | Buckets.emplace_back(args&: Begin, args&: End, args&: Address, args: false); |
845 | } |
846 | |
847 | LLVM_DEBUG({ |
848 | dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n" ; |
849 | for (LVBucket &Bucket : Buckets) { |
850 | dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", " |
851 | << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", " |
852 | << "Address: " << hexValue(std::get<2>(Bucket)) << "\n" ; |
853 | } |
854 | }); |
855 | |
856 | // Traverse the sections and buckets looking for matches on the section |
857 | // sizes. In the unlikely event of different buckets with the same size |
858 | // process them in order and mark them as done. |
859 | LVLines Group; |
860 | for (LVSections::reference Entry : Sections) { |
861 | LVSectionIndex SectionIndex = Entry.first; |
862 | const object::SectionRef Section = Entry.second; |
863 | uint64_t Size = Section.getSize(); |
864 | LLVM_DEBUG({ |
865 | dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3) |
866 | << " , Section Size: " << hexValue(Section.getSize()) |
867 | << " , Section Address: " << hexValue(Section.getAddress()) |
868 | << "\n" ; |
869 | }); |
870 | |
871 | for (LVBucket &Bucket : Buckets) { |
872 | if (std::get<3>(t&: Bucket)) |
873 | // Already done for previous section. |
874 | continue; |
875 | if (Size == std::get<2>(t&: Bucket)) { |
876 | // We have a match on the section size. |
877 | Group.clear(); |
878 | LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(t&: Bucket); |
879 | LVLines::iterator IterEnd = |
880 | DebugLines->begin() + std::get<1>(t&: Bucket) + 1; |
881 | for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter) |
882 | Group.push_back(Elt: *Iter); |
883 | processLines(DebugLines: &Group, SectionIndex, /*Function=*/nullptr); |
884 | std::get<3>(t&: Bucket) = true; |
885 | break; |
886 | } |
887 | } |
888 | } |
889 | } |
890 | |
891 | // Traverse the scopes for the given 'Function' looking for any inlined |
892 | // scopes with inlined lines, which are found in 'CUInlineeLines'. |
893 | void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex, |
894 | LVScope *Function) { |
895 | SmallVector<LVInlineeLine::iterator> InlineeIters; |
896 | std::function<void(LVScope * Parent)> FindInlinedScopes = |
897 | [&](LVScope *Parent) { |
898 | if (const LVScopes *Scopes = Parent->getScopes()) |
899 | for (LVScope *Scope : *Scopes) { |
900 | LVInlineeLine::iterator Iter = CUInlineeLines.find(x: Scope); |
901 | if (Iter != CUInlineeLines.end()) |
902 | InlineeIters.push_back(Elt: Iter); |
903 | FindInlinedScopes(Scope); |
904 | } |
905 | }; |
906 | |
907 | // Find all inlined scopes for the given 'Function'. |
908 | FindInlinedScopes(Function); |
909 | for (LVInlineeLine::iterator InlineeIter : InlineeIters) { |
910 | LVScope *Scope = InlineeIter->first; |
911 | addToSymbolTable(Name: Scope->getLinkageName(), Function: Scope, SectionIndex); |
912 | |
913 | // TODO: Convert this into a reference. |
914 | LVLines *InlineeLines = InlineeIter->second.get(); |
915 | LLVM_DEBUG({ |
916 | dbgs() << "Inlined lines for: " << Scope->getName() << "\n" ; |
917 | for (const LVLine *Line : *InlineeLines) |
918 | dbgs() << "[" << hexValue(Line->getAddress()) << "] " |
919 | << Line->getLineNumber() << "\n" ; |
920 | dbgs() << format("Debug lines: %d\n" , CULines.size()); |
921 | for (const LVLine *Line : CULines) |
922 | dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" |
923 | << Line->getLineNumber() << ")\n" ; |
924 | ; |
925 | }); |
926 | |
927 | // The inlined lines must be merged using its address, in order to keep |
928 | // the real order of the instructions. The inlined lines are mixed with |
929 | // the other non-inlined lines. |
930 | if (InlineeLines->size()) { |
931 | // First address of inlinee code. |
932 | uint64_t InlineeStart = (InlineeLines->front())->getAddress(); |
933 | LVLines::iterator Iter = std::find_if( |
934 | first: CULines.begin(), last: CULines.end(), pred: [&](LVLine *Item) -> bool { |
935 | return Item->getAddress() == InlineeStart; |
936 | }); |
937 | if (Iter != CULines.end()) { |
938 | // 'Iter' points to the line where the inlined function is called. |
939 | // Emulate the DW_AT_call_line attribute. |
940 | Scope->setCallLineNumber((*Iter)->getLineNumber()); |
941 | // Mark the referenced line as the start of the inlined function. |
942 | // Skip the first line during the insertion, as the address and |
943 | // line number as the same. Otherwise we have to erase and insert. |
944 | (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber()); |
945 | ++Iter; |
946 | CULines.insert(I: Iter, From: InlineeLines->begin() + 1, To: InlineeLines->end()); |
947 | } |
948 | } |
949 | |
950 | // Remove this set of lines from the container; each inlined function |
951 | // creates an unique set of lines. Remove only the created container. |
952 | CUInlineeLines.erase(position: InlineeIter); |
953 | InlineeLines->clear(); |
954 | } |
955 | LLVM_DEBUG({ |
956 | dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n" ; |
957 | dbgs() << format("Debug lines: %d\n" , CULines.size()); |
958 | for (const LVLine *Line : CULines) |
959 | dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" |
960 | << Line->getLineNumber() << ")\n" ; |
961 | ; |
962 | }); |
963 | } |
964 | |
965 | void LVBinaryReader::print(raw_ostream &OS) const { |
966 | OS << "LVBinaryReader\n" ; |
967 | LLVM_DEBUG(dbgs() << "PrintReader\n" ); |
968 | } |
969 | |