1//===-- LVBinaryReader.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the LVBinaryReader class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h"
14#include "llvm/Support/Errc.h"
15#include "llvm/Support/FormatAdapters.h"
16#include "llvm/Support/FormatVariadic.h"
17
18using namespace llvm;
19using namespace llvm::logicalview;
20
21#define DEBUG_TYPE "BinaryReader"
22
23// Function names extracted from the object symbol table.
24void LVSymbolTable::add(StringRef Name, LVScope *Function,
25 LVSectionIndex SectionIndex) {
26 std::string SymbolName(Name);
27 auto [It, Inserted] =
28 SymbolNames.try_emplace(k: SymbolName, args&: Function, args: 0, args&: SectionIndex, args: false);
29 if (!Inserted) {
30 // Update a recorded entry with its logical scope and section index.
31 It->second.Scope = Function;
32 if (SectionIndex)
33 It->second.SectionIndex = SectionIndex;
34 }
35
36 if (Function && It->second.IsComdat)
37 Function->setIsComdat();
38
39 LLVM_DEBUG({ print(dbgs()); });
40}
41
42void LVSymbolTable::add(StringRef Name, LVAddress Address,
43 LVSectionIndex SectionIndex, bool IsComdat) {
44 std::string SymbolName(Name);
45 auto [It, Inserted] = SymbolNames.try_emplace(k: SymbolName, args: nullptr, args&: Address,
46 args&: SectionIndex, args&: IsComdat);
47 if (!Inserted)
48 // Update a recorded symbol name with its logical scope.
49 It->second.Address = Address;
50
51 LVScope *Function = It->second.Scope;
52 if (Function && IsComdat)
53 Function->setIsComdat();
54 LLVM_DEBUG({ print(dbgs()); });
55}
56
57LVSectionIndex LVSymbolTable::update(LVScope *Function) {
58 LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex();
59 StringRef Name = Function->getLinkageName();
60 if (Name.empty())
61 Name = Function->getName();
62 std::string SymbolName(Name);
63
64 if (SymbolName.empty())
65 return SectionIndex;
66
67 auto It = SymbolNames.find(x: SymbolName);
68 if (It == SymbolNames.end())
69 return SectionIndex;
70
71 // Update a recorded entry with its logical scope, only if the scope has
72 // ranges. That is the case when in DWARF there are 2 DIEs connected via
73 // the DW_AT_specification.
74 if (Function->getHasRanges()) {
75 It->second.Scope = Function;
76 SectionIndex = It->second.SectionIndex;
77 } else {
78 SectionIndex = UndefinedSectionIndex;
79 }
80
81 if (It->second.IsComdat)
82 Function->setIsComdat();
83
84 LLVM_DEBUG({ print(dbgs()); });
85 return SectionIndex;
86}
87
88const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) {
89 static LVSymbolTableEntry Empty = LVSymbolTableEntry();
90 LVSymbolNames::iterator Iter = SymbolNames.find(x: Name);
91 return Iter != SymbolNames.end() ? Iter->second : Empty;
92}
93LVAddress LVSymbolTable::getAddress(StringRef Name) {
94 LVSymbolNames::iterator Iter = SymbolNames.find(x: Name);
95 return Iter != SymbolNames.end() ? Iter->second.Address : 0;
96}
97LVSectionIndex LVSymbolTable::getIndex(StringRef Name) {
98 LVSymbolNames::iterator Iter = SymbolNames.find(x: Name);
99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex
100 : getReader().getDotTextSectionIndex();
101}
102bool LVSymbolTable::getIsComdat(StringRef Name) {
103 LVSymbolNames::iterator Iter = SymbolNames.find(x: Name);
104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false;
105}
106
107void LVSymbolTable::print(raw_ostream &OS) {
108 OS << "Symbol Table\n";
109 for (LVSymbolNames::reference Entry : SymbolNames) {
110 LVSymbolTableEntry &SymbolName = Entry.second;
111 LVScope *Scope = SymbolName.Scope;
112 LVOffset Offset = Scope ? Scope->getOffset() : 0;
113 OS << "Index: " << hexValue(N: SymbolName.SectionIndex, Width: 5)
114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N")
115 << " Scope: " << hexValue(N: Offset)
116 << " Address: " << hexValue(N: SymbolName.Address)
117 << " Name: " << Entry.first << "\n";
118 }
119}
120
121void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function,
122 LVSectionIndex SectionIndex) {
123 SymbolTable.add(Name, Function, SectionIndex);
124}
125void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address,
126 LVSectionIndex SectionIndex,
127 bool IsComdat) {
128 SymbolTable.add(Name, Address, SectionIndex, IsComdat);
129}
130LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) {
131 return SymbolTable.update(Function);
132}
133
134const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) {
135 return SymbolTable.getEntry(Name);
136}
137LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) {
138 return SymbolTable.getAddress(Name);
139}
140LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) {
141 return SymbolTable.getIndex(Name);
142}
143bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) {
144 return SymbolTable.getIsComdat(Name);
145}
146
147void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
148 for (const object::SectionRef &Section : Obj.sections()) {
149 LLVM_DEBUG({
150 Expected<StringRef> SectionNameOrErr = Section.getName();
151 StringRef Name;
152 if (!SectionNameOrErr)
153 consumeError(SectionNameOrErr.takeError());
154 else
155 Name = *SectionNameOrErr;
156 dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", "
157 << "Address: " << hexValue(Section.getAddress()) << ", "
158 << "Size: " << hexValue(Section.getSize()) << ", "
159 << "Name: " << Name << "\n";
160 dbgs() << "isCompressed: " << Section.isCompressed() << ", "
161 << "isText: " << Section.isText() << ", "
162 << "isData: " << Section.isData() << ", "
163 << "isBSS: " << Section.isBSS() << ", "
164 << "isVirtual: " << Section.isVirtual() << "\n";
165 dbgs() << "isBitcode: " << Section.isBitcode() << ", "
166 << "isStripped: " << Section.isStripped() << ", "
167 << "isBerkeleyText: " << Section.isBerkeleyText() << ", "
168 << "isBerkeleyData: " << Section.isBerkeleyData() << ", "
169 << "isDebugSection: " << Section.isDebugSection() << "\n";
170 dbgs() << "\n";
171 });
172
173 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
174 continue;
175
176 // Record section information required for symbol resolution.
177 // Note: The section index returned by 'getIndex()' is one based.
178 Sections.emplace(args: Section.getIndex(), args: Section);
179 addSectionAddress(Section);
180
181 // Identify the ".text" section.
182 Expected<StringRef> SectionNameOrErr = Section.getName();
183 if (!SectionNameOrErr) {
184 consumeError(Err: SectionNameOrErr.takeError());
185 continue;
186 }
187 if (*SectionNameOrErr == ".text" || *SectionNameOrErr == "CODE" ||
188 *SectionNameOrErr == ".code") {
189 DotTextSectionIndex = Section.getIndex();
190 // If the object is WebAssembly, update the address offset that
191 // will be added to DWARF DW_AT_* attributes.
192 if (Obj.isWasm())
193 WasmCodeSectionOffset = Section.getAddress();
194 }
195 }
196
197 // Process the symbol table.
198 mapRangeAddress(Obj);
199
200 LLVM_DEBUG({
201 dbgs() << "\nSections Information:\n";
202 for (LVSections::reference Entry : Sections) {
203 LVSectionIndex SectionIndex = Entry.first;
204 const object::SectionRef Section = Entry.second;
205 Expected<StringRef> SectionNameOrErr = Section.getName();
206 if (!SectionNameOrErr)
207 consumeError(SectionNameOrErr.takeError());
208 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
209 << " Name: " << *SectionNameOrErr << "\n"
210 << "Size: " << hexValue(Section.getSize()) << "\n"
211 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
212 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n";
213 }
214 dbgs() << "\nObject Section Information:\n";
215 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
216 dbgs() << "[" << hexValue(Entry.first) << ":"
217 << hexValue(Entry.first + Entry.second.getSize())
218 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
219 });
220}
221
222void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
223 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
224 if (ImageBase)
225 ImageBaseAddress = ImageBase.get();
226
227 LLVM_DEBUG({
228 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
229 });
230
231 uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT;
232
233 for (const object::SectionRef &Section : COFFObj.sections()) {
234 if (!Section.isText() || Section.isVirtual() || !Section.getSize())
235 continue;
236
237 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
238 VirtualAddress = COFFSection->VirtualAddress;
239 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
240
241 // Record section information required for symbol resolution.
242 // Note: The section index returned by 'getIndex()' is zero based.
243 Sections.emplace(args: Section.getIndex() + 1, args: Section);
244 addSectionAddress(Section);
245
246 // Additional initialization on the specific object format.
247 mapRangeAddress(Obj: COFFObj, Section, IsComdat);
248 }
249
250 LLVM_DEBUG({
251 dbgs() << "\nSections Information:\n";
252 for (LVSections::reference Entry : Sections) {
253 LVSectionIndex SectionIndex = Entry.first;
254 const object::SectionRef Section = Entry.second;
255 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
256 Expected<StringRef> SectionNameOrErr = Section.getName();
257 if (!SectionNameOrErr)
258 consumeError(SectionNameOrErr.takeError());
259 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
260 << " Name: " << *SectionNameOrErr << "\n"
261 << "Size: " << hexValue(Section.getSize()) << "\n"
262 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
263 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
264 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
265 << "\n"
266 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
267 << "\n";
268 }
269 dbgs() << "\nObject Section Information:\n";
270 for (LVSectionAddresses::const_reference Entry : SectionAddresses)
271 dbgs() << "[" << hexValue(Entry.first) << ":"
272 << hexValue(Entry.first + Entry.second.getSize())
273 << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
274 });
275}
276
277Error LVBinaryReader::loadGenericTargetInfo(StringRef TripleName,
278 StringRef TheFeatures,
279 StringRef TheCPU) {
280 Triple TheTriple(TripleName);
281 std::string TargetLookupError;
282 const Target *TheTarget =
283 TargetRegistry::lookupTarget(TheTriple, Error&: TargetLookupError);
284 if (!TheTarget)
285 return createStringError(EC: errc::invalid_argument, S: TargetLookupError.c_str());
286
287 // Register information.
288 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TT: TheTriple);
289 if (!RegisterInfo)
290 return createStringError(EC: errc::invalid_argument,
291 S: "no register info for target " + TripleName);
292 MRI.reset(p: RegisterInfo);
293
294 // Assembler properties and features.
295 MCTargetOptions MCOptions;
296 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple, Options: MCOptions));
297 if (!AsmInfo)
298 return createStringError(EC: errc::invalid_argument,
299 S: "no assembly info for target " + TripleName);
300 MAI.reset(p: AsmInfo);
301
302 // Target subtargets.
303 MCSubtargetInfo *SubtargetInfo(
304 TheTarget->createMCSubtargetInfo(TheTriple, CPU: TheCPU, Features: TheFeatures));
305 if (!SubtargetInfo)
306 return createStringError(EC: errc::invalid_argument,
307 S: "no subtarget info for target " + TripleName);
308 STI.reset(p: SubtargetInfo);
309
310 // Instructions Info.
311 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo());
312 if (!InstructionInfo)
313 return createStringError(EC: errc::invalid_argument,
314 S: "no instruction info for target " + TripleName);
315 MII.reset(p: InstructionInfo);
316
317 MC = std::make_unique<MCContext>(args: Triple(TheTriple), args: MAI.get(), args: MRI.get(),
318 args: STI.get());
319
320 // Assembler.
321 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(STI: *STI, Ctx&: *MC));
322 if (!DisAsm)
323 return createStringError(EC: errc::invalid_argument,
324 S: "no disassembler for target " + TripleName);
325 MD.reset(p: DisAsm);
326
327 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter(
328 T: Triple(TheTriple), SyntaxVariant: AsmInfo->getAssemblerDialect(), MAI: *MAI, MII: *MII, MRI: *MRI));
329 if (!InstructionPrinter)
330 return createStringError(EC: errc::invalid_argument,
331 S: "no target assembly language printer for target " +
332 TripleName);
333 MIP.reset(p: InstructionPrinter);
334 InstructionPrinter->setPrintImmHex(true);
335
336 return Error::success();
337}
338
339Expected<std::pair<uint64_t, object::SectionRef>>
340LVBinaryReader::getSection(LVScope *Scope, LVAddress Address,
341 LVSectionIndex SectionIndex) {
342 // Return the 'text' section with the code for this logical scope.
343 // COFF: SectionIndex is zero. Use 'SectionAddresses' data.
344 // ELF: SectionIndex is the section index in the file.
345 if (SectionIndex) {
346 LVSections::iterator Iter = Sections.find(x: SectionIndex);
347 if (Iter == Sections.end()) {
348 return createStringError(EC: errc::invalid_argument,
349 Fmt: "invalid section index for: '%s'",
350 Vals: Scope->getName().str().c_str());
351 }
352 const object::SectionRef Section = Iter->second;
353 return std::make_pair(x: Section.getAddress(), y: Section);
354 }
355
356 // Ensure a valid starting address for the public names.
357 LVSectionAddresses::const_iterator Iter =
358 SectionAddresses.upper_bound(x: Address);
359 if (Iter == SectionAddresses.begin())
360 return createStringError(EC: errc::invalid_argument,
361 Fmt: "invalid section address for: '%s'",
362 Vals: Scope->getName().str().c_str());
363
364 // Get section that contains the code for this function.
365 Iter = SectionAddresses.lower_bound(x: Address);
366 if (Iter != SectionAddresses.begin())
367 --Iter;
368 return std::make_pair(x: Iter->first, y: Iter->second);
369}
370
371Error LVBinaryReader::createInstructions(LVScope *Scope,
372 LVSectionIndex SectionIndex,
373 const LVNameInfo &NameInfo) {
374 assert(Scope && "Scope is null.");
375
376 // Skip stripped functions.
377 if (Scope->getIsDiscarded())
378 return Error::success();
379
380 // Find associated address and size for the given function entry point.
381 LVAddress Address = NameInfo.first;
382 uint64_t Size = NameInfo.second;
383
384 LLVM_DEBUG({
385 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '"
386 << Scope->getLinkageName() << "'\n"
387 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
388 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n";
389 });
390
391 Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr =
392 getSection(Scope, Address, SectionIndex);
393 if (!SectionOrErr)
394 return SectionOrErr.takeError();
395 const object::SectionRef Section = (*SectionOrErr).second;
396 uint64_t SectionAddress = (*SectionOrErr).first;
397
398 Expected<StringRef> SectionContentsOrErr = Section.getContents();
399 if (!SectionContentsOrErr)
400 return SectionOrErr.takeError();
401
402 // There are cases where the section size is smaller than the [LowPC,HighPC]
403 // range; it causes us to decode invalid addresses. The recorded size in the
404 // logical scope is one less than the real size.
405 LLVM_DEBUG({
406 dbgs() << " Size: " << hexValue(Size)
407 << ", Section Size: " << hexValue(Section.getSize()) << "\n";
408 });
409 Size = std::min(a: Size + 1, b: Section.getSize());
410
411 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Input: *SectionContentsOrErr);
412 uint64_t Offset = Address - SectionAddress;
413 if (Offset > Bytes.size()) {
414 LLVM_DEBUG({
415 dbgs() << "offset (" << hexValue(Offset) << ") is beyond section size ("
416 << hexValue(Bytes.size()) << "); malformed input?\n";
417 });
418 return createStringError(
419 EC: errc::bad_address,
420 S: "Failed to parse instructions; offset beyond section size");
421 }
422 uint8_t const *Begin = Bytes.data() + Offset;
423 uint8_t const *End = Bytes.data() + Offset + Size;
424
425 LLVM_DEBUG({
426 Expected<StringRef> SectionNameOrErr = Section.getName();
427 if (!SectionNameOrErr)
428 consumeError(SectionNameOrErr.takeError());
429 else
430 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " ["
431 << hexValue((uint64_t)Section.getAddress()) << ":"
432 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10)
433 << "] Name: '" << *SectionNameOrErr << "'\n"
434 << "Begin: " << hexValue((uint64_t)Begin)
435 << ", End: " << hexValue((uint64_t)End) << "\n";
436 });
437
438 // Address for first instruction line.
439 LVAddress FirstAddress = Address;
440 auto InstructionsSP = std::make_unique<LVLines>();
441 LVLines &Instructions = *InstructionsSP;
442 DiscoveredLines.emplace_back(args: std::move(InstructionsSP));
443
444 while (Begin < End) {
445 MCInst Instruction;
446 uint64_t BytesConsumed = 0;
447 SmallVector<char, 64> InsnStr;
448 raw_svector_ostream Annotations(InsnStr);
449 MCDisassembler::DecodeStatus const S =
450 MD->getInstruction(Instr&: Instruction, Size&: BytesConsumed,
451 Bytes: ArrayRef<uint8_t>(Begin, End), Address, CStream&: outs());
452 switch (S) {
453 case MCDisassembler::Fail:
454 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; });
455 if (BytesConsumed == 0)
456 // Skip invalid bytes
457 BytesConsumed = 1;
458 break;
459 case MCDisassembler::SoftFail:
460 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
461 [[fallthrough]];
462 case MCDisassembler::Success: {
463 std::string Buffer;
464 raw_string_ostream Stream(Buffer);
465 StringRef AnnotationsStr = Annotations.str();
466 MIP->printInst(MI: &Instruction, Address, Annot: AnnotationsStr, STI: *STI, OS&: Stream);
467 LLVM_DEBUG({
468 std::string BufferCodes;
469 raw_string_ostream StreamCodes(BufferCodes);
470 StreamCodes << format_bytes(
471 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16,
472 16);
473 dbgs() << "[" << hexValue((uint64_t)Begin) << "] "
474 << "Size: " << format_decimal(BytesConsumed, 2) << " ("
475 << formatv("{0}",
476 fmt_align(StreamCodes.str(), AlignStyle::Left, 32))
477 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str()
478 << "\n";
479 });
480 // Here we add logical lines to the Instructions. Later on,
481 // the 'processLines()' function will move each created logical line
482 // to its enclosing logical scope, using the debug ranges information
483 // and they will be released when its scope parent is deleted.
484 LVLineAssembler *Line = createLineAssembler();
485 Line->setAddress(Address);
486 Line->setName(StringRef(Stream.str()).trim());
487 Instructions.push_back(Elt: Line);
488 break;
489 }
490 }
491 Address += BytesConsumed;
492 Begin += BytesConsumed;
493 }
494
495 LLVM_DEBUG({
496 size_t Index = 0;
497 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
498 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
499 << "Address: " << hexValue(FirstAddress)
500 << format(" - Collected instructions lines: %d\n",
501 Instructions.size());
502 for (const LVLine *Line : Instructions)
503 dbgs() << format_decimal(++Index, 5) << ": "
504 << hexValue(Line->getOffset()) << ", (" << Line->getName()
505 << ")\n";
506 });
507
508 // The scope in the assembler names is linked to its own instructions.
509 ScopeInstructions.add(FirstKey: SectionIndex, SecondKey: Scope, Value: &Instructions);
510 AssemblerMappings.add(FirstKey: SectionIndex, SecondKey: FirstAddress, Value: Scope);
511
512 return Error::success();
513}
514
515Error LVBinaryReader::createInstructions(LVScope *Function,
516 LVSectionIndex SectionIndex) {
517 if (!options().getPrintInstructions())
518 return Error::success();
519
520 LVNameInfo Name = CompileUnit->findPublicName(Scope: Function);
521 if (Name.first != LVAddress(UINT64_MAX))
522 return createInstructions(Scope: Function, SectionIndex, NameInfo: Name);
523
524 return Error::success();
525}
526
527Error LVBinaryReader::createInstructions() {
528 if (!options().getPrintInstructions())
529 return Error::success();
530
531 LLVM_DEBUG({
532 size_t Index = 1;
533 dbgs() << "\nPublic Names (Scope):\n";
534 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
535 LVScope *Scope = Name.first;
536 const LVNameInfo &NameInfo = Name.second;
537 LVAddress Address = NameInfo.first;
538 uint64_t Size = NameInfo.second;
539 dbgs() << format_decimal(Index++, 5) << ": "
540 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: ["
541 << hexValue(Address) << ":" << hexValue(Address + Size) << "] "
542 << "Name: '" << Scope->getName() << "' / '"
543 << Scope->getLinkageName() << "'\n";
544 }
545 });
546
547 // For each public name in the current compile unit, create the line
548 // records that represent the executable instructions.
549 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) {
550 LVScope *Scope = Name.first;
551 // The symbol table extracted from the object file always contains a
552 // non-empty name (linkage name). However, the logical scope does not
553 // guarantee to have a name for the linkage name (main is one case).
554 // For those cases, set the linkage name the same as the name.
555 if (!Scope->getLinkageNameIndex())
556 Scope->setLinkageName(Scope->getName());
557 LVSectionIndex SectionIndex = getSymbolTableIndex(Name: Scope->getLinkageName());
558 if (Error Err = createInstructions(Scope, SectionIndex, NameInfo: Name.second))
559 return Err;
560 }
561
562 return Error::success();
563}
564
565// During the traversal of the debug information sections, we created the
566// logical lines representing the disassembled instructions from the text
567// section and the logical lines representing the line records from the
568// debug line section. Using the ranges associated with the logical scopes,
569// we will allocate those logical lines to their logical scopes.
570void LVBinaryReader::processLines(LVLines *DebugLines,
571 LVSectionIndex SectionIndex,
572 LVScope *Function) {
573 assert(DebugLines && "DebugLines is null.");
574
575 // Just return if this compilation unit does not have any line records
576 // and no instruction lines were created.
577 if (DebugLines->empty() && !options().getPrintInstructions())
578 return;
579
580 // Merge the debug lines and instruction lines using their text address;
581 // the logical line representing the debug line record is followed by the
582 // line(s) representing the disassembled instructions, whose addresses are
583 // equal or greater that the line address and less than the address of the
584 // next debug line record.
585 LLVM_DEBUG({
586 size_t Index = 1;
587 size_t PerLine = 4;
588 dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size());
589 for (const LVLine *Line : *DebugLines) {
590 dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset())
591 << ", (" << Line->getLineNumber() << ")"
592 << ((Index % PerLine) ? " " : "\n");
593 ++Index;
594 }
595 dbgs() << ((Index % PerLine) ? "\n" : "");
596 });
597
598 bool TraverseLines = true;
599 LVLines::iterator Iter = DebugLines->begin();
600 while (TraverseLines && Iter != DebugLines->end()) {
601 uint64_t DebugAddress = (*Iter)->getAddress();
602
603 // Get the function with an entry point that matches this line and
604 // its associated assembler entries. In the case of COMDAT, the input
605 // 'Function' is not null. Use it to find its address ranges.
606 LVScope *Scope = Function;
607 if (!Function) {
608 Scope = AssemblerMappings.find(FirstKey: SectionIndex, SecondKey: DebugAddress);
609 if (!Scope) {
610 ++Iter;
611 continue;
612 }
613 }
614
615 // Get the associated instructions for the found 'Scope'.
616 LVLines InstructionLines;
617 LVLines *Lines = ScopeInstructions.find(FirstKey: SectionIndex, SecondKey: Scope);
618 if (Lines)
619 InstructionLines = std::move(*Lines);
620
621 LLVM_DEBUG({
622 size_t Index = 0;
623 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
624 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
625 << format("Process instruction lines: %d\n",
626 InstructionLines.size());
627 for (const LVLine *Line : InstructionLines)
628 dbgs() << format_decimal(++Index, 5) << ": "
629 << hexValue(Line->getOffset()) << ", (" << Line->getName()
630 << ")\n";
631 });
632
633 // Continue with next debug line if there are not instructions lines.
634 if (InstructionLines.empty()) {
635 ++Iter;
636 continue;
637 }
638
639 for (LVLine *InstructionLine : InstructionLines) {
640 uint64_t InstructionAddress = InstructionLine->getAddress();
641 LLVM_DEBUG({
642 dbgs() << "Instruction address: " << hexValue(InstructionAddress)
643 << "\n";
644 });
645 if (TraverseLines) {
646 while (Iter != DebugLines->end()) {
647 DebugAddress = (*Iter)->getAddress();
648 LLVM_DEBUG({
649 bool IsDebug = (*Iter)->getIsLineDebug();
650 dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " ["
651 << hexValue(DebugAddress) << "]";
652 if (IsDebug)
653 dbgs() << format(" %d", (*Iter)->getLineNumber());
654 dbgs() << "\n";
655 });
656 // Instruction address before debug line.
657 if (InstructionAddress < DebugAddress) {
658 LLVM_DEBUG({
659 dbgs() << "Inserted instruction address: "
660 << hexValue(InstructionAddress) << " before line: "
661 << format("%d", (*Iter)->getLineNumber()) << " ["
662 << hexValue(DebugAddress) << "]\n";
663 });
664 Iter = DebugLines->insert(I: Iter, Elt: InstructionLine);
665 // The returned iterator points to the inserted instruction.
666 // Skip it and point to the line acting as reference.
667 ++Iter;
668 break;
669 }
670 ++Iter;
671 }
672 if (Iter == DebugLines->end()) {
673 // We have reached the end of the source lines and the current
674 // instruction line address is greater than the last source line.
675 TraverseLines = false;
676 DebugLines->push_back(Elt: InstructionLine);
677 }
678 } else {
679 DebugLines->push_back(Elt: InstructionLine);
680 }
681 }
682 }
683
684 LLVM_DEBUG({
685 dbgs() << format("Lines after merge: %d\n", DebugLines->size());
686 size_t Index = 0;
687 for (const LVLine *Line : *DebugLines) {
688 dbgs() << format_decimal(++Index, 5) << ": "
689 << hexValue(Line->getOffset()) << ", ("
690 << ((Line->getIsLineDebug())
691 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
692 : Line->getName())
693 << ")\n";
694 }
695 });
696
697 // If this compilation unit does not have line records, traverse its scopes
698 // and take any collected instruction lines as the working set in order
699 // to move them to their associated scope.
700 if (DebugLines->empty()) {
701 if (const LVScopes *Scopes = CompileUnit->getScopes())
702 for (LVScope *Scope : *Scopes) {
703 LVLines *Lines = ScopeInstructions.find(SecondKey: Scope);
704 if (Lines) {
705
706 LLVM_DEBUG({
707 size_t Index = 0;
708 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3)
709 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
710 << format("Instruction lines: %d\n", Lines->size());
711 for (const LVLine *Line : *Lines)
712 dbgs() << format_decimal(++Index, 5) << ": "
713 << hexValue(Line->getOffset()) << ", (" << Line->getName()
714 << ")\n";
715 });
716
717 if (Scope->getIsArtificial()) {
718 // Add the instruction lines to their artificial scope.
719 for (LVLine *Line : *Lines)
720 Scope->addElement(Line);
721 } else {
722 DebugLines->append(RHS: *Lines);
723 }
724 Lines->clear();
725 }
726 }
727 }
728
729 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
730 ScopesWithRanges->startSearch();
731
732 // Process collected lines.
733 LVScope *Scope;
734 for (LVLine *Line : *DebugLines) {
735 // Using the current line address, get its associated lexical scope and
736 // add the line information to it.
737 Scope = ScopesWithRanges->getEntry(Address: Line->getAddress());
738 if (!Scope) {
739 // If missing scope, use the compile unit.
740 Scope = CompileUnit;
741 LLVM_DEBUG({
742 dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", ("
743 << ((Line->getIsLineDebug())
744 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true)
745 : Line->getName())
746 << ")\n";
747 });
748 }
749
750 // Add line object to scope.
751 Scope->addElement(Line);
752
753 // Report any line zero.
754 if (options().getWarningLines() && Line->getIsLineDebug() &&
755 !Line->getLineNumber())
756 CompileUnit->addLineZero(Line);
757
758 // Some compilers generate ranges in the compile unit; other compilers
759 // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global
760 // variables, we need to generate the map ranges for the compile unit.
761 // If we use the ranges stored at the scope level, there are cases where
762 // the address referenced by a symbol location, is not in the enclosing
763 // scope, but in an outer one. By using the ranges stored in the compile
764 // unit, we can catch all those addresses.
765 if (Line->getIsLineDebug())
766 CompileUnit->addMapping(Line, SectionIndex);
767
768 // Resolve any given pattern.
769 patterns().resolvePatternMatch(Line);
770 }
771
772 ScopesWithRanges->endSearch();
773}
774
775void LVBinaryReader::processLines(LVLines *DebugLines,
776 LVSectionIndex SectionIndex) {
777 assert(DebugLines && "DebugLines is null.");
778 if (DebugLines->empty() && !ScopeInstructions.findMap(FirstKey: SectionIndex))
779 return;
780
781 // If the Compile Unit does not contain comdat functions, use the whole
782 // set of debug lines, as the addresses don't have conflicts.
783 if (!CompileUnit->getHasComdatScopes()) {
784 processLines(DebugLines, SectionIndex, Function: nullptr);
785 return;
786 }
787
788 // Find the indexes for the lines whose address is zero.
789 std::vector<size_t> AddressZero;
790 LVLines::iterator It = llvm::find_if(
791 Range&: *DebugLines, P: [](LVLine *Line) { return !Line->getAddress(); });
792 while (It != std::end(cont&: *DebugLines)) {
793 AddressZero.emplace_back(args: std::distance(first: std::begin(cont&: *DebugLines), last: It));
794 It = std::find_if(first: std::next(x: It), last: std::end(cont&: *DebugLines),
795 pred: [](LVLine *Line) { return !Line->getAddress(); });
796 }
797
798 // If the set of debug lines does not contain any line with address zero,
799 // use the whole set. It means we are dealing with an initialization
800 // section from a fully linked binary.
801 if (AddressZero.empty()) {
802 processLines(DebugLines, SectionIndex, Function: nullptr);
803 return;
804 }
805
806 // The Compile unit contains comdat functions. Traverse the collected
807 // debug lines and identify logical groups based on their start and
808 // address. Each group starts with a zero address.
809 // Begin, End, Address, IsDone.
810 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>;
811 std::vector<LVBucket> Buckets;
812
813 LVAddress Address;
814 size_t Begin = 0;
815 size_t End = 0;
816 size_t Index = 0;
817 for (Index = 0; Index < AddressZero.size() - 1; ++Index) {
818 Begin = AddressZero[Index];
819 End = AddressZero[Index + 1] - 1;
820 Address = (*DebugLines)[End]->getAddress();
821 Buckets.emplace_back(args&: Begin, args&: End, args&: Address, args: false);
822 }
823
824 // Add the last bucket.
825 if (Index) {
826 Begin = AddressZero[Index];
827 End = DebugLines->size() - 1;
828 Address = (*DebugLines)[End]->getAddress();
829 Buckets.emplace_back(args&: Begin, args&: End, args&: Address, args: false);
830 }
831
832 LLVM_DEBUG({
833 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n";
834 for (LVBucket &Bucket : Buckets) {
835 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", "
836 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", "
837 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n";
838 }
839 });
840
841 // Traverse the sections and buckets looking for matches on the section
842 // sizes. In the unlikely event of different buckets with the same size
843 // process them in order and mark them as done.
844 LVLines Group;
845 for (LVSections::reference Entry : Sections) {
846 LVSectionIndex SectionIndex = Entry.first;
847 const object::SectionRef Section = Entry.second;
848 uint64_t Size = Section.getSize();
849 LLVM_DEBUG({
850 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3)
851 << " , Section Size: " << hexValue(Section.getSize())
852 << " , Section Address: " << hexValue(Section.getAddress())
853 << "\n";
854 });
855
856 for (LVBucket &Bucket : Buckets) {
857 if (std::get<3>(t&: Bucket))
858 // Already done for previous section.
859 continue;
860 if (Size == std::get<2>(t&: Bucket)) {
861 // We have a match on the section size.
862 Group.clear();
863 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(t&: Bucket);
864 LVLines::iterator IterEnd =
865 DebugLines->begin() + std::get<1>(t&: Bucket) + 1;
866 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter)
867 Group.push_back(Elt: *Iter);
868 processLines(DebugLines: &Group, SectionIndex, /*Function=*/nullptr);
869 std::get<3>(t&: Bucket) = true;
870 break;
871 }
872 }
873 }
874}
875
876// Traverse the scopes for the given 'Function' looking for any inlined
877// scopes with inlined lines, which are found in 'CUInlineeLines'.
878void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex,
879 LVScope *Function) {
880 SmallVector<LVInlineeLine::iterator> InlineeIters;
881 std::function<void(LVScope * Parent)> FindInlinedScopes =
882 [&](LVScope *Parent) {
883 if (const LVScopes *Scopes = Parent->getScopes())
884 for (LVScope *Scope : *Scopes) {
885 LVInlineeLine::iterator Iter = CUInlineeLines.find(x: Scope);
886 if (Iter != CUInlineeLines.end())
887 InlineeIters.push_back(Elt: Iter);
888 FindInlinedScopes(Scope);
889 }
890 };
891
892 // Find all inlined scopes for the given 'Function'.
893 FindInlinedScopes(Function);
894 for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
895 LVScope *Scope = InlineeIter->first;
896 addToSymbolTable(Name: Scope->getLinkageName(), Function: Scope, SectionIndex);
897
898 // TODO: Convert this into a reference.
899 LVLines *InlineeLines = InlineeIter->second.get();
900 LLVM_DEBUG({
901 dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
902 for (const LVLine *Line : *InlineeLines)
903 dbgs() << "[" << hexValue(Line->getAddress()) << "] "
904 << Line->getLineNumber() << "\n";
905 dbgs() << format("Debug lines: %d\n", CULines.size());
906 for (const LVLine *Line : CULines)
907 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
908 << Line->getLineNumber() << ")\n";
909 ;
910 });
911
912 // The inlined lines must be merged using its address, in order to keep
913 // the real order of the instructions. The inlined lines are mixed with
914 // the other non-inlined lines.
915 if (InlineeLines->size()) {
916 // First address of inlinee code.
917 uint64_t InlineeStart = (InlineeLines->front())->getAddress();
918 LVLines::iterator Iter =
919 llvm::find_if(Range&: CULines, P: [&](LVLine *Item) -> bool {
920 return Item->getAddress() == InlineeStart;
921 });
922 if (Iter != CULines.end()) {
923 // 'Iter' points to the line where the inlined function is called.
924 // Emulate the DW_AT_call_line attribute.
925 Scope->setCallLineNumber((*Iter)->getLineNumber());
926 // Mark the referenced line as the start of the inlined function.
927 // Skip the first line during the insertion, as the address and
928 // line number as the same. Otherwise we have to erase and insert.
929 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
930 ++Iter;
931 CULines.insert(I: Iter, From: InlineeLines->begin() + 1, To: InlineeLines->end());
932 }
933 }
934
935 // Remove this set of lines from the container; each inlined function
936 // creates an unique set of lines. Remove only the created container.
937 CUInlineeLines.erase(position: InlineeIter);
938 InlineeLines->clear();
939 }
940 LLVM_DEBUG({
941 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
942 dbgs() << format("Debug lines: %d\n", CULines.size());
943 for (const LVLine *Line : CULines)
944 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
945 << Line->getLineNumber() << ")\n";
946 ;
947 });
948}
949
950void LVBinaryReader::print(raw_ostream &OS) const {
951 OS << "LVBinaryReader\n";
952 LLVM_DEBUG(dbgs() << "PrintReader\n");
953}
954