1//===-- sancov.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file is a command-line tool for reading and analyzing sanitizer
9// coverage.
10//===----------------------------------------------------------------------===//
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/StringExtras.h"
13#include "llvm/ADT/Twine.h"
14#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
15#include "llvm/DebugInfo/Symbolize/Symbolize.h"
16#include "llvm/MC/MCAsmInfo.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/MC/MCDisassembler/MCDisassembler.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCInstrAnalysis.h"
21#include "llvm/MC/MCInstrInfo.h"
22#include "llvm/MC/MCObjectFileInfo.h"
23#include "llvm/MC/MCRegisterInfo.h"
24#include "llvm/MC/MCSubtargetInfo.h"
25#include "llvm/MC/MCTargetOptions.h"
26#include "llvm/MC/TargetRegistry.h"
27#include "llvm/Object/Archive.h"
28#include "llvm/Object/Binary.h"
29#include "llvm/Object/COFF.h"
30#include "llvm/Object/MachO.h"
31#include "llvm/Object/ObjectFile.h"
32#include "llvm/Object/XCOFFObjectFile.h"
33#include "llvm/Option/ArgList.h"
34#include "llvm/Option/Option.h"
35#include "llvm/Support/Casting.h"
36#include "llvm/Support/CommandLine.h"
37#include "llvm/Support/Errc.h"
38#include "llvm/Support/ErrorOr.h"
39#include "llvm/Support/FileSystem.h"
40#include "llvm/Support/JSON.h"
41#include "llvm/Support/LLVMDriver.h"
42#include "llvm/Support/MD5.h"
43#include "llvm/Support/MemoryBuffer.h"
44#include "llvm/Support/Path.h"
45#include "llvm/Support/Regex.h"
46#include "llvm/Support/SHA1.h"
47#include "llvm/Support/SourceMgr.h"
48#include "llvm/Support/SpecialCaseList.h"
49#include "llvm/Support/TargetSelect.h"
50#include "llvm/Support/VirtualFileSystem.h"
51#include "llvm/Support/YAMLParser.h"
52#include "llvm/Support/raw_ostream.h"
53
54#include <set>
55#include <vector>
56
57using namespace llvm;
58
59namespace {
60
61// Command-line option boilerplate.
62namespace {
63using namespace llvm::opt;
64enum ID {
65 OPT_INVALID = 0, // This is not an option ID.
66#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
67#include "Opts.inc"
68#undef OPTION
69};
70
71#define OPTTABLE_STR_TABLE_CODE
72#include "Opts.inc"
73#undef OPTTABLE_STR_TABLE_CODE
74
75#define OPTTABLE_PREFIXES_TABLE_CODE
76#include "Opts.inc"
77#undef OPTTABLE_PREFIXES_TABLE_CODE
78
79static constexpr opt::OptTable::Info InfoTable[] = {
80#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
81#include "Opts.inc"
82#undef OPTION
83};
84
85class SancovOptTable : public opt::GenericOptTable {
86public:
87 SancovOptTable()
88 : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
89};
90} // namespace
91
92// --------- COMMAND LINE FLAGS ---------
93
// The set of operations the tool distinguishes; the selected one is stored in
// the file-level `Action` variable.
enum ActionType {
  CoveredFunctionsAction,
  DiffAction,
  HtmlReportAction,
  MergeAction,
  NotCoveredFunctionsAction,
  PrintAction,
  PrintCovPointsAction,
  StatsAction,
  SymbolizeAction,
  UnionAction,
};
106
107static ActionType Action;
108static std::vector<std::string> ClInputFiles;
109static bool ClDemangle;
110static bool ClSkipDeadFiles;
111static bool ClUseDefaultIgnorelist;
112static std::string ClStripPathPrefix;
113static std::string ClIgnorelist;
114static std::string ClOutputFile;
115
116static const char *const DefaultIgnorelistStr = "fun:__sanitizer_.*\n"
117 "src:/usr/include/.*\n"
118 "src:.*/libc\\+\\+/.*\n";
119
120// --------- FORMAT SPECIFICATION ---------
121
// Fixed 8-byte header at the start of a binary .sancov file. The fields are
// read and written as raw bytes, so their order must not change.
struct FileHeader {
  uint32_t Bitness; // Compared against Bitness32 / Bitness64.
  uint32_t Magic;   // Compared against BinCoverageMagic.
};
126
127static const uint32_t BinCoverageMagic = 0xC0BFFFFF;
128static const uint32_t Bitness32 = 0xFFFFFF32;
129static const uint32_t Bitness64 = 0xFFFFFF64;
130
131static const Regex SancovFileRegex("(.*)\\.[0-9]+\\.sancov");
132static const Regex SymcovFileRegex(".*\\.symcov");
133
134// --------- MAIN DATASTRUCTURES ----------
135
136// Contents of .sancov file: list of coverage point addresses that were
137// executed.
138struct RawCoverage {
139 explicit RawCoverage(std::unique_ptr<std::set<uint64_t>> Addrs,
140 FileHeader Header)
141 : Addrs(std::move(Addrs)), Header(Header) {}
142
143 // Read binary .sancov file.
144 static ErrorOr<std::unique_ptr<RawCoverage>>
145 read(const std::string &FileName);
146
147 // Write binary .sancov file.
148 static void write(const std::string &FileName, const RawCoverage &Coverage);
149
150 std::unique_ptr<std::set<uint64_t>> Addrs;
151 FileHeader Header;
152};
153
154// Coverage point has an opaque Id and corresponds to multiple source locations.
155struct CoveragePoint {
156 explicit CoveragePoint(const std::string &Id) : Id(Id) {}
157
158 std::string Id;
159 SmallVector<DILineInfo, 1> Locs;
160};
161
162// Symcov file content: set of covered Ids plus information about all available
163// coverage points.
164struct SymbolizedCoverage {
165 // Read json .symcov file.
166 static std::unique_ptr<SymbolizedCoverage> read(const std::string &InputFile);
167
168 std::set<std::string> CoveredIds;
169 std::string BinaryHash;
170 std::vector<CoveragePoint> Points;
171};
172
// Summary counters; printed as all-edges / cov-edges / all-functions /
// cov-functions by the stream operator for this type.
struct CoverageStats {
  size_t AllPoints; // Total number of coverage points.
  size_t CovPoints; // Number of covered points.
  size_t AllFns;    // Total number of functions.
  size_t CovFns;    // Number of covered functions.
};
179
180// --------- ERROR HANDLING ---------
181
182static void fail(const llvm::Twine &E) {
183 errs() << "ERROR: " << E << "\n";
184 exit(status: 1);
185}
186
187static void failIf(bool B, const llvm::Twine &E) {
188 if (B)
189 fail(E);
190}
191
192static void failIfError(std::error_code Error) {
193 if (!Error)
194 return;
195 errs() << "ERROR: " << Error.message() << "(" << Error.value() << ")\n";
196 exit(status: 1);
197}
198
199template <typename T> static void failIfError(const ErrorOr<T> &E) {
200 failIfError(E.getError());
201}
202
203static void failIfError(Error Err) {
204 if (Err) {
205 logAllUnhandledErrors(E: std::move(Err), OS&: errs(), ErrorBanner: "ERROR: ");
206 exit(status: 1);
207 }
208}
209
210template <typename T> static void failIfError(Expected<T> &E) {
211 failIfError(E.takeError());
212}
213
214static void failIfNotEmpty(const llvm::Twine &E) {
215 if (E.str().empty())
216 return;
217 fail(E);
218}
219
220template <typename T>
221static void failIfEmpty(const std::unique_ptr<T> &Ptr,
222 const std::string &Message) {
223 if (Ptr.get())
224 return;
225 fail(E: Message);
226}
227
228// ----------- Coverage I/O ----------
// Decode the byte range [Start, End) as a packed array of host-endian T
// values and insert each one into *Ints.
//
// Each value is copied out byte-wise rather than read through a T*, so the
// buffer does not need to be aligned for T. A trailing fragment shorter than
// sizeof(T) is ignored, and an empty or inverted range inserts nothing.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  if (End <= Start)
    return;
  const size_t Count = static_cast<size_t>(End - Start) / sizeof(T);
  for (size_t I = 0; I < Count; ++I) {
    T Value;
    std::copy_n(Start + I * sizeof(T), sizeof(T),
                reinterpret_cast<char *>(&Value));
    Ints->insert(Value);
  }
}
236
237ErrorOr<std::unique_ptr<RawCoverage>>
238RawCoverage::read(const std::string &FileName) {
239 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
240 MemoryBuffer::getFile(Filename: FileName);
241 if (!BufOrErr)
242 return BufOrErr.getError();
243 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
244 if (Buf->getBufferSize() < 8) {
245 errs() << "File too small (<8): " << Buf->getBufferSize() << '\n';
246 return make_error_code(E: errc::illegal_byte_sequence);
247 }
248 const FileHeader *Header =
249 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
250
251 if (Header->Magic != BinCoverageMagic) {
252 errs() << "Wrong magic: " << Header->Magic << '\n';
253 return make_error_code(E: errc::illegal_byte_sequence);
254 }
255
256 auto Addrs = std::make_unique<std::set<uint64_t>>();
257
258 switch (Header->Bitness) {
259 case Bitness64:
260 readInts<uint64_t>(Start: Buf->getBufferStart() + 8, End: Buf->getBufferEnd(),
261 Ints: Addrs.get());
262 break;
263 case Bitness32:
264 readInts<uint32_t>(Start: Buf->getBufferStart() + 8, End: Buf->getBufferEnd(),
265 Ints: Addrs.get());
266 break;
267 default:
268 errs() << "Unsupported bitness: " << Header->Bitness << '\n';
269 return make_error_code(E: errc::illegal_byte_sequence);
270 }
271
272 // Ignore slots that are zero, so a runtime implementation is not required
273 // to compactify the data.
274 Addrs->erase(x: 0);
275
276 return std::make_unique<RawCoverage>(args: std::move(Addrs), args: *Header);
277}
278
279// Print coverage addresses.
280raw_ostream &operator<<(raw_ostream &OS, const RawCoverage &CoverageData) {
281 for (auto Addr : *CoverageData.Addrs) {
282 OS << "0x";
283 OS.write_hex(N: Addr);
284 OS << "\n";
285 }
286 return OS;
287}
288
289// Write coverage addresses in binary format.
290void RawCoverage::write(const std::string &FileName,
291 const RawCoverage &Coverage) {
292 std::error_code EC;
293 raw_fd_ostream OS(FileName, EC, sys::fs::OF_None);
294 failIfError(Error: EC);
295
296 OS.write(Ptr: reinterpret_cast<const char *>(&Coverage.Header),
297 Size: sizeof(Coverage.Header));
298
299 switch (Coverage.Header.Bitness) {
300 case Bitness64:
301 for (auto Addr : *Coverage.Addrs) {
302 uint64_t Addr64 = Addr;
303 OS.write(Ptr: reinterpret_cast<const char *>(&Addr64), Size: sizeof(Addr64));
304 }
305 break;
306 case Bitness32:
307 for (auto Addr : *Coverage.Addrs) {
308 uint32_t Addr32 = static_cast<uint32_t>(Addr);
309 OS.write(Ptr: reinterpret_cast<const char *>(&Addr32), Size: sizeof(Addr32));
310 }
311 break;
312 default:
313 fail(E: "Unsupported bitness: " + std::to_string(val: Coverage.Header.Bitness));
314 }
315}
316
317static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
318 OS << "all-edges: " << Stats.AllPoints << "\n";
319 OS << "cov-edges: " << Stats.CovPoints << "\n";
320 OS << "all-functions: " << Stats.AllFns << "\n";
321 OS << "cov-functions: " << Stats.CovFns << "\n";
322 return OS;
323}
324
325// Output symbolized information for coverage points in JSON.
326// Format:
327// {
328// '<file_name>' : {
329// '<function_name>' : {
330// '<point_id'> : '<line_number>:'<column_number'.
331// ....
332// }
333// }
334// }
335static void operator<<(json::OStream &W,
336 const std::vector<CoveragePoint> &Points) {
337 // Group points by file.
338 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFile;
339 for (const auto &Point : Points) {
340 for (const DILineInfo &Loc : Point.Locs) {
341 PointsByFile[Loc.FileName].push_back(x: &Point);
342 }
343 }
344
345 for (const auto &P : PointsByFile) {
346 std::string FileName = P.first;
347 std::map<std::string, std::vector<const CoveragePoint *>> PointsByFn;
348 for (auto PointPtr : P.second) {
349 for (const DILineInfo &Loc : PointPtr->Locs) {
350 PointsByFn[Loc.FunctionName].push_back(x: PointPtr);
351 }
352 }
353
354 W.attributeObject(Key: P.first, Contents: [&] {
355 // Group points by function.
356 for (const auto &P : PointsByFn) {
357 std::string FunctionName = P.first;
358 std::set<std::string> WrittenIds;
359
360 W.attributeObject(Key: FunctionName, Contents: [&] {
361 for (const CoveragePoint *Point : P.second) {
362 for (const auto &Loc : Point->Locs) {
363 if (Loc.FileName != FileName || Loc.FunctionName != FunctionName)
364 continue;
365 if (!WrittenIds.insert(x: Point->Id).second)
366 continue;
367
368 // Output <point_id> : "<line>:<col>".
369 W.attribute(Key: Point->Id,
370 Contents: (utostr(X: Loc.Line) + ":" + utostr(X: Loc.Column)));
371 }
372 }
373 });
374 }
375 });
376 }
377}
378
379static void operator<<(json::OStream &W, const SymbolizedCoverage &C) {
380 W.object(Contents: [&] {
381 W.attributeArray(Key: "covered-points", Contents: [&] {
382 for (const std::string &P : C.CoveredIds) {
383 W.value(V: P);
384 }
385 });
386 W.attribute(Key: "binary-hash", Contents: C.BinaryHash);
387 W.attributeObject(Key: "point-symbol-info", Contents: [&] { W << C.Points; });
388 });
389}
390
391static std::string parseScalarString(yaml::Node *N) {
392 SmallString<64> StringStorage;
393 yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(Val: N);
394 failIf(B: !S, E: "expected string");
395 return std::string(S->getValue(Storage&: StringStorage));
396}
397
398std::unique_ptr<SymbolizedCoverage>
399SymbolizedCoverage::read(const std::string &InputFile) {
400 auto Coverage(std::make_unique<SymbolizedCoverage>());
401
402 std::map<std::string, CoveragePoint> Points;
403 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
404 MemoryBuffer::getFile(Filename: InputFile);
405 failIfError(E: BufOrErr);
406
407 SourceMgr SM;
408 yaml::Stream S(**BufOrErr, SM);
409
410 yaml::document_iterator DI = S.begin();
411 failIf(B: DI == S.end(), E: "empty document: " + InputFile);
412 yaml::Node *Root = DI->getRoot();
413 failIf(B: !Root, E: "expecting root node: " + InputFile);
414 yaml::MappingNode *Top = dyn_cast<yaml::MappingNode>(Val: Root);
415 failIf(B: !Top, E: "expecting mapping node: " + InputFile);
416
417 for (auto &KVNode : *Top) {
418 auto Key = parseScalarString(N: KVNode.getKey());
419
420 if (Key == "covered-points") {
421 yaml::SequenceNode *Points =
422 dyn_cast<yaml::SequenceNode>(Val: KVNode.getValue());
423 failIf(B: !Points, E: "expected array: " + InputFile);
424
425 for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
426 Coverage->CoveredIds.insert(x: parseScalarString(N: &*I));
427 }
428 } else if (Key == "binary-hash") {
429 Coverage->BinaryHash = parseScalarString(N: KVNode.getValue());
430 } else if (Key == "point-symbol-info") {
431 yaml::MappingNode *PointSymbolInfo =
432 dyn_cast<yaml::MappingNode>(Val: KVNode.getValue());
433 failIf(B: !PointSymbolInfo, E: "expected mapping node: " + InputFile);
434
435 for (auto &FileKVNode : *PointSymbolInfo) {
436 auto Filename = parseScalarString(N: FileKVNode.getKey());
437
438 yaml::MappingNode *FileInfo =
439 dyn_cast<yaml::MappingNode>(Val: FileKVNode.getValue());
440 failIf(B: !FileInfo, E: "expected mapping node: " + InputFile);
441
442 for (auto &FunctionKVNode : *FileInfo) {
443 auto FunctionName = parseScalarString(N: FunctionKVNode.getKey());
444
445 yaml::MappingNode *FunctionInfo =
446 dyn_cast<yaml::MappingNode>(Val: FunctionKVNode.getValue());
447 failIf(B: !FunctionInfo, E: "expected mapping node: " + InputFile);
448
449 for (auto &PointKVNode : *FunctionInfo) {
450 auto PointId = parseScalarString(N: PointKVNode.getKey());
451 auto Loc = parseScalarString(N: PointKVNode.getValue());
452
453 size_t ColonPos = Loc.find(c: ':');
454 failIf(B: ColonPos == std::string::npos, E: "expected ':': " + InputFile);
455
456 auto LineStr = Loc.substr(pos: 0, n: ColonPos);
457 auto ColStr = Loc.substr(pos: ColonPos + 1, n: Loc.size());
458
459 DILineInfo LineInfo;
460 LineInfo.FileName = Filename;
461 LineInfo.FunctionName = FunctionName;
462 char *End;
463 LineInfo.Line = std::strtoul(nptr: LineStr.c_str(), endptr: &End, base: 10);
464 LineInfo.Column = std::strtoul(nptr: ColStr.c_str(), endptr: &End, base: 10);
465
466 CoveragePoint *CoveragePoint =
467 &Points.try_emplace(k: PointId, args&: PointId).first->second;
468 CoveragePoint->Locs.push_back(Elt: LineInfo);
469 }
470 }
471 }
472 } else {
473 errs() << "Ignoring unknown key: " << Key << "\n";
474 }
475 }
476
477 for (auto &KV : Points) {
478 Coverage->Points.push_back(x: KV.second);
479 }
480
481 return Coverage;
482}
483
484// ---------- MAIN FUNCTIONALITY ----------
485
486std::string stripPathPrefix(std::string Path) {
487 if (ClStripPathPrefix.empty())
488 return Path;
489 size_t Pos = Path.find(str: ClStripPathPrefix);
490 if (Pos == std::string::npos)
491 return Path;
492 return Path.substr(pos: Pos + ClStripPathPrefix.size());
493}
494
495static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
496 symbolize::LLVMSymbolizer::Options SymbolizerOptions;
497 SymbolizerOptions.Demangle = ClDemangle;
498 SymbolizerOptions.UseSymbolTable = true;
499 return std::make_unique<symbolize::LLVMSymbolizer>(args&: SymbolizerOptions);
500}
501
502static std::string normalizeFilename(const std::string &FileName) {
503 SmallString<256> S(FileName);
504 sys::path::remove_dots(path&: S, /* remove_dot_dot */ true);
505 return stripPathPrefix(Path: sys::path::convert_to_slash(path: std::string(S)));
506}
507
508class Ignorelists {
509public:
510 Ignorelists()
511 : DefaultIgnorelist(createDefaultIgnorelist()),
512 UserIgnorelist(createUserIgnorelist()) {}
513
514 bool isIgnorelisted(const DILineInfo &I) {
515 if (DefaultIgnorelist &&
516 DefaultIgnorelist->inSection(Section: "sancov", Prefix: "fun", Query: I.FunctionName))
517 return true;
518 if (DefaultIgnorelist &&
519 DefaultIgnorelist->inSection(Section: "sancov", Prefix: "src", Query: I.FileName))
520 return true;
521 if (UserIgnorelist &&
522 UserIgnorelist->inSection(Section: "sancov", Prefix: "fun", Query: I.FunctionName))
523 return true;
524 if (UserIgnorelist &&
525 UserIgnorelist->inSection(Section: "sancov", Prefix: "src", Query: I.FileName))
526 return true;
527 return false;
528 }
529
530private:
531 static std::unique_ptr<SpecialCaseList> createDefaultIgnorelist() {
532 if (!ClUseDefaultIgnorelist)
533 return std::unique_ptr<SpecialCaseList>();
534 std::unique_ptr<MemoryBuffer> MB =
535 MemoryBuffer::getMemBuffer(InputData: DefaultIgnorelistStr);
536 std::string Error;
537 auto Ignorelist = SpecialCaseList::create(MB: MB.get(), Error);
538 failIfNotEmpty(E: Error);
539 return Ignorelist;
540 }
541
542 static std::unique_ptr<SpecialCaseList> createUserIgnorelist() {
543 if (ClIgnorelist.empty())
544 return std::unique_ptr<SpecialCaseList>();
545 return SpecialCaseList::createOrDie(Paths: {{ClIgnorelist}},
546 FS&: *vfs::getRealFileSystem());
547 }
548 std::unique_ptr<SpecialCaseList> DefaultIgnorelist;
549 std::unique_ptr<SpecialCaseList> UserIgnorelist;
550};
551
552static std::vector<CoveragePoint>
553getCoveragePoints(const std::string &ObjectFile,
554 const std::set<uint64_t> &Addrs,
555 const std::set<uint64_t> &CoveredAddrs) {
556 std::vector<CoveragePoint> Result;
557 auto Symbolizer(createSymbolizer());
558 Ignorelists Ig;
559
560 std::set<std::string> CoveredFiles;
561 if (ClSkipDeadFiles) {
562 for (auto Addr : CoveredAddrs) {
563 // TODO: it would be neccessary to set proper section index here.
564 // object::SectionedAddress::UndefSection works for only absolute
565 // addresses.
566 object::SectionedAddress ModuleAddress = {
567 .Address: Addr, .SectionIndex: object::SectionedAddress::UndefSection};
568
569 auto LineInfo = Symbolizer->symbolizeCode(ModuleName: ObjectFile, ModuleOffset: ModuleAddress);
570 failIfError(E&: LineInfo);
571 CoveredFiles.insert(x: LineInfo->FileName);
572 auto InliningInfo =
573 Symbolizer->symbolizeInlinedCode(ModuleName: ObjectFile, ModuleOffset: ModuleAddress);
574 failIfError(E&: InliningInfo);
575 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
576 auto FrameInfo = InliningInfo->getFrame(Index: I);
577 CoveredFiles.insert(x: FrameInfo.FileName);
578 }
579 }
580 }
581
582 for (auto Addr : Addrs) {
583 std::set<DILineInfo> Infos; // deduplicate debug info.
584
585 // TODO: it would be neccessary to set proper section index here.
586 // object::SectionedAddress::UndefSection works for only absolute addresses.
587 object::SectionedAddress ModuleAddress = {
588 .Address: Addr, .SectionIndex: object::SectionedAddress::UndefSection};
589
590 auto LineInfo = Symbolizer->symbolizeCode(ModuleName: ObjectFile, ModuleOffset: ModuleAddress);
591 failIfError(E&: LineInfo);
592 if (ClSkipDeadFiles &&
593 CoveredFiles.find(x: LineInfo->FileName) == CoveredFiles.end())
594 continue;
595 LineInfo->FileName = normalizeFilename(FileName: LineInfo->FileName);
596 if (Ig.isIgnorelisted(I: *LineInfo))
597 continue;
598
599 auto Id = utohexstr(X: Addr, LowerCase: true);
600 auto Point = CoveragePoint(Id);
601 Infos.insert(x: *LineInfo);
602 Point.Locs.push_back(Elt: *LineInfo);
603
604 auto InliningInfo =
605 Symbolizer->symbolizeInlinedCode(ModuleName: ObjectFile, ModuleOffset: ModuleAddress);
606 failIfError(E&: InliningInfo);
607 for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
608 auto FrameInfo = InliningInfo->getFrame(Index: I);
609 if (ClSkipDeadFiles &&
610 CoveredFiles.find(x: FrameInfo.FileName) == CoveredFiles.end())
611 continue;
612 FrameInfo.FileName = normalizeFilename(FileName: FrameInfo.FileName);
613 if (Ig.isIgnorelisted(I: FrameInfo))
614 continue;
615 if (Infos.insert(x: FrameInfo).second)
616 Point.Locs.push_back(Elt: FrameInfo);
617 }
618
619 Result.push_back(x: Point);
620 }
621
622 return Result;
623}
624
625static bool isCoveragePointSymbol(StringRef Name) {
626 return Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" ||
627 Name == "__sanitizer_cov_trace_func_enter" ||
628 Name == "__sanitizer_cov_trace_pc_guard" ||
629 // Mac has '___' prefix
630 Name == "___sanitizer_cov" || Name == "___sanitizer_cov_with_check" ||
631 Name == "___sanitizer_cov_trace_func_enter" ||
632 Name == "___sanitizer_cov_trace_pc_guard" ||
633 // Large Aarch64 binaries use thunks
634 Name == "__AArch64ADRPThunk___sanitizer_cov" ||
635 Name == "__AArch64ADRPThunk___sanitizer_cov_with_check" ||
636 Name == "__AArch64ADRPThunk___sanitizer_cov_trace_func_enter" ||
637 Name == "__AArch64ADRPThunk___sanitizer_cov_trace_pc_guard";
638}
639
640// Locate __sanitizer_cov* function addresses inside the stubs table on MachO.
641static void findMachOIndirectCovFunctions(const object::MachOObjectFile &O,
642 std::set<uint64_t> *Result) {
643 MachO::dysymtab_command Dysymtab = O.getDysymtabLoadCommand();
644 MachO::symtab_command Symtab = O.getSymtabLoadCommand();
645
646 for (const auto &Load : O.load_commands()) {
647 if (Load.C.cmd == MachO::LC_SEGMENT_64) {
648 MachO::segment_command_64 Seg = O.getSegment64LoadCommand(L: Load);
649 for (unsigned J = 0; J < Seg.nsects; ++J) {
650 MachO::section_64 Sec = O.getSection64(L: Load, Index: J);
651
652 uint32_t SectionType = Sec.flags & MachO::SECTION_TYPE;
653 if (SectionType == MachO::S_SYMBOL_STUBS) {
654 uint32_t Stride = Sec.reserved2;
655 uint32_t Cnt = Sec.size / Stride;
656 uint32_t N = Sec.reserved1;
657 for (uint32_t J = 0; J < Cnt && N + J < Dysymtab.nindirectsyms; J++) {
658 uint32_t IndirectSymbol =
659 O.getIndirectSymbolTableEntry(DLC: Dysymtab, Index: N + J);
660 uint64_t Addr = Sec.addr + J * Stride;
661 if (IndirectSymbol < Symtab.nsyms) {
662 object::SymbolRef Symbol = *(O.getSymbolByIndex(Index: IndirectSymbol));
663 Expected<StringRef> Name = Symbol.getName();
664 failIfError(E&: Name);
665 if (isCoveragePointSymbol(Name: Name.get())) {
666 Result->insert(x: Addr);
667 }
668 }
669 }
670 }
671 }
672 }
673 if (Load.C.cmd == MachO::LC_SEGMENT) {
674 errs() << "ERROR: 32 bit MachO binaries not supported\n";
675 }
676 }
677}
678
679// Locate __sanitizer_cov* function addresses that are used for coverage
680// reporting.
681static std::set<uint64_t>
682findSanitizerCovFunctions(const object::ObjectFile &O) {
683 std::set<uint64_t> Result;
684
685 for (const object::SymbolRef &Symbol : O.symbols()) {
686 Expected<uint64_t> AddressOrErr = Symbol.getAddress();
687 failIfError(E&: AddressOrErr);
688 uint64_t Address = AddressOrErr.get();
689
690 Expected<StringRef> NameOrErr = Symbol.getName();
691 failIfError(E&: NameOrErr);
692 StringRef Name = NameOrErr.get();
693
694 Expected<uint32_t> FlagsOrErr = Symbol.getFlags();
695 // TODO: Test this error.
696 failIfError(E&: FlagsOrErr);
697 uint32_t Flags = FlagsOrErr.get();
698
699 // XCOFF uses "." prefix for function entry point symbols.
700 StringRef EffectiveName =
701 (isa<object::XCOFFObjectFile>(Val: &O) && Name.starts_with(Prefix: "."))
702 ? Name.drop_front(N: 1)
703 : Name;
704 if (!(Flags & object::BasicSymbolRef::SF_Undefined) &&
705 isCoveragePointSymbol(Name: EffectiveName)) {
706 Result.insert(x: Address);
707 }
708 }
709
710 if (const auto *CO = dyn_cast<object::COFFObjectFile>(Val: &O)) {
711 for (const object::ExportDirectoryEntryRef &Export :
712 CO->export_directories()) {
713 uint32_t RVA;
714 failIfError(Err: Export.getExportRVA(Result&: RVA));
715
716 StringRef Name;
717 failIfError(Err: Export.getSymbolName(Result&: Name));
718
719 if (isCoveragePointSymbol(Name))
720 Result.insert(x: CO->getImageBase() + RVA);
721 }
722 }
723
724 if (const auto *MO = dyn_cast<object::MachOObjectFile>(Val: &O)) {
725 findMachOIndirectCovFunctions(O: *MO, Result: &Result);
726 }
727
728 return Result;
729}
730
731// Ported from
732// compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h:GetPreviousInstructionPc
733// GetPreviousInstructionPc.
734static uint64_t getPreviousInstructionPc(uint64_t PC, Triple TheTriple) {
735 if (TheTriple.isARM())
736 return (PC - 3) & (~1);
737 if (TheTriple.isMIPS() || TheTriple.isSPARC())
738 return PC - 8;
739 if (TheTriple.isRISCV())
740 return PC - 2;
741 if (TheTriple.isX86() || TheTriple.isSystemZ())
742 return PC - 1;
743 return PC - 4;
744}
745
746// Locate addresses of all coverage points in a file. Coverage point
747// is defined as the 'address of instruction following __sanitizer_cov
748// call - 1'.
749static void getObjectCoveragePoints(const object::ObjectFile &O,
750 std::set<uint64_t> *Addrs) {
751 Triple TheTriple("unknown-unknown-unknown");
752 TheTriple.setArch(Kind: Triple::ArchType(O.getArch()));
753 auto TripleName = TheTriple.getTriple();
754
755 std::string Error;
756 const Target *TheTarget = TargetRegistry::lookupTarget(TheTriple, Error);
757 failIfNotEmpty(E: Error);
758
759 std::unique_ptr<const MCSubtargetInfo> STI(
760 TheTarget->createMCSubtargetInfo(TheTriple, CPU: "", Features: ""));
761 failIfEmpty(Ptr: STI, Message: "no subtarget info for target " + TripleName);
762
763 std::unique_ptr<const MCRegisterInfo> MRI(
764 TheTarget->createMCRegInfo(TT: TheTriple));
765 failIfEmpty(Ptr: MRI, Message: "no register info for target " + TripleName);
766
767 MCTargetOptions MCOptions;
768 std::unique_ptr<const MCAsmInfo> AsmInfo(
769 TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple, Options: MCOptions));
770 failIfEmpty(Ptr: AsmInfo, Message: "no asm info for target " + TripleName);
771
772 MCContext Ctx(TheTriple, AsmInfo.get(), MRI.get(), STI.get());
773 std::unique_ptr<MCDisassembler> DisAsm(
774 TheTarget->createMCDisassembler(STI: *STI, Ctx));
775 failIfEmpty(Ptr: DisAsm, Message: "no disassembler info for target " + TripleName);
776
777 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
778 failIfEmpty(Ptr: MII, Message: "no instruction info for target " + TripleName);
779
780 std::unique_ptr<MCInstrAnalysis> MIA(
781 TheTarget->createMCInstrAnalysis(Info: MII.get()));
782 failIfEmpty(Ptr: MIA, Message: "no instruction analysis info for target " + TripleName);
783
784 auto SanCovAddrs = findSanitizerCovFunctions(O);
785 if (SanCovAddrs.empty())
786 fail(E: "__sanitizer_cov* functions not found");
787
788 for (object::SectionRef Section : O.sections()) {
789 if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
790 continue;
791 uint64_t SectionAddr = Section.getAddress();
792 uint64_t SectSize = Section.getSize();
793 if (!SectSize)
794 continue;
795
796 Expected<StringRef> BytesStr = Section.getContents();
797 failIfError(E&: BytesStr);
798 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Input: *BytesStr);
799
800 if (MIA)
801 MIA->resetState();
802
803 for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
804 Index += Size) {
805 MCInst Inst;
806 ArrayRef<uint8_t> ThisBytes = Bytes.slice(N: Index);
807 uint64_t ThisAddr = SectionAddr + Index;
808 if (!DisAsm->getInstruction(Instr&: Inst, Size, Bytes: ThisBytes, Address: ThisAddr, CStream&: nulls())) {
809 if (Size == 0)
810 Size = std::min<uint64_t>(
811 a: ThisBytes.size(),
812 b: DisAsm->suggestBytesToSkip(Bytes: ThisBytes, Address: ThisAddr));
813 MIA->resetState();
814 continue;
815 }
816 uint64_t Addr = Index + SectionAddr;
817 // Sanitizer coverage uses the address of the next instruction - 1.
818 uint64_t CovPoint = getPreviousInstructionPc(PC: Addr + Size, TheTriple);
819 uint64_t Target;
820 if (MIA->isCall(Inst) &&
821 MIA->evaluateBranch(Inst, Addr: SectionAddr + Index, Size, Target) &&
822 SanCovAddrs.find(x: Target) != SanCovAddrs.end())
823 Addrs->insert(x: CovPoint);
824 MIA->updateState(Inst, STI: STI.get(), Addr);
825 }
826 }
827}
828
829static void
830visitObjectFiles(const object::Archive &A,
831 function_ref<void(const object::ObjectFile &)> Fn) {
832 Error Err = Error::success();
833 for (auto &C : A.children(Err)) {
834 Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
835 failIfError(E&: ChildOrErr);
836 if (auto *O = dyn_cast<object::ObjectFile>(Val: &*ChildOrErr.get()))
837 Fn(*O);
838 else
839 failIfError(Error: object::object_error::invalid_file_type);
840 }
841 failIfError(Err: std::move(Err));
842}
843
844static void
845visitObjectFiles(const std::string &FileName,
846 function_ref<void(const object::ObjectFile &)> Fn) {
847 Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
848 object::createBinary(Path: FileName);
849 if (!BinaryOrErr)
850 failIfError(E&: BinaryOrErr);
851
852 object::Binary &Binary = *BinaryOrErr.get().getBinary();
853 if (object::Archive *A = dyn_cast<object::Archive>(Val: &Binary))
854 visitObjectFiles(A: *A, Fn);
855 else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(Val: &Binary))
856 Fn(*O);
857 else
858 failIfError(Error: object::object_error::invalid_file_type);
859}
860
861static std::set<uint64_t>
862findSanitizerCovFunctions(const std::string &FileName) {
863 std::set<uint64_t> Result;
864 visitObjectFiles(FileName, Fn: [&](const object::ObjectFile &O) {
865 auto Addrs = findSanitizerCovFunctions(O);
866 Result.insert(first: Addrs.begin(), last: Addrs.end());
867 });
868 return Result;
869}
870
871// Locate addresses of all coverage points in a file. Coverage point
872// is defined as the 'address of instruction following __sanitizer_cov
873// call - 1'.
874static std::set<uint64_t> findCoveragePointAddrs(const std::string &FileName) {
875 std::set<uint64_t> Result;
876 visitObjectFiles(FileName, Fn: [&](const object::ObjectFile &O) {
877 getObjectCoveragePoints(O, Addrs: &Result);
878 });
879 return Result;
880}
881
882static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
883 for (uint64_t Addr : findCoveragePointAddrs(FileName: ObjFile)) {
884 OS << "0x";
885 OS.write_hex(N: Addr);
886 OS << "\n";
887 }
888}
889
890static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
891 auto ShortFileName = llvm::sys::path::filename(path: FileName);
892 if (!SancovFileRegex.match(String: ShortFileName))
893 return false;
894
895 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
896 MemoryBuffer::getFile(Filename: FileName);
897 if (!BufOrErr) {
898 errs() << "Warning: " << BufOrErr.getError().message() << "("
899 << BufOrErr.getError().value()
900 << "), filename: " << llvm::sys::path::filename(path: FileName) << "\n";
901 return BufOrErr.getError();
902 }
903 std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
904 if (Buf->getBufferSize() < 8) {
905 return false;
906 }
907 const FileHeader *Header =
908 reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
909 return Header->Magic == BinCoverageMagic;
910}
911
912static bool isSymbolizedCoverageFile(const std::string &FileName) {
913 auto ShortFileName = llvm::sys::path::filename(path: FileName);
914 return SymcovFileRegex.match(String: ShortFileName);
915}
916
917static std::unique_ptr<SymbolizedCoverage>
918symbolize(const RawCoverage &Data, const std::string ObjectFile) {
919 auto Coverage = std::make_unique<SymbolizedCoverage>();
920
921 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
922 MemoryBuffer::getFile(Filename: ObjectFile);
923 failIfError(E: BufOrErr);
924 SHA1 Hasher;
925 Hasher.update(Str: (*BufOrErr)->getBuffer());
926 Coverage->BinaryHash = toHex(Input: Hasher.final());
927
928 Ignorelists Ig;
929 auto Symbolizer(createSymbolizer());
930
931 for (uint64_t Addr : *Data.Addrs) {
932 // TODO: it would be neccessary to set proper section index here.
933 // object::SectionedAddress::UndefSection works for only absolute addresses.
934 auto LineInfo = Symbolizer->symbolizeCode(
935 ModuleName: ObjectFile, ModuleOffset: {.Address: Addr, .SectionIndex: object::SectionedAddress::UndefSection});
936 failIfError(E&: LineInfo);
937 if (Ig.isIgnorelisted(I: *LineInfo))
938 continue;
939
940 Coverage->CoveredIds.insert(x: utohexstr(X: Addr, LowerCase: true));
941 }
942
943 std::set<uint64_t> AllAddrs = findCoveragePointAddrs(FileName: ObjectFile);
944 if (!llvm::includes(Range1&: AllAddrs, Range2&: *Data.Addrs)) {
945 fail(E: "Coverage points in binary and .sancov file do not match.");
946 }
947 Coverage->Points = getCoveragePoints(ObjectFile, Addrs: AllAddrs, CoveredAddrs: *Data.Addrs);
948 return Coverage;
949}
950
// Pairs a file name with a function name; usable as an ordered-container key.
struct FileFn {
  std::string FileName;
  std::string FunctionName;

  // Orders by FileName first and falls back to FunctionName on a tie.
  bool operator<(const FileFn &RHS) const {
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return FunctionName < RHS.FunctionName;
  }
};
960
961static std::set<FileFn>
962computeFunctions(const std::vector<CoveragePoint> &Points) {
963 std::set<FileFn> Fns;
964 for (const auto &Point : Points) {
965 for (const auto &Loc : Point.Locs) {
966 Fns.insert(x: FileFn{.FileName: Loc.FileName, .FunctionName: Loc.FunctionName});
967 }
968 }
969 return Fns;
970}
971
972static std::set<FileFn>
973computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
974 auto Fns = computeFunctions(Points: Coverage.Points);
975
976 for (const auto &Point : Coverage.Points) {
977 if (Coverage.CoveredIds.find(x: Point.Id) == Coverage.CoveredIds.end())
978 continue;
979
980 for (const auto &Loc : Point.Locs) {
981 Fns.erase(x: FileFn{.FileName: Loc.FileName, .FunctionName: Loc.FunctionName});
982 }
983 }
984
985 return Fns;
986}
987
988static std::set<FileFn>
989computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
990 auto AllFns = computeFunctions(Points: Coverage.Points);
991 std::set<FileFn> Result;
992
993 for (const auto &Point : Coverage.Points) {
994 if (Coverage.CoveredIds.find(x: Point.Id) == Coverage.CoveredIds.end())
995 continue;
996
997 for (const auto &Loc : Point.Locs) {
998 Result.insert(x: FileFn{.FileName: Loc.FileName, .FunctionName: Loc.FunctionName});
999 }
1000 }
1001
1002 return Result;
1003}
1004
1005typedef std::map<FileFn, std::pair<uint32_t, uint32_t>> FunctionLocs;
1006// finds first location in a file for each function.
1007static FunctionLocs resolveFunctions(const SymbolizedCoverage &Coverage,
1008 const std::set<FileFn> &Fns) {
1009 FunctionLocs Result;
1010 for (const auto &Point : Coverage.Points) {
1011 for (const auto &Loc : Point.Locs) {
1012 FileFn Fn = FileFn{.FileName: Loc.FileName, .FunctionName: Loc.FunctionName};
1013 if (Fns.find(x: Fn) == Fns.end())
1014 continue;
1015
1016 auto P = std::make_pair(x: Loc.Line, y: Loc.Column);
1017 auto [It, Inserted] = Result.try_emplace(k: Fn, args&: P);
1018 if (!Inserted && It->second > P)
1019 It->second = P;
1020 }
1021 }
1022 return Result;
1023}
1024
1025static void printFunctionLocs(const FunctionLocs &FnLocs, raw_ostream &OS) {
1026 for (const auto &P : FnLocs) {
1027 OS << stripPathPrefix(Path: P.first.FileName) << ":" << P.second.first << " "
1028 << P.first.FunctionName << "\n";
1029 }
1030}
1031CoverageStats computeStats(const SymbolizedCoverage &Coverage) {
1032 CoverageStats Stats = {.AllPoints: Coverage.Points.size(), .CovPoints: Coverage.CoveredIds.size(),
1033 .AllFns: computeFunctions(Points: Coverage.Points).size(),
1034 .CovFns: computeCoveredFunctions(Coverage).size()};
1035 return Stats;
1036}
1037
1038// Print list of covered functions.
1039// Line format: <file_name>:<line> <function_name>
1040static void printCoveredFunctions(const SymbolizedCoverage &CovData,
1041 raw_ostream &OS) {
1042 auto CoveredFns = computeCoveredFunctions(Coverage: CovData);
1043 printFunctionLocs(FnLocs: resolveFunctions(Coverage: CovData, Fns: CoveredFns), OS);
1044}
1045
1046// Print list of not covered functions.
1047// Line format: <file_name>:<line> <function_name>
1048static void printNotCoveredFunctions(const SymbolizedCoverage &CovData,
1049 raw_ostream &OS) {
1050 auto NotCoveredFns = computeNotCoveredFunctions(Coverage: CovData);
1051 printFunctionLocs(FnLocs: resolveFunctions(Coverage: CovData, Fns: NotCoveredFns), OS);
1052}
1053
1054// Read list of files and merges their coverage info.
1055static void readAndPrintRawCoverage(const std::vector<std::string> &FileNames,
1056 raw_ostream &OS) {
1057 for (const auto &FileName : FileNames) {
1058 auto Cov = RawCoverage::read(FileName);
1059 if (!Cov)
1060 continue;
1061 OS << *Cov.get();
1062 }
1063}
1064
1065static const char *bitnessToString(uint32_t Bitness) {
1066 switch (Bitness) {
1067 case Bitness64:
1068 return "64-bit";
1069 case Bitness32:
1070 return "32-bit";
1071 default:
1072 fail(E: "Unsupported bitness: " + std::to_string(val: Bitness));
1073 return nullptr;
1074 }
1075}
1076
1077// Warn if two file headers have different bitness.
1078static void warnIfDifferentBitness(const FileHeader &Header1,
1079 const FileHeader &Header2,
1080 const std::string &File1Desc,
1081 const std::string &File2Desc) {
1082 if (Header1.Bitness != Header2.Bitness) {
1083 errs() << "WARNING: Input files have different bitness (" << File1Desc
1084 << ": " << bitnessToString(Bitness: Header1.Bitness) << ", " << File2Desc
1085 << ": " << bitnessToString(Bitness: Header2.Bitness)
1086 << "). Using bitness from " << File1Desc << ".\n";
1087
1088 if (Header1.Bitness == Bitness32 && Header2.Bitness == Bitness64) {
1089 errs() << "WARNING: 64-bit addresses will be truncated to 32 bits. "
1090 << "This may result in data loss.\n";
1091 }
1092 }
1093}
1094
1095// Compute difference between two coverage files (A - B) and write to output
1096// file.
1097static void diffRawCoverage(const std::string &FileA, const std::string &FileB,
1098 const std::string &OutputFile) {
1099 auto CovA = RawCoverage::read(FileName: FileA);
1100 failIfError(E: CovA);
1101
1102 auto CovB = RawCoverage::read(FileName: FileB);
1103 failIfError(E: CovB);
1104
1105 const FileHeader &HeaderA = CovA.get()->Header;
1106 const FileHeader &HeaderB = CovB.get()->Header;
1107
1108 warnIfDifferentBitness(Header1: HeaderA, Header2: HeaderB, File1Desc: FileA, File2Desc: FileB);
1109
1110 // Compute A - B
1111 auto DiffAddrs = std::make_unique<std::set<uint64_t>>();
1112 std::set_difference(first1: CovA.get()->Addrs->begin(), last1: CovA.get()->Addrs->end(),
1113 first2: CovB.get()->Addrs->begin(), last2: CovB.get()->Addrs->end(),
1114 result: std::inserter(x&: *DiffAddrs, i: DiffAddrs->end()));
1115
1116 RawCoverage DiffCov(std::move(DiffAddrs), HeaderA);
1117 RawCoverage::write(FileName: OutputFile, Coverage: DiffCov);
1118}
1119
1120// Compute union of multiple coverage files and write to output file.
1121static void unionRawCoverage(const std::vector<std::string> &InputFiles,
1122 const std::string &OutputFile) {
1123 failIf(B: InputFiles.empty(), E: "union action requires at least one input file");
1124
1125 // Read the first file to get the header and initial coverage
1126 auto UnionCov = RawCoverage::read(FileName: InputFiles[0]);
1127 failIfError(E: UnionCov);
1128
1129 const FileHeader &UnionHeader = UnionCov.get()->Header;
1130
1131 for (size_t I = 1; I < InputFiles.size(); ++I) {
1132 auto Cov = RawCoverage::read(FileName: InputFiles[I]);
1133 failIfError(E: Cov);
1134
1135 const FileHeader &CurHeader = Cov.get()->Header;
1136
1137 warnIfDifferentBitness(Header1: UnionHeader, Header2: CurHeader, File1Desc: InputFiles[0],
1138 File2Desc: InputFiles[I]);
1139
1140 UnionCov.get()->Addrs->insert(first: Cov.get()->Addrs->begin(),
1141 last: Cov.get()->Addrs->end());
1142 }
1143
1144 RawCoverage::write(FileName: OutputFile, Coverage: *UnionCov.get());
1145}
1146
1147static std::unique_ptr<SymbolizedCoverage>
1148merge(const std::vector<std::unique_ptr<SymbolizedCoverage>> &Coverages) {
1149 if (Coverages.empty())
1150 return nullptr;
1151
1152 auto Result = std::make_unique<SymbolizedCoverage>();
1153
1154 for (size_t I = 0; I < Coverages.size(); ++I) {
1155 const SymbolizedCoverage &Coverage = *Coverages[I];
1156 std::string Prefix;
1157 if (Coverages.size() > 1) {
1158 // prefix is not needed when there's only one file.
1159 Prefix = utostr(X: I);
1160 }
1161
1162 for (const auto &Id : Coverage.CoveredIds) {
1163 Result->CoveredIds.insert(x: Prefix + Id);
1164 }
1165
1166 for (const auto &CovPoint : Coverage.Points) {
1167 CoveragePoint NewPoint(CovPoint);
1168 NewPoint.Id = Prefix + CovPoint.Id;
1169 Result->Points.push_back(x: NewPoint);
1170 }
1171 }
1172
1173 if (Coverages.size() == 1) {
1174 Result->BinaryHash = Coverages[0]->BinaryHash;
1175 }
1176
1177 return Result;
1178}
1179
1180static std::unique_ptr<SymbolizedCoverage>
1181readSymbolizeAndMergeCmdArguments(std::vector<std::string> FileNames) {
1182 std::vector<std::unique_ptr<SymbolizedCoverage>> Coverages;
1183
1184 {
1185 // Short name => file name.
1186 std::map<std::string, std::string, std::less<>> ObjFiles;
1187 std::string FirstObjFile;
1188 std::set<std::string> CovFiles;
1189
1190 // Partition input values into coverage/object files.
1191 for (const auto &FileName : FileNames) {
1192 if (isSymbolizedCoverageFile(FileName)) {
1193 Coverages.push_back(x: SymbolizedCoverage::read(InputFile: FileName));
1194 }
1195
1196 auto ErrorOrIsCoverage = isCoverageFile(FileName);
1197 if (!ErrorOrIsCoverage)
1198 continue;
1199 if (ErrorOrIsCoverage.get()) {
1200 CovFiles.insert(x: FileName);
1201 } else {
1202 auto ShortFileName = llvm::sys::path::filename(path: FileName);
1203 if (ObjFiles.find(x: ShortFileName) != ObjFiles.end()) {
1204 fail(E: "Duplicate binary file with a short name: " + ShortFileName);
1205 }
1206
1207 ObjFiles[std::string(ShortFileName)] = FileName;
1208 if (FirstObjFile.empty())
1209 FirstObjFile = FileName;
1210 }
1211 }
1212
1213 SmallVector<StringRef, 2> Components;
1214
1215 // Object file => list of corresponding coverage file names.
1216 std::map<std::string, std::vector<std::string>> CoverageByObjFile;
1217 for (const auto &FileName : CovFiles) {
1218 auto ShortFileName = llvm::sys::path::filename(path: FileName);
1219 auto Ok = SancovFileRegex.match(String: ShortFileName, Matches: &Components);
1220 if (!Ok) {
1221 fail(E: "Can't match coverage file name against "
1222 "<module_name>.<pid>.sancov pattern: " +
1223 FileName);
1224 }
1225
1226 auto Iter = ObjFiles.find(x: Components[1]);
1227 if (Iter == ObjFiles.end()) {
1228 fail(E: "Object file for coverage not found: " + FileName);
1229 }
1230
1231 CoverageByObjFile[Iter->second].push_back(x: FileName);
1232 };
1233
1234 for (const auto &Pair : ObjFiles) {
1235 auto FileName = Pair.second;
1236 if (CoverageByObjFile.find(x: FileName) == CoverageByObjFile.end())
1237 errs() << "WARNING: No coverage file for " << FileName << "\n";
1238 }
1239
1240 // Read raw coverage and symbolize it.
1241 for (const auto &Pair : CoverageByObjFile) {
1242 if (findSanitizerCovFunctions(FileName: Pair.first).empty()) {
1243 errs()
1244 << "WARNING: Ignoring " << Pair.first
1245 << " and its coverage because __sanitizer_cov* functions were not "
1246 "found.\n";
1247 continue;
1248 }
1249
1250 for (const std::string &CoverageFile : Pair.second) {
1251 auto DataOrError = RawCoverage::read(FileName: CoverageFile);
1252 failIfError(E: DataOrError);
1253 Coverages.push_back(x: symbolize(Data: *DataOrError.get(), ObjectFile: Pair.first));
1254 }
1255 }
1256 }
1257
1258 return merge(Coverages);
1259}
1260
1261} // namespace
1262
1263static void parseArgs(int Argc, char **Argv) {
1264 SancovOptTable Tbl;
1265 llvm::BumpPtrAllocator A;
1266 llvm::StringSaver Saver{A};
1267 opt::InputArgList Args =
1268 Tbl.parseArgs(Argc, Argv, Unknown: OPT_UNKNOWN, Saver, ErrorFn: [&](StringRef Msg) {
1269 llvm::errs() << Msg << '\n';
1270 std::exit(status: 1);
1271 });
1272
1273 if (Args.hasArg(Ids: OPT_help)) {
1274 Tbl.printHelp(
1275 OS&: llvm::outs(),
1276 Usage: "sancov [options] <action> <binary files...> <.sancov files...> "
1277 "<.symcov files...>",
1278 Title: "Sanitizer Coverage Processing Tool (sancov)\n\n"
1279 " This tool can extract various coverage-related information from: \n"
1280 " coverage-instrumented binary files, raw .sancov files and their "
1281 "symbolized .symcov version.\n"
1282 " Depending on chosen action the tool expects different input files:\n"
1283 " -print-coverage-pcs - coverage-instrumented binary files\n"
1284 " -print-coverage - .sancov files\n"
1285 " -diff - two .sancov files & --output option\n"
1286 " -union - one or more .sancov files & --output "
1287 "option\n"
1288 " <other actions> - .sancov files & corresponding binary "
1289 "files, .symcov files\n");
1290 std::exit(status: 0);
1291 }
1292
1293 if (Args.hasArg(Ids: OPT_version)) {
1294 cl::PrintVersionMessage();
1295 std::exit(status: 0);
1296 }
1297
1298 if (Args.hasMultipleArgs(Id: OPT_action_grp)) {
1299 fail(E: "Only one action option is allowed");
1300 }
1301
1302 for (const opt::Arg *A : Args.filtered(Ids: OPT_INPUT)) {
1303 ClInputFiles.emplace_back(args: A->getValue());
1304 }
1305
1306 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_action_grp)) {
1307 switch (A->getOption().getID()) {
1308 case OPT_print:
1309 Action = ActionType::PrintAction;
1310 break;
1311 case OPT_diff:
1312 Action = ActionType::DiffAction;
1313 break;
1314 case OPT_union_files:
1315 Action = ActionType::UnionAction;
1316 break;
1317 case OPT_printCoveragePcs:
1318 Action = ActionType::PrintCovPointsAction;
1319 break;
1320 case OPT_coveredFunctions:
1321 Action = ActionType::CoveredFunctionsAction;
1322 break;
1323 case OPT_notCoveredFunctions:
1324 Action = ActionType::NotCoveredFunctionsAction;
1325 break;
1326 case OPT_printCoverageStats:
1327 Action = ActionType::StatsAction;
1328 break;
1329 case OPT_htmlReport:
1330 Action = ActionType::HtmlReportAction;
1331 break;
1332 case OPT_symbolize:
1333 Action = ActionType::SymbolizeAction;
1334 break;
1335 case OPT_merge:
1336 Action = ActionType::MergeAction;
1337 break;
1338 default:
1339 fail(E: "Invalid Action");
1340 }
1341 }
1342
1343 ClDemangle = Args.hasFlag(Pos: OPT_demangle, Neg: OPT_no_demangle, Default: true);
1344 ClSkipDeadFiles = Args.hasFlag(Pos: OPT_skipDeadFiles, Neg: OPT_no_skipDeadFiles, Default: true);
1345 ClUseDefaultIgnorelist =
1346 Args.hasFlag(Pos: OPT_useDefaultIgnoreList, Neg: OPT_no_useDefaultIgnoreList, Default: true);
1347
1348 ClStripPathPrefix = Args.getLastArgValue(Id: OPT_stripPathPrefix_EQ);
1349 ClIgnorelist = Args.getLastArgValue(Id: OPT_ignorelist_EQ);
1350 ClOutputFile = Args.getLastArgValue(Id: OPT_output_EQ);
1351}
1352
1353int sancov_main(int Argc, char **Argv, const llvm::ToolContext &) {
1354 llvm::InitializeAllTargetInfos();
1355 llvm::InitializeAllTargetMCs();
1356 llvm::InitializeAllDisassemblers();
1357
1358 parseArgs(Argc, Argv);
1359
1360 // -print doesn't need object files.
1361 if (Action == PrintAction) {
1362 readAndPrintRawCoverage(FileNames: ClInputFiles, OS&: outs());
1363 return 0;
1364 }
1365 if (Action == DiffAction) {
1366 // -diff requires exactly 2 input files and an output file.
1367 failIf(B: ClInputFiles.size() != 2,
1368 E: "diff action requires exactly 2 input sancov files");
1369 failIf(
1370 B: ClOutputFile.empty(),
1371 E: "diff action requires --output option to specify output sancov file");
1372 diffRawCoverage(FileA: ClInputFiles[0], FileB: ClInputFiles[1], OutputFile: ClOutputFile);
1373 return 0;
1374 }
1375 if (Action == UnionAction) {
1376 // -union requires at least 1 input file and an output file.
1377 failIf(B: ClInputFiles.empty(),
1378 E: "union action requires at least one input sancov file");
1379 failIf(
1380 B: ClOutputFile.empty(),
1381 E: "union action requires --output option to specify output sancov file");
1382 unionRawCoverage(InputFiles: ClInputFiles, OutputFile: ClOutputFile);
1383 return 0;
1384 }
1385 if (Action == PrintCovPointsAction) {
1386 // -print-coverage-points doesn't need coverage files.
1387 for (const std::string &ObjFile : ClInputFiles) {
1388 printCovPoints(ObjFile, OS&: outs());
1389 }
1390 return 0;
1391 }
1392
1393 auto Coverage = readSymbolizeAndMergeCmdArguments(FileNames: ClInputFiles);
1394 failIf(B: !Coverage, E: "No valid coverage files given.");
1395
1396 switch (Action) {
1397 case CoveredFunctionsAction: {
1398 printCoveredFunctions(CovData: *Coverage, OS&: outs());
1399 return 0;
1400 }
1401 case NotCoveredFunctionsAction: {
1402 printNotCoveredFunctions(CovData: *Coverage, OS&: outs());
1403 return 0;
1404 }
1405 case StatsAction: {
1406 outs() << computeStats(Coverage: *Coverage);
1407 return 0;
1408 }
1409 case MergeAction:
1410 case SymbolizeAction: { // merge & symbolize are synonims.
1411 json::OStream W(outs(), 2);
1412 W << *Coverage;
1413 return 0;
1414 }
1415 case HtmlReportAction:
1416 errs() << "-html-report option is removed: "
1417 "use -symbolize & coverage-report-server.py instead\n";
1418 return 1;
1419 case DiffAction:
1420 case UnionAction:
1421 case PrintAction:
1422 case PrintCovPointsAction:
1423 llvm_unreachable("unsupported action");
1424 }
1425
1426 return 0;
1427}
1428