1//===- SSAFFormat.cpp - SSAF Format Tool ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SSAF format tool that validates and converts
10// TU and LU summaries between registered serialization formats.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h"
15#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h"
16#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h"
17#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h"
18#include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/DynamicLibrary.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/FileSystem.h"
25#include "llvm/Support/Format.h"
26#include "llvm/Support/FormatVariadic.h"
27#include "llvm/Support/InitLLVM.h"
28#include "llvm/Support/Path.h"
29#include "llvm/Support/Process.h"
30#include "llvm/Support/WithColor.h"
31#include "llvm/Support/raw_ostream.h"
32#include <memory>
33#include <optional>
34#include <string>
35#include <system_error>
36
37using namespace llvm;
38using namespace clang::ssaf;
39
40namespace {
41
42namespace fs = llvm::sys::fs;
43namespace path = llvm::sys::path;
44
45//===----------------------------------------------------------------------===//
46// Summary Type
47//===----------------------------------------------------------------------===//
48
/// Kind of summary the tool operates on; selected with the --type option.
enum class SummaryType {
  TU, ///< Translation unit summary.
  LU, ///< Link unit summary.
};
50
51//===----------------------------------------------------------------------===//
52// Command-Line Options
53//===----------------------------------------------------------------------===//
54
55cl::OptionCategory SsafFormatCategory("clang-ssaf-format options");
56
57cl::list<std::string> LoadPlugins("load",
58 cl::desc("Load a plugin shared library"),
59 cl::value_desc("path"),
60 cl::cat(SsafFormatCategory));
61
62// --type and the input file are required for convert/validateInput operations
63// but must be optional at the cl layer so that --list can be used standalone.
64cl::opt<SummaryType> Type(
65 "type", cl::desc("Summary type (required unless --list is given)"),
66 cl::values(clEnumValN(SummaryType::TU, "tu", "Translation unit summary"),
67 clEnumValN(SummaryType::LU, "lu", "Link unit summary")),
68 cl::cat(SsafFormatCategory));
69
70cl::opt<std::string> InputPath(cl::Positional, cl::desc("<input file>"),
71 cl::cat(SsafFormatCategory));
72
73cl::opt<std::string> OutputPath("o", cl::desc("Output summary path"),
74 cl::value_desc("path"),
75 cl::cat(SsafFormatCategory));
76
77cl::opt<bool> UseEncoding("encoding",
78 cl::desc("Read and write summary encodings rather "
79 "than decoded summaries"),
80 cl::cat(SsafFormatCategory));
81
82cl::opt<bool> ListFormats("list",
83 cl::desc("List registered serialization formats and "
84 "analyses, then exit"),
85 cl::init(Val: false), cl::cat(SsafFormatCategory));
86
87llvm::StringRef ToolName;
88
89void printVersion(llvm::raw_ostream &OS) { OS << ToolName << " 0.1\n"; }
90
91//===----------------------------------------------------------------------===//
92// Error Messages
93//===----------------------------------------------------------------------===//
94
/// Diagnostic texts used by the tool. Entries containing "{0}"/"{1}" are
/// llvm::formatv format strings; the rest are plain messages that are
/// substituted into CannotValidateSummary as its second argument.
namespace ErrorMessages {

/// {0}: plugin path, {1}: loader error text.
constexpr const char *FailedToLoadPlugin{"failed to load plugin '{0}': {1}"};

/// {0}: summary path, {1}: reason for the failure.
constexpr const char *CannotValidateSummary{
    "failed to validate summary '{0}': {1}"};

/// The summary path has no file extension.
constexpr const char *ExtensionNotSupplied{"Extension not supplied"};

/// {0}: extension for which no serialization format is registered.
constexpr const char *NoFormatForExtension{
    "Format not registered for extension '{0}'"};

/// The directory that should contain the output file is missing.
constexpr const char *OutputDirectoryMissing{"Parent directory does not exist"};

/// The output file is already present on disk.
constexpr const char *OutputFileAlreadyExists{"Output file already exists"};

/// The input and output paths resolve to the same file.
constexpr const char *InputOutputSamePath{
    "Input and Output resolve to the same path"};

} // namespace ErrorMessages
116
117//===----------------------------------------------------------------------===//
118// Diagnostic Utilities
119//===----------------------------------------------------------------------===//
120
121[[noreturn]] void fail(const char *Msg) {
122 llvm::WithColor::error(OS&: llvm::errs(), Prefix: ToolName) << Msg << "\n";
123 llvm::sys::Process::Exit(RetCode: 1);
124}
125
126template <typename... Ts>
127[[noreturn]] void fail(const char *Fmt, Ts &&...Args) {
128 std::string Message = llvm::formatv(Fmt, std::forward<Ts>(Args)...);
129 fail(Msg: Message.data());
130}
131
132[[noreturn]] void fail(llvm::Error Err) {
133 fail(Msg: toString(E: std::move(Err)).data());
134}
135
136//===----------------------------------------------------------------------===//
137// Format Registry
138//===----------------------------------------------------------------------===//
139
140// FIXME: This will be revisited after we add support for registering formats
141// with extensions.
142SerializationFormat *getFormatForExtension(llvm::StringRef Extension) {
143 static llvm::SmallVector<
144 std::pair<std::string, std::unique_ptr<SerializationFormat>>, 4>
145 ExtensionFormatList;
146
147 // Most recently used format is most likely to be reused again.
148 auto ReversedList = llvm::reverse(C&: ExtensionFormatList);
149 auto It = llvm::find_if(Range&: ReversedList, P: [&](const auto &Entry) {
150 return Entry.first == Extension;
151 });
152 if (It != ReversedList.end()) {
153 return It->second.get();
154 }
155
156 if (!isFormatRegistered(FormatName: Extension)) {
157 return nullptr;
158 }
159
160 auto Format = makeFormat(FormatName: Extension);
161 SerializationFormat *Result = Format.get();
162 assert(Result);
163
164 ExtensionFormatList.emplace_back(Args&: Extension, Args: std::move(Format));
165
166 return Result;
167}
168
169//===----------------------------------------------------------------------===//
170// Format Listing
171//===----------------------------------------------------------------------===//
172
/// Number of spaces printed before each format entry in the --list output.
constexpr size_t FormatIndent{4};
/// Extra indentation of analysis entries relative to the format name column.
constexpr size_t AnalysisIndent{4};
175
/// Name and description of one analysis registered with a format.
struct AnalysisData {
  std::string Name; ///< Analysis name.
  std::string Desc; ///< Analysis description.
};
180
181struct FormatData {
182 std::string Name;
183 std::string Desc;
184 llvm::SmallVector<AnalysisData> Analyses;
185};
186
/// Column positions and field widths used to align the --list output.
/// Member order matters: instances are created by positional aggregate
/// initialization.
struct PrintLayout {
  size_t FormatNumWidth;       ///< Width of the widest format number, e.g. "10.".
  size_t MaxFormatNameWidth;   ///< Length of the longest format name.
  size_t FormatNameCol;        ///< Column where format names start.
  size_t AnalysisCol;          ///< Column where analysis numbers start.
  size_t AnalysisNumWidth;     ///< Width of the widest analysis number.
  size_t MaxAnalysisNameWidth; ///< Length of the longest analysis name.
};
195
196llvm::SmallVector<FormatData> collectFormats() {
197 llvm::SmallVector<FormatData> Formats;
198 for (const auto &Entry : SerializationFormatRegistry::entries()) {
199 FormatData FD;
200 FD.Name = Entry.getName().str();
201 FD.Desc = Entry.getDesc().str();
202 auto Format = Entry.instantiate();
203 Format->forEachRegisteredAnalysis(
204 Callback: [&](llvm::StringRef Name, llvm::StringRef Desc) {
205 FD.Analyses.push_back(Elt: {.Name: Name.str(), .Desc: Desc.str()});
206 });
207 Formats.push_back(Elt: std::move(FD));
208 }
209 return Formats;
210}
211
212void printAnalysis(const AnalysisData &AD, size_t AnalysisIndex,
213 size_t FormatIndex, const PrintLayout &Layout) {
214 std::string AnalysisNum = std::to_string(val: FormatIndex + 1) + "." +
215 std::to_string(val: AnalysisIndex + 1) + ".";
216 llvm::outs().indent(NumSpaces: Layout.AnalysisCol)
217 << llvm::right_justify(Str: AnalysisNum, Width: Layout.AnalysisNumWidth) << " "
218 << llvm::left_justify(Str: AD.Name, Width: Layout.MaxAnalysisNameWidth) << " "
219 << AD.Desc << "\n";
220}
221
222void printAnalyses(const llvm::SmallVector<AnalysisData> &Analyses,
223 size_t FormatIndex, const PrintLayout &Layout) {
224 if (Analyses.empty()) {
225 llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses: (none)\n";
226 return;
227 }
228
229 llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses:\n";
230
231 for (size_t AnalysisIndex = 0; AnalysisIndex < Analyses.size();
232 ++AnalysisIndex) {
233 printAnalysis(AD: Analyses[AnalysisIndex], AnalysisIndex, FormatIndex, Layout);
234 }
235}
236
237void printFormat(const FormatData &FD, size_t FormatIndex,
238 const PrintLayout &Layout) {
239 // Blank line before each format entry for readability.
240 llvm::outs() << "\n";
241
242 std::string FormatNum = std::to_string(val: FormatIndex + 1) + ".";
243 llvm::outs().indent(NumSpaces: FormatIndent)
244 << llvm::right_justify(Str: FormatNum, Width: Layout.FormatNumWidth) << " "
245 << llvm::left_justify(Str: FD.Name, Width: Layout.MaxFormatNameWidth) << " "
246 << FD.Desc << "\n";
247
248 printAnalyses(Analyses: FD.Analyses, FormatIndex, Layout);
249}
250
251void printFormats(const llvm::SmallVector<FormatData> &Formats,
252 const PrintLayout &Layout) {
253 llvm::outs() << "Registered serialization formats:\n";
254 for (size_t FormatIndex = 0; FormatIndex < Formats.size(); ++FormatIndex) {
255 printFormat(FD: Formats[FormatIndex], FormatIndex, Layout);
256 }
257}
258
259PrintLayout computePrintLayout(const llvm::SmallVector<FormatData> &Formats) {
260 size_t MaxFormatNameWidth = 0;
261 size_t MaxAnalysisCount = 0;
262 size_t MaxAnalysisNameWidth = 0;
263 for (const auto &FD : Formats) {
264 MaxFormatNameWidth = std::max(a: MaxFormatNameWidth, b: FD.Name.size());
265 MaxAnalysisCount = std::max(a: MaxAnalysisCount, b: FD.Analyses.size());
266 for (const auto &AD : FD.Analyses) {
267 MaxAnalysisNameWidth = std::max(a: MaxAnalysisNameWidth, b: AD.Name.size());
268 }
269 }
270
271 // Width of the widest format number string, e.g. "10." -> 3.
272 size_t FormatNumWidth =
273 std::to_string(val: Formats.size()).size() + 1; // +1 for '.'
274 // Width of the widest analysis number string, e.g. "10.10." -> 6.
275 size_t AnalysisNumWidth = std::to_string(val: Formats.size()).size() + 1 +
276 std::to_string(val: MaxAnalysisCount).size() + 1;
277
278 // Where the format name starts (also where "Analyses:" is indented to).
279 size_t FormatNameCol = FormatIndent + FormatNumWidth + 1;
280 // Where the analysis number starts.
281 size_t AnalysisCol = FormatNameCol + AnalysisIndent;
282
283 return {
284 .FormatNumWidth: FormatNumWidth, .MaxFormatNameWidth: MaxFormatNameWidth, .FormatNameCol: FormatNameCol,
285 .AnalysisCol: AnalysisCol, .AnalysisNumWidth: AnalysisNumWidth, .MaxAnalysisNameWidth: MaxAnalysisNameWidth,
286 };
287}
288
289void listFormats() {
290 llvm::SmallVector<FormatData> Formats = collectFormats();
291 if (Formats.empty()) {
292 llvm::outs() << "No serialization formats registered.\n";
293 return;
294 }
295 printFormats(Formats, Layout: computePrintLayout(Formats));
296}
297
298//===----------------------------------------------------------------------===//
299// Plugin Loading
300//===----------------------------------------------------------------------===//
301
302void loadPlugins() {
303 for (const auto &PluginPath : LoadPlugins) {
304 std::string ErrMsg;
305 if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename: PluginPath.c_str(),
306 ErrMsg: &ErrMsg)) {
307 fail(Fmt: ErrorMessages::FailedToLoadPlugin, Args: PluginPath, Args&: ErrMsg);
308 }
309 }
310}
311
312//===----------------------------------------------------------------------===//
313// Input Validation
314//===----------------------------------------------------------------------===//
315
316struct SummaryFile {
317 std::string Path;
318 SerializationFormat *Format = nullptr;
319
320 static SummaryFile fromPath(llvm::StringRef Path) {
321 llvm::StringRef Extension = path::extension(path: Path);
322 if (Extension.empty()) {
323 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: Path,
324 Args: ErrorMessages::ExtensionNotSupplied);
325 }
326
327 Extension = Extension.drop_front();
328 SerializationFormat *Format = getFormatForExtension(Extension);
329 if (!Format) {
330 std::string Msg =
331 llvm::formatv(Fmt: ErrorMessages::NoFormatForExtension, Vals&: Extension);
332 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: Path, Args&: Msg);
333 }
334
335 return {.Path: Path.str(), .Format: Format};
336 }
337};
338
339struct FormatInput {
340 SummaryFile InputFile;
341 std::optional<SummaryFile> OutputFile;
342};
343
344FormatInput validateInput() {
345 assert(!ListFormats);
346
347 FormatInput FI;
348
349 // Validate Type explicitly since we don't want to specify it if --list is
350 // provided.
351 if (!Type.getNumOccurrences()) {
352 fail(Msg: "'--type' option is required");
353 }
354
355 // Validate the input path.
356 {
357 if (InputPath.empty()) {
358 fail(Msg: "no input file specified");
359 }
360
361 llvm::SmallString<256> RealInputPath;
362 std::error_code EC =
363 fs::real_path(path: InputPath, output&: RealInputPath, /*expand_tilde=*/true);
364 if (EC) {
365 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: InputPath, Args: EC.message());
366 }
367
368 FI.InputFile = SummaryFile::fromPath(Path: RealInputPath);
369 }
370
371 // Validate the output path.
372 if (!OutputPath.empty()) {
373 llvm::StringRef ParentDir = path::parent_path(path: OutputPath);
374 llvm::StringRef DirToCheck = ParentDir.empty() ? "." : ParentDir;
375
376 if (!fs::exists(Path: DirToCheck)) {
377 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath,
378 Args: ErrorMessages::OutputDirectoryMissing);
379 }
380
381 // Reconstruct the real output path from the real parent directory and the
382 // output filename. The output file does not exist yet so real_path cannot
383 // be called on the full output path directly.
384 llvm::SmallString<256> RealParentDir;
385 if (std::error_code EC = fs::real_path(path: DirToCheck, output&: RealParentDir)) {
386 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, Args: EC.message());
387 }
388
389 llvm::SmallString<256> RealOutputPath = RealParentDir;
390 path::append(path&: RealOutputPath, a: path::filename(path: OutputPath));
391
392 if (RealOutputPath == FI.InputFile.Path) {
393 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath,
394 Args: ErrorMessages::InputOutputSamePath);
395 }
396
397 if (fs::exists(Path: RealOutputPath)) {
398 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath,
399 Args: ErrorMessages::OutputFileAlreadyExists);
400 }
401
402 FI.OutputFile = SummaryFile::fromPath(Path: RealOutputPath);
403 }
404 return FI;
405}
406
407//===----------------------------------------------------------------------===//
408// Format Conversion
409//===----------------------------------------------------------------------===//
410
411template <typename ReadFn, typename WriteFn>
412void run(const FormatInput &FI, ReadFn Read, WriteFn Write) {
413 auto ExpectedResult = (FI.InputFile.Format->*Read)(FI.InputFile.Path);
414 if (!ExpectedResult) {
415 fail(ExpectedResult.takeError());
416 }
417
418 if (!FI.OutputFile) {
419 return;
420 }
421
422 auto Err =
423 (FI.OutputFile->Format->*Write)(*ExpectedResult, FI.OutputFile->Path);
424 if (Err) {
425 fail(std::move(Err));
426 }
427}
428
429void convert(const FormatInput &FI) {
430 switch (Type) {
431 case SummaryType::TU:
432 if (UseEncoding) {
433 run(FI, Read: &SerializationFormat::readTUSummaryEncoding,
434 Write: &SerializationFormat::writeTUSummaryEncoding);
435 } else {
436 run(FI, Read: &SerializationFormat::readTUSummary,
437 Write: &SerializationFormat::writeTUSummary);
438 }
439 return;
440 case SummaryType::LU:
441 if (UseEncoding) {
442 run(FI, Read: &SerializationFormat::readLUSummaryEncoding,
443 Write: &SerializationFormat::writeLUSummaryEncoding);
444 } else {
445 run(FI, Read: &SerializationFormat::readLUSummary,
446 Write: &SerializationFormat::writeLUSummary);
447 }
448 return;
449 }
450
451 llvm_unreachable("Unhandled SummaryType variant");
452}
453
454} // namespace
455
456//===----------------------------------------------------------------------===//
457// Driver
458//===----------------------------------------------------------------------===//
459
460int main(int argc, const char **argv) {
461 InitLLVM X(argc, argv);
462 // path::stem strips the .exe extension on Windows so ToolName is consistent.
463 ToolName = path::stem(path: argv[0]);
464
465 cl::HideUnrelatedOptions(Category&: SsafFormatCategory);
466 cl::SetVersionPrinter(printVersion);
467 cl::ParseCommandLineOptions(argc, argv, Overview: "SSAF Format\n");
468
469 loadPlugins();
470
471 if (ListFormats) {
472 listFormats();
473 } else {
474 FormatInput FI = validateInput();
475 convert(FI);
476 }
477
478 return 0;
479}
480