//===- SSAFFormat.cpp - SSAF Format Tool ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SSAF format tool that validates and converts
// TU and LU summaries between registered serialization formats.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h"
15#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h"
16#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h"
17#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h"
18#include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep
19#include "clang/ScalableStaticAnalysisFramework/Tool/Utils.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/FileSystem.h"
25#include "llvm/Support/Format.h"
26#include "llvm/Support/FormatVariadic.h"
27#include "llvm/Support/InitLLVM.h"
28#include "llvm/Support/Path.h"
29#include "llvm/Support/raw_ostream.h"
30#include <memory>
31#include <optional>
32#include <string>
33#include <system_error>
34
35using namespace llvm;
36using namespace clang::ssaf;
37
38namespace {
39
40namespace fs = llvm::sys::fs;
41namespace path = llvm::sys::path;
42
43//===----------------------------------------------------------------------===//
44// Summary Type
45//===----------------------------------------------------------------------===//
46
/// Kind of summary the tool operates on; selected on the command line with
/// `--type`.
enum class SummaryType {
  TU, ///< Translation unit summary (`--type tu`).
  LU, ///< Link unit summary (`--type lu`).
};
48
49//===----------------------------------------------------------------------===//
50// Command-Line Options
51//===----------------------------------------------------------------------===//
52
53cl::OptionCategory SsafFormatCategory("clang-ssaf-format options");
54
55cl::list<std::string> LoadPlugins("load",
56 cl::desc("Load a plugin shared library"),
57 cl::value_desc("path"),
58 cl::cat(SsafFormatCategory));
59
60// --type and the input file are required for convert/validateInput operations
61// but must be optional at the cl layer so that --list can be used standalone.
62cl::opt<SummaryType> Type(
63 "type", cl::desc("Summary type (required unless --list is given)"),
64 cl::values(clEnumValN(SummaryType::TU, "tu", "Translation unit summary"),
65 clEnumValN(SummaryType::LU, "lu", "Link unit summary")),
66 cl::cat(SsafFormatCategory));
67
68cl::opt<std::string> InputPath(cl::Positional, cl::desc("<input file>"),
69 cl::cat(SsafFormatCategory));
70
71cl::opt<std::string> OutputPath("o", cl::desc("Output summary path"),
72 cl::value_desc("path"),
73 cl::cat(SsafFormatCategory));
74
75cl::opt<bool> UseEncoding("encoding",
76 cl::desc("Read and write summary encodings rather "
77 "than decoded summaries"),
78 cl::cat(SsafFormatCategory));
79
80cl::opt<bool> ListFormats("list",
81 cl::desc("List registered serialization formats and "
82 "analyses, then exit"),
83 cl::init(Val: false), cl::cat(SsafFormatCategory));
84
85//===----------------------------------------------------------------------===//
86// Error Messages
87//===----------------------------------------------------------------------===//
88
/// Diagnostic texts used only by this tool.
namespace LocalErrorMessages {

/// Reported when the resolved output path already exists.
constexpr const char *OutputFileAlreadyExists = "Output file already exists";

/// Reported when the resolved output path equals the resolved input path.
constexpr const char *InputOutputSamePath =
    "Input and Output resolve to the same path";

} // namespace LocalErrorMessages
97
98//===----------------------------------------------------------------------===//
99// Format Listing
100//===----------------------------------------------------------------------===//
101
/// Number of spaces printed before each format number in the listing.
constexpr size_t FormatIndent = 4;
/// Number of spaces an analysis number is indented past the format-name
/// column.
constexpr size_t AnalysisIndent = 4;
104
/// Name and description of one analysis reported by a serialization format.
struct AnalysisData {
  std::string Name;
  std::string Desc;
};
109
110struct FormatData {
111 std::string Name;
112 std::string Desc;
113 llvm::SmallVector<AnalysisData> Analyses;
114};
115
/// Column positions and field widths used to align the `--list` output.
///
/// Every member defaults to zero so that a default-constructed layout never
/// holds indeterminate values; the type remains an aggregate.
struct PrintLayout {
  /// Width of the widest format number, including its trailing '.'.
  size_t FormatNumWidth = 0;
  /// Length of the longest format name.
  size_t MaxFormatNameWidth = 0;
  /// Column at which format names start; the "Analyses:" line is indented to
  /// the same column.
  size_t FormatNameCol = 0;
  /// Column at which analysis numbers start.
  size_t AnalysisCol = 0;
  /// Width of the widest analysis number ("<format>.<analysis>.").
  size_t AnalysisNumWidth = 0;
  /// Length of the longest analysis name across all formats.
  size_t MaxAnalysisNameWidth = 0;
};
124
125llvm::SmallVector<FormatData> collectFormats() {
126 llvm::SmallVector<FormatData> Formats;
127 for (const auto &Entry : SerializationFormatRegistry::entries()) {
128 FormatData FD;
129 FD.Name = Entry.getName().str();
130 FD.Desc = Entry.getDesc().str();
131 auto Format = Entry.instantiate();
132 Format->forEachRegisteredAnalysis(
133 Callback: [&](llvm::StringRef Name, llvm::StringRef Desc) {
134 FD.Analyses.push_back(Elt: {.Name: Name.str(), .Desc: Desc.str()});
135 });
136 Formats.push_back(Elt: std::move(FD));
137 }
138 return Formats;
139}
140
141void printAnalysis(const AnalysisData &AD, size_t AnalysisIndex,
142 size_t FormatIndex, const PrintLayout &Layout) {
143 std::string AnalysisNum = std::to_string(val: FormatIndex + 1) + "." +
144 std::to_string(val: AnalysisIndex + 1) + ".";
145 llvm::outs().indent(NumSpaces: Layout.AnalysisCol)
146 << llvm::right_justify(Str: AnalysisNum, Width: Layout.AnalysisNumWidth) << " "
147 << llvm::left_justify(Str: AD.Name, Width: Layout.MaxAnalysisNameWidth) << " - "
148 << AD.Desc << "\n";
149}
150
151void printAnalyses(const llvm::SmallVector<AnalysisData> &Analyses,
152 size_t FormatIndex, const PrintLayout &Layout) {
153 if (Analyses.empty()) {
154 llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses: (none)\n";
155 return;
156 }
157
158 llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses:\n";
159
160 for (size_t AnalysisIndex = 0; AnalysisIndex < Analyses.size();
161 ++AnalysisIndex) {
162 printAnalysis(AD: Analyses[AnalysisIndex], AnalysisIndex, FormatIndex, Layout);
163 }
164}
165
166void printFormat(const FormatData &FD, size_t FormatIndex,
167 const PrintLayout &Layout) {
168 // Blank line before each format entry for readability.
169 llvm::outs() << "\n";
170
171 std::string FormatNum = std::to_string(val: FormatIndex + 1) + ".";
172 llvm::outs().indent(NumSpaces: FormatIndent)
173 << llvm::right_justify(Str: FormatNum, Width: Layout.FormatNumWidth) << " "
174 << llvm::left_justify(Str: FD.Name, Width: Layout.MaxFormatNameWidth) << " - "
175 << FD.Desc << "\n";
176
177 printAnalyses(Analyses: FD.Analyses, FormatIndex, Layout);
178}
179
180void printFormats(const llvm::SmallVector<FormatData> &Formats,
181 const PrintLayout &Layout) {
182 llvm::outs() << "Registered serialization formats:\n";
183 for (size_t FormatIndex = 0; FormatIndex < Formats.size(); ++FormatIndex) {
184 printFormat(FD: Formats[FormatIndex], FormatIndex, Layout);
185 }
186}
187
188PrintLayout computePrintLayout(const llvm::SmallVector<FormatData> &Formats) {
189 size_t MaxFormatNameWidth = 0;
190 size_t MaxAnalysisCount = 0;
191 size_t MaxAnalysisNameWidth = 0;
192 for (const auto &FD : Formats) {
193 MaxFormatNameWidth = std::max(a: MaxFormatNameWidth, b: FD.Name.size());
194 MaxAnalysisCount = std::max(a: MaxAnalysisCount, b: FD.Analyses.size());
195 for (const auto &AD : FD.Analyses) {
196 MaxAnalysisNameWidth = std::max(a: MaxAnalysisNameWidth, b: AD.Name.size());
197 }
198 }
199
200 // Width of the widest format number string, e.g. "10." -> 3.
201 size_t FormatNumWidth =
202 std::to_string(val: Formats.size()).size() + 1; // +1 for '.'
203 // Width of the widest analysis number string, e.g. "10.10." -> 6.
204 size_t AnalysisNumWidth = std::to_string(val: Formats.size()).size() + 1 +
205 std::to_string(val: MaxAnalysisCount).size() + 1;
206
207 // Where the format name starts (also where "Analyses:" is indented to).
208 size_t FormatNameCol = FormatIndent + FormatNumWidth + 1;
209 // Where the analysis number starts.
210 size_t AnalysisCol = FormatNameCol + AnalysisIndent;
211
212 return {
213 .FormatNumWidth: FormatNumWidth, .MaxFormatNameWidth: MaxFormatNameWidth, .FormatNameCol: FormatNameCol,
214 .AnalysisCol: AnalysisCol, .AnalysisNumWidth: AnalysisNumWidth, .MaxAnalysisNameWidth: MaxAnalysisNameWidth,
215 };
216}
217
218void listFormats() {
219 llvm::SmallVector<FormatData> Formats = collectFormats();
220 if (Formats.empty()) {
221 llvm::outs() << "No serialization formats registered.\n";
222 return;
223 }
224 printFormats(Formats, Layout: computePrintLayout(Formats));
225}
226
227//===----------------------------------------------------------------------===//
228// Input Validation
229//===----------------------------------------------------------------------===//
230
231struct FormatInput {
232 SummaryFile InputFile;
233 std::optional<SummaryFile> OutputFile;
234};
235
236FormatInput validateInput() {
237 assert(!ListFormats);
238
239 FormatInput FI;
240
241 // Validate Type explicitly since we don't want to specify it if --list is
242 // provided.
243 if (!Type.getNumOccurrences()) {
244 fail(Msg: "'--type' option is required");
245 }
246
247 // Validate the input path.
248 {
249 if (InputPath.empty()) {
250 fail(Msg: "no input file specified");
251 }
252
253 llvm::SmallString<256> RealInputPath;
254 std::error_code EC =
255 fs::real_path(path: InputPath, output&: RealInputPath, /*expand_tilde=*/true);
256 if (EC) {
257 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: InputPath, Args: EC.message());
258 }
259
260 FI.InputFile = SummaryFile::fromPath(Path: RealInputPath);
261 }
262
263 // Validate the output path.
264 if (!OutputPath.empty()) {
265 llvm::StringRef ParentDir = path::parent_path(path: OutputPath);
266 llvm::StringRef DirToCheck = ParentDir.empty() ? "." : ParentDir;
267
268 if (!fs::exists(Path: DirToCheck)) {
269 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath,
270 Args: ErrorMessages::OutputDirectoryMissing);
271 }
272
273 // Reconstruct the real output path from the real parent directory and the
274 // output filename. The output file does not exist yet so real_path cannot
275 // be called on the full output path directly.
276 llvm::SmallString<256> RealParentDir;
277 if (std::error_code EC = fs::real_path(path: DirToCheck, output&: RealParentDir)) {
278 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, Args: EC.message());
279 }
280
281 llvm::SmallString<256> RealOutputPath = RealParentDir;
282 path::append(path&: RealOutputPath, a: path::filename(path: OutputPath));
283
284 if (RealOutputPath == FI.InputFile.Path) {
285 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath,
286 Args: LocalErrorMessages::InputOutputSamePath);
287 }
288
289 if (fs::exists(Path: RealOutputPath)) {
290 fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath,
291 Args: LocalErrorMessages::OutputFileAlreadyExists);
292 }
293
294 FI.OutputFile = SummaryFile::fromPath(Path: RealOutputPath);
295 }
296 return FI;
297}
298
299//===----------------------------------------------------------------------===//
300// Format Conversion
301//===----------------------------------------------------------------------===//
302
303template <typename ReadFn, typename WriteFn>
304void run(const FormatInput &FI, ReadFn Read, WriteFn Write) {
305 auto ExpectedResult = (FI.InputFile.Format->*Read)(FI.InputFile.Path);
306 if (!ExpectedResult) {
307 fail(ExpectedResult.takeError());
308 }
309
310 if (!FI.OutputFile) {
311 return;
312 }
313
314 auto Err =
315 (FI.OutputFile->Format->*Write)(*ExpectedResult, FI.OutputFile->Path);
316 if (Err) {
317 fail(std::move(Err));
318 }
319}
320
321void convert(const FormatInput &FI) {
322 switch (Type) {
323 case SummaryType::TU:
324 if (UseEncoding) {
325 run(FI, Read: &SerializationFormat::readTUSummaryEncoding,
326 Write: &SerializationFormat::writeTUSummaryEncoding);
327 } else {
328 run(FI, Read: &SerializationFormat::readTUSummary,
329 Write: &SerializationFormat::writeTUSummary);
330 }
331 return;
332 case SummaryType::LU:
333 if (UseEncoding) {
334 run(FI, Read: &SerializationFormat::readLUSummaryEncoding,
335 Write: &SerializationFormat::writeLUSummaryEncoding);
336 } else {
337 run(FI, Read: &SerializationFormat::readLUSummary,
338 Write: &SerializationFormat::writeLUSummary);
339 }
340 return;
341 }
342
343 llvm_unreachable("Unhandled SummaryType variant");
344}
345
346} // namespace
347
348//===----------------------------------------------------------------------===//
349// Driver
350//===----------------------------------------------------------------------===//
351
352int main(int argc, const char **argv) {
353 llvm::StringRef ToolHeading = "SSAF Format";
354
355 InitLLVM X(argc, argv);
356 initTool(argc, argv, Version: "0.1", Category&: SsafFormatCategory, ToolHeading);
357
358 loadPlugins(Paths: LoadPlugins);
359
360 if (ListFormats) {
361 listFormats();
362 } else {
363 FormatInput FI = validateInput();
364 convert(FI);
365 }
366
367 return 0;
368}
369