| 1 | //===- SSAFFormat.cpp - SSAF Format Tool ----------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the SSAF format tool that validates and converts |
| 10 | // TU and LU summaries between registered serialization formats. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" |
| 15 | #include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" |
| 16 | #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" |
| 17 | #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" |
| 18 | #include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep |
| 19 | #include "clang/ScalableStaticAnalysisFramework/Tool/Utils.h" |
| 20 | #include "llvm/ADT/STLExtras.h" |
| 21 | #include "llvm/ADT/SmallVector.h" |
| 22 | #include "llvm/Support/CommandLine.h" |
| 23 | #include "llvm/Support/ErrorHandling.h" |
| 24 | #include "llvm/Support/FileSystem.h" |
| 25 | #include "llvm/Support/Format.h" |
| 26 | #include "llvm/Support/FormatVariadic.h" |
| 27 | #include "llvm/Support/InitLLVM.h" |
| 28 | #include "llvm/Support/Path.h" |
| 29 | #include "llvm/Support/raw_ostream.h" |
| 30 | #include <memory> |
| 31 | #include <optional> |
| 32 | #include <string> |
| 33 | #include <system_error> |
| 34 | |
| 35 | using namespace llvm; |
| 36 | using namespace clang::ssaf; |
| 37 | |
| 38 | namespace { |
| 39 | |
| 40 | namespace fs = llvm::sys::fs; |
| 41 | namespace path = llvm::sys::path; |
| 42 | |
| 43 | //===----------------------------------------------------------------------===// |
| 44 | // Summary Type |
| 45 | //===----------------------------------------------------------------------===// |
| 46 | |
/// Kind of summary the tool operates on; selected with the `--type` option.
enum class SummaryType {
  TU, ///< Translation unit summary.
  LU, ///< Link unit summary.
};
| 48 | |
| 49 | //===----------------------------------------------------------------------===// |
| 50 | // Command-Line Options |
| 51 | //===----------------------------------------------------------------------===// |
| 52 | |
| 53 | cl::OptionCategory SsafFormatCategory("clang-ssaf-format options" ); |
| 54 | |
| 55 | cl::list<std::string> LoadPlugins("load" , |
| 56 | cl::desc("Load a plugin shared library" ), |
| 57 | cl::value_desc("path" ), |
| 58 | cl::cat(SsafFormatCategory)); |
| 59 | |
| 60 | // --type and the input file are required for convert/validateInput operations |
| 61 | // but must be optional at the cl layer so that --list can be used standalone. |
| 62 | cl::opt<SummaryType> Type( |
| 63 | "type" , cl::desc("Summary type (required unless --list is given)" ), |
| 64 | cl::values(clEnumValN(SummaryType::TU, "tu" , "Translation unit summary" ), |
| 65 | clEnumValN(SummaryType::LU, "lu" , "Link unit summary" )), |
| 66 | cl::cat(SsafFormatCategory)); |
| 67 | |
| 68 | cl::opt<std::string> InputPath(cl::Positional, cl::desc("<input file>" ), |
| 69 | cl::cat(SsafFormatCategory)); |
| 70 | |
| 71 | cl::opt<std::string> OutputPath("o" , cl::desc("Output summary path" ), |
| 72 | cl::value_desc("path" ), |
| 73 | cl::cat(SsafFormatCategory)); |
| 74 | |
| 75 | cl::opt<bool> UseEncoding("encoding" , |
| 76 | cl::desc("Read and write summary encodings rather " |
| 77 | "than decoded summaries" ), |
| 78 | cl::cat(SsafFormatCategory)); |
| 79 | |
| 80 | cl::opt<bool> ListFormats("list" , |
| 81 | cl::desc("List registered serialization formats and " |
| 82 | "analyses, then exit" ), |
| 83 | cl::init(Val: false), cl::cat(SsafFormatCategory)); |
| 84 | |
| 85 | //===----------------------------------------------------------------------===// |
| 86 | // Error Messages |
| 87 | //===----------------------------------------------------------------------===// |
| 88 | |
/// Diagnostic fragments that are specific to this tool.
namespace LocalErrorMessages {

/// Reported when the requested output path is the same file as the input.
constexpr const char *InputOutputSamePath =
    "Input and Output resolve to the same path";

/// Reported when something already exists at the requested output path.
constexpr const char *OutputFileAlreadyExists = "Output file already exists";

} // namespace LocalErrorMessages
| 97 | |
| 98 | //===----------------------------------------------------------------------===// |
| 99 | // Format Listing |
| 100 | //===----------------------------------------------------------------------===// |
| 101 | |
/// Number of spaces printed in front of each numbered format entry.
constexpr size_t FormatIndent = 4;

/// Extra indentation of analysis entries relative to the format name column.
constexpr size_t AnalysisIndent = 4;
| 104 | |
/// Name and description of one analysis reported by a serialization format.
struct AnalysisData {
  /// Analysis name; printed left-justified in the listing.
  std::string Name;
  /// Free-form description printed after the name.
  std::string Desc;
};
| 109 | |
| 110 | struct FormatData { |
| 111 | std::string Name; |
| 112 | std::string Desc; |
| 113 | llvm::SmallVector<AnalysisData> Analyses; |
| 114 | }; |
| 115 | |
/// Column widths and offsets shared by the --list printing helpers.
///
/// Every member defaults to zero so that a default-constructed layout never
/// holds indeterminate values. The type remains an aggregate, so positional
/// brace initialization (as done by computePrintLayout()) keeps working.
struct PrintLayout {
  size_t FormatNumWidth = 0;       // Width of the "N." format number field.
  size_t MaxFormatNameWidth = 0;   // Length of the longest format name.
  size_t FormatNameCol = 0;        // Column where format names start.
  size_t AnalysisCol = 0;          // Column where analysis numbers start.
  size_t AnalysisNumWidth = 0;     // Width of the "N.M." analysis number field.
  size_t MaxAnalysisNameWidth = 0; // Length of the longest analysis name.
};
| 124 | |
| 125 | llvm::SmallVector<FormatData> collectFormats() { |
| 126 | llvm::SmallVector<FormatData> Formats; |
| 127 | for (const auto &Entry : SerializationFormatRegistry::entries()) { |
| 128 | FormatData FD; |
| 129 | FD.Name = Entry.getName().str(); |
| 130 | FD.Desc = Entry.getDesc().str(); |
| 131 | auto Format = Entry.instantiate(); |
| 132 | Format->forEachRegisteredAnalysis( |
| 133 | Callback: [&](llvm::StringRef Name, llvm::StringRef Desc) { |
| 134 | FD.Analyses.push_back(Elt: {.Name: Name.str(), .Desc: Desc.str()}); |
| 135 | }); |
| 136 | Formats.push_back(Elt: std::move(FD)); |
| 137 | } |
| 138 | return Formats; |
| 139 | } |
| 140 | |
| 141 | void printAnalysis(const AnalysisData &AD, size_t AnalysisIndex, |
| 142 | size_t FormatIndex, const PrintLayout &Layout) { |
| 143 | std::string AnalysisNum = std::to_string(val: FormatIndex + 1) + "." + |
| 144 | std::to_string(val: AnalysisIndex + 1) + "." ; |
| 145 | llvm::outs().indent(NumSpaces: Layout.AnalysisCol) |
| 146 | << llvm::right_justify(Str: AnalysisNum, Width: Layout.AnalysisNumWidth) << " " |
| 147 | << llvm::left_justify(Str: AD.Name, Width: Layout.MaxAnalysisNameWidth) << " - " |
| 148 | << AD.Desc << "\n" ; |
| 149 | } |
| 150 | |
| 151 | void printAnalyses(const llvm::SmallVector<AnalysisData> &Analyses, |
| 152 | size_t FormatIndex, const PrintLayout &Layout) { |
| 153 | if (Analyses.empty()) { |
| 154 | llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses: (none)\n" ; |
| 155 | return; |
| 156 | } |
| 157 | |
| 158 | llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses:\n" ; |
| 159 | |
| 160 | for (size_t AnalysisIndex = 0; AnalysisIndex < Analyses.size(); |
| 161 | ++AnalysisIndex) { |
| 162 | printAnalysis(AD: Analyses[AnalysisIndex], AnalysisIndex, FormatIndex, Layout); |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | void printFormat(const FormatData &FD, size_t FormatIndex, |
| 167 | const PrintLayout &Layout) { |
| 168 | // Blank line before each format entry for readability. |
| 169 | llvm::outs() << "\n" ; |
| 170 | |
| 171 | std::string FormatNum = std::to_string(val: FormatIndex + 1) + "." ; |
| 172 | llvm::outs().indent(NumSpaces: FormatIndent) |
| 173 | << llvm::right_justify(Str: FormatNum, Width: Layout.FormatNumWidth) << " " |
| 174 | << llvm::left_justify(Str: FD.Name, Width: Layout.MaxFormatNameWidth) << " - " |
| 175 | << FD.Desc << "\n" ; |
| 176 | |
| 177 | printAnalyses(Analyses: FD.Analyses, FormatIndex, Layout); |
| 178 | } |
| 179 | |
| 180 | void printFormats(const llvm::SmallVector<FormatData> &Formats, |
| 181 | const PrintLayout &Layout) { |
| 182 | llvm::outs() << "Registered serialization formats:\n" ; |
| 183 | for (size_t FormatIndex = 0; FormatIndex < Formats.size(); ++FormatIndex) { |
| 184 | printFormat(FD: Formats[FormatIndex], FormatIndex, Layout); |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | PrintLayout computePrintLayout(const llvm::SmallVector<FormatData> &Formats) { |
| 189 | size_t MaxFormatNameWidth = 0; |
| 190 | size_t MaxAnalysisCount = 0; |
| 191 | size_t MaxAnalysisNameWidth = 0; |
| 192 | for (const auto &FD : Formats) { |
| 193 | MaxFormatNameWidth = std::max(a: MaxFormatNameWidth, b: FD.Name.size()); |
| 194 | MaxAnalysisCount = std::max(a: MaxAnalysisCount, b: FD.Analyses.size()); |
| 195 | for (const auto &AD : FD.Analyses) { |
| 196 | MaxAnalysisNameWidth = std::max(a: MaxAnalysisNameWidth, b: AD.Name.size()); |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | // Width of the widest format number string, e.g. "10." -> 3. |
| 201 | size_t FormatNumWidth = |
| 202 | std::to_string(val: Formats.size()).size() + 1; // +1 for '.' |
| 203 | // Width of the widest analysis number string, e.g. "10.10." -> 6. |
| 204 | size_t AnalysisNumWidth = std::to_string(val: Formats.size()).size() + 1 + |
| 205 | std::to_string(val: MaxAnalysisCount).size() + 1; |
| 206 | |
| 207 | // Where the format name starts (also where "Analyses:" is indented to). |
| 208 | size_t FormatNameCol = FormatIndent + FormatNumWidth + 1; |
| 209 | // Where the analysis number starts. |
| 210 | size_t AnalysisCol = FormatNameCol + AnalysisIndent; |
| 211 | |
| 212 | return { |
| 213 | .FormatNumWidth: FormatNumWidth, .MaxFormatNameWidth: MaxFormatNameWidth, .FormatNameCol: FormatNameCol, |
| 214 | .AnalysisCol: AnalysisCol, .AnalysisNumWidth: AnalysisNumWidth, .MaxAnalysisNameWidth: MaxAnalysisNameWidth, |
| 215 | }; |
| 216 | } |
| 217 | |
| 218 | void listFormats() { |
| 219 | llvm::SmallVector<FormatData> Formats = collectFormats(); |
| 220 | if (Formats.empty()) { |
| 221 | llvm::outs() << "No serialization formats registered.\n" ; |
| 222 | return; |
| 223 | } |
| 224 | printFormats(Formats, Layout: computePrintLayout(Formats)); |
| 225 | } |
| 226 | |
| 227 | //===----------------------------------------------------------------------===// |
| 228 | // Input Validation |
| 229 | //===----------------------------------------------------------------------===// |
| 230 | |
| 231 | struct FormatInput { |
| 232 | SummaryFile InputFile; |
| 233 | std::optional<SummaryFile> OutputFile; |
| 234 | }; |
| 235 | |
| 236 | FormatInput validateInput() { |
| 237 | assert(!ListFormats); |
| 238 | |
| 239 | FormatInput FI; |
| 240 | |
| 241 | // Validate Type explicitly since we don't want to specify it if --list is |
| 242 | // provided. |
| 243 | if (!Type.getNumOccurrences()) { |
| 244 | fail(Msg: "'--type' option is required" ); |
| 245 | } |
| 246 | |
| 247 | // Validate the input path. |
| 248 | { |
| 249 | if (InputPath.empty()) { |
| 250 | fail(Msg: "no input file specified" ); |
| 251 | } |
| 252 | |
| 253 | llvm::SmallString<256> RealInputPath; |
| 254 | std::error_code EC = |
| 255 | fs::real_path(path: InputPath, output&: RealInputPath, /*expand_tilde=*/true); |
| 256 | if (EC) { |
| 257 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: InputPath, Args: EC.message()); |
| 258 | } |
| 259 | |
| 260 | FI.InputFile = SummaryFile::fromPath(Path: RealInputPath); |
| 261 | } |
| 262 | |
| 263 | // Validate the output path. |
| 264 | if (!OutputPath.empty()) { |
| 265 | llvm::StringRef ParentDir = path::parent_path(path: OutputPath); |
| 266 | llvm::StringRef DirToCheck = ParentDir.empty() ? "." : ParentDir; |
| 267 | |
| 268 | if (!fs::exists(Path: DirToCheck)) { |
| 269 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, |
| 270 | Args: ErrorMessages::OutputDirectoryMissing); |
| 271 | } |
| 272 | |
| 273 | // Reconstruct the real output path from the real parent directory and the |
| 274 | // output filename. The output file does not exist yet so real_path cannot |
| 275 | // be called on the full output path directly. |
| 276 | llvm::SmallString<256> RealParentDir; |
| 277 | if (std::error_code EC = fs::real_path(path: DirToCheck, output&: RealParentDir)) { |
| 278 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, Args: EC.message()); |
| 279 | } |
| 280 | |
| 281 | llvm::SmallString<256> RealOutputPath = RealParentDir; |
| 282 | path::append(path&: RealOutputPath, a: path::filename(path: OutputPath)); |
| 283 | |
| 284 | if (RealOutputPath == FI.InputFile.Path) { |
| 285 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, |
| 286 | Args: LocalErrorMessages::InputOutputSamePath); |
| 287 | } |
| 288 | |
| 289 | if (fs::exists(Path: RealOutputPath)) { |
| 290 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, |
| 291 | Args: LocalErrorMessages::OutputFileAlreadyExists); |
| 292 | } |
| 293 | |
| 294 | FI.OutputFile = SummaryFile::fromPath(Path: RealOutputPath); |
| 295 | } |
| 296 | return FI; |
| 297 | } |
| 298 | |
| 299 | //===----------------------------------------------------------------------===// |
| 300 | // Format Conversion |
| 301 | //===----------------------------------------------------------------------===// |
| 302 | |
| 303 | template <typename ReadFn, typename WriteFn> |
| 304 | void run(const FormatInput &FI, ReadFn Read, WriteFn Write) { |
| 305 | auto ExpectedResult = (FI.InputFile.Format->*Read)(FI.InputFile.Path); |
| 306 | if (!ExpectedResult) { |
| 307 | fail(ExpectedResult.takeError()); |
| 308 | } |
| 309 | |
| 310 | if (!FI.OutputFile) { |
| 311 | return; |
| 312 | } |
| 313 | |
| 314 | auto Err = |
| 315 | (FI.OutputFile->Format->*Write)(*ExpectedResult, FI.OutputFile->Path); |
| 316 | if (Err) { |
| 317 | fail(std::move(Err)); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | void convert(const FormatInput &FI) { |
| 322 | switch (Type) { |
| 323 | case SummaryType::TU: |
| 324 | if (UseEncoding) { |
| 325 | run(FI, Read: &SerializationFormat::readTUSummaryEncoding, |
| 326 | Write: &SerializationFormat::writeTUSummaryEncoding); |
| 327 | } else { |
| 328 | run(FI, Read: &SerializationFormat::readTUSummary, |
| 329 | Write: &SerializationFormat::writeTUSummary); |
| 330 | } |
| 331 | return; |
| 332 | case SummaryType::LU: |
| 333 | if (UseEncoding) { |
| 334 | run(FI, Read: &SerializationFormat::readLUSummaryEncoding, |
| 335 | Write: &SerializationFormat::writeLUSummaryEncoding); |
| 336 | } else { |
| 337 | run(FI, Read: &SerializationFormat::readLUSummary, |
| 338 | Write: &SerializationFormat::writeLUSummary); |
| 339 | } |
| 340 | return; |
| 341 | } |
| 342 | |
| 343 | llvm_unreachable("Unhandled SummaryType variant" ); |
| 344 | } |
| 345 | |
| 346 | } // namespace |
| 347 | |
| 348 | //===----------------------------------------------------------------------===// |
| 349 | // Driver |
| 350 | //===----------------------------------------------------------------------===// |
| 351 | |
| 352 | int main(int argc, const char **argv) { |
| 353 | llvm::StringRef ToolHeading = "SSAF Format" ; |
| 354 | |
| 355 | InitLLVM X(argc, argv); |
| 356 | initTool(argc, argv, Version: "0.1" , Category&: SsafFormatCategory, ToolHeading); |
| 357 | |
| 358 | loadPlugins(Paths: LoadPlugins); |
| 359 | |
| 360 | if (ListFormats) { |
| 361 | listFormats(); |
| 362 | } else { |
| 363 | FormatInput FI = validateInput(); |
| 364 | convert(FI); |
| 365 | } |
| 366 | |
| 367 | return 0; |
| 368 | } |
| 369 | |