| 1 | //===- SSAFFormat.cpp - SSAF Format Tool ----------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the SSAF format tool that validates and converts |
| 10 | // TU and LU summaries between registered serialization formats. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" |
| 15 | #include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" |
| 16 | #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" |
| 17 | #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" |
| 18 | #include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep |
| 19 | #include "llvm/ADT/STLExtras.h" |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/Support/CommandLine.h" |
| 22 | #include "llvm/Support/DynamicLibrary.h" |
| 23 | #include "llvm/Support/ErrorHandling.h" |
| 24 | #include "llvm/Support/FileSystem.h" |
| 25 | #include "llvm/Support/Format.h" |
| 26 | #include "llvm/Support/FormatVariadic.h" |
| 27 | #include "llvm/Support/InitLLVM.h" |
| 28 | #include "llvm/Support/Path.h" |
| 29 | #include "llvm/Support/Process.h" |
| 30 | #include "llvm/Support/WithColor.h" |
| 31 | #include "llvm/Support/raw_ostream.h" |
| 32 | #include <memory> |
| 33 | #include <optional> |
| 34 | #include <string> |
| 35 | #include <system_error> |
| 36 | |
| 37 | using namespace llvm; |
| 38 | using namespace clang::ssaf; |
| 39 | |
| 40 | namespace { |
| 41 | |
| 42 | namespace fs = llvm::sys::fs; |
| 43 | namespace path = llvm::sys::path; |
| 44 | |
| 45 | //===----------------------------------------------------------------------===// |
| 46 | // Summary Type |
| 47 | //===----------------------------------------------------------------------===// |
| 48 | |
/// Kind of summary the tool operates on, selected with the --type option.
enum class SummaryType {
  TU, ///< Translation unit summary (spelled "tu" on the command line).
  LU, ///< Link unit summary (spelled "lu" on the command line).
};
| 50 | |
| 51 | //===----------------------------------------------------------------------===// |
| 52 | // Command-Line Options |
| 53 | //===----------------------------------------------------------------------===// |
| 54 | |
| 55 | cl::OptionCategory SsafFormatCategory("clang-ssaf-format options" ); |
| 56 | |
| 57 | cl::list<std::string> LoadPlugins("load" , |
| 58 | cl::desc("Load a plugin shared library" ), |
| 59 | cl::value_desc("path" ), |
| 60 | cl::cat(SsafFormatCategory)); |
| 61 | |
| 62 | // --type and the input file are required for convert/validateInput operations |
| 63 | // but must be optional at the cl layer so that --list can be used standalone. |
| 64 | cl::opt<SummaryType> Type( |
| 65 | "type" , cl::desc("Summary type (required unless --list is given)" ), |
| 66 | cl::values(clEnumValN(SummaryType::TU, "tu" , "Translation unit summary" ), |
| 67 | clEnumValN(SummaryType::LU, "lu" , "Link unit summary" )), |
| 68 | cl::cat(SsafFormatCategory)); |
| 69 | |
| 70 | cl::opt<std::string> InputPath(cl::Positional, cl::desc("<input file>" ), |
| 71 | cl::cat(SsafFormatCategory)); |
| 72 | |
| 73 | cl::opt<std::string> OutputPath("o" , cl::desc("Output summary path" ), |
| 74 | cl::value_desc("path" ), |
| 75 | cl::cat(SsafFormatCategory)); |
| 76 | |
| 77 | cl::opt<bool> UseEncoding("encoding" , |
| 78 | cl::desc("Read and write summary encodings rather " |
| 79 | "than decoded summaries" ), |
| 80 | cl::cat(SsafFormatCategory)); |
| 81 | |
| 82 | cl::opt<bool> ListFormats("list" , |
| 83 | cl::desc("List registered serialization formats and " |
| 84 | "analyses, then exit" ), |
| 85 | cl::init(Val: false), cl::cat(SsafFormatCategory)); |
| 86 | |
| 87 | llvm::StringRef ToolName; |
| 88 | |
| 89 | void printVersion(llvm::raw_ostream &OS) { OS << ToolName << " 0.1\n" ; } |
| 90 | |
| 91 | //===----------------------------------------------------------------------===// |
| 92 | // Error Messages |
| 93 | //===----------------------------------------------------------------------===// |
| 94 | |
/// Diagnostic texts used by the tool. Entries containing "{N}" placeholders
/// are llvm::formatv format strings; the others are used verbatim as the
/// reason argument of CannotValidateSummary.
namespace ErrorMessages {

/// {0}: plugin path, {1}: error text reported by the dynamic loader.
constexpr const char *FailedToLoadPlugin =
    "failed to load plugin '{0}': {1}";

/// {0}: summary path, {1}: reason the path was rejected.
constexpr const char *CannotValidateSummary =
    "failed to validate summary '{0}': {1}";

/// Reason: the path has no file extension to select a format with.
constexpr const char *ExtensionNotSupplied =
    "Extension not supplied";

/// Reason: no format is registered for the extension. {0}: the extension
/// without its leading dot.
constexpr const char *NoFormatForExtension =
    "Format not registered for extension '{0}'";

/// Reason: the directory that should contain the output file is missing.
constexpr const char *OutputDirectoryMissing =
    "Parent directory does not exist";

/// Reason: something already exists at the output path.
constexpr const char *OutputFileAlreadyExists =
    "Output file already exists";

/// Reason: the output would be written over the input.
constexpr const char *InputOutputSamePath =
    "Input and Output resolve to the same path";

} // namespace ErrorMessages
| 116 | |
| 117 | //===----------------------------------------------------------------------===// |
| 118 | // Diagnostic Utilities |
| 119 | //===----------------------------------------------------------------------===// |
| 120 | |
| 121 | [[noreturn]] void fail(const char *Msg) { |
| 122 | llvm::WithColor::error(OS&: llvm::errs(), Prefix: ToolName) << Msg << "\n" ; |
| 123 | llvm::sys::Process::Exit(RetCode: 1); |
| 124 | } |
| 125 | |
| 126 | template <typename... Ts> |
| 127 | [[noreturn]] void fail(const char *Fmt, Ts &&...Args) { |
| 128 | std::string Message = llvm::formatv(Fmt, std::forward<Ts>(Args)...); |
| 129 | fail(Msg: Message.data()); |
| 130 | } |
| 131 | |
| 132 | [[noreturn]] void fail(llvm::Error Err) { |
| 133 | fail(Msg: toString(E: std::move(Err)).data()); |
| 134 | } |
| 135 | |
| 136 | //===----------------------------------------------------------------------===// |
| 137 | // Format Registry |
| 138 | //===----------------------------------------------------------------------===// |
| 139 | |
| 140 | // FIXME: This will be revisited after we add support for registering formats |
| 141 | // with extensions. |
| 142 | SerializationFormat *getFormatForExtension(llvm::StringRef Extension) { |
| 143 | static llvm::SmallVector< |
| 144 | std::pair<std::string, std::unique_ptr<SerializationFormat>>, 4> |
| 145 | ExtensionFormatList; |
| 146 | |
| 147 | // Most recently used format is most likely to be reused again. |
| 148 | auto ReversedList = llvm::reverse(C&: ExtensionFormatList); |
| 149 | auto It = llvm::find_if(Range&: ReversedList, P: [&](const auto &Entry) { |
| 150 | return Entry.first == Extension; |
| 151 | }); |
| 152 | if (It != ReversedList.end()) { |
| 153 | return It->second.get(); |
| 154 | } |
| 155 | |
| 156 | if (!isFormatRegistered(FormatName: Extension)) { |
| 157 | return nullptr; |
| 158 | } |
| 159 | |
| 160 | auto Format = makeFormat(FormatName: Extension); |
| 161 | SerializationFormat *Result = Format.get(); |
| 162 | assert(Result); |
| 163 | |
| 164 | ExtensionFormatList.emplace_back(Args&: Extension, Args: std::move(Format)); |
| 165 | |
| 166 | return Result; |
| 167 | } |
| 168 | |
| 169 | //===----------------------------------------------------------------------===// |
| 170 | // Format Listing |
| 171 | //===----------------------------------------------------------------------===// |
| 172 | |
/// Number of spaces printed before each format's number in the listing.
constexpr size_t FormatIndent{4};
/// Extra indentation of an analysis line relative to the column where its
/// format's name starts.
constexpr size_t AnalysisIndent{4};
| 175 | |
/// Name and description of one analysis reported by a serialization format,
/// copied into owned strings for printing.
struct AnalysisData {
  /// Analysis name.
  std::string Name;
  /// Analysis description.
  std::string Desc;
};
| 180 | |
| 181 | struct FormatData { |
| 182 | std::string Name; |
| 183 | std::string Desc; |
| 184 | llvm::SmallVector<AnalysisData> Analyses; |
| 185 | }; |
| 186 | |
/// Column widths and offsets shared by all lines of the format listing so
/// that numbers, names and descriptions line up.
///
/// Every member defaults to zero so a default-constructed layout never holds
/// indeterminate values; aggregate initialization with all six values keeps
/// working.
struct PrintLayout {
  /// Width of the widest format number string including its dot, e.g. "10.".
  size_t FormatNumWidth = 0;
  /// Length of the longest format name.
  size_t MaxFormatNameWidth = 0;
  /// Column where format names start; the "Analyses:" header is indented to
  /// the same column.
  size_t FormatNameCol = 0;
  /// Column where analysis numbers start.
  size_t AnalysisCol = 0;
  /// Width of the widest analysis number string, e.g. "10.10.".
  size_t AnalysisNumWidth = 0;
  /// Length of the longest analysis name.
  size_t MaxAnalysisNameWidth = 0;
};
| 195 | |
| 196 | llvm::SmallVector<FormatData> collectFormats() { |
| 197 | llvm::SmallVector<FormatData> Formats; |
| 198 | for (const auto &Entry : SerializationFormatRegistry::entries()) { |
| 199 | FormatData FD; |
| 200 | FD.Name = Entry.getName().str(); |
| 201 | FD.Desc = Entry.getDesc().str(); |
| 202 | auto Format = Entry.instantiate(); |
| 203 | Format->forEachRegisteredAnalysis( |
| 204 | Callback: [&](llvm::StringRef Name, llvm::StringRef Desc) { |
| 205 | FD.Analyses.push_back(Elt: {.Name: Name.str(), .Desc: Desc.str()}); |
| 206 | }); |
| 207 | Formats.push_back(Elt: std::move(FD)); |
| 208 | } |
| 209 | return Formats; |
| 210 | } |
| 211 | |
| 212 | void printAnalysis(const AnalysisData &AD, size_t AnalysisIndex, |
| 213 | size_t FormatIndex, const PrintLayout &Layout) { |
| 214 | std::string AnalysisNum = std::to_string(val: FormatIndex + 1) + "." + |
| 215 | std::to_string(val: AnalysisIndex + 1) + "." ; |
| 216 | llvm::outs().indent(NumSpaces: Layout.AnalysisCol) |
| 217 | << llvm::right_justify(Str: AnalysisNum, Width: Layout.AnalysisNumWidth) << " " |
| 218 | << llvm::left_justify(Str: AD.Name, Width: Layout.MaxAnalysisNameWidth) << " " |
| 219 | << AD.Desc << "\n" ; |
| 220 | } |
| 221 | |
| 222 | void printAnalyses(const llvm::SmallVector<AnalysisData> &Analyses, |
| 223 | size_t FormatIndex, const PrintLayout &Layout) { |
| 224 | if (Analyses.empty()) { |
| 225 | llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses: (none)\n" ; |
| 226 | return; |
| 227 | } |
| 228 | |
| 229 | llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses:\n" ; |
| 230 | |
| 231 | for (size_t AnalysisIndex = 0; AnalysisIndex < Analyses.size(); |
| 232 | ++AnalysisIndex) { |
| 233 | printAnalysis(AD: Analyses[AnalysisIndex], AnalysisIndex, FormatIndex, Layout); |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | void printFormat(const FormatData &FD, size_t FormatIndex, |
| 238 | const PrintLayout &Layout) { |
| 239 | // Blank line before each format entry for readability. |
| 240 | llvm::outs() << "\n" ; |
| 241 | |
| 242 | std::string FormatNum = std::to_string(val: FormatIndex + 1) + "." ; |
| 243 | llvm::outs().indent(NumSpaces: FormatIndent) |
| 244 | << llvm::right_justify(Str: FormatNum, Width: Layout.FormatNumWidth) << " " |
| 245 | << llvm::left_justify(Str: FD.Name, Width: Layout.MaxFormatNameWidth) << " " |
| 246 | << FD.Desc << "\n" ; |
| 247 | |
| 248 | printAnalyses(Analyses: FD.Analyses, FormatIndex, Layout); |
| 249 | } |
| 250 | |
| 251 | void printFormats(const llvm::SmallVector<FormatData> &Formats, |
| 252 | const PrintLayout &Layout) { |
| 253 | llvm::outs() << "Registered serialization formats:\n" ; |
| 254 | for (size_t FormatIndex = 0; FormatIndex < Formats.size(); ++FormatIndex) { |
| 255 | printFormat(FD: Formats[FormatIndex], FormatIndex, Layout); |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | PrintLayout computePrintLayout(const llvm::SmallVector<FormatData> &Formats) { |
| 260 | size_t MaxFormatNameWidth = 0; |
| 261 | size_t MaxAnalysisCount = 0; |
| 262 | size_t MaxAnalysisNameWidth = 0; |
| 263 | for (const auto &FD : Formats) { |
| 264 | MaxFormatNameWidth = std::max(a: MaxFormatNameWidth, b: FD.Name.size()); |
| 265 | MaxAnalysisCount = std::max(a: MaxAnalysisCount, b: FD.Analyses.size()); |
| 266 | for (const auto &AD : FD.Analyses) { |
| 267 | MaxAnalysisNameWidth = std::max(a: MaxAnalysisNameWidth, b: AD.Name.size()); |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | // Width of the widest format number string, e.g. "10." -> 3. |
| 272 | size_t FormatNumWidth = |
| 273 | std::to_string(val: Formats.size()).size() + 1; // +1 for '.' |
| 274 | // Width of the widest analysis number string, e.g. "10.10." -> 6. |
| 275 | size_t AnalysisNumWidth = std::to_string(val: Formats.size()).size() + 1 + |
| 276 | std::to_string(val: MaxAnalysisCount).size() + 1; |
| 277 | |
| 278 | // Where the format name starts (also where "Analyses:" is indented to). |
| 279 | size_t FormatNameCol = FormatIndent + FormatNumWidth + 1; |
| 280 | // Where the analysis number starts. |
| 281 | size_t AnalysisCol = FormatNameCol + AnalysisIndent; |
| 282 | |
| 283 | return { |
| 284 | .FormatNumWidth: FormatNumWidth, .MaxFormatNameWidth: MaxFormatNameWidth, .FormatNameCol: FormatNameCol, |
| 285 | .AnalysisCol: AnalysisCol, .AnalysisNumWidth: AnalysisNumWidth, .MaxAnalysisNameWidth: MaxAnalysisNameWidth, |
| 286 | }; |
| 287 | } |
| 288 | |
| 289 | void listFormats() { |
| 290 | llvm::SmallVector<FormatData> Formats = collectFormats(); |
| 291 | if (Formats.empty()) { |
| 292 | llvm::outs() << "No serialization formats registered.\n" ; |
| 293 | return; |
| 294 | } |
| 295 | printFormats(Formats, Layout: computePrintLayout(Formats)); |
| 296 | } |
| 297 | |
| 298 | //===----------------------------------------------------------------------===// |
| 299 | // Plugin Loading |
| 300 | //===----------------------------------------------------------------------===// |
| 301 | |
| 302 | void loadPlugins() { |
| 303 | for (const auto &PluginPath : LoadPlugins) { |
| 304 | std::string ErrMsg; |
| 305 | if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename: PluginPath.c_str(), |
| 306 | ErrMsg: &ErrMsg)) { |
| 307 | fail(Fmt: ErrorMessages::FailedToLoadPlugin, Args: PluginPath, Args&: ErrMsg); |
| 308 | } |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | //===----------------------------------------------------------------------===// |
| 313 | // Input Validation |
| 314 | //===----------------------------------------------------------------------===// |
| 315 | |
| 316 | struct SummaryFile { |
| 317 | std::string Path; |
| 318 | SerializationFormat *Format = nullptr; |
| 319 | |
| 320 | static SummaryFile fromPath(llvm::StringRef Path) { |
| 321 | llvm::StringRef Extension = path::extension(path: Path); |
| 322 | if (Extension.empty()) { |
| 323 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: Path, |
| 324 | Args: ErrorMessages::ExtensionNotSupplied); |
| 325 | } |
| 326 | |
| 327 | Extension = Extension.drop_front(); |
| 328 | SerializationFormat *Format = getFormatForExtension(Extension); |
| 329 | if (!Format) { |
| 330 | std::string Msg = |
| 331 | llvm::formatv(Fmt: ErrorMessages::NoFormatForExtension, Vals&: Extension); |
| 332 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: Path, Args&: Msg); |
| 333 | } |
| 334 | |
| 335 | return {.Path: Path.str(), .Format: Format}; |
| 336 | } |
| 337 | }; |
| 338 | |
| 339 | struct FormatInput { |
| 340 | SummaryFile InputFile; |
| 341 | std::optional<SummaryFile> OutputFile; |
| 342 | }; |
| 343 | |
| 344 | FormatInput validateInput() { |
| 345 | assert(!ListFormats); |
| 346 | |
| 347 | FormatInput FI; |
| 348 | |
| 349 | // Validate Type explicitly since we don't want to specify it if --list is |
| 350 | // provided. |
| 351 | if (!Type.getNumOccurrences()) { |
| 352 | fail(Msg: "'--type' option is required" ); |
| 353 | } |
| 354 | |
| 355 | // Validate the input path. |
| 356 | { |
| 357 | if (InputPath.empty()) { |
| 358 | fail(Msg: "no input file specified" ); |
| 359 | } |
| 360 | |
| 361 | llvm::SmallString<256> RealInputPath; |
| 362 | std::error_code EC = |
| 363 | fs::real_path(path: InputPath, output&: RealInputPath, /*expand_tilde=*/true); |
| 364 | if (EC) { |
| 365 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: InputPath, Args: EC.message()); |
| 366 | } |
| 367 | |
| 368 | FI.InputFile = SummaryFile::fromPath(Path: RealInputPath); |
| 369 | } |
| 370 | |
| 371 | // Validate the output path. |
| 372 | if (!OutputPath.empty()) { |
| 373 | llvm::StringRef ParentDir = path::parent_path(path: OutputPath); |
| 374 | llvm::StringRef DirToCheck = ParentDir.empty() ? "." : ParentDir; |
| 375 | |
| 376 | if (!fs::exists(Path: DirToCheck)) { |
| 377 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, |
| 378 | Args: ErrorMessages::OutputDirectoryMissing); |
| 379 | } |
| 380 | |
| 381 | // Reconstruct the real output path from the real parent directory and the |
| 382 | // output filename. The output file does not exist yet so real_path cannot |
| 383 | // be called on the full output path directly. |
| 384 | llvm::SmallString<256> RealParentDir; |
| 385 | if (std::error_code EC = fs::real_path(path: DirToCheck, output&: RealParentDir)) { |
| 386 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, Args: EC.message()); |
| 387 | } |
| 388 | |
| 389 | llvm::SmallString<256> RealOutputPath = RealParentDir; |
| 390 | path::append(path&: RealOutputPath, a: path::filename(path: OutputPath)); |
| 391 | |
| 392 | if (RealOutputPath == FI.InputFile.Path) { |
| 393 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, |
| 394 | Args: ErrorMessages::InputOutputSamePath); |
| 395 | } |
| 396 | |
| 397 | if (fs::exists(Path: RealOutputPath)) { |
| 398 | fail(Fmt: ErrorMessages::CannotValidateSummary, Args&: OutputPath, |
| 399 | Args: ErrorMessages::OutputFileAlreadyExists); |
| 400 | } |
| 401 | |
| 402 | FI.OutputFile = SummaryFile::fromPath(Path: RealOutputPath); |
| 403 | } |
| 404 | return FI; |
| 405 | } |
| 406 | |
| 407 | //===----------------------------------------------------------------------===// |
| 408 | // Format Conversion |
| 409 | //===----------------------------------------------------------------------===// |
| 410 | |
| 411 | template <typename ReadFn, typename WriteFn> |
| 412 | void run(const FormatInput &FI, ReadFn Read, WriteFn Write) { |
| 413 | auto ExpectedResult = (FI.InputFile.Format->*Read)(FI.InputFile.Path); |
| 414 | if (!ExpectedResult) { |
| 415 | fail(ExpectedResult.takeError()); |
| 416 | } |
| 417 | |
| 418 | if (!FI.OutputFile) { |
| 419 | return; |
| 420 | } |
| 421 | |
| 422 | auto Err = |
| 423 | (FI.OutputFile->Format->*Write)(*ExpectedResult, FI.OutputFile->Path); |
| 424 | if (Err) { |
| 425 | fail(std::move(Err)); |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | void convert(const FormatInput &FI) { |
| 430 | switch (Type) { |
| 431 | case SummaryType::TU: |
| 432 | if (UseEncoding) { |
| 433 | run(FI, Read: &SerializationFormat::readTUSummaryEncoding, |
| 434 | Write: &SerializationFormat::writeTUSummaryEncoding); |
| 435 | } else { |
| 436 | run(FI, Read: &SerializationFormat::readTUSummary, |
| 437 | Write: &SerializationFormat::writeTUSummary); |
| 438 | } |
| 439 | return; |
| 440 | case SummaryType::LU: |
| 441 | if (UseEncoding) { |
| 442 | run(FI, Read: &SerializationFormat::readLUSummaryEncoding, |
| 443 | Write: &SerializationFormat::writeLUSummaryEncoding); |
| 444 | } else { |
| 445 | run(FI, Read: &SerializationFormat::readLUSummary, |
| 446 | Write: &SerializationFormat::writeLUSummary); |
| 447 | } |
| 448 | return; |
| 449 | } |
| 450 | |
| 451 | llvm_unreachable("Unhandled SummaryType variant" ); |
| 452 | } |
| 453 | |
| 454 | } // namespace |
| 455 | |
| 456 | //===----------------------------------------------------------------------===// |
| 457 | // Driver |
| 458 | //===----------------------------------------------------------------------===// |
| 459 | |
| 460 | int main(int argc, const char **argv) { |
| 461 | InitLLVM X(argc, argv); |
| 462 | // path::stem strips the .exe extension on Windows so ToolName is consistent. |
| 463 | ToolName = path::stem(path: argv[0]); |
| 464 | |
| 465 | cl::HideUnrelatedOptions(Category&: SsafFormatCategory); |
| 466 | cl::SetVersionPrinter(printVersion); |
| 467 | cl::ParseCommandLineOptions(argc, argv, Overview: "SSAF Format\n" ); |
| 468 | |
| 469 | loadPlugins(); |
| 470 | |
| 471 | if (ListFormats) { |
| 472 | listFormats(); |
| 473 | } else { |
| 474 | FormatInput FI = validateInput(); |
| 475 | convert(FI); |
| 476 | } |
| 477 | |
| 478 | return 0; |
| 479 | } |
| 480 | |