| 1 | //===- SSAFFormat.cpp - SSAF Format Tool ----------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the SSAF format tool that validates and converts |
| 10 | // TU and LU summaries between registered serialization formats. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" |
| 15 | #include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" |
| 16 | #include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" |
| 17 | #include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" |
| 18 | #include "clang/ScalableStaticAnalysis/SSAFForceLinker.h" // IWYU pragma: keep |
| 19 | #include "clang/ScalableStaticAnalysis/Tool/Utils.h" |
| 20 | #include "llvm/ADT/STLExtras.h" |
| 21 | #include "llvm/ADT/SmallVector.h" |
| 22 | #include "llvm/Support/CommandLine.h" |
| 23 | #include "llvm/Support/ErrorHandling.h" |
| 24 | #include "llvm/Support/Format.h" |
| 25 | #include "llvm/Support/FormatVariadic.h" |
| 26 | #include "llvm/Support/InitLLVM.h" |
| 27 | #include "llvm/Support/Path.h" |
| 28 | #include "llvm/Support/raw_ostream.h" |
| 29 | #include <memory> |
| 30 | #include <optional> |
| 31 | #include <string> |
| 32 | |
| 33 | using namespace llvm; |
| 34 | using namespace clang::ssaf; |
| 35 | |
| 36 | namespace { |
| 37 | |
| 38 | //===----------------------------------------------------------------------===// |
| 39 | // Summary Type |
| 40 | //===----------------------------------------------------------------------===// |
| 41 | |
/// Kind of summary artifact the tool operates on.
///
/// The value is chosen through the `--type` command-line option and decides
/// which reader/writer pair `convert()` dispatches to.
enum class SummaryType {
  Auto, ///< "auto": detect the type from the file's 'type' field.
  TU,   ///< "tu": translation unit summary.
  LU,   ///< "lu": link unit summary.
  WPA,  ///< "wpa": whole-program analysis suite.
};
| 43 | |
| 44 | //===----------------------------------------------------------------------===// |
| 45 | // Command-Line Options |
| 46 | //===----------------------------------------------------------------------===// |
| 47 | |
| 48 | cl::OptionCategory SsafFormatCategory("clang-ssaf-format options" ); |
| 49 | |
| 50 | cl::list<std::string> LoadPlugins("load" , |
| 51 | cl::desc("Load a plugin shared library" ), |
| 52 | cl::value_desc("path" ), |
| 53 | cl::cat(SsafFormatCategory)); |
| 54 | |
| 55 | // Defaults to 'auto', which inspects the file's self-describing 'type' |
| 56 | // field and dispatches to the matching reader/writer. Explicit values |
| 57 | // force the use of the corresponding kind-specific reader/writer. |
| 58 | cl::opt<SummaryType> Type( |
| 59 | "type" , |
| 60 | cl::desc("Summary type (defaults to 'auto', which uses the file's " |
| 61 | "self-describing 'type' field)" ), |
| 62 | cl::values(clEnumValN(SummaryType::Auto, "auto" , |
| 63 | "Detect type from the file's 'type' field" ), |
| 64 | clEnumValN(SummaryType::TU, "tu" , "Translation unit summary" ), |
| 65 | clEnumValN(SummaryType::LU, "lu" , "Link unit summary" ), |
| 66 | clEnumValN(SummaryType::WPA, "wpa" , |
| 67 | "Whole-program analysis suite" )), |
| 68 | cl::init(Val: SummaryType::Auto), cl::cat(SsafFormatCategory)); |
| 69 | |
| 70 | cl::opt<std::string> InputPath(cl::Positional, cl::desc("<input file>" ), |
| 71 | cl::cat(SsafFormatCategory)); |
| 72 | |
| 73 | cl::opt<std::string> OutputPath("o" , cl::desc("Output file path" ), |
| 74 | cl::value_desc("path" ), |
| 75 | cl::cat(SsafFormatCategory)); |
| 76 | |
| 77 | cl::opt<bool> UseEncoding("encoding" , |
| 78 | cl::desc("Read and write summary encodings rather " |
| 79 | "than decoded summaries" ), |
| 80 | cl::cat(SsafFormatCategory)); |
| 81 | |
| 82 | cl::opt<bool> ListFormats("list" , |
| 83 | cl::desc("List registered serialization formats and " |
| 84 | "analyses, then exit" ), |
| 85 | cl::init(Val: false), cl::cat(SsafFormatCategory)); |
| 86 | |
| 87 | //===----------------------------------------------------------------------===// |
| 88 | // Format Listing |
| 89 | //===----------------------------------------------------------------------===// |
| 90 | |
/// Number of spaces printed in front of each format's number in the listing.
constexpr size_t FormatIndent = 4;
/// Additional indentation of analysis rows relative to the column at which
/// the format name starts.
constexpr size_t AnalysisIndent = 4;
| 93 | |
/// Name and description of one analysis registered with a serialization
/// format, copied into owned strings for printing.
struct AnalysisData {
  std::string Name; ///< Analysis name as reported by the format.
  std::string Desc; ///< Human-readable description of the analysis.
};
| 98 | |
| 99 | struct FormatData { |
| 100 | std::string Name; |
| 101 | std::string Desc; |
| 102 | llvm::SmallVector<AnalysisData> Analyses; |
| 103 | }; |
| 104 | |
/// Column widths and offsets used to align the `--list` output.
///
/// All members are measured in characters. They default to zero so that a
/// default-constructed layout never holds indeterminate values.
struct PrintLayout {
  size_t FormatNumWidth = 0;       ///< Width of the widest "N." format label.
  size_t MaxFormatNameWidth = 0;   ///< Length of the longest format name.
  size_t FormatNameCol = 0;        ///< Column where format names start.
  size_t AnalysisCol = 0;          ///< Column where analysis labels start.
  size_t AnalysisNumWidth = 0;     ///< Width of the widest "N.M." label.
  size_t MaxAnalysisNameWidth = 0; ///< Length of the longest analysis name.
};
| 113 | |
| 114 | llvm::SmallVector<FormatData> collectFormats() { |
| 115 | llvm::SmallVector<FormatData> Formats; |
| 116 | for (const auto &Entry : SerializationFormatRegistry::entries()) { |
| 117 | FormatData FD; |
| 118 | FD.Name = Entry.getName().str(); |
| 119 | FD.Desc = Entry.getDesc().str(); |
| 120 | auto Format = Entry.instantiate(); |
| 121 | Format->forEachRegisteredAnalysis( |
| 122 | Callback: [&](llvm::StringRef Name, llvm::StringRef Desc) { |
| 123 | FD.Analyses.push_back(Elt: {.Name: Name.str(), .Desc: Desc.str()}); |
| 124 | }); |
| 125 | Formats.push_back(Elt: std::move(FD)); |
| 126 | } |
| 127 | return Formats; |
| 128 | } |
| 129 | |
| 130 | void printAnalysis(const AnalysisData &AD, size_t AnalysisIndex, |
| 131 | size_t FormatIndex, const PrintLayout &Layout) { |
| 132 | std::string AnalysisNum = std::to_string(val: FormatIndex + 1) + "." + |
| 133 | std::to_string(val: AnalysisIndex + 1) + "." ; |
| 134 | llvm::outs().indent(NumSpaces: Layout.AnalysisCol) |
| 135 | << llvm::right_justify(Str: AnalysisNum, Width: Layout.AnalysisNumWidth) << " " |
| 136 | << llvm::left_justify(Str: AD.Name, Width: Layout.MaxAnalysisNameWidth) << " - " |
| 137 | << AD.Desc << "\n" ; |
| 138 | } |
| 139 | |
| 140 | void printAnalyses(const llvm::SmallVector<AnalysisData> &Analyses, |
| 141 | size_t FormatIndex, const PrintLayout &Layout) { |
| 142 | if (Analyses.empty()) { |
| 143 | llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses: (none)\n" ; |
| 144 | return; |
| 145 | } |
| 146 | |
| 147 | llvm::outs().indent(NumSpaces: Layout.FormatNameCol) << "Analyses:\n" ; |
| 148 | |
| 149 | for (size_t AnalysisIndex = 0; AnalysisIndex < Analyses.size(); |
| 150 | ++AnalysisIndex) { |
| 151 | printAnalysis(AD: Analyses[AnalysisIndex], AnalysisIndex, FormatIndex, Layout); |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | void printFormat(const FormatData &FD, size_t FormatIndex, |
| 156 | const PrintLayout &Layout) { |
| 157 | // Blank line before each format entry for readability. |
| 158 | llvm::outs() << "\n" ; |
| 159 | |
| 160 | std::string FormatNum = std::to_string(val: FormatIndex + 1) + "." ; |
| 161 | llvm::outs().indent(NumSpaces: FormatIndent) |
| 162 | << llvm::right_justify(Str: FormatNum, Width: Layout.FormatNumWidth) << " " |
| 163 | << llvm::left_justify(Str: FD.Name, Width: Layout.MaxFormatNameWidth) << " - " |
| 164 | << FD.Desc << "\n" ; |
| 165 | |
| 166 | printAnalyses(Analyses: FD.Analyses, FormatIndex, Layout); |
| 167 | } |
| 168 | |
| 169 | void printFormats(const llvm::SmallVector<FormatData> &Formats, |
| 170 | const PrintLayout &Layout) { |
| 171 | llvm::outs() << "Registered serialization formats:\n" ; |
| 172 | for (size_t FormatIndex = 0; FormatIndex < Formats.size(); ++FormatIndex) { |
| 173 | printFormat(FD: Formats[FormatIndex], FormatIndex, Layout); |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | PrintLayout computePrintLayout(const llvm::SmallVector<FormatData> &Formats) { |
| 178 | size_t MaxFormatNameWidth = 0; |
| 179 | size_t MaxAnalysisCount = 0; |
| 180 | size_t MaxAnalysisNameWidth = 0; |
| 181 | for (const auto &FD : Formats) { |
| 182 | MaxFormatNameWidth = std::max(a: MaxFormatNameWidth, b: FD.Name.size()); |
| 183 | MaxAnalysisCount = std::max(a: MaxAnalysisCount, b: FD.Analyses.size()); |
| 184 | for (const auto &AD : FD.Analyses) { |
| 185 | MaxAnalysisNameWidth = std::max(a: MaxAnalysisNameWidth, b: AD.Name.size()); |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | // Width of the widest format number string, e.g. "10." -> 3. |
| 190 | size_t FormatNumWidth = |
| 191 | std::to_string(val: Formats.size()).size() + 1; // +1 for '.' |
| 192 | // Width of the widest analysis number string, e.g. "10.10." -> 6. |
| 193 | size_t AnalysisNumWidth = std::to_string(val: Formats.size()).size() + 1 + |
| 194 | std::to_string(val: MaxAnalysisCount).size() + 1; |
| 195 | |
| 196 | // Where the format name starts (also where "Analyses:" is indented to). |
| 197 | size_t FormatNameCol = FormatIndent + FormatNumWidth + 1; |
| 198 | // Where the analysis number starts. |
| 199 | size_t AnalysisCol = FormatNameCol + AnalysisIndent; |
| 200 | |
| 201 | return { |
| 202 | .FormatNumWidth: FormatNumWidth, .MaxFormatNameWidth: MaxFormatNameWidth, .FormatNameCol: FormatNameCol, |
| 203 | .AnalysisCol: AnalysisCol, .AnalysisNumWidth: AnalysisNumWidth, .MaxAnalysisNameWidth: MaxAnalysisNameWidth, |
| 204 | }; |
| 205 | } |
| 206 | |
| 207 | void listFormats() { |
| 208 | llvm::SmallVector<FormatData> Formats = collectFormats(); |
| 209 | if (Formats.empty()) { |
| 210 | llvm::outs() << "No serialization formats registered.\n" ; |
| 211 | return; |
| 212 | } |
| 213 | printFormats(Formats, Layout: computePrintLayout(Formats)); |
| 214 | } |
| 215 | |
| 216 | //===----------------------------------------------------------------------===// |
| 217 | // Input Validation |
| 218 | //===----------------------------------------------------------------------===// |
| 219 | |
| 220 | struct FormatInput { |
| 221 | FormatFile InputFile; |
| 222 | std::optional<FormatFile> OutputFile; |
| 223 | }; |
| 224 | |
| 225 | FormatInput validateInput() { |
| 226 | assert(!ListFormats); |
| 227 | |
| 228 | FormatInput FI; |
| 229 | |
| 230 | // Validate the input path. |
| 231 | { |
| 232 | if (InputPath.empty()) { |
| 233 | fail(Msg: "no input file specified" ); |
| 234 | } |
| 235 | |
| 236 | FI.InputFile = FormatFile::fromInputPath(Path: InputPath); |
| 237 | } |
| 238 | |
| 239 | // Validate the output path. |
| 240 | if (!OutputPath.empty()) { |
| 241 | FI.OutputFile = FormatFile::fromOutputPath(Path: OutputPath); |
| 242 | } |
| 243 | |
| 244 | return FI; |
| 245 | } |
| 246 | |
| 247 | //===----------------------------------------------------------------------===// |
| 248 | // Format Conversion |
| 249 | //===----------------------------------------------------------------------===// |
| 250 | |
| 251 | template <typename ReadFn, typename WriteFn> |
| 252 | void run(const FormatInput &FI, ReadFn Read, WriteFn Write) { |
| 253 | auto ExpectedResult = (FI.InputFile.Format->*Read)(FI.InputFile.Path); |
| 254 | if (!ExpectedResult) { |
| 255 | fail(ExpectedResult.takeError()); |
| 256 | } |
| 257 | |
| 258 | if (!FI.OutputFile) { |
| 259 | return; |
| 260 | } |
| 261 | |
| 262 | auto Err = |
| 263 | (FI.OutputFile->Format->*Write)(*ExpectedResult, FI.OutputFile->Path); |
| 264 | if (Err) { |
| 265 | fail(std::move(Err)); |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | void convert(const FormatInput &FI) { |
| 270 | switch (Type) { |
| 271 | case SummaryType::Auto: |
| 272 | if (UseEncoding) { |
| 273 | run(FI, Read: &SerializationFormat::readArtifactEncoding, |
| 274 | Write: &SerializationFormat::writeArtifactEncoding); |
| 275 | } else { |
| 276 | run(FI, Read: &SerializationFormat::readArtifact, |
| 277 | Write: &SerializationFormat::writeArtifact); |
| 278 | } |
| 279 | return; |
| 280 | case SummaryType::TU: |
| 281 | if (UseEncoding) { |
| 282 | run(FI, Read: &SerializationFormat::readTUSummaryEncoding, |
| 283 | Write: &SerializationFormat::writeTUSummaryEncoding); |
| 284 | } else { |
| 285 | run(FI, Read: &SerializationFormat::readTUSummary, |
| 286 | Write: &SerializationFormat::writeTUSummary); |
| 287 | } |
| 288 | return; |
| 289 | case SummaryType::LU: |
| 290 | if (UseEncoding) { |
| 291 | run(FI, Read: &SerializationFormat::readLUSummaryEncoding, |
| 292 | Write: &SerializationFormat::writeLUSummaryEncoding); |
| 293 | } else { |
| 294 | run(FI, Read: &SerializationFormat::readLUSummary, |
| 295 | Write: &SerializationFormat::writeLUSummary); |
| 296 | } |
| 297 | return; |
| 298 | case SummaryType::WPA: |
| 299 | run(FI, Read: &SerializationFormat::readWPASuite, |
| 300 | Write: &SerializationFormat::writeWPASuite); |
| 301 | return; |
| 302 | } |
| 303 | |
| 304 | llvm_unreachable("Unhandled SummaryType variant" ); |
| 305 | } |
| 306 | |
| 307 | } // namespace |
| 308 | |
| 309 | //===----------------------------------------------------------------------===// |
| 310 | // Driver |
| 311 | //===----------------------------------------------------------------------===// |
| 312 | |
| 313 | int main(int argc, const char **argv) { |
| 314 | llvm::StringRef ToolHeading = "SSAF Format" ; |
| 315 | |
| 316 | InitLLVM X(argc, argv); |
| 317 | initTool(argc, argv, Version: "0.1" , Category&: SsafFormatCategory, ToolHeading); |
| 318 | |
| 319 | loadPlugins(Paths: LoadPlugins); |
| 320 | |
| 321 | if (ListFormats) { |
| 322 | listFormats(); |
| 323 | } else { |
| 324 | FormatInput FI = validateInput(); |
| 325 | convert(FI); |
| 326 | } |
| 327 | |
| 328 | return 0; |
| 329 | } |
| 330 | |