| 1 | //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the SarifDiagnostics object. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "SarifDiagnostics.h" |
| 14 | #include "HTMLDiagnostics.h" |
| 15 | #include "clang/Analysis/IssueHash.h" |
| 16 | #include "clang/Analysis/MacroExpansionContext.h" |
| 17 | #include "clang/Analysis/PathDiagnostic.h" |
| 18 | #include "clang/Basic/Sarif.h" |
| 19 | #include "clang/Basic/SourceManager.h" |
| 20 | #include "clang/Basic/Version.h" |
| 21 | #include "clang/Lex/Preprocessor.h" |
| 22 | #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" |
| 23 | #include "llvm/ADT/StringMap.h" |
| 24 | #include "llvm/Support/ConvertUTF.h" |
| 25 | #include "llvm/Support/JSON.h" |
| 26 | #include <memory> |
| 27 | |
| 28 | using namespace llvm; |
| 29 | using namespace clang; |
| 30 | using namespace ento; |
| 31 | |
| 32 | namespace { |
| 33 | class SarifDiagnostics : public PathDiagnosticConsumer { |
| 34 | std::string OutputFile; |
| 35 | const LangOptions &LO; |
| 36 | const SourceManager &SM; |
| 37 | SarifDocumentWriter SarifWriter; |
| 38 | |
| 39 | public: |
| 40 | SarifDiagnostics(const std::string &Output, const LangOptions &LO, |
| 41 | const SourceManager &SM) |
| 42 | : OutputFile(Output), LO(LO), SM(SM), SarifWriter(SM) {} |
| 43 | ~SarifDiagnostics() override = default; |
| 44 | |
| 45 | void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags, |
| 46 | FilesMade *FM) override; |
| 47 | |
| 48 | StringRef getName() const override { return "SarifDiagnostics" ; } |
| 49 | PathGenerationScheme getGenerationScheme() const override { return Minimal; } |
| 50 | bool supportsLogicalOpControlFlow() const override { return true; } |
| 51 | bool supportsCrossFileDiagnostics() const override { return true; } |
| 52 | |
| 53 | private: |
| 54 | SarifResult createResult(const PathDiagnostic *Diag, |
| 55 | const StringMap<uint32_t> &RuleMapping, |
| 56 | const LangOptions &LO, FilesMade *FM); |
| 57 | }; |
| 58 | } // end anonymous namespace |
| 59 | |
| 60 | void ento::createSarifDiagnosticConsumer( |
| 61 | PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, |
| 62 | const std::string &Output, const Preprocessor &PP, |
| 63 | const cross_tu::CrossTranslationUnitContext &CTU, |
| 64 | const MacroExpansionContext &MacroExpansions) { |
| 65 | |
| 66 | createSarifDiagnosticConsumerImpl(DiagOpts, C, Output, PP); |
| 67 | |
| 68 | createTextMinimalPathDiagnosticConsumer(Diagopts: std::move(DiagOpts), C, Prefix: Output, PP, |
| 69 | CTU, MacroExpansions); |
| 70 | } |
| 71 | |
| 72 | /// Creates and registers a SARIF diagnostic consumer, without any additional |
| 73 | /// text consumer. |
| 74 | void ento::createSarifDiagnosticConsumerImpl( |
| 75 | PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, |
| 76 | const std::string &Output, const Preprocessor &PP) { |
| 77 | |
| 78 | // TODO: Emit an error here. |
| 79 | if (Output.empty()) |
| 80 | return; |
| 81 | |
| 82 | C.push_back(x: std::make_unique<SarifDiagnostics>(args: Output, args: PP.getLangOpts(), |
| 83 | args&: PP.getSourceManager())); |
| 84 | } |
| 85 | |
| 86 | static StringRef getRuleDescription(StringRef CheckName) { |
| 87 | return llvm::StringSwitch<StringRef>(CheckName) |
| 88 | #define GET_CHECKERS |
| 89 | #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ |
| 90 | .Case(FULLNAME, HELPTEXT) |
| 91 | #include "clang/StaticAnalyzer/Checkers/Checkers.inc" |
| 92 | #undef CHECKER |
| 93 | #undef GET_CHECKERS |
| 94 | ; |
| 95 | } |
| 96 | |
| 97 | static StringRef getRuleHelpURIStr(StringRef CheckName) { |
| 98 | return llvm::StringSwitch<StringRef>(CheckName) |
| 99 | #define GET_CHECKERS |
| 100 | #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ |
| 101 | .Case(FULLNAME, DOC_URI) |
| 102 | #include "clang/StaticAnalyzer/Checkers/Checkers.inc" |
| 103 | #undef CHECKER |
| 104 | #undef GET_CHECKERS |
| 105 | ; |
| 106 | } |
| 107 | |
| 108 | static ThreadFlowImportance |
| 109 | calculateImportance(const PathDiagnosticPiece &Piece) { |
| 110 | switch (Piece.getKind()) { |
| 111 | case PathDiagnosticPiece::Call: |
| 112 | case PathDiagnosticPiece::Macro: |
| 113 | case PathDiagnosticPiece::Note: |
| 114 | case PathDiagnosticPiece::PopUp: |
| 115 | // FIXME: What should be reported here? |
| 116 | break; |
| 117 | case PathDiagnosticPiece::Event: |
| 118 | return Piece.getTagStr() == "ConditionBRVisitor" |
| 119 | ? ThreadFlowImportance::Important |
| 120 | : ThreadFlowImportance::Essential; |
| 121 | case PathDiagnosticPiece::ControlFlow: |
| 122 | return ThreadFlowImportance::Unimportant; |
| 123 | } |
| 124 | return ThreadFlowImportance::Unimportant; |
| 125 | } |
| 126 | |
| 127 | /// Accepts a SourceRange corresponding to a pair of the first and last tokens |
| 128 | /// and converts to a Character granular CharSourceRange. |
| 129 | static CharSourceRange convertTokenRangeToCharRange(const SourceRange &R, |
| 130 | const SourceManager &SM, |
| 131 | const LangOptions &LO) { |
| 132 | // Caret diagnostics have the first and last locations pointed at the same |
| 133 | // location, return these as-is. |
| 134 | if (R.getBegin() == R.getEnd()) |
| 135 | return CharSourceRange::getCharRange(R); |
| 136 | |
| 137 | SourceLocation BeginCharLoc = R.getBegin(); |
| 138 | // For token ranges, the raw end SLoc points at the first character of the |
| 139 | // last token in the range. This must be moved to one past the end of the |
| 140 | // last character using the lexer. |
| 141 | SourceLocation EndCharLoc = |
| 142 | Lexer::getLocForEndOfToken(Loc: R.getEnd(), /* Offset = */ 0, SM, LangOpts: LO); |
| 143 | return CharSourceRange::getCharRange(B: BeginCharLoc, E: EndCharLoc); |
| 144 | } |
| 145 | |
| 146 | static SmallVector<ThreadFlow, 8> createThreadFlows(const PathDiagnostic *Diag, |
| 147 | const LangOptions &LO) { |
| 148 | SmallVector<ThreadFlow, 8> Flows; |
| 149 | const PathPieces &Pieces = Diag->path.flatten(ShouldFlattenMacros: false); |
| 150 | for (const auto &Piece : Pieces) { |
| 151 | auto Range = convertTokenRangeToCharRange( |
| 152 | R: Piece->getLocation().asRange(), SM: Piece->getLocation().getManager(), LO); |
| 153 | auto Flow = ThreadFlow::create() |
| 154 | .setImportance(calculateImportance(Piece: *Piece)) |
| 155 | .setRange(Range) |
| 156 | .setMessage(Piece->getString()); |
| 157 | Flows.push_back(Elt: Flow); |
| 158 | } |
| 159 | return Flows; |
| 160 | } |
| 161 | |
| 162 | static StringMap<uint32_t> |
| 163 | createRuleMapping(const std::vector<const PathDiagnostic *> &Diags, |
| 164 | SarifDocumentWriter &SarifWriter) { |
| 165 | StringMap<uint32_t> RuleMapping; |
| 166 | llvm::StringSet<> Seen; |
| 167 | |
| 168 | for (const PathDiagnostic *D : Diags) { |
| 169 | StringRef CheckName = D->getCheckerName(); |
| 170 | std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(key: CheckName); |
| 171 | if (P.second) { |
| 172 | auto Rule = SarifRule::create() |
| 173 | .setName(CheckName) |
| 174 | .setRuleId(CheckName) |
| 175 | .setDescription(getRuleDescription(CheckName)) |
| 176 | .setHelpURI(getRuleHelpURIStr(CheckName)); |
| 177 | size_t RuleIdx = SarifWriter.createRule(Rule); |
| 178 | RuleMapping[CheckName] = RuleIdx; |
| 179 | } |
| 180 | } |
| 181 | return RuleMapping; |
| 182 | } |
| 183 | |
| 184 | static const llvm::StringRef IssueHashKey = "clang/issueHash/v1" ; |
| 185 | |
| 186 | SarifResult |
| 187 | SarifDiagnostics::createResult(const PathDiagnostic *Diag, |
| 188 | const StringMap<uint32_t> &RuleMapping, |
| 189 | const LangOptions &LO, FilesMade *FM) { |
| 190 | |
| 191 | StringRef CheckName = Diag->getCheckerName(); |
| 192 | uint32_t RuleIdx = RuleMapping.lookup(Key: CheckName); |
| 193 | auto Range = convertTokenRangeToCharRange( |
| 194 | R: Diag->getLocation().asRange(), SM: Diag->getLocation().getManager(), LO); |
| 195 | |
| 196 | SmallVector<ThreadFlow, 8> Flows = createThreadFlows(Diag, LO); |
| 197 | |
| 198 | auto IssueHash = Diag->getIssueHash(SrcMgr: SM, LangOpts: LO); |
| 199 | |
| 200 | std::string HtmlReportURL; |
| 201 | if (FM && !FM->empty()) { |
| 202 | // Find the HTML report that was generated for this issue, if one exists. |
| 203 | PDFileEntry::ConsumerFiles *Files = FM->getFiles(PD: *Diag); |
| 204 | if (Files) { |
| 205 | auto HtmlFile = llvm::find_if(Range&: *Files, P: [](const auto &File) { |
| 206 | return File.first == HTML_DIAGNOSTICS_NAME; |
| 207 | }); |
| 208 | if (HtmlFile != Files->end()) { |
| 209 | SmallString<128> HtmlReportPath = |
| 210 | llvm::sys::path::parent_path(path: OutputFile); |
| 211 | llvm::sys::path::append(path&: HtmlReportPath, a: HtmlFile->second); |
| 212 | HtmlReportURL = SarifDocumentWriter::fileNameToURI(Filename: HtmlReportPath); |
| 213 | } |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | auto Result = SarifResult::create(RuleIdx) |
| 218 | .setRuleId(CheckName) |
| 219 | .setDiagnosticMessage(Diag->getVerboseDescription()) |
| 220 | .setDiagnosticLevel(SarifResultLevel::Warning) |
| 221 | .addLocations(DiagLocs: {Range}) |
| 222 | .addPartialFingerprint(key: IssueHashKey, value: IssueHash) |
| 223 | .setHostedViewerURI(HtmlReportURL) |
| 224 | .setThreadFlows(Flows); |
| 225 | return Result; |
| 226 | } |
| 227 | |
| 228 | void SarifDiagnostics::FlushDiagnosticsImpl( |
| 229 | std::vector<const PathDiagnostic *> &Diags, FilesMade *FM) { |
| 230 | // We currently overwrite the file if it already exists. However, it may be |
| 231 | // useful to add a feature someday that allows the user to append a run to an |
| 232 | // existing SARIF file. One danger from that approach is that the size of the |
| 233 | // file can become large very quickly, so decoding into JSON to append a run |
| 234 | // may be an expensive operation. |
| 235 | std::error_code EC; |
| 236 | llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_TextWithCRLF); |
| 237 | if (EC) { |
| 238 | llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; |
| 239 | return; |
| 240 | } |
| 241 | |
| 242 | std::string ToolVersion = getClangFullVersion(); |
| 243 | SarifWriter.createRun(ShortToolName: "clang" , LongToolName: "clang static analyzer" , ToolVersion); |
| 244 | StringMap<uint32_t> RuleMapping = createRuleMapping(Diags, SarifWriter); |
| 245 | for (const PathDiagnostic *D : Diags) { |
| 246 | SarifResult Result = createResult(Diag: D, RuleMapping, LO, FM); |
| 247 | SarifWriter.appendResult(SarifResult: Result); |
| 248 | } |
| 249 | auto Document = SarifWriter.createDocument(); |
| 250 | OS << llvm::formatv(Fmt: "{0:2}\n" , Vals: json::Value(std::move(Document))); |
| 251 | } |
| 252 | |