| 1 | //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the SarifDiagnostics object. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "clang/Analysis/MacroExpansionContext.h" |
| 14 | #include "clang/Analysis/PathDiagnostic.h" |
| 15 | #include "clang/Basic/Sarif.h" |
| 16 | #include "clang/Basic/SourceManager.h" |
| 17 | #include "clang/Basic/Version.h" |
| 18 | #include "clang/Lex/Preprocessor.h" |
| 19 | #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" |
| 20 | #include "llvm/ADT/StringMap.h" |
| 21 | #include "llvm/Support/ConvertUTF.h" |
| 22 | #include "llvm/Support/JSON.h" |
| 23 | #include <memory> |
| 24 | |
| 25 | using namespace llvm; |
| 26 | using namespace clang; |
| 27 | using namespace ento; |
| 28 | |
| 29 | namespace { |
| 30 | class SarifDiagnostics : public PathDiagnosticConsumer { |
| 31 | std::string OutputFile; |
| 32 | const LangOptions &LO; |
| 33 | SarifDocumentWriter SarifWriter; |
| 34 | |
| 35 | public: |
| 36 | SarifDiagnostics(const std::string &Output, const LangOptions &LO, |
| 37 | const SourceManager &SM) |
| 38 | : OutputFile(Output), LO(LO), SarifWriter(SM) {} |
| 39 | ~SarifDiagnostics() override = default; |
| 40 | |
| 41 | void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags, |
| 42 | FilesMade *FM) override; |
| 43 | |
| 44 | StringRef getName() const override { return "SarifDiagnostics" ; } |
| 45 | PathGenerationScheme getGenerationScheme() const override { return Minimal; } |
| 46 | bool supportsLogicalOpControlFlow() const override { return true; } |
| 47 | bool supportsCrossFileDiagnostics() const override { return true; } |
| 48 | }; |
| 49 | } // end anonymous namespace |
| 50 | |
| 51 | void ento::createSarifDiagnosticConsumer( |
| 52 | PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C, |
| 53 | const std::string &Output, const Preprocessor &PP, |
| 54 | const cross_tu::CrossTranslationUnitContext &CTU, |
| 55 | const MacroExpansionContext &MacroExpansions) { |
| 56 | |
| 57 | // TODO: Emit an error here. |
| 58 | if (Output.empty()) |
| 59 | return; |
| 60 | |
| 61 | C.push_back(x: std::make_unique<SarifDiagnostics>(args: Output, args: PP.getLangOpts(), |
| 62 | args&: PP.getSourceManager())); |
| 63 | createTextMinimalPathDiagnosticConsumer(Diagopts: std::move(DiagOpts), C, Prefix: Output, PP, |
| 64 | CTU, MacroExpansions); |
| 65 | } |
| 66 | |
| 67 | static StringRef getRuleDescription(StringRef CheckName) { |
| 68 | return llvm::StringSwitch<StringRef>(CheckName) |
| 69 | #define GET_CHECKERS |
| 70 | #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ |
| 71 | .Case(FULLNAME, HELPTEXT) |
| 72 | #include "clang/StaticAnalyzer/Checkers/Checkers.inc" |
| 73 | #undef CHECKER |
| 74 | #undef GET_CHECKERS |
| 75 | ; |
| 76 | } |
| 77 | |
| 78 | static StringRef getRuleHelpURIStr(StringRef CheckName) { |
| 79 | return llvm::StringSwitch<StringRef>(CheckName) |
| 80 | #define GET_CHECKERS |
| 81 | #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ |
| 82 | .Case(FULLNAME, DOC_URI) |
| 83 | #include "clang/StaticAnalyzer/Checkers/Checkers.inc" |
| 84 | #undef CHECKER |
| 85 | #undef GET_CHECKERS |
| 86 | ; |
| 87 | } |
| 88 | |
| 89 | static ThreadFlowImportance |
| 90 | calculateImportance(const PathDiagnosticPiece &Piece) { |
| 91 | switch (Piece.getKind()) { |
| 92 | case PathDiagnosticPiece::Call: |
| 93 | case PathDiagnosticPiece::Macro: |
| 94 | case PathDiagnosticPiece::Note: |
| 95 | case PathDiagnosticPiece::PopUp: |
| 96 | // FIXME: What should be reported here? |
| 97 | break; |
| 98 | case PathDiagnosticPiece::Event: |
| 99 | return Piece.getTagStr() == "ConditionBRVisitor" |
| 100 | ? ThreadFlowImportance::Important |
| 101 | : ThreadFlowImportance::Essential; |
| 102 | case PathDiagnosticPiece::ControlFlow: |
| 103 | return ThreadFlowImportance::Unimportant; |
| 104 | } |
| 105 | return ThreadFlowImportance::Unimportant; |
| 106 | } |
| 107 | |
| 108 | /// Accepts a SourceRange corresponding to a pair of the first and last tokens |
| 109 | /// and converts to a Character granular CharSourceRange. |
| 110 | static CharSourceRange convertTokenRangeToCharRange(const SourceRange &R, |
| 111 | const SourceManager &SM, |
| 112 | const LangOptions &LO) { |
| 113 | // Caret diagnostics have the first and last locations pointed at the same |
| 114 | // location, return these as-is. |
| 115 | if (R.getBegin() == R.getEnd()) |
| 116 | return CharSourceRange::getCharRange(R); |
| 117 | |
| 118 | SourceLocation BeginCharLoc = R.getBegin(); |
| 119 | // For token ranges, the raw end SLoc points at the first character of the |
| 120 | // last token in the range. This must be moved to one past the end of the |
| 121 | // last character using the lexer. |
| 122 | SourceLocation EndCharLoc = |
| 123 | Lexer::getLocForEndOfToken(Loc: R.getEnd(), /* Offset = */ 0, SM, LangOpts: LO); |
| 124 | return CharSourceRange::getCharRange(B: BeginCharLoc, E: EndCharLoc); |
| 125 | } |
| 126 | |
| 127 | static SmallVector<ThreadFlow, 8> createThreadFlows(const PathDiagnostic *Diag, |
| 128 | const LangOptions &LO) { |
| 129 | SmallVector<ThreadFlow, 8> Flows; |
| 130 | const PathPieces &Pieces = Diag->path.flatten(ShouldFlattenMacros: false); |
| 131 | for (const auto &Piece : Pieces) { |
| 132 | auto Range = convertTokenRangeToCharRange( |
| 133 | R: Piece->getLocation().asRange(), SM: Piece->getLocation().getManager(), LO); |
| 134 | auto Flow = ThreadFlow::create() |
| 135 | .setImportance(calculateImportance(Piece: *Piece)) |
| 136 | .setRange(Range) |
| 137 | .setMessage(Piece->getString()); |
| 138 | Flows.push_back(Elt: Flow); |
| 139 | } |
| 140 | return Flows; |
| 141 | } |
| 142 | |
| 143 | static StringMap<uint32_t> |
| 144 | createRuleMapping(const std::vector<const PathDiagnostic *> &Diags, |
| 145 | SarifDocumentWriter &SarifWriter) { |
| 146 | StringMap<uint32_t> RuleMapping; |
| 147 | llvm::StringSet<> Seen; |
| 148 | |
| 149 | for (const PathDiagnostic *D : Diags) { |
| 150 | StringRef CheckName = D->getCheckerName(); |
| 151 | std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(key: CheckName); |
| 152 | if (P.second) { |
| 153 | auto Rule = SarifRule::create() |
| 154 | .setName(CheckName) |
| 155 | .setRuleId(CheckName) |
| 156 | .setDescription(getRuleDescription(CheckName)) |
| 157 | .setHelpURI(getRuleHelpURIStr(CheckName)); |
| 158 | size_t RuleIdx = SarifWriter.createRule(Rule); |
| 159 | RuleMapping[CheckName] = RuleIdx; |
| 160 | } |
| 161 | } |
| 162 | return RuleMapping; |
| 163 | } |
| 164 | |
| 165 | static SarifResult createResult(const PathDiagnostic *Diag, |
| 166 | const StringMap<uint32_t> &RuleMapping, |
| 167 | const LangOptions &LO) { |
| 168 | |
| 169 | StringRef CheckName = Diag->getCheckerName(); |
| 170 | uint32_t RuleIdx = RuleMapping.lookup(Key: CheckName); |
| 171 | auto Range = convertTokenRangeToCharRange( |
| 172 | R: Diag->getLocation().asRange(), SM: Diag->getLocation().getManager(), LO); |
| 173 | |
| 174 | SmallVector<ThreadFlow, 8> Flows = createThreadFlows(Diag, LO); |
| 175 | auto Result = SarifResult::create(RuleIdx) |
| 176 | .setRuleId(CheckName) |
| 177 | .setDiagnosticMessage(Diag->getVerboseDescription()) |
| 178 | .setDiagnosticLevel(SarifResultLevel::Warning) |
| 179 | .setLocations({Range}) |
| 180 | .setThreadFlows(Flows); |
| 181 | return Result; |
| 182 | } |
| 183 | |
| 184 | void SarifDiagnostics::FlushDiagnosticsImpl( |
| 185 | std::vector<const PathDiagnostic *> &Diags, FilesMade *) { |
| 186 | // We currently overwrite the file if it already exists. However, it may be |
| 187 | // useful to add a feature someday that allows the user to append a run to an |
| 188 | // existing SARIF file. One danger from that approach is that the size of the |
| 189 | // file can become large very quickly, so decoding into JSON to append a run |
| 190 | // may be an expensive operation. |
| 191 | std::error_code EC; |
| 192 | llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_TextWithCRLF); |
| 193 | if (EC) { |
| 194 | llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; |
| 195 | return; |
| 196 | } |
| 197 | |
| 198 | std::string ToolVersion = getClangFullVersion(); |
| 199 | SarifWriter.createRun(ShortToolName: "clang" , LongToolName: "clang static analyzer" , ToolVersion); |
| 200 | StringMap<uint32_t> RuleMapping = createRuleMapping(Diags, SarifWriter); |
| 201 | for (const PathDiagnostic *D : Diags) { |
| 202 | SarifResult Result = createResult(Diag: D, RuleMapping, LO); |
| 203 | SarifWriter.appendResult(SarifResult: Result); |
| 204 | } |
| 205 | auto Document = SarifWriter.createDocument(); |
| 206 | OS << llvm::formatv(Fmt: "{0:2}\n" , Vals: json::Value(std::move(Document))); |
| 207 | } |
| 208 | |