| 1 | //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file contains the definition of the SarifDocumentWriter class, and |
| 11 | /// associated builders such as: |
| 12 | /// - \ref SarifArtifact |
| 13 | /// - \ref SarifArtifactLocation |
| 14 | /// - \ref SarifRule |
| 15 | /// - \ref SarifResult |
| 16 | //===----------------------------------------------------------------------===// |
| 17 | #include "clang/Basic/Sarif.h" |
| 18 | #include "clang/Basic/SourceLocation.h" |
| 19 | #include "clang/Basic/SourceManager.h" |
| 20 | #include "llvm/ADT/STLExtras.h" |
| 21 | #include "llvm/ADT/StringExtras.h" |
| 22 | #include "llvm/ADT/StringRef.h" |
| 23 | #include "llvm/Support/ConvertUTF.h" |
| 24 | #include "llvm/Support/JSON.h" |
| 25 | #include "llvm/Support/Path.h" |
| 26 | |
| 27 | #include <optional> |
| 28 | #include <string> |
| 29 | #include <utility> |
| 30 | |
| 31 | using namespace clang; |
| 32 | using namespace llvm; |
| 33 | |
| 34 | using clang::detail::SarifArtifact; |
| 35 | using clang::detail::SarifArtifactLocation; |
| 36 | |
| 37 | static StringRef getFileName(FileEntryRef FE) { |
| 38 | StringRef Filename = FE.getFileEntry().tryGetRealPathName(); |
| 39 | if (Filename.empty()) |
| 40 | Filename = FE.getName(); |
| 41 | return Filename; |
| 42 | } |
| 43 | /// \name URI |
| 44 | /// @{ |
| 45 | |
| 46 | /// \internal |
| 47 | /// \brief |
| 48 | /// Return the RFC3986 encoding of the input character. |
| 49 | /// |
| 50 | /// \param C Character to encode to RFC3986. |
| 51 | /// |
| 52 | /// \return The RFC3986 representation of \c C. |
| 53 | static std::string percentEncodeURICharacter(char C) { |
| 54 | // RFC 3986 claims alpha, numeric, and this handful of |
| 55 | // characters are not reserved for the path component and |
| 56 | // should be written out directly. Otherwise, percent |
| 57 | // encode the character and write that out instead of the |
| 58 | // reserved character. |
| 59 | if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=" ).contains(C)) |
| 60 | return std::string(&C, 1); |
| 61 | return "%" + llvm::toHex(Input: StringRef(&C, 1)); |
| 62 | } |
| 63 | |
| 64 | /// \internal |
| 65 | /// \brief Return a URI representing the given file name. |
| 66 | /// |
| 67 | /// \param Filename The filename to be represented as URI. |
| 68 | /// |
| 69 | /// \return RFC3986 URI representing the input file name. |
| 70 | static std::string fileNameToURI(StringRef Filename) { |
| 71 | SmallString<32> Ret = StringRef("file://" ); |
| 72 | |
| 73 | // Get the root name to see if it has a URI authority. |
| 74 | StringRef Root = sys::path::root_name(path: Filename); |
| 75 | if (Root.starts_with(Prefix: "//" )) { |
| 76 | // There is an authority, so add it to the URI. |
| 77 | Ret += Root.drop_front(N: 2).str(); |
| 78 | } else if (!Root.empty()) { |
| 79 | // There is no authority, so end the component and add the root to the URI. |
| 80 | Ret += Twine("/" + Root).str(); |
| 81 | } |
| 82 | |
| 83 | auto Iter = sys::path::begin(path: Filename), End = sys::path::end(path: Filename); |
| 84 | assert(Iter != End && "Expected there to be a non-root path component." ); |
| 85 | // Add the rest of the path components, encoding any reserved characters; |
| 86 | // we skip past the first path component, as it was handled it above. |
| 87 | for (StringRef Component : llvm::make_range(x: ++Iter, y: End)) { |
| 88 | // For reasons unknown to me, we may get a backslash with Windows native |
| 89 | // paths for the initial backslash following the drive component, which |
| 90 | // we need to ignore as a URI path part. |
| 91 | if (Component == "\\" ) |
| 92 | continue; |
| 93 | |
| 94 | // Add the separator between the previous path part and the one being |
| 95 | // currently processed. |
| 96 | Ret += "/" ; |
| 97 | |
| 98 | // URI encode the part. |
| 99 | for (char C : Component) { |
| 100 | Ret += percentEncodeURICharacter(C); |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | return std::string(Ret); |
| 105 | } |
| 106 | /// @} |
| 107 | |
| 108 | /// \brief Calculate the column position expressed in the number of UTF-8 code |
| 109 | /// points from column start to the source location |
| 110 | /// |
| 111 | /// \param Loc The source location whose column needs to be calculated. |
| 112 | /// \param TokenLen Optional hint for when the token is multiple bytes long. |
| 113 | /// |
| 114 | /// \return The column number as a UTF-8 aware byte offset from column start to |
| 115 | /// the effective source location. |
| 116 | static unsigned int adjustColumnPos(FullSourceLoc Loc, |
| 117 | unsigned int TokenLen = 0) { |
| 118 | assert(!Loc.isInvalid() && "invalid Loc when adjusting column position" ); |
| 119 | |
| 120 | FileIDAndOffset LocInfo = Loc.getDecomposedExpansionLoc(); |
| 121 | std::optional<MemoryBufferRef> Buf = |
| 122 | Loc.getManager().getBufferOrNone(FID: LocInfo.first); |
| 123 | assert(Buf && "got an invalid buffer for the location's file" ); |
| 124 | assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && |
| 125 | "token extends past end of buffer?" ); |
| 126 | |
| 127 | // Adjust the offset to be the start of the line, since we'll be counting |
| 128 | // Unicode characters from there until our column offset. |
| 129 | unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); |
| 130 | unsigned int Ret = 1; |
| 131 | while (Off < (LocInfo.second + TokenLen)) { |
| 132 | Off += getNumBytesForUTF8(firstByte: Buf->getBuffer()[Off]); |
| 133 | Ret++; |
| 134 | } |
| 135 | |
| 136 | return Ret; |
| 137 | } |
| 138 | |
| 139 | /// \name SARIF Utilities |
| 140 | /// @{ |
| 141 | |
| 142 | /// \internal |
| 143 | static json::Object createMessage(StringRef Text) { |
| 144 | return json::Object{{.K: "text" , .V: Text.str()}}; |
| 145 | } |
| 146 | |
| 147 | /// \internal |
| 148 | /// \pre CharSourceRange must be a token range |
| 149 | static json::Object createTextRegion(const SourceManager &SM, |
| 150 | const CharSourceRange &R) { |
| 151 | FullSourceLoc BeginCharLoc{R.getBegin(), SM}; |
| 152 | FullSourceLoc EndCharLoc{R.getEnd(), SM}; |
| 153 | json::Object Region{{.K: "startLine" , .V: BeginCharLoc.getExpansionLineNumber()}, |
| 154 | {.K: "startColumn" , .V: adjustColumnPos(Loc: BeginCharLoc)}}; |
| 155 | |
| 156 | if (BeginCharLoc == EndCharLoc) { |
| 157 | Region["endColumn" ] = adjustColumnPos(Loc: BeginCharLoc); |
| 158 | } else { |
| 159 | Region["endLine" ] = EndCharLoc.getExpansionLineNumber(); |
| 160 | Region["endColumn" ] = adjustColumnPos(Loc: EndCharLoc); |
| 161 | } |
| 162 | return Region; |
| 163 | } |
| 164 | |
| 165 | static json::Object createLocation(json::Object &&PhysicalLocation, |
| 166 | StringRef Message = "" ) { |
| 167 | json::Object Ret{{.K: "physicalLocation" , .V: std::move(PhysicalLocation)}}; |
| 168 | if (!Message.empty()) |
| 169 | Ret.insert(E: {.K: "message" , .V: createMessage(Text: Message)}); |
| 170 | return Ret; |
| 171 | } |
| 172 | |
| 173 | static StringRef importanceToStr(ThreadFlowImportance I) { |
| 174 | switch (I) { |
| 175 | case ThreadFlowImportance::Important: |
| 176 | return "important" ; |
| 177 | case ThreadFlowImportance::Essential: |
| 178 | return "essential" ; |
| 179 | case ThreadFlowImportance::Unimportant: |
| 180 | return "unimportant" ; |
| 181 | } |
| 182 | llvm_unreachable("Fully covered switch is not so fully covered" ); |
| 183 | } |
| 184 | |
| 185 | static StringRef resultLevelToStr(SarifResultLevel R) { |
| 186 | switch (R) { |
| 187 | case SarifResultLevel::None: |
| 188 | return "none" ; |
| 189 | case SarifResultLevel::Note: |
| 190 | return "note" ; |
| 191 | case SarifResultLevel::Warning: |
| 192 | return "warning" ; |
| 193 | case SarifResultLevel::Error: |
| 194 | return "error" ; |
| 195 | } |
| 196 | llvm_unreachable("Potentially un-handled SarifResultLevel. " |
| 197 | "Is the switch not fully covered?" ); |
| 198 | } |
| 199 | |
| 200 | static json::Object |
| 201 | createThreadFlowLocation(json::Object &&Location, |
| 202 | const ThreadFlowImportance &Importance) { |
| 203 | return json::Object{{.K: "location" , .V: std::move(Location)}, |
| 204 | {.K: "importance" , .V: importanceToStr(I: Importance)}}; |
| 205 | } |
| 206 | /// @} |
| 207 | |
| 208 | json::Object |
| 209 | SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { |
| 210 | assert(R.isValid() && |
| 211 | "Cannot create a physicalLocation from invalid SourceRange!" ); |
| 212 | assert(R.isCharRange() && |
| 213 | "Cannot create a physicalLocation from a token range!" ); |
| 214 | FullSourceLoc Start{R.getBegin(), SourceMgr}; |
| 215 | OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); |
| 216 | assert(FE && "Diagnostic does not exist within a valid file!" ); |
| 217 | |
| 218 | const std::string &FileURI = fileNameToURI(Filename: getFileName(FE: *FE)); |
| 219 | auto I = CurrentArtifacts.find(Key: FileURI); |
| 220 | |
| 221 | if (I == CurrentArtifacts.end()) { |
| 222 | uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); |
| 223 | const SarifArtifactLocation &Location = |
| 224 | SarifArtifactLocation::create(URI: FileURI).setIndex(Idx); |
| 225 | const SarifArtifact &Artifact = SarifArtifact::create(Loc: Location) |
| 226 | .setRoles({"resultFile" }) |
| 227 | .setLength(FE->getSize()) |
| 228 | .setMimeType("text/plain" ); |
| 229 | auto StatusIter = CurrentArtifacts.insert(KV: {FileURI, Artifact}); |
| 230 | // If inserted, ensure the original iterator points to the newly inserted |
| 231 | // element, so it can be used downstream. |
| 232 | if (StatusIter.second) |
| 233 | I = StatusIter.first; |
| 234 | } |
| 235 | assert(I != CurrentArtifacts.end() && "Failed to insert new artifact" ); |
| 236 | const SarifArtifactLocation &Location = I->second.Location; |
| 237 | json::Object ArtifactLocationObject{{.K: "uri" , .V: Location.URI}}; |
| 238 | if (Location.Index.has_value()) |
| 239 | ArtifactLocationObject["index" ] = *Location.Index; |
| 240 | return json::Object{{{.K: "artifactLocation" , .V: std::move(ArtifactLocationObject)}, |
| 241 | {.K: "region" , .V: createTextRegion(SM: SourceMgr, R)}}}; |
| 242 | } |
| 243 | |
| 244 | json::Object &SarifDocumentWriter::getCurrentTool() { |
| 245 | assert(!Closed && "SARIF Document is closed. " |
| 246 | "Need to call createRun() before using getcurrentTool!" ); |
| 247 | |
| 248 | // Since Closed = false here, expect there to be at least 1 Run, anything |
| 249 | // else is an invalid state. |
| 250 | assert(!Runs.empty() && "There are no runs associated with the document!" ); |
| 251 | |
| 252 | return *Runs.back().getAsObject()->get(K: "tool" )->getAsObject(); |
| 253 | } |
| 254 | |
| 255 | void SarifDocumentWriter::reset() { |
| 256 | CurrentRules.clear(); |
| 257 | CurrentArtifacts.clear(); |
| 258 | } |
| 259 | |
| 260 | void SarifDocumentWriter::endRun() { |
| 261 | // Exit early if trying to close a closed Document. |
| 262 | if (Closed) { |
| 263 | reset(); |
| 264 | return; |
| 265 | } |
| 266 | |
| 267 | // Since Closed = false here, expect there to be at least 1 Run, anything |
| 268 | // else is an invalid state. |
| 269 | assert(!Runs.empty() && "There are no runs associated with the document!" ); |
| 270 | |
| 271 | // Flush all the rules. |
| 272 | json::Object &Tool = getCurrentTool(); |
| 273 | json::Array Rules; |
| 274 | for (const SarifRule &R : CurrentRules) { |
| 275 | json::Object Config{ |
| 276 | {.K: "enabled" , .V: R.DefaultConfiguration.Enabled}, |
| 277 | {.K: "level" , .V: resultLevelToStr(R: R.DefaultConfiguration.Level)}, |
| 278 | {.K: "rank" , .V: R.DefaultConfiguration.Rank}}; |
| 279 | json::Object Rule{ |
| 280 | {.K: "name" , .V: R.Name}, |
| 281 | {.K: "id" , .V: R.Id}, |
| 282 | {.K: "fullDescription" , .V: json::Object{{.K: "text" , .V: R.Description}}}, |
| 283 | {.K: "defaultConfiguration" , .V: std::move(Config)}}; |
| 284 | if (!R.HelpURI.empty()) |
| 285 | Rule["helpUri" ] = R.HelpURI; |
| 286 | Rules.emplace_back(A: std::move(Rule)); |
| 287 | } |
| 288 | json::Object &Driver = *Tool.getObject(K: "driver" ); |
| 289 | Driver["rules" ] = std::move(Rules); |
| 290 | |
| 291 | // Flush all the artifacts. |
| 292 | json::Object &Run = getCurrentRun(); |
| 293 | json::Array *Artifacts = Run.getArray(K: "artifacts" ); |
| 294 | SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; |
| 295 | for (const auto &[K, V] : CurrentArtifacts) |
| 296 | Vec.emplace_back(Args: K, Args: V); |
| 297 | llvm::sort(C&: Vec, Comp: llvm::less_first()); |
| 298 | for (const auto &[_, A] : Vec) { |
| 299 | json::Object Loc{{.K: "uri" , .V: A.Location.URI}}; |
| 300 | if (A.Location.Index.has_value()) { |
| 301 | Loc["index" ] = static_cast<int64_t>(*A.Location.Index); |
| 302 | } |
| 303 | json::Object Artifact; |
| 304 | Artifact["location" ] = std::move(Loc); |
| 305 | if (A.Length.has_value()) |
| 306 | Artifact["length" ] = static_cast<int64_t>(*A.Length); |
| 307 | if (!A.Roles.empty()) |
| 308 | Artifact["roles" ] = json::Array(A.Roles); |
| 309 | if (!A.MimeType.empty()) |
| 310 | Artifact["mimeType" ] = A.MimeType; |
| 311 | if (A.Offset.has_value()) |
| 312 | Artifact["offset" ] = *A.Offset; |
| 313 | Artifacts->push_back(E: json::Value(std::move(Artifact))); |
| 314 | } |
| 315 | |
| 316 | // Clear, reset temporaries before next run. |
| 317 | reset(); |
| 318 | |
| 319 | // Mark the document as closed. |
| 320 | Closed = true; |
| 321 | } |
| 322 | |
| 323 | json::Array |
| 324 | SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { |
| 325 | json::Object Ret{{.K: "locations" , .V: json::Array{}}}; |
| 326 | json::Array Locs; |
| 327 | for (const auto &ThreadFlow : ThreadFlows) { |
| 328 | json::Object PLoc = createPhysicalLocation(R: ThreadFlow.Range); |
| 329 | json::Object Loc = createLocation(PhysicalLocation: std::move(PLoc), Message: ThreadFlow.Message); |
| 330 | Locs.emplace_back( |
| 331 | A: createThreadFlowLocation(Location: std::move(Loc), Importance: ThreadFlow.Importance)); |
| 332 | } |
| 333 | Ret["locations" ] = std::move(Locs); |
| 334 | return json::Array{std::move(Ret)}; |
| 335 | } |
| 336 | |
| 337 | json::Object |
| 338 | SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { |
| 339 | return json::Object{{.K: "threadFlows" , .V: createThreadFlows(ThreadFlows)}}; |
| 340 | } |
| 341 | |
| 342 | void SarifDocumentWriter::createRun(StringRef ShortToolName, |
| 343 | StringRef LongToolName, |
| 344 | StringRef ToolVersion) { |
| 345 | // Clear resources associated with a previous run. |
| 346 | endRun(); |
| 347 | |
| 348 | // Signify a new run has begun. |
| 349 | Closed = false; |
| 350 | |
| 351 | json::Object Tool{ |
| 352 | {.K: "driver" , |
| 353 | .V: json::Object{{.K: "name" , .V: ShortToolName}, |
| 354 | {.K: "fullName" , .V: LongToolName}, |
| 355 | {.K: "language" , .V: "en-US" }, |
| 356 | {.K: "version" , .V: ToolVersion}, |
| 357 | {.K: "informationUri" , |
| 358 | .V: "https://clang.llvm.org/docs/UsersManual.html" }}}}; |
| 359 | json::Object TheRun{{.K: "tool" , .V: std::move(Tool)}, |
| 360 | {.K: "results" , .V: {}}, |
| 361 | {.K: "artifacts" , .V: {}}, |
| 362 | {.K: "columnKind" , .V: "unicodeCodePoints" }}; |
| 363 | Runs.emplace_back(A: std::move(TheRun)); |
| 364 | } |
| 365 | |
| 366 | json::Object &SarifDocumentWriter::getCurrentRun() { |
| 367 | assert(!Closed && |
| 368 | "SARIF Document is closed. " |
| 369 | "Can only getCurrentRun() if document is opened via createRun(), " |
| 370 | "create a run first" ); |
| 371 | |
| 372 | // Since Closed = false here, expect there to be at least 1 Run, anything |
| 373 | // else is an invalid state. |
| 374 | assert(!Runs.empty() && "There are no runs associated with the document!" ); |
| 375 | return *Runs.back().getAsObject(); |
| 376 | } |
| 377 | |
| 378 | size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { |
| 379 | size_t Ret = CurrentRules.size(); |
| 380 | CurrentRules.emplace_back(Args: Rule); |
| 381 | return Ret; |
| 382 | } |
| 383 | |
| 384 | void SarifDocumentWriter::appendResult(const SarifResult &Result) { |
| 385 | size_t RuleIdx = Result.RuleIdx; |
| 386 | assert(RuleIdx < CurrentRules.size() && |
| 387 | "Trying to reference a rule that doesn't exist" ); |
| 388 | const SarifRule &Rule = CurrentRules[RuleIdx]; |
| 389 | assert(Rule.DefaultConfiguration.Enabled && |
| 390 | "Cannot add a result referencing a disabled Rule" ); |
| 391 | json::Object Ret{{.K: "message" , .V: createMessage(Text: Result.DiagnosticMessage)}, |
| 392 | {.K: "ruleIndex" , .V: static_cast<int64_t>(RuleIdx)}, |
| 393 | {.K: "ruleId" , .V: Rule.Id}}; |
| 394 | if (!Result.Locations.empty()) { |
| 395 | json::Array Locs; |
| 396 | for (auto &Range : Result.Locations) { |
| 397 | Locs.emplace_back(A: createLocation(PhysicalLocation: createPhysicalLocation(R: Range))); |
| 398 | } |
| 399 | Ret["locations" ] = std::move(Locs); |
| 400 | } |
| 401 | if (!Result.ThreadFlows.empty()) |
| 402 | Ret["codeFlows" ] = json::Array{createCodeFlow(ThreadFlows: Result.ThreadFlows)}; |
| 403 | |
| 404 | Ret["level" ] = resultLevelToStr( |
| 405 | R: Result.LevelOverride.value_or(u: Rule.DefaultConfiguration.Level)); |
| 406 | |
| 407 | json::Object &Run = getCurrentRun(); |
| 408 | json::Array *Results = Run.getArray(K: "results" ); |
| 409 | Results->emplace_back(A: std::move(Ret)); |
| 410 | } |
| 411 | |
| 412 | json::Object SarifDocumentWriter::createDocument() { |
| 413 | // Flush all temporaries to their destinations if needed. |
| 414 | endRun(); |
| 415 | |
| 416 | json::Object Doc{ |
| 417 | {.K: "$schema" , .V: SchemaURI}, |
| 418 | {.K: "version" , .V: SchemaVersion}, |
| 419 | }; |
| 420 | if (!Runs.empty()) |
| 421 | Doc["runs" ] = json::Array(Runs); |
| 422 | return Doc; |
| 423 | } |
| 424 | |