| 1 | //===--- SourceCode.cpp - Source code manipulation routines -----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file provides functions that simplify extraction of source code. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | #include "clang/Tooling/Transformer/SourceCode.h" |
| 13 | #include "clang/AST/ASTContext.h" |
| 14 | #include "clang/AST/Attr.h" |
| 15 | #include "clang/AST/Comment.h" |
| 16 | #include "clang/AST/Decl.h" |
| 17 | #include "clang/AST/DeclCXX.h" |
| 18 | #include "clang/AST/DeclTemplate.h" |
| 19 | #include "clang/AST/Expr.h" |
| 20 | #include "clang/Basic/SourceManager.h" |
| 21 | #include "clang/Lex/Lexer.h" |
| 22 | #include "llvm/Support/Errc.h" |
| 23 | #include "llvm/Support/Error.h" |
| 24 | #include <set> |
| 25 | |
| 26 | using namespace clang; |
| 27 | |
| 28 | using llvm::errc; |
| 29 | using llvm::StringError; |
| 30 | |
| 31 | StringRef clang::tooling::getText(CharSourceRange Range, |
| 32 | const ASTContext &Context) { |
| 33 | return Lexer::getSourceText(Range, SM: Context.getSourceManager(), |
| 34 | LangOpts: Context.getLangOpts()); |
| 35 | } |
| 36 | |
| 37 | CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range, |
| 38 | tok::TokenKind Next, |
| 39 | ASTContext &Context) { |
| 40 | CharSourceRange R = Lexer::getAsCharRange(Range, SM: Context.getSourceManager(), |
| 41 | LangOpts: Context.getLangOpts()); |
| 42 | if (R.isInvalid()) |
| 43 | return Range; |
| 44 | Token Tok; |
| 45 | bool Err = |
| 46 | Lexer::getRawToken(Loc: R.getEnd(), Result&: Tok, SM: Context.getSourceManager(), |
| 47 | LangOpts: Context.getLangOpts(), /*IgnoreWhiteSpace=*/true); |
| 48 | if (Err || !Tok.is(K: Next)) |
| 49 | return Range; |
| 50 | return CharSourceRange::getTokenRange(B: Range.getBegin(), E: Tok.getLocation()); |
| 51 | } |
| 52 | |
| 53 | llvm::Error clang::tooling::validateRange(const CharSourceRange &Range, |
| 54 | const SourceManager &SM, |
| 55 | bool ) { |
| 56 | if (Range.isInvalid()) |
| 57 | return llvm::make_error<StringError>(Args: errc::invalid_argument, |
| 58 | Args: "Invalid range" ); |
| 59 | |
| 60 | if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) |
| 61 | return llvm::make_error<StringError>( |
| 62 | Args: errc::invalid_argument, Args: "Range starts or ends in a macro expansion" ); |
| 63 | |
| 64 | if (!AllowSystemHeaders) { |
| 65 | if (SM.isInSystemHeader(Loc: Range.getBegin()) || |
| 66 | SM.isInSystemHeader(Loc: Range.getEnd())) |
| 67 | return llvm::make_error<StringError>(Args: errc::invalid_argument, |
| 68 | Args: "Range is in system header" ); |
| 69 | } |
| 70 | |
| 71 | FileIDAndOffset BeginInfo = SM.getDecomposedLoc(Loc: Range.getBegin()); |
| 72 | FileIDAndOffset EndInfo = SM.getDecomposedLoc(Loc: Range.getEnd()); |
| 73 | if (BeginInfo.first != EndInfo.first) |
| 74 | return llvm::make_error<StringError>( |
| 75 | Args: errc::invalid_argument, Args: "Range begins and ends in different files" ); |
| 76 | |
| 77 | if (BeginInfo.second > EndInfo.second) |
| 78 | return llvm::make_error<StringError>(Args: errc::invalid_argument, |
| 79 | Args: "Range's begin is past its end" ); |
| 80 | |
| 81 | return llvm::Error::success(); |
| 82 | } |
| 83 | |
// Validates that `Range` is editable: well-formed, not in a macro, and not in
// a system header. Thin wrapper over validateRange with system headers
// disallowed.
llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range,
                                              const SourceManager &SM) {
  return validateRange(Range, SM, /*AllowSystemHeaders=*/false);
}
| 88 | |
| 89 | // Returns the full set of expansion locations of `Loc` from bottom to top-most |
| 90 | // macro, if `Loc` is spelled in a macro argument. If `Loc` is spelled in the |
| 91 | // macro definition, returns an empty vector. |
| 92 | static llvm::SmallVector<SourceLocation, 2> |
| 93 | getMacroArgumentExpansionLocs(SourceLocation Loc, const SourceManager &SM) { |
| 94 | assert(Loc.isMacroID() && "Location must be in a macro" ); |
| 95 | llvm::SmallVector<SourceLocation, 2> ArgLocs; |
| 96 | while (Loc.isMacroID()) { |
| 97 | const auto &Expansion = SM.getSLocEntry(FID: SM.getFileID(SpellingLoc: Loc)).getExpansion(); |
| 98 | if (Expansion.isMacroArgExpansion()) { |
| 99 | // Check the spelling location of the macro arg, in case the arg itself is |
| 100 | // in a macro expansion. |
| 101 | Loc = Expansion.getSpellingLoc(); |
| 102 | ArgLocs.push_back(Elt: Expansion.getExpansionLocStart()); |
| 103 | } else { |
| 104 | return {}; |
| 105 | } |
| 106 | } |
| 107 | return ArgLocs; |
| 108 | } |
| 109 | |
| 110 | static bool spelledInMacroDefinition(CharSourceRange Range, |
| 111 | const SourceManager &SM) { |
| 112 | if (Range.getBegin().isMacroID() && Range.getEnd().isMacroID()) { |
| 113 | // Check whether the range is entirely within a single macro argument by |
| 114 | // checking if they are in the same macro argument at every level. |
| 115 | auto B = getMacroArgumentExpansionLocs(Loc: Range.getBegin(), SM); |
| 116 | auto E = getMacroArgumentExpansionLocs(Loc: Range.getEnd(), SM); |
| 117 | return B.empty() || B != E; |
| 118 | } |
| 119 | |
| 120 | return Range.getBegin().isMacroID() || Range.getEnd().isMacroID(); |
| 121 | } |
| 122 | |
| 123 | // Returns the expansion char-range of `Loc` if `Loc` is a split token. For |
| 124 | // example, `>>` in nested templates needs the first `>` to be split, otherwise |
| 125 | // the `SourceLocation` of the token would lex as `>>` instead of `>`. |
| 126 | static std::optional<CharSourceRange> |
| 127 | getExpansionForSplitToken(SourceLocation Loc, const SourceManager &SM, |
| 128 | const LangOptions &LangOpts) { |
| 129 | if (Loc.isMacroID()) { |
| 130 | bool Invalid = false; |
| 131 | auto &SLoc = SM.getSLocEntry(FID: SM.getFileID(SpellingLoc: Loc), Invalid: &Invalid); |
| 132 | if (Invalid) |
| 133 | return std::nullopt; |
| 134 | if (auto &Expansion = SLoc.getExpansion(); |
| 135 | !Expansion.isExpansionTokenRange()) { |
| 136 | // A char-range expansion is only used where a token-range would be |
| 137 | // incorrect, and so identifies this as a split token (and importantly, |
| 138 | // not as a macro). |
| 139 | return Expansion.getExpansionLocRange(); |
| 140 | } |
| 141 | } |
| 142 | return std::nullopt; |
| 143 | } |
| 144 | |
| 145 | // If `Range` covers a split token, returns the expansion range, otherwise |
| 146 | // returns `Range`. |
| 147 | static CharSourceRange getRangeForSplitTokens(CharSourceRange Range, |
| 148 | const SourceManager &SM, |
| 149 | const LangOptions &LangOpts) { |
| 150 | if (Range.isTokenRange()) { |
| 151 | auto BeginToken = getExpansionForSplitToken(Loc: Range.getBegin(), SM, LangOpts); |
| 152 | auto EndToken = getExpansionForSplitToken(Loc: Range.getEnd(), SM, LangOpts); |
| 153 | if (EndToken) { |
| 154 | SourceLocation BeginLoc = |
| 155 | BeginToken ? BeginToken->getBegin() : Range.getBegin(); |
| 156 | // We can't use the expansion location with a token-range, because that |
| 157 | // will incorrectly lex the end token, so use a char-range that ends at |
| 158 | // the split. |
| 159 | return CharSourceRange::getCharRange(B: BeginLoc, E: EndToken->getEnd()); |
| 160 | } else if (BeginToken) { |
| 161 | // Since the end token is not split, the whole range covers the split, so |
| 162 | // the only adjustment we make is to use the expansion location of the |
| 163 | // begin token. |
| 164 | return CharSourceRange::getTokenRange(B: BeginToken->getBegin(), |
| 165 | E: Range.getEnd()); |
| 166 | } |
| 167 | } |
| 168 | return Range; |
| 169 | } |
| 170 | |
| 171 | static CharSourceRange getRange(const CharSourceRange &EditRange, |
| 172 | const SourceManager &SM, |
| 173 | const LangOptions &LangOpts, |
| 174 | bool IncludeMacroExpansion) { |
| 175 | CharSourceRange Range; |
| 176 | if (IncludeMacroExpansion) { |
| 177 | Range = Lexer::makeFileCharRange(Range: EditRange, SM, LangOpts); |
| 178 | } else { |
| 179 | auto AdjustedRange = getRangeForSplitTokens(Range: EditRange, SM, LangOpts); |
| 180 | if (spelledInMacroDefinition(Range: AdjustedRange, SM)) |
| 181 | return {}; |
| 182 | |
| 183 | auto B = SM.getSpellingLoc(Loc: AdjustedRange.getBegin()); |
| 184 | auto E = SM.getSpellingLoc(Loc: AdjustedRange.getEnd()); |
| 185 | if (AdjustedRange.isTokenRange()) |
| 186 | E = Lexer::getLocForEndOfToken(Loc: E, Offset: 0, SM, LangOpts); |
| 187 | Range = CharSourceRange::getCharRange(B, E); |
| 188 | } |
| 189 | return Range; |
| 190 | } |
| 191 | |
| 192 | std::optional<CharSourceRange> clang::tooling::getFileRangeForEdit( |
| 193 | const CharSourceRange &EditRange, const SourceManager &SM, |
| 194 | const LangOptions &LangOpts, bool IncludeMacroExpansion) { |
| 195 | CharSourceRange Range = |
| 196 | getRange(EditRange, SM, LangOpts, IncludeMacroExpansion); |
| 197 | bool IsInvalid = llvm::errorToBool(Err: validateEditRange(Range, SM)); |
| 198 | if (IsInvalid) |
| 199 | return std::nullopt; |
| 200 | return Range; |
| 201 | } |
| 202 | |
| 203 | std::optional<CharSourceRange> clang::tooling::getFileRange( |
| 204 | const CharSourceRange &EditRange, const SourceManager &SM, |
| 205 | const LangOptions &LangOpts, bool IncludeMacroExpansion) { |
| 206 | CharSourceRange Range = |
| 207 | getRange(EditRange, SM, LangOpts, IncludeMacroExpansion); |
| 208 | bool IsInvalid = |
| 209 | llvm::errorToBool(Err: validateRange(Range, SM, /*AllowSystemHeaders=*/true)); |
| 210 | if (IsInvalid) |
| 211 | return std::nullopt; |
| 212 | return Range; |
| 213 | } |
| 214 | |
| 215 | static bool startsWithNewline(const SourceManager &SM, const Token &Tok) { |
| 216 | return isVerticalWhitespace(c: SM.getCharacterData(SL: Tok.getLocation())[0]); |
| 217 | } |
| 218 | |
| 219 | static bool contains(const std::set<tok::TokenKind> &Terminators, |
| 220 | const Token &Tok) { |
| 221 | return Terminators.count(x: Tok.getKind()) > 0; |
| 222 | } |
| 223 | |
| 224 | // Returns the exclusive, *file* end location of the entity whose last token is |
| 225 | // at location 'EntityLast'. That is, it returns the location one past the last |
| 226 | // relevant character. |
| 227 | // |
| 228 | // Associated tokens include comments, horizontal whitespace and 'Terminators' |
| 229 | // -- optional tokens, which, if any are found, will be included; if |
| 230 | // 'Terminators' is empty, we will not include any extra tokens beyond comments |
| 231 | // and horizontal whitespace. |
| 232 | static SourceLocation |
| 233 | getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast, |
| 234 | const std::set<tok::TokenKind> &Terminators, |
| 235 | const LangOptions &LangOpts) { |
| 236 | assert(EntityLast.isValid() && "Invalid end location found." ); |
| 237 | |
| 238 | // We remember the last location of a non-horizontal-whitespace token we have |
| 239 | // lexed; this is the location up to which we will want to delete. |
| 240 | // FIXME: Support using the spelling loc here for cases where we want to |
| 241 | // analyze the macro text. |
| 242 | |
| 243 | CharSourceRange ExpansionRange = SM.getExpansionRange(Loc: EntityLast); |
| 244 | // FIXME: Should check isTokenRange(), for the (rare) case that |
| 245 | // `ExpansionRange` is a character range. |
| 246 | std::unique_ptr<Lexer> Lexer = [&]() { |
| 247 | bool Invalid = false; |
| 248 | auto FileOffset = SM.getDecomposedLoc(Loc: ExpansionRange.getEnd()); |
| 249 | llvm::StringRef File = SM.getBufferData(FID: FileOffset.first, Invalid: &Invalid); |
| 250 | assert(!Invalid && "Cannot get file/offset" ); |
| 251 | return std::make_unique<clang::Lexer>( |
| 252 | args: SM.getLocForStartOfFile(FID: FileOffset.first), args: LangOpts, args: File.begin(), |
| 253 | args: File.data() + FileOffset.second, args: File.end()); |
| 254 | }(); |
| 255 | |
| 256 | // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown). |
| 257 | Lexer->SetKeepWhitespaceMode(true); |
| 258 | |
| 259 | // Generally, the code we want to include looks like this ([] are optional), |
| 260 | // If Terminators is empty: |
| 261 | // [ <comment> ] [ <newline> ] |
| 262 | // Otherwise: |
| 263 | // ... <terminator> [ <comment> ] [ <newline> ] |
| 264 | |
| 265 | Token Tok; |
| 266 | bool Terminated = false; |
| 267 | |
| 268 | // First, lex to the current token (which is the last token of the range that |
| 269 | // is definitely associated with the decl). Then, we process the first token |
| 270 | // separately from the rest based on conditions that hold specifically for |
| 271 | // that first token. |
| 272 | // |
| 273 | // We do not search for a terminator if none is required or we've already |
| 274 | // encountered it. Otherwise, if the original `EntityLast` location was in a |
| 275 | // macro expansion, we don't have visibility into the text, so we assume we've |
| 276 | // already terminated. However, we note this assumption with |
| 277 | // `TerminatedByMacro`, because we'll want to handle it somewhat differently |
| 278 | // for the terminators semicolon and comma. These terminators can be safely |
| 279 | // associated with the entity when they appear after the macro -- extra |
| 280 | // semicolons have no effect on the program and a well-formed program won't |
| 281 | // have multiple commas in a row, so we're guaranteed that there is only one. |
| 282 | // |
| 283 | // FIXME: This handling of macros is more conservative than necessary. When |
| 284 | // the end of the expansion coincides with the end of the node, we can still |
| 285 | // safely analyze the code. But, it is more complicated, because we need to |
| 286 | // start by lexing the spelling loc for the first token and then switch to the |
| 287 | // expansion loc. |
| 288 | bool TerminatedByMacro = false; |
| 289 | Lexer->LexFromRawLexer(Result&: Tok); |
| 290 | if (Terminators.empty() || contains(Terminators, Tok)) |
| 291 | Terminated = true; |
| 292 | else if (EntityLast.isMacroID()) { |
| 293 | Terminated = true; |
| 294 | TerminatedByMacro = true; |
| 295 | } |
| 296 | |
| 297 | // We save the most recent candidate for the exclusive end location. |
| 298 | SourceLocation End = Tok.getEndLoc(); |
| 299 | |
| 300 | while (!Terminated) { |
| 301 | // Lex the next token we want to possibly expand the range with. |
| 302 | Lexer->LexFromRawLexer(Result&: Tok); |
| 303 | |
| 304 | switch (Tok.getKind()) { |
| 305 | case tok::eof: |
| 306 | // Unexpected separators. |
| 307 | case tok::l_brace: |
| 308 | case tok::r_brace: |
| 309 | case tok::comma: |
| 310 | return End; |
| 311 | // Whitespace pseudo-tokens. |
| 312 | case tok::unknown: |
| 313 | if (startsWithNewline(SM, Tok)) |
| 314 | // Include at least until the end of the line. |
| 315 | End = Tok.getEndLoc(); |
| 316 | break; |
| 317 | default: |
| 318 | if (contains(Terminators, Tok)) |
| 319 | Terminated = true; |
| 320 | End = Tok.getEndLoc(); |
| 321 | break; |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | do { |
| 326 | // Lex the next token we want to possibly expand the range with. |
| 327 | Lexer->LexFromRawLexer(Result&: Tok); |
| 328 | |
| 329 | switch (Tok.getKind()) { |
| 330 | case tok::unknown: |
| 331 | if (startsWithNewline(SM, Tok)) |
| 332 | // We're done, but include this newline. |
| 333 | return Tok.getEndLoc(); |
| 334 | break; |
| 335 | case tok::comment: |
| 336 | // Include any comments we find on the way. |
| 337 | End = Tok.getEndLoc(); |
| 338 | break; |
| 339 | case tok::semi: |
| 340 | case tok::comma: |
| 341 | if (TerminatedByMacro && contains(Terminators, Tok)) { |
| 342 | End = Tok.getEndLoc(); |
| 343 | // We've found a real terminator. |
| 344 | TerminatedByMacro = false; |
| 345 | break; |
| 346 | } |
| 347 | // Found an unrelated token; stop and don't include it. |
| 348 | return End; |
| 349 | default: |
| 350 | // Found an unrelated token; stop and don't include it. |
| 351 | return End; |
| 352 | } |
| 353 | } while (true); |
| 354 | } |
| 355 | |
| 356 | // Returns the expected terminator tokens for the given declaration. |
| 357 | // |
| 358 | // If we do not know the correct terminator token, returns an empty set. |
| 359 | // |
| 360 | // There are cases where we have more than one possible terminator (for example, |
| 361 | // we find either a comma or a semicolon after a VarDecl). |
| 362 | static std::set<tok::TokenKind> getTerminators(const Decl &D) { |
| 363 | if (llvm::isa<RecordDecl>(Val: D) || llvm::isa<UsingDecl>(Val: D)) |
| 364 | return {tok::semi}; |
| 365 | |
| 366 | if (llvm::isa<FunctionDecl>(Val: D) || llvm::isa<LinkageSpecDecl>(Val: D)) |
| 367 | return {tok::r_brace, tok::semi}; |
| 368 | |
| 369 | if (llvm::isa<VarDecl>(Val: D) || llvm::isa<FieldDecl>(Val: D)) |
| 370 | return {tok::comma, tok::semi}; |
| 371 | |
| 372 | return {}; |
| 373 | } |
| 374 | |
| 375 | // Starting from `Loc`, skips whitespace up to, and including, a single |
| 376 | // newline. Returns the (exclusive) end of any skipped whitespace (that is, the |
| 377 | // location immediately after the whitespace). |
| 378 | static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM, |
| 379 | SourceLocation Loc, |
| 380 | const LangOptions &LangOpts) { |
| 381 | const char *LocChars = SM.getCharacterData(SL: Loc); |
| 382 | int i = 0; |
| 383 | while (isHorizontalWhitespace(c: LocChars[i])) |
| 384 | ++i; |
| 385 | if (isVerticalWhitespace(c: LocChars[i])) |
| 386 | ++i; |
| 387 | return Loc.getLocWithOffset(Offset: i); |
| 388 | } |
| 389 | |
| 390 | // Is `Loc` separated from any following decl by something meaningful (e.g. an |
| 391 | // empty line, a comment), ignoring horizontal whitespace? Since this is a |
| 392 | // heuristic, we return false when in doubt. `Loc` cannot be the first location |
| 393 | // in the file. |
| 394 | static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc, |
| 395 | const LangOptions &LangOpts) { |
| 396 | // If the preceding character is a newline, we'll check for an empty line as a |
| 397 | // separator. However, we can't identify an empty line using tokens, so we |
| 398 | // analyse the characters. If we try to use tokens, we'll just end up with a |
| 399 | // whitespace token, whose characters we'd have to analyse anyhow. |
| 400 | bool Invalid = false; |
| 401 | const char *LocChars = |
| 402 | SM.getCharacterData(SL: Loc.getLocWithOffset(Offset: -1), Invalid: &Invalid); |
| 403 | assert(!Invalid && |
| 404 | "Loc must be a valid character and not the first of the source file." ); |
| 405 | if (isVerticalWhitespace(c: LocChars[0])) { |
| 406 | for (int i = 1; isWhitespace(c: LocChars[i]); ++i) |
| 407 | if (isVerticalWhitespace(c: LocChars[i])) |
| 408 | return true; |
| 409 | } |
| 410 | // We didn't find an empty line, so lex the next token, skipping past any |
| 411 | // whitespace we just scanned. |
| 412 | Token Tok; |
| 413 | bool Failed = Lexer::getRawToken(Loc, Result&: Tok, SM, LangOpts, |
| 414 | /*IgnoreWhiteSpace=*/true); |
| 415 | if (Failed) |
| 416 | // Any text that confuses the lexer seems fair to consider a separation. |
| 417 | return true; |
| 418 | |
| 419 | switch (Tok.getKind()) { |
| 420 | case tok::comment: |
| 421 | case tok::l_brace: |
| 422 | case tok::r_brace: |
| 423 | case tok::eof: |
| 424 | return true; |
| 425 | default: |
| 426 | return false; |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | CharSourceRange tooling::getAssociatedRange(const Decl &Decl, |
| 431 | ASTContext &Context) { |
| 432 | const SourceManager &SM = Context.getSourceManager(); |
| 433 | const LangOptions &LangOpts = Context.getLangOpts(); |
| 434 | CharSourceRange Range = CharSourceRange::getTokenRange(R: Decl.getSourceRange()); |
| 435 | |
| 436 | // First, expand to the start of the template<> declaration if necessary. |
| 437 | if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(Val: &Decl)) { |
| 438 | if (const auto *T = Record->getDescribedClassTemplate()) |
| 439 | if (SM.isBeforeInTranslationUnit(LHS: T->getBeginLoc(), RHS: Range.getBegin())) |
| 440 | Range.setBegin(T->getBeginLoc()); |
| 441 | } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(Val: &Decl)) { |
| 442 | if (const auto *T = F->getDescribedFunctionTemplate()) |
| 443 | if (SM.isBeforeInTranslationUnit(LHS: T->getBeginLoc(), RHS: Range.getBegin())) |
| 444 | Range.setBegin(T->getBeginLoc()); |
| 445 | } |
| 446 | |
| 447 | // Next, expand the end location past trailing comments to include a potential |
| 448 | // newline at the end of the decl's line. |
| 449 | Range.setEnd( |
| 450 | getEntityEndLoc(SM, EntityLast: Decl.getEndLoc(), Terminators: getTerminators(D: Decl), LangOpts)); |
| 451 | Range.setTokenRange(false); |
| 452 | |
| 453 | // Expand to include preceeding associated comments. We ignore any comments |
| 454 | // that are not preceeding the decl, since we've already skipped trailing |
| 455 | // comments with getEntityEndLoc. |
| 456 | if (const RawComment * = |
| 457 | Decl.getASTContext().getRawCommentForDeclNoCache(D: &Decl)) |
| 458 | // Only include a preceding comment if: |
| 459 | // * it is *not* separate from the declaration (not including any newline |
| 460 | // that immediately follows the comment), |
| 461 | // * the decl *is* separate from any following entity (so, there are no |
| 462 | // other entities the comment could refer to), and |
| 463 | // * it is not a IfThisThenThat lint check. |
| 464 | if (SM.isBeforeInTranslationUnit(LHS: Comment->getBeginLoc(), |
| 465 | RHS: Range.getBegin()) && |
| 466 | !atOrBeforeSeparation( |
| 467 | SM, Loc: skipWhitespaceAndNewline(SM, Loc: Comment->getEndLoc(), LangOpts), |
| 468 | LangOpts) && |
| 469 | atOrBeforeSeparation(SM, Loc: Range.getEnd(), LangOpts)) { |
| 470 | const StringRef = Comment->getRawText(SourceMgr: SM); |
| 471 | if (!CommentText.contains(Other: "LINT.IfChange" ) && |
| 472 | !CommentText.contains(Other: "LINT.ThenChange" )) |
| 473 | Range.setBegin(Comment->getBeginLoc()); |
| 474 | } |
| 475 | // Add leading attributes. |
| 476 | for (auto *Attr : Decl.attrs()) { |
| 477 | if (Attr->getLocation().isInvalid() || |
| 478 | !SM.isBeforeInTranslationUnit(LHS: Attr->getLocation(), RHS: Range.getBegin())) |
| 479 | continue; |
| 480 | Range.setBegin(Attr->getLocation()); |
| 481 | |
| 482 | // Extend to the left '[[' or '__attribute((' if we saw the attribute, |
| 483 | // unless it is not a valid location. |
| 484 | bool Invalid; |
| 485 | StringRef Source = |
| 486 | SM.getBufferData(FID: SM.getFileID(SpellingLoc: Range.getBegin()), Invalid: &Invalid); |
| 487 | if (Invalid) |
| 488 | continue; |
| 489 | llvm::StringRef BeforeAttr = |
| 490 | Source.substr(Start: 0, N: SM.getFileOffset(SpellingLoc: Range.getBegin())); |
| 491 | llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim(); |
| 492 | |
| 493 | for (llvm::StringRef Prefix : {"[[" , "__attribute__((" }) { |
| 494 | // Handle whitespace between attribute prefix and attribute value. |
| 495 | if (BeforeAttrStripped.ends_with(Suffix: Prefix)) { |
| 496 | // Move start to start position of prefix, which is |
| 497 | // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix) |
| 498 | // positions to the left. |
| 499 | Range.setBegin(Range.getBegin().getLocWithOffset(Offset: static_cast<int>( |
| 500 | -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size()))); |
| 501 | break; |
| 502 | // If we didn't see '[[' or '__attribute' it's probably coming from a |
| 503 | // macro expansion which is already handled by makeFileCharRange(), |
| 504 | // below. |
| 505 | } |
| 506 | } |
| 507 | } |
| 508 | |
| 509 | // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But, |
| 510 | // Range.getBegin() may be inside an expansion. |
| 511 | return Lexer::makeFileCharRange(Range, SM, LangOpts); |
| 512 | } |
| 513 | |