| 1 | //===--- SourceCode.cpp - Source code manipulation routines -----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file provides functions that simplify extraction of source code. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | #include "clang/Tooling/Transformer/SourceCode.h" |
| 13 | #include "clang/AST/ASTContext.h" |
| 14 | #include "clang/AST/Attr.h" |
| 15 | #include "clang/AST/Comment.h" |
| 16 | #include "clang/AST/Decl.h" |
| 17 | #include "clang/AST/DeclCXX.h" |
| 18 | #include "clang/AST/DeclTemplate.h" |
| 19 | #include "clang/AST/Expr.h" |
| 20 | #include "clang/Basic/SourceManager.h" |
| 21 | #include "clang/Lex/Lexer.h" |
| 22 | #include "llvm/Support/Errc.h" |
| 23 | #include "llvm/Support/Error.h" |
| 24 | #include <set> |
| 25 | |
| 26 | using namespace clang; |
| 27 | |
| 28 | using llvm::errc; |
| 29 | using llvm::StringError; |
| 30 | |
| 31 | StringRef clang::tooling::getText(CharSourceRange Range, |
| 32 | const ASTContext &Context) { |
| 33 | return Lexer::getSourceText(Range, SM: Context.getSourceManager(), |
| 34 | LangOpts: Context.getLangOpts()); |
| 35 | } |
| 36 | |
| 37 | CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range, |
| 38 | tok::TokenKind Next, |
| 39 | ASTContext &Context) { |
| 40 | CharSourceRange R = Lexer::getAsCharRange(Range, SM: Context.getSourceManager(), |
| 41 | LangOpts: Context.getLangOpts()); |
| 42 | if (R.isInvalid()) |
| 43 | return Range; |
| 44 | Token Tok; |
| 45 | bool Err = |
| 46 | Lexer::getRawToken(Loc: R.getEnd(), Result&: Tok, SM: Context.getSourceManager(), |
| 47 | LangOpts: Context.getLangOpts(), /*IgnoreWhiteSpace=*/true); |
| 48 | if (Err || !Tok.is(K: Next)) |
| 49 | return Range; |
| 50 | return CharSourceRange::getTokenRange(B: Range.getBegin(), E: Tok.getLocation()); |
| 51 | } |
| 52 | |
| 53 | llvm::Error clang::tooling::validateRange(const CharSourceRange &Range, |
| 54 | const SourceManager &SM, |
| 55 | bool ) { |
| 56 | if (Range.isInvalid()) |
| 57 | return llvm::make_error<StringError>(Args: errc::invalid_argument, |
| 58 | Args: "Invalid range" ); |
| 59 | |
| 60 | if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) |
| 61 | return llvm::make_error<StringError>( |
| 62 | Args: errc::invalid_argument, Args: "Range starts or ends in a macro expansion" ); |
| 63 | |
| 64 | if (!AllowSystemHeaders) { |
| 65 | if (SM.isInSystemHeader(Loc: Range.getBegin()) || |
| 66 | SM.isInSystemHeader(Loc: Range.getEnd())) |
| 67 | return llvm::make_error<StringError>(Args: errc::invalid_argument, |
| 68 | Args: "Range is in system header" ); |
| 69 | } |
| 70 | |
| 71 | FileIDAndOffset BeginInfo = SM.getDecomposedLoc(Loc: Range.getBegin()); |
| 72 | FileIDAndOffset EndInfo = SM.getDecomposedLoc(Loc: Range.getEnd()); |
| 73 | if (BeginInfo.first != EndInfo.first) |
| 74 | return llvm::make_error<StringError>( |
| 75 | Args: errc::invalid_argument, Args: "Range begins and ends in different files" ); |
| 76 | |
| 77 | if (BeginInfo.second > EndInfo.second) |
| 78 | return llvm::make_error<StringError>(Args: errc::invalid_argument, |
| 79 | Args: "Range's begin is past its end" ); |
| 80 | |
| 81 | return llvm::Error::success(); |
| 82 | } |
| 83 | |
| 84 | llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range, |
| 85 | const SourceManager &SM) { |
| 86 | return validateRange(Range, SM, /*AllowSystemHeaders=*/false); |
| 87 | } |
| 88 | |
| 89 | static bool spelledInMacroDefinition(SourceLocation Loc, |
| 90 | const SourceManager &SM) { |
| 91 | while (Loc.isMacroID()) { |
| 92 | const auto &Expansion = SM.getSLocEntry(FID: SM.getFileID(SpellingLoc: Loc)).getExpansion(); |
| 93 | if (Expansion.isMacroArgExpansion()) { |
| 94 | // Check the spelling location of the macro arg, in case the arg itself is |
| 95 | // in a macro expansion. |
| 96 | Loc = Expansion.getSpellingLoc(); |
| 97 | } else { |
| 98 | return true; |
| 99 | } |
| 100 | } |
| 101 | return false; |
| 102 | } |
| 103 | |
| 104 | // Returns the expansion char-range of `Loc` if `Loc` is a split token. For |
| 105 | // example, `>>` in nested templates needs the first `>` to be split, otherwise |
| 106 | // the `SourceLocation` of the token would lex as `>>` instead of `>`. |
| 107 | static std::optional<CharSourceRange> |
| 108 | getExpansionForSplitToken(SourceLocation Loc, const SourceManager &SM, |
| 109 | const LangOptions &LangOpts) { |
| 110 | if (Loc.isMacroID()) { |
| 111 | bool Invalid = false; |
| 112 | auto &SLoc = SM.getSLocEntry(FID: SM.getFileID(SpellingLoc: Loc), Invalid: &Invalid); |
| 113 | if (Invalid) |
| 114 | return std::nullopt; |
| 115 | if (auto &Expansion = SLoc.getExpansion(); |
| 116 | !Expansion.isExpansionTokenRange()) { |
| 117 | // A char-range expansion is only used where a token-range would be |
| 118 | // incorrect, and so identifies this as a split token (and importantly, |
| 119 | // not as a macro). |
| 120 | return Expansion.getExpansionLocRange(); |
| 121 | } |
| 122 | } |
| 123 | return std::nullopt; |
| 124 | } |
| 125 | |
| 126 | // If `Range` covers a split token, returns the expansion range, otherwise |
| 127 | // returns `Range`. |
| 128 | static CharSourceRange getRangeForSplitTokens(CharSourceRange Range, |
| 129 | const SourceManager &SM, |
| 130 | const LangOptions &LangOpts) { |
| 131 | if (Range.isTokenRange()) { |
| 132 | auto BeginToken = getExpansionForSplitToken(Loc: Range.getBegin(), SM, LangOpts); |
| 133 | auto EndToken = getExpansionForSplitToken(Loc: Range.getEnd(), SM, LangOpts); |
| 134 | if (EndToken) { |
| 135 | SourceLocation BeginLoc = |
| 136 | BeginToken ? BeginToken->getBegin() : Range.getBegin(); |
| 137 | // We can't use the expansion location with a token-range, because that |
| 138 | // will incorrectly lex the end token, so use a char-range that ends at |
| 139 | // the split. |
| 140 | return CharSourceRange::getCharRange(B: BeginLoc, E: EndToken->getEnd()); |
| 141 | } else if (BeginToken) { |
| 142 | // Since the end token is not split, the whole range covers the split, so |
| 143 | // the only adjustment we make is to use the expansion location of the |
| 144 | // begin token. |
| 145 | return CharSourceRange::getTokenRange(B: BeginToken->getBegin(), |
| 146 | E: Range.getEnd()); |
| 147 | } |
| 148 | } |
| 149 | return Range; |
| 150 | } |
| 151 | |
| 152 | static CharSourceRange getRange(const CharSourceRange &EditRange, |
| 153 | const SourceManager &SM, |
| 154 | const LangOptions &LangOpts, |
| 155 | bool IncludeMacroExpansion) { |
| 156 | CharSourceRange Range; |
| 157 | if (IncludeMacroExpansion) { |
| 158 | Range = Lexer::makeFileCharRange(Range: EditRange, SM, LangOpts); |
| 159 | } else { |
| 160 | auto AdjustedRange = getRangeForSplitTokens(Range: EditRange, SM, LangOpts); |
| 161 | if (spelledInMacroDefinition(Loc: AdjustedRange.getBegin(), SM) || |
| 162 | spelledInMacroDefinition(Loc: AdjustedRange.getEnd(), SM)) |
| 163 | return {}; |
| 164 | |
| 165 | auto B = SM.getSpellingLoc(Loc: AdjustedRange.getBegin()); |
| 166 | auto E = SM.getSpellingLoc(Loc: AdjustedRange.getEnd()); |
| 167 | if (AdjustedRange.isTokenRange()) |
| 168 | E = Lexer::getLocForEndOfToken(Loc: E, Offset: 0, SM, LangOpts); |
| 169 | Range = CharSourceRange::getCharRange(B, E); |
| 170 | } |
| 171 | return Range; |
| 172 | } |
| 173 | |
| 174 | std::optional<CharSourceRange> clang::tooling::getFileRangeForEdit( |
| 175 | const CharSourceRange &EditRange, const SourceManager &SM, |
| 176 | const LangOptions &LangOpts, bool IncludeMacroExpansion) { |
| 177 | CharSourceRange Range = |
| 178 | getRange(EditRange, SM, LangOpts, IncludeMacroExpansion); |
| 179 | bool IsInvalid = llvm::errorToBool(Err: validateEditRange(Range, SM)); |
| 180 | if (IsInvalid) |
| 181 | return std::nullopt; |
| 182 | return Range; |
| 183 | } |
| 184 | |
| 185 | std::optional<CharSourceRange> clang::tooling::getFileRange( |
| 186 | const CharSourceRange &EditRange, const SourceManager &SM, |
| 187 | const LangOptions &LangOpts, bool IncludeMacroExpansion) { |
| 188 | CharSourceRange Range = |
| 189 | getRange(EditRange, SM, LangOpts, IncludeMacroExpansion); |
| 190 | bool IsInvalid = |
| 191 | llvm::errorToBool(Err: validateRange(Range, SM, /*AllowSystemHeaders=*/true)); |
| 192 | if (IsInvalid) |
| 193 | return std::nullopt; |
| 194 | return Range; |
| 195 | } |
| 196 | |
| 197 | static bool startsWithNewline(const SourceManager &SM, const Token &Tok) { |
| 198 | return isVerticalWhitespace(c: SM.getCharacterData(SL: Tok.getLocation())[0]); |
| 199 | } |
| 200 | |
| 201 | static bool contains(const std::set<tok::TokenKind> &Terminators, |
| 202 | const Token &Tok) { |
| 203 | return Terminators.count(x: Tok.getKind()) > 0; |
| 204 | } |
| 205 | |
| 206 | // Returns the exclusive, *file* end location of the entity whose last token is |
| 207 | // at location 'EntityLast'. That is, it returns the location one past the last |
| 208 | // relevant character. |
| 209 | // |
| 210 | // Associated tokens include comments, horizontal whitespace and 'Terminators' |
| 211 | // -- optional tokens, which, if any are found, will be included; if |
| 212 | // 'Terminators' is empty, we will not include any extra tokens beyond comments |
| 213 | // and horizontal whitespace. |
| 214 | static SourceLocation |
| 215 | getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast, |
| 216 | const std::set<tok::TokenKind> &Terminators, |
| 217 | const LangOptions &LangOpts) { |
| 218 | assert(EntityLast.isValid() && "Invalid end location found." ); |
| 219 | |
| 220 | // We remember the last location of a non-horizontal-whitespace token we have |
| 221 | // lexed; this is the location up to which we will want to delete. |
| 222 | // FIXME: Support using the spelling loc here for cases where we want to |
| 223 | // analyze the macro text. |
| 224 | |
| 225 | CharSourceRange ExpansionRange = SM.getExpansionRange(Loc: EntityLast); |
| 226 | // FIXME: Should check isTokenRange(), for the (rare) case that |
| 227 | // `ExpansionRange` is a character range. |
| 228 | std::unique_ptr<Lexer> Lexer = [&]() { |
| 229 | bool Invalid = false; |
| 230 | auto FileOffset = SM.getDecomposedLoc(Loc: ExpansionRange.getEnd()); |
| 231 | llvm::StringRef File = SM.getBufferData(FID: FileOffset.first, Invalid: &Invalid); |
| 232 | assert(!Invalid && "Cannot get file/offset" ); |
| 233 | return std::make_unique<clang::Lexer>( |
| 234 | args: SM.getLocForStartOfFile(FID: FileOffset.first), args: LangOpts, args: File.begin(), |
| 235 | args: File.data() + FileOffset.second, args: File.end()); |
| 236 | }(); |
| 237 | |
| 238 | // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown). |
| 239 | Lexer->SetKeepWhitespaceMode(true); |
| 240 | |
| 241 | // Generally, the code we want to include looks like this ([] are optional), |
| 242 | // If Terminators is empty: |
| 243 | // [ <comment> ] [ <newline> ] |
| 244 | // Otherwise: |
| 245 | // ... <terminator> [ <comment> ] [ <newline> ] |
| 246 | |
| 247 | Token Tok; |
| 248 | bool Terminated = false; |
| 249 | |
| 250 | // First, lex to the current token (which is the last token of the range that |
| 251 | // is definitely associated with the decl). Then, we process the first token |
| 252 | // separately from the rest based on conditions that hold specifically for |
| 253 | // that first token. |
| 254 | // |
| 255 | // We do not search for a terminator if none is required or we've already |
| 256 | // encountered it. Otherwise, if the original `EntityLast` location was in a |
| 257 | // macro expansion, we don't have visibility into the text, so we assume we've |
| 258 | // already terminated. However, we note this assumption with |
| 259 | // `TerminatedByMacro`, because we'll want to handle it somewhat differently |
| 260 | // for the terminators semicolon and comma. These terminators can be safely |
| 261 | // associated with the entity when they appear after the macro -- extra |
| 262 | // semicolons have no effect on the program and a well-formed program won't |
| 263 | // have multiple commas in a row, so we're guaranteed that there is only one. |
| 264 | // |
| 265 | // FIXME: This handling of macros is more conservative than necessary. When |
| 266 | // the end of the expansion coincides with the end of the node, we can still |
| 267 | // safely analyze the code. But, it is more complicated, because we need to |
| 268 | // start by lexing the spelling loc for the first token and then switch to the |
| 269 | // expansion loc. |
| 270 | bool TerminatedByMacro = false; |
| 271 | Lexer->LexFromRawLexer(Result&: Tok); |
| 272 | if (Terminators.empty() || contains(Terminators, Tok)) |
| 273 | Terminated = true; |
| 274 | else if (EntityLast.isMacroID()) { |
| 275 | Terminated = true; |
| 276 | TerminatedByMacro = true; |
| 277 | } |
| 278 | |
| 279 | // We save the most recent candidate for the exclusive end location. |
| 280 | SourceLocation End = Tok.getEndLoc(); |
| 281 | |
| 282 | while (!Terminated) { |
| 283 | // Lex the next token we want to possibly expand the range with. |
| 284 | Lexer->LexFromRawLexer(Result&: Tok); |
| 285 | |
| 286 | switch (Tok.getKind()) { |
| 287 | case tok::eof: |
| 288 | // Unexpected separators. |
| 289 | case tok::l_brace: |
| 290 | case tok::r_brace: |
| 291 | case tok::comma: |
| 292 | return End; |
| 293 | // Whitespace pseudo-tokens. |
| 294 | case tok::unknown: |
| 295 | if (startsWithNewline(SM, Tok)) |
| 296 | // Include at least until the end of the line. |
| 297 | End = Tok.getEndLoc(); |
| 298 | break; |
| 299 | default: |
| 300 | if (contains(Terminators, Tok)) |
| 301 | Terminated = true; |
| 302 | End = Tok.getEndLoc(); |
| 303 | break; |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | do { |
| 308 | // Lex the next token we want to possibly expand the range with. |
| 309 | Lexer->LexFromRawLexer(Result&: Tok); |
| 310 | |
| 311 | switch (Tok.getKind()) { |
| 312 | case tok::unknown: |
| 313 | if (startsWithNewline(SM, Tok)) |
| 314 | // We're done, but include this newline. |
| 315 | return Tok.getEndLoc(); |
| 316 | break; |
| 317 | case tok::comment: |
| 318 | // Include any comments we find on the way. |
| 319 | End = Tok.getEndLoc(); |
| 320 | break; |
| 321 | case tok::semi: |
| 322 | case tok::comma: |
| 323 | if (TerminatedByMacro && contains(Terminators, Tok)) { |
| 324 | End = Tok.getEndLoc(); |
| 325 | // We've found a real terminator. |
| 326 | TerminatedByMacro = false; |
| 327 | break; |
| 328 | } |
| 329 | // Found an unrelated token; stop and don't include it. |
| 330 | return End; |
| 331 | default: |
| 332 | // Found an unrelated token; stop and don't include it. |
| 333 | return End; |
| 334 | } |
| 335 | } while (true); |
| 336 | } |
| 337 | |
| 338 | // Returns the expected terminator tokens for the given declaration. |
| 339 | // |
| 340 | // If we do not know the correct terminator token, returns an empty set. |
| 341 | // |
| 342 | // There are cases where we have more than one possible terminator (for example, |
| 343 | // we find either a comma or a semicolon after a VarDecl). |
| 344 | static std::set<tok::TokenKind> getTerminators(const Decl &D) { |
| 345 | if (llvm::isa<RecordDecl>(Val: D) || llvm::isa<UsingDecl>(Val: D)) |
| 346 | return {tok::semi}; |
| 347 | |
| 348 | if (llvm::isa<FunctionDecl>(Val: D) || llvm::isa<LinkageSpecDecl>(Val: D)) |
| 349 | return {tok::r_brace, tok::semi}; |
| 350 | |
| 351 | if (llvm::isa<VarDecl>(Val: D) || llvm::isa<FieldDecl>(Val: D)) |
| 352 | return {tok::comma, tok::semi}; |
| 353 | |
| 354 | return {}; |
| 355 | } |
| 356 | |
| 357 | // Starting from `Loc`, skips whitespace up to, and including, a single |
| 358 | // newline. Returns the (exclusive) end of any skipped whitespace (that is, the |
| 359 | // location immediately after the whitespace). |
| 360 | static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM, |
| 361 | SourceLocation Loc, |
| 362 | const LangOptions &LangOpts) { |
| 363 | const char *LocChars = SM.getCharacterData(SL: Loc); |
| 364 | int i = 0; |
| 365 | while (isHorizontalWhitespace(c: LocChars[i])) |
| 366 | ++i; |
| 367 | if (isVerticalWhitespace(c: LocChars[i])) |
| 368 | ++i; |
| 369 | return Loc.getLocWithOffset(Offset: i); |
| 370 | } |
| 371 | |
| 372 | // Is `Loc` separated from any following decl by something meaningful (e.g. an |
| 373 | // empty line, a comment), ignoring horizontal whitespace? Since this is a |
| 374 | // heuristic, we return false when in doubt. `Loc` cannot be the first location |
| 375 | // in the file. |
| 376 | static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc, |
| 377 | const LangOptions &LangOpts) { |
| 378 | // If the preceding character is a newline, we'll check for an empty line as a |
| 379 | // separator. However, we can't identify an empty line using tokens, so we |
| 380 | // analyse the characters. If we try to use tokens, we'll just end up with a |
| 381 | // whitespace token, whose characters we'd have to analyse anyhow. |
| 382 | bool Invalid = false; |
| 383 | const char *LocChars = |
| 384 | SM.getCharacterData(SL: Loc.getLocWithOffset(Offset: -1), Invalid: &Invalid); |
| 385 | assert(!Invalid && |
| 386 | "Loc must be a valid character and not the first of the source file." ); |
| 387 | if (isVerticalWhitespace(c: LocChars[0])) { |
| 388 | for (int i = 1; isWhitespace(c: LocChars[i]); ++i) |
| 389 | if (isVerticalWhitespace(c: LocChars[i])) |
| 390 | return true; |
| 391 | } |
| 392 | // We didn't find an empty line, so lex the next token, skipping past any |
| 393 | // whitespace we just scanned. |
| 394 | Token Tok; |
| 395 | bool Failed = Lexer::getRawToken(Loc, Result&: Tok, SM, LangOpts, |
| 396 | /*IgnoreWhiteSpace=*/true); |
| 397 | if (Failed) |
| 398 | // Any text that confuses the lexer seems fair to consider a separation. |
| 399 | return true; |
| 400 | |
| 401 | switch (Tok.getKind()) { |
| 402 | case tok::comment: |
| 403 | case tok::l_brace: |
| 404 | case tok::r_brace: |
| 405 | case tok::eof: |
| 406 | return true; |
| 407 | default: |
| 408 | return false; |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | CharSourceRange tooling::getAssociatedRange(const Decl &Decl, |
| 413 | ASTContext &Context) { |
| 414 | const SourceManager &SM = Context.getSourceManager(); |
| 415 | const LangOptions &LangOpts = Context.getLangOpts(); |
| 416 | CharSourceRange Range = CharSourceRange::getTokenRange(R: Decl.getSourceRange()); |
| 417 | |
| 418 | // First, expand to the start of the template<> declaration if necessary. |
| 419 | if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(Val: &Decl)) { |
| 420 | if (const auto *T = Record->getDescribedClassTemplate()) |
| 421 | if (SM.isBeforeInTranslationUnit(LHS: T->getBeginLoc(), RHS: Range.getBegin())) |
| 422 | Range.setBegin(T->getBeginLoc()); |
| 423 | } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(Val: &Decl)) { |
| 424 | if (const auto *T = F->getDescribedFunctionTemplate()) |
| 425 | if (SM.isBeforeInTranslationUnit(LHS: T->getBeginLoc(), RHS: Range.getBegin())) |
| 426 | Range.setBegin(T->getBeginLoc()); |
| 427 | } |
| 428 | |
| 429 | // Next, expand the end location past trailing comments to include a potential |
| 430 | // newline at the end of the decl's line. |
| 431 | Range.setEnd( |
| 432 | getEntityEndLoc(SM, EntityLast: Decl.getEndLoc(), Terminators: getTerminators(D: Decl), LangOpts)); |
| 433 | Range.setTokenRange(false); |
| 434 | |
| 435 | // Expand to include preceeding associated comments. We ignore any comments |
| 436 | // that are not preceeding the decl, since we've already skipped trailing |
| 437 | // comments with getEntityEndLoc. |
| 438 | if (const RawComment * = |
| 439 | Decl.getASTContext().getRawCommentForDeclNoCache(D: &Decl)) |
| 440 | // Only include a preceding comment if: |
| 441 | // * it is *not* separate from the declaration (not including any newline |
| 442 | // that immediately follows the comment), |
| 443 | // * the decl *is* separate from any following entity (so, there are no |
| 444 | // other entities the comment could refer to), and |
| 445 | // * it is not a IfThisThenThat lint check. |
| 446 | if (SM.isBeforeInTranslationUnit(LHS: Comment->getBeginLoc(), |
| 447 | RHS: Range.getBegin()) && |
| 448 | !atOrBeforeSeparation( |
| 449 | SM, Loc: skipWhitespaceAndNewline(SM, Loc: Comment->getEndLoc(), LangOpts), |
| 450 | LangOpts) && |
| 451 | atOrBeforeSeparation(SM, Loc: Range.getEnd(), LangOpts)) { |
| 452 | const StringRef = Comment->getRawText(SourceMgr: SM); |
| 453 | if (!CommentText.contains(Other: "LINT.IfChange" ) && |
| 454 | !CommentText.contains(Other: "LINT.ThenChange" )) |
| 455 | Range.setBegin(Comment->getBeginLoc()); |
| 456 | } |
| 457 | // Add leading attributes. |
| 458 | for (auto *Attr : Decl.attrs()) { |
| 459 | if (Attr->getLocation().isInvalid() || |
| 460 | !SM.isBeforeInTranslationUnit(LHS: Attr->getLocation(), RHS: Range.getBegin())) |
| 461 | continue; |
| 462 | Range.setBegin(Attr->getLocation()); |
| 463 | |
| 464 | // Extend to the left '[[' or '__attribute((' if we saw the attribute, |
| 465 | // unless it is not a valid location. |
| 466 | bool Invalid; |
| 467 | StringRef Source = |
| 468 | SM.getBufferData(FID: SM.getFileID(SpellingLoc: Range.getBegin()), Invalid: &Invalid); |
| 469 | if (Invalid) |
| 470 | continue; |
| 471 | llvm::StringRef BeforeAttr = |
| 472 | Source.substr(Start: 0, N: SM.getFileOffset(SpellingLoc: Range.getBegin())); |
| 473 | llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim(); |
| 474 | |
| 475 | for (llvm::StringRef Prefix : {"[[" , "__attribute__((" }) { |
| 476 | // Handle whitespace between attribute prefix and attribute value. |
| 477 | if (BeforeAttrStripped.ends_with(Suffix: Prefix)) { |
| 478 | // Move start to start position of prefix, which is |
| 479 | // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix) |
| 480 | // positions to the left. |
| 481 | Range.setBegin(Range.getBegin().getLocWithOffset(Offset: static_cast<int>( |
| 482 | -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size()))); |
| 483 | break; |
| 484 | // If we didn't see '[[' or '__attribute' it's probably coming from a |
| 485 | // macro expansion which is already handled by makeFileCharRange(), |
| 486 | // below. |
| 487 | } |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But, |
| 492 | // Range.getBegin() may be inside an expansion. |
| 493 | return Lexer::makeFileCharRange(Range, SM, LangOpts); |
| 494 | } |
| 495 | |