1//===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Tooling/Transformer/RangeSelector.h"
10#include "clang/AST/Expr.h"
11#include "clang/AST/TypeLoc.h"
12#include "clang/ASTMatchers/ASTMatchFinder.h"
13#include "clang/Basic/SourceLocation.h"
14#include "clang/Lex/Lexer.h"
15#include "clang/Tooling/Transformer/SourceCode.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/Errc.h"
18#include "llvm/Support/Error.h"
19#include <string>
20#include <utility>
21
22using namespace clang;
23using namespace transformer;
24
25using ast_matchers::MatchFinder;
26using llvm::Error;
27using llvm::StringError;
28
29using MatchResult = MatchFinder::MatchResult;
30
31static Error invalidArgumentError(Twine Message) {
32 return llvm::make_error<StringError>(Args: llvm::errc::invalid_argument, Args&: Message);
33}
34
35static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
36 return invalidArgumentError(Message: "mismatched type (node id=" + ID +
37 " kind=" + Kind.asStringRef() + ")");
38}
39
40static Error typeError(StringRef ID, const ASTNodeKind &Kind,
41 Twine ExpectedType) {
42 return invalidArgumentError(Message: "mismatched type: expected one of " +
43 ExpectedType + " (node id=" + ID +
44 " kind=" + Kind.asStringRef() + ")");
45}
46
47static Error missingPropertyError(StringRef ID, Twine Description,
48 StringRef Property) {
49 return invalidArgumentError(Message: Description + " requires property '" + Property +
50 "' (node id=" + ID + ")");
51}
52
53static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
54 StringRef ID) {
55 auto &NodesMap = Nodes.getMap();
56 auto It = NodesMap.find(x: ID);
57 if (It == NodesMap.end())
58 return invalidArgumentError(Message: "ID not bound: " + ID);
59 return It->second;
60}
61
62// FIXME: handling of macros should be configurable.
63static SourceLocation findPreviousTokenStart(SourceLocation Start,
64 const SourceManager &SM,
65 const LangOptions &LangOpts) {
66 if (Start.isInvalid() || Start.isMacroID())
67 return SourceLocation();
68
69 SourceLocation BeforeStart = Start.getLocWithOffset(Offset: -1);
70 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
71 return SourceLocation();
72
73 return Lexer::GetBeginningOfToken(Loc: BeforeStart, SM, LangOpts);
74}
75
76// Finds the start location of the previous token of kind \p TK.
77// FIXME: handling of macros should be configurable.
78static SourceLocation findPreviousTokenKind(SourceLocation Start,
79 const SourceManager &SM,
80 const LangOptions &LangOpts,
81 tok::TokenKind TK) {
82 while (true) {
83 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
84 if (L.isInvalid() || L.isMacroID())
85 return SourceLocation();
86
87 Token T;
88 if (Lexer::getRawToken(Loc: L, Result&: T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
89 return SourceLocation();
90
91 if (T.is(K: TK))
92 return T.getLocation();
93
94 Start = L;
95 }
96}
97
98RangeSelector transformer::before(RangeSelector Selector) {
99 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
100 Expected<CharSourceRange> SelectedRange = Selector(Result);
101 if (!SelectedRange)
102 return SelectedRange.takeError();
103 return CharSourceRange::getCharRange(R: SelectedRange->getBegin());
104 };
105}
106
107RangeSelector transformer::after(RangeSelector Selector) {
108 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
109 Expected<CharSourceRange> SelectedRange = Selector(Result);
110 if (!SelectedRange)
111 return SelectedRange.takeError();
112 SourceLocation End = SelectedRange->getEnd();
113 if (SelectedRange->isTokenRange()) {
114 // We need to find the actual (exclusive) end location from which to
115 // create a new source range. However, that's not guaranteed to be valid,
116 // even if the token location itself is valid. So, we create a token range
117 // consisting only of the last token, then map that range back to the
118 // source file. If that succeeds, we have a valid location for the end of
119 // the generated range.
120 CharSourceRange Range = Lexer::makeFileCharRange(
121 Range: CharSourceRange::getTokenRange(R: SelectedRange->getEnd()),
122 SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts());
123 if (Range.isInvalid())
124 return invalidArgumentError(
125 Message: "after: can't resolve sub-range to valid source range");
126 End = Range.getEnd();
127 }
128
129 return CharSourceRange::getCharRange(R: End);
130 };
131}
132
133RangeSelector transformer::node(std::string ID) {
134 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
135 Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID);
136 if (!Node)
137 return Node.takeError();
138 return (Node->get<Decl>() != nullptr ||
139 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
140 ? tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi,
141 Context&: *Result.Context)
142 : CharSourceRange::getTokenRange(
143 R: Node->getSourceRange(/*IncludeQualifier=*/true));
144 };
145}
146
147RangeSelector transformer::statement(std::string ID) {
148 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
149 Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID);
150 if (!Node)
151 return Node.takeError();
152 return tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi,
153 Context&: *Result.Context);
154 };
155}
156
157RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
158 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
159 Expected<CharSourceRange> BeginRange = Begin(Result);
160 if (!BeginRange)
161 return BeginRange.takeError();
162 Expected<CharSourceRange> EndRange = End(Result);
163 if (!EndRange)
164 return EndRange.takeError();
165 SourceLocation B = BeginRange->getBegin();
166 SourceLocation E = EndRange->getEnd();
167 // Note: we are precluding the possibility of sub-token ranges in the case
168 // that EndRange is a token range.
169 if (Result.SourceManager->isBeforeInTranslationUnit(LHS: E, RHS: B)) {
170 return invalidArgumentError(Message: "Bad range: out of order");
171 }
172 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
173 };
174}
175
176RangeSelector transformer::encloseNodes(std::string BeginID,
177 std::string EndID) {
178 return transformer::enclose(Begin: node(ID: std::move(BeginID)), End: node(ID: std::move(EndID)));
179}
180
181RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) {
182 return [First,
183 Second](const MatchResult &Result) -> Expected<CharSourceRange> {
184 Expected<CharSourceRange> FirstRange = First(Result);
185 if (!FirstRange)
186 return FirstRange.takeError();
187 Expected<CharSourceRange> SecondRange = Second(Result);
188 if (!SecondRange)
189 return SecondRange.takeError();
190
191 SourceLocation FirstB = FirstRange->getBegin();
192 SourceLocation FirstE = FirstRange->getEnd();
193 SourceLocation SecondB = SecondRange->getBegin();
194 SourceLocation SecondE = SecondRange->getEnd();
195 // Result begin loc is the minimum of the begin locs of the two ranges.
196 SourceLocation B =
197 Result.SourceManager->isBeforeInTranslationUnit(LHS: FirstB, RHS: SecondB)
198 ? FirstB
199 : SecondB;
200 if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) {
201 // Both ranges are token ranges. Just take the maximum of their end locs.
202 SourceLocation E =
203 Result.SourceManager->isBeforeInTranslationUnit(LHS: FirstE, RHS: SecondE)
204 ? SecondE
205 : FirstE;
206 return CharSourceRange::getTokenRange(B, E);
207 }
208
209 if (FirstRange->isTokenRange()) {
210 // The end of the first range is a token. Need to resolve the token to a
211 // char range.
212 FirstE = Lexer::getLocForEndOfToken(Loc: FirstE, /*Offset=*/0,
213 SM: *Result.SourceManager,
214 LangOpts: Result.Context->getLangOpts());
215 if (FirstE.isInvalid())
216 return invalidArgumentError(
217 Message: "merge: can't resolve first token range to valid source range");
218 }
219 if (SecondRange->isTokenRange()) {
220 // The end of the second range is a token. Need to resolve the token to a
221 // char range.
222 SecondE = Lexer::getLocForEndOfToken(Loc: SecondE, /*Offset=*/0,
223 SM: *Result.SourceManager,
224 LangOpts: Result.Context->getLangOpts());
225 if (SecondE.isInvalid())
226 return invalidArgumentError(
227 Message: "merge: can't resolve second token range to valid source range");
228 }
229 // Result end loc is the maximum of the end locs of the two ranges.
230 SourceLocation E =
231 Result.SourceManager->isBeforeInTranslationUnit(LHS: FirstE, RHS: SecondE)
232 ? SecondE
233 : FirstE;
234 return CharSourceRange::getCharRange(B, E);
235 };
236}
237
238RangeSelector transformer::member(std::string ID) {
239 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
240 Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID);
241 if (!Node)
242 return Node.takeError();
243 if (auto *M = Node->get<clang::MemberExpr>())
244 return CharSourceRange::getTokenRange(
245 R: M->getMemberNameInfo().getSourceRange());
246 return typeError(ID, Kind: Node->getNodeKind(), ExpectedType: "MemberExpr");
247 };
248}
249
250RangeSelector transformer::name(std::string ID) {
251 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
252 Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID);
253 if (!N)
254 return N.takeError();
255 auto &Node = *N;
256 if (const auto *D = Node.get<NamedDecl>()) {
257 if (!D->getDeclName().isIdentifier())
258 return missingPropertyError(ID, Description: "name", Property: "identifier");
259 SourceLocation L = D->getLocation();
260 auto R = CharSourceRange::getTokenRange(B: L, E: L);
261 // Verify that the range covers exactly the name.
262 // FIXME: extend this code to support cases like `operator +` or
263 // `foo<int>` for which this range will be too short. Doing so will
264 // require subcasing `NamedDecl`, because it doesn't provide virtual
265 // access to the \c DeclarationNameInfo.
266 StringRef Text = tooling::getText(Range: R, Context: *Result.Context);
267 if (Text != D->getName())
268 return llvm::make_error<StringError>(
269 Args: llvm::errc::not_supported,
270 Args: "range selected by name(node id=" + ID + "): '" + Text +
271 "' is different from decl name '" + D->getName() + "'");
272 return R;
273 }
274 if (const auto *E = Node.get<DeclRefExpr>()) {
275 if (!E->getNameInfo().getName().isIdentifier())
276 return missingPropertyError(ID, Description: "name", Property: "identifier");
277 SourceLocation L = E->getLocation();
278 return CharSourceRange::getTokenRange(B: L, E: L);
279 }
280 if (const auto *I = Node.get<CXXCtorInitializer>()) {
281 if (!I->isMemberInitializer() && I->isWritten())
282 return missingPropertyError(ID, Description: "name", Property: "explicit member initializer");
283 SourceLocation L = I->getMemberLocation();
284 return CharSourceRange::getTokenRange(B: L, E: L);
285 }
286 if (const auto *T = Node.get<TypeLoc>()) {
287 if (auto SpecLoc = T->getAs<TemplateSpecializationTypeLoc>();
288 !SpecLoc.isNull())
289 return CharSourceRange::getTokenRange(R: SpecLoc.getTemplateNameLoc());
290 return CharSourceRange::getTokenRange(R: T->getSourceRange());
291 }
292 return typeError(ID, Kind: Node.getNodeKind(),
293 ExpectedType: "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
294 };
295}
296
297namespace {
298// FIXME: make this available in the public API for users to easily create their
299// own selectors.
300
301// Creates a selector from a range-selection function \p Func, which selects a
302// range that is relative to a bound node id. \c T is the node type expected by
303// \p Func.
304template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
305class RelativeSelector {
306 std::string ID;
307
308public:
309 RelativeSelector(std::string ID) : ID(std::move(ID)) {}
310
311 Expected<CharSourceRange> operator()(const MatchResult &Result) {
312 Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID);
313 if (!N)
314 return N.takeError();
315 if (const auto *Arg = N->get<T>())
316 return Func(Result, *Arg);
317 return typeError(ID, Kind: N->getNodeKind());
318 }
319};
320} // namespace
321
322// FIXME: Change the following functions from being in an anonymous namespace
323// to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
324// (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
325// namespace works around a bug in earlier versions.
326namespace {
327// Returns the range of the statements (all source between the braces).
328CharSourceRange getStatementsRange(const MatchResult &,
329 const CompoundStmt &CS) {
330 return CharSourceRange::getCharRange(B: CS.getLBracLoc().getLocWithOffset(Offset: 1),
331 E: CS.getRBracLoc());
332}
333} // namespace
334
335RangeSelector transformer::statements(std::string ID) {
336 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
337}
338
339namespace {
340
341SourceLocation findArgStartDelimiter(const CallExpr &E, SourceLocation RLoc,
342 const SourceManager &SM,
343 const LangOptions &LangOpts) {
344 SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(Arg: 0)->getBeginLoc();
345 return findPreviousTokenKind(Start: Loc, SM, LangOpts, TK: tok::TokenKind::l_paren);
346}
347
348// Returns the location after the last argument of the construct expr. Returns
349// an invalid location if there are no arguments.
350SourceLocation findLastArgEnd(const CXXConstructExpr &CE,
351 const SourceManager &SM,
352 const LangOptions &LangOpts) {
353 for (int i = CE.getNumArgs() - 1; i >= 0; --i) {
354 const Expr *Arg = CE.getArg(Arg: i);
355 if (isa<CXXDefaultArgExpr>(Val: Arg))
356 continue;
357 return Lexer::getLocForEndOfToken(Loc: Arg->getEndLoc(), Offset: 0, SM, LangOpts);
358 }
359 return {};
360}
361
362// Returns the range of the source between the call's parentheses/braces.
363CharSourceRange getCallArgumentsRange(const MatchResult &Result,
364 const CallExpr &CE) {
365 const SourceLocation RLoc = CE.getRParenLoc();
366 return CharSourceRange::getCharRange(
367 B: findArgStartDelimiter(E: CE, RLoc, SM: *Result.SourceManager,
368 LangOpts: Result.Context->getLangOpts())
369 .getLocWithOffset(Offset: 1),
370 E: RLoc);
371}
372
373// Returns the range of the source between the construct expr's
374// parentheses/braces.
375CharSourceRange getConstructArgumentsRange(const MatchResult &Result,
376 const CXXConstructExpr &CE) {
377 if (SourceRange R = CE.getParenOrBraceRange(); R.isValid()) {
378 return CharSourceRange::getCharRange(
379 B: Lexer::getLocForEndOfToken(Loc: R.getBegin(), Offset: 0, SM: *Result.SourceManager,
380 LangOpts: Result.Context->getLangOpts()),
381 E: R.getEnd());
382 }
383
384 if (CE.getNumArgs() > 0) {
385 return CharSourceRange::getCharRange(
386 B: CE.getArg(Arg: 0)->getBeginLoc(),
387 E: findLastArgEnd(CE, SM: *Result.SourceManager,
388 LangOpts: Result.Context->getLangOpts()));
389 }
390
391 return {};
392}
393
394} // namespace
395
396RangeSelector transformer::callArgs(std::string ID) {
397 return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
398}
399
400RangeSelector transformer::constructExprArgs(std::string ID) {
401 return RelativeSelector<CXXConstructExpr, getConstructArgumentsRange>(
402 std::move(ID));
403}
404
405namespace {
406// Returns the range of the elements of the initializer list. Includes all
407// source between the braces.
408CharSourceRange getElementsRange(const MatchResult &,
409 const InitListExpr &E) {
410 return CharSourceRange::getCharRange(B: E.getLBraceLoc().getLocWithOffset(Offset: 1),
411 E: E.getRBraceLoc());
412}
413} // namespace
414
415RangeSelector transformer::initListElements(std::string ID) {
416 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
417}
418
419namespace {
420// Returns the range of the else branch, including the `else` keyword.
421CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
422 return tooling::maybeExtendRange(
423 Range: CharSourceRange::getTokenRange(B: S.getElseLoc(), E: S.getEndLoc()),
424 Terminator: tok::TokenKind::semi, Context&: *Result.Context);
425}
426} // namespace
427
428RangeSelector transformer::elseBranch(std::string ID) {
429 return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
430}
431
432RangeSelector transformer::expansion(RangeSelector S) {
433 return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
434 Expected<CharSourceRange> SRange = S(Result);
435 if (!SRange)
436 return SRange.takeError();
437 return Result.SourceManager->getExpansionRange(Range: *SRange);
438 };
439}
440