1//===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Tooling/Transformer/RangeSelector.h"
10#include "clang/AST/Expr.h"
11#include "clang/AST/TypeLoc.h"
12#include "clang/ASTMatchers/ASTMatchFinder.h"
13#include "clang/Basic/SourceLocation.h"
14#include "clang/Lex/Lexer.h"
15#include "clang/Tooling/Transformer/SourceCode.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/Errc.h"
18#include "llvm/Support/Error.h"
19#include <string>
20#include <utility>
21#include <vector>
22
23using namespace clang;
24using namespace transformer;
25
26using ast_matchers::MatchFinder;
27using llvm::Error;
28using llvm::StringError;
29
30using MatchResult = MatchFinder::MatchResult;
31
32static Error invalidArgumentError(Twine Message) {
33 return llvm::make_error<StringError>(Args: llvm::errc::invalid_argument, Args&: Message);
34}
35
36static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
37 return invalidArgumentError(Message: "mismatched type (node id=" + ID +
38 " kind=" + Kind.asStringRef() + ")");
39}
40
41static Error typeError(StringRef ID, const ASTNodeKind &Kind,
42 Twine ExpectedType) {
43 return invalidArgumentError(Message: "mismatched type: expected one of " +
44 ExpectedType + " (node id=" + ID +
45 " kind=" + Kind.asStringRef() + ")");
46}
47
48static Error missingPropertyError(StringRef ID, Twine Description,
49 StringRef Property) {
50 return invalidArgumentError(Message: Description + " requires property '" + Property +
51 "' (node id=" + ID + ")");
52}
53
54static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
55 StringRef ID) {
56 auto &NodesMap = Nodes.getMap();
57 auto It = NodesMap.find(x: ID);
58 if (It == NodesMap.end())
59 return invalidArgumentError(Message: "ID not bound: " + ID);
60 return It->second;
61}
62
63// FIXME: handling of macros should be configurable.
64static SourceLocation findPreviousTokenStart(SourceLocation Start,
65 const SourceManager &SM,
66 const LangOptions &LangOpts) {
67 if (Start.isInvalid() || Start.isMacroID())
68 return SourceLocation();
69
70 SourceLocation BeforeStart = Start.getLocWithOffset(Offset: -1);
71 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
72 return SourceLocation();
73
74 return Lexer::GetBeginningOfToken(Loc: BeforeStart, SM, LangOpts);
75}
76
77// Finds the start location of the previous token of kind \p TK.
78// FIXME: handling of macros should be configurable.
79static SourceLocation findPreviousTokenKind(SourceLocation Start,
80 const SourceManager &SM,
81 const LangOptions &LangOpts,
82 tok::TokenKind TK) {
83 while (true) {
84 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
85 if (L.isInvalid() || L.isMacroID())
86 return SourceLocation();
87
88 Token T;
89 if (Lexer::getRawToken(Loc: L, Result&: T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
90 return SourceLocation();
91
92 if (T.is(K: TK))
93 return T.getLocation();
94
95 Start = L;
96 }
97}
98
99RangeSelector transformer::before(RangeSelector Selector) {
100 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
101 Expected<CharSourceRange> SelectedRange = Selector(Result);
102 if (!SelectedRange)
103 return SelectedRange.takeError();
104 return CharSourceRange::getCharRange(R: SelectedRange->getBegin());
105 };
106}
107
108RangeSelector transformer::after(RangeSelector Selector) {
109 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
110 Expected<CharSourceRange> SelectedRange = Selector(Result);
111 if (!SelectedRange)
112 return SelectedRange.takeError();
113 SourceLocation End = SelectedRange->getEnd();
114 if (SelectedRange->isTokenRange()) {
115 // We need to find the actual (exclusive) end location from which to
116 // create a new source range. However, that's not guaranteed to be valid,
117 // even if the token location itself is valid. So, we create a token range
118 // consisting only of the last token, then map that range back to the
119 // source file. If that succeeds, we have a valid location for the end of
120 // the generated range.
121 CharSourceRange Range = Lexer::makeFileCharRange(
122 Range: CharSourceRange::getTokenRange(R: SelectedRange->getEnd()),
123 SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts());
124 if (Range.isInvalid())
125 return invalidArgumentError(
126 Message: "after: can't resolve sub-range to valid source range");
127 End = Range.getEnd();
128 }
129
130 return CharSourceRange::getCharRange(R: End);
131 };
132}
133
134RangeSelector transformer::node(std::string ID) {
135 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
136 Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID);
137 if (!Node)
138 return Node.takeError();
139 return (Node->get<Decl>() != nullptr ||
140 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
141 ? tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi,
142 Context&: *Result.Context)
143 : CharSourceRange::getTokenRange(R: Node->getSourceRange());
144 };
145}
146
147RangeSelector transformer::statement(std::string ID) {
148 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
149 Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID);
150 if (!Node)
151 return Node.takeError();
152 return tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi,
153 Context&: *Result.Context);
154 };
155}
156
157RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
158 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
159 Expected<CharSourceRange> BeginRange = Begin(Result);
160 if (!BeginRange)
161 return BeginRange.takeError();
162 Expected<CharSourceRange> EndRange = End(Result);
163 if (!EndRange)
164 return EndRange.takeError();
165 SourceLocation B = BeginRange->getBegin();
166 SourceLocation E = EndRange->getEnd();
167 // Note: we are precluding the possibility of sub-token ranges in the case
168 // that EndRange is a token range.
169 if (Result.SourceManager->isBeforeInTranslationUnit(LHS: E, RHS: B)) {
170 return invalidArgumentError(Message: "Bad range: out of order");
171 }
172 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
173 };
174}
175
176RangeSelector transformer::encloseNodes(std::string BeginID,
177 std::string EndID) {
178 return transformer::enclose(Begin: node(ID: std::move(BeginID)), End: node(ID: std::move(EndID)));
179}
180
181RangeSelector transformer::member(std::string ID) {
182 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
183 Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID);
184 if (!Node)
185 return Node.takeError();
186 if (auto *M = Node->get<clang::MemberExpr>())
187 return CharSourceRange::getTokenRange(
188 R: M->getMemberNameInfo().getSourceRange());
189 return typeError(ID, Kind: Node->getNodeKind(), ExpectedType: "MemberExpr");
190 };
191}
192
193RangeSelector transformer::name(std::string ID) {
194 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
195 Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID);
196 if (!N)
197 return N.takeError();
198 auto &Node = *N;
199 if (const auto *D = Node.get<NamedDecl>()) {
200 if (!D->getDeclName().isIdentifier())
201 return missingPropertyError(ID, Description: "name", Property: "identifier");
202 SourceLocation L = D->getLocation();
203 auto R = CharSourceRange::getTokenRange(B: L, E: L);
204 // Verify that the range covers exactly the name.
205 // FIXME: extend this code to support cases like `operator +` or
206 // `foo<int>` for which this range will be too short. Doing so will
207 // require subcasing `NamedDecl`, because it doesn't provide virtual
208 // access to the \c DeclarationNameInfo.
209 if (tooling::getText(Range: R, Context: *Result.Context) != D->getName())
210 return CharSourceRange();
211 return R;
212 }
213 if (const auto *E = Node.get<DeclRefExpr>()) {
214 if (!E->getNameInfo().getName().isIdentifier())
215 return missingPropertyError(ID, Description: "name", Property: "identifier");
216 SourceLocation L = E->getLocation();
217 return CharSourceRange::getTokenRange(B: L, E: L);
218 }
219 if (const auto *I = Node.get<CXXCtorInitializer>()) {
220 if (!I->isMemberInitializer() && I->isWritten())
221 return missingPropertyError(ID, Description: "name", Property: "explicit member initializer");
222 SourceLocation L = I->getMemberLocation();
223 return CharSourceRange::getTokenRange(B: L, E: L);
224 }
225 if (const auto *T = Node.get<TypeLoc>()) {
226 TypeLoc Loc = *T;
227 auto ET = Loc.getAs<ElaboratedTypeLoc>();
228 if (!ET.isNull())
229 Loc = ET.getNamedTypeLoc();
230 if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>();
231 !SpecLoc.isNull())
232 return CharSourceRange::getTokenRange(R: SpecLoc.getTemplateNameLoc());
233 return CharSourceRange::getTokenRange(R: Loc.getSourceRange());
234 }
235 return typeError(ID, Kind: Node.getNodeKind(),
236 ExpectedType: "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
237 };
238}
239
240namespace {
241// FIXME: make this available in the public API for users to easily create their
242// own selectors.
243
244// Creates a selector from a range-selection function \p Func, which selects a
245// range that is relative to a bound node id. \c T is the node type expected by
246// \p Func.
247template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
248class RelativeSelector {
249 std::string ID;
250
251public:
252 RelativeSelector(std::string ID) : ID(std::move(ID)) {}
253
254 Expected<CharSourceRange> operator()(const MatchResult &Result) {
255 Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID);
256 if (!N)
257 return N.takeError();
258 if (const auto *Arg = N->get<T>())
259 return Func(Result, *Arg);
260 return typeError(ID, Kind: N->getNodeKind());
261 }
262};
263} // namespace
264
265// FIXME: Change the following functions from being in an anonymous namespace
266// to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
267// (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
268// namespace works around a bug in earlier versions.
269namespace {
270// Returns the range of the statements (all source between the braces).
271CharSourceRange getStatementsRange(const MatchResult &,
272 const CompoundStmt &CS) {
273 return CharSourceRange::getCharRange(B: CS.getLBracLoc().getLocWithOffset(Offset: 1),
274 E: CS.getRBracLoc());
275}
276} // namespace
277
278RangeSelector transformer::statements(std::string ID) {
279 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
280}
281
282namespace {
283
284SourceLocation getRLoc(const CallExpr &E) { return E.getRParenLoc(); }
285
286SourceLocation getRLoc(const CXXConstructExpr &E) {
287 return E.getParenOrBraceRange().getEnd();
288}
289
290tok::TokenKind getStartToken(const CallExpr &E) {
291 return tok::TokenKind::l_paren;
292}
293
294tok::TokenKind getStartToken(const CXXConstructExpr &E) {
295 return isa<CXXTemporaryObjectExpr>(Val: E) ? tok::TokenKind::l_paren
296 : tok::TokenKind::l_brace;
297}
298
299template <typename ExprWithArgs>
300SourceLocation findArgStartDelimiter(const ExprWithArgs &E, SourceLocation RLoc,
301 const SourceManager &SM,
302 const LangOptions &LangOpts) {
303 SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(0)->getBeginLoc();
304 return findPreviousTokenKind(Loc, SM, LangOpts, getStartToken(E));
305}
306// Returns the range of the source between the call's or construct expr's
307// parentheses/braces.
308template <typename ExprWithArgs>
309CharSourceRange getArgumentsRange(const MatchResult &Result,
310 const ExprWithArgs &CE) {
311 const SourceLocation RLoc = getRLoc(CE);
312 return CharSourceRange::getCharRange(
313 findArgStartDelimiter(CE, RLoc, *Result.SourceManager,
314 Result.Context->getLangOpts())
315 .getLocWithOffset(1),
316 RLoc);
317}
318} // namespace
319
320RangeSelector transformer::callArgs(std::string ID) {
321 return RelativeSelector<CallExpr, getArgumentsRange<CallExpr>>(std::move(ID));
322}
323
324RangeSelector transformer::constructExprArgs(std::string ID) {
325 return RelativeSelector<CXXConstructExpr,
326 getArgumentsRange<CXXConstructExpr>>(std::move(ID));
327}
328
329namespace {
330// Returns the range of the elements of the initializer list. Includes all
331// source between the braces.
332CharSourceRange getElementsRange(const MatchResult &,
333 const InitListExpr &E) {
334 return CharSourceRange::getCharRange(B: E.getLBraceLoc().getLocWithOffset(Offset: 1),
335 E: E.getRBraceLoc());
336}
337} // namespace
338
339RangeSelector transformer::initListElements(std::string ID) {
340 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
341}
342
343namespace {
344// Returns the range of the else branch, including the `else` keyword.
345CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
346 return tooling::maybeExtendRange(
347 Range: CharSourceRange::getTokenRange(B: S.getElseLoc(), E: S.getEndLoc()),
348 Terminator: tok::TokenKind::semi, Context&: *Result.Context);
349}
350} // namespace
351
352RangeSelector transformer::elseBranch(std::string ID) {
353 return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
354}
355
356RangeSelector transformer::expansion(RangeSelector S) {
357 return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
358 Expected<CharSourceRange> SRange = S(Result);
359 if (!SRange)
360 return SRange.takeError();
361 return Result.SourceManager->getExpansionRange(Range: *SRange);
362 };
363}
364