1 | //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Transformer/RangeSelector.h" |
10 | #include "clang/AST/Expr.h" |
11 | #include "clang/AST/TypeLoc.h" |
12 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
13 | #include "clang/Basic/SourceLocation.h" |
14 | #include "clang/Lex/Lexer.h" |
15 | #include "clang/Tooling/Transformer/SourceCode.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/Support/Errc.h" |
18 | #include "llvm/Support/Error.h" |
19 | #include <string> |
20 | #include <utility> |
21 | #include <vector> |
22 | |
23 | using namespace clang; |
24 | using namespace transformer; |
25 | |
26 | using ast_matchers::MatchFinder; |
27 | using llvm::Error; |
28 | using llvm::StringError; |
29 | |
30 | using MatchResult = MatchFinder::MatchResult; |
31 | |
32 | static Error invalidArgumentError(Twine Message) { |
33 | return llvm::make_error<StringError>(Args: llvm::errc::invalid_argument, Args&: Message); |
34 | } |
35 | |
36 | static Error typeError(StringRef ID, const ASTNodeKind &Kind) { |
37 | return invalidArgumentError(Message: "mismatched type (node id=" + ID + |
38 | " kind=" + Kind.asStringRef() + ")" ); |
39 | } |
40 | |
41 | static Error typeError(StringRef ID, const ASTNodeKind &Kind, |
42 | Twine ExpectedType) { |
43 | return invalidArgumentError(Message: "mismatched type: expected one of " + |
44 | ExpectedType + " (node id=" + ID + |
45 | " kind=" + Kind.asStringRef() + ")" ); |
46 | } |
47 | |
48 | static Error missingPropertyError(StringRef ID, Twine Description, |
49 | StringRef Property) { |
50 | return invalidArgumentError(Message: Description + " requires property '" + Property + |
51 | "' (node id=" + ID + ")" ); |
52 | } |
53 | |
54 | static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes, |
55 | StringRef ID) { |
56 | auto &NodesMap = Nodes.getMap(); |
57 | auto It = NodesMap.find(x: ID); |
58 | if (It == NodesMap.end()) |
59 | return invalidArgumentError(Message: "ID not bound: " + ID); |
60 | return It->second; |
61 | } |
62 | |
63 | // FIXME: handling of macros should be configurable. |
64 | static SourceLocation findPreviousTokenStart(SourceLocation Start, |
65 | const SourceManager &SM, |
66 | const LangOptions &LangOpts) { |
67 | if (Start.isInvalid() || Start.isMacroID()) |
68 | return SourceLocation(); |
69 | |
70 | SourceLocation BeforeStart = Start.getLocWithOffset(Offset: -1); |
71 | if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) |
72 | return SourceLocation(); |
73 | |
74 | return Lexer::GetBeginningOfToken(Loc: BeforeStart, SM, LangOpts); |
75 | } |
76 | |
77 | // Finds the start location of the previous token of kind \p TK. |
78 | // FIXME: handling of macros should be configurable. |
79 | static SourceLocation findPreviousTokenKind(SourceLocation Start, |
80 | const SourceManager &SM, |
81 | const LangOptions &LangOpts, |
82 | tok::TokenKind TK) { |
83 | while (true) { |
84 | SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); |
85 | if (L.isInvalid() || L.isMacroID()) |
86 | return SourceLocation(); |
87 | |
88 | Token T; |
89 | if (Lexer::getRawToken(Loc: L, Result&: T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) |
90 | return SourceLocation(); |
91 | |
92 | if (T.is(K: TK)) |
93 | return T.getLocation(); |
94 | |
95 | Start = L; |
96 | } |
97 | } |
98 | |
99 | RangeSelector transformer::before(RangeSelector Selector) { |
100 | return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { |
101 | Expected<CharSourceRange> SelectedRange = Selector(Result); |
102 | if (!SelectedRange) |
103 | return SelectedRange.takeError(); |
104 | return CharSourceRange::getCharRange(R: SelectedRange->getBegin()); |
105 | }; |
106 | } |
107 | |
108 | RangeSelector transformer::after(RangeSelector Selector) { |
109 | return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { |
110 | Expected<CharSourceRange> SelectedRange = Selector(Result); |
111 | if (!SelectedRange) |
112 | return SelectedRange.takeError(); |
113 | SourceLocation End = SelectedRange->getEnd(); |
114 | if (SelectedRange->isTokenRange()) { |
115 | // We need to find the actual (exclusive) end location from which to |
116 | // create a new source range. However, that's not guaranteed to be valid, |
117 | // even if the token location itself is valid. So, we create a token range |
118 | // consisting only of the last token, then map that range back to the |
119 | // source file. If that succeeds, we have a valid location for the end of |
120 | // the generated range. |
121 | CharSourceRange Range = Lexer::makeFileCharRange( |
122 | Range: CharSourceRange::getTokenRange(R: SelectedRange->getEnd()), |
123 | SM: *Result.SourceManager, LangOpts: Result.Context->getLangOpts()); |
124 | if (Range.isInvalid()) |
125 | return invalidArgumentError( |
126 | Message: "after: can't resolve sub-range to valid source range" ); |
127 | End = Range.getEnd(); |
128 | } |
129 | |
130 | return CharSourceRange::getCharRange(R: End); |
131 | }; |
132 | } |
133 | |
134 | RangeSelector transformer::node(std::string ID) { |
135 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
136 | Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID); |
137 | if (!Node) |
138 | return Node.takeError(); |
139 | return (Node->get<Decl>() != nullptr || |
140 | (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr)) |
141 | ? tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi, |
142 | Context&: *Result.Context) |
143 | : CharSourceRange::getTokenRange(R: Node->getSourceRange()); |
144 | }; |
145 | } |
146 | |
147 | RangeSelector transformer::statement(std::string ID) { |
148 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
149 | Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID); |
150 | if (!Node) |
151 | return Node.takeError(); |
152 | return tooling::getExtendedRange(Node: *Node, Next: tok::TokenKind::semi, |
153 | Context&: *Result.Context); |
154 | }; |
155 | } |
156 | |
157 | RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) { |
158 | return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> { |
159 | Expected<CharSourceRange> BeginRange = Begin(Result); |
160 | if (!BeginRange) |
161 | return BeginRange.takeError(); |
162 | Expected<CharSourceRange> EndRange = End(Result); |
163 | if (!EndRange) |
164 | return EndRange.takeError(); |
165 | SourceLocation B = BeginRange->getBegin(); |
166 | SourceLocation E = EndRange->getEnd(); |
167 | // Note: we are precluding the possibility of sub-token ranges in the case |
168 | // that EndRange is a token range. |
169 | if (Result.SourceManager->isBeforeInTranslationUnit(LHS: E, RHS: B)) { |
170 | return invalidArgumentError(Message: "Bad range: out of order" ); |
171 | } |
172 | return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange()); |
173 | }; |
174 | } |
175 | |
176 | RangeSelector transformer::encloseNodes(std::string BeginID, |
177 | std::string EndID) { |
178 | return transformer::enclose(Begin: node(ID: std::move(BeginID)), End: node(ID: std::move(EndID))); |
179 | } |
180 | |
181 | RangeSelector transformer::member(std::string ID) { |
182 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
183 | Expected<DynTypedNode> Node = getNode(Nodes: Result.Nodes, ID); |
184 | if (!Node) |
185 | return Node.takeError(); |
186 | if (auto *M = Node->get<clang::MemberExpr>()) |
187 | return CharSourceRange::getTokenRange( |
188 | R: M->getMemberNameInfo().getSourceRange()); |
189 | return typeError(ID, Kind: Node->getNodeKind(), ExpectedType: "MemberExpr" ); |
190 | }; |
191 | } |
192 | |
193 | RangeSelector transformer::name(std::string ID) { |
194 | return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { |
195 | Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID); |
196 | if (!N) |
197 | return N.takeError(); |
198 | auto &Node = *N; |
199 | if (const auto *D = Node.get<NamedDecl>()) { |
200 | if (!D->getDeclName().isIdentifier()) |
201 | return missingPropertyError(ID, Description: "name" , Property: "identifier" ); |
202 | SourceLocation L = D->getLocation(); |
203 | auto R = CharSourceRange::getTokenRange(B: L, E: L); |
204 | // Verify that the range covers exactly the name. |
205 | // FIXME: extend this code to support cases like `operator +` or |
206 | // `foo<int>` for which this range will be too short. Doing so will |
207 | // require subcasing `NamedDecl`, because it doesn't provide virtual |
208 | // access to the \c DeclarationNameInfo. |
209 | if (tooling::getText(Range: R, Context: *Result.Context) != D->getName()) |
210 | return CharSourceRange(); |
211 | return R; |
212 | } |
213 | if (const auto *E = Node.get<DeclRefExpr>()) { |
214 | if (!E->getNameInfo().getName().isIdentifier()) |
215 | return missingPropertyError(ID, Description: "name" , Property: "identifier" ); |
216 | SourceLocation L = E->getLocation(); |
217 | return CharSourceRange::getTokenRange(B: L, E: L); |
218 | } |
219 | if (const auto *I = Node.get<CXXCtorInitializer>()) { |
220 | if (!I->isMemberInitializer() && I->isWritten()) |
221 | return missingPropertyError(ID, Description: "name" , Property: "explicit member initializer" ); |
222 | SourceLocation L = I->getMemberLocation(); |
223 | return CharSourceRange::getTokenRange(B: L, E: L); |
224 | } |
225 | if (const auto *T = Node.get<TypeLoc>()) { |
226 | TypeLoc Loc = *T; |
227 | auto ET = Loc.getAs<ElaboratedTypeLoc>(); |
228 | if (!ET.isNull()) |
229 | Loc = ET.getNamedTypeLoc(); |
230 | if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>(); |
231 | !SpecLoc.isNull()) |
232 | return CharSourceRange::getTokenRange(R: SpecLoc.getTemplateNameLoc()); |
233 | return CharSourceRange::getTokenRange(R: Loc.getSourceRange()); |
234 | } |
235 | return typeError(ID, Kind: Node.getNodeKind(), |
236 | ExpectedType: "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc" ); |
237 | }; |
238 | } |
239 | |
240 | namespace { |
241 | // FIXME: make this available in the public API for users to easily create their |
242 | // own selectors. |
243 | |
244 | // Creates a selector from a range-selection function \p Func, which selects a |
245 | // range that is relative to a bound node id. \c T is the node type expected by |
246 | // \p Func. |
247 | template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)> |
248 | class RelativeSelector { |
249 | std::string ID; |
250 | |
251 | public: |
252 | RelativeSelector(std::string ID) : ID(std::move(ID)) {} |
253 | |
254 | Expected<CharSourceRange> operator()(const MatchResult &Result) { |
255 | Expected<DynTypedNode> N = getNode(Nodes: Result.Nodes, ID); |
256 | if (!N) |
257 | return N.takeError(); |
258 | if (const auto *Arg = N->get<T>()) |
259 | return Func(Result, *Arg); |
260 | return typeError(ID, Kind: N->getNodeKind()); |
261 | } |
262 | }; |
263 | } // namespace |
264 | |
265 | // FIXME: Change the following functions from being in an anonymous namespace |
266 | // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915 |
267 | // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous |
268 | // namespace works around a bug in earlier versions. |
269 | namespace { |
270 | // Returns the range of the statements (all source between the braces). |
271 | CharSourceRange getStatementsRange(const MatchResult &, |
272 | const CompoundStmt &CS) { |
273 | return CharSourceRange::getCharRange(B: CS.getLBracLoc().getLocWithOffset(Offset: 1), |
274 | E: CS.getRBracLoc()); |
275 | } |
276 | } // namespace |
277 | |
278 | RangeSelector transformer::statements(std::string ID) { |
279 | return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID)); |
280 | } |
281 | |
282 | namespace { |
283 | |
284 | SourceLocation getRLoc(const CallExpr &E) { return E.getRParenLoc(); } |
285 | |
286 | SourceLocation getRLoc(const CXXConstructExpr &E) { |
287 | return E.getParenOrBraceRange().getEnd(); |
288 | } |
289 | |
290 | tok::TokenKind getStartToken(const CallExpr &E) { |
291 | return tok::TokenKind::l_paren; |
292 | } |
293 | |
294 | tok::TokenKind getStartToken(const CXXConstructExpr &E) { |
295 | return isa<CXXTemporaryObjectExpr>(Val: E) ? tok::TokenKind::l_paren |
296 | : tok::TokenKind::l_brace; |
297 | } |
298 | |
299 | template <typename ExprWithArgs> |
300 | SourceLocation findArgStartDelimiter(const ExprWithArgs &E, SourceLocation RLoc, |
301 | const SourceManager &SM, |
302 | const LangOptions &LangOpts) { |
303 | SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(0)->getBeginLoc(); |
304 | return findPreviousTokenKind(Loc, SM, LangOpts, getStartToken(E)); |
305 | } |
306 | // Returns the range of the source between the call's or construct expr's |
307 | // parentheses/braces. |
308 | template <typename ExprWithArgs> |
309 | CharSourceRange getArgumentsRange(const MatchResult &Result, |
310 | const ExprWithArgs &CE) { |
311 | const SourceLocation RLoc = getRLoc(CE); |
312 | return CharSourceRange::getCharRange( |
313 | findArgStartDelimiter(CE, RLoc, *Result.SourceManager, |
314 | Result.Context->getLangOpts()) |
315 | .getLocWithOffset(1), |
316 | RLoc); |
317 | } |
318 | } // namespace |
319 | |
320 | RangeSelector transformer::callArgs(std::string ID) { |
321 | return RelativeSelector<CallExpr, getArgumentsRange<CallExpr>>(std::move(ID)); |
322 | } |
323 | |
324 | RangeSelector transformer::constructExprArgs(std::string ID) { |
325 | return RelativeSelector<CXXConstructExpr, |
326 | getArgumentsRange<CXXConstructExpr>>(std::move(ID)); |
327 | } |
328 | |
329 | namespace { |
330 | // Returns the range of the elements of the initializer list. Includes all |
331 | // source between the braces. |
332 | CharSourceRange getElementsRange(const MatchResult &, |
333 | const InitListExpr &E) { |
334 | return CharSourceRange::getCharRange(B: E.getLBraceLoc().getLocWithOffset(Offset: 1), |
335 | E: E.getRBraceLoc()); |
336 | } |
337 | } // namespace |
338 | |
339 | RangeSelector transformer::initListElements(std::string ID) { |
340 | return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID)); |
341 | } |
342 | |
343 | namespace { |
344 | // Returns the range of the else branch, including the `else` keyword. |
345 | CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) { |
346 | return tooling::maybeExtendRange( |
347 | Range: CharSourceRange::getTokenRange(B: S.getElseLoc(), E: S.getEndLoc()), |
348 | Terminator: tok::TokenKind::semi, Context&: *Result.Context); |
349 | } |
350 | } // namespace |
351 | |
352 | RangeSelector transformer::elseBranch(std::string ID) { |
353 | return RelativeSelector<IfStmt, getElseRange>(std::move(ID)); |
354 | } |
355 | |
356 | RangeSelector transformer::expansion(RangeSelector S) { |
357 | return [S](const MatchResult &Result) -> Expected<CharSourceRange> { |
358 | Expected<CharSourceRange> SRange = S(Result); |
359 | if (!SRange) |
360 | return SRange.takeError(); |
361 | return Result.SourceManager->getExpansionRange(Range: *SRange); |
362 | }; |
363 | } |
364 | |