1 | //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "clang/Tooling/Syntax/BuildTree.h" |
9 | #include "clang/AST/ASTFwd.h" |
10 | #include "clang/AST/Decl.h" |
11 | #include "clang/AST/DeclBase.h" |
12 | #include "clang/AST/DeclCXX.h" |
13 | #include "clang/AST/DeclarationName.h" |
14 | #include "clang/AST/Expr.h" |
15 | #include "clang/AST/ExprCXX.h" |
16 | #include "clang/AST/IgnoreExpr.h" |
17 | #include "clang/AST/OperationKinds.h" |
18 | #include "clang/AST/RecursiveASTVisitor.h" |
19 | #include "clang/AST/Stmt.h" |
20 | #include "clang/AST/TypeLoc.h" |
21 | #include "clang/AST/TypeLocVisitor.h" |
22 | #include "clang/Basic/LLVM.h" |
23 | #include "clang/Basic/SourceLocation.h" |
24 | #include "clang/Basic/SourceManager.h" |
25 | #include "clang/Basic/Specifiers.h" |
26 | #include "clang/Basic/TokenKinds.h" |
27 | #include "clang/Lex/Lexer.h" |
28 | #include "clang/Lex/LiteralSupport.h" |
29 | #include "clang/Tooling/Syntax/Nodes.h" |
30 | #include "clang/Tooling/Syntax/TokenBufferTokenManager.h" |
31 | #include "clang/Tooling/Syntax/Tokens.h" |
32 | #include "clang/Tooling/Syntax/Tree.h" |
33 | #include "llvm/ADT/ArrayRef.h" |
34 | #include "llvm/ADT/DenseMap.h" |
35 | #include "llvm/ADT/PointerUnion.h" |
36 | #include "llvm/ADT/STLExtras.h" |
37 | #include "llvm/ADT/ScopeExit.h" |
38 | #include "llvm/ADT/SmallVector.h" |
39 | #include "llvm/Support/Allocator.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/Compiler.h" |
42 | #include "llvm/Support/FormatVariadic.h" |
43 | #include "llvm/Support/MemoryBuffer.h" |
44 | #include "llvm/Support/raw_ostream.h" |
45 | #include <cstddef> |
46 | #include <map> |
47 | |
48 | using namespace clang; |
49 | |
50 | // Ignores the implicit `CXXConstructExpr` for copy/move constructor calls |
51 | // generated by the compiler, as well as in implicit conversions like the one |
52 | // wrapping `1` in `X x = 1;`. |
53 | static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) { |
54 | if (auto *C = dyn_cast<CXXConstructExpr>(Val: E)) { |
55 | auto NumArgs = C->getNumArgs(); |
56 | if (NumArgs == 1 || (NumArgs > 1 && isa<CXXDefaultArgExpr>(Val: C->getArg(Arg: 1)))) { |
57 | Expr *A = C->getArg(Arg: 0); |
58 | if (C->getParenOrBraceRange().isInvalid()) |
59 | return A; |
60 | } |
61 | } |
62 | return E; |
63 | } |
64 | |
65 | // In: |
66 | // struct X { |
67 | // X(int) |
68 | // }; |
69 | // X x = X(1); |
70 | // Ignores the implicit `CXXFunctionalCastExpr` that wraps |
71 | // `CXXConstructExpr X(1)`. |
72 | static Expr *IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E) { |
73 | if (auto *F = dyn_cast<CXXFunctionalCastExpr>(Val: E)) { |
74 | if (F->getCastKind() == CK_ConstructorConversion) |
75 | return F->getSubExpr(); |
76 | } |
77 | return E; |
78 | } |
79 | |
80 | static Expr *IgnoreImplicit(Expr *E) { |
81 | return IgnoreExprNodes(E, Fns&: IgnoreImplicitSingleStep, |
82 | Fns&: IgnoreImplicitConstructorSingleStep, |
83 | Fns&: IgnoreCXXFunctionalCastExprWrappingConstructor); |
84 | } |
85 | |
86 | LLVM_ATTRIBUTE_UNUSED |
87 | static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; } |
88 | |
89 | namespace { |
90 | /// Get start location of the Declarator from the TypeLoc. |
91 | /// E.g.: |
92 | /// loc of `(` in `int (a)` |
93 | /// loc of `*` in `int *(a)` |
94 | /// loc of the first `(` in `int (*a)(int)` |
95 | /// loc of the `*` in `int *(a)(int)` |
96 | /// loc of the first `*` in `const int *const *volatile a;` |
97 | /// |
98 | /// It is non-trivial to get the start location because TypeLocs are stored |
99 | /// inside out. In the example above `*volatile` is the TypeLoc returned |
100 | /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` |
101 | /// returns. |
102 | struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { |
103 | SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { |
104 | auto L = Visit(TyLoc: T.getInnerLoc()); |
105 | if (L.isValid()) |
106 | return L; |
107 | return T.getLParenLoc(); |
108 | } |
109 | |
110 | // Types spelled in the prefix part of the declarator. |
111 | SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { |
112 | return HandlePointer(T); |
113 | } |
114 | |
115 | SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { |
116 | return HandlePointer(T); |
117 | } |
118 | |
119 | SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { |
120 | return HandlePointer(T); |
121 | } |
122 | |
123 | SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { |
124 | return HandlePointer(T); |
125 | } |
126 | |
127 | SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { |
128 | return HandlePointer(T); |
129 | } |
130 | |
131 | // All other cases are not important, as they are either part of declaration |
132 | // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on |
133 | // existing declarators (e.g. QualifiedTypeLoc). They cannot start the |
134 | // declarator themselves, but their underlying type can. |
135 | SourceLocation VisitTypeLoc(TypeLoc T) { |
136 | auto N = T.getNextTypeLoc(); |
137 | if (!N) |
138 | return SourceLocation(); |
139 | return Visit(TyLoc: N); |
140 | } |
141 | |
142 | SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { |
143 | if (T.getTypePtr()->hasTrailingReturn()) |
144 | return SourceLocation(); // avoid recursing into the suffix of declarator. |
145 | return VisitTypeLoc(T); |
146 | } |
147 | |
148 | private: |
149 | template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { |
150 | auto L = Visit(T.getPointeeLoc()); |
151 | if (L.isValid()) |
152 | return L; |
153 | return T.getLocalSourceRange().getBegin(); |
154 | } |
155 | }; |
156 | } // namespace |
157 | |
158 | static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) { |
159 | auto FirstDefaultArg = |
160 | llvm::find_if(Range&: Args, P: [](auto It) { return isa<CXXDefaultArgExpr>(It); }); |
161 | return llvm::make_range(x: Args.begin(), y: FirstDefaultArg); |
162 | } |
163 | |
164 | static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { |
165 | switch (E.getOperator()) { |
166 | // Comparison |
167 | case OO_EqualEqual: |
168 | case OO_ExclaimEqual: |
169 | case OO_Greater: |
170 | case OO_GreaterEqual: |
171 | case OO_Less: |
172 | case OO_LessEqual: |
173 | case OO_Spaceship: |
174 | // Assignment |
175 | case OO_Equal: |
176 | case OO_SlashEqual: |
177 | case OO_PercentEqual: |
178 | case OO_CaretEqual: |
179 | case OO_PipeEqual: |
180 | case OO_LessLessEqual: |
181 | case OO_GreaterGreaterEqual: |
182 | case OO_PlusEqual: |
183 | case OO_MinusEqual: |
184 | case OO_StarEqual: |
185 | case OO_AmpEqual: |
186 | // Binary computation |
187 | case OO_Slash: |
188 | case OO_Percent: |
189 | case OO_Caret: |
190 | case OO_Pipe: |
191 | case OO_LessLess: |
192 | case OO_GreaterGreater: |
193 | case OO_AmpAmp: |
194 | case OO_PipePipe: |
195 | case OO_ArrowStar: |
196 | case OO_Comma: |
197 | return syntax::NodeKind::BinaryOperatorExpression; |
198 | case OO_Tilde: |
199 | case OO_Exclaim: |
200 | return syntax::NodeKind::PrefixUnaryOperatorExpression; |
201 | // Prefix/Postfix increment/decrement |
202 | case OO_PlusPlus: |
203 | case OO_MinusMinus: |
204 | switch (E.getNumArgs()) { |
205 | case 1: |
206 | return syntax::NodeKind::PrefixUnaryOperatorExpression; |
207 | case 2: |
208 | return syntax::NodeKind::PostfixUnaryOperatorExpression; |
209 | default: |
210 | llvm_unreachable("Invalid number of arguments for operator" ); |
211 | } |
212 | // Operators that can be unary or binary |
213 | case OO_Plus: |
214 | case OO_Minus: |
215 | case OO_Star: |
216 | case OO_Amp: |
217 | switch (E.getNumArgs()) { |
218 | case 1: |
219 | return syntax::NodeKind::PrefixUnaryOperatorExpression; |
220 | case 2: |
221 | return syntax::NodeKind::BinaryOperatorExpression; |
222 | default: |
223 | llvm_unreachable("Invalid number of arguments for operator" ); |
224 | } |
225 | return syntax::NodeKind::BinaryOperatorExpression; |
226 | // Not yet supported by SyntaxTree |
227 | case OO_New: |
228 | case OO_Delete: |
229 | case OO_Array_New: |
230 | case OO_Array_Delete: |
231 | case OO_Coawait: |
232 | case OO_Subscript: |
233 | case OO_Arrow: |
234 | return syntax::NodeKind::UnknownExpression; |
235 | case OO_Call: |
236 | return syntax::NodeKind::CallExpression; |
237 | case OO_Conditional: // not overloadable |
238 | case NUM_OVERLOADED_OPERATORS: |
239 | case OO_None: |
240 | llvm_unreachable("Not an overloadable operator" ); |
241 | } |
242 | llvm_unreachable("Unknown OverloadedOperatorKind enum" ); |
243 | } |
244 | |
245 | /// Get the start of the qualified name. In the examples below it gives the |
246 | /// location of the `^`: |
247 | /// `int ^a;` |
248 | /// `int *^a;` |
249 | /// `int ^a::S::f(){}` |
250 | static SourceLocation getQualifiedNameStart(NamedDecl *D) { |
251 | assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && |
252 | "only DeclaratorDecl and TypedefNameDecl are supported." ); |
253 | |
254 | auto DN = D->getDeclName(); |
255 | bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); |
256 | if (IsAnonymous) |
257 | return SourceLocation(); |
258 | |
259 | if (const auto *DD = dyn_cast<DeclaratorDecl>(Val: D)) { |
260 | if (DD->getQualifierLoc()) { |
261 | return DD->getQualifierLoc().getBeginLoc(); |
262 | } |
263 | } |
264 | |
265 | return D->getLocation(); |
266 | } |
267 | |
268 | /// Gets the range of the initializer inside an init-declarator C++ [dcl.decl]. |
269 | /// `int a;` -> range of ``, |
270 | /// `int *a = nullptr` -> range of `= nullptr`. |
271 | /// `int a{}` -> range of `{}`. |
272 | /// `int a()` -> range of `()`. |
273 | static SourceRange getInitializerRange(Decl *D) { |
274 | if (auto *V = dyn_cast<VarDecl>(Val: D)) { |
275 | auto *I = V->getInit(); |
276 | // Initializers in range-based-for are not part of the declarator |
277 | if (I && !V->isCXXForRangeDecl()) |
278 | return I->getSourceRange(); |
279 | } |
280 | |
281 | return SourceRange(); |
282 | } |
283 | |
284 | /// Gets the range of declarator as defined by the C++ grammar. E.g. |
285 | /// `int a;` -> range of `a`, |
286 | /// `int *a;` -> range of `*a`, |
287 | /// `int a[10];` -> range of `a[10]`, |
288 | /// `int a[1][2][3];` -> range of `a[1][2][3]`, |
289 | /// `int *a = nullptr` -> range of `*a = nullptr`. |
290 | /// `int S::f(){}` -> range of `S::f()`. |
291 | /// FIXME: \p Name must be a source range. |
292 | static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, |
293 | SourceLocation Name, |
294 | SourceRange Initializer) { |
295 | SourceLocation Start = GetStartLoc().Visit(TyLoc: T); |
296 | SourceLocation End = T.getEndLoc(); |
297 | if (Name.isValid()) { |
298 | if (Start.isInvalid()) |
299 | Start = Name; |
300 | // End of TypeLoc could be invalid if the type is invalid, fallback to the |
301 | // NameLoc. |
302 | if (End.isInvalid() || SM.isBeforeInTranslationUnit(LHS: End, RHS: Name)) |
303 | End = Name; |
304 | } |
305 | if (Initializer.isValid()) { |
306 | auto InitializerEnd = Initializer.getEnd(); |
307 | assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || |
308 | End == InitializerEnd); |
309 | End = InitializerEnd; |
310 | } |
311 | return SourceRange(Start, End); |
312 | } |
313 | |
314 | namespace { |
315 | /// All AST hierarchy roots that can be represented as pointers. |
316 | using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; |
317 | /// Maintains a mapping from AST to syntax tree nodes. This class will get more |
318 | /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. |
319 | /// FIXME: expose this as public API. |
320 | class ASTToSyntaxMapping { |
321 | public: |
322 | void add(ASTPtr From, syntax::Tree *To) { |
323 | assert(To != nullptr); |
324 | assert(!From.isNull()); |
325 | |
326 | bool Added = Nodes.insert(KV: {From, To}).second; |
327 | (void)Added; |
328 | assert(Added && "mapping added twice" ); |
329 | } |
330 | |
331 | void add(NestedNameSpecifierLoc From, syntax::Tree *To) { |
332 | assert(To != nullptr); |
333 | assert(From.hasQualifier()); |
334 | |
335 | bool Added = NNSNodes.insert(KV: {From, To}).second; |
336 | (void)Added; |
337 | assert(Added && "mapping added twice" ); |
338 | } |
339 | |
340 | syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(Val: P); } |
341 | |
342 | syntax::Tree *find(NestedNameSpecifierLoc P) const { |
343 | return NNSNodes.lookup(Val: P); |
344 | } |
345 | |
346 | private: |
347 | llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; |
348 | llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; |
349 | }; |
350 | } // namespace |
351 | |
352 | /// A helper class for constructing the syntax tree while traversing a clang |
353 | /// AST. |
354 | /// |
355 | /// At each point of the traversal we maintain a list of pending nodes. |
356 | /// Initially all tokens are added as pending nodes. When processing a clang AST |
357 | /// node, the clients need to: |
358 | /// - create a corresponding syntax node, |
359 | /// - assign roles to all pending child nodes with 'markChild' and |
360 | /// 'markChildToken', |
361 | /// - replace the child nodes with the new syntax node in the pending list |
362 | /// with 'foldNode'. |
363 | /// |
364 | /// Note that all children are expected to be processed when building a node. |
365 | /// |
366 | /// Call finalize() to finish building the tree and consume the root node. |
367 | class syntax::TreeBuilder { |
368 | public: |
369 | TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager& TBTM) |
370 | : Arena(Arena), |
371 | TBTM(TBTM), |
372 | Pending(Arena, TBTM.tokenBuffer()) { |
373 | for (const auto &T : TBTM.tokenBuffer().expandedTokens()) |
374 | LocationToToken.insert(KV: {T.location(), &T}); |
375 | } |
376 | |
377 | llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } |
378 | const SourceManager &sourceManager() const { |
379 | return TBTM.sourceManager(); |
380 | } |
381 | |
382 | /// Populate children for \p New node, assuming it covers tokens from \p |
383 | /// Range. |
384 | void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { |
385 | assert(New); |
386 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: Range, Node: New); |
387 | if (From) |
388 | Mapping.add(From, To: New); |
389 | } |
390 | |
391 | void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { |
392 | // FIXME: add mapping for TypeLocs |
393 | foldNode(Range, New, From: nullptr); |
394 | } |
395 | |
396 | void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, |
397 | NestedNameSpecifierLoc From) { |
398 | assert(New); |
399 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: Range, Node: New); |
400 | if (From) |
401 | Mapping.add(From, To: New); |
402 | } |
403 | |
404 | /// Populate children for \p New list, assuming it covers tokens from a |
405 | /// subrange of \p SuperRange. |
406 | void foldList(ArrayRef<syntax::Token> SuperRange, syntax::List *New, |
407 | ASTPtr From) { |
408 | assert(New); |
409 | auto ListRange = Pending.shrinkToFitList(Range: SuperRange); |
410 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: ListRange, Node: New); |
411 | if (From) |
412 | Mapping.add(From, To: New); |
413 | } |
414 | |
415 | /// Notifies that we should not consume trailing semicolon when computing |
416 | /// token range of \p D. |
417 | void noticeDeclWithoutSemicolon(Decl *D); |
418 | |
419 | /// Mark the \p Child node with a corresponding \p Role. All marked children |
420 | /// should be consumed by foldNode. |
421 | /// When called on expressions (clang::Expr is derived from clang::Stmt), |
422 | /// wraps expressions into expression statement. |
423 | void markStmtChild(Stmt *Child, NodeRole Role); |
424 | /// Should be called for expressions in non-statement position to avoid |
425 | /// wrapping into expression statement. |
426 | void markExprChild(Expr *Child, NodeRole Role); |
427 | /// Set role for a token starting at \p Loc. |
428 | void markChildToken(SourceLocation Loc, NodeRole R); |
429 | /// Set role for \p T. |
430 | void markChildToken(const syntax::Token *T, NodeRole R); |
431 | |
432 | /// Set role for \p N. |
433 | void markChild(syntax::Node *N, NodeRole R); |
434 | /// Set role for the syntax node matching \p N. |
435 | void markChild(ASTPtr N, NodeRole R); |
436 | /// Set role for the syntax node matching \p N. |
437 | void markChild(NestedNameSpecifierLoc N, NodeRole R); |
438 | |
439 | /// Finish building the tree and consume the root node. |
440 | syntax::TranslationUnit *finalize() && { |
441 | auto Tokens = TBTM.tokenBuffer().expandedTokens(); |
442 | assert(!Tokens.empty()); |
443 | assert(Tokens.back().kind() == tok::eof); |
444 | |
445 | // Build the root of the tree, consuming all the children. |
446 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: Tokens.drop_back(), |
447 | Node: new (Arena.getAllocator()) syntax::TranslationUnit); |
448 | |
449 | auto *TU = cast<syntax::TranslationUnit>(Val: std::move(Pending).finalize()); |
450 | TU->assertInvariantsRecursive(); |
451 | return TU; |
452 | } |
453 | |
454 | /// Finds a token starting at \p L. The token must exist if \p L is valid. |
455 | const syntax::Token *findToken(SourceLocation L) const; |
456 | |
457 | /// Finds the syntax tokens corresponding to the \p SourceRange. |
458 | ArrayRef<syntax::Token> getRange(SourceRange Range) const { |
459 | assert(Range.isValid()); |
460 | return getRange(First: Range.getBegin(), Last: Range.getEnd()); |
461 | } |
462 | |
463 | /// Finds the syntax tokens corresponding to the passed source locations. |
464 | /// \p First is the start position of the first token and \p Last is the start |
465 | /// position of the last token. |
466 | ArrayRef<syntax::Token> getRange(SourceLocation First, |
467 | SourceLocation Last) const { |
468 | assert(First.isValid()); |
469 | assert(Last.isValid()); |
470 | assert(First == Last || |
471 | TBTM.sourceManager().isBeforeInTranslationUnit(First, Last)); |
472 | return llvm::ArrayRef(findToken(L: First), std::next(x: findToken(L: Last))); |
473 | } |
474 | |
475 | ArrayRef<syntax::Token> |
476 | getTemplateRange(const ClassTemplateSpecializationDecl *D) const { |
477 | auto Tokens = getRange(Range: D->getSourceRange()); |
478 | return maybeAppendSemicolon(Tokens, D); |
479 | } |
480 | |
481 | /// Returns true if \p D is the last declarator in a chain and is thus |
482 | /// reponsible for creating SimpleDeclaration for the whole chain. |
483 | bool isResponsibleForCreatingDeclaration(const Decl *D) const { |
484 | assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && |
485 | "only DeclaratorDecl and TypedefNameDecl are supported." ); |
486 | |
487 | const Decl *Next = D->getNextDeclInContext(); |
488 | |
489 | // There's no next sibling, this one is responsible. |
490 | if (Next == nullptr) { |
491 | return true; |
492 | } |
493 | |
494 | // Next sibling is not the same type, this one is responsible. |
495 | if (D->getKind() != Next->getKind()) { |
496 | return true; |
497 | } |
498 | // Next sibling doesn't begin at the same loc, it must be a different |
499 | // declaration, so this declarator is responsible. |
500 | if (Next->getBeginLoc() != D->getBeginLoc()) { |
501 | return true; |
502 | } |
503 | |
504 | // NextT is a member of the same declaration, and we need the last member to |
505 | // create declaration. This one is not responsible. |
506 | return false; |
507 | } |
508 | |
509 | ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { |
510 | ArrayRef<syntax::Token> Tokens; |
511 | // We want to drop the template parameters for specializations. |
512 | if (const auto *S = dyn_cast<TagDecl>(Val: D)) |
513 | Tokens = getRange(First: S->TypeDecl::getBeginLoc(), Last: S->getEndLoc()); |
514 | else |
515 | Tokens = getRange(Range: D->getSourceRange()); |
516 | return maybeAppendSemicolon(Tokens, D); |
517 | } |
518 | |
519 | ArrayRef<syntax::Token> getExprRange(const Expr *E) const { |
520 | return getRange(Range: E->getSourceRange()); |
521 | } |
522 | |
523 | /// Find the adjusted range for the statement, consuming the trailing |
524 | /// semicolon when needed. |
525 | ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { |
526 | auto Tokens = getRange(Range: S->getSourceRange()); |
527 | if (isa<CompoundStmt>(Val: S)) |
528 | return Tokens; |
529 | |
530 | // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and |
531 | // all statements that end with those. Consume this semicolon here. |
532 | if (Tokens.back().kind() == tok::semi) |
533 | return Tokens; |
534 | return withTrailingSemicolon(Tokens); |
535 | } |
536 | |
537 | private: |
538 | ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, |
539 | const Decl *D) const { |
540 | if (isa<NamespaceDecl>(Val: D)) |
541 | return Tokens; |
542 | if (DeclsWithoutSemicolons.count(V: D)) |
543 | return Tokens; |
544 | // FIXME: do not consume trailing semicolon on function definitions. |
545 | // Most declarations own a semicolon in syntax trees, but not in clang AST. |
546 | return withTrailingSemicolon(Tokens); |
547 | } |
548 | |
549 | ArrayRef<syntax::Token> |
550 | withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { |
551 | assert(!Tokens.empty()); |
552 | assert(Tokens.back().kind() != tok::eof); |
553 | // We never consume 'eof', so looking at the next token is ok. |
554 | if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) |
555 | return llvm::ArrayRef(Tokens.begin(), Tokens.end() + 1); |
556 | return Tokens; |
557 | } |
558 | |
559 | void setRole(syntax::Node *N, NodeRole R) { |
560 | assert(N->getRole() == NodeRole::Detached); |
561 | N->setRole(R); |
562 | } |
563 | |
564 | /// A collection of trees covering the input tokens. |
565 | /// When created, each tree corresponds to a single token in the file. |
566 | /// Clients call 'foldChildren' to attach one or more subtrees to a parent |
567 | /// node and update the list of trees accordingly. |
568 | /// |
569 | /// Ensures that added nodes properly nest and cover the whole token stream. |
570 | struct Forest { |
571 | Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) { |
572 | assert(!TB.expandedTokens().empty()); |
573 | assert(TB.expandedTokens().back().kind() == tok::eof); |
574 | // Create all leaf nodes. |
575 | // Note that we do not have 'eof' in the tree. |
576 | for (const auto &T : TB.expandedTokens().drop_back()) { |
577 | auto *L = new (A.getAllocator()) |
578 | syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T)); |
579 | L->Original = true; |
580 | L->CanModify = TB.spelledForExpanded(Expanded: T).has_value(); |
581 | Trees.insert(position: Trees.end(), x: {&T, L}); |
582 | } |
583 | } |
584 | |
585 | void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { |
586 | assert(!Range.empty()); |
587 | auto It = Trees.lower_bound(x: Range.begin()); |
588 | assert(It != Trees.end() && "no node found" ); |
589 | assert(It->first == Range.begin() && "no child with the specified range" ); |
590 | assert((std::next(It) == Trees.end() || |
591 | std::next(It)->first == Range.end()) && |
592 | "no child with the specified range" ); |
593 | assert(It->second->getRole() == NodeRole::Detached && |
594 | "re-assigning role for a child" ); |
595 | It->second->setRole(Role); |
596 | } |
597 | |
598 | /// Shrink \p Range to a subrange that only contains tokens of a list. |
599 | /// List elements and delimiters should already have correct roles. |
600 | ArrayRef<syntax::Token> shrinkToFitList(ArrayRef<syntax::Token> Range) { |
601 | auto BeginChildren = Trees.lower_bound(x: Range.begin()); |
602 | assert((BeginChildren == Trees.end() || |
603 | BeginChildren->first == Range.begin()) && |
604 | "Range crosses boundaries of existing subtrees" ); |
605 | |
606 | auto EndChildren = Trees.lower_bound(x: Range.end()); |
607 | assert( |
608 | (EndChildren == Trees.end() || EndChildren->first == Range.end()) && |
609 | "Range crosses boundaries of existing subtrees" ); |
610 | |
611 | auto BelongsToList = [](decltype(Trees)::value_type KV) { |
612 | auto Role = KV.second->getRole(); |
613 | return Role == syntax::NodeRole::ListElement || |
614 | Role == syntax::NodeRole::ListDelimiter; |
615 | }; |
616 | |
617 | auto BeginListChildren = |
618 | std::find_if(first: BeginChildren, last: EndChildren, pred: BelongsToList); |
619 | |
620 | auto EndListChildren = |
621 | std::find_if_not(first: BeginListChildren, last: EndChildren, pred: BelongsToList); |
622 | |
623 | return ArrayRef<syntax::Token>(BeginListChildren->first, |
624 | EndListChildren->first); |
625 | } |
626 | |
627 | /// Add \p Node to the forest and attach child nodes based on \p Tokens. |
628 | void foldChildren(const syntax::TokenBuffer &TB, |
629 | ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { |
630 | // Attach children to `Node`. |
631 | assert(Node->getFirstChild() == nullptr && "node already has children" ); |
632 | |
633 | auto *FirstToken = Tokens.begin(); |
634 | auto BeginChildren = Trees.lower_bound(x: FirstToken); |
635 | |
636 | assert((BeginChildren == Trees.end() || |
637 | BeginChildren->first == FirstToken) && |
638 | "fold crosses boundaries of existing subtrees" ); |
639 | auto EndChildren = Trees.lower_bound(x: Tokens.end()); |
640 | assert( |
641 | (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && |
642 | "fold crosses boundaries of existing subtrees" ); |
643 | |
644 | for (auto It = BeginChildren; It != EndChildren; ++It) { |
645 | auto *C = It->second; |
646 | if (C->getRole() == NodeRole::Detached) |
647 | C->setRole(NodeRole::Unknown); |
648 | Node->appendChildLowLevel(Child: C); |
649 | } |
650 | |
651 | // Mark that this node came from the AST and is backed by the source code. |
652 | Node->Original = true; |
653 | Node->CanModify = |
654 | TB.spelledForExpanded(Expanded: Tokens).has_value(); |
655 | |
656 | Trees.erase(first: BeginChildren, last: EndChildren); |
657 | Trees.insert(x: {FirstToken, Node}); |
658 | } |
659 | |
660 | // EXPECTS: all tokens were consumed and are owned by a single root node. |
661 | syntax::Node *finalize() && { |
662 | assert(Trees.size() == 1); |
663 | auto *Root = Trees.begin()->second; |
664 | Trees = {}; |
665 | return Root; |
666 | } |
667 | |
668 | std::string str(const syntax::TokenBufferTokenManager &STM) const { |
669 | std::string R; |
670 | for (auto It = Trees.begin(); It != Trees.end(); ++It) { |
671 | unsigned CoveredTokens = |
672 | It != Trees.end() |
673 | ? (std::next(x: It)->first - It->first) |
674 | : STM.tokenBuffer().expandedTokens().end() - It->first; |
675 | |
676 | R += std::string( |
677 | formatv(Fmt: "- '{0}' covers '{1}'+{2} tokens\n" , Vals: It->second->getKind(), |
678 | Vals: It->first->text(SM: STM.sourceManager()), Vals&: CoveredTokens)); |
679 | R += It->second->dump(SM: STM); |
680 | } |
681 | return R; |
682 | } |
683 | |
684 | private: |
685 | /// Maps from the start token to a subtree starting at that token. |
686 | /// Keys in the map are pointers into the array of expanded tokens, so |
687 | /// pointer order corresponds to the order of preprocessor tokens. |
688 | std::map<const syntax::Token *, syntax::Node *> Trees; |
689 | }; |
690 | |
691 | /// For debugging purposes. |
692 | std::string str() { return Pending.str(STM: TBTM); } |
693 | |
694 | syntax::Arena &Arena; |
695 | TokenBufferTokenManager& TBTM; |
696 | /// To quickly find tokens by their start location. |
697 | llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken; |
698 | Forest Pending; |
699 | llvm::DenseSet<Decl *> DeclsWithoutSemicolons; |
700 | ASTToSyntaxMapping Mapping; |
701 | }; |
702 | |
703 | namespace { |
704 | class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { |
705 | public: |
706 | explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) |
707 | : Builder(Builder), Context(Context) {} |
708 | |
709 | bool shouldTraversePostOrder() const { return true; } |
710 | |
711 | bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { |
712 | return processDeclaratorAndDeclaration(D: DD); |
713 | } |
714 | |
715 | bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { |
716 | return processDeclaratorAndDeclaration(D: TD); |
717 | } |
718 | |
719 | bool VisitDecl(Decl *D) { |
720 | assert(!D->isImplicit()); |
721 | Builder.foldNode(Range: Builder.getDeclarationRange(D), |
722 | New: new (allocator()) syntax::UnknownDeclaration(), From: D); |
723 | return true; |
724 | } |
725 | |
726 | // RAV does not call WalkUpFrom* on explicit instantiations, so we have to |
727 | // override Traverse. |
728 | // FIXME: make RAV call WalkUpFrom* instead. |
729 | bool |
730 | TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { |
731 | if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(D: C)) |
732 | return false; |
733 | if (C->isExplicitSpecialization()) |
734 | return true; // we are only interested in explicit instantiations. |
735 | auto *Declaration = |
736 | cast<syntax::SimpleDeclaration>(Val: handleFreeStandingTagDecl(C)); |
737 | foldExplicitTemplateInstantiation( |
738 | Range: Builder.getTemplateRange(D: C), |
739 | ExternKW: Builder.findToken(L: C->getExternKeywordLoc()), |
740 | TemplateKW: Builder.findToken(L: C->getTemplateKeywordLoc()), InnerDeclaration: Declaration, From: C); |
741 | return true; |
742 | } |
743 | |
744 | bool WalkUpFromTemplateDecl(TemplateDecl *S) { |
745 | foldTemplateDeclaration( |
746 | Range: Builder.getDeclarationRange(D: S), |
747 | TemplateKW: Builder.findToken(L: S->getTemplateParameters()->getTemplateLoc()), |
748 | TemplatedDeclaration: Builder.getDeclarationRange(D: S->getTemplatedDecl()), From: S); |
749 | return true; |
750 | } |
751 | |
752 | bool WalkUpFromTagDecl(TagDecl *C) { |
753 | // FIXME: build the ClassSpecifier node. |
754 | if (!C->isFreeStanding()) { |
755 | assert(C->getNumTemplateParameterLists() == 0); |
756 | return true; |
757 | } |
758 | handleFreeStandingTagDecl(C); |
759 | return true; |
760 | } |
761 | |
762 | syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { |
763 | assert(C->isFreeStanding()); |
764 | // Class is a declaration specifier and needs a spanning declaration node. |
765 | auto DeclarationRange = Builder.getDeclarationRange(D: C); |
766 | syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; |
767 | Builder.foldNode(Range: DeclarationRange, New: Result, From: nullptr); |
768 | |
769 | // Build TemplateDeclaration nodes if we had template parameters. |
770 | auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { |
771 | const auto *TemplateKW = Builder.findToken(L: L.getTemplateLoc()); |
772 | auto R = llvm::ArrayRef(TemplateKW, DeclarationRange.end()); |
773 | Result = |
774 | foldTemplateDeclaration(Range: R, TemplateKW, TemplatedDeclaration: DeclarationRange, From: nullptr); |
775 | DeclarationRange = R; |
776 | }; |
777 | if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(Val: C)) |
778 | ConsumeTemplateParameters(*S->getTemplateParameters()); |
779 | for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) |
780 | ConsumeTemplateParameters(*C->getTemplateParameterList(i: I - 1)); |
781 | return Result; |
782 | } |
783 | |
784 | bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { |
785 | // We do not want to call VisitDecl(), the declaration for translation |
786 | // unit is built by finalize(). |
787 | return true; |
788 | } |
789 | |
790 | bool WalkUpFromCompoundStmt(CompoundStmt *S) { |
791 | using NodeRole = syntax::NodeRole; |
792 | |
793 | Builder.markChildToken(Loc: S->getLBracLoc(), R: NodeRole::OpenParen); |
794 | for (auto *Child : S->body()) |
795 | Builder.markStmtChild(Child, Role: NodeRole::Statement); |
796 | Builder.markChildToken(Loc: S->getRBracLoc(), R: NodeRole::CloseParen); |
797 | |
798 | Builder.foldNode(Range: Builder.getStmtRange(S), |
799 | New: new (allocator()) syntax::CompoundStatement, From: S); |
800 | return true; |
801 | } |
802 | |
803 | // Some statements are not yet handled by syntax trees. |
804 | bool WalkUpFromStmt(Stmt *S) { |
805 | Builder.foldNode(Range: Builder.getStmtRange(S), |
806 | New: new (allocator()) syntax::UnknownStatement, From: S); |
807 | return true; |
808 | } |
809 | |
810 | bool TraverseIfStmt(IfStmt *S) { |
811 | bool Result = [&, this]() { |
812 | if (S->getInit() && !TraverseStmt(S: S->getInit())) { |
813 | return false; |
814 | } |
815 | // In cases where the condition is an initialized declaration in a |
816 | // statement, we want to preserve the declaration and ignore the |
817 | // implicit condition expression in the syntax tree. |
818 | if (S->hasVarStorage()) { |
819 | if (!TraverseStmt(S: S->getConditionVariableDeclStmt())) |
820 | return false; |
821 | } else if (S->getCond() && !TraverseStmt(S: S->getCond())) |
822 | return false; |
823 | |
824 | if (S->getThen() && !TraverseStmt(S: S->getThen())) |
825 | return false; |
826 | if (S->getElse() && !TraverseStmt(S: S->getElse())) |
827 | return false; |
828 | return true; |
829 | }(); |
830 | WalkUpFromIfStmt(S); |
831 | return Result; |
832 | } |
833 | |
834 | bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { |
835 | // We override to traverse range initializer as VarDecl. |
836 | // RAV traverses it as a statement, we produce invalid node kinds in that |
837 | // case. |
838 | // FIXME: should do this in RAV instead? |
839 | bool Result = [&, this]() { |
840 | if (S->getInit() && !TraverseStmt(S: S->getInit())) |
841 | return false; |
842 | if (S->getLoopVariable() && !TraverseDecl(D: S->getLoopVariable())) |
843 | return false; |
844 | if (S->getRangeInit() && !TraverseStmt(S: S->getRangeInit())) |
845 | return false; |
846 | if (S->getBody() && !TraverseStmt(S: S->getBody())) |
847 | return false; |
848 | return true; |
849 | }(); |
850 | WalkUpFromCXXForRangeStmt(S); |
851 | return Result; |
852 | } |
853 | |
854 | bool TraverseStmt(Stmt *S) { |
855 | if (auto *DS = dyn_cast_or_null<DeclStmt>(Val: S)) { |
856 | // We want to consume the semicolon, make sure SimpleDeclaration does not. |
857 | for (auto *D : DS->decls()) |
858 | Builder.noticeDeclWithoutSemicolon(D); |
859 | } else if (auto *E = dyn_cast_or_null<Expr>(Val: S)) { |
860 | return RecursiveASTVisitor::TraverseStmt(S: IgnoreImplicit(E)); |
861 | } |
862 | return RecursiveASTVisitor::TraverseStmt(S); |
863 | } |
864 | |
865 | bool TraverseOpaqueValueExpr(OpaqueValueExpr *VE) { |
866 | // OpaqueValue doesn't correspond to concrete syntax, ignore it. |
867 | return true; |
868 | } |
869 | |
870 | // Some expressions are not yet handled by syntax trees. |
871 | bool WalkUpFromExpr(Expr *E) { |
872 | assert(!isImplicitExpr(E) && "should be handled by TraverseStmt" ); |
873 | Builder.foldNode(Range: Builder.getExprRange(E), |
874 | New: new (allocator()) syntax::UnknownExpression, From: E); |
875 | return true; |
876 | } |
877 | |
878 | bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { |
879 | // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node |
880 | // referencing the location of the UDL suffix (`_w` in `1.2_w`). The |
881 | // UDL suffix location does not point to the beginning of a token, so we |
882 | // can't represent the UDL suffix as a separate syntax tree node. |
883 | |
884 | return WalkUpFromUserDefinedLiteral(S); |
885 | } |
886 | |
887 | syntax::UserDefinedLiteralExpression * |
888 | buildUserDefinedLiteral(UserDefinedLiteral *S) { |
889 | switch (S->getLiteralOperatorKind()) { |
890 | case UserDefinedLiteral::LOK_Integer: |
891 | return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; |
892 | case UserDefinedLiteral::LOK_Floating: |
893 | return new (allocator()) syntax::FloatUserDefinedLiteralExpression; |
894 | case UserDefinedLiteral::LOK_Character: |
895 | return new (allocator()) syntax::CharUserDefinedLiteralExpression; |
896 | case UserDefinedLiteral::LOK_String: |
897 | return new (allocator()) syntax::StringUserDefinedLiteralExpression; |
898 | case UserDefinedLiteral::LOK_Raw: |
899 | case UserDefinedLiteral::LOK_Template: |
900 | // For raw literal operator and numeric literal operator template we |
901 | // cannot get the type of the operand in the semantic AST. We get this |
902 | // information from the token. As integer and floating point have the same |
903 | // token kind, we run `NumericLiteralParser` again to distinguish them. |
904 | auto TokLoc = S->getBeginLoc(); |
905 | auto TokSpelling = |
906 | Builder.findToken(L: TokLoc)->text(SM: Context.getSourceManager()); |
907 | auto Literal = |
908 | NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), |
909 | Context.getLangOpts(), Context.getTargetInfo(), |
910 | Context.getDiagnostics()); |
911 | if (Literal.isIntegerLiteral()) |
912 | return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; |
913 | else { |
914 | assert(Literal.isFloatingLiteral()); |
915 | return new (allocator()) syntax::FloatUserDefinedLiteralExpression; |
916 | } |
917 | } |
918 | llvm_unreachable("Unknown literal operator kind." ); |
919 | } |
920 | |
921 | bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { |
922 | Builder.markChildToken(Loc: S->getBeginLoc(), R: syntax::NodeRole::LiteralToken); |
923 | Builder.foldNode(Range: Builder.getExprRange(E: S), New: buildUserDefinedLiteral(S), From: S); |
924 | return true; |
925 | } |
926 | |
927 | // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the |
928 | // `DependentTemplateSpecializationType` case. |
929 | /// Given a nested-name-specifier return the range for the last name |
930 | /// specifier. |
931 | /// |
932 | /// e.g. `std::T::template X<U>::` => `template X<U>::` |
933 | SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { |
934 | auto SR = NNSLoc.getLocalSourceRange(); |
935 | |
936 | // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* |
937 | // return the desired `SourceRange`, but there is a corner case. For a |
938 | // `DependentTemplateSpecializationType` this method returns its |
939 | // qualifiers as well, in other words in the example above this method |
940 | // returns `T::template X<U>::` instead of only `template X<U>::` |
941 | if (auto TL = NNSLoc.getTypeLoc()) { |
942 | if (auto DependentTL = |
943 | TL.getAs<DependentTemplateSpecializationTypeLoc>()) { |
944 | // The 'template' keyword is always present in dependent template |
945 | // specializations. Except in the case of incorrect code |
946 | // TODO: Treat the case of incorrect code. |
947 | SR.setBegin(DependentTL.getTemplateKeywordLoc()); |
948 | } |
949 | } |
950 | |
951 | return SR; |
952 | } |
953 | |
954 | syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) { |
955 | switch (NNS.getKind()) { |
956 | case NestedNameSpecifier::Global: |
957 | return syntax::NodeKind::GlobalNameSpecifier; |
958 | case NestedNameSpecifier::Namespace: |
959 | case NestedNameSpecifier::NamespaceAlias: |
960 | case NestedNameSpecifier::Identifier: |
961 | return syntax::NodeKind::IdentifierNameSpecifier; |
962 | case NestedNameSpecifier::TypeSpecWithTemplate: |
963 | return syntax::NodeKind::SimpleTemplateNameSpecifier; |
964 | case NestedNameSpecifier::TypeSpec: { |
965 | const auto *NNSType = NNS.getAsType(); |
966 | assert(NNSType); |
967 | if (isa<DecltypeType>(Val: NNSType)) |
968 | return syntax::NodeKind::DecltypeNameSpecifier; |
969 | if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( |
970 | Val: NNSType)) |
971 | return syntax::NodeKind::SimpleTemplateNameSpecifier; |
972 | return syntax::NodeKind::IdentifierNameSpecifier; |
973 | } |
974 | default: |
975 | // FIXME: Support Microsoft's __super |
976 | llvm::report_fatal_error(reason: "We don't yet support the __super specifier" , |
977 | gen_crash_diag: true); |
978 | } |
979 | } |
980 | |
981 | syntax::NameSpecifier * |
982 | buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { |
983 | assert(NNSLoc.hasQualifier()); |
984 | auto NameSpecifierTokens = |
985 | Builder.getRange(Range: getLocalSourceRange(NNSLoc)).drop_back(); |
986 | switch (getNameSpecifierKind(NNS: *NNSLoc.getNestedNameSpecifier())) { |
987 | case syntax::NodeKind::GlobalNameSpecifier: |
988 | return new (allocator()) syntax::GlobalNameSpecifier; |
989 | case syntax::NodeKind::IdentifierNameSpecifier: { |
990 | assert(NameSpecifierTokens.size() == 1); |
991 | Builder.markChildToken(T: NameSpecifierTokens.begin(), |
992 | R: syntax::NodeRole::Unknown); |
993 | auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; |
994 | Builder.foldNode(Range: NameSpecifierTokens, New: NS, From: nullptr); |
995 | return NS; |
996 | } |
997 | case syntax::NodeKind::SimpleTemplateNameSpecifier: { |
998 | // TODO: Build `SimpleTemplateNameSpecifier` children and implement |
999 | // accessors to them. |
1000 | // Be aware, we cannot do that simply by calling `TraverseTypeLoc`, |
1001 | // some `TypeLoc`s have inside them the previous name specifier and |
1002 | // we want to treat them independently. |
1003 | auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; |
1004 | Builder.foldNode(Range: NameSpecifierTokens, New: NS, From: nullptr); |
1005 | return NS; |
1006 | } |
1007 | case syntax::NodeKind::DecltypeNameSpecifier: { |
1008 | const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>(); |
1009 | if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL)) |
1010 | return nullptr; |
1011 | auto *NS = new (allocator()) syntax::DecltypeNameSpecifier; |
1012 | // TODO: Implement accessor to `DecltypeNameSpecifier` inner |
1013 | // `DecltypeTypeLoc`. |
1014 | // For that add mapping from `TypeLoc` to `syntax::Node*` then: |
1015 | // Builder.markChild(TypeLoc, syntax::NodeRole); |
1016 | Builder.foldNode(Range: NameSpecifierTokens, New: NS, From: nullptr); |
1017 | return NS; |
1018 | } |
1019 | default: |
1020 | llvm_unreachable("getChildKind() does not return this value" ); |
1021 | } |
1022 | } |
1023 | |
1024 | // To build syntax tree nodes for NestedNameSpecifierLoc we override |
1025 | // Traverse instead of WalkUpFrom because we want to traverse the children |
1026 | // ourselves and build a list instead of a nested tree of name specifier |
1027 | // prefixes. |
1028 | bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) { |
1029 | if (!QualifierLoc) |
1030 | return true; |
1031 | for (auto It = QualifierLoc; It; It = It.getPrefix()) { |
1032 | auto *NS = buildNameSpecifier(NNSLoc: It); |
1033 | if (!NS) |
1034 | return false; |
1035 | Builder.markChild(N: NS, R: syntax::NodeRole::ListElement); |
1036 | Builder.markChildToken(Loc: It.getEndLoc(), R: syntax::NodeRole::ListDelimiter); |
1037 | } |
1038 | Builder.foldNode(Range: Builder.getRange(Range: QualifierLoc.getSourceRange()), |
1039 | New: new (allocator()) syntax::NestedNameSpecifier, |
1040 | From: QualifierLoc); |
1041 | return true; |
1042 | } |
1043 | |
1044 | syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc, |
1045 | SourceLocation TemplateKeywordLoc, |
1046 | SourceRange UnqualifiedIdLoc, |
1047 | ASTPtr From) { |
1048 | if (QualifierLoc) { |
1049 | Builder.markChild(N: QualifierLoc, R: syntax::NodeRole::Qualifier); |
1050 | if (TemplateKeywordLoc.isValid()) |
1051 | Builder.markChildToken(Loc: TemplateKeywordLoc, |
1052 | R: syntax::NodeRole::TemplateKeyword); |
1053 | } |
1054 | |
1055 | auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId; |
1056 | Builder.foldNode(Range: Builder.getRange(Range: UnqualifiedIdLoc), New: TheUnqualifiedId, |
1057 | From: nullptr); |
1058 | Builder.markChild(N: TheUnqualifiedId, R: syntax::NodeRole::UnqualifiedId); |
1059 | |
1060 | auto IdExpressionBeginLoc = |
1061 | QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin(); |
1062 | |
1063 | auto *TheIdExpression = new (allocator()) syntax::IdExpression; |
1064 | Builder.foldNode( |
1065 | Range: Builder.getRange(First: IdExpressionBeginLoc, Last: UnqualifiedIdLoc.getEnd()), |
1066 | New: TheIdExpression, From); |
1067 | |
1068 | return TheIdExpression; |
1069 | } |
1070 | |
1071 | bool WalkUpFromMemberExpr(MemberExpr *S) { |
1072 | // For `MemberExpr` with implicit `this->` we generate a simple |
1073 | // `id-expression` syntax node, beacuse an implicit `member-expression` is |
1074 | // syntactically undistinguishable from an `id-expression` |
1075 | if (S->isImplicitAccess()) { |
1076 | buildIdExpression(QualifierLoc: S->getQualifierLoc(), TemplateKeywordLoc: S->getTemplateKeywordLoc(), |
1077 | UnqualifiedIdLoc: SourceRange(S->getMemberLoc(), S->getEndLoc()), From: S); |
1078 | return true; |
1079 | } |
1080 | |
1081 | auto *TheIdExpression = buildIdExpression( |
1082 | QualifierLoc: S->getQualifierLoc(), TemplateKeywordLoc: S->getTemplateKeywordLoc(), |
1083 | UnqualifiedIdLoc: SourceRange(S->getMemberLoc(), S->getEndLoc()), From: nullptr); |
1084 | |
1085 | Builder.markChild(N: TheIdExpression, R: syntax::NodeRole::Member); |
1086 | |
1087 | Builder.markExprChild(Child: S->getBase(), Role: syntax::NodeRole::Object); |
1088 | Builder.markChildToken(Loc: S->getOperatorLoc(), R: syntax::NodeRole::AccessToken); |
1089 | |
1090 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1091 | New: new (allocator()) syntax::MemberExpression, From: S); |
1092 | return true; |
1093 | } |
1094 | |
1095 | bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { |
1096 | buildIdExpression(QualifierLoc: S->getQualifierLoc(), TemplateKeywordLoc: S->getTemplateKeywordLoc(), |
1097 | UnqualifiedIdLoc: SourceRange(S->getLocation(), S->getEndLoc()), From: S); |
1098 | |
1099 | return true; |
1100 | } |
1101 | |
1102 | // Same logic as DeclRefExpr. |
1103 | bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { |
1104 | buildIdExpression(QualifierLoc: S->getQualifierLoc(), TemplateKeywordLoc: S->getTemplateKeywordLoc(), |
1105 | UnqualifiedIdLoc: SourceRange(S->getLocation(), S->getEndLoc()), From: S); |
1106 | |
1107 | return true; |
1108 | } |
1109 | |
1110 | bool WalkUpFromCXXThisExpr(CXXThisExpr *S) { |
1111 | if (!S->isImplicit()) { |
1112 | Builder.markChildToken(Loc: S->getLocation(), |
1113 | R: syntax::NodeRole::IntroducerKeyword); |
1114 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1115 | New: new (allocator()) syntax::ThisExpression, From: S); |
1116 | } |
1117 | return true; |
1118 | } |
1119 | |
1120 | bool WalkUpFromParenExpr(ParenExpr *S) { |
1121 | Builder.markChildToken(Loc: S->getLParen(), R: syntax::NodeRole::OpenParen); |
1122 | Builder.markExprChild(Child: S->getSubExpr(), Role: syntax::NodeRole::SubExpression); |
1123 | Builder.markChildToken(Loc: S->getRParen(), R: syntax::NodeRole::CloseParen); |
1124 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1125 | New: new (allocator()) syntax::ParenExpression, From: S); |
1126 | return true; |
1127 | } |
1128 | |
1129 | bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { |
1130 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1131 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1132 | New: new (allocator()) syntax::IntegerLiteralExpression, From: S); |
1133 | return true; |
1134 | } |
1135 | |
1136 | bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { |
1137 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1138 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1139 | New: new (allocator()) syntax::CharacterLiteralExpression, From: S); |
1140 | return true; |
1141 | } |
1142 | |
1143 | bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { |
1144 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1145 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1146 | New: new (allocator()) syntax::FloatingLiteralExpression, From: S); |
1147 | return true; |
1148 | } |
1149 | |
1150 | bool WalkUpFromStringLiteral(StringLiteral *S) { |
1151 | Builder.markChildToken(Loc: S->getBeginLoc(), R: syntax::NodeRole::LiteralToken); |
1152 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1153 | New: new (allocator()) syntax::StringLiteralExpression, From: S); |
1154 | return true; |
1155 | } |
1156 | |
1157 | bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { |
1158 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1159 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1160 | New: new (allocator()) syntax::BoolLiteralExpression, From: S); |
1161 | return true; |
1162 | } |
1163 | |
1164 | bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { |
1165 | Builder.markChildToken(Loc: S->getLocation(), R: syntax::NodeRole::LiteralToken); |
1166 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1167 | New: new (allocator()) syntax::CxxNullPtrExpression, From: S); |
1168 | return true; |
1169 | } |
1170 | |
1171 | bool WalkUpFromUnaryOperator(UnaryOperator *S) { |
1172 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1173 | R: syntax::NodeRole::OperatorToken); |
1174 | Builder.markExprChild(Child: S->getSubExpr(), Role: syntax::NodeRole::Operand); |
1175 | |
1176 | if (S->isPostfix()) |
1177 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1178 | New: new (allocator()) syntax::PostfixUnaryOperatorExpression, |
1179 | From: S); |
1180 | else |
1181 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1182 | New: new (allocator()) syntax::PrefixUnaryOperatorExpression, |
1183 | From: S); |
1184 | |
1185 | return true; |
1186 | } |
1187 | |
1188 | bool WalkUpFromBinaryOperator(BinaryOperator *S) { |
1189 | Builder.markExprChild(Child: S->getLHS(), Role: syntax::NodeRole::LeftHandSide); |
1190 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1191 | R: syntax::NodeRole::OperatorToken); |
1192 | Builder.markExprChild(Child: S->getRHS(), Role: syntax::NodeRole::RightHandSide); |
1193 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1194 | New: new (allocator()) syntax::BinaryOperatorExpression, From: S); |
1195 | return true; |
1196 | } |
1197 | |
1198 | /// Builds `CallArguments` syntax node from arguments that appear in source |
1199 | /// code, i.e. not default arguments. |
1200 | syntax::CallArguments * |
1201 | buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) { |
1202 | auto Args = dropDefaultArgs(Args: ArgsAndDefaultArgs); |
1203 | for (auto *Arg : Args) { |
1204 | Builder.markExprChild(Child: Arg, Role: syntax::NodeRole::ListElement); |
1205 | const auto *DelimiterToken = |
1206 | std::next(x: Builder.findToken(L: Arg->getEndLoc())); |
1207 | if (DelimiterToken->kind() == clang::tok::TokenKind::comma) |
1208 | Builder.markChildToken(T: DelimiterToken, R: syntax::NodeRole::ListDelimiter); |
1209 | } |
1210 | |
1211 | auto *Arguments = new (allocator()) syntax::CallArguments; |
1212 | if (!Args.empty()) |
1213 | Builder.foldNode(Range: Builder.getRange(First: (*Args.begin())->getBeginLoc(), |
1214 | Last: (*(Args.end() - 1))->getEndLoc()), |
1215 | New: Arguments, From: nullptr); |
1216 | |
1217 | return Arguments; |
1218 | } |
1219 | |
1220 | bool WalkUpFromCallExpr(CallExpr *S) { |
1221 | Builder.markExprChild(Child: S->getCallee(), Role: syntax::NodeRole::Callee); |
1222 | |
1223 | const auto *LParenToken = |
1224 | std::next(x: Builder.findToken(L: S->getCallee()->getEndLoc())); |
1225 | // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed |
1226 | // the test on decltype desctructors. |
1227 | if (LParenToken->kind() == clang::tok::l_paren) |
1228 | Builder.markChildToken(T: LParenToken, R: syntax::NodeRole::OpenParen); |
1229 | |
1230 | Builder.markChild(N: buildCallArguments(ArgsAndDefaultArgs: S->arguments()), |
1231 | R: syntax::NodeRole::Arguments); |
1232 | |
1233 | Builder.markChildToken(Loc: S->getRParenLoc(), R: syntax::NodeRole::CloseParen); |
1234 | |
1235 | Builder.foldNode(Range: Builder.getRange(Range: S->getSourceRange()), |
1236 | New: new (allocator()) syntax::CallExpression, From: S); |
1237 | return true; |
1238 | } |
1239 | |
1240 | bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) { |
1241 | // Ignore the implicit calls to default constructors. |
1242 | if ((S->getNumArgs() == 0 || isa<CXXDefaultArgExpr>(Val: S->getArg(Arg: 0))) && |
1243 | S->getParenOrBraceRange().isInvalid()) |
1244 | return true; |
1245 | return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S); |
1246 | } |
1247 | |
1248 | bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { |
1249 | // To construct a syntax tree of the same shape for calls to built-in and |
1250 | // user-defined operators, ignore the `DeclRefExpr` that refers to the |
1251 | // operator and treat it as a simple token. Do that by traversing |
1252 | // arguments instead of children. |
1253 | for (auto *child : S->arguments()) { |
1254 | // A postfix unary operator is declared as taking two operands. The |
1255 | // second operand is used to distinguish from its prefix counterpart. In |
1256 | // the semantic AST this "phantom" operand is represented as a |
1257 | // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this |
1258 | // operand because it does not correspond to anything written in source |
1259 | // code. |
1260 | if (child->getSourceRange().isInvalid()) { |
1261 | assert(getOperatorNodeKind(*S) == |
1262 | syntax::NodeKind::PostfixUnaryOperatorExpression); |
1263 | continue; |
1264 | } |
1265 | if (!TraverseStmt(S: child)) |
1266 | return false; |
1267 | } |
1268 | return WalkUpFromCXXOperatorCallExpr(S); |
1269 | } |
1270 | |
1271 | bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { |
1272 | switch (getOperatorNodeKind(E: *S)) { |
1273 | case syntax::NodeKind::BinaryOperatorExpression: |
1274 | Builder.markExprChild(Child: S->getArg(Arg: 0), Role: syntax::NodeRole::LeftHandSide); |
1275 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1276 | R: syntax::NodeRole::OperatorToken); |
1277 | Builder.markExprChild(Child: S->getArg(Arg: 1), Role: syntax::NodeRole::RightHandSide); |
1278 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1279 | New: new (allocator()) syntax::BinaryOperatorExpression, From: S); |
1280 | return true; |
1281 | case syntax::NodeKind::PrefixUnaryOperatorExpression: |
1282 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1283 | R: syntax::NodeRole::OperatorToken); |
1284 | Builder.markExprChild(Child: S->getArg(Arg: 0), Role: syntax::NodeRole::Operand); |
1285 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1286 | New: new (allocator()) syntax::PrefixUnaryOperatorExpression, |
1287 | From: S); |
1288 | return true; |
1289 | case syntax::NodeKind::PostfixUnaryOperatorExpression: |
1290 | Builder.markChildToken(Loc: S->getOperatorLoc(), |
1291 | R: syntax::NodeRole::OperatorToken); |
1292 | Builder.markExprChild(Child: S->getArg(Arg: 0), Role: syntax::NodeRole::Operand); |
1293 | Builder.foldNode(Range: Builder.getExprRange(E: S), |
1294 | New: new (allocator()) syntax::PostfixUnaryOperatorExpression, |
1295 | From: S); |
1296 | return true; |
1297 | case syntax::NodeKind::CallExpression: { |
1298 | Builder.markExprChild(Child: S->getArg(Arg: 0), Role: syntax::NodeRole::Callee); |
1299 | |
1300 | const auto *LParenToken = |
1301 | std::next(x: Builder.findToken(L: S->getArg(Arg: 0)->getEndLoc())); |
1302 | // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have |
1303 | // fixed the test on decltype desctructors. |
1304 | if (LParenToken->kind() == clang::tok::l_paren) |
1305 | Builder.markChildToken(T: LParenToken, R: syntax::NodeRole::OpenParen); |
1306 | |
1307 | Builder.markChild(N: buildCallArguments(ArgsAndDefaultArgs: CallExpr::arg_range( |
1308 | S->arg_begin() + 1, S->arg_end())), |
1309 | R: syntax::NodeRole::Arguments); |
1310 | |
1311 | Builder.markChildToken(Loc: S->getRParenLoc(), R: syntax::NodeRole::CloseParen); |
1312 | |
1313 | Builder.foldNode(Range: Builder.getRange(Range: S->getSourceRange()), |
1314 | New: new (allocator()) syntax::CallExpression, From: S); |
1315 | return true; |
1316 | } |
1317 | case syntax::NodeKind::UnknownExpression: |
1318 | return WalkUpFromExpr(E: S); |
1319 | default: |
1320 | llvm_unreachable("getOperatorNodeKind() does not return this value" ); |
1321 | } |
1322 | } |
1323 | |
1324 | bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; } |
1325 | |
1326 | bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { |
1327 | auto Tokens = Builder.getDeclarationRange(D: S); |
1328 | if (Tokens.front().kind() == tok::coloncolon) { |
1329 | // Handle nested namespace definitions. Those start at '::' token, e.g. |
1330 | // namespace a^::b {} |
1331 | // FIXME: build corresponding nodes for the name of this namespace. |
1332 | return true; |
1333 | } |
1334 | Builder.foldNode(Range: Tokens, New: new (allocator()) syntax::NamespaceDefinition, From: S); |
1335 | return true; |
1336 | } |
1337 | |
1338 | // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test |
1339 | // results. Find test coverage or remove it. |
1340 | bool TraverseParenTypeLoc(ParenTypeLoc L) { |
1341 | // We reverse order of traversal to get the proper syntax structure. |
1342 | if (!WalkUpFromParenTypeLoc(L)) |
1343 | return false; |
1344 | return TraverseTypeLoc(TL: L.getInnerLoc()); |
1345 | } |
1346 | |
1347 | bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { |
1348 | Builder.markChildToken(Loc: L.getLParenLoc(), R: syntax::NodeRole::OpenParen); |
1349 | Builder.markChildToken(Loc: L.getRParenLoc(), R: syntax::NodeRole::CloseParen); |
1350 | Builder.foldNode(Range: Builder.getRange(First: L.getLParenLoc(), Last: L.getRParenLoc()), |
1351 | New: new (allocator()) syntax::ParenDeclarator, L); |
1352 | return true; |
1353 | } |
1354 | |
1355 | // Declarator chunks, they are produced by type locs and some clang::Decls. |
1356 | bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { |
1357 | Builder.markChildToken(Loc: L.getLBracketLoc(), R: syntax::NodeRole::OpenParen); |
1358 | Builder.markExprChild(Child: L.getSizeExpr(), Role: syntax::NodeRole::Size); |
1359 | Builder.markChildToken(Loc: L.getRBracketLoc(), R: syntax::NodeRole::CloseParen); |
1360 | Builder.foldNode(Range: Builder.getRange(First: L.getLBracketLoc(), Last: L.getRBracketLoc()), |
1361 | New: new (allocator()) syntax::ArraySubscript, L); |
1362 | return true; |
1363 | } |
1364 | |
1365 | syntax::ParameterDeclarationList * |
1366 | buildParameterDeclarationList(ArrayRef<ParmVarDecl *> Params) { |
1367 | for (auto *P : Params) { |
1368 | Builder.markChild(N: P, R: syntax::NodeRole::ListElement); |
1369 | const auto *DelimiterToken = std::next(x: Builder.findToken(L: P->getEndLoc())); |
1370 | if (DelimiterToken->kind() == clang::tok::TokenKind::comma) |
1371 | Builder.markChildToken(T: DelimiterToken, R: syntax::NodeRole::ListDelimiter); |
1372 | } |
1373 | auto *Parameters = new (allocator()) syntax::ParameterDeclarationList; |
1374 | if (!Params.empty()) |
1375 | Builder.foldNode(Range: Builder.getRange(First: Params.front()->getBeginLoc(), |
1376 | Last: Params.back()->getEndLoc()), |
1377 | New: Parameters, From: nullptr); |
1378 | return Parameters; |
1379 | } |
1380 | |
1381 | bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { |
1382 | Builder.markChildToken(Loc: L.getLParenLoc(), R: syntax::NodeRole::OpenParen); |
1383 | |
1384 | Builder.markChild(N: buildParameterDeclarationList(Params: L.getParams()), |
1385 | R: syntax::NodeRole::Parameters); |
1386 | |
1387 | Builder.markChildToken(Loc: L.getRParenLoc(), R: syntax::NodeRole::CloseParen); |
1388 | Builder.foldNode(Range: Builder.getRange(First: L.getLParenLoc(), Last: L.getEndLoc()), |
1389 | New: new (allocator()) syntax::ParametersAndQualifiers, L); |
1390 | return true; |
1391 | } |
1392 | |
1393 | bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { |
1394 | if (!L.getTypePtr()->hasTrailingReturn()) |
1395 | return WalkUpFromFunctionTypeLoc(L); |
1396 | |
1397 | auto *TrailingReturnTokens = buildTrailingReturn(L); |
1398 | // Finish building the node for parameters. |
1399 | Builder.markChild(N: TrailingReturnTokens, R: syntax::NodeRole::TrailingReturn); |
1400 | return WalkUpFromFunctionTypeLoc(L); |
1401 | } |
1402 | |
1403 | bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { |
1404 | // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds |
1405 | // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to |
1406 | // "(Y::*mp)" We thus reverse the order of traversal to get the proper |
1407 | // syntax structure. |
1408 | if (!WalkUpFromMemberPointerTypeLoc(L)) |
1409 | return false; |
1410 | return TraverseTypeLoc(TL: L.getPointeeLoc()); |
1411 | } |
1412 | |
1413 | bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { |
1414 | auto SR = L.getLocalSourceRange(); |
1415 | Builder.foldNode(Range: Builder.getRange(Range: SR), |
1416 | New: new (allocator()) syntax::MemberPointer, L); |
1417 | return true; |
1418 | } |
1419 | |
1420 | // The code below is very regular, it could even be generated with some |
1421 | // preprocessor magic. We merely assign roles to the corresponding children |
1422 | // and fold resulting nodes. |
1423 | bool WalkUpFromDeclStmt(DeclStmt *S) { |
1424 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1425 | New: new (allocator()) syntax::DeclarationStatement, From: S); |
1426 | return true; |
1427 | } |
1428 | |
1429 | bool WalkUpFromNullStmt(NullStmt *S) { |
1430 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1431 | New: new (allocator()) syntax::EmptyStatement, From: S); |
1432 | return true; |
1433 | } |
1434 | |
1435 | bool WalkUpFromSwitchStmt(SwitchStmt *S) { |
1436 | Builder.markChildToken(Loc: S->getSwitchLoc(), |
1437 | R: syntax::NodeRole::IntroducerKeyword); |
1438 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1439 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1440 | New: new (allocator()) syntax::SwitchStatement, From: S); |
1441 | return true; |
1442 | } |
1443 | |
1444 | bool WalkUpFromCaseStmt(CaseStmt *S) { |
1445 | Builder.markChildToken(Loc: S->getKeywordLoc(), |
1446 | R: syntax::NodeRole::IntroducerKeyword); |
1447 | Builder.markExprChild(Child: S->getLHS(), Role: syntax::NodeRole::CaseValue); |
1448 | Builder.markStmtChild(Child: S->getSubStmt(), Role: syntax::NodeRole::BodyStatement); |
1449 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1450 | New: new (allocator()) syntax::CaseStatement, From: S); |
1451 | return true; |
1452 | } |
1453 | |
1454 | bool WalkUpFromDefaultStmt(DefaultStmt *S) { |
1455 | Builder.markChildToken(Loc: S->getKeywordLoc(), |
1456 | R: syntax::NodeRole::IntroducerKeyword); |
1457 | Builder.markStmtChild(Child: S->getSubStmt(), Role: syntax::NodeRole::BodyStatement); |
1458 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1459 | New: new (allocator()) syntax::DefaultStatement, From: S); |
1460 | return true; |
1461 | } |
1462 | |
1463 | bool WalkUpFromIfStmt(IfStmt *S) { |
1464 | Builder.markChildToken(Loc: S->getIfLoc(), R: syntax::NodeRole::IntroducerKeyword); |
1465 | Stmt *ConditionStatement = S->getCond(); |
1466 | if (S->hasVarStorage()) |
1467 | ConditionStatement = S->getConditionVariableDeclStmt(); |
1468 | Builder.markStmtChild(Child: ConditionStatement, Role: syntax::NodeRole::Condition); |
1469 | Builder.markStmtChild(Child: S->getThen(), Role: syntax::NodeRole::ThenStatement); |
1470 | Builder.markChildToken(Loc: S->getElseLoc(), R: syntax::NodeRole::ElseKeyword); |
1471 | Builder.markStmtChild(Child: S->getElse(), Role: syntax::NodeRole::ElseStatement); |
1472 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1473 | New: new (allocator()) syntax::IfStatement, From: S); |
1474 | return true; |
1475 | } |
1476 | |
1477 | bool WalkUpFromForStmt(ForStmt *S) { |
1478 | Builder.markChildToken(Loc: S->getForLoc(), R: syntax::NodeRole::IntroducerKeyword); |
1479 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1480 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1481 | New: new (allocator()) syntax::ForStatement, From: S); |
1482 | return true; |
1483 | } |
1484 | |
1485 | bool WalkUpFromWhileStmt(WhileStmt *S) { |
1486 | Builder.markChildToken(Loc: S->getWhileLoc(), |
1487 | R: syntax::NodeRole::IntroducerKeyword); |
1488 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1489 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1490 | New: new (allocator()) syntax::WhileStatement, From: S); |
1491 | return true; |
1492 | } |
1493 | |
1494 | bool WalkUpFromContinueStmt(ContinueStmt *S) { |
1495 | Builder.markChildToken(Loc: S->getContinueLoc(), |
1496 | R: syntax::NodeRole::IntroducerKeyword); |
1497 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1498 | New: new (allocator()) syntax::ContinueStatement, From: S); |
1499 | return true; |
1500 | } |
1501 | |
1502 | bool WalkUpFromBreakStmt(BreakStmt *S) { |
1503 | Builder.markChildToken(Loc: S->getBreakLoc(), |
1504 | R: syntax::NodeRole::IntroducerKeyword); |
1505 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1506 | New: new (allocator()) syntax::BreakStatement, From: S); |
1507 | return true; |
1508 | } |
1509 | |
1510 | bool WalkUpFromReturnStmt(ReturnStmt *S) { |
1511 | Builder.markChildToken(Loc: S->getReturnLoc(), |
1512 | R: syntax::NodeRole::IntroducerKeyword); |
1513 | Builder.markExprChild(Child: S->getRetValue(), Role: syntax::NodeRole::ReturnValue); |
1514 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1515 | New: new (allocator()) syntax::ReturnStatement, From: S); |
1516 | return true; |
1517 | } |
1518 | |
1519 | bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { |
1520 | Builder.markChildToken(Loc: S->getForLoc(), R: syntax::NodeRole::IntroducerKeyword); |
1521 | Builder.markStmtChild(Child: S->getBody(), Role: syntax::NodeRole::BodyStatement); |
1522 | Builder.foldNode(Range: Builder.getStmtRange(S), |
1523 | New: new (allocator()) syntax::RangeBasedForStatement, From: S); |
1524 | return true; |
1525 | } |
1526 | |
1527 | bool WalkUpFromEmptyDecl(EmptyDecl *S) { |
1528 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1529 | New: new (allocator()) syntax::EmptyDeclaration, From: S); |
1530 | return true; |
1531 | } |
1532 | |
1533 | bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { |
1534 | Builder.markExprChild(Child: S->getAssertExpr(), Role: syntax::NodeRole::Condition); |
1535 | Builder.markExprChild(Child: S->getMessage(), Role: syntax::NodeRole::Message); |
1536 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1537 | New: new (allocator()) syntax::StaticAssertDeclaration, From: S); |
1538 | return true; |
1539 | } |
1540 | |
1541 | bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { |
1542 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1543 | New: new (allocator()) syntax::LinkageSpecificationDeclaration, |
1544 | From: S); |
1545 | return true; |
1546 | } |
1547 | |
1548 | bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { |
1549 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1550 | New: new (allocator()) syntax::NamespaceAliasDefinition, From: S); |
1551 | return true; |
1552 | } |
1553 | |
1554 | bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { |
1555 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1556 | New: new (allocator()) syntax::UsingNamespaceDirective, From: S); |
1557 | return true; |
1558 | } |
1559 | |
1560 | bool WalkUpFromUsingDecl(UsingDecl *S) { |
1561 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1562 | New: new (allocator()) syntax::UsingDeclaration, From: S); |
1563 | return true; |
1564 | } |
1565 | |
1566 | bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { |
1567 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1568 | New: new (allocator()) syntax::UsingDeclaration, From: S); |
1569 | return true; |
1570 | } |
1571 | |
1572 | bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { |
1573 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1574 | New: new (allocator()) syntax::UsingDeclaration, From: S); |
1575 | return true; |
1576 | } |
1577 | |
1578 | bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { |
1579 | Builder.foldNode(Range: Builder.getDeclarationRange(D: S), |
1580 | New: new (allocator()) syntax::TypeAliasDeclaration, From: S); |
1581 | return true; |
1582 | } |
1583 | |
1584 | private: |
1585 | /// Folds SimpleDeclarator node (if present) and in case this is the last |
1586 | /// declarator in the chain it also folds SimpleDeclaration node. |
1587 | template <class T> bool processDeclaratorAndDeclaration(T *D) { |
1588 | auto Range = getDeclaratorRange( |
1589 | Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(), |
1590 | getQualifiedNameStart(D), getInitializerRange(D)); |
1591 | |
1592 | // There doesn't have to be a declarator (e.g. `void foo(int)` only has |
1593 | // declaration, but no declarator). |
1594 | if (!Range.getBegin().isValid()) { |
1595 | Builder.markChild(N: new (allocator()) syntax::DeclaratorList, |
1596 | R: syntax::NodeRole::Declarators); |
1597 | Builder.foldNode(Builder.getDeclarationRange(D), |
1598 | new (allocator()) syntax::SimpleDeclaration, D); |
1599 | return true; |
1600 | } |
1601 | |
1602 | auto *N = new (allocator()) syntax::SimpleDeclarator; |
1603 | Builder.foldNode(Builder.getRange(Range), N, nullptr); |
1604 | Builder.markChild(N, R: syntax::NodeRole::ListElement); |
1605 | |
1606 | if (!Builder.isResponsibleForCreatingDeclaration(D)) { |
1607 | // If this is not the last declarator in the declaration we expect a |
1608 | // delimiter after it. |
1609 | const auto *DelimiterToken = std::next(Builder.findToken(L: Range.getEnd())); |
1610 | if (DelimiterToken->kind() == clang::tok::TokenKind::comma) |
1611 | Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); |
1612 | } else { |
1613 | auto *DL = new (allocator()) syntax::DeclaratorList; |
1614 | auto DeclarationRange = Builder.getDeclarationRange(D); |
1615 | Builder.foldList(SuperRange: DeclarationRange, New: DL, From: nullptr); |
1616 | |
1617 | Builder.markChild(N: DL, R: syntax::NodeRole::Declarators); |
1618 | Builder.foldNode(DeclarationRange, |
1619 | new (allocator()) syntax::SimpleDeclaration, D); |
1620 | } |
1621 | return true; |
1622 | } |
1623 | |
1624 | /// Returns the range of the built node. |
1625 | syntax::TrailingReturnType *buildTrailingReturn(FunctionProtoTypeLoc L) { |
1626 | assert(L.getTypePtr()->hasTrailingReturn()); |
1627 | |
1628 | auto ReturnedType = L.getReturnLoc(); |
1629 | // Build node for the declarator, if any. |
1630 | auto ReturnDeclaratorRange = SourceRange(GetStartLoc().Visit(TyLoc: ReturnedType), |
1631 | ReturnedType.getEndLoc()); |
1632 | syntax::SimpleDeclarator *ReturnDeclarator = nullptr; |
1633 | if (ReturnDeclaratorRange.isValid()) { |
1634 | ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; |
1635 | Builder.foldNode(Range: Builder.getRange(Range: ReturnDeclaratorRange), |
1636 | New: ReturnDeclarator, From: nullptr); |
1637 | } |
1638 | |
1639 | // Build node for trailing return type. |
1640 | auto Return = Builder.getRange(Range: ReturnedType.getSourceRange()); |
1641 | const auto *Arrow = Return.begin() - 1; |
1642 | assert(Arrow->kind() == tok::arrow); |
1643 | auto Tokens = llvm::ArrayRef(Arrow, Return.end()); |
1644 | Builder.markChildToken(T: Arrow, R: syntax::NodeRole::ArrowToken); |
1645 | if (ReturnDeclarator) |
1646 | Builder.markChild(N: ReturnDeclarator, R: syntax::NodeRole::Declarator); |
1647 | auto *R = new (allocator()) syntax::TrailingReturnType; |
1648 | Builder.foldNode(Range: Tokens, New: R, L); |
1649 | return R; |
1650 | } |
1651 | |
1652 | void foldExplicitTemplateInstantiation( |
1653 | ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, |
1654 | const syntax::Token *TemplateKW, |
1655 | syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { |
1656 | assert(!ExternKW || ExternKW->kind() == tok::kw_extern); |
1657 | assert(TemplateKW && TemplateKW->kind() == tok::kw_template); |
1658 | Builder.markChildToken(T: ExternKW, R: syntax::NodeRole::ExternKeyword); |
1659 | Builder.markChildToken(T: TemplateKW, R: syntax::NodeRole::IntroducerKeyword); |
1660 | Builder.markChild(N: InnerDeclaration, R: syntax::NodeRole::Declaration); |
1661 | Builder.foldNode( |
1662 | Range, New: new (allocator()) syntax::ExplicitTemplateInstantiation, From); |
1663 | } |
1664 | |
1665 | syntax::TemplateDeclaration *foldTemplateDeclaration( |
1666 | ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, |
1667 | ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { |
1668 | assert(TemplateKW && TemplateKW->kind() == tok::kw_template); |
1669 | Builder.markChildToken(T: TemplateKW, R: syntax::NodeRole::IntroducerKeyword); |
1670 | |
1671 | auto *N = new (allocator()) syntax::TemplateDeclaration; |
1672 | Builder.foldNode(Range, New: N, From); |
1673 | Builder.markChild(N, R: syntax::NodeRole::Declaration); |
1674 | return N; |
1675 | } |
1676 | |
1677 | /// A small helper to save some typing. |
1678 | llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } |
1679 | |
1680 | syntax::TreeBuilder &Builder; |
1681 | const ASTContext &Context; |
1682 | }; |
1683 | } // namespace |
1684 | |
1685 | void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { |
1686 | DeclsWithoutSemicolons.insert(V: D); |
1687 | } |
1688 | |
1689 | void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { |
1690 | if (Loc.isInvalid()) |
1691 | return; |
1692 | Pending.assignRole(Range: *findToken(L: Loc), Role); |
1693 | } |
1694 | |
1695 | void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { |
1696 | if (!T) |
1697 | return; |
1698 | Pending.assignRole(Range: *T, Role: R); |
1699 | } |
1700 | |
1701 | void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { |
1702 | assert(N); |
1703 | setRole(N, R); |
1704 | } |
1705 | |
1706 | void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { |
1707 | auto *SN = Mapping.find(P: N); |
1708 | assert(SN != nullptr); |
1709 | setRole(N: SN, R); |
1710 | } |
1711 | void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) { |
1712 | auto *SN = Mapping.find(P: NNSLoc); |
1713 | assert(SN != nullptr); |
1714 | setRole(N: SN, R); |
1715 | } |
1716 | |
1717 | void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { |
1718 | if (!Child) |
1719 | return; |
1720 | |
1721 | syntax::Tree *ChildNode; |
1722 | if (Expr *ChildExpr = dyn_cast<Expr>(Val: Child)) { |
1723 | // This is an expression in a statement position, consume the trailing |
1724 | // semicolon and form an 'ExpressionStatement' node. |
1725 | markExprChild(Child: ChildExpr, Role: NodeRole::Expression); |
1726 | ChildNode = new (allocator()) syntax::ExpressionStatement; |
1727 | // (!) 'getStmtRange()' ensures this covers a trailing semicolon. |
1728 | Pending.foldChildren(TB: TBTM.tokenBuffer(), Tokens: getStmtRange(S: Child), Node: ChildNode); |
1729 | } else { |
1730 | ChildNode = Mapping.find(P: Child); |
1731 | } |
1732 | assert(ChildNode != nullptr); |
1733 | setRole(N: ChildNode, R: Role); |
1734 | } |
1735 | |
1736 | void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { |
1737 | if (!Child) |
1738 | return; |
1739 | Child = IgnoreImplicit(E: Child); |
1740 | |
1741 | syntax::Tree *ChildNode = Mapping.find(P: Child); |
1742 | assert(ChildNode != nullptr); |
1743 | setRole(N: ChildNode, R: Role); |
1744 | } |
1745 | |
1746 | const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { |
1747 | if (L.isInvalid()) |
1748 | return nullptr; |
1749 | auto It = LocationToToken.find(Val: L); |
1750 | assert(It != LocationToToken.end()); |
1751 | return It->second; |
1752 | } |
1753 | |
1754 | syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A, |
1755 | TokenBufferTokenManager& TBTM, |
1756 | ASTContext &Context) { |
1757 | TreeBuilder Builder(A, TBTM); |
1758 | BuildTreeVisitor(Context, Builder).TraverseAST(AST&: Context); |
1759 | return std::move(Builder).finalize(); |
1760 | } |
1761 | |