1//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Defines the basic structure of the syntax tree. There are two kinds of nodes:
9// - leaf nodes correspond to tokens,
10// - tree nodes correspond to language grammar constructs.
11//
12// The tree is initially built from an AST. Each node of a newly built tree
13// covers a continuous subrange of expanded tokens (i.e. tokens after
14// preprocessing), the specific tokens coverered are stored in the leaf nodes of
15// a tree. A post-order traversal of a tree will visit leaf nodes in an order
16// corresponding the original order of expanded tokens.
17//
18// This is still work in progress and highly experimental, we leave room for
19// ourselves to completely change the design and/or implementation.
20//===----------------------------------------------------------------------===//
21#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
22#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
23
24#include "clang/Basic/TokenKinds.h"
25#include "clang/Tooling/Syntax/TokenManager.h"
26#include "llvm/ADT/iterator.h"
27#include "llvm/Support/Allocator.h"
28#include <cstdint>
29#include <vector>
30
31namespace clang {
32namespace syntax {
33
34/// A memory arena for syntax trees.
35// FIXME: use BumpPtrAllocator directly.
36class Arena {
37public:
38 llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
39private:
40 /// Keeps all the allocated nodes and their intermediate data structures.
41 llvm::BumpPtrAllocator Allocator;
42};
43
44class Tree;
45class TreeBuilder;
46class FactoryImpl;
47class MutationsImpl;
48
49enum class NodeKind : uint16_t;
50enum class NodeRole : uint8_t;
51
52/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
53/// a Tree (representing language constructrs).
54class Node {
55protected:
56 /// Newly created nodes are detached from a tree, parent and sibling links are
57 /// set when the node is added as a child to another one.
58 Node(NodeKind Kind);
59 /// Nodes are allocated on Arenas; the destructor is never called.
60 ~Node() = default;
61
62public:
63 /// Nodes cannot simply be copied without violating tree invariants.
64 Node(const Node &) = delete;
65 Node &operator=(const Node &) = delete;
66 /// Idiomatically, nodes are allocated on an Arena and never moved.
67 Node(Node &&) = delete;
68 Node &operator=(Node &&) = delete;
69
70 NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
71 NodeRole getRole() const { return static_cast<NodeRole>(Role); }
72
73 /// Whether the node is detached from a tree, i.e. does not have a parent.
74 bool isDetached() const;
75 /// Whether the node was created from the AST backed by the source code
76 /// rather than added later through mutation APIs or created with factory
77 /// functions.
78 /// When this flag is true, all subtrees are also original.
79 /// This flag is set to false on any modifications to the node or any of its
80 /// subtrees, even if this simply involves swapping existing subtrees.
81 bool isOriginal() const { return Original; }
82 /// If this function return false, the tree cannot be modified because there
83 /// is no reasonable way to produce the corresponding textual replacements.
84 /// This can happen when the node crosses macro expansion boundaries.
85 ///
86 /// Note that even if the node is not modifiable, its child nodes can be
87 /// modifiable.
88 bool canModify() const { return CanModify; }
89
90 const Tree *getParent() const { return Parent; }
91 Tree *getParent() { return Parent; }
92
93 const Node *getNextSibling() const { return NextSibling; }
94 Node *getNextSibling() { return NextSibling; }
95 const Node *getPreviousSibling() const { return PreviousSibling; }
96 Node *getPreviousSibling() { return PreviousSibling; }
97
98 /// Dumps the structure of a subtree. For debugging and testing purposes.
99 std::string dump(const TokenManager &SM) const;
100 /// Dumps the tokens forming this subtree.
101 std::string dumpTokens(const TokenManager &SM) const;
102
103 /// Asserts invariants on this node of the tree and its immediate children.
104 /// Will not recurse into the subtree. No-op if NDEBUG is set.
105 void assertInvariants() const;
106 /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
107 void assertInvariantsRecursive() const;
108
109private:
110 // Tree is allowed to change the Parent link and Role.
111 friend class Tree;
112 // TreeBuilder is allowed to set the Original and CanModify flags.
113 friend class TreeBuilder;
114 // MutationsImpl sets roles and CanModify flag.
115 friend class MutationsImpl;
116 // FactoryImpl sets CanModify flag.
117 friend class FactoryImpl;
118
119 void setRole(NodeRole NR);
120
121 Tree *Parent;
122 Node *NextSibling;
123 Node *PreviousSibling;
124 unsigned Kind : 16;
125 unsigned Role : 8;
126 unsigned Original : 1;
127 unsigned CanModify : 1;
128};
129
130/// A leaf node points to a single token.
131// FIXME: add TokenKind field (borrow some bits from the Node::kind).
132class Leaf final : public Node {
133public:
134 Leaf(TokenManager::Key K);
135 static bool classof(const Node *N);
136
137 TokenManager::Key getTokenKey() const { return K; }
138
139private:
140 TokenManager::Key K;
141};
142
143/// A node that has children and represents a syntactic language construct.
144class Tree : public Node {
145 /// Iterator over children (common base for const/non-const).
146 /// Not invalidated by tree mutations (holds a stable node pointer).
147 template <typename DerivedT, typename NodeT>
148 class ChildIteratorBase
149 : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
150 NodeT> {
151 protected:
152 NodeT *N = nullptr;
153 using Base = ChildIteratorBase;
154
155 public:
156 ChildIteratorBase() = default;
157 explicit ChildIteratorBase(NodeT *N) : N(N) {}
158
159 friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
160 return LHS.N == RHS.N;
161 }
162
163 NodeT &operator*() const { return *N; }
164 DerivedT &operator++() {
165 N = N->getNextSibling();
166 return *static_cast<DerivedT *>(this);
167 }
168
169 /// Truthy if valid (not past-the-end).
170 /// This allows: if (auto It = find_if(N.children(), ...) )
171 explicit operator bool() const { return N != nullptr; }
172 /// The element, or nullptr if past-the-end.
173 NodeT *asPointer() const { return N; }
174 };
175
176public:
177 static bool classof(const Node *N);
178
179 Node *getFirstChild() { return FirstChild; }
180 const Node *getFirstChild() const { return FirstChild; }
181 Node *getLastChild() { return LastChild; }
182 const Node *getLastChild() const { return LastChild; }
183
184 const Leaf *findFirstLeaf() const;
185 Leaf *findFirstLeaf() {
186 return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
187 }
188
189 const Leaf *findLastLeaf() const;
190 Leaf *findLastLeaf() {
191 return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
192 }
193
194 /// child_iterator is not invalidated by mutations.
195 struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
196 using Base::ChildIteratorBase;
197 };
198 struct ConstChildIterator
199 : ChildIteratorBase<ConstChildIterator, const Node> {
200 using Base::ChildIteratorBase;
201 ConstChildIterator() = default;
202 ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
203 };
204
205 llvm::iterator_range<ChildIterator> getChildren() {
206 return {ChildIterator(getFirstChild()), ChildIterator()};
207 }
208 llvm::iterator_range<ConstChildIterator> getChildren() const {
209 return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
210 }
211
212 /// Find the first node with a corresponding role.
213 const Node *findChild(NodeRole R) const;
214 Node *findChild(NodeRole R) {
215 return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
216 }
217
218protected:
219 using Node::Node;
220
221private:
222 /// Append \p Child to the list of children and sets the parent pointer.
223 /// A very low-level operation that does not check any invariants, only used
224 /// by TreeBuilder and FactoryImpl.
225 /// EXPECTS: Role != Detached.
226 void appendChildLowLevel(Node *Child, NodeRole Role);
227 /// Similar but prepends.
228 void prependChildLowLevel(Node *Child, NodeRole Role);
229
230 /// Like the previous overloads, but does not set role for \p Child.
231 /// EXPECTS: Child->Role != Detached
232 void appendChildLowLevel(Node *Child);
233 void prependChildLowLevel(Node *Child);
234 friend class TreeBuilder;
235 friend class FactoryImpl;
236
237 /// Replace a range of children [Begin, End) with a list of
238 /// new nodes starting at \p New.
239 /// Only used by MutationsImpl to implement higher-level mutation operations.
240 /// (!) \p New can be null to model removal of the child range.
241 /// (!) \p End can be null to model one past the end.
242 /// (!) \p Begin can be null to model an append.
243 void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
244 friend class MutationsImpl;
245
246 Node *FirstChild = nullptr;
247 Node *LastChild = nullptr;
248};
249
250/// A list of Elements separated or terminated by a fixed token.
251///
252/// This type models the following grammar construct:
253/// delimited-list(element, delimiter, termination, canBeEmpty)
254class List : public Tree {
255public:
256 template <typename Element> struct ElementAndDelimiter {
257 Element *element;
258 Leaf *delimiter;
259 };
260
261 enum class TerminationKind {
262 Terminated,
263 MaybeTerminated,
264 Separated,
265 };
266
267 using Tree::Tree;
268 static bool classof(const Node *N);
269 /// Returns the elements and corresponding delimiters. Missing elements
270 /// and delimiters are represented as null pointers.
271 ///
272 /// For example, in a separated list:
273 /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)]
274 /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)]
275 /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)]
276 /// "a, b," <=> [("a" , ","), ("b" , "," ), (null, null)]
277 ///
278 /// In a terminated or maybe-terminated list:
279 /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
280 /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
281 /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
282 /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
283 std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
284
285 /// Returns the elements of the list. Missing elements are represented
286 /// as null pointers in the same way as in the return value of
287 /// `getElementsAsNodesAndDelimiters()`.
288 std::vector<Node *> getElementsAsNodes();
289
290 // These can't be implemented with the information we have!
291
292 /// Returns the appropriate delimiter for this list.
293 ///
294 /// Useful for discovering the correct delimiter to use when adding
295 /// elements to empty or one-element lists.
296 clang::tok::TokenKind getDelimiterTokenKind() const;
297
298 TerminationKind getTerminationKind() const;
299
300 /// Whether this list can be empty in syntactically and semantically correct
301 /// code.
302 ///
303 /// This list may be empty when the source code has errors even if
304 /// canBeEmpty() returns false.
305 bool canBeEmpty() const;
306};
307
308} // namespace syntax
309} // namespace clang
310
311#endif
312