1//===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6// See https://llvm.org/LICENSE.txt for license information.
7// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8//
9//===----------------------------------------------------------------------===//
10//
11// These backends consume the definitions of Syntax Tree nodes.
12// See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
13//
14// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
15// NODE(Kind, BaseKind)
16// ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
17// similar to those for AST nodes such as AST/DeclNodes.inc.
18//
19// The -gen-clang-syntax-node-classes backend produces definitions for the
20// syntax::Node subclasses (except those marked as External).
21//
22// In future, another backend will encode the structure of the various node
23// types in tables so their invariants can be checked and enforced.
24//
25//===----------------------------------------------------------------------===//
#include "TableGenBackends.h"

#include <deque>

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
35
36namespace {
37using llvm::formatv;
38
39// The class hierarchy of Node types.
40// We assemble this in order to be able to define the NodeKind enum in a
41// stable and useful way, where abstract Node subclasses correspond to ranges.
42class Hierarchy {
43public:
44 Hierarchy(const llvm::RecordKeeper &Records) {
45 for (llvm::Record *T : Records.getAllDerivedDefinitions(ClassName: "NodeType"))
46 add(R: T);
47 for (llvm::Record *Derived : Records.getAllDerivedDefinitions(ClassName: "NodeType"))
48 if (llvm::Record *Base = Derived->getValueAsOptionalDef(FieldName: "base"))
49 link(Derived, Base);
50 for (NodeType &N : AllTypes) {
51 llvm::sort(C&: N.Derived, Comp: [](const NodeType *L, const NodeType *R) {
52 return L->Record->getName() < R->Record->getName();
53 });
54 // Alternatives nodes must have subclasses, External nodes may do.
55 assert(N.Record->isSubClassOf("Alternatives") ||
56 N.Record->isSubClassOf("External") || N.Derived.empty());
57 assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
58 }
59 }
60
61 struct NodeType {
62 const llvm::Record *Record = nullptr;
63 const NodeType *Base = nullptr;
64 std::vector<const NodeType *> Derived;
65 llvm::StringRef name() const { return Record->getName(); }
66 };
67
68 NodeType &get(llvm::StringRef Name = "Node") {
69 auto NI = ByName.find(Val: Name);
70 assert(NI != ByName.end() && "no such node");
71 return *NI->second;
72 }
73
74 // Traverse the hierarchy in pre-order (base classes before derived).
75 void visit(llvm::function_ref<void(const NodeType &)> CB,
76 const NodeType *Start = nullptr) {
77 if (Start == nullptr)
78 Start = &get();
79 CB(*Start);
80 for (const NodeType *D : Start->Derived)
81 visit(CB, Start: D);
82 }
83
84private:
85 void add(const llvm::Record *R) {
86 AllTypes.emplace_back();
87 AllTypes.back().Record = R;
88 bool Inserted = ByName.try_emplace(Key: R->getName(), Args: &AllTypes.back()).second;
89 assert(Inserted && "Duplicate node name");
90 (void)Inserted;
91 }
92
93 void link(const llvm::Record *Derived, const llvm::Record *Base) {
94 auto &CN = get(Name: Derived->getName()), &PN = get(Name: Base->getName());
95 assert(CN.Base == nullptr && "setting base twice");
96 PN.Derived.push_back(x: &CN);
97 CN.Base = &PN;
98 }
99
100 std::deque<NodeType> AllTypes;
101 llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
102};
103
104const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
105 return N.Derived.empty() ? N : firstConcrete(N: *N.Derived.front());
106}
107const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
108 return N.Derived.empty() ? N : lastConcrete(N: *N.Derived.back());
109}
110
111struct SyntaxConstraint {
112 SyntaxConstraint(const llvm::Record &R) {
113 if (R.isSubClassOf(Name: "Optional")) {
114 *this = SyntaxConstraint(*R.getValueAsDef(FieldName: "inner"));
115 } else if (R.isSubClassOf(Name: "AnyToken")) {
116 NodeType = "Leaf";
117 } else if (R.isSubClassOf(Name: "NodeType")) {
118 NodeType = R.getName().str();
119 } else {
120 assert(false && "Unhandled Syntax kind");
121 }
122 }
123
124 std::string NodeType;
125 // optional and leaf types also go here, once we want to use them.
126};
127
128} // namespace
129
130void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
131 llvm::raw_ostream &OS) {
132 llvm::emitSourceFileHeader(Desc: "Syntax tree node list", OS, Record: Records);
133 Hierarchy H(Records);
134 OS << R"cpp(
135#ifndef NODE
136#define NODE(Kind, Base)
137#endif
138
139#ifndef CONCRETE_NODE
140#define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
141#endif
142
143#ifndef ABSTRACT_NODE
144#define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
145#endif
146
147)cpp";
148 H.visit(CB: [&](const Hierarchy::NodeType &N) {
149 // Don't emit ABSTRACT_NODE for node itself, which has no parent.
150 if (N.Base == nullptr)
151 return;
152 if (N.Derived.empty())
153 OS << formatv(Fmt: "CONCRETE_NODE({0},{1})\n", Vals: N.name(), Vals: N.Base->name());
154 else
155 OS << formatv(Fmt: "ABSTRACT_NODE({0},{1},{2},{3})\n", Vals: N.name(),
156 Vals: N.Base->name(), Vals: firstConcrete(N).name(),
157 Vals: lastConcrete(N).name());
158 });
159 OS << R"cpp(
160#undef NODE
161#undef CONCRETE_NODE
162#undef ABSTRACT_NODE
163)cpp";
164}
165
166// Format a documentation string as a C++ comment.
167// Trims leading whitespace handling since comments come from a TableGen file:
168// documentation = [{
169// This is a widget. Example:
170// widget.explode()
171// }];
172// and should be formatted as:
173// /// This is a widget. Example:
174// /// widget.explode()
175// Leading and trailing whitespace lines are stripped.
176// The indentation of the first line is stripped from all lines.
177static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
178 Doc = Doc.rtrim();
179 llvm::StringRef Line;
180 while (Line.trim().empty() && !Doc.empty())
181 std::tie(args&: Line, args&: Doc) = Doc.split(Separator: '\n');
182 llvm::StringRef Indent = Line.take_while(F: llvm::isSpace);
183 for (; !Line.empty() || !Doc.empty(); std::tie(args&: Line, args&: Doc) = Doc.split(Separator: '\n')) {
184 Line.consume_front(Prefix: Indent);
185 OS << "/// " << Line << "\n";
186 }
187}
188
189void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
190 llvm::raw_ostream &OS) {
191 llvm::emitSourceFileHeader(Desc: "Syntax tree node list", OS, Record: Records);
192 Hierarchy H(Records);
193
194 OS << "\n// Forward-declare node types so we don't have to carefully "
195 "sequence definitions.\n";
196 H.visit(CB: [&](const Hierarchy::NodeType &N) {
197 OS << "class " << N.name() << ";\n";
198 });
199
200 OS << "\n// Node definitions\n\n";
201 H.visit(CB: [&](const Hierarchy::NodeType &N) {
202 if (N.Record->isSubClassOf(Name: "External"))
203 return;
204 printDoc(Doc: N.Record->getValueAsString(FieldName: "documentation"), OS);
205 OS << formatv(Fmt: "class {0}{1} : public {2} {{\n", Vals: N.name(),
206 Vals: N.Derived.empty() ? " final" : "", Vals: N.Base->name());
207
208 // Constructor.
209 if (N.Derived.empty())
210 OS << formatv(Fmt: "public:\n {0}() : {1}(NodeKind::{0}) {{}\n", Vals: N.name(),
211 Vals: N.Base->name());
212 else
213 OS << formatv(Fmt: "protected:\n {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
214 Vals: N.name(), Vals: N.Base->name());
215
216 if (N.Record->isSubClassOf(Name: "Sequence")) {
217 // Getters for sequence elements.
218 for (const auto &C : N.Record->getValueAsListOfDefs(FieldName: "children")) {
219 assert(C->isSubClassOf("Role"));
220 llvm::StringRef Role = C->getValueAsString(FieldName: "role");
221 SyntaxConstraint Constraint(*C->getValueAsDef(FieldName: "syntax"));
222 for (const char *Const : {"", "const "})
223 OS << formatv(
224 Fmt: " {2}{1} *get{0}() {2} {{\n"
225 " return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
226 " }\n",
227 Vals&: Role, Vals&: Constraint.NodeType, Vals&: Const);
228 }
229 }
230
231 // classof. FIXME: move definition inline once ~all nodes are generated.
232 OS << " static bool classof(const Node *N);\n";
233
234 OS << "};\n\n";
235 });
236}
237