Macros.h source code [llvm_projects/clang/lib/Format/Macros.h]

1	//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file contains the main building blocks of macro support in
11	/// clang-format.
12	///
13	/// In order to not violate the requirement that clang-format can format files
14	/// in isolation, clang-format's macro support uses expansions users provide
15	/// as part of clang-format's style configuration.
16	///
17	/// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
18	/// one level of expansion (\see MacroExpander for a full description of what
19	/// is supported).
20	///
21	/// As part of parsing, clang-format uses the MacroExpander to expand the
22	/// spelled token streams into expanded token streams when it encounters a
23	/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
24	/// from the expanded token stream.
25	/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
26	/// matches the spelled token stream into unwrapped lines that best resemble the
27	/// structure of the expanded unwrapped lines. These reconstructed unwrapped
28	/// lines are aliasing the tokens in the expanded token stream, so that token
29	/// annotations will be reused when formatting the spelled macro calls.
30	///
31	/// When formatting, clang-format annotates and formats the expanded unwrapped
32	/// lines first, determining the token types. Next, it formats the spelled
33	/// unwrapped lines, keeping the token types fixed, while allowing other
34	/// formatting decisions to change.
35	///
36	//===----------------------------------------------------------------------===//
37
38	#ifndef CLANG_LIB_FORMAT_MACROS_H
39	#define CLANG_LIB_FORMAT_MACROS_H
40
41	#include <list>
42
43	#include "FormatToken.h"
44	#include "llvm/ADT/DenseMap.h"
45
46	namespace clang {
47	namespace format {
48
49	struct UnwrappedLine;
50	struct UnwrappedLineNode;
51
52	/// Takes a set of macro definitions as strings and allows expanding calls to
53	/// those macros.
54	///
55	/// For example:
56	/// Definition: A(x, y)=x + y
57	/// Call : A(int a = 1, 2)
58	/// Expansion : int a = 1 + 2
59	///
60	/// Expansion does not check arity of the definition.
61	/// If fewer arguments than expected are provided, the remaining parameters
62	/// are considered empty:
63	/// Call : A(a)
64	/// Expansion: a +
65	/// If more arguments than expected are provided, they will be discarded.
66	///
67	/// The expander does not support:
68	/// - recursive expansion
69	/// - stringification
70	/// - concatenation
71	/// - variadic macros
72	///
73	/// Furthermore, only a single expansion of each macro argument is supported,
74	/// so that we cannot get conflicting formatting decisions from different
75	/// expansions.
76	/// Definition: A(x)=x+x
77	/// Call : A(id)
78	/// Expansion : id+x
79	///
80	class MacroExpander {
81	public:
82	using ArgsList = ArrayRef<SmallVector<FormatToken *, `8`>>;
83
84	/// Construct a macro expander from a set of macro definitions.
85	/// Macro definitions must be encoded as UTF-8.
86	///
87	/// Each entry in \p Macros must conform to the following simple
88	/// macro-definition language:
89	/// <definition> ::= <id> <expansion> \| <id> "(" <params> ")" <expansion>
90	/// <params> ::= <id-list> \| ""
91	/// <id-list> ::= <id> \| <id> "," <params>
92	/// <expansion> ::= "=" <tail> \| <eof>
93	/// <tail> ::= <tok> <tail> \| <eof>
94	///
95	/// Macros that cannot be parsed will be silently discarded.
96	///
97	MacroExpander(const std::vector<std::string> &Macros,
98	SourceManager &SourceMgr, const FormatStyle &Style,
99	llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
100	IdentifierTable &IdentTable);
101	~MacroExpander();
102
103	/// Returns whether any macro \p Name is defined, regardless of overloads.
104	bool defined(StringRef Name) const;
105
106	/// Returns whetherh there is an object-like overload, i.e. where the macro
107	/// has no arguments and should not consume subsequent parentheses.
108	bool objectLike(StringRef Name) const;
109
110	/// Returns whether macro \p Name provides an overload with the given arity.
111	bool hasArity(StringRef Name, unsigned Arity) const;
112
113	/// Returns the expanded stream of format tokens for \p ID, where
114	/// each element in \p Args is a positional argument to the macro call.
115	/// If \p Args is not set, the object-like overload is used.
116	/// If \p Args is set, the overload with the arity equal to \c Args.size() is
117	/// used.
118	SmallVector<FormatToken *, `8`>
119	expand(FormatToken ID, std::optional<ArgsList> OptionalArgs) const*;
120
121	private:
122	struct Definition;
123	class DefinitionParser;
124
125	void parseDefinition(const std::string &Macro);
126
127	SourceManager &SourceMgr;
128	const FormatStyle &Style;
129	llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
130	IdentifierTable &IdentTable;
131	SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
132	llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
133	llvm::StringMap<Definition> ObjectLike;
134	};
135
136	/// Converts a sequence of UnwrappedLines containing expanded macros into a
137	/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
138	/// broken into child lines, in a way that best conveys the structure of the
139	/// expanded code.
140	///
141	/// In the simplest case, a spelled UnwrappedLine contains one macro, and after
142	/// expanding it we have one expanded UnwrappedLine. In general, macro
143	/// expansions can span UnwrappedLines, and multiple macros can contribute
144	/// tokens to the same line. We keep consuming expanded lines until:
145	/// all expansions that started have finished (we're not chopping any macros*
146	/// in half)
147	/// and we've reached the end of a spelled unwrapped line.*
148	///
149	/// A single UnwrappedLine represents this chunk of code.
150	///
151	/// After this point, the state of the spelled/expanded stream is "in sync"
152	/// (both at the start of an UnwrappedLine, with no macros open), so the
153	/// Reconstructor can be thrown away and parsing can continue.
154	///
155	/// Given a mapping from the macro name identifier token in the macro call
156	/// to the tokens of the macro call, for example:
157	/// CLASSA -> CLASSA({public: void x();})
158	///
159	/// When getting the formatted lines of the expansion via the \c addLine method
160	/// (each '->' specifies a call to \c addLine ):
161	/// -> class A {
162	/// -> public:
163	/// -> void x();
164	/// -> };
165	///
166	/// Creates the tree of unwrapped lines containing the macro call tokens so that
167	/// the macro call tokens fit the semantic structure of the expanded formatted
168	/// lines:
169	/// -> CLASSA({
170	/// -> public:
171	/// -> void x();
172	/// -> })
173	class MacroCallReconstructor {
174	public:
175	/// Create an Reconstructor whose resulting \p UnwrappedLine will start at
176	/// \p Level, using the map from name identifier token to the corresponding
177	/// tokens of the spelled macro call.
178	MacroCallReconstructor(
179	unsigned Level,
180	const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
181	&ActiveExpansions);
182
183	/// For the given \p Line, match all occurences of tokens expanded from a
184	/// macro to unwrapped lines in the spelled macro call so that the resulting
185	/// tree of unwrapped lines best resembles the structure of unwrapped lines
186	/// passed in via \c addLine.
187	void addLine(const UnwrappedLine &Line);
188
189	/// Check whether at the current state there is no open macro expansion
190	/// that needs to be processed to finish an macro call.
191	/// Only when \c finished() is true, \c takeResult() can be called to retrieve
192	/// the resulting \c UnwrappedLine.
193	/// If there are multiple subsequent macro calls within an unwrapped line in
194	/// the spelled token stream, the calling code may also continue to call
195	/// \c addLine() when \c finished() is true.
196	bool finished() const { return ActiveExpansions.empty(); }
197
198	/// Retrieve the formatted \c UnwrappedLine containing the orginal
199	/// macro calls, formatted according to the expanded token stream received
200	/// via \c addLine().
201	/// Generally, this line tries to have the same structure as the expanded,
202	/// formatted unwrapped lines handed in via \c addLine(), with the exception
203	/// that for multiple top-level lines, each subsequent line will be the
204	/// child of the last token in its predecessor. This representation is chosen
205	/// because it is a precondition to the formatter that we get what looks like
206	/// a single statement in a single \c UnwrappedLine (i.e. matching parens).
207	///
208	/// If a token in a macro argument is a child of a token in the expansion,
209	/// the parent will be the corresponding token in the macro call.
210	/// For example:
211	/// #define C(a, b) class C { a b
212	/// C(int x;, int y;)
213	/// would expand to
214	/// class C { int x; int y;
215	/// where in a formatted line "int x;" and "int y;" would both be new separate
216	/// lines.
217	///
218	/// In the result, "int x;" will be a child of the opening parenthesis in "C("
219	/// and "int y;" will be a child of the "," token:
220	/// C (
221	/// \- int x;
222	/// ,
223	/// \- int y;
224	/// )
225	UnwrappedLine takeResult() &&;
226
227	private:
228	void add(FormatToken Token, FormatToken ExpandedParent, bool First,
229	unsigned Level);
230	void prepareParent(FormatToken ExpandedParent, bool* First, unsigned Level);
231	FormatToken getParentInResult(FormatToken Parent);
232	void reconstruct(FormatToken *Token);
233	void startReconstruction(FormatToken *Token);
234	bool reconstructActiveCallUntil(FormatToken *Token);
235	void endReconstruction(FormatToken *Token);
236	bool processNextReconstructed();
237	void finalize();
238
239	struct ReconstructedLine;
240
241	void appendToken(FormatToken Token, ReconstructedLine L = nullptr);
242	UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
243	void debug(const ReconstructedLine &Line, int Level);
244	ReconstructedLine &parentLine();
245	ReconstructedLine *currentLine();
246	void debugParentMap() const;
247
248	#ifndef NDEBUG
249	enum ReconstructorState {
250	Start, // No macro expansion was found in the input yet.
251	InProgress, // During a macro reconstruction.
252	Finalized, // Past macro reconstruction, the result is finalized.
253	};
254	ReconstructorState State = Start;
255	#endif
256
257	// Node in which we build up the resulting unwrapped line; this type is
258	// analogous to UnwrappedLineNode.
259	struct LineNode {
260	LineNode() = default;
261	LineNode(FormatToken *Tok) : Tok(Tok) {}
262	FormatToken Tok = nullptr*;
263	SmallVector<std::unique_ptr<ReconstructedLine>> Children;
264	};
265
266	// Line in which we build up the resulting unwrapped line.
267	// FIXME: Investigate changing UnwrappedLine to a pointer type and using it
268	// instead of rolling our own type.
269	struct ReconstructedLine {
270	explicit ReconstructedLine(unsigned Level) : Level(Level) {}
271	unsigned Level;
272	SmallVector<std::unique_ptr<LineNode>> Tokens;
273	};
274
275	// The line in which we collect the resulting reconstructed output.
276	// To reduce special cases in the algorithm, the first level of the line
277	// contains a single null token that has the reconstructed incoming
278	// lines as children.
279	// In the end, we stich the lines together so that each subsequent line
280	// is a child of the last token of the previous line. This is necessary
281	// in order to format the overall expression as a single logical line -
282	// if we created separate lines, we'd format them with their own top-level
283	// indent depending on the semantic structure, which is not desired.
284	ReconstructedLine Result;
285
286	// Stack of currently "open" lines, where each line's predecessor's last
287	// token is the parent token for that line.
288	SmallVector<ReconstructedLine *> ActiveReconstructedLines;
289
290	// Maps from the expanded token to the token that takes its place in the
291	// reconstructed token stream in terms of parent-child relationships.
292	// Note that it might take multiple steps to arrive at the correct
293	// parent in the output.
294	// Given: #define C(a, b) []() { a; b; }
295	// And a call: C(f(), g())
296	// The structure in the incoming formatted unwrapped line will be:
297	// []() {
298	// \|- f();
299	// \- g();
300	// }
301	// with f and g being children of the opening brace.
302	// In the reconstructed call:
303	// C(f(), g())
304	// \- f()
305	// \- g()
306	// We want f to be a child of the opening parenthesis and g to be a child
307	// of the comma token in the macro call.
308	// Thus, we map
309	// { -> (
310	// and add
311	// ( -> ,
312	// once we're past the comma in the reconstruction.
313	llvm::DenseMap<FormatToken , FormatToken >
314	SpelledParentToReconstructedParent;
315
316	// Keeps track of a single expansion while we're reconstructing tokens it
317	// generated.
318	struct Expansion {
319	// The identifier token of the macro call.
320	FormatToken *ID;
321	// Our current position in the reconstruction.
322	std::list<UnwrappedLineNode>::iterator SpelledI;
323	// The end of the reconstructed token sequence.
324	std::list<UnwrappedLineNode>::iterator SpelledE;
325	};
326
327	// Stack of macro calls for which we're in the middle of an expansion.
328	SmallVector<Expansion> ActiveExpansions;
329
330	struct MacroCallState {
331	MacroCallState(ReconstructedLine Line, FormatToken ParentLastToken,
332	FormatToken *MacroCallLParen);
333
334	ReconstructedLine *Line;
335
336	// The last token in the parent line or expansion, or nullptr if the macro
337	// expansion is on a top-level line.
338	//
339	// For example, in the macro call:
340	// auto f = []() { ID(1); };
341	// The MacroCallState for ID will have '{' as ParentLastToken.
342	//
343	// In the macro call:
344	// ID(ID(void f()));
345	// The MacroCallState of the outer ID will have nullptr as ParentLastToken,
346	// while the MacroCallState for the inner ID will have the '(' of the outer
347	// ID as ParentLastToken.
348	//
349	// In the macro call:
350	// ID2(a, ID(b));
351	// The MacroCallState of ID will have ',' as ParentLastToken.
352	FormatToken *ParentLastToken;
353
354	// The l_paren of this MacroCallState's macro call.
355	FormatToken *MacroCallLParen;
356	};
357
358	// Keeps track of the lines into which the opening brace/parenthesis &
359	// argument separating commas for each level in the macro call go in order to
360	// put the corresponding closing brace/parenthesis into the same line in the
361	// output and keep track of which parents in the expanded token stream map to
362	// which tokens in the reconstructed stream.
363	// When an opening brace/parenthesis has children, we want the structure of
364	// the output line to be:
365	// \|- MACRO
366	// \|- (
367	// \| \- <argument>
368	// \|- ,
369	// \| \- <argument>
370	// \- )
371	SmallVector<MacroCallState> MacroCallStructure;
372
373	// Maps from identifier of the macro call to an unwrapped line containing
374	// all tokens of the macro call.
375	const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
376	&IdToReconstructed;
377	};
378
379	} // namespace format
380	} // namespace clang
381
382	#endif
383

Browse the source code of llvm_projects/clang/lib/Format/Macros.h