1 | //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements a token annotator, i.e. creates |
11 | /// \c AnnotatedTokens out of \c FormatTokens with required extra information. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
16 | #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H |
17 | |
18 | #include "UnwrappedLineParser.h" |
19 | |
20 | namespace clang { |
21 | namespace format { |
22 | |
/// Coarse classification of a line; stored in \c AnnotatedLine::Type.
///
/// Enumerator order defines the underlying values, so new entries should be
/// appended rather than inserted.
enum LineType {
  LT_Invalid,
  // Contains public/private/protected followed by TT_InheritanceColon.
  LT_AccessModifier,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  // Initial type given to every line by the AnnotatedLine constructor.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  // NOTE(review): this enumerator's name was missing from the text under
  // review and has been restored to match upstream clang-format; confirm.
  LT_CommentAbovePPDirective,
};
37 | |
/// Kind of scope a token sequence is nested in; \c TokenAnnotator keeps a
/// \c SmallVector of these in its \c Scopes member.
enum ScopeType {
  // Contained in class declaration/definition.
  ST_Class,
  // Contained within function definition.
  ST_Function,
  // Contained within other scope block (loop, if/else, etc).
  ST_Other,
};
46 | |
47 | class AnnotatedLine { |
48 | public: |
49 | AnnotatedLine(const UnwrappedLine &Line) |
50 | : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level), |
51 | PPLevel(Line.PPLevel), |
52 | MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), |
53 | MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), |
54 | InPPDirective(Line.InPPDirective), |
55 | InPragmaDirective(Line.InPragmaDirective), |
56 | InMacroBody(Line.InMacroBody), |
57 | MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), |
58 | IsMultiVariableDeclStmt(false), Affected(false), |
59 | LeadingEmptyLinesAffected(false), ChildrenAffected(false), |
60 | ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), |
61 | FirstStartColumn(Line.FirstStartColumn) { |
62 | assert(!Line.Tokens.empty()); |
63 | |
64 | // Calculate Next and Previous for all tokens. Note that we must overwrite |
65 | // Next and Previous for every token, as previous formatting runs might have |
66 | // left them in a different state. |
67 | First->Previous = nullptr; |
68 | FormatToken *Current = First; |
69 | addChildren(Node: Line.Tokens.front(), Current); |
70 | for (const UnwrappedLineNode &Node : llvm::drop_begin(RangeOrContainer: Line.Tokens)) { |
71 | if (Node.Tok->MacroParent) |
72 | ContainsMacroCall = true; |
73 | Current->Next = Node.Tok; |
74 | Node.Tok->Previous = Current; |
75 | Current = Current->Next; |
76 | addChildren(Node, Current); |
77 | // FIXME: if we add children, previous will point to the token before |
78 | // the children; changing this requires significant changes across |
79 | // clang-format. |
80 | } |
81 | Last = Current; |
82 | Last->Next = nullptr; |
83 | } |
84 | |
85 | void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { |
86 | Current->Children.clear(); |
87 | for (const auto &Child : Node.Children) { |
88 | Children.push_back(Elt: new AnnotatedLine(Child)); |
89 | if (Children.back()->ContainsMacroCall) |
90 | ContainsMacroCall = true; |
91 | Current->Children.push_back(Elt: Children.back()); |
92 | } |
93 | } |
94 | |
95 | size_t size() const { |
96 | size_t Size = 1; |
97 | for (const auto *Child : Children) |
98 | Size += Child->size(); |
99 | return Size; |
100 | } |
101 | |
102 | ~AnnotatedLine() { |
103 | for (AnnotatedLine *Child : Children) |
104 | delete Child; |
105 | FormatToken *Current = First; |
106 | while (Current) { |
107 | Current->Children.clear(); |
108 | Current->Role.reset(); |
109 | Current = Current->Next; |
110 | } |
111 | } |
112 | |
113 | bool () const { |
114 | return First && First->is(Kind: tok::comment) && !First->getNextNonComment(); |
115 | } |
116 | |
117 | /// \c true if this line starts with the given tokens in order, ignoring |
118 | /// comments. |
119 | template <typename... Ts> bool startsWith(Ts... Tokens) const { |
120 | return First && First->startsSequence(Tokens...); |
121 | } |
122 | |
123 | /// \c true if this line ends with the given tokens in reversed order, |
124 | /// ignoring comments. |
125 | /// For example, given tokens [T1, T2, T3, ...], the function returns true if |
126 | /// this line is like "... T3 T2 T1". |
127 | template <typename... Ts> bool endsWith(Ts... Tokens) const { |
128 | return Last && Last->endsSequence(Tokens...); |
129 | } |
130 | |
131 | /// \c true if this line looks like a function definition instead of a |
132 | /// function declaration. Asserts MightBeFunctionDecl. |
133 | bool mightBeFunctionDefinition() const { |
134 | assert(MightBeFunctionDecl); |
135 | // Try to determine if the end of a stream of tokens is either the |
136 | // Definition or the Declaration for a function. It does this by looking for |
137 | // the ';' in foo(); and using that it ends with a ; to know this is the |
138 | // Definition, however the line could end with |
139 | // foo(); /* comment */ |
140 | // or |
141 | // foo(); // comment |
142 | // or |
143 | // foo() // comment |
144 | // endsWith() ignores the comment. |
145 | return !endsWith(Tokens: tok::semi); |
146 | } |
147 | |
148 | /// \c true if this line starts a namespace definition. |
149 | bool startsWithNamespace() const { |
150 | return startsWith(Tokens: tok::kw_namespace) || startsWith(Tokens: TT_NamespaceMacro) || |
151 | startsWith(Tokens: tok::kw_inline, Tokens: tok::kw_namespace) || |
152 | startsWith(Tokens: tok::kw_export, Tokens: tok::kw_namespace); |
153 | } |
154 | |
155 | FormatToken *() const { |
156 | assert(First); |
157 | return First->is(Kind: tok::comment) ? First->getNextNonComment() : First; |
158 | } |
159 | |
160 | FormatToken *() const { |
161 | assert(Last); |
162 | return Last->is(Kind: tok::comment) ? Last->getPreviousNonComment() : Last; |
163 | } |
164 | |
165 | FormatToken *First; |
166 | FormatToken *Last; |
167 | |
168 | SmallVector<AnnotatedLine *, 0> Children; |
169 | |
170 | LineType Type; |
171 | unsigned Level; |
172 | unsigned PPLevel; |
173 | size_t MatchingOpeningBlockLineIndex; |
174 | size_t MatchingClosingBlockLineIndex; |
175 | bool InPPDirective; |
176 | bool InPragmaDirective; |
177 | bool InMacroBody; |
178 | bool MustBeDeclaration; |
179 | bool MightBeFunctionDecl; |
180 | bool IsMultiVariableDeclStmt; |
181 | |
182 | /// \c True if this line contains a macro call for which an expansion exists. |
183 | bool ContainsMacroCall = false; |
184 | |
185 | /// \c True if this line should be formatted, i.e. intersects directly or |
186 | /// indirectly with one of the input ranges. |
187 | bool Affected; |
188 | |
189 | /// \c True if the leading empty lines of this line intersect with one of the |
190 | /// input ranges. |
191 | bool LeadingEmptyLinesAffected; |
192 | |
193 | /// \c True if one of this line's children intersects with an input range. |
194 | bool ChildrenAffected; |
195 | |
196 | /// \c True if breaking after last attribute group in function return type. |
197 | bool ReturnTypeWrapped; |
198 | |
199 | /// \c True if this line should be indented by ContinuationIndent in addition |
200 | /// to the normal indention level. |
201 | bool IsContinuation; |
202 | |
203 | unsigned FirstStartColumn; |
204 | |
205 | private: |
206 | // Disallow copying. |
207 | AnnotatedLine(const AnnotatedLine &) = delete; |
208 | void operator=(const AnnotatedLine &) = delete; |
209 | }; |
210 | |
211 | /// Determines extra information about the tokens comprising an |
212 | /// \c UnwrappedLine. |
213 | class TokenAnnotator { |
214 | public: |
215 | TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) |
216 | : Style(Style), IsCpp(Style.isCpp()), |
217 | LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) { |
218 | assert(IsCpp == LangOpts.CXXOperatorNames); |
219 | } |
220 | |
221 | /// Adapts the indent levels of comment lines to the indent of the |
222 | /// subsequent line. |
223 | // FIXME: Can/should this be done in the UnwrappedLineParser? |
224 | void (SmallVectorImpl<AnnotatedLine *> &Lines) const; |
225 | |
226 | void annotate(AnnotatedLine &Line); |
227 | void calculateFormattingInformation(AnnotatedLine &Line) const; |
228 | |
229 | private: |
230 | /// Calculate the penalty for splitting before \c Tok. |
231 | unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, |
232 | bool InFunctionDecl) const; |
233 | |
234 | bool spaceRequiredBeforeParens(const FormatToken &Right) const; |
235 | |
236 | bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, |
237 | const FormatToken &Right) const; |
238 | |
239 | bool spaceRequiredBefore(const AnnotatedLine &Line, |
240 | const FormatToken &Right) const; |
241 | |
242 | bool mustBreakBefore(const AnnotatedLine &Line, |
243 | const FormatToken &Right) const; |
244 | |
245 | bool canBreakBefore(const AnnotatedLine &Line, |
246 | const FormatToken &Right) const; |
247 | |
248 | bool mustBreakForReturnType(const AnnotatedLine &Line) const; |
249 | |
250 | void printDebugInfo(const AnnotatedLine &Line) const; |
251 | |
252 | void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; |
253 | |
254 | void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; |
255 | |
256 | FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, |
257 | FormatToken *CurrentToken, |
258 | unsigned Depth) const; |
259 | FormatStyle::PointerAlignmentStyle |
260 | getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; |
261 | |
262 | FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( |
263 | const FormatToken &PointerOrReference) const; |
264 | |
265 | const FormatStyle &Style; |
266 | |
267 | bool IsCpp; |
268 | LangOptions LangOpts; |
269 | |
270 | const AdditionalKeywords &Keywords; |
271 | |
272 | SmallVector<ScopeType> Scopes; |
273 | }; |
274 | |
275 | } // end namespace format |
276 | } // end namespace clang |
277 | |
278 | #endif |
279 | |