//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
/// Classification of an annotated line, stored in \c AnnotatedLine::Type.
///
/// The enumerators are unscoped and their order (and therefore their
/// underlying values) is kept stable.
enum LineType {
  LT_Invalid,
  // Contains public/private/protected followed by TT_InheritanceColon.
  LT_AccessModifier,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  // The type every AnnotatedLine is given by its constructor.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
  LT_RequiresExpression,
  LT_SimpleRequirement,
};
39
/// Kind of scope a piece of code is contained in.
enum ScopeType {
  // Contained in class declaration/definition.
  ST_Class,
  // Contained in enum declaration/definition.
  ST_Enum,
  // Contained in compound requirement.
  ST_CompoundRequirement,
  // Contained in other blocks (function, lambda, loop, if/else, child, etc).
  ST_Other,
};
50
51class AnnotatedLine {
52public:
53 AnnotatedLine(const UnwrappedLine &Line)
54 : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
55 PPLevel(Line.PPLevel),
56 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
57 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
58 InPPDirective(Line.InPPDirective),
59 InPragmaDirective(Line.InPragmaDirective),
60 InMacroBody(Line.InMacroBody),
61 IsModuleOrImportDecl(Line.IsModuleOrImportDecl),
62 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
63 IsMultiVariableDeclStmt(false), Affected(false),
64 LeadingEmptyLinesAffected(false), ChildrenAffected(false),
65 ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
66 FirstStartColumn(Line.FirstStartColumn) {
67 assert(!Line.Tokens.empty());
68
69 // Calculate Next and Previous for all tokens. Note that we must overwrite
70 // Next and Previous for every token, as previous formatting runs might have
71 // left them in a different state.
72 First->Previous = nullptr;
73 FormatToken *Current = First;
74 addChildren(Node: Line.Tokens.front(), Current);
75 for (const UnwrappedLineNode &Node : llvm::drop_begin(RangeOrContainer: Line.Tokens)) {
76 if (Node.Tok->MacroParent)
77 ContainsMacroCall = true;
78 Current->Next = Node.Tok;
79 Node.Tok->Previous = Current;
80 Current = Current->Next;
81 addChildren(Node, Current);
82 // FIXME: if we add children, previous will point to the token before
83 // the children; changing this requires significant changes across
84 // clang-format.
85 }
86 Last = Current;
87 Last->Next = nullptr;
88 }
89
90 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
91 Current->Children.clear();
92 for (const auto &Child : Node.Children) {
93 Children.push_back(Elt: new AnnotatedLine(Child));
94 if (Children.back()->ContainsMacroCall)
95 ContainsMacroCall = true;
96 Current->Children.push_back(Elt: Children.back());
97 }
98 }
99
100 size_t size() const {
101 size_t Size = 1;
102 for (const auto *Child : Children)
103 Size += Child->size();
104 return Size;
105 }
106
107 ~AnnotatedLine() {
108 for (AnnotatedLine *Child : Children)
109 delete Child;
110 FormatToken *Current = First;
111 while (Current) {
112 Current->Children.clear();
113 Current->Role.reset();
114 Current = Current->Next;
115 }
116 }
117
118 bool isComment() const {
119 return First && First->is(Kind: tok::comment) && !First->getNextNonComment();
120 }
121
122 /// \c true if this line starts with the given tokens in order, ignoring
123 /// comments.
124 template <typename... Ts> bool startsWith(Ts... Tokens) const {
125 return First && First->startsSequence(Tokens...);
126 }
127
128 /// \c true if this line ends with the given tokens in reversed order,
129 /// ignoring comments.
130 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
131 /// this line is like "... T3 T2 T1".
132 template <typename... Ts> bool endsWith(Ts... Tokens) const {
133 return Last && Last->endsSequence(Tokens...);
134 }
135
136 /// \c true if this line looks like a function definition instead of a
137 /// function declaration. Asserts MightBeFunctionDecl.
138 bool mightBeFunctionDefinition() const {
139 assert(MightBeFunctionDecl);
140 // Try to determine if the end of a stream of tokens is either the
141 // Definition or the Declaration for a function. It does this by looking for
142 // the ';' in foo(); and using that it ends with a ; to know this is the
143 // Definition, however the line could end with
144 // foo(); /* comment */
145 // or
146 // foo(); // comment
147 // or
148 // foo() // comment
149 // endsWith() ignores the comment.
150 return !endsWith(Tokens: tok::semi);
151 }
152
153 /// \c true if this line starts a namespace definition.
154 bool startsWithNamespace() const {
155 return startsWith(Tokens: tok::kw_namespace) || startsWith(Tokens: TT_NamespaceMacro) ||
156 startsWith(Tokens: tok::kw_inline, Tokens: tok::kw_namespace) ||
157 startsWith(Tokens: tok::kw_export, Tokens: tok::kw_namespace);
158 }
159
160 /// \c true if this line starts a C++ export block.
161 bool startsWithExportBlock() const {
162 return startsWith(Tokens: tok::kw_export, Tokens: tok::l_brace);
163 }
164
165 FormatToken *getFirstNonComment() const {
166 assert(First);
167 return First->is(Kind: tok::comment) ? First->getNextNonComment() : First;
168 }
169
170 FormatToken *getLastNonComment() const {
171 assert(Last);
172 return Last->is(Kind: tok::comment) ? Last->getPreviousNonComment() : Last;
173 }
174
175 FormatToken *First;
176 FormatToken *Last;
177
178 SmallVector<AnnotatedLine *, 0> Children;
179
180 LineType Type;
181 unsigned Level;
182 unsigned PPLevel;
183 size_t MatchingOpeningBlockLineIndex;
184 size_t MatchingClosingBlockLineIndex;
185 bool InPPDirective;
186 bool InPragmaDirective;
187 bool InMacroBody;
188 bool IsModuleOrImportDecl;
189 bool MustBeDeclaration;
190 bool MightBeFunctionDecl;
191 bool IsMultiVariableDeclStmt;
192
193 /// \c True if this line contains a macro call for which an expansion exists.
194 bool ContainsMacroCall = false;
195
196 /// \c True if calculateFormattingInformation() has been called on this line.
197 bool Computed = false;
198
199 /// \c True if this line should be formatted, i.e. intersects directly or
200 /// indirectly with one of the input ranges.
201 bool Affected;
202
203 /// \c True if the leading empty lines of this line intersect with one of the
204 /// input ranges.
205 bool LeadingEmptyLinesAffected;
206
207 /// \c True if one of this line's children intersects with an input range.
208 bool ChildrenAffected;
209
210 /// \c True if breaking after last attribute group in function return type.
211 bool ReturnTypeWrapped;
212
213 /// \c True if this line should be indented by ContinuationIndent in addition
214 /// to the normal indention level.
215 bool IsContinuation;
216
217 unsigned FirstStartColumn;
218
219private:
220 // Disallow copying.
221 AnnotatedLine(const AnnotatedLine &) = delete;
222 void operator=(const AnnotatedLine &) = delete;
223};
224
225/// Determines extra information about the tokens comprising an
226/// \c UnwrappedLine.
227class TokenAnnotator {
228public:
229 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
230 : Style(Style), IsCpp(Style.isCpp()),
231 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {}
232
233 /// Adapts the indent levels of comment lines to the indent of the
234 /// subsequent line.
235 // FIXME: Can/should this be done in the UnwrappedLineParser?
236 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
237
238 void annotate(AnnotatedLine &Line);
239 void calculateFormattingInformation(AnnotatedLine &Line) const;
240
241private:
242 /// Calculate the penalty for splitting before \c Tok.
243 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
244 bool InFunctionDecl) const;
245
246 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
247
248 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
249 const FormatToken &Right) const;
250
251 bool spaceRequiredBefore(const AnnotatedLine &Line,
252 const FormatToken &Right) const;
253
254 bool mustBreakBefore(AnnotatedLine &Line, const FormatToken &Right) const;
255
256 bool canBreakBefore(const AnnotatedLine &Line,
257 const FormatToken &Right) const;
258
259 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
260
261 bool mustBreakBeforeReturnType(const AnnotatedLine &Line) const;
262
263 void printDebugInfo(const AnnotatedLine &Line) const;
264
265 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
266
267 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
268
269 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
270 FormatToken *CurrentToken,
271 unsigned Depth) const;
272 FormatStyle::PointerAlignmentStyle
273 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
274
275 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
276 const FormatToken &PointerOrReference) const;
277
278 const FormatStyle &Style;
279
280 bool IsCpp;
281 LangOptions LangOpts;
282
283 const AdditionalKeywords &Keywords;
284
285 SmallVector<ScopeType> Scopes, MacroBodyScopes;
286};
287
288} // end namespace format
289} // end namespace clang
290
291#endif
292