//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
/// Classification of a line. An \c AnnotatedLine stores one of these values in
/// its \c Type member.
///
/// The enumerators are unscoped and keep their implicit values (0, 1, 2, ...);
/// do not reorder them without checking every user.
enum LineType {
  LT_Invalid,
  // Contains public/private/protected followed by TT_InheritanceColon.
  LT_AccessModifier,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  // The type every AnnotatedLine is given by its constructor.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
37
/// Kind of scope a token is contained in. \c TokenAnnotator keeps a stack of
/// these values in its \c Scopes member.
enum ScopeType {
  // Contained in class declaration/definition.
  ST_Class,
  // Contained within function definition.
  ST_Function,
  // Contained within other scope block (loop, if/else, etc).
  ST_Other,
};
46
47class AnnotatedLine {
48public:
49 AnnotatedLine(const UnwrappedLine &Line)
50 : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
51 PPLevel(Line.PPLevel),
52 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
53 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
54 InPPDirective(Line.InPPDirective),
55 InPragmaDirective(Line.InPragmaDirective),
56 InMacroBody(Line.InMacroBody),
57 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
58 IsMultiVariableDeclStmt(false), Affected(false),
59 LeadingEmptyLinesAffected(false), ChildrenAffected(false),
60 ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
61 FirstStartColumn(Line.FirstStartColumn) {
62 assert(!Line.Tokens.empty());
63
64 // Calculate Next and Previous for all tokens. Note that we must overwrite
65 // Next and Previous for every token, as previous formatting runs might have
66 // left them in a different state.
67 First->Previous = nullptr;
68 FormatToken *Current = First;
69 addChildren(Node: Line.Tokens.front(), Current);
70 for (const UnwrappedLineNode &Node : llvm::drop_begin(RangeOrContainer: Line.Tokens)) {
71 if (Node.Tok->MacroParent)
72 ContainsMacroCall = true;
73 Current->Next = Node.Tok;
74 Node.Tok->Previous = Current;
75 Current = Current->Next;
76 addChildren(Node, Current);
77 // FIXME: if we add children, previous will point to the token before
78 // the children; changing this requires significant changes across
79 // clang-format.
80 }
81 Last = Current;
82 Last->Next = nullptr;
83 }
84
85 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
86 Current->Children.clear();
87 for (const auto &Child : Node.Children) {
88 Children.push_back(Elt: new AnnotatedLine(Child));
89 if (Children.back()->ContainsMacroCall)
90 ContainsMacroCall = true;
91 Current->Children.push_back(Elt: Children.back());
92 }
93 }
94
95 size_t size() const {
96 size_t Size = 1;
97 for (const auto *Child : Children)
98 Size += Child->size();
99 return Size;
100 }
101
102 ~AnnotatedLine() {
103 for (AnnotatedLine *Child : Children)
104 delete Child;
105 FormatToken *Current = First;
106 while (Current) {
107 Current->Children.clear();
108 Current->Role.reset();
109 Current = Current->Next;
110 }
111 }
112
113 bool isComment() const {
114 return First && First->is(Kind: tok::comment) && !First->getNextNonComment();
115 }
116
117 /// \c true if this line starts with the given tokens in order, ignoring
118 /// comments.
119 template <typename... Ts> bool startsWith(Ts... Tokens) const {
120 return First && First->startsSequence(Tokens...);
121 }
122
123 /// \c true if this line ends with the given tokens in reversed order,
124 /// ignoring comments.
125 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
126 /// this line is like "... T3 T2 T1".
127 template <typename... Ts> bool endsWith(Ts... Tokens) const {
128 return Last && Last->endsSequence(Tokens...);
129 }
130
131 /// \c true if this line looks like a function definition instead of a
132 /// function declaration. Asserts MightBeFunctionDecl.
133 bool mightBeFunctionDefinition() const {
134 assert(MightBeFunctionDecl);
135 // Try to determine if the end of a stream of tokens is either the
136 // Definition or the Declaration for a function. It does this by looking for
137 // the ';' in foo(); and using that it ends with a ; to know this is the
138 // Definition, however the line could end with
139 // foo(); /* comment */
140 // or
141 // foo(); // comment
142 // or
143 // foo() // comment
144 // endsWith() ignores the comment.
145 return !endsWith(Tokens: tok::semi);
146 }
147
148 /// \c true if this line starts a namespace definition.
149 bool startsWithNamespace() const {
150 return startsWith(Tokens: tok::kw_namespace) || startsWith(Tokens: TT_NamespaceMacro) ||
151 startsWith(Tokens: tok::kw_inline, Tokens: tok::kw_namespace) ||
152 startsWith(Tokens: tok::kw_export, Tokens: tok::kw_namespace);
153 }
154
155 FormatToken *getFirstNonComment() const {
156 assert(First);
157 return First->is(Kind: tok::comment) ? First->getNextNonComment() : First;
158 }
159
160 FormatToken *getLastNonComment() const {
161 assert(Last);
162 return Last->is(Kind: tok::comment) ? Last->getPreviousNonComment() : Last;
163 }
164
165 FormatToken *First;
166 FormatToken *Last;
167
168 SmallVector<AnnotatedLine *, 0> Children;
169
170 LineType Type;
171 unsigned Level;
172 unsigned PPLevel;
173 size_t MatchingOpeningBlockLineIndex;
174 size_t MatchingClosingBlockLineIndex;
175 bool InPPDirective;
176 bool InPragmaDirective;
177 bool InMacroBody;
178 bool MustBeDeclaration;
179 bool MightBeFunctionDecl;
180 bool IsMultiVariableDeclStmt;
181
182 /// \c True if this line contains a macro call for which an expansion exists.
183 bool ContainsMacroCall = false;
184
185 /// \c True if this line should be formatted, i.e. intersects directly or
186 /// indirectly with one of the input ranges.
187 bool Affected;
188
189 /// \c True if the leading empty lines of this line intersect with one of the
190 /// input ranges.
191 bool LeadingEmptyLinesAffected;
192
193 /// \c True if one of this line's children intersects with an input range.
194 bool ChildrenAffected;
195
196 /// \c True if breaking after last attribute group in function return type.
197 bool ReturnTypeWrapped;
198
199 /// \c True if this line should be indented by ContinuationIndent in addition
200 /// to the normal indention level.
201 bool IsContinuation;
202
203 unsigned FirstStartColumn;
204
205private:
206 // Disallow copying.
207 AnnotatedLine(const AnnotatedLine &) = delete;
208 void operator=(const AnnotatedLine &) = delete;
209};
210
211/// Determines extra information about the tokens comprising an
212/// \c UnwrappedLine.
213class TokenAnnotator {
214public:
215 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
216 : Style(Style), IsCpp(Style.isCpp()),
217 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
218 assert(IsCpp == LangOpts.CXXOperatorNames);
219 }
220
221 /// Adapts the indent levels of comment lines to the indent of the
222 /// subsequent line.
223 // FIXME: Can/should this be done in the UnwrappedLineParser?
224 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
225
226 void annotate(AnnotatedLine &Line);
227 void calculateFormattingInformation(AnnotatedLine &Line) const;
228
229private:
230 /// Calculate the penalty for splitting before \c Tok.
231 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
232 bool InFunctionDecl) const;
233
234 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
235
236 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
237 const FormatToken &Right) const;
238
239 bool spaceRequiredBefore(const AnnotatedLine &Line,
240 const FormatToken &Right) const;
241
242 bool mustBreakBefore(const AnnotatedLine &Line,
243 const FormatToken &Right) const;
244
245 bool canBreakBefore(const AnnotatedLine &Line,
246 const FormatToken &Right) const;
247
248 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
249
250 void printDebugInfo(const AnnotatedLine &Line) const;
251
252 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
253
254 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
255
256 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
257 FormatToken *CurrentToken,
258 unsigned Depth) const;
259 FormatStyle::PointerAlignmentStyle
260 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
261
262 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
263 const FormatToken &PointerOrReference) const;
264
265 const FormatStyle &Style;
266
267 bool IsCpp;
268 LangOptions LangOpts;
269
270 const AdditionalKeywords &Keywords;
271
272 SmallVector<ScopeType> Scopes;
273};
274
275} // end namespace format
276} // end namespace clang
277
278#endif
279