1 | //===--- FormatToken.cpp - Format C++ code --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements specific functions of \c FormatTokens and their |
11 | /// roles. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "FormatToken.h" |
16 | #include "ContinuationIndenter.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Support/Debug.h" |
19 | #include <climits> |
20 | |
21 | namespace clang { |
22 | namespace format { |
23 | |
24 | const char *getTokenTypeName(TokenType Type) { |
25 | static const char *const TokNames[] = { |
26 | #define TYPE(X) #X, |
27 | LIST_TOKEN_TYPES |
28 | #undef TYPE |
29 | nullptr}; |
30 | |
31 | if (Type < NUM_TOKEN_TYPES) |
32 | return TokNames[Type]; |
33 | llvm_unreachable("unknown TokenType" ); |
34 | return nullptr; |
35 | } |
36 | |
37 | // Sorted common C++ non-keyword types. |
38 | static SmallVector<StringRef> CppNonKeywordTypes = { |
39 | "clock_t" , "int16_t" , "int32_t" , "int64_t" , "int8_t" , |
40 | "intptr_t" , "ptrdiff_t" , "size_t" , "time_t" , "uint16_t" , |
41 | "uint32_t" , "uint64_t" , "uint8_t" , "uintptr_t" , |
42 | }; |
43 | |
44 | bool FormatToken::isTypeName(const LangOptions &LangOpts) const { |
45 | const bool IsCpp = LangOpts.CXXOperatorNames; |
46 | return is(TT: TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) || |
47 | (IsCpp && is(Kind: tok::identifier) && |
48 | std::binary_search(first: CppNonKeywordTypes.begin(), |
49 | last: CppNonKeywordTypes.end(), val: TokenText)); |
50 | } |
51 | |
52 | bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const { |
53 | return isTypeName(LangOpts) || isOneOf(K1: tok::kw_auto, K2: tok::identifier); |
54 | } |
55 | |
56 | bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { |
57 | assert(is(tok::r_brace)); |
58 | if (!Style.Cpp11BracedListStyle || |
59 | Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { |
60 | return false; |
61 | } |
62 | const auto *LBrace = MatchingParen; |
63 | assert(LBrace && LBrace->is(tok::l_brace)); |
64 | if (LBrace->is(BBK: BK_BracedInit)) |
65 | return true; |
66 | if (LBrace->Previous && LBrace->Previous->is(Kind: tok::equal)) |
67 | return true; |
68 | return false; |
69 | } |
70 | |
71 | bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const { |
72 | // C# Does not indent object initialisers as continuations. |
73 | if (is(Kind: tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) |
74 | return true; |
75 | if (is(TT: TT_TemplateString) && opensScope()) |
76 | return true; |
77 | return is(TT: TT_ArrayInitializerLSquare) || is(TT: TT_ProtoExtensionLSquare) || |
78 | (is(Kind: tok::l_brace) && |
79 | (getBlockKind() == BK_Block || is(TT: TT_DictLiteral) || |
80 | (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || |
81 | (is(Kind: tok::less) && Style.isProto()); |
82 | } |
83 | |
84 | TokenRole::~TokenRole() {} |
85 | |
86 | void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} |
87 | |
88 | unsigned CommaSeparatedList::formatAfterToken(LineState &State, |
89 | ContinuationIndenter *Indenter, |
90 | bool DryRun) { |
91 | if (!State.NextToken || !State.NextToken->Previous) |
92 | return 0; |
93 | |
94 | if (Formats.size() <= 1) |
95 | return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). |
96 | |
97 | // Ensure that we start on the opening brace. |
98 | const FormatToken *LBrace = |
99 | State.NextToken->Previous->getPreviousNonComment(); |
100 | if (!LBrace || !LBrace->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare) || |
101 | LBrace->is(BBK: BK_Block) || LBrace->is(TT: TT_DictLiteral) || |
102 | LBrace->Next->is(TT: TT_DesignatedInitializerPeriod)) { |
103 | return 0; |
104 | } |
105 | |
106 | // Calculate the number of code points we have to format this list. As the |
107 | // first token is already placed, we have to subtract it. |
108 | unsigned RemainingCodePoints = |
109 | Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; |
110 | |
111 | // Find the best ColumnFormat, i.e. the best number of columns to use. |
112 | const ColumnFormat *Format = getColumnFormat(RemainingCharacters: RemainingCodePoints); |
113 | |
114 | // If no ColumnFormat can be used, the braced list would generally be |
115 | // bin-packed. Add a severe penalty to this so that column layouts are |
116 | // preferred if possible. |
117 | if (!Format) |
118 | return 10'000; |
119 | |
120 | // Format the entire list. |
121 | unsigned Penalty = 0; |
122 | unsigned Column = 0; |
123 | unsigned Item = 0; |
124 | while (State.NextToken != LBrace->MatchingParen) { |
125 | bool NewLine = false; |
126 | unsigned = 0; |
127 | |
128 | // If the previous token was one of our commas, we are now on the next item. |
129 | if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { |
130 | if (!State.NextToken->isTrailingComment()) { |
131 | ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; |
132 | ++Column; |
133 | } |
134 | ++Item; |
135 | } |
136 | |
137 | if (Column == Format->Columns || State.NextToken->MustBreakBefore) { |
138 | Column = 0; |
139 | NewLine = true; |
140 | } |
141 | |
142 | // Place token using the continuation indenter and store the penalty. |
143 | Penalty += Indenter->addTokenToState(State, Newline: NewLine, DryRun, ExtraSpaces); |
144 | } |
145 | return Penalty; |
146 | } |
147 | |
148 | unsigned CommaSeparatedList::formatFromToken(LineState &State, |
149 | ContinuationIndenter *Indenter, |
150 | bool DryRun) { |
151 | // Formatting with 1 Column isn't really a column layout, so we don't need the |
152 | // special logic here. We can just avoid bin packing any of the parameters. |
153 | if (Formats.size() == 1 || HasNestedBracedList) |
154 | State.Stack.back().AvoidBinPacking = true; |
155 | return 0; |
156 | } |
157 | |
158 | // Returns the lengths in code points between Begin and End (both included), |
159 | // assuming that the entire sequence is put on a single line. |
160 | static unsigned CodePointsBetween(const FormatToken *Begin, |
161 | const FormatToken *End) { |
162 | assert(End->TotalLength >= Begin->TotalLength); |
163 | return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; |
164 | } |
165 | |
166 | void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { |
167 | // FIXME: At some point we might want to do this for other lists, too. |
168 | if (!Token->MatchingParen || |
169 | !Token->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare)) { |
170 | return; |
171 | } |
172 | |
173 | // In C++11 braced list style, we should not format in columns unless they |
174 | // have many items (20 or more) or we allow bin-packing of function call |
175 | // arguments. |
176 | if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && |
177 | Commas.size() < 19) { |
178 | return; |
179 | } |
180 | |
181 | // Limit column layout for JavaScript array initializers to 20 or more items |
182 | // for now to introduce it carefully. We can become more aggressive if this |
183 | // necessary. |
184 | if (Token->is(TT: TT_ArrayInitializerLSquare) && Commas.size() < 19) |
185 | return; |
186 | |
187 | // Column format doesn't really make sense if we don't align after brackets. |
188 | if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) |
189 | return; |
190 | |
191 | FormatToken *ItemBegin = Token->Next; |
192 | while (ItemBegin->isTrailingComment()) |
193 | ItemBegin = ItemBegin->Next; |
194 | SmallVector<bool, 8> MustBreakBeforeItem; |
195 | |
196 | // The lengths of an item if it is put at the end of the line. This includes |
197 | // trailing comments which are otherwise ignored for column alignment. |
198 | SmallVector<unsigned, 8> EndOfLineItemLength; |
199 | MustBreakBeforeItem.reserve(N: Commas.size() + 1); |
200 | EndOfLineItemLength.reserve(N: Commas.size() + 1); |
201 | ItemLengths.reserve(N: Commas.size() + 1); |
202 | |
203 | bool = false; |
204 | for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { |
205 | assert(ItemBegin); |
206 | // Skip comments on their own line. |
207 | while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { |
208 | ItemBegin = ItemBegin->Next; |
209 | HasSeparatingComment = i > 0; |
210 | } |
211 | |
212 | MustBreakBeforeItem.push_back(Elt: ItemBegin->MustBreakBefore); |
213 | if (ItemBegin->is(Kind: tok::l_brace)) |
214 | HasNestedBracedList = true; |
215 | const FormatToken *ItemEnd = nullptr; |
216 | if (i == Commas.size()) { |
217 | ItemEnd = Token->MatchingParen; |
218 | const FormatToken * = ItemEnd->getPreviousNonComment(); |
219 | ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: NonCommentEnd)); |
220 | if (Style.Cpp11BracedListStyle && |
221 | !ItemEnd->Previous->isTrailingComment()) { |
222 | // In Cpp11 braced list style, the } and possibly other subsequent |
223 | // tokens will need to stay on a line with the last element. |
224 | while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) |
225 | ItemEnd = ItemEnd->Next; |
226 | } else { |
227 | // In other braced lists styles, the "}" can be wrapped to the new line. |
228 | ItemEnd = Token->MatchingParen->Previous; |
229 | } |
230 | } else { |
231 | ItemEnd = Commas[i]; |
232 | // The comma is counted as part of the item when calculating the length. |
233 | ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd)); |
234 | |
235 | // Consume trailing comments so the are included in EndOfLineItemLength. |
236 | if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && |
237 | ItemEnd->Next->isTrailingComment()) { |
238 | ItemEnd = ItemEnd->Next; |
239 | } |
240 | } |
241 | EndOfLineItemLength.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd)); |
242 | // If there is a trailing comma in the list, the next item will start at the |
243 | // closing brace. Don't create an extra item for this. |
244 | if (ItemEnd->getNextNonComment() == Token->MatchingParen) |
245 | break; |
246 | ItemBegin = ItemEnd->Next; |
247 | } |
248 | |
249 | // Don't use column layout for lists with few elements and in presence of |
250 | // separating comments. |
251 | if (Commas.size() < 5 || HasSeparatingComment) |
252 | return; |
253 | |
254 | if (Token->NestingLevel != 0 && Token->is(Kind: tok::l_brace) && Commas.size() < 19) |
255 | return; |
256 | |
257 | // We can never place more than ColumnLimit / 3 items in a row (because of the |
258 | // spaces and the comma). |
259 | unsigned MaxItems = Style.ColumnLimit / 3; |
260 | SmallVector<unsigned> MinSizeInColumn; |
261 | MinSizeInColumn.reserve(N: MaxItems); |
262 | for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { |
263 | ColumnFormat Format; |
264 | Format.Columns = Columns; |
265 | Format.ColumnSizes.resize(N: Columns); |
266 | MinSizeInColumn.assign(NumElts: Columns, UINT_MAX); |
267 | Format.LineCount = 1; |
268 | bool HasRowWithSufficientColumns = false; |
269 | unsigned Column = 0; |
270 | for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { |
271 | assert(i < MustBreakBeforeItem.size()); |
272 | if (MustBreakBeforeItem[i] || Column == Columns) { |
273 | ++Format.LineCount; |
274 | Column = 0; |
275 | } |
276 | if (Column == Columns - 1) |
277 | HasRowWithSufficientColumns = true; |
278 | unsigned Length = |
279 | (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; |
280 | Format.ColumnSizes[Column] = std::max(a: Format.ColumnSizes[Column], b: Length); |
281 | MinSizeInColumn[Column] = std::min(a: MinSizeInColumn[Column], b: Length); |
282 | ++Column; |
283 | } |
284 | // If all rows are terminated early (e.g. by trailing comments), we don't |
285 | // need to look further. |
286 | if (!HasRowWithSufficientColumns) |
287 | break; |
288 | Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. |
289 | |
290 | for (unsigned i = 0; i < Columns; ++i) |
291 | Format.TotalWidth += Format.ColumnSizes[i]; |
292 | |
293 | // Don't use this Format, if the difference between the longest and shortest |
294 | // element in a column exceeds a threshold to avoid excessive spaces. |
295 | if ([&] { |
296 | for (unsigned i = 0; i < Columns - 1; ++i) |
297 | if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) |
298 | return true; |
299 | return false; |
300 | }()) { |
301 | continue; |
302 | } |
303 | |
304 | // Ignore layouts that are bound to violate the column limit. |
305 | if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) |
306 | continue; |
307 | |
308 | Formats.push_back(Elt: Format); |
309 | } |
310 | } |
311 | |
312 | const CommaSeparatedList::ColumnFormat * |
313 | CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { |
314 | const ColumnFormat *BestFormat = nullptr; |
315 | for (const ColumnFormat &Format : llvm::reverse(C: Formats)) { |
316 | if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { |
317 | if (BestFormat && Format.LineCount > BestFormat->LineCount) |
318 | break; |
319 | BestFormat = &Format; |
320 | } |
321 | } |
322 | return BestFormat; |
323 | } |
324 | |
325 | } // namespace format |
326 | } // namespace clang |
327 | |