FormatToken.cpp source code [llvm_projects/clang/lib/Format/FormatToken.cpp]

1	//===--- FormatToken.cpp - Format C++ code --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements specific functions of \c FormatTokens and their
11	/// roles.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#include "FormatToken.h"
16	#include "ContinuationIndenter.h"
17	#include "llvm/ADT/SmallVector.h"
18	#include <climits>
19
20	namespace clang {
21	namespace format {
22
23	const char *getTokenTypeName(TokenType Type) {
24	static const char *const TokNames[] = {
25	#define TYPE(X) #X,
26	LIST_TOKEN_TYPES
27	#undef TYPE
28	nullptr};
29
30	if (Type < NUM_TOKEN_TYPES)
31	return TokNames[Type];
32	llvm_unreachable("unknown TokenType");
33	return nullptr;
34	}
35
36	// Sorted common C++ non-keyword types.
37	static SmallVector<StringRef> CppNonKeywordTypes = {
38	"clock_t", "int16_t", "int32_t", "int64_t", "int8_t",
39	"intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t",
40	"uint32_t", "uint64_t", "uint8_t", "uintptr_t",
41	};
42
43	bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
44	if (is(TT: TT_TypeName) \|\| Tok.isSimpleTypeSpecifier(LangOpts))
45	return true;
46	return (LangOpts.CXXOperatorNames \|\| LangOpts.C11) && is(Kind: tok::identifier) &&
47	llvm::binary_search(Range&: CppNonKeywordTypes, Value: TokenText);
48	}
49
50	bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {
51	return isTypeName(LangOpts) \|\| isOneOf(K1: tok::kw_auto, K2: tok::identifier);
52	}
53
54	bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
55	assert(is(tok::r_brace));
56	if (!Style.Cpp11BracedListStyle \|\|
57	Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
58	return false;
59	}
60	const auto *LBrace = MatchingParen;
61	assert(LBrace && LBrace->is(tok::l_brace));
62	if (LBrace->is(BBK: BK_BracedInit))
63	return true;
64	if (LBrace->Previous && LBrace->Previous->is(Kind: tok::equal))
65	return true;
66	return false;
67	}
68
69	bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const {
70	// C# Does not indent object initialisers as continuations.
71	if (is(Kind: tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
72	return true;
73	if (is(TT: TT_TemplateString) && opensScope())
74	return true;
75	return is(TT: TT_ArrayInitializerLSquare) \|\| is(TT: TT_ProtoExtensionLSquare) \|\|
76	(is(Kind: tok::l_brace) &&
77	(getBlockKind() == BK_Block \|\| is(TT: TT_DictLiteral) \|\|
78	(!Style.Cpp11BracedListStyle && NestingLevel == `0`))) \|\|
79	(is(Kind: tok::less) && Style.isProto());
80	}
81
82	TokenRole::~TokenRole() {}
83
84	void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
85
86	unsigned CommaSeparatedList::formatAfterToken(LineState &State,
87	ContinuationIndenter *Indenter,
88	bool DryRun) {
89	if (!State.NextToken \|\| !State.NextToken->Previous)
90	return `0`;
91
92	if (Formats.size() <= `1`)
93	return `0`; // Handled by formatFromToken (1) or avoid severe penalty (0).
94
95	// Ensure that we start on the opening brace.
96	const FormatToken *LBrace =
97	State.NextToken->Previous->getPreviousNonComment();
98	if (!LBrace \|\| !LBrace->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare) \|\|
99	LBrace->is(BBK: BK_Block) \|\| LBrace->is(TT: TT_DictLiteral) \|\|
100	LBrace->Next->is(TT: TT_DesignatedInitializerPeriod)) {
101	return `0`;
102	}
103
104	// Calculate the number of code points we have to format this list. As the
105	// first token is already placed, we have to subtract it.
106	unsigned RemainingCodePoints =
107	Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
108
109	// Find the best ColumnFormat, i.e. the best number of columns to use.
110	const ColumnFormat *Format = getColumnFormat(RemainingCharacters: RemainingCodePoints);
111
112	// If no ColumnFormat can be used, the braced list would generally be
113	// bin-packed. Add a severe penalty to this so that column layouts are
114	// preferred if possible.
115	if (!Format)
116	return `10'000`;
117
118	// Format the entire list.
119	unsigned Penalty = `0`;
120	unsigned Column = `0`;
121	unsigned Item = `0`;
122	while (State.NextToken != LBrace->MatchingParen) {
123	bool NewLine = false;
124	unsigned ExtraSpaces = `0`;
125
126	// If the previous token was one of our commas, we are now on the next item.
127	if (Item < Commas.size() && State.NextToken->Previous == Commas [Item]) {
128	if (!State.NextToken->isTrailingComment()) {
129	ExtraSpaces += Format->ColumnSizes [Column] - ItemLengths [Item];
130	++Column;
131	}
132	++Item;
133	}
134
135	if (Column == Format->Columns \|\| State.NextToken->MustBreakBefore) {
136	Column = `0`;
137	NewLine = true;
138	}
139
140	// Place token using the continuation indenter and store the penalty.
141	Penalty += Indenter->addTokenToState(State, Newline: NewLine, DryRun, ExtraSpaces);
142	}
143	return Penalty;
144	}
145
146	unsigned CommaSeparatedList::formatFromToken(LineState &State,
147	ContinuationIndenter *Indenter,
148	bool DryRun) {
149	// Formatting with 1 Column isn't really a column layout, so we don't need the
150	// special logic here. We can just avoid bin packing any of the parameters.
151	if (Formats.size() == `1` \|\| HasNestedBracedList)
152	State.Stack.back().AvoidBinPacking = true;
153	return `0`;
154	}
155
156	// Returns the lengths in code points between Begin and End (both included),
157	// assuming that the entire sequence is put on a single line.
158	static unsigned CodePointsBetween(const FormatToken *Begin,
159	const FormatToken *End) {
160	assert(End->TotalLength >= Begin->TotalLength);
161	return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
162	}
163
164	void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
165	// FIXME: At some point we might want to do this for other lists, too.
166	if (!Token->MatchingParen \|\|
167	!Token->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare)) {
168	return;
169	}
170
171	// In C++11 braced list style, we should not format in columns unless they
172	// have many items (20 or more) or we allow bin-packing of function call
173	// arguments.
174	if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
175	(Commas.size() < `19` \|\| !Style.BinPackLongBracedList)) {
176	return;
177	}
178
179	// Limit column layout for JavaScript array initializers to 20 or more items
180	// for now to introduce it carefully. We can become more aggressive if this
181	// necessary.
182	if (Token->is(TT: TT_ArrayInitializerLSquare) && Commas.size() < `19`)
183	return;
184
185	// Column format doesn't really make sense if we don't align after brackets.
186	if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
187	return;
188
189	FormatToken *ItemBegin = Token->Next;
190	while (ItemBegin->isTrailingComment())
191	ItemBegin = ItemBegin->Next;
192	SmallVector<bool, `8`> MustBreakBeforeItem;
193
194	// The lengths of an item if it is put at the end of the line. This includes
195	// trailing comments which are otherwise ignored for column alignment.
196	SmallVector<unsigned, `8`> EndOfLineItemLength;
197	MustBreakBeforeItem.reserve(N: Commas.size() + `1`);
198	EndOfLineItemLength.reserve(N: Commas.size() + `1`);
199	ItemLengths.reserve(N: Commas.size() + `1`);
200
201	bool HasSeparatingComment = false;
202	for (unsigned i = `0`, e = Commas.size() + `1`; i != e; ++i) {
203	assert(ItemBegin);
204	// Skip comments on their own line.
205	while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
206	ItemBegin = ItemBegin->Next;
207	HasSeparatingComment = i > `0`;
208	}
209
210	MustBreakBeforeItem.push_back(Elt: ItemBegin->MustBreakBefore);
211	if (ItemBegin->is(Kind: tok::l_brace))
212	HasNestedBracedList = true;
213	const FormatToken ItemEnd = nullptr*;
214	if (i == Commas.size()) {
215	ItemEnd = Token->MatchingParen;
216	const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
217	ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: NonCommentEnd));
218	if (Style.Cpp11BracedListStyle &&
219	!ItemEnd->Previous->isTrailingComment()) {
220	// In Cpp11 braced list style, the } and possibly other subsequent
221	// tokens will need to stay on a line with the last element.
222	while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
223	ItemEnd = ItemEnd->Next;
224	} else {
225	// In other braced lists styles, the "}" can be wrapped to the new line.
226	ItemEnd = Token->MatchingParen->Previous;
227	}
228	} else {
229	ItemEnd = Commas [i];
230	// The comma is counted as part of the item when calculating the length.
231	ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd));
232
233	// Consume trailing comments so the are included in EndOfLineItemLength.
234	if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
235	ItemEnd->Next->isTrailingComment()) {
236	ItemEnd = ItemEnd->Next;
237	}
238	}
239	EndOfLineItemLength.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd));
240	// If there is a trailing comma in the list, the next item will start at the
241	// closing brace. Don't create an extra item for this.
242	if (ItemEnd->getNextNonComment() == Token->MatchingParen)
243	break;
244	ItemBegin = ItemEnd->Next;
245	}
246
247	// Don't use column layout for lists with few elements and in presence of
248	// separating comments.
249	if (Commas.size() < `5` \|\| HasSeparatingComment)
250	return;
251
252	if (Token->NestingLevel != `0` && Token->is(Kind: tok::l_brace) && Commas.size() < `19`)
253	return;
254
255	// We can never place more than ColumnLimit / 3 items in a row (because of the
256	// spaces and the comma).
257	unsigned MaxItems = Style.ColumnLimit / `3`;
258	SmallVector<unsigned> MinSizeInColumn;
259	MinSizeInColumn.reserve(N: MaxItems);
260	for (unsigned Columns = `1`; Columns <= MaxItems; ++Columns) {
261	ColumnFormat Format;
262	Format.Columns = Columns;
263	Format.ColumnSizes.resize(N: Columns);
264	MinSizeInColumn.assign(NumElts: Columns, UINT_MAX);
265	Format.LineCount = `1`;
266	bool HasRowWithSufficientColumns = false;
267	unsigned Column = `0`;
268	for (unsigned i = `0`, e = ItemLengths.size(); i != e; ++i) {
269	assert(i < MustBreakBeforeItem.size());
270	if (MustBreakBeforeItem [i] \|\| Column == Columns) {
271	++Format.LineCount;
272	Column = `0`;
273	}
274	if (Column == Columns - `1`)
275	HasRowWithSufficientColumns = true;
276	unsigned Length =
277	(Column == Columns - `1`) ? EndOfLineItemLength [i] : ItemLengths [i];
278	Format.ColumnSizes [Column] = std::max(a: Format.ColumnSizes [Column], b: Length);
279	MinSizeInColumn [Column] = std::min(a: MinSizeInColumn [Column], b: Length);
280	++Column;
281	}
282	// If all rows are terminated early (e.g. by trailing comments), we don't
283	// need to look further.
284	if (!HasRowWithSufficientColumns)
285	break;
286	Format.TotalWidth = Columns - `1`; // Width of the N-1 spaces.
287
288	for (unsigned i = `0`; i < Columns; ++i)
289	Format.TotalWidth += Format.ColumnSizes [i];
290
291	// Don't use this Format, if the difference between the longest and shortest
292	// element in a column exceeds a threshold to avoid excessive spaces.
293	if ([&] {
294	for (unsigned i = `0`; i < Columns - `1`; ++i)
295	if (Format.ColumnSizes [i] - MinSizeInColumn [i] > `10`)
296	return true;
297	return false;
298	}()) {
299	continue;
300	}
301
302	// Ignore layouts that are bound to violate the column limit.
303	if (Format.TotalWidth > Style.ColumnLimit && Columns > `1`)
304	continue;
305
306	Formats.push_back(Elt: Format);
307	}
308	}
309
310	const CommaSeparatedList::ColumnFormat *
311	CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
312	const ColumnFormat BestFormat = nullptr*;
313	for (const ColumnFormat &Format : llvm::reverse(C: Formats)) {
314	if (Format.TotalWidth <= RemainingCharacters \|\| Format.Columns == `1`) {
315	if (BestFormat && Format.LineCount > BestFormat->LineCount)
316	break;
317	BestFormat = &Format;
318	}
319	}
320	return BestFormat;
321	}
322
323	bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style) {
324	assert(Current.Previous);
325	const auto &Previous = *Current.Previous;
326	if (Current.is(TT: TT_CtorInitializerComma) &&
327	Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
328	return true;
329	}
330	if (Style.Language == FormatStyle::LK_Proto && Current.is(TT: TT_SelectorName))
331	return true;
332	return Previous.is(Kind: tok::comma) && !Current.isTrailingComment() &&
333	((Previous.isNot(Kind: TT_CtorInitializerComma) \|\|
334	Style.BreakConstructorInitializers !=
335	FormatStyle::BCIS_BeforeComma) &&
336	(Previous.isNot(Kind: TT_InheritanceComma) \|\|
337	Style.BreakInheritanceList != FormatStyle::BILS_BeforeComma));
338	}
339
340	} // namespace format
341	} // namespace clang
342

Browse the source code of llvm_projects/clang/lib/Format/FormatToken.cpp