FormatToken.cpp source code [llvm_projects/clang/lib/Format/FormatToken.cpp]

1	//===--- FormatToken.cpp - Format C++ code --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements specific functions of \c FormatTokens and their
11	/// roles.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#include "FormatToken.h"
16	#include "ContinuationIndenter.h"
17	#include "llvm/ADT/SmallVector.h"
18	#include "llvm/Support/Debug.h"
19	#include <climits>
20
21	namespace clang {
22	namespace format {
23
24	const char *getTokenTypeName(TokenType Type) {
25	static const char *const TokNames[] = {
26	#define TYPE(X) #X,
27	LIST_TOKEN_TYPES
28	#undef TYPE
29	nullptr};
30
31	if (Type < NUM_TOKEN_TYPES)
32	return TokNames[Type];
33	llvm_unreachable("unknown TokenType");
34	return nullptr;
35	}
36
37	// Sorted common C++ non-keyword types.
38	static SmallVector<StringRef> CppNonKeywordTypes = {
39	"clock_t", "int16_t", "int32_t", "int64_t", "int8_t",
40	"intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t",
41	"uint32_t", "uint64_t", "uint8_t", "uintptr_t",
42	};
43
44	bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
45	const bool IsCpp = LangOpts.CXXOperatorNames;
46	return is(TT: TT_TypeName) \|\| Tok.isSimpleTypeSpecifier(LangOpts) \|\|
47	(IsCpp && is(Kind: tok::identifier) &&
48	std::binary_search(first: CppNonKeywordTypes.begin(),
49	last: CppNonKeywordTypes.end(), val: TokenText));
50	}
51
52	bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {
53	return isTypeName(LangOpts) \|\| isOneOf(K1: tok::kw_auto, K2: tok::identifier);
54	}
55
56	bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
57	assert(is(tok::r_brace));
58	if (!Style.Cpp11BracedListStyle \|\|
59	Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
60	return false;
61	}
62	const auto *LBrace = MatchingParen;
63	assert(LBrace && LBrace->is(tok::l_brace));
64	if (LBrace->is(BBK: BK_BracedInit))
65	return true;
66	if (LBrace->Previous && LBrace->Previous->is(Kind: tok::equal))
67	return true;
68	return false;
69	}
70
71	bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const {
72	// C# Does not indent object initialisers as continuations.
73	if (is(Kind: tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
74	return true;
75	if (is(TT: TT_TemplateString) && opensScope())
76	return true;
77	return is(TT: TT_ArrayInitializerLSquare) \|\| is(TT: TT_ProtoExtensionLSquare) \|\|
78	(is(Kind: tok::l_brace) &&
79	(getBlockKind() == BK_Block \|\| is(TT: TT_DictLiteral) \|\|
80	(!Style.Cpp11BracedListStyle && NestingLevel == `0`))) \|\|
81	(is(Kind: tok::less) && Style.isProto());
82	}
83
84	TokenRole::~TokenRole() {}
85
86	void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
87
88	unsigned CommaSeparatedList::formatAfterToken(LineState &State,
89	ContinuationIndenter *Indenter,
90	bool DryRun) {
91	if (!State.NextToken \|\| !State.NextToken->Previous)
92	return `0`;
93
94	if (Formats.size() <= `1`)
95	return `0`; // Handled by formatFromToken (1) or avoid severe penalty (0).
96
97	// Ensure that we start on the opening brace.
98	const FormatToken *LBrace =
99	State.NextToken->Previous->getPreviousNonComment();
100	if (!LBrace \|\| !LBrace->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare) \|\|
101	LBrace->is(BBK: BK_Block) \|\| LBrace->is(TT: TT_DictLiteral) \|\|
102	LBrace->Next->is(TT: TT_DesignatedInitializerPeriod)) {
103	return `0`;
104	}
105
106	// Calculate the number of code points we have to format this list. As the
107	// first token is already placed, we have to subtract it.
108	unsigned RemainingCodePoints =
109	Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
110
111	// Find the best ColumnFormat, i.e. the best number of columns to use.
112	const ColumnFormat *Format = getColumnFormat(RemainingCharacters: RemainingCodePoints);
113
114	// If no ColumnFormat can be used, the braced list would generally be
115	// bin-packed. Add a severe penalty to this so that column layouts are
116	// preferred if possible.
117	if (!Format)
118	return `10'000`;
119
120	// Format the entire list.
121	unsigned Penalty = `0`;
122	unsigned Column = `0`;
123	unsigned Item = `0`;
124	while (State.NextToken != LBrace->MatchingParen) {
125	bool NewLine = false;
126	unsigned ExtraSpaces = `0`;
127
128	// If the previous token was one of our commas, we are now on the next item.
129	if (Item < Commas.size() && State.NextToken->Previous == Commas [Item]) {
130	if (!State.NextToken->isTrailingComment()) {
131	ExtraSpaces += Format->ColumnSizes [Column] - ItemLengths [Item];
132	++Column;
133	}
134	++Item;
135	}
136
137	if (Column == Format->Columns \|\| State.NextToken->MustBreakBefore) {
138	Column = `0`;
139	NewLine = true;
140	}
141
142	// Place token using the continuation indenter and store the penalty.
143	Penalty += Indenter->addTokenToState(State, Newline: NewLine, DryRun, ExtraSpaces);
144	}
145	return Penalty;
146	}
147
148	unsigned CommaSeparatedList::formatFromToken(LineState &State,
149	ContinuationIndenter *Indenter,
150	bool DryRun) {
151	// Formatting with 1 Column isn't really a column layout, so we don't need the
152	// special logic here. We can just avoid bin packing any of the parameters.
153	if (Formats.size() == `1` \|\| HasNestedBracedList)
154	State.Stack.back().AvoidBinPacking = true;
155	return `0`;
156	}
157
158	// Returns the lengths in code points between Begin and End (both included),
159	// assuming that the entire sequence is put on a single line.
160	static unsigned CodePointsBetween(const FormatToken *Begin,
161	const FormatToken *End) {
162	assert(End->TotalLength >= Begin->TotalLength);
163	return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
164	}
165
166	void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
167	// FIXME: At some point we might want to do this for other lists, too.
168	if (!Token->MatchingParen \|\|
169	!Token->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare)) {
170	return;
171	}
172
173	// In C++11 braced list style, we should not format in columns unless they
174	// have many items (20 or more) or we allow bin-packing of function call
175	// arguments.
176	if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
177	Commas.size() < `19`) {
178	return;
179	}
180
181	// Limit column layout for JavaScript array initializers to 20 or more items
182	// for now to introduce it carefully. We can become more aggressive if this
183	// necessary.
184	if (Token->is(TT: TT_ArrayInitializerLSquare) && Commas.size() < `19`)
185	return;
186
187	// Column format doesn't really make sense if we don't align after brackets.
188	if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
189	return;
190
191	FormatToken *ItemBegin = Token->Next;
192	while (ItemBegin->isTrailingComment())
193	ItemBegin = ItemBegin->Next;
194	SmallVector<bool, `8`> MustBreakBeforeItem;
195
196	// The lengths of an item if it is put at the end of the line. This includes
197	// trailing comments which are otherwise ignored for column alignment.
198	SmallVector<unsigned, `8`> EndOfLineItemLength;
199	MustBreakBeforeItem.reserve(N: Commas.size() + `1`);
200	EndOfLineItemLength.reserve(N: Commas.size() + `1`);
201	ItemLengths.reserve(N: Commas.size() + `1`);
202
203	bool HasSeparatingComment = false;
204	for (unsigned i = `0`, e = Commas.size() + `1`; i != e; ++i) {
205	assert(ItemBegin);
206	// Skip comments on their own line.
207	while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
208	ItemBegin = ItemBegin->Next;
209	HasSeparatingComment = i > `0`;
210	}
211
212	MustBreakBeforeItem.push_back(Elt: ItemBegin->MustBreakBefore);
213	if (ItemBegin->is(Kind: tok::l_brace))
214	HasNestedBracedList = true;
215	const FormatToken ItemEnd = nullptr*;
216	if (i == Commas.size()) {
217	ItemEnd = Token->MatchingParen;
218	const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
219	ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: NonCommentEnd));
220	if (Style.Cpp11BracedListStyle &&
221	!ItemEnd->Previous->isTrailingComment()) {
222	// In Cpp11 braced list style, the } and possibly other subsequent
223	// tokens will need to stay on a line with the last element.
224	while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
225	ItemEnd = ItemEnd->Next;
226	} else {
227	// In other braced lists styles, the "}" can be wrapped to the new line.
228	ItemEnd = Token->MatchingParen->Previous;
229	}
230	} else {
231	ItemEnd = Commas [i];
232	// The comma is counted as part of the item when calculating the length.
233	ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd));
234
235	// Consume trailing comments so the are included in EndOfLineItemLength.
236	if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
237	ItemEnd->Next->isTrailingComment()) {
238	ItemEnd = ItemEnd->Next;
239	}
240	}
241	EndOfLineItemLength.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd));
242	// If there is a trailing comma in the list, the next item will start at the
243	// closing brace. Don't create an extra item for this.
244	if (ItemEnd->getNextNonComment() == Token->MatchingParen)
245	break;
246	ItemBegin = ItemEnd->Next;
247	}
248
249	// Don't use column layout for lists with few elements and in presence of
250	// separating comments.
251	if (Commas.size() < `5` \|\| HasSeparatingComment)
252	return;
253
254	if (Token->NestingLevel != `0` && Token->is(Kind: tok::l_brace) && Commas.size() < `19`)
255	return;
256
257	// We can never place more than ColumnLimit / 3 items in a row (because of the
258	// spaces and the comma).
259	unsigned MaxItems = Style.ColumnLimit / `3`;
260	SmallVector<unsigned> MinSizeInColumn;
261	MinSizeInColumn.reserve(N: MaxItems);
262	for (unsigned Columns = `1`; Columns <= MaxItems; ++Columns) {
263	ColumnFormat Format;
264	Format.Columns = Columns;
265	Format.ColumnSizes.resize(N: Columns);
266	MinSizeInColumn.assign(NumElts: Columns, UINT_MAX);
267	Format.LineCount = `1`;
268	bool HasRowWithSufficientColumns = false;
269	unsigned Column = `0`;
270	for (unsigned i = `0`, e = ItemLengths.size(); i != e; ++i) {
271	assert(i < MustBreakBeforeItem.size());
272	if (MustBreakBeforeItem [i] \|\| Column == Columns) {
273	++Format.LineCount;
274	Column = `0`;
275	}
276	if (Column == Columns - `1`)
277	HasRowWithSufficientColumns = true;
278	unsigned Length =
279	(Column == Columns - `1`) ? EndOfLineItemLength [i] : ItemLengths [i];
280	Format.ColumnSizes [Column] = std::max(a: Format.ColumnSizes [Column], b: Length);
281	MinSizeInColumn [Column] = std::min(a: MinSizeInColumn [Column], b: Length);
282	++Column;
283	}
284	// If all rows are terminated early (e.g. by trailing comments), we don't
285	// need to look further.
286	if (!HasRowWithSufficientColumns)
287	break;
288	Format.TotalWidth = Columns - `1`; // Width of the N-1 spaces.
289
290	for (unsigned i = `0`; i < Columns; ++i)
291	Format.TotalWidth += Format.ColumnSizes [i];
292
293	// Don't use this Format, if the difference between the longest and shortest
294	// element in a column exceeds a threshold to avoid excessive spaces.
295	if ([&] {
296	for (unsigned i = `0`; i < Columns - `1`; ++i)
297	if (Format.ColumnSizes [i] - MinSizeInColumn [i] > `10`)
298	return true;
299	return false;
300	}()) {
301	continue;
302	}
303
304	// Ignore layouts that are bound to violate the column limit.
305	if (Format.TotalWidth > Style.ColumnLimit && Columns > `1`)
306	continue;
307
308	Formats.push_back(Elt: Format);
309	}
310	}
311
312	const CommaSeparatedList::ColumnFormat *
313	CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
314	const ColumnFormat BestFormat = nullptr*;
315	for (const ColumnFormat &Format : llvm::reverse(C: Formats)) {
316	if (Format.TotalWidth <= RemainingCharacters \|\| Format.Columns == `1`) {
317	if (BestFormat && Format.LineCount > BestFormat->LineCount)
318	break;
319	BestFormat = &Format;
320	}
321	}
322	return BestFormat;
323	}
324
325	} // namespace format
326	} // namespace clang
327

Browse the source code of llvm_projects/clang/lib/Format/FormatToken.cpp