BreakableToken.cpp source code [llvm_projects/clang/lib/Format/BreakableToken.cpp]

1	//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// Contains implementation of BreakableToken class and classes derived
11	/// from it.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#include "BreakableToken.h"
16	#include "ContinuationIndenter.h"
17	#include "clang/Basic/CharInfo.h"
18	#include "clang/Format/Format.h"
19	#include "llvm/ADT/STLExtras.h"
20	#include "llvm/Support/Debug.h"
21	#include <algorithm>
22
23	#define DEBUG_TYPE "format-token-breaker"
24
25	namespace clang {
26	namespace format {
27
28	static constexpr StringRef Blanks = " \t\v\f\r";
/// Returns true if \p C is a space, horizontal tab, vertical tab, form feed or
/// carriage return. A newline is not considered blank.
static bool IsBlank(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
}
41
42	static StringRef getLineCommentIndentPrefix(StringRef Comment,
43	const FormatStyle &Style) {
44	static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45	"//!", "//:", "//"};
46	static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47	"//", "#"};
48	ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
49	if (Style.isTextProto())
50	KnownPrefixes = KnownTextProtoPrefixes;
51
52	assert(
53	llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54	return Lhs.size() > Rhs.size();
55	}));
56
57	for (StringRef KnownPrefix : KnownPrefixes) {
58	if (Comment.starts_with(Prefix: KnownPrefix)) {
59	const auto PrefixLength =
60	Comment.find_first_not_of(C: `' '`, From: KnownPrefix.size());
61	return Comment.substr(Start: `0`, N: PrefixLength);
62	}
63	}
64	return {};
65	}
66
67	static BreakableToken::Split
68	getCommentSplit(StringRef Text, unsigned ContentStartColumn,
69	unsigned ColumnLimit, unsigned TabWidth,
70	encoding::Encoding Encoding, const FormatStyle &Style,
71	bool DecorationEndsWithStar = false) {
72	LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
73	<< "\", Column limit: " << ColumnLimit
74	<< ", Content start: " << ContentStartColumn << "\n");
75	if (ColumnLimit <= ContentStartColumn + `1`)
76	return BreakableToken::Split (StringRef::npos, `0`);
77
78	unsigned MaxSplit = ColumnLimit - ContentStartColumn + `1`;
79	unsigned MaxSplitBytes = `0`;
80
81	for (unsigned NumChars = `0`;
82	NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
83	unsigned BytesInChar =
84	encoding::getCodePointNumBytes(FirstChar: Text [MaxSplitBytes], Encoding);
85	NumChars += encoding::columnWidthWithTabs(
86	Text: Text.substr(Start: MaxSplitBytes, N: BytesInChar), StartColumn: ContentStartColumn + NumChars,
87	TabWidth, Encoding);
88	MaxSplitBytes += BytesInChar;
89	}
90
91	// In JavaScript, some @tags can be followed by {, and machinery that parses
92	// these comments will fail to understand the comment if followed by a line
93	// break. So avoid ever breaking before a {.
94	if (Style.isJavaScript()) {
95	StringRef::size_type SpaceOffset =
96	Text.find_first_of(Chars: Blanks, From: MaxSplitBytes);
97	if (SpaceOffset != StringRef::npos && SpaceOffset + `1` < Text.size() &&
98	Text [SpaceOffset + `1`] == `'{'`) {
99	MaxSplitBytes = SpaceOffset + `1`;
100	}
101	}
102
103	StringRef::size_type SpaceOffset = Text.find_last_of(Chars: Blanks, From: MaxSplitBytes);
104
105	static const auto kNumberedListRegexp = llvm::Regex ("^[1-9][0-9]?\\.");
106	// Some spaces are unacceptable to break on, rewind past them.
107	while (SpaceOffset != StringRef::npos) {
108	// If a line-comment ends with `\`, the next line continues the comment,
109	// whether or not it starts with `//`. This is confusing and triggers
110	// -Wcomment.
111	// Avoid introducing multiline comments by not allowing a break right
112	// after '\'.
113	if (Style.isCpp()) {
114	StringRef::size_type LastNonBlank =
115	Text.find_last_not_of(Chars: Blanks, From: SpaceOffset);
116	if (LastNonBlank != StringRef::npos && Text [LastNonBlank] == `'\\'`) {
117	SpaceOffset = Text.find_last_of(Chars: Blanks, From: LastNonBlank);
118	continue;
119	}
120	}
121
122	// Do not split before a number followed by a dot: this would be interpreted
123	// as a numbered list, which would prevent re-flowing in subsequent passes.
124	if (kNumberedListRegexp.match(String: Text.substr(Start: SpaceOffset).ltrim(Chars: Blanks))) {
125	SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
126	continue;
127	}
128
129	// Avoid ever breaking before a @tag or a { in JavaScript.
130	if (Style.isJavaScript() && SpaceOffset + `1` < Text.size() &&
131	(Text [SpaceOffset + `1`] == `'{'` \|\| Text [SpaceOffset + `1`] == `'@'`)) {
132	SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
133	continue;
134	}
135
136	break;
137	}
138
139	if (SpaceOffset == StringRef::npos \|\|
140	// Don't break at leading whitespace.
141	Text.find_last_not_of(Chars: Blanks, From: SpaceOffset) == StringRef::npos) {
142	// Make sure that we don't break at leading whitespace that
143	// reaches past MaxSplit.
144	StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Chars: Blanks);
145	if (FirstNonWhitespace == StringRef::npos) {
146	// If the comment is only whitespace, we cannot split.
147	return BreakableToken::Split (StringRef::npos, `0`);
148	}
149	SpaceOffset = Text.find_first_of(
150	Chars: Blanks, From: std::max<unsigned>(a: MaxSplitBytes, b: FirstNonWhitespace));
151	}
152	if (SpaceOffset != StringRef::npos && SpaceOffset != `0`) {
153	// adaptStartOfLine will break after lines starting with /* if the comment*
154	// is broken anywhere. Avoid emitting this break twice here.
155	// Example: in /* longtextcomesherethatbreaks / (with ColumnLimit 20) will
156	// insert a break after /, so this code must not insert the same break.
157	if (SpaceOffset == `1` && Text [SpaceOffset - `1`] == `'*'`)
158	return BreakableToken::Split (StringRef::npos, `0`);
159	StringRef BeforeCut = Text.substr(Start: `0`, N: SpaceOffset).rtrim(Chars: Blanks);
160	StringRef AfterCut = Text.substr(Start: SpaceOffset);
161	if (!DecorationEndsWithStar)
162	AfterCut = AfterCut.ltrim(Chars: Blanks);
163	return BreakableToken::Split (BeforeCut.size(),
164	AfterCut.begin() - BeforeCut.end());
165	}
166	return BreakableToken::Split (StringRef::npos, `0`);
167	}
168
169	static BreakableToken::Split
170	getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
171	unsigned TabWidth, encoding::Encoding Encoding) {
172	// FIXME: Reduce unit test case.
173	if (Text.empty())
174	return BreakableToken::Split (StringRef::npos, `0`);
175	if (ColumnLimit <= UsedColumns)
176	return BreakableToken::Split (StringRef::npos, `0`);
177	unsigned MaxSplit = ColumnLimit - UsedColumns;
178	StringRef::size_type SpaceOffset = `0`;
179	StringRef::size_type SlashOffset = `0`;
180	StringRef::size_type WordStartOffset = `0`;
181	StringRef::size_type SplitPoint = `0`;
182	for (unsigned Chars = `0`;;) {
183	unsigned Advance;
184	if (Text [`0`] == `'\\'`) {
185	Advance = encoding::getEscapeSequenceLength(Text);
186	Chars += Advance;
187	} else {
188	Advance = encoding::getCodePointNumBytes(FirstChar: Text [`0`], Encoding);
189	Chars += encoding::columnWidthWithTabs(
190	Text: Text.substr(Start: `0`, N: Advance), StartColumn: UsedColumns + Chars, TabWidth, Encoding);
191	}
192
193	if (Chars > MaxSplit \|\| Text.size() <= Advance)
194	break;
195
196	if (IsBlank(C: Text [`0`]))
197	SpaceOffset = SplitPoint;
198	if (Text [`0`] == `'/'`)
199	SlashOffset = SplitPoint;
200	if (Advance == `1` && !isAlphanumeric(c: Text [`0`]))
201	WordStartOffset = SplitPoint;
202
203	SplitPoint += Advance;
204	Text = Text.substr(Start: Advance);
205	}
206
207	if (SpaceOffset != `0`)
208	return BreakableToken::Split (SpaceOffset + `1`, `0`);
209	if (SlashOffset != `0`)
210	return BreakableToken::Split (SlashOffset + `1`, `0`);
211	if (WordStartOffset != `0`)
212	return BreakableToken::Split (WordStartOffset + `1`, `0`);
213	if (SplitPoint != `0`)
214	return BreakableToken::Split (SplitPoint, `0`);
215	return BreakableToken::Split (StringRef::npos, `0`);
216	}
217
218	bool switchesFormatting(const FormatToken &Token) {
219	assert((Token.is(TT_BlockComment) \|\| Token.is(TT_LineComment)) &&
220	"formatting regions are switched by comment tokens");
221	StringRef Content = Token.TokenText.substr(Start: `2`).ltrim();
222	return Content.starts_with(Prefix: "clang-format on") \|\|
223	Content.starts_with(Prefix: "clang-format off");
224	}
225
226	unsigned
227	BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
228	Split Split) const {
229	// Example: consider the content
230	// lala lala
231	// - RemainingTokenColumns is the original number of columns, 10;
232	// - Split is (4, 2), denoting the two spaces between the two words;
233	//
234	// We compute the number of columns when the split is compressed into a single
235	// space, like:
236	// lala lala
237	//
238	// FIXME: Correctly measure the length of whitespace in Split.second so it
239	// works with tabs.
240	return RemainingTokenColumns + `1` - Split.second;
241	}
242
243	unsigned BreakableStringLiteral::getLineCount() const { return `1`; }
244
245	unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
246	unsigned Offset,
247	StringRef::size_type Length,
248	unsigned StartColumn) const {
249	llvm_unreachable("Getting the length of a part of the string literal "
250	"indicates that the code tries to reflow it.");
251	}
252
253	unsigned
254	BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
255	unsigned StartColumn) const {
256	return UnbreakableTailLength + Postfix.size() +
257	encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
258	TabWidth: Style.TabWidth, Encoding);
259	}
260
261	unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
262	bool Break) const {
263	return StartColumn + Prefix.size();
264	}
265
266	BreakableStringLiteral::BreakableStringLiteral(
267	const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
268	StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
269	encoding::Encoding Encoding, const FormatStyle &Style)
270	: BreakableToken (Tok, InPPDirective, Encoding, Style),
271	StartColumn(StartColumn), Prefix (Prefix), Postfix (Postfix),
272	UnbreakableTailLength(UnbreakableTailLength) {
273	assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
274	Line = Tok.TokenText.substr(
275	Start: Prefix.size(), N: Tok.TokenText.size() - Prefix.size() - Postfix.size());
276	}
277
278	BreakableToken::Split BreakableStringLiteral::getSplit(
279	unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
280	unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
281	return getStringSplit(Text: Line.substr(Start: TailOffset), UsedColumns: ContentStartColumn,
282	ColumnLimit: ColumnLimit - Postfix.size(), TabWidth: Style.TabWidth, Encoding);
283	}
284
285	void BreakableStringLiteral::insertBreak(unsigned LineIndex,
286	unsigned TailOffset, Split Split,
287	unsigned ContentIndent,
288	WhitespaceManager &Whitespaces) const {
289	Whitespaces.replaceWhitespaceInToken(
290	Tok, Offset: Prefix.size() + TailOffset + Split.first, ReplaceChars: Split.second, PreviousPostfix: Postfix,
291	CurrentPrefix: Prefix, InPPDirective, Newlines: `1`, Spaces: StartColumn);
292	}
293
294	BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
295	const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
296	unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
297	encoding::Encoding Encoding, const FormatStyle &Style)
298	: BreakableStringLiteral (
299	Tok, StartColumn, /Prefix=/QuoteStyle == SingleQuotes ? "'"
300	: QuoteStyle == AtDoubleQuotes ? "@\""
301	: "\"",
302	/Postfix=/QuoteStyle == SingleQuotes ? "'" : "\"",
303	UnbreakableTailLength, InPPDirective, Encoding, Style),
304	BracesNeeded(Tok.isNot(Kind: TT_StringInConcatenation)),
305	QuoteStyle(QuoteStyle) {
306	// Find the replacement text for inserting braces and quotes and line breaks.
307	// We don't create an allocated string concatenated from parts here because it
308	// has to outlive the BreakableStringliteral object. The brace replacements
309	// include a quote so that WhitespaceManager can tell it apart from whitespace
310	// replacements between the string and surrounding tokens.
311
312	// The option is not implemented in JavaScript.
313	bool SignOnNewLine =
314	!Style.isJavaScript() &&
315	Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
316
317	if (Style.isVerilog()) {
318	// In Verilog, all strings are quoted by double quotes, joined by commas,
319	// and wrapped in braces. The comma is always before the newline.
320	assert(QuoteStyle == DoubleQuotes);
321	LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
322	RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
323	Postfix = "\",";
324	Prefix = "\"";
325	} else {
326	// The plus sign may be on either line. And also C# and JavaScript have
327	// several quoting styles.
328	if (QuoteStyle == SingleQuotes) {
329	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
330	RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
331	Postfix = SignOnNewLine ? "'" : "' +";
332	Prefix = SignOnNewLine ? "+ '" : "'";
333	} else {
334	if (QuoteStyle == AtDoubleQuotes) {
335	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
336	Prefix = SignOnNewLine ? "+ @\"" : "@\"";
337	} else {
338	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
339	Prefix = SignOnNewLine ? "+ \"" : "\"";
340	}
341	RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
342	Postfix = SignOnNewLine ? "\"" : "\" +";
343	}
344	}
345
346	// Following lines are indented by the width of the brace and space if any.
347	ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - `1` : `0`;
348	// The plus sign may need to be unindented depending on the style.
349	// FIXME: Add support for DontAlign.
350	if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
351	Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
352	ContinuationIndent -= `2`;
353	}
354	}
355
356	unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
357	unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
358	return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : `1`) +
359	encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
360	TabWidth: Style.TabWidth, Encoding);
361	}
362
363	unsigned
364	BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
365	bool Break) const {
366	return std::max(
367	a: `0`,
368	b: static_cast<int>(StartColumn) +
369	(Break ? ContinuationIndent + static_cast<int>(Prefix.size())
370	: (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - `1`
371	: `0`) +
372	(QuoteStyle == AtDoubleQuotes ? `2` : `1`)));
373	}
374
375	void BreakableStringLiteralUsingOperators::insertBreak(
376	unsigned LineIndex, unsigned TailOffset, Split Split,
377	unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
378	Whitespaces.replaceWhitespaceInToken(
379	Tok, /Offset=/(QuoteStyle == AtDoubleQuotes ? `2` : `1`) + TailOffset +
380	Split.first,
381	/ReplaceChars=/Split.second, /PreviousPostfix=/Postfix,
382	/CurrentPrefix=/Prefix, InPPDirective, /NewLines=/Newlines: `1`,
383	/Spaces=/
384	std::max(a: `0`, b: static_cast<int>(StartColumn) + ContinuationIndent));
385	}
386
387	void BreakableStringLiteralUsingOperators::updateAfterBroken(
388	WhitespaceManager &Whitespaces) const {
389	// Add the braces required for breaking the token if they are needed.
390	if (!BracesNeeded)
391	return;
392
393	// To add a brace or parenthesis, we replace the quote (or the at sign) with a
394	// brace and another quote. This is because the rest of the program requires
395	// one replacement for each source range. If we replace the empty strings
396	// around the string, it may conflict with whitespace replacements between the
397	// string and adjacent tokens.
398	Whitespaces.replaceWhitespaceInToken(
399	Tok, /Offset=/`0`, /ReplaceChars=/`1`, /PreviousPostfix=/"",
400	/CurrentPrefix=/LeftBraceQuote, InPPDirective, /NewLines=/Newlines: `0`,
401	/Spaces=/`0`);
402	Whitespaces.replaceWhitespaceInToken(
403	Tok, /Offset=/Tok.TokenText.size() - `1`, /ReplaceChars=/`1`,
404	/PreviousPostfix=/RightBraceQuote,
405	/CurrentPrefix=/"", InPPDirective, /NewLines=/Newlines: `0`, /Spaces=/`0`);
406	}
407
408	BreakableComment::BreakableComment(const FormatToken &Token,
409	unsigned StartColumn, bool InPPDirective,
410	encoding::Encoding Encoding,
411	const FormatStyle &Style)
412	: BreakableToken (Token, InPPDirective, Encoding, Style),
413	StartColumn(StartColumn) {}
414
415	unsigned BreakableComment::getLineCount() const { return Lines.size(); }
416
417	BreakableToken::Split
418	BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
419	unsigned ColumnLimit, unsigned ContentStartColumn,
420	const llvm::Regex &CommentPragmasRegex) const {
421	// Don't break lines matching the comment pragmas regex.
422	if (!AlwaysReflow \|\| CommentPragmasRegex.match(String: Content [LineIndex]))
423	return Split (StringRef::npos, `0`);
424	return getCommentSplit(Text: Content [LineIndex].substr(Start: TailOffset),
425	ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
426	Encoding, Style);
427	}
428
429	void BreakableComment::compressWhitespace(
430	unsigned LineIndex, unsigned TailOffset, Split Split,
431	WhitespaceManager &Whitespaces) const {
432	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
433	// Text is relative to the content line, but Whitespaces operates relative to
434	// the start of the corresponding token, so compute the start of the Split
435	// that needs to be compressed into a single space relative to the start of
436	// its token.
437	unsigned BreakOffsetInToken =
438	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
439	unsigned CharsToRemove = Split.second;
440	Whitespaces.replaceWhitespaceInToken(
441	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "", CurrentPrefix: "",
442	/InPPDirective=/false, /Newlines=/`0`, /Spaces=/`1`);
443	}
444
445	const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
446	return Tokens [LineIndex] ? *Tokens [LineIndex] : Tok;
447	}
448
449	static bool mayReflowContent(StringRef Content) {
450	Content = Content.trim(Chars: Blanks);
451	// Lines starting with '@' or '\' commonly have special meaning.
452	// Lines starting with '-', '-#', '+' or '' are bulleted/numbered lists.*
453	bool hasSpecialMeaningPrefix = false;
454	for (StringRef Prefix :
455	{"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
456	if (Content.starts_with(Prefix)) {
457	hasSpecialMeaningPrefix = true;
458	break;
459	}
460	}
461
462	// Numbered lists may also start with a number followed by '.'
463	// To avoid issues if a line starts with a number which is actually the end
464	// of a previous line, we only consider numbers with up to 2 digits.
465	static const auto kNumberedListRegexp = llvm::Regex ("^[1-9][0-9]?\\. ");
466	hasSpecialMeaningPrefix =
467	hasSpecialMeaningPrefix \|\| kNumberedListRegexp.match(String: Content);
468
469	// Simple heuristic for what to reflow: content should contain at least two
470	// characters and either the first or second character must be
471	// non-punctuation.
472	return Content.size() >= `2` && !hasSpecialMeaningPrefix &&
473	!Content.ends_with(Suffix: "\\") &&
474	// Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
475	// true, then the first code point must be 1 byte long.
476	(!isPunctuation(c: Content [`0`]) \|\| !isPunctuation(c: Content [`1`]));
477	}
478
479	BreakableBlockComment::BreakableBlockComment(
480	const FormatToken &Token, unsigned StartColumn,
481	unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
482	encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
483	: BreakableComment (Token, StartColumn, InPPDirective, Encoding, Style),
484	DelimitersOnNewline(false),
485	UnbreakableTailLength(Token.UnbreakableTailLength) {
486	assert(Tok.is(TT_BlockComment) &&
487	"block comment section must start with a block comment");
488
489	StringRef TokenText(Tok.TokenText);
490	assert(TokenText.starts_with("/") && TokenText.ends_with("/"));
491	TokenText.substr(Start: `2`, N: TokenText.size() - `4`)
492	.split(A&: Lines, Separator: UseCRLF ? "\r\n" : "\n");
493
494	int IndentDelta = StartColumn - OriginalStartColumn;
495	Content.resize(N: Lines.size());
496	Content [`0`] = Lines [`0`];
497	ContentColumn.resize(N: Lines.size());
498	// Account for the initial '/'.*
499	ContentColumn [`0`] = StartColumn + `2`;
500	Tokens.resize(N: Lines.size());
501	for (size_t i = `1`; i < Lines.size(); ++i)
502	adjustWhitespace(LineIndex: i, IndentDelta);
503
504	// Align decorations with the column of the star on the first line,
505	// that is one column after the start "/".*
506	DecorationColumn = StartColumn + `1`;
507
508	// Account for comment decoration patterns like this:
509	//
510	// /*
511	// * blah blah blah*
512	// /*
513	if (Lines.size() >= `2` && Content [`1`].starts_with(Prefix: "**") &&
514	static_cast<unsigned>(ContentColumn [`1`]) == StartColumn) {
515	DecorationColumn = StartColumn;
516	}
517
518	Decoration = "* ";
519	if (Lines.size() == `1` && !FirstInLine) {
520	// Comments for which FirstInLine is false can start on arbitrary column,
521	// and available horizontal space can be too small to align consecutive
522	// lines with the first one.
523	// FIXME: We could, probably, align them to current indentation level, but
524	// now we just wrap them without stars.
525	Decoration = "";
526	}
527	for (size_t i = `1`, e = Content.size(); i < e && !Decoration.empty(); ++i) {
528	const StringRef &Text = Content [i];
529	if (i + `1` == e) {
530	// If the last line is empty, the closing "/" will have a star.*
531	if (Text.empty())
532	break;
533	} else if (!Text.empty() && Decoration.starts_with(Prefix: Text)) {
534	continue;
535	}
536	while (!Text.starts_with(Prefix: Decoration))
537	Decoration = Decoration.drop_back(N: `1`);
538	}
539
540	LastLineNeedsDecoration = true;
541	IndentAtLineBreak = ContentColumn [`0`] + `1`;
542	for (size_t i = `1`, e = Lines.size(); i < e; ++i) {
543	if (Content [i].empty()) {
544	if (i + `1` == e) {
545	// Empty last line means that we already have a star as a part of the
546	// trailing /. We also need to preserve whitespace, so that / is
547	// correctly indented.
548	LastLineNeedsDecoration = false;
549	// Align the star in the last '/' with the stars on the previous lines.*
550	if (e >= `2` && !Decoration.empty())
551	ContentColumn [i] = DecorationColumn;
552	} else if (Decoration.empty()) {
553	// For all other lines, set the start column to 0 if they're empty, so
554	// we do not insert trailing whitespace anywhere.
555	ContentColumn [i] = `0`;
556	}
557	continue;
558	}
559
560	// The first line already excludes the star.
561	// The last line excludes the star if LastLineNeedsDecoration is false.
562	// For all other lines, adjust the line to exclude the star and
563	// (optionally) the first whitespace.
564	unsigned DecorationSize = Decoration.starts_with(Prefix: Content [i])
565	? Content [i].size()
566	: Decoration.size();
567	if (DecorationSize)
568	ContentColumn [i] = DecorationColumn + DecorationSize;
569	Content [i] = Content [i].substr(Start: DecorationSize);
570	if (!Decoration.starts_with(Prefix: Content [i])) {
571	IndentAtLineBreak =
572	std::min<int>(a: IndentAtLineBreak, b: std::max(a: `0`, b: ContentColumn [i]));
573	}
574	}
575	IndentAtLineBreak = std::max<unsigned>(a: IndentAtLineBreak, b: Decoration.size());
576
577	// Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
578	if (Style.isJavaScript() \|\| Style.isJava()) {
579	if ((Lines [`0`] == "" \|\| Lines [`0`].starts_with(Prefix: " ")) && Lines.size() > `1`) {
580	// This is a multiline jsdoc comment.
581	DelimitersOnNewline = true;
582	} else if (Lines [`0`].starts_with(Prefix: "* ") && Lines.size() == `1`) {
583	// Detect a long single-line comment, like:
584	// /* long long long /
585	// Below, '2' is the width of '/'.*
586	unsigned EndColumn =
587	ContentColumn [`0`] +
588	encoding::columnWidthWithTabs(Text: Lines [`0`], StartColumn: ContentColumn [`0`],
589	TabWidth: Style.TabWidth, Encoding) +
590	`2`;
591	DelimitersOnNewline = EndColumn > Style.ColumnLimit;
592	}
593	}
594
595	LLVM_DEBUG({
596	llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
597	llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
598	for (size_t i = `0`; i < Lines.size(); ++i) {
599	llvm::dbgs() << i << " \|" << Content[i] << "\| "
600	<< "CC=" << ContentColumn[i] << "\| "
601	<< "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
602	}
603	});
604	}
605
606	BreakableToken::Split BreakableBlockComment::getSplit(
607	unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
608	unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
609	// Don't break lines matching the comment pragmas regex.
610	if (!AlwaysReflow \|\| CommentPragmasRegex.match(String: Content [LineIndex]))
611	return Split (StringRef::npos, `0`);
612	return getCommentSplit(Text: Content [LineIndex].substr(Start: TailOffset),
613	ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
614	Encoding, Style, DecorationEndsWithStar: Decoration.ends_with(Suffix: "*"));
615	}
616
617	void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
618	int IndentDelta) {
619	// When in a preprocessor directive, the trailing backslash in a block comment
620	// is not needed, but can serve a purpose of uniformity with necessary escaped
621	// newlines outside the comment. In this case we remove it here before
622	// trimming the trailing whitespace. The backslash will be re-added later when
623	// inserting a line break.
624	size_t EndOfPreviousLine = Lines [LineIndex - `1`].size();
625	if (InPPDirective && Lines [LineIndex - `1`].ends_with(Suffix: "\\"))
626	--EndOfPreviousLine;
627
628	// Calculate the end of the non-whitespace text in the previous line.
629	EndOfPreviousLine =
630	Lines [LineIndex - `1`].find_last_not_of(Chars: Blanks, From: EndOfPreviousLine);
631	if (EndOfPreviousLine == StringRef::npos)
632	EndOfPreviousLine = `0`;
633	else
634	++EndOfPreviousLine;
635	// Calculate the start of the non-whitespace text in the current line.
636	size_t StartOfLine = Lines [LineIndex].find_first_not_of(Chars: Blanks);
637	if (StartOfLine == StringRef::npos)
638	StartOfLine = Lines [LineIndex].size();
639
640	StringRef Whitespace = Lines [LineIndex].substr(Start: `0`, N: StartOfLine);
641	// Adjust Lines to only contain relevant text.
642	size_t PreviousContentOffset =
643	Content [LineIndex - `1`].data() - Lines [LineIndex - `1`].data();
644	Content [LineIndex - `1`] = Lines [LineIndex - `1`].substr(
645	Start: PreviousContentOffset, N: EndOfPreviousLine - PreviousContentOffset);
646	Content [LineIndex] = Lines [LineIndex].substr(Start: StartOfLine);
647
648	// Adjust the start column uniformly across all lines.
649	ContentColumn [LineIndex] =
650	encoding::columnWidthWithTabs(Text: Whitespace, StartColumn: `0`, TabWidth: Style.TabWidth, Encoding) +
651	IndentDelta;
652	}
653
654	unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
655	unsigned Offset,
656	StringRef::size_type Length,
657	unsigned StartColumn) const {
658	return encoding::columnWidthWithTabs(
659	Text: Content [LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
660	Encoding);
661	}
662
663	unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
664	unsigned Offset,
665	unsigned StartColumn) const {
666	unsigned LineLength =
667	UnbreakableTailLength +
668	getRangeLength(LineIndex, Offset, Length: StringRef::npos, StartColumn);
669	if (LineIndex + `1` == Lines.size()) {
670	LineLength += `2`;
671	// We never need a decoration when breaking just the trailing "/" postfix.*
672	bool HasRemainingText = Offset < Content [LineIndex].size();
673	if (!HasRemainingText) {
674	bool HasDecoration = Lines [LineIndex].ltrim().starts_with(Prefix: Decoration);
675	if (HasDecoration)
676	LineLength -= Decoration.size();
677	}
678	}
679	return LineLength;
680	}
681
682	unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
683	bool Break) const {
684	if (Break)
685	return IndentAtLineBreak;
686	return std::max(a: `0`, b: ContentColumn [LineIndex]);
687	}
688
689	const llvm::StringSet<>
690	BreakableBlockComment::ContentIndentingJavadocAnnotations = {
691	"@param", "@return", "@returns", "@throws", "@type", "@template",
692	"@see", "@deprecated", "@define", "@exports", "@mods", "@private",
693	};
694
695	unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
696	if (!Style.isJava() && !Style.isJavaScript())
697	return `0`;
698	// The content at LineIndex 0 of a comment like:
699	// /* line 0 /
700	// is " line 0", so we need to skip over the decoration in that case.*
701	StringRef ContentWithNoDecoration = Content [LineIndex];
702	if (LineIndex == `0` && ContentWithNoDecoration.starts_with(Prefix: "*"))
703	ContentWithNoDecoration = ContentWithNoDecoration.substr(Start: `1`).ltrim(Chars: Blanks);
704	StringRef FirstWord = ContentWithNoDecoration.substr(
705	Start: `0`, N: ContentWithNoDecoration.find_first_of(Chars: Blanks));
706	if (ContentIndentingJavadocAnnotations.contains(key: FirstWord))
707	return Style.ContinuationIndentWidth;
708	return `0`;
709	}
710
711	void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
712	Split Split, unsigned ContentIndent,
713	WhitespaceManager &Whitespaces) const {
714	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
715	StringRef Prefix = Decoration;
716	// We need this to account for the case when we have a decoration " " for all*
717	// the lines except for the last one, where the star in "/" acts as a*
718	// decoration.
719	unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
720	if (LineIndex + `1` == Lines.size() &&
721	Text.size() == Split.first + Split.second) {
722	// For the last line we need to break before "/", but not to add "* ".*
723	Prefix = "";
724	if (LocalIndentAtLineBreak >= `2`)
725	LocalIndentAtLineBreak -= `2`;
726	}
727	// The split offset is from the beginning of the line. Convert it to an offset
728	// from the beginning of the token text.
729	unsigned BreakOffsetInToken =
730	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
731	unsigned CharsToRemove = Split.second;
732	assert(LocalIndentAtLineBreak >= Prefix.size());
733	std::string PrefixWithTrailingIndent = std::string (Prefix);
734	PrefixWithTrailingIndent.append(n: ContentIndent, c: `' '`);
735	Whitespaces.replaceWhitespaceInToken(
736	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
737	CurrentPrefix: PrefixWithTrailingIndent, InPPDirective, /Newlines=/`1`,
738	/Spaces=/LocalIndentAtLineBreak + ContentIndent -
739	PrefixWithTrailingIndent.size());
740	}
741
742	BreakableToken::Split BreakableBlockComment::getReflowSplit(
743	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
744	if (!mayReflow(LineIndex, CommentPragmasRegex))
745	return Split (StringRef::npos, `0`);
746
747	// If we're reflowing into a line with content indent, only reflow the next
748	// line if its starting whitespace matches the content indent.
749	size_t Trimmed = Content [LineIndex].find_first_not_of(Chars: Blanks);
750	if (LineIndex) {
751	unsigned PreviousContentIndent = getContentIndent(LineIndex: LineIndex - `1`);
752	if (PreviousContentIndent && Trimmed != StringRef::npos &&
753	Trimmed != PreviousContentIndent) {
754	return Split (StringRef::npos, `0`);
755	}
756	}
757
758	return Split (`0`, Trimmed != StringRef::npos ? Trimmed : `0`);
759	}
760
761	bool BreakableBlockComment::introducesBreakBeforeToken() const {
762	// A break is introduced when we want delimiters on newline.
763	return DelimitersOnNewline &&
764	Lines [`0`].substr(Start: `1`).find_first_not_of(Chars: Blanks) != StringRef::npos;
765	}
766
767	void BreakableBlockComment::reflow(unsigned LineIndex,
768	WhitespaceManager &Whitespaces) const {
769	StringRef TrimmedContent = Content [LineIndex].ltrim(Chars: Blanks);
770	// Here we need to reflow.
771	assert(Tokens[LineIndex - `1`] == Tokens[LineIndex] &&
772	"Reflowing whitespace within a token");
773	// This is the offset of the end of the last line relative to the start of
774	// the token text in the token.
775	unsigned WhitespaceOffsetInToken = Content [LineIndex - `1`].data() +
776	Content [LineIndex - `1`].size() -
777	tokenAt(LineIndex).TokenText.data();
778	unsigned WhitespaceLength = TrimmedContent.data() -
779	tokenAt(LineIndex).TokenText.data() -
780	WhitespaceOffsetInToken;
781	Whitespaces.replaceWhitespaceInToken(
782	Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken,
783	/ReplaceChars=/WhitespaceLength, /PreviousPostfix=/"",
784	/CurrentPrefix=/ReflowPrefix, InPPDirective, /Newlines=/`0`,
785	/Spaces=/`0`);
786	}
787
788	void BreakableBlockComment::adaptStartOfLine(
789	unsigned LineIndex, WhitespaceManager &Whitespaces) const {
790	if (LineIndex == `0`) {
791	if (DelimitersOnNewline) {
792	// Since we're breaking at index 1 below, the break position and the
793	// break length are the same.
794	// Note: this works because getCommentSplit is careful never to split at
795	// the beginning of a line.
796	size_t BreakLength = Lines [`0`].substr(Start: `1`).find_first_not_of(Chars: Blanks);
797	if (BreakLength != StringRef::npos) {
798	insertBreak(LineIndex, TailOffset: `0`, Split: Split (`1`, BreakLength), /ContentIndent=/`0`,
799	Whitespaces);
800	}
801	}
802	return;
803	}
804	// Here no reflow with the previous line will happen.
805	// Fix the decoration of the line at LineIndex.
806	StringRef Prefix = Decoration;
807	if (Content [LineIndex].empty()) {
808	if (LineIndex + `1` == Lines.size()) {
809	if (!LastLineNeedsDecoration) {
810	// If the last line was empty, we don't need a prefix, as the / will*
811	// line up with the decoration (if it exists).
812	Prefix = "";
813	}
814	} else if (!Decoration.empty()) {
815	// For other empty lines, if we do have a decoration, adapt it to not
816	// contain a trailing whitespace.
817	Prefix = Prefix.substr(Start: `0`, N: `1`);
818	}
819	} else if (ContentColumn [LineIndex] == `1`) {
820	// This line starts immediately after the decorating .*
821	Prefix = Prefix.substr(Start: `0`, N: `1`);
822	}
823	// This is the offset of the end of the last line relative to the start of the
824	// token text in the token.
825	unsigned WhitespaceOffsetInToken = Content [LineIndex - `1`].data() +
826	Content [LineIndex - `1`].size() -
827	tokenAt(LineIndex).TokenText.data();
828	unsigned WhitespaceLength = Content [LineIndex].data() -
829	tokenAt(LineIndex).TokenText.data() -
830	WhitespaceOffsetInToken;
831	Whitespaces.replaceWhitespaceInToken(
832	Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken, ReplaceChars: WhitespaceLength, PreviousPostfix: "", CurrentPrefix: Prefix,
833	InPPDirective, /Newlines=/`1`, Spaces: ContentColumn [LineIndex] - Prefix.size());
834	}
835
836	BreakableToken::Split
837	BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
838	if (DelimitersOnNewline) {
839	// Replace the trailing whitespace of the last line with a newline.
840	// In case the last line is empty, the ending '/' is already on its own*
841	// line.
842	StringRef Line = Content.back().substr(Start: TailOffset);
843	StringRef TrimmedLine = Line.rtrim(Chars: Blanks);
844	if (!TrimmedLine.empty())
845	return Split (TrimmedLine.size(), Line.size() - TrimmedLine.size());
846	}
847	return Split (StringRef::npos, `0`);
848	}
849
850	bool BreakableBlockComment::mayReflow(
851	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
852	// Content[LineIndex] may exclude the indent after the '' decoration. In that*
853	// case, we compute the start of the comment pragma manually.
854	StringRef IndentContent = Content [LineIndex];
855	if (Lines [LineIndex].ltrim(Chars: Blanks).starts_with(Prefix: "*"))
856	IndentContent = Lines [LineIndex].ltrim(Chars: Blanks).substr(Start: `1`);
857	return LineIndex > `0` && AlwaysReflow &&
858	!CommentPragmasRegex.match(String: IndentContent) &&
859	mayReflowContent(Content: Content [LineIndex]) && !Tok.Finalized &&
860	!switchesFormatting(Token: tokenAt(LineIndex));
861	}
862
863	BreakableLineCommentSection::BreakableLineCommentSection(
864	const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
865	encoding::Encoding Encoding, const FormatStyle &Style)
866	: BreakableComment (Token, StartColumn, InPPDirective, Encoding, Style) {
867	assert(Tok.is(TT_LineComment) &&
868	"line comment section must start with a line comment");
869	FormatToken LineTok = nullptr*;
870	const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
871	// How many spaces we changed in the first line of the section, this will be
872	// applied in all following lines
873	int FirstLineSpaceChange = `0`;
874	for (const FormatToken *CurrentTok = &Tok;
875	CurrentTok && CurrentTok->is(TT: TT_LineComment);
876	CurrentTok = CurrentTok->Next) {
877	LastLineTok = LineTok;
878	StringRef TokenText(CurrentTok->TokenText);
879	assert((TokenText.starts_with("//") \|\| TokenText.starts_with("#")) &&
880	"unsupported line comment prefix, '//' and '#' are supported");
881	size_t FirstLineIndex = Lines.size();
882	TokenText.split(A&: Lines, Separator: "\n");
883	Content.resize(N: Lines.size());
884	ContentColumn.resize(N: Lines.size());
885	PrefixSpaceChange.resize(N: Lines.size());
886	Tokens.resize(N: Lines.size());
887	Prefix.resize(N: Lines.size());
888	OriginalPrefix.resize(N: Lines.size());
889	for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
890	Lines [i] = Lines [i].ltrim(Chars: Blanks);
891	StringRef IndentPrefix = getLineCommentIndentPrefix(Comment: Lines [i], Style);
892	OriginalPrefix [i] = IndentPrefix;
893	const int SpacesInPrefix = llvm::count(Range&: IndentPrefix, Element: `' '`);
894
895	// This lambda also considers multibyte character that is not handled in
896	// functions like isPunctuation provided by CharInfo.
897	const auto NoSpaceBeforeFirstCommentChar = [&]() {
898	assert(Lines[i].size() > IndentPrefix.size());
899	const char FirstCommentChar = Lines [i][IndentPrefix.size()];
900	const unsigned FirstCharByteSize =
901	encoding::getCodePointNumBytes(FirstChar: FirstCommentChar, Encoding);
902	if (encoding::columnWidth(
903	Text: Lines [i].substr(Start: IndentPrefix.size(), N: FirstCharByteSize),
904	Encoding) != `1`) {
905	return false;
906	}
907	// In C-like comments, add a space before #. For example this is useful
908	// to preserve the relative indentation when commenting out code with
909	// #includes.
910	//
911	// In languages using # as the comment leader such as proto, don't
912	// add a space to support patterns like:
913	// #########
914	// # section
915	// #########
916	if (FirstCommentChar == `'#'` && !TokenText.starts_with(Prefix: "#"))
917	return false;
918	return FirstCommentChar == `'\\'` \|\| isPunctuation(c: FirstCommentChar) \|\|
919	isHorizontalWhitespace(c: FirstCommentChar);
920	};
921
922	// On the first line of the comment section we calculate how many spaces
923	// are to be added or removed, all lines after that just get only the
924	// change and we will not look at the maximum anymore. Additionally to the
925	// actual first line, we calculate that when the non space Prefix changes,
926	// e.g. from "///" to "//".
927	if (i == `0` \|\| OriginalPrefix [i].rtrim(Chars: Blanks) !=
928	OriginalPrefix [i - `1`].rtrim(Chars: Blanks)) {
929	if (SpacesInPrefix < Minimum && Lines [i].size() > IndentPrefix.size() &&
930	!NoSpaceBeforeFirstCommentChar ()) {
931	FirstLineSpaceChange = Minimum - SpacesInPrefix;
932	} else if (static_cast<unsigned>(SpacesInPrefix) >
933	Style.SpacesInLineCommentPrefix.Maximum) {
934	FirstLineSpaceChange =
935	Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
936	} else {
937	FirstLineSpaceChange = `0`;
938	}
939	}
940
941	if (Lines [i].size() != IndentPrefix.size()) {
942	PrefixSpaceChange [i] = FirstLineSpaceChange;
943
944	if (SpacesInPrefix + PrefixSpaceChange [i] < Minimum) {
945	PrefixSpaceChange [i] +=
946	Minimum - (SpacesInPrefix + PrefixSpaceChange [i]);
947	}
948
949	assert(Lines[i].size() > IndentPrefix.size());
950	const auto FirstNonSpace = Lines [i][IndentPrefix.size()];
951	const bool IsFormatComment = LineTok && switchesFormatting(Token: *LineTok);
952	const bool LineRequiresLeadingSpace =
953	!NoSpaceBeforeFirstCommentChar () \|\|
954	(FirstNonSpace == `'}'` && FirstLineSpaceChange != `0`);
955	const bool AllowsSpaceChange =
956	!IsFormatComment &&
957	(SpacesInPrefix != `0` \|\| LineRequiresLeadingSpace);
958
959	if (PrefixSpaceChange [i] > `0` && AllowsSpaceChange) {
960	Prefix [i] = IndentPrefix.str();
961	Prefix [i].append(n: PrefixSpaceChange [i], c: `' '`);
962	} else if (PrefixSpaceChange [i] < `0` && AllowsSpaceChange) {
963	Prefix [i] = IndentPrefix
964	.drop_back(N: std::min<std::size_t>(
965	a: -PrefixSpaceChange [i], b: SpacesInPrefix))
966	.str();
967	} else {
968	Prefix [i] = IndentPrefix.str();
969	}
970	} else {
971	// If the IndentPrefix is the whole line, there is no content and we
972	// drop just all space
973	Prefix [i] = IndentPrefix.drop_back(N: SpacesInPrefix).str();
974	}
975
976	Tokens [i] = LineTok;
977	Content [i] = Lines [i].substr(Start: IndentPrefix.size());
978	ContentColumn [i] =
979	StartColumn + encoding::columnWidthWithTabs(Text: Prefix [i], StartColumn,
980	TabWidth: Style.TabWidth, Encoding);
981
982	// Calculate the end of the non-whitespace text in this line.
983	size_t EndOfLine = Content [i].find_last_not_of(Chars: Blanks);
984	if (EndOfLine == StringRef::npos)
985	EndOfLine = Content [i].size();
986	else
987	++EndOfLine;
988	Content [i] = Content [i].substr(Start: `0`, N: EndOfLine);
989	}
990	LineTok = CurrentTok->Next;
991	if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
992	// A line comment section needs to broken by a line comment that is
993	// preceded by at least two newlines. Note that we put this break here
994	// instead of breaking at a previous stage during parsing, since that
995	// would split the contents of the enum into two unwrapped lines in this
996	// example, which is undesirable:
997	// enum A {
998	// a, // comment about a
999	//
1000	// // comment about b
1001	// b
1002	// };
1003	//
1004	// FIXME: Consider putting separate line comment sections as children to
1005	// the unwrapped line instead.
1006	break;
1007	}
1008	}
1009	}
1010
1011	unsigned
1012	BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1013	StringRef::size_type Length,
1014	unsigned StartColumn) const {
1015	return encoding::columnWidthWithTabs(
1016	Text: Content [LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
1017	Encoding);
1018	}
1019
1020	unsigned
1021	BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
1022	bool /Break/) const {
1023	return ContentColumn [LineIndex];
1024	}
1025
1026	void BreakableLineCommentSection::insertBreak(
1027	unsigned LineIndex, unsigned TailOffset, Split Split,
1028	unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1029	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
1030	// Compute the offset of the split relative to the beginning of the token
1031	// text.
1032	unsigned BreakOffsetInToken =
1033	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1034	unsigned CharsToRemove = Split.second;
1035	Whitespaces.replaceWhitespaceInToken(
1036	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
1037	CurrentPrefix: Prefix [LineIndex], InPPDirective, /Newlines=/`1`,
1038	/Spaces=/ContentColumn [LineIndex] - Prefix [LineIndex].size());
1039	}
1040
1041	BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
1042	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1043	if (!mayReflow(LineIndex, CommentPragmasRegex))
1044	return Split (StringRef::npos, `0`);
1045
1046	size_t Trimmed = Content [LineIndex].find_first_not_of(Chars: Blanks);
1047
1048	// In a line comment section each line is a separate token; thus, after a
1049	// split we replace all whitespace before the current line comment token
1050	// (which does not need to be included in the split), plus the start of the
1051	// line up to where the content starts.
1052	return Split (`0`, Trimmed != StringRef::npos ? Trimmed : `0`);
1053	}
1054
1055	void BreakableLineCommentSection::reflow(unsigned LineIndex,
1056	WhitespaceManager &Whitespaces) const {
1057	if (LineIndex > `0` && Tokens [LineIndex] != Tokens [LineIndex - `1`]) {
1058	// Reflow happens between tokens. Replace the whitespace between the
1059	// tokens by the empty string.
1060	Whitespaces.replaceWhitespace(
1061	Tok&: Tokens [LineIndex], /Newlines=/`0`, /Spaces=/*`0`,
1062	/StartOfTokenColumn=/StartColumn, /IsAligned=/true,
1063	/InPPDirective=/false);
1064	} else if (LineIndex > `0`) {
1065	// In case we're reflowing after the '\' in:
1066	//
1067	// // line comment \
1068	// // line 2
1069	//
1070	// the reflow happens inside the single comment token (it is a single line
1071	// comment with an unescaped newline).
1072	// Replace the whitespace between the '\' and '//' with the empty string.
1073	//
1074	// Offset points to after the '\' relative to start of the token.
1075	unsigned Offset = Lines [LineIndex - `1`].data() +
1076	Lines [LineIndex - `1`].size() -
1077	tokenAt(LineIndex: LineIndex - `1`).TokenText.data();
1078	// WhitespaceLength is the number of chars between the '\' and the '//' on
1079	// the next line.
1080	unsigned WhitespaceLength =
1081	Lines [LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1082	Whitespaces.replaceWhitespaceInToken(Tok: *Tokens [LineIndex], Offset,
1083	/ReplaceChars=/WhitespaceLength,
1084	/PreviousPostfix=/"",
1085	/CurrentPrefix=/"",
1086	/InPPDirective=/false,
1087	/Newlines=/`0`,
1088	/Spaces=/`0`);
1089	}
1090	// Replace the indent and prefix of the token with the reflow prefix.
1091	unsigned Offset =
1092	Lines [LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1093	unsigned WhitespaceLength =
1094	Content [LineIndex].data() - Lines [LineIndex].data();
1095	Whitespaces.replaceWhitespaceInToken(Tok: *Tokens [LineIndex], Offset,
1096	/ReplaceChars=/WhitespaceLength,
1097	/PreviousPostfix=/"",
1098	/CurrentPrefix=/ReflowPrefix,
1099	/InPPDirective=/false,
1100	/Newlines=/`0`,
1101	/Spaces=/`0`);
1102	}
1103
1104	void BreakableLineCommentSection::adaptStartOfLine(
1105	unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1106	// If this is the first line of a token, we need to inform Whitespace Manager
1107	// about it: either adapt the whitespace range preceding it, or mark it as an
1108	// untouchable token.
1109	// This happens for instance here:
1110	// // line 1 \
1111	// // line 2
1112	if (LineIndex > `0` && Tokens [LineIndex] != Tokens [LineIndex - `1`]) {
1113	// This is the first line for the current token, but no reflow with the
1114	// previous token is necessary. However, we still may need to adjust the
1115	// start column. Note that ContentColumn[LineIndex] is the expected
1116	// content column after a possible update to the prefix, hence the prefix
1117	// length change is included.
1118	unsigned LineColumn =
1119	ContentColumn [LineIndex] -
1120	(Content [LineIndex].data() - Lines [LineIndex].data()) +
1121	(OriginalPrefix [LineIndex].size() - Prefix [LineIndex].size());
1122
1123	// We always want to create a replacement instead of adding an untouchable
1124	// token, even if LineColumn is the same as the original column of the
1125	// token. This is because WhitespaceManager doesn't align trailing
1126	// comments if they are untouchable.
1127	Whitespaces.replaceWhitespace(Tok&: *Tokens [LineIndex],
1128	/Newlines=/`1`,
1129	/Spaces=/LineColumn,
1130	/StartOfTokenColumn=/LineColumn,
1131	/IsAligned=/true,
1132	/InPPDirective=/false);
1133	}
1134	if (OriginalPrefix [LineIndex] != Prefix [LineIndex]) {
1135	// Adjust the prefix if necessary.
1136	const auto SpacesToRemove = -std::min(a: PrefixSpaceChange [LineIndex], b: `0`);
1137	const auto SpacesToAdd = std::max(a: PrefixSpaceChange [LineIndex], b: `0`);
1138	Whitespaces.replaceWhitespaceInToken(
1139	Tok: tokenAt(LineIndex), Offset: OriginalPrefix [LineIndex].size() - SpacesToRemove,
1140	/ReplaceChars=/SpacesToRemove, PreviousPostfix: "", CurrentPrefix: "", /InPPDirective=/false,
1141	/Newlines=/`0`, /Spaces=/SpacesToAdd);
1142	}
1143	}
1144
1145	void BreakableLineCommentSection::updateNextToken(LineState &State) const {
1146	if (LastLineTok)
1147	State.NextToken = LastLineTok->Next;
1148	}
1149
1150	bool BreakableLineCommentSection::mayReflow(
1151	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1152	// Line comments have the indent as part of the prefix, so we need to
1153	// recompute the start of the line.
1154	StringRef IndentContent = Content [LineIndex];
1155	if (Lines [LineIndex].starts_with(Prefix: "//"))
1156	IndentContent = Lines [LineIndex].substr(Start: `2`);
1157	// FIXME: Decide whether we want to reflow non-regular indents:
1158	// Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1159	// OriginalPrefix[LineIndex-1]. That means we don't reflow
1160	// // text that protrudes
1161	// // into text with different indent
1162	// We do reflow in that case in block comments.
1163	return LineIndex > `0` && AlwaysReflow &&
1164	!CommentPragmasRegex.match(String: IndentContent) &&
1165	mayReflowContent(Content: Content [LineIndex]) && !Tok.Finalized &&
1166	!switchesFormatting(Token: tokenAt(LineIndex)) &&
1167	OriginalPrefix [LineIndex] == OriginalPrefix [LineIndex - `1`];
1168	}
1169
1170	} // namespace format
1171	} // namespace clang
1172

// Browse the source code of llvm_projects/clang/lib/Format/BreakableToken.cpp