BreakableToken.cpp source code [llvm_projects/clang/lib/Format/BreakableToken.cpp]

1	//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// Contains implementation of BreakableToken class and classes derived
11	/// from it.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#include "BreakableToken.h"
16	#include "ContinuationIndenter.h"
17	#include "clang/Basic/CharInfo.h"
18	#include "clang/Format/Format.h"
19	#include "llvm/ADT/STLExtras.h"
20	#include "llvm/Support/Debug.h"
21	#include <algorithm>
22
23	#define DEBUG_TYPE "format-token-breaker"
24
25	namespace clang {
26	namespace format {
27
28	static constexpr StringRef Blanks(" \t\v\f\r");
29
30	static StringRef getLineCommentIndentPrefix(StringRef Comment,
31	const FormatStyle &Style) {
32	static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
33	"//!", "//:", "//"};
34	static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
35	"//", "#"};
36	ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
37	if (Style.isTextProto())
38	KnownPrefixes = KnownTextProtoPrefixes;
39
40	assert(
41	llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
42	return Lhs.size() > Rhs.size();
43	}));
44
45	for (StringRef KnownPrefix : KnownPrefixes) {
46	if (Comment.starts_with(Prefix: KnownPrefix)) {
47	const auto PrefixLength =
48	Comment.find_first_not_of(C: `' '`, From: KnownPrefix.size());
49	return Comment.substr(Start: `0`, N: PrefixLength);
50	}
51	}
52	return {};
53	}
54
55	static BreakableToken::Split
56	getCommentSplit(StringRef Text, unsigned ContentStartColumn,
57	unsigned ColumnLimit, unsigned TabWidth,
58	encoding::Encoding Encoding, const FormatStyle &Style,
59	bool DecorationEndsWithStar = false) {
60	LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
61	<< "\", Column limit: " << ColumnLimit
62	<< ", Content start: " << ContentStartColumn << "\n");
63	if (ColumnLimit <= ContentStartColumn + `1`)
64	return BreakableToken::Split (StringRef::npos, `0`);
65
66	unsigned MaxSplit = ColumnLimit - ContentStartColumn + `1`;
67	unsigned MaxSplitBytes = `0`;
68
69	for (unsigned NumChars = `0`;
70	NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
71	unsigned BytesInChar =
72	encoding::getCodePointNumBytes(FirstChar: Text [MaxSplitBytes], Encoding);
73	NumChars += encoding::columnWidthWithTabs(
74	Text: Text.substr(Start: MaxSplitBytes, N: BytesInChar), StartColumn: ContentStartColumn + NumChars,
75	TabWidth, Encoding);
76	MaxSplitBytes += BytesInChar;
77	}
78
79	// In JavaScript, some @tags can be followed by {, and machinery that parses
80	// these comments will fail to understand the comment if followed by a line
81	// break. So avoid ever breaking before a {.
82	if (Style.isJavaScript()) {
83	StringRef::size_type SpaceOffset =
84	Text.find_first_of(Chars: Blanks, From: MaxSplitBytes);
85	if (SpaceOffset != StringRef::npos && SpaceOffset + `1` < Text.size() &&
86	Text [SpaceOffset + `1`] == `'{'`) {
87	MaxSplitBytes = SpaceOffset + `1`;
88	}
89	}
90
91	StringRef::size_type SpaceOffset = Text.find_last_of(Chars: Blanks, From: MaxSplitBytes);
92
93	static const auto kNumberedListRegexp = llvm::Regex ("^[1-9][0-9]?\\.");
94	// Some spaces are unacceptable to break on, rewind past them.
95	while (SpaceOffset != StringRef::npos) {
96	// If a line-comment ends with `\`, the next line continues the comment,
97	// whether or not it starts with `//`. This is confusing and triggers
98	// -Wcomment.
99	// Avoid introducing multiline comments by not allowing a break right
100	// after '\'.
101	if (Style.isCpp()) {
102	StringRef::size_type LastNonBlank =
103	Text.find_last_not_of(Chars: Blanks, From: SpaceOffset);
104	if (LastNonBlank != StringRef::npos && Text [LastNonBlank] == `'\\'`) {
105	SpaceOffset = Text.find_last_of(Chars: Blanks, From: LastNonBlank);
106	continue;
107	}
108	}
109
110	// Do not split before a number followed by a dot: this would be interpreted
111	// as a numbered list, which would prevent re-flowing in subsequent passes.
112	if (kNumberedListRegexp.match(String: Text.substr(Start: SpaceOffset).ltrim(Chars: Blanks))) {
113	SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
114	continue;
115	}
116
117	// Avoid ever breaking before a @tag or a { in JavaScript.
118	if (Style.isJavaScript() && SpaceOffset + `1` < Text.size() &&
119	(Text [SpaceOffset + `1`] == `'{'` \|\| Text [SpaceOffset + `1`] == `'@'`)) {
120	SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
121	continue;
122	}
123
124	break;
125	}
126
127	if (SpaceOffset == StringRef::npos \|\|
128	// Don't break at leading whitespace.
129	Text.find_last_not_of(Chars: Blanks, From: SpaceOffset) == StringRef::npos) {
130	// Make sure that we don't break at leading whitespace that
131	// reaches past MaxSplit.
132	StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Chars: Blanks);
133	if (FirstNonWhitespace == StringRef::npos) {
134	// If the comment is only whitespace, we cannot split.
135	return BreakableToken::Split (StringRef::npos, `0`);
136	}
137	SpaceOffset = Text.find_first_of(
138	Chars: Blanks, From: std::max<unsigned>(a: MaxSplitBytes, b: FirstNonWhitespace));
139	}
140	if (SpaceOffset != StringRef::npos && SpaceOffset != `0`) {
141	// adaptStartOfLine will break after lines starting with /* if the comment*
142	// is broken anywhere. Avoid emitting this break twice here.
143	// Example: in /* longtextcomesherethatbreaks / (with ColumnLimit 20) will
144	// insert a break after /, so this code must not insert the same break.
145	if (SpaceOffset == `1` && Text [SpaceOffset - `1`] == `'*'`)
146	return BreakableToken::Split (StringRef::npos, `0`);
147	StringRef BeforeCut = Text.substr(Start: `0`, N: SpaceOffset).rtrim(Chars: Blanks);
148	StringRef AfterCut = Text.substr(Start: SpaceOffset);
149	if (!DecorationEndsWithStar)
150	AfterCut = AfterCut.ltrim(Chars: Blanks);
151	return BreakableToken::Split (BeforeCut.size(),
152	AfterCut.begin() - BeforeCut.end());
153	}
154	return BreakableToken::Split (StringRef::npos, `0`);
155	}
156
157	static BreakableToken::Split
158	getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
159	unsigned TabWidth, encoding::Encoding Encoding) {
160	// FIXME: Reduce unit test case.
161	if (Text.empty())
162	return BreakableToken::Split (StringRef::npos, `0`);
163	if (ColumnLimit <= UsedColumns)
164	return BreakableToken::Split (StringRef::npos, `0`);
165	unsigned MaxSplit = ColumnLimit - UsedColumns;
166	StringRef::size_type SpaceOffset = `0`;
167	StringRef::size_type SlashOffset = `0`;
168	StringRef::size_type WordStartOffset = `0`;
169	StringRef::size_type SplitPoint = `0`;
170	for (unsigned Chars = `0`;;) {
171	unsigned Advance;
172	if (Text [`0`] == `'\\'`) {
173	Advance = encoding::getEscapeSequenceLength(Text);
174	Chars += Advance;
175	} else {
176	Advance = encoding::getCodePointNumBytes(FirstChar: Text [`0`], Encoding);
177	Chars += encoding::columnWidthWithTabs(
178	Text: Text.substr(Start: `0`, N: Advance), StartColumn: UsedColumns + Chars, TabWidth, Encoding);
179	}
180
181	if (Chars > MaxSplit \|\| Text.size() <= Advance)
182	break;
183
184	if (Blanks.contains(C: Text [`0`]))
185	SpaceOffset = SplitPoint;
186	if (Text [`0`] == `'/'`)
187	SlashOffset = SplitPoint;
188	if (Advance == `1` && !isAlphanumeric(c: Text [`0`]))
189	WordStartOffset = SplitPoint;
190
191	SplitPoint += Advance;
192	Text = Text.substr(Start: Advance);
193	}
194
195	if (SpaceOffset != `0`)
196	return BreakableToken::Split (SpaceOffset + `1`, `0`);
197	if (SlashOffset != `0`)
198	return BreakableToken::Split (SlashOffset + `1`, `0`);
199	if (WordStartOffset != `0`)
200	return BreakableToken::Split (WordStartOffset + `1`, `0`);
201	if (SplitPoint != `0`)
202	return BreakableToken::Split (SplitPoint, `0`);
203	return BreakableToken::Split (StringRef::npos, `0`);
204	}
205
206	bool switchesFormatting(const FormatToken &Token) {
207	assert((Token.is(TT_BlockComment) \|\| Token.is(TT_LineComment)) &&
208	"formatting regions are switched by comment tokens");
209	StringRef Content = Token.TokenText.substr(Start: `2`).ltrim();
210	return Content.starts_with(Prefix: "clang-format on") \|\|
211	Content.starts_with(Prefix: "clang-format off");
212	}
213
214	unsigned
215	BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
216	Split Split) const {
217	// Example: consider the content
218	// lala lala
219	// - RemainingTokenColumns is the original number of columns, 10;
220	// - Split is (4, 2), denoting the two spaces between the two words;
221	//
222	// We compute the number of columns when the split is compressed into a single
223	// space, like:
224	// lala lala
225	//
226	// FIXME: Correctly measure the length of whitespace in Split.second so it
227	// works with tabs.
228	return RemainingTokenColumns + `1` - Split.second;
229	}
230
231	unsigned BreakableStringLiteral::getLineCount() const { return `1`; }
232
233	unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
234	unsigned Offset,
235	StringRef::size_type Length,
236	unsigned StartColumn) const {
237	llvm_unreachable("Getting the length of a part of the string literal "
238	"indicates that the code tries to reflow it.");
239	}
240
241	unsigned
242	BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
243	unsigned StartColumn) const {
244	return UnbreakableTailLength + Postfix.size() +
245	encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
246	TabWidth: Style.TabWidth, Encoding);
247	}
248
249	unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
250	bool Break) const {
251	return StartColumn + Prefix.size();
252	}
253
254	BreakableStringLiteral::BreakableStringLiteral(
255	const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
256	StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
257	encoding::Encoding Encoding, const FormatStyle &Style)
258	: BreakableToken (Tok, InPPDirective, Encoding, Style),
259	StartColumn(StartColumn), Prefix (Prefix), Postfix (Postfix),
260	UnbreakableTailLength(UnbreakableTailLength) {
261	assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
262	Line = Tok.TokenText.substr(
263	Start: Prefix.size(), N: Tok.TokenText.size() - Prefix.size() - Postfix.size());
264	}
265
266	BreakableToken::Split BreakableStringLiteral::getSplit(
267	unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
268	unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
269	return getStringSplit(Text: Line.substr(Start: TailOffset), UsedColumns: ContentStartColumn,
270	ColumnLimit: ColumnLimit - Postfix.size(), TabWidth: Style.TabWidth, Encoding);
271	}
272
273	void BreakableStringLiteral::insertBreak(unsigned LineIndex,
274	unsigned TailOffset, Split Split,
275	unsigned ContentIndent,
276	WhitespaceManager &Whitespaces) const {
277	Whitespaces.replaceWhitespaceInToken(
278	Tok, Offset: Prefix.size() + TailOffset + Split.first, ReplaceChars: Split.second, PreviousPostfix: Postfix,
279	CurrentPrefix: Prefix, InPPDirective, Newlines: `1`, Spaces: StartColumn);
280	}
281
282	BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
283	const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
284	unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
285	encoding::Encoding Encoding, const FormatStyle &Style)
286	: BreakableStringLiteral (
287	Tok, StartColumn, /Prefix=/QuoteStyle == SingleQuotes ? "'"
288	: QuoteStyle == AtDoubleQuotes ? "@\""
289	: "\"",
290	/Postfix=/QuoteStyle == SingleQuotes ? "'" : "\"",
291	UnbreakableTailLength, InPPDirective, Encoding, Style),
292	BracesNeeded(Tok.isNot(Kind: TT_StringInConcatenation)),
293	QuoteStyle(QuoteStyle) {
294	// Find the replacement text for inserting braces and quotes and line breaks.
295	// We don't create an allocated string concatenated from parts here because it
296	// has to outlive the BreakableStringliteral object. The brace replacements
297	// include a quote so that WhitespaceManager can tell it apart from whitespace
298	// replacements between the string and surrounding tokens.
299
300	// The option is not implemented in JavaScript.
301	bool SignOnNewLine =
302	!Style.isJavaScript() &&
303	Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
304
305	if (Style.isVerilog()) {
306	// In Verilog, all strings are quoted by double quotes, joined by commas,
307	// and wrapped in braces. The comma is always before the newline.
308	assert(QuoteStyle == DoubleQuotes);
309	LeftBraceQuote =
310	Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ? "{\"" : "{ \"";
311	RightBraceQuote =
312	Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ? "\"}" : "\" }";
313	Postfix = "\",";
314	Prefix = "\"";
315	} else {
316	// The plus sign may be on either line. And also C# and JavaScript have
317	// several quoting styles.
318	if (QuoteStyle == SingleQuotes) {
319	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
320	RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
321	Postfix = SignOnNewLine ? "'" : "' +";
322	Prefix = SignOnNewLine ? "+ '" : "'";
323	} else {
324	if (QuoteStyle == AtDoubleQuotes) {
325	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
326	Prefix = SignOnNewLine ? "+ @\"" : "@\"";
327	} else {
328	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
329	Prefix = SignOnNewLine ? "+ \"" : "\"";
330	}
331	RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
332	Postfix = SignOnNewLine ? "\"" : "\" +";
333	}
334	}
335
336	// Following lines are indented by the width of the brace and space if any.
337	ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - `1` : `0`;
338	// The plus sign may need to be unindented depending on the style.
339	// FIXME: Add support for DontAlign.
340	if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
341	Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
342	ContinuationIndent -= `2`;
343	}
344	}
345
346	unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
347	unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
348	return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : `1`) +
349	encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
350	TabWidth: Style.TabWidth, Encoding);
351	}
352
353	unsigned
354	BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
355	bool Break) const {
356	return std::max(
357	a: `0`,
358	b: static_cast<int>(StartColumn) +
359	(Break ? ContinuationIndent + static_cast<int>(Prefix.size())
360	: (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - `1`
361	: `0`) +
362	(QuoteStyle == AtDoubleQuotes ? `2` : `1`)));
363	}
364
365	void BreakableStringLiteralUsingOperators::insertBreak(
366	unsigned LineIndex, unsigned TailOffset, Split Split,
367	unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
368	Whitespaces.replaceWhitespaceInToken(
369	Tok, /Offset=/(QuoteStyle == AtDoubleQuotes ? `2` : `1`) + TailOffset +
370	Split.first,
371	/ReplaceChars=/Split.second, /PreviousPostfix=/Postfix,
372	/CurrentPrefix=/Prefix, InPPDirective, /NewLines=/Newlines: `1`,
373	/Spaces=/
374	std::max(a: `0`, b: static_cast<int>(StartColumn) + ContinuationIndent));
375	}
376
377	void BreakableStringLiteralUsingOperators::updateAfterBroken(
378	WhitespaceManager &Whitespaces) const {
379	// Add the braces required for breaking the token if they are needed.
380	if (!BracesNeeded)
381	return;
382
383	// To add a brace or parenthesis, we replace the quote (or the at sign) with a
384	// brace and another quote. This is because the rest of the program requires
385	// one replacement for each source range. If we replace the empty strings
386	// around the string, it may conflict with whitespace replacements between the
387	// string and adjacent tokens.
388	Whitespaces.replaceWhitespaceInToken(
389	Tok, /Offset=/`0`, /ReplaceChars=/`1`, /PreviousPostfix=/"",
390	/CurrentPrefix=/LeftBraceQuote, InPPDirective, /NewLines=/Newlines: `0`,
391	/Spaces=/`0`);
392	Whitespaces.replaceWhitespaceInToken(
393	Tok, /Offset=/Tok.TokenText.size() - `1`, /ReplaceChars=/`1`,
394	/PreviousPostfix=/RightBraceQuote,
395	/CurrentPrefix=/"", InPPDirective, /NewLines=/Newlines: `0`, /Spaces=/`0`);
396	}
397
398	BreakableComment::BreakableComment(const FormatToken &Token,
399	unsigned StartColumn, bool InPPDirective,
400	encoding::Encoding Encoding,
401	const FormatStyle &Style)
402	: BreakableToken (Token, InPPDirective, Encoding, Style),
403	StartColumn(StartColumn) {}
404
405	unsigned BreakableComment::getLineCount() const { return Lines.size(); }
406
407	BreakableToken::Split
408	BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
409	unsigned ColumnLimit, unsigned ContentStartColumn,
410	const llvm::Regex &CommentPragmasRegex) const {
411	// Don't break lines matching the comment pragmas regex.
412	if (!AlwaysReflow \|\| CommentPragmasRegex.match(String: Content [LineIndex]))
413	return Split (StringRef::npos, `0`);
414	return getCommentSplit(Text: Content [LineIndex].substr(Start: TailOffset),
415	ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
416	Encoding, Style);
417	}
418
419	void BreakableComment::compressWhitespace(
420	unsigned LineIndex, unsigned TailOffset, Split Split,
421	WhitespaceManager &Whitespaces) const {
422	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
423	// Text is relative to the content line, but Whitespaces operates relative to
424	// the start of the corresponding token, so compute the start of the Split
425	// that needs to be compressed into a single space relative to the start of
426	// its token.
427	unsigned BreakOffsetInToken =
428	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
429	unsigned CharsToRemove = Split.second;
430	Whitespaces.replaceWhitespaceInToken(
431	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "", CurrentPrefix: "",
432	/InPPDirective=/false, /Newlines=/`0`, /Spaces=/`1`);
433	}
434
435	const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
436	return Tokens [LineIndex] ? *Tokens [LineIndex] : Tok;
437	}
438
439	static bool mayReflowContent(StringRef Content) {
440	Content = Content.trim(Chars: Blanks);
441	// Lines starting with '@' or '\' commonly have special meaning.
442	// Lines starting with '-', '-#', '+' or '' are bulleted/numbered lists.*
443	bool hasSpecialMeaningPrefix = false;
444	for (StringRef Prefix :
445	{"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
446	if (Content.starts_with(Prefix)) {
447	hasSpecialMeaningPrefix = true;
448	break;
449	}
450	}
451
452	// Numbered lists may also start with a number followed by '.'
453	// To avoid issues if a line starts with a number which is actually the end
454	// of a previous line, we only consider numbers with up to 2 digits.
455	static const auto kNumberedListRegexp = llvm::Regex ("^[1-9][0-9]?\\. ");
456	hasSpecialMeaningPrefix =
457	hasSpecialMeaningPrefix \|\| kNumberedListRegexp.match(String: Content);
458
459	// Simple heuristic for what to reflow: content should contain at least two
460	// characters and either the first or second character must be
461	// non-punctuation.
462	return Content.size() >= `2` && !hasSpecialMeaningPrefix &&
463	!Content.ends_with(Suffix: "\\") &&
464	// Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
465	// true, then the first code point must be 1 byte long.
466	(!isPunctuation(c: Content [`0`]) \|\| !isPunctuation(c: Content [`1`]));
467	}
468
469	BreakableBlockComment::BreakableBlockComment(
470	const FormatToken &Token, unsigned StartColumn,
471	unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
472	encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
473	: BreakableComment (Token, StartColumn, InPPDirective, Encoding, Style),
474	DelimitersOnNewline(false),
475	UnbreakableTailLength(Token.UnbreakableTailLength) {
476	assert(Tok.is(TT_BlockComment) &&
477	"block comment section must start with a block comment");
478
479	StringRef TokenText(Tok.TokenText);
480	assert(TokenText.starts_with("/") && TokenText.ends_with("/"));
481	TokenText.substr(Start: `2`, N: TokenText.size() - `4`)
482	.split(A&: Lines, Separator: UseCRLF ? "\r\n" : "\n");
483
484	int IndentDelta = StartColumn - OriginalStartColumn;
485	Content.resize(N: Lines.size());
486	Content [`0`] = Lines [`0`];
487	ContentColumn.resize(N: Lines.size());
488	// Account for the initial '/'.*
489	ContentColumn [`0`] = StartColumn + `2`;
490	Tokens.resize(N: Lines.size());
491	for (size_t i = `1`; i < Lines.size(); ++i)
492	adjustWhitespace(LineIndex: i, IndentDelta);
493
494	// Align decorations with the column of the star on the first line,
495	// that is one column after the start "/".*
496	DecorationColumn = StartColumn + `1`;
497
498	// Account for comment decoration patterns like this:
499	//
500	// /*
501	// * blah blah blah*
502	// /*
503	if (Lines.size() >= `2` && Content [`1`].starts_with(Prefix: "**") &&
504	static_cast<unsigned>(ContentColumn [`1`]) == StartColumn) {
505	DecorationColumn = StartColumn;
506	}
507
508	Decoration = "* ";
509	if (Lines.size() == `1` && !FirstInLine) {
510	// Comments for which FirstInLine is false can start on arbitrary column,
511	// and available horizontal space can be too small to align consecutive
512	// lines with the first one.
513	// FIXME: We could, probably, align them to current indentation level, but
514	// now we just wrap them without stars.
515	Decoration = "";
516	}
517	for (size_t i = `1`, e = Content.size(); i < e && !Decoration.empty(); ++i) {
518	const StringRef Text(Content [i]);
519	if (i + `1` == e) {
520	// If the last line is empty, the closing "/" will have a star.*
521	if (Text.empty())
522	break;
523	} else if (!Text.empty() && Decoration.starts_with(Prefix: Text)) {
524	continue;
525	}
526	while (!Text.starts_with(Prefix: Decoration))
527	Decoration = Decoration.drop_back(N: `1`);
528	}
529
530	LastLineNeedsDecoration = true;
531	IndentAtLineBreak = ContentColumn [`0`] + `1`;
532	for (size_t i = `1`, e = Lines.size(); i < e; ++i) {
533	if (Content [i].empty()) {
534	if (i + `1` == e) {
535	// Empty last line means that we already have a star as a part of the
536	// trailing /. We also need to preserve whitespace, so that / is
537	// correctly indented.
538	LastLineNeedsDecoration = false;
539	// Align the star in the last '/' with the stars on the previous lines.*
540	if (e >= `2` && !Decoration.empty())
541	ContentColumn [i] = DecorationColumn;
542	} else if (Decoration.empty()) {
543	// For all other lines, set the start column to 0 if they're empty, so
544	// we do not insert trailing whitespace anywhere.
545	ContentColumn [i] = `0`;
546	}
547	continue;
548	}
549
550	// The first line already excludes the star.
551	// The last line excludes the star if LastLineNeedsDecoration is false.
552	// For all other lines, adjust the line to exclude the star and
553	// (optionally) the first whitespace.
554	unsigned DecorationSize = Decoration.starts_with(Prefix: Content [i])
555	? Content [i].size()
556	: Decoration.size();
557	if (DecorationSize)
558	ContentColumn [i] = DecorationColumn + DecorationSize;
559	Content [i] = Content [i].substr(Start: DecorationSize);
560	if (!Decoration.starts_with(Prefix: Content [i])) {
561	IndentAtLineBreak =
562	std::min<int>(a: IndentAtLineBreak, b: std::max(a: `0`, b: ContentColumn [i]));
563	}
564	}
565	IndentAtLineBreak = std::max<unsigned>(a: IndentAtLineBreak, b: Decoration.size());
566
567	// Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
568	if (Style.isJavaScript() \|\| Style.isJava()) {
569	if ((Lines [`0`] == "" \|\| Lines [`0`].starts_with(Prefix: " ")) && Lines.size() > `1`) {
570	// This is a multiline jsdoc comment.
571	DelimitersOnNewline = true;
572	} else if (Lines [`0`].starts_with(Prefix: "* ") && Lines.size() == `1`) {
573	// Detect a long single-line comment, like:
574	// /* long long long /
575	// Below, '2' is the width of '/'.*
576	unsigned EndColumn =
577	ContentColumn [`0`] +
578	encoding::columnWidthWithTabs(Text: Lines [`0`], StartColumn: ContentColumn [`0`],
579	TabWidth: Style.TabWidth, Encoding) +
580	`2`;
581	DelimitersOnNewline = EndColumn > Style.ColumnLimit;
582	}
583	}
584
585	LLVM_DEBUG({
586	llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
587	llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
588	for (size_t i = `0`; i < Lines.size(); ++i) {
589	llvm::dbgs() << i << " \|" << Content[i] << "\| "
590	<< "CC=" << ContentColumn[i] << "\| "
591	<< "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
592	}
593	});
594	}
595
596	BreakableToken::Split BreakableBlockComment::getSplit(
597	unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
598	unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
599	// Don't break lines matching the comment pragmas regex.
600	if (!AlwaysReflow \|\| CommentPragmasRegex.match(String: Content [LineIndex]))
601	return Split (StringRef::npos, `0`);
602	return getCommentSplit(Text: Content [LineIndex].substr(Start: TailOffset),
603	ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
604	Encoding, Style, DecorationEndsWithStar: Decoration.ends_with(Suffix: "*"));
605	}
606
607	void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
608	int IndentDelta) {
609	// When in a preprocessor directive, the trailing backslash in a block comment
610	// is not needed, but can serve a purpose of uniformity with necessary escaped
611	// newlines outside the comment. In this case we remove it here before
612	// trimming the trailing whitespace. The backslash will be re-added later when
613	// inserting a line break.
614	size_t EndOfPreviousLine = Lines [LineIndex - `1`].size();
615	if (InPPDirective && Lines [LineIndex - `1`].ends_with(Suffix: "\\"))
616	--EndOfPreviousLine;
617
618	// Calculate the end of the non-whitespace text in the previous line.
619	EndOfPreviousLine =
620	Lines [LineIndex - `1`].find_last_not_of(Chars: Blanks, From: EndOfPreviousLine);
621	if (EndOfPreviousLine == StringRef::npos)
622	EndOfPreviousLine = `0`;
623	else
624	++EndOfPreviousLine;
625	// Calculate the start of the non-whitespace text in the current line.
626	size_t StartOfLine = Lines [LineIndex].find_first_not_of(Chars: Blanks);
627	if (StartOfLine == StringRef::npos)
628	StartOfLine = Lines [LineIndex].size();
629
630	StringRef Whitespace = Lines [LineIndex].substr(Start: `0`, N: StartOfLine);
631	// Adjust Lines to only contain relevant text.
632	size_t PreviousContentOffset =
633	Content [LineIndex - `1`].data() - Lines [LineIndex - `1`].data();
634	Content [LineIndex - `1`] = Lines [LineIndex - `1`].substr(
635	Start: PreviousContentOffset, N: EndOfPreviousLine - PreviousContentOffset);
636	Content [LineIndex] = Lines [LineIndex].substr(Start: StartOfLine);
637
638	// Adjust the start column uniformly across all lines.
639	ContentColumn [LineIndex] =
640	encoding::columnWidthWithTabs(Text: Whitespace, StartColumn: `0`, TabWidth: Style.TabWidth, Encoding) +
641	IndentDelta;
642	}
643
644	unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
645	unsigned Offset,
646	StringRef::size_type Length,
647	unsigned StartColumn) const {
648	return encoding::columnWidthWithTabs(
649	Text: Content [LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
650	Encoding);
651	}
652
653	unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
654	unsigned Offset,
655	unsigned StartColumn) const {
656	unsigned LineLength =
657	UnbreakableTailLength +
658	getRangeLength(LineIndex, Offset, Length: StringRef::npos, StartColumn);
659	if (LineIndex + `1` == Lines.size()) {
660	LineLength += `2`;
661	// We never need a decoration when breaking just the trailing "/" postfix.*
662	bool HasRemainingText = Offset < Content [LineIndex].size();
663	if (!HasRemainingText) {
664	bool HasDecoration = Lines [LineIndex].ltrim().starts_with(Prefix: Decoration);
665	if (HasDecoration)
666	LineLength -= Decoration.size();
667	}
668	}
669	return LineLength;
670	}
671
672	unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
673	bool Break) const {
674	if (Break)
675	return IndentAtLineBreak;
676	return std::max(a: `0`, b: ContentColumn [LineIndex]);
677	}
678
679	const llvm::StringSet<>
680	BreakableBlockComment::ContentIndentingJavadocAnnotations = {
681	"@param", "@return", "@returns", "@throws", "@type", "@template",
682	"@see", "@deprecated", "@define", "@exports", "@mods", "@private",
683	};
684
685	unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
686	if (!Style.isJava() && !Style.isJavaScript())
687	return `0`;
688	// The content at LineIndex 0 of a comment like:
689	// /* line 0 /
690	// is " line 0", so we need to skip over the decoration in that case.*
691	StringRef ContentWithNoDecoration = Content [LineIndex];
692	if (LineIndex == `0` && ContentWithNoDecoration.starts_with(Prefix: "*"))
693	ContentWithNoDecoration = ContentWithNoDecoration.substr(Start: `1`).ltrim(Chars: Blanks);
694	StringRef FirstWord = ContentWithNoDecoration.substr(
695	Start: `0`, N: ContentWithNoDecoration.find_first_of(Chars: Blanks));
696	if (ContentIndentingJavadocAnnotations.contains(key: FirstWord))
697	return Style.ContinuationIndentWidth;
698	return `0`;
699	}
700
701	void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
702	Split Split, unsigned ContentIndent,
703	WhitespaceManager &Whitespaces) const {
704	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
705	StringRef Prefix = Decoration;
706	// We need this to account for the case when we have a decoration " " for all*
707	// the lines except for the last one, where the star in "/" acts as a*
708	// decoration.
709	unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
710	if (LineIndex + `1` == Lines.size() &&
711	Text.size() == Split.first + Split.second) {
712	// For the last line we need to break before "/", but not to add "* ".*
713	Prefix = "";
714	if (LocalIndentAtLineBreak >= `2`)
715	LocalIndentAtLineBreak -= `2`;
716	}
717	// The split offset is from the beginning of the line. Convert it to an offset
718	// from the beginning of the token text.
719	unsigned BreakOffsetInToken =
720	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
721	unsigned CharsToRemove = Split.second;
722	assert(LocalIndentAtLineBreak >= Prefix.size());
723	std::string PrefixWithTrailingIndent = std::string (Prefix);
724	PrefixWithTrailingIndent.append(n: ContentIndent, c: `' '`);
725	Whitespaces.replaceWhitespaceInToken(
726	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
727	CurrentPrefix: PrefixWithTrailingIndent, InPPDirective, /Newlines=/`1`,
728	/Spaces=/LocalIndentAtLineBreak + ContentIndent -
729	PrefixWithTrailingIndent.size());
730	}
731
732	BreakableToken::Split BreakableBlockComment::getReflowSplit(
733	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
734	if (!mayReflow(LineIndex, CommentPragmasRegex))
735	return Split (StringRef::npos, `0`);
736
737	// If we're reflowing into a line with content indent, only reflow the next
738	// line if its starting whitespace matches the content indent.
739	size_t Trimmed = Content [LineIndex].find_first_not_of(Chars: Blanks);
740	if (LineIndex) {
741	unsigned PreviousContentIndent = getContentIndent(LineIndex: LineIndex - `1`);
742	if (PreviousContentIndent && Trimmed != StringRef::npos &&
743	Trimmed != PreviousContentIndent) {
744	return Split (StringRef::npos, `0`);
745	}
746	}
747
748	return Split (`0`, Trimmed != StringRef::npos ? Trimmed : `0`);
749	}
750
751	bool BreakableBlockComment::introducesBreakBeforeToken() const {
752	// A break is introduced when we want delimiters on newline.
753	return DelimitersOnNewline &&
754	Lines [`0`].substr(Start: `1`).find_first_not_of(Chars: Blanks) != StringRef::npos;
755	}
756
757	void BreakableBlockComment::reflow(unsigned LineIndex,
758	WhitespaceManager &Whitespaces) const {
759	StringRef TrimmedContent = Content [LineIndex].ltrim(Chars: Blanks);
760	// Here we need to reflow.
761	assert(Tokens[LineIndex - `1`] == Tokens[LineIndex] &&
762	"Reflowing whitespace within a token");
763	// This is the offset of the end of the last line relative to the start of
764	// the token text in the token.
765	unsigned WhitespaceOffsetInToken = Content [LineIndex - `1`].data() +
766	Content [LineIndex - `1`].size() -
767	tokenAt(LineIndex).TokenText.data();
768	unsigned WhitespaceLength = TrimmedContent.data() -
769	tokenAt(LineIndex).TokenText.data() -
770	WhitespaceOffsetInToken;
771	Whitespaces.replaceWhitespaceInToken(
772	Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken,
773	/ReplaceChars=/WhitespaceLength, /PreviousPostfix=/"",
774	/CurrentPrefix=/ReflowPrefix, InPPDirective, /Newlines=/`0`,
775	/Spaces=/`0`);
776	}
777
778	void BreakableBlockComment::adaptStartOfLine(
779	unsigned LineIndex, WhitespaceManager &Whitespaces) const {
780	if (LineIndex == `0`) {
781	if (DelimitersOnNewline) {
782	// Since we're breaking at index 1 below, the break position and the
783	// break length are the same.
784	// Note: this works because getCommentSplit is careful never to split at
785	// the beginning of a line.
786	size_t BreakLength = Lines [`0`].substr(Start: `1`).find_first_not_of(Chars: Blanks);
787	if (BreakLength != StringRef::npos) {
788	insertBreak(LineIndex, TailOffset: `0`, Split: Split (`1`, BreakLength), /ContentIndent=/`0`,
789	Whitespaces);
790	}
791	}
792	return;
793	}
794	// Here no reflow with the previous line will happen.
795	// Fix the decoration of the line at LineIndex.
796	StringRef Prefix = Decoration;
797	if (Content [LineIndex].empty()) {
798	if (LineIndex + `1` == Lines.size()) {
799	if (!LastLineNeedsDecoration) {
800	// If the last line was empty, we don't need a prefix, as the / will*
801	// line up with the decoration (if it exists).
802	Prefix = "";
803	}
804	} else if (!Decoration.empty()) {
805	// For other empty lines, if we do have a decoration, adapt it to not
806	// contain a trailing whitespace.
807	Prefix = Prefix.substr(Start: `0`, N: `1`);
808	}
809	} else if (ContentColumn [LineIndex] == `1`) {
810	// This line starts immediately after the decorating .*
811	Prefix = Prefix.substr(Start: `0`, N: `1`);
812	}
813	// This is the offset of the end of the last line relative to the start of the
814	// token text in the token.
815	unsigned WhitespaceOffsetInToken = Content [LineIndex - `1`].data() +
816	Content [LineIndex - `1`].size() -
817	tokenAt(LineIndex).TokenText.data();
818	unsigned WhitespaceLength = Content [LineIndex].data() -
819	tokenAt(LineIndex).TokenText.data() -
820	WhitespaceOffsetInToken;
821	Whitespaces.replaceWhitespaceInToken(
822	Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken, ReplaceChars: WhitespaceLength, PreviousPostfix: "", CurrentPrefix: Prefix,
823	InPPDirective, /Newlines=/`1`, Spaces: ContentColumn [LineIndex] - Prefix.size());
824	}
825
826	BreakableToken::Split
827	BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
828	if (DelimitersOnNewline) {
829	// Replace the trailing whitespace of the last line with a newline.
830	// In case the last line is empty, the ending '/' is already on its own*
831	// line.
832	StringRef Line = Content.back().substr(Start: TailOffset);
833	StringRef TrimmedLine = Line.rtrim(Chars: Blanks);
834	if (!TrimmedLine.empty())
835	return Split (TrimmedLine.size(), Line.size() - TrimmedLine.size());
836	}
837	return Split (StringRef::npos, `0`);
838	}
839
840	bool BreakableBlockComment::mayReflow(
841	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
842	// Content[LineIndex] may exclude the indent after the '' decoration. In that*
843	// case, we compute the start of the comment pragma manually.
844	StringRef IndentContent = Content [LineIndex];
845	if (Lines [LineIndex].ltrim(Chars: Blanks).starts_with(Prefix: "*"))
846	IndentContent = Lines [LineIndex].ltrim(Chars: Blanks).substr(Start: `1`);
847	return LineIndex > `0` && AlwaysReflow &&
848	!CommentPragmasRegex.match(String: IndentContent) &&
849	mayReflowContent(Content: Content [LineIndex]) && !Tok.Finalized &&
850	!switchesFormatting(Token: tokenAt(LineIndex));
851	}
852
853	BreakableLineCommentSection::BreakableLineCommentSection(
854	const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
855	encoding::Encoding Encoding, const FormatStyle &Style)
856	: BreakableComment (Token, StartColumn, InPPDirective, Encoding, Style) {
857	assert(Tok.is(TT_LineComment) &&
858	"line comment section must start with a line comment");
859	FormatToken LineTok = nullptr*;
860	const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
861	// How many spaces we changed in the first line of the section, this will be
862	// applied in all following lines
863	int FirstLineSpaceChange = `0`;
864	for (const FormatToken *CurrentTok = &Tok;
865	CurrentTok && CurrentTok->is(TT: TT_LineComment);
866	CurrentTok = CurrentTok->Next) {
867	LastLineTok = LineTok;
868	StringRef TokenText(CurrentTok->TokenText);
869	assert((TokenText.starts_with("//") \|\| TokenText.starts_with("#")) &&
870	"unsupported line comment prefix, '//' and '#' are supported");
871	size_t FirstLineIndex = Lines.size();
872	TokenText.split(A&: Lines, Separator: "\n");
873	Content.resize(N: Lines.size());
874	ContentColumn.resize(N: Lines.size());
875	PrefixSpaceChange.resize(N: Lines.size());
876	Tokens.resize(N: Lines.size());
877	Prefix.resize(N: Lines.size());
878	OriginalPrefix.resize(N: Lines.size());
879	for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
880	Lines [i] = Lines [i].ltrim(Chars: Blanks);
881	StringRef IndentPrefix = getLineCommentIndentPrefix(Comment: Lines [i], Style);
882	OriginalPrefix [i] = IndentPrefix;
883	const int SpacesInPrefix = llvm::count(Range&: IndentPrefix, Element: `' '`);
884
885	// This lambda also considers multibyte character that is not handled in
886	// functions like isPunctuation provided by CharInfo.
887	const auto NoSpaceBeforeFirstCommentChar = [&]() {
888	assert(Lines[i].size() > IndentPrefix.size());
889	const char FirstCommentChar = Lines [i][IndentPrefix.size()];
890	const unsigned FirstCharByteSize =
891	encoding::getCodePointNumBytes(FirstChar: FirstCommentChar, Encoding);
892	if (encoding::columnWidth(
893	Text: Lines [i].substr(Start: IndentPrefix.size(), N: FirstCharByteSize),
894	Encoding) != `1`) {
895	return false;
896	}
897	// In C-like comments, add a space before #. For example this is useful
898	// to preserve the relative indentation when commenting out code with
899	// #includes.
900	//
901	// In languages using # as the comment leader such as proto, don't
902	// add a space to support patterns like:
903	// #########
904	// # section
905	// #########
906	if (FirstCommentChar == `'#'` && !TokenText.starts_with(Prefix: "#"))
907	return false;
908	return FirstCommentChar == `'\\'` \|\| isPunctuation(c: FirstCommentChar) \|\|
909	isHorizontalWhitespace(c: FirstCommentChar);
910	};
911
912	// On the first line of the comment section we calculate how many spaces
913	// are to be added or removed, all lines after that just get only the
914	// change and we will not look at the maximum anymore. Additionally to the
915	// actual first line, we calculate that when the non space Prefix changes,
916	// e.g. from "///" to "//".
917	if (i == `0` \|\| OriginalPrefix [i].rtrim(Chars: Blanks) !=
918	OriginalPrefix [i - `1`].rtrim(Chars: Blanks)) {
919	if (SpacesInPrefix < Minimum && Lines [i].size() > IndentPrefix.size() &&
920	!NoSpaceBeforeFirstCommentChar ()) {
921	FirstLineSpaceChange = Minimum - SpacesInPrefix;
922	} else if (static_cast<unsigned>(SpacesInPrefix) >
923	Style.SpacesInLineCommentPrefix.Maximum) {
924	FirstLineSpaceChange =
925	Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
926	} else {
927	FirstLineSpaceChange = `0`;
928	}
929	}
930
931	if (Lines [i].size() != IndentPrefix.size()) {
932	assert(Lines[i].size() > IndentPrefix.size());
933
934	PrefixSpaceChange [i] = SpacesInPrefix + FirstLineSpaceChange < Minimum
935	? Minimum - SpacesInPrefix
936	: FirstLineSpaceChange;
937
938	const auto FirstNonSpace = Lines [i][IndentPrefix.size()];
939	const bool IsFormatComment = LineTok && switchesFormatting(Token: *LineTok);
940	const bool LineRequiresLeadingSpace =
941	!NoSpaceBeforeFirstCommentChar () \|\|
942	(FirstNonSpace == `'}'` && FirstLineSpaceChange != `0`);
943	const bool AllowsSpaceChange =
944	!IsFormatComment &&
945	(SpacesInPrefix != `0` \|\| LineRequiresLeadingSpace);
946
947	if (PrefixSpaceChange [i] > `0` && AllowsSpaceChange) {
948	Prefix [i] = IndentPrefix.str();
949	Prefix [i].append(n: PrefixSpaceChange [i], c: `' '`);
950	} else if (PrefixSpaceChange [i] < `0` && AllowsSpaceChange) {
951	Prefix [i] = IndentPrefix
952	.drop_back(N: std::min<std::size_t>(
953	a: -PrefixSpaceChange [i], b: SpacesInPrefix))
954	.str();
955	} else {
956	Prefix [i] = IndentPrefix.str();
957	}
958	} else {
959	// If the IndentPrefix is the whole line, there is no content and we
960	// drop just all space
961	Prefix [i] = IndentPrefix.drop_back(N: SpacesInPrefix).str();
962	}
963
964	Tokens [i] = LineTok;
965	Content [i] = Lines [i].substr(Start: IndentPrefix.size());
966	ContentColumn [i] =
967	StartColumn + encoding::columnWidthWithTabs(Text: Prefix [i], StartColumn,
968	TabWidth: Style.TabWidth, Encoding);
969
970	// Calculate the end of the non-whitespace text in this line.
971	size_t EndOfLine = Content [i].find_last_not_of(Chars: Blanks);
972	if (EndOfLine == StringRef::npos)
973	EndOfLine = Content [i].size();
974	else
975	++EndOfLine;
976	Content [i] = Content [i].substr(Start: `0`, N: EndOfLine);
977	}
978	LineTok = CurrentTok->Next;
979	if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
980	// A line comment section needs to broken by a line comment that is
981	// preceded by at least two newlines. Note that we put this break here
982	// instead of breaking at a previous stage during parsing, since that
983	// would split the contents of the enum into two unwrapped lines in this
984	// example, which is undesirable:
985	// enum A {
986	// a, // comment about a
987	//
988	// // comment about b
989	// b
990	// };
991	//
992	// FIXME: Consider putting separate line comment sections as children to
993	// the unwrapped line instead.
994	break;
995	}
996	}
997	}
998
999	unsigned
1000	BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1001	StringRef::size_type Length,
1002	unsigned StartColumn) const {
1003	return encoding::columnWidthWithTabs(
1004	Text: Content [LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
1005	Encoding);
1006	}
1007
1008	unsigned
1009	BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
1010	bool /Break/) const {
1011	return ContentColumn [LineIndex];
1012	}
1013
1014	void BreakableLineCommentSection::insertBreak(
1015	unsigned LineIndex, unsigned TailOffset, Split Split,
1016	unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1017	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
1018	// Compute the offset of the split relative to the beginning of the token
1019	// text.
1020	unsigned BreakOffsetInToken =
1021	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1022	unsigned CharsToRemove = Split.second;
1023	Whitespaces.replaceWhitespaceInToken(
1024	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
1025	CurrentPrefix: Prefix [LineIndex], InPPDirective, /Newlines=/`1`,
1026	/Spaces=/ContentColumn [LineIndex] - Prefix [LineIndex].size());
1027	}
1028
1029	BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
1030	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1031	if (!mayReflow(LineIndex, CommentPragmasRegex))
1032	return Split (StringRef::npos, `0`);
1033
1034	size_t Trimmed = Content [LineIndex].find_first_not_of(Chars: Blanks);
1035
1036	// In a line comment section each line is a separate token; thus, after a
1037	// split we replace all whitespace before the current line comment token
1038	// (which does not need to be included in the split), plus the start of the
1039	// line up to where the content starts.
1040	return Split (`0`, Trimmed != StringRef::npos ? Trimmed : `0`);
1041	}
1042
1043	void BreakableLineCommentSection::reflow(unsigned LineIndex,
1044	WhitespaceManager &Whitespaces) const {
1045	if (LineIndex > `0`) {
1046	if (Tokens [LineIndex] != Tokens [LineIndex - `1`]) {
1047	// Reflow happens between tokens. Replace the whitespace between the
1048	// tokens by the empty string.
1049	Whitespaces.replaceWhitespace(
1050	Tok&: Tokens [LineIndex], /Newlines=/`0`, /Spaces=/*`0`,
1051	/StartOfTokenColumn=/StartColumn, /IsAligned=/true,
1052	/InPPDirective=/false);
1053	} else {
1054	// In case we're reflowing after the '\' in:
1055	//
1056	// // line comment \
1057	// // line 2
1058	//
1059	// the reflow happens inside the single comment token (it is a single line
1060	// comment with an unescaped newline).
1061	// Replace the whitespace between the '\' and '//' with the empty string.
1062	//
1063	// Offset points to after the '\' relative to start of the token.
1064	unsigned Offset = Lines [LineIndex - `1`].data() +
1065	Lines [LineIndex - `1`].size() -
1066	tokenAt(LineIndex: LineIndex - `1`).TokenText.data();
1067	// WhitespaceLength is the number of chars between the '\' and the '//' on
1068	// the next line.
1069	unsigned WhitespaceLength = Lines [LineIndex].data() -
1070	tokenAt(LineIndex).TokenText.data() - Offset;
1071	Whitespaces.replaceWhitespaceInToken(Tok: *Tokens [LineIndex], Offset,
1072	/ReplaceChars=/WhitespaceLength,
1073	/PreviousPostfix=/"",
1074	/CurrentPrefix=/"",
1075	/InPPDirective=/false,
1076	/Newlines=/`0`,
1077	/Spaces=/`0`);
1078	}
1079	}
1080	// Replace the indent and prefix of the token with the reflow prefix.
1081	unsigned Offset =
1082	Lines [LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1083	unsigned WhitespaceLength =
1084	Content [LineIndex].data() - Lines [LineIndex].data();
1085	Whitespaces.replaceWhitespaceInToken(Tok: *Tokens [LineIndex], Offset,
1086	/ReplaceChars=/WhitespaceLength,
1087	/PreviousPostfix=/"",
1088	/CurrentPrefix=/ReflowPrefix,
1089	/InPPDirective=/false,
1090	/Newlines=/`0`,
1091	/Spaces=/`0`);
1092	}
1093
1094	void BreakableLineCommentSection::adaptStartOfLine(
1095	unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1096	// If this is the first line of a token, we need to inform Whitespace Manager
1097	// about it: either adapt the whitespace range preceding it, or mark it as an
1098	// untouchable token.
1099	// This happens for instance here:
1100	// // line 1 \
1101	// // line 2
1102	if (LineIndex > `0` && Tokens [LineIndex] != Tokens [LineIndex - `1`]) {
1103	// This is the first line for the current token, but no reflow with the
1104	// previous token is necessary. However, we still may need to adjust the
1105	// start column. Note that ContentColumn[LineIndex] is the expected
1106	// content column after a possible update to the prefix, hence the prefix
1107	// length change is included.
1108	unsigned LineColumn =
1109	ContentColumn [LineIndex] -
1110	(Content [LineIndex].data() - Lines [LineIndex].data()) +
1111	(OriginalPrefix [LineIndex].size() - Prefix [LineIndex].size());
1112
1113	// We always want to create a replacement instead of adding an untouchable
1114	// token, even if LineColumn is the same as the original column of the
1115	// token. This is because WhitespaceManager doesn't align trailing
1116	// comments if they are untouchable.
1117	Whitespaces.replaceWhitespace(Tok&: *Tokens [LineIndex],
1118	/Newlines=/`1`,
1119	/Spaces=/LineColumn,
1120	/StartOfTokenColumn=/LineColumn,
1121	/IsAligned=/tokenAt(LineIndex: `0`).NewlinesBefore == `0`,
1122	/InPPDirective=/false);
1123	}
1124	if (OriginalPrefix [LineIndex] != Prefix [LineIndex]) {
1125	// Adjust the prefix if necessary.
1126	const auto SpacesToRemove = -std::min(a: PrefixSpaceChange [LineIndex], b: `0`);
1127	const auto SpacesToAdd = std::max(a: PrefixSpaceChange [LineIndex], b: `0`);
1128	Whitespaces.replaceWhitespaceInToken(
1129	Tok: tokenAt(LineIndex), Offset: OriginalPrefix [LineIndex].size() - SpacesToRemove,
1130	/ReplaceChars=/SpacesToRemove, PreviousPostfix: "", CurrentPrefix: "", /InPPDirective=/false,
1131	/Newlines=/`0`, /Spaces=/SpacesToAdd);
1132	}
1133	}
1134
1135	void BreakableLineCommentSection::updateNextToken(LineState &State) const {
1136	if (LastLineTok)
1137	State.NextToken = LastLineTok->Next;
1138	}
1139
1140	bool BreakableLineCommentSection::mayReflow(
1141	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1142	// Line comments have the indent as part of the prefix, so we need to
1143	// recompute the start of the line.
1144	StringRef IndentContent = Content [LineIndex];
1145	if (Lines [LineIndex].starts_with(Prefix: "//"))
1146	IndentContent = Lines [LineIndex].substr(Start: `2`);
1147	// FIXME: Decide whether we want to reflow non-regular indents:
1148	// Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1149	// OriginalPrefix[LineIndex-1]. That means we don't reflow
1150	// // text that protrudes
1151	// // into text with different indent
1152	// We do reflow in that case in block comments.
1153	return LineIndex > `0` && AlwaysReflow &&
1154	!CommentPragmasRegex.match(String: IndentContent) &&
1155	mayReflowContent(Content: Content [LineIndex]) && !Tok.Finalized &&
1156	!switchesFormatting(Token: tokenAt(LineIndex)) &&
1157	OriginalPrefix [LineIndex] == OriginalPrefix [LineIndex - `1`];
1158	}
1159
1160	} // namespace format
1161	} // namespace clang
1162

Browse the source code of llvm_projects/clang/lib/Format/BreakableToken.cpp