1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Contains implementation of BreakableToken class and classes derived
11/// from it.
12///
13//===----------------------------------------------------------------------===//
14
15#include "BreakableToken.h"
16#include "ContinuationIndenter.h"
17#include "clang/Basic/CharInfo.h"
18#include "clang/Format/Format.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
21#include <algorithm>
22
23#define DEBUG_TYPE "format-token-breaker"
24
25namespace clang {
26namespace format {
27
28static constexpr StringRef Blanks(" \t\v\f\r");
29
30static StringRef getLineCommentIndentPrefix(StringRef Comment,
31 const FormatStyle &Style) {
32 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
33 "//!", "//:", "//"};
34 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
35 "//", "#"};
36 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
37 if (Style.isTextProto())
38 KnownPrefixes = KnownTextProtoPrefixes;
39
40 assert(
41 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
42 return Lhs.size() > Rhs.size();
43 }));
44
45 for (StringRef KnownPrefix : KnownPrefixes) {
46 if (Comment.starts_with(Prefix: KnownPrefix)) {
47 const auto PrefixLength =
48 Comment.find_first_not_of(C: ' ', From: KnownPrefix.size());
49 return Comment.substr(Start: 0, N: PrefixLength);
50 }
51 }
52 return {};
53}
54
55static BreakableToken::Split
56getCommentSplit(StringRef Text, unsigned ContentStartColumn,
57 unsigned ColumnLimit, unsigned TabWidth,
58 encoding::Encoding Encoding, const FormatStyle &Style,
59 bool DecorationEndsWithStar = false) {
60 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
61 << "\", Column limit: " << ColumnLimit
62 << ", Content start: " << ContentStartColumn << "\n");
63 if (ColumnLimit <= ContentStartColumn + 1)
64 return BreakableToken::Split(StringRef::npos, 0);
65
66 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
67 unsigned MaxSplitBytes = 0;
68
69 for (unsigned NumChars = 0;
70 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
71 unsigned BytesInChar =
72 encoding::getCodePointNumBytes(FirstChar: Text[MaxSplitBytes], Encoding);
73 NumChars += encoding::columnWidthWithTabs(
74 Text: Text.substr(Start: MaxSplitBytes, N: BytesInChar), StartColumn: ContentStartColumn + NumChars,
75 TabWidth, Encoding);
76 MaxSplitBytes += BytesInChar;
77 }
78
79 // In JavaScript, some @tags can be followed by {, and machinery that parses
80 // these comments will fail to understand the comment if followed by a line
81 // break. So avoid ever breaking before a {.
82 if (Style.isJavaScript()) {
83 StringRef::size_type SpaceOffset =
84 Text.find_first_of(Chars: Blanks, From: MaxSplitBytes);
85 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
86 Text[SpaceOffset + 1] == '{') {
87 MaxSplitBytes = SpaceOffset + 1;
88 }
89 }
90
91 StringRef::size_type SpaceOffset = Text.find_last_of(Chars: Blanks, From: MaxSplitBytes);
92
93 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
94 // Some spaces are unacceptable to break on, rewind past them.
95 while (SpaceOffset != StringRef::npos) {
96 // If a line-comment ends with `\`, the next line continues the comment,
97 // whether or not it starts with `//`. This is confusing and triggers
98 // -Wcomment.
99 // Avoid introducing multiline comments by not allowing a break right
100 // after '\'.
101 if (Style.isCpp()) {
102 StringRef::size_type LastNonBlank =
103 Text.find_last_not_of(Chars: Blanks, From: SpaceOffset);
104 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
105 SpaceOffset = Text.find_last_of(Chars: Blanks, From: LastNonBlank);
106 continue;
107 }
108 }
109
110 // Do not split before a number followed by a dot: this would be interpreted
111 // as a numbered list, which would prevent re-flowing in subsequent passes.
112 if (kNumberedListRegexp.match(String: Text.substr(Start: SpaceOffset).ltrim(Chars: Blanks))) {
113 SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
114 continue;
115 }
116
117 // Avoid ever breaking before a @tag or a { in JavaScript.
118 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
119 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
120 SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
121 continue;
122 }
123
124 break;
125 }
126
127 if (SpaceOffset == StringRef::npos ||
128 // Don't break at leading whitespace.
129 Text.find_last_not_of(Chars: Blanks, From: SpaceOffset) == StringRef::npos) {
130 // Make sure that we don't break at leading whitespace that
131 // reaches past MaxSplit.
132 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Chars: Blanks);
133 if (FirstNonWhitespace == StringRef::npos) {
134 // If the comment is only whitespace, we cannot split.
135 return BreakableToken::Split(StringRef::npos, 0);
136 }
137 SpaceOffset = Text.find_first_of(
138 Chars: Blanks, From: std::max<unsigned>(a: MaxSplitBytes, b: FirstNonWhitespace));
139 }
140 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
141 // adaptStartOfLine will break after lines starting with /** if the comment
142 // is broken anywhere. Avoid emitting this break twice here.
143 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
144 // insert a break after /**, so this code must not insert the same break.
145 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
146 return BreakableToken::Split(StringRef::npos, 0);
147 StringRef BeforeCut = Text.substr(Start: 0, N: SpaceOffset).rtrim(Chars: Blanks);
148 StringRef AfterCut = Text.substr(Start: SpaceOffset);
149 if (!DecorationEndsWithStar)
150 AfterCut = AfterCut.ltrim(Chars: Blanks);
151 return BreakableToken::Split(BeforeCut.size(),
152 AfterCut.begin() - BeforeCut.end());
153 }
154 return BreakableToken::Split(StringRef::npos, 0);
155}
156
157static BreakableToken::Split
158getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
159 unsigned TabWidth, encoding::Encoding Encoding) {
160 // FIXME: Reduce unit test case.
161 if (Text.empty())
162 return BreakableToken::Split(StringRef::npos, 0);
163 if (ColumnLimit <= UsedColumns)
164 return BreakableToken::Split(StringRef::npos, 0);
165 unsigned MaxSplit = ColumnLimit - UsedColumns;
166 StringRef::size_type SpaceOffset = 0;
167 StringRef::size_type SlashOffset = 0;
168 StringRef::size_type WordStartOffset = 0;
169 StringRef::size_type SplitPoint = 0;
170 for (unsigned Chars = 0;;) {
171 unsigned Advance;
172 if (Text[0] == '\\') {
173 Advance = encoding::getEscapeSequenceLength(Text);
174 Chars += Advance;
175 } else {
176 Advance = encoding::getCodePointNumBytes(FirstChar: Text[0], Encoding);
177 Chars += encoding::columnWidthWithTabs(
178 Text: Text.substr(Start: 0, N: Advance), StartColumn: UsedColumns + Chars, TabWidth, Encoding);
179 }
180
181 if (Chars > MaxSplit || Text.size() <= Advance)
182 break;
183
184 if (Blanks.contains(C: Text[0]))
185 SpaceOffset = SplitPoint;
186 if (Text[0] == '/')
187 SlashOffset = SplitPoint;
188 if (Advance == 1 && !isAlphanumeric(c: Text[0]))
189 WordStartOffset = SplitPoint;
190
191 SplitPoint += Advance;
192 Text = Text.substr(Start: Advance);
193 }
194
195 if (SpaceOffset != 0)
196 return BreakableToken::Split(SpaceOffset + 1, 0);
197 if (SlashOffset != 0)
198 return BreakableToken::Split(SlashOffset + 1, 0);
199 if (WordStartOffset != 0)
200 return BreakableToken::Split(WordStartOffset + 1, 0);
201 if (SplitPoint != 0)
202 return BreakableToken::Split(SplitPoint, 0);
203 return BreakableToken::Split(StringRef::npos, 0);
204}
205
206bool switchesFormatting(const FormatToken &Token) {
207 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
208 "formatting regions are switched by comment tokens");
209 StringRef Content = Token.TokenText.substr(Start: 2).ltrim();
210 return Content.starts_with(Prefix: "clang-format on") ||
211 Content.starts_with(Prefix: "clang-format off");
212}
213
214unsigned
215BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
216 Split Split) const {
217 // Example: consider the content
218 // lala lala
219 // - RemainingTokenColumns is the original number of columns, 10;
220 // - Split is (4, 2), denoting the two spaces between the two words;
221 //
222 // We compute the number of columns when the split is compressed into a single
223 // space, like:
224 // lala lala
225 //
226 // FIXME: Correctly measure the length of whitespace in Split.second so it
227 // works with tabs.
228 return RemainingTokenColumns + 1 - Split.second;
229}
230
231unsigned BreakableStringLiteral::getLineCount() const { return 1; }
232
233unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
234 unsigned Offset,
235 StringRef::size_type Length,
236 unsigned StartColumn) const {
237 llvm_unreachable("Getting the length of a part of the string literal "
238 "indicates that the code tries to reflow it.");
239}
240
241unsigned
242BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
243 unsigned StartColumn) const {
244 return UnbreakableTailLength + Postfix.size() +
245 encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
246 TabWidth: Style.TabWidth, Encoding);
247}
248
249unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
250 bool Break) const {
251 return StartColumn + Prefix.size();
252}
253
254BreakableStringLiteral::BreakableStringLiteral(
255 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
256 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
257 encoding::Encoding Encoding, const FormatStyle &Style)
258 : BreakableToken(Tok, InPPDirective, Encoding, Style),
259 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
260 UnbreakableTailLength(UnbreakableTailLength) {
261 assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
262 Line = Tok.TokenText.substr(
263 Start: Prefix.size(), N: Tok.TokenText.size() - Prefix.size() - Postfix.size());
264}
265
266BreakableToken::Split BreakableStringLiteral::getSplit(
267 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
268 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
269 return getStringSplit(Text: Line.substr(Start: TailOffset), UsedColumns: ContentStartColumn,
270 ColumnLimit: ColumnLimit - Postfix.size(), TabWidth: Style.TabWidth, Encoding);
271}
272
273void BreakableStringLiteral::insertBreak(unsigned LineIndex,
274 unsigned TailOffset, Split Split,
275 unsigned ContentIndent,
276 WhitespaceManager &Whitespaces) const {
277 Whitespaces.replaceWhitespaceInToken(
278 Tok, Offset: Prefix.size() + TailOffset + Split.first, ReplaceChars: Split.second, PreviousPostfix: Postfix,
279 CurrentPrefix: Prefix, InPPDirective, Newlines: 1, Spaces: StartColumn);
280}
281
282BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
283 const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
284 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
285 encoding::Encoding Encoding, const FormatStyle &Style)
286 : BreakableStringLiteral(
287 Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
288 : QuoteStyle == AtDoubleQuotes ? "@\""
289 : "\"",
290 /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
291 UnbreakableTailLength, InPPDirective, Encoding, Style),
292 BracesNeeded(Tok.isNot(Kind: TT_StringInConcatenation)),
293 QuoteStyle(QuoteStyle) {
294 // Find the replacement text for inserting braces and quotes and line breaks.
295 // We don't create an allocated string concatenated from parts here because it
296 // has to outlive the BreakableStringliteral object. The brace replacements
297 // include a quote so that WhitespaceManager can tell it apart from whitespace
298 // replacements between the string and surrounding tokens.
299
300 // The option is not implemented in JavaScript.
301 bool SignOnNewLine =
302 !Style.isJavaScript() &&
303 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
304
305 if (Style.isVerilog()) {
306 // In Verilog, all strings are quoted by double quotes, joined by commas,
307 // and wrapped in braces. The comma is always before the newline.
308 assert(QuoteStyle == DoubleQuotes);
309 LeftBraceQuote =
310 Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ? "{\"" : "{ \"";
311 RightBraceQuote =
312 Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ? "\"}" : "\" }";
313 Postfix = "\",";
314 Prefix = "\"";
315 } else {
316 // The plus sign may be on either line. And also C# and JavaScript have
317 // several quoting styles.
318 if (QuoteStyle == SingleQuotes) {
319 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
320 RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
321 Postfix = SignOnNewLine ? "'" : "' +";
322 Prefix = SignOnNewLine ? "+ '" : "'";
323 } else {
324 if (QuoteStyle == AtDoubleQuotes) {
325 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
326 Prefix = SignOnNewLine ? "+ @\"" : "@\"";
327 } else {
328 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
329 Prefix = SignOnNewLine ? "+ \"" : "\"";
330 }
331 RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
332 Postfix = SignOnNewLine ? "\"" : "\" +";
333 }
334 }
335
336 // Following lines are indented by the width of the brace and space if any.
337 ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;
338 // The plus sign may need to be unindented depending on the style.
339 // FIXME: Add support for DontAlign.
340 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
341 Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
342 ContinuationIndent -= 2;
343 }
344}
345
346unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
347 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
348 return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
349 encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
350 TabWidth: Style.TabWidth, Encoding);
351}
352
353unsigned
354BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
355 bool Break) const {
356 return std::max(
357 a: 0,
358 b: static_cast<int>(StartColumn) +
359 (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
360 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
361 : 0) +
362 (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
363}
364
365void BreakableStringLiteralUsingOperators::insertBreak(
366 unsigned LineIndex, unsigned TailOffset, Split Split,
367 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
368 Whitespaces.replaceWhitespaceInToken(
369 Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
370 Split.first,
371 /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
372 /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/Newlines: 1,
373 /*Spaces=*/
374 std::max(a: 0, b: static_cast<int>(StartColumn) + ContinuationIndent));
375}
376
377void BreakableStringLiteralUsingOperators::updateAfterBroken(
378 WhitespaceManager &Whitespaces) const {
379 // Add the braces required for breaking the token if they are needed.
380 if (!BracesNeeded)
381 return;
382
383 // To add a brace or parenthesis, we replace the quote (or the at sign) with a
384 // brace and another quote. This is because the rest of the program requires
385 // one replacement for each source range. If we replace the empty strings
386 // around the string, it may conflict with whitespace replacements between the
387 // string and adjacent tokens.
388 Whitespaces.replaceWhitespaceInToken(
389 Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
390 /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/Newlines: 0,
391 /*Spaces=*/0);
392 Whitespaces.replaceWhitespaceInToken(
393 Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
394 /*PreviousPostfix=*/RightBraceQuote,
395 /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/Newlines: 0, /*Spaces=*/0);
396}
397
398BreakableComment::BreakableComment(const FormatToken &Token,
399 unsigned StartColumn, bool InPPDirective,
400 encoding::Encoding Encoding,
401 const FormatStyle &Style)
402 : BreakableToken(Token, InPPDirective, Encoding, Style),
403 StartColumn(StartColumn) {}
404
405unsigned BreakableComment::getLineCount() const { return Lines.size(); }
406
407BreakableToken::Split
408BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
409 unsigned ColumnLimit, unsigned ContentStartColumn,
410 const llvm::Regex &CommentPragmasRegex) const {
411 // Don't break lines matching the comment pragmas regex.
412 if (!AlwaysReflow || CommentPragmasRegex.match(String: Content[LineIndex]))
413 return Split(StringRef::npos, 0);
414 return getCommentSplit(Text: Content[LineIndex].substr(Start: TailOffset),
415 ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
416 Encoding, Style);
417}
418
419void BreakableComment::compressWhitespace(
420 unsigned LineIndex, unsigned TailOffset, Split Split,
421 WhitespaceManager &Whitespaces) const {
422 StringRef Text = Content[LineIndex].substr(Start: TailOffset);
423 // Text is relative to the content line, but Whitespaces operates relative to
424 // the start of the corresponding token, so compute the start of the Split
425 // that needs to be compressed into a single space relative to the start of
426 // its token.
427 unsigned BreakOffsetInToken =
428 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
429 unsigned CharsToRemove = Split.second;
430 Whitespaces.replaceWhitespaceInToken(
431 Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "", CurrentPrefix: "",
432 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
433}
434
435const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
436 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
437}
438
439static bool mayReflowContent(StringRef Content) {
440 Content = Content.trim(Chars: Blanks);
441 // Lines starting with '@' or '\' commonly have special meaning.
442 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
443 bool hasSpecialMeaningPrefix = false;
444 for (StringRef Prefix :
445 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
446 if (Content.starts_with(Prefix)) {
447 hasSpecialMeaningPrefix = true;
448 break;
449 }
450 }
451
452 // Numbered lists may also start with a number followed by '.'
453 // To avoid issues if a line starts with a number which is actually the end
454 // of a previous line, we only consider numbers with up to 2 digits.
455 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
456 hasSpecialMeaningPrefix =
457 hasSpecialMeaningPrefix || kNumberedListRegexp.match(String: Content);
458
459 // Simple heuristic for what to reflow: content should contain at least two
460 // characters and either the first or second character must be
461 // non-punctuation.
462 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
463 !Content.ends_with(Suffix: "\\") &&
464 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
465 // true, then the first code point must be 1 byte long.
466 (!isPunctuation(c: Content[0]) || !isPunctuation(c: Content[1]));
467}
468
469BreakableBlockComment::BreakableBlockComment(
470 const FormatToken &Token, unsigned StartColumn,
471 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
472 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
473 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
474 DelimitersOnNewline(false),
475 UnbreakableTailLength(Token.UnbreakableTailLength) {
476 assert(Tok.is(TT_BlockComment) &&
477 "block comment section must start with a block comment");
478
479 StringRef TokenText(Tok.TokenText);
480 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
481 TokenText.substr(Start: 2, N: TokenText.size() - 4)
482 .split(A&: Lines, Separator: UseCRLF ? "\r\n" : "\n");
483
484 int IndentDelta = StartColumn - OriginalStartColumn;
485 Content.resize(N: Lines.size());
486 Content[0] = Lines[0];
487 ContentColumn.resize(N: Lines.size());
488 // Account for the initial '/*'.
489 ContentColumn[0] = StartColumn + 2;
490 Tokens.resize(N: Lines.size());
491 for (size_t i = 1; i < Lines.size(); ++i)
492 adjustWhitespace(LineIndex: i, IndentDelta);
493
494 // Align decorations with the column of the star on the first line,
495 // that is one column after the start "/*".
496 DecorationColumn = StartColumn + 1;
497
498 // Account for comment decoration patterns like this:
499 //
500 // /*
501 // ** blah blah blah
502 // */
503 if (Lines.size() >= 2 && Content[1].starts_with(Prefix: "**") &&
504 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
505 DecorationColumn = StartColumn;
506 }
507
508 Decoration = "* ";
509 if (Lines.size() == 1 && !FirstInLine) {
510 // Comments for which FirstInLine is false can start on arbitrary column,
511 // and available horizontal space can be too small to align consecutive
512 // lines with the first one.
513 // FIXME: We could, probably, align them to current indentation level, but
514 // now we just wrap them without stars.
515 Decoration = "";
516 }
517 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
518 const StringRef Text(Content[i]);
519 if (i + 1 == e) {
520 // If the last line is empty, the closing "*/" will have a star.
521 if (Text.empty())
522 break;
523 } else if (!Text.empty() && Decoration.starts_with(Prefix: Text)) {
524 continue;
525 }
526 while (!Text.starts_with(Prefix: Decoration))
527 Decoration = Decoration.drop_back(N: 1);
528 }
529
530 LastLineNeedsDecoration = true;
531 IndentAtLineBreak = ContentColumn[0] + 1;
532 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
533 if (Content[i].empty()) {
534 if (i + 1 == e) {
535 // Empty last line means that we already have a star as a part of the
536 // trailing */. We also need to preserve whitespace, so that */ is
537 // correctly indented.
538 LastLineNeedsDecoration = false;
539 // Align the star in the last '*/' with the stars on the previous lines.
540 if (e >= 2 && !Decoration.empty())
541 ContentColumn[i] = DecorationColumn;
542 } else if (Decoration.empty()) {
543 // For all other lines, set the start column to 0 if they're empty, so
544 // we do not insert trailing whitespace anywhere.
545 ContentColumn[i] = 0;
546 }
547 continue;
548 }
549
550 // The first line already excludes the star.
551 // The last line excludes the star if LastLineNeedsDecoration is false.
552 // For all other lines, adjust the line to exclude the star and
553 // (optionally) the first whitespace.
554 unsigned DecorationSize = Decoration.starts_with(Prefix: Content[i])
555 ? Content[i].size()
556 : Decoration.size();
557 if (DecorationSize)
558 ContentColumn[i] = DecorationColumn + DecorationSize;
559 Content[i] = Content[i].substr(Start: DecorationSize);
560 if (!Decoration.starts_with(Prefix: Content[i])) {
561 IndentAtLineBreak =
562 std::min<int>(a: IndentAtLineBreak, b: std::max(a: 0, b: ContentColumn[i]));
563 }
564 }
565 IndentAtLineBreak = std::max<unsigned>(a: IndentAtLineBreak, b: Decoration.size());
566
567 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
568 if (Style.isJavaScript() || Style.isJava()) {
569 if ((Lines[0] == "*" || Lines[0].starts_with(Prefix: "* ")) && Lines.size() > 1) {
570 // This is a multiline jsdoc comment.
571 DelimitersOnNewline = true;
572 } else if (Lines[0].starts_with(Prefix: "* ") && Lines.size() == 1) {
573 // Detect a long single-line comment, like:
574 // /** long long long */
575 // Below, '2' is the width of '*/'.
576 unsigned EndColumn =
577 ContentColumn[0] +
578 encoding::columnWidthWithTabs(Text: Lines[0], StartColumn: ContentColumn[0],
579 TabWidth: Style.TabWidth, Encoding) +
580 2;
581 DelimitersOnNewline = EndColumn > Style.ColumnLimit;
582 }
583 }
584
585 LLVM_DEBUG({
586 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
587 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
588 for (size_t i = 0; i < Lines.size(); ++i) {
589 llvm::dbgs() << i << " |" << Content[i] << "| "
590 << "CC=" << ContentColumn[i] << "| "
591 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
592 }
593 });
594}
595
596BreakableToken::Split BreakableBlockComment::getSplit(
597 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
598 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
599 // Don't break lines matching the comment pragmas regex.
600 if (!AlwaysReflow || CommentPragmasRegex.match(String: Content[LineIndex]))
601 return Split(StringRef::npos, 0);
602 return getCommentSplit(Text: Content[LineIndex].substr(Start: TailOffset),
603 ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
604 Encoding, Style, DecorationEndsWithStar: Decoration.ends_with(Suffix: "*"));
605}
606
607void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
608 int IndentDelta) {
609 // When in a preprocessor directive, the trailing backslash in a block comment
610 // is not needed, but can serve a purpose of uniformity with necessary escaped
611 // newlines outside the comment. In this case we remove it here before
612 // trimming the trailing whitespace. The backslash will be re-added later when
613 // inserting a line break.
614 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
615 if (InPPDirective && Lines[LineIndex - 1].ends_with(Suffix: "\\"))
616 --EndOfPreviousLine;
617
618 // Calculate the end of the non-whitespace text in the previous line.
619 EndOfPreviousLine =
620 Lines[LineIndex - 1].find_last_not_of(Chars: Blanks, From: EndOfPreviousLine);
621 if (EndOfPreviousLine == StringRef::npos)
622 EndOfPreviousLine = 0;
623 else
624 ++EndOfPreviousLine;
625 // Calculate the start of the non-whitespace text in the current line.
626 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Chars: Blanks);
627 if (StartOfLine == StringRef::npos)
628 StartOfLine = Lines[LineIndex].size();
629
630 StringRef Whitespace = Lines[LineIndex].substr(Start: 0, N: StartOfLine);
631 // Adjust Lines to only contain relevant text.
632 size_t PreviousContentOffset =
633 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
634 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
635 Start: PreviousContentOffset, N: EndOfPreviousLine - PreviousContentOffset);
636 Content[LineIndex] = Lines[LineIndex].substr(Start: StartOfLine);
637
638 // Adjust the start column uniformly across all lines.
639 ContentColumn[LineIndex] =
640 encoding::columnWidthWithTabs(Text: Whitespace, StartColumn: 0, TabWidth: Style.TabWidth, Encoding) +
641 IndentDelta;
642}
643
644unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
645 unsigned Offset,
646 StringRef::size_type Length,
647 unsigned StartColumn) const {
648 return encoding::columnWidthWithTabs(
649 Text: Content[LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
650 Encoding);
651}
652
653unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
654 unsigned Offset,
655 unsigned StartColumn) const {
656 unsigned LineLength =
657 UnbreakableTailLength +
658 getRangeLength(LineIndex, Offset, Length: StringRef::npos, StartColumn);
659 if (LineIndex + 1 == Lines.size()) {
660 LineLength += 2;
661 // We never need a decoration when breaking just the trailing "*/" postfix.
662 bool HasRemainingText = Offset < Content[LineIndex].size();
663 if (!HasRemainingText) {
664 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Prefix: Decoration);
665 if (HasDecoration)
666 LineLength -= Decoration.size();
667 }
668 }
669 return LineLength;
670}
671
672unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
673 bool Break) const {
674 if (Break)
675 return IndentAtLineBreak;
676 return std::max(a: 0, b: ContentColumn[LineIndex]);
677}
678
679const llvm::StringSet<>
680 BreakableBlockComment::ContentIndentingJavadocAnnotations = {
681 "@param", "@return", "@returns", "@throws", "@type", "@template",
682 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
683};
684
685unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
686 if (!Style.isJava() && !Style.isJavaScript())
687 return 0;
688 // The content at LineIndex 0 of a comment like:
689 // /** line 0 */
690 // is "* line 0", so we need to skip over the decoration in that case.
691 StringRef ContentWithNoDecoration = Content[LineIndex];
692 if (LineIndex == 0 && ContentWithNoDecoration.starts_with(Prefix: "*"))
693 ContentWithNoDecoration = ContentWithNoDecoration.substr(Start: 1).ltrim(Chars: Blanks);
694 StringRef FirstWord = ContentWithNoDecoration.substr(
695 Start: 0, N: ContentWithNoDecoration.find_first_of(Chars: Blanks));
696 if (ContentIndentingJavadocAnnotations.contains(key: FirstWord))
697 return Style.ContinuationIndentWidth;
698 return 0;
699}
700
701void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
702 Split Split, unsigned ContentIndent,
703 WhitespaceManager &Whitespaces) const {
704 StringRef Text = Content[LineIndex].substr(Start: TailOffset);
705 StringRef Prefix = Decoration;
706 // We need this to account for the case when we have a decoration "* " for all
707 // the lines except for the last one, where the star in "*/" acts as a
708 // decoration.
709 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
710 if (LineIndex + 1 == Lines.size() &&
711 Text.size() == Split.first + Split.second) {
712 // For the last line we need to break before "*/", but not to add "* ".
713 Prefix = "";
714 if (LocalIndentAtLineBreak >= 2)
715 LocalIndentAtLineBreak -= 2;
716 }
717 // The split offset is from the beginning of the line. Convert it to an offset
718 // from the beginning of the token text.
719 unsigned BreakOffsetInToken =
720 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
721 unsigned CharsToRemove = Split.second;
722 assert(LocalIndentAtLineBreak >= Prefix.size());
723 std::string PrefixWithTrailingIndent = std::string(Prefix);
724 PrefixWithTrailingIndent.append(n: ContentIndent, c: ' ');
725 Whitespaces.replaceWhitespaceInToken(
726 Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
727 CurrentPrefix: PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
728 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
729 PrefixWithTrailingIndent.size());
730}
731
732BreakableToken::Split BreakableBlockComment::getReflowSplit(
733 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
734 if (!mayReflow(LineIndex, CommentPragmasRegex))
735 return Split(StringRef::npos, 0);
736
737 // If we're reflowing into a line with content indent, only reflow the next
738 // line if its starting whitespace matches the content indent.
739 size_t Trimmed = Content[LineIndex].find_first_not_of(Chars: Blanks);
740 if (LineIndex) {
741 unsigned PreviousContentIndent = getContentIndent(LineIndex: LineIndex - 1);
742 if (PreviousContentIndent && Trimmed != StringRef::npos &&
743 Trimmed != PreviousContentIndent) {
744 return Split(StringRef::npos, 0);
745 }
746 }
747
748 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
749}
750
751bool BreakableBlockComment::introducesBreakBeforeToken() const {
752 // A break is introduced when we want delimiters on newline.
753 return DelimitersOnNewline &&
754 Lines[0].substr(Start: 1).find_first_not_of(Chars: Blanks) != StringRef::npos;
755}
756
757void BreakableBlockComment::reflow(unsigned LineIndex,
758 WhitespaceManager &Whitespaces) const {
759 StringRef TrimmedContent = Content[LineIndex].ltrim(Chars: Blanks);
760 // Here we need to reflow.
761 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
762 "Reflowing whitespace within a token");
763 // This is the offset of the end of the last line relative to the start of
764 // the token text in the token.
765 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
766 Content[LineIndex - 1].size() -
767 tokenAt(LineIndex).TokenText.data();
768 unsigned WhitespaceLength = TrimmedContent.data() -
769 tokenAt(LineIndex).TokenText.data() -
770 WhitespaceOffsetInToken;
771 Whitespaces.replaceWhitespaceInToken(
772 Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken,
773 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
774 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
775 /*Spaces=*/0);
776}
777
778void BreakableBlockComment::adaptStartOfLine(
779 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
780 if (LineIndex == 0) {
781 if (DelimitersOnNewline) {
782 // Since we're breaking at index 1 below, the break position and the
783 // break length are the same.
784 // Note: this works because getCommentSplit is careful never to split at
785 // the beginning of a line.
786 size_t BreakLength = Lines[0].substr(Start: 1).find_first_not_of(Chars: Blanks);
787 if (BreakLength != StringRef::npos) {
788 insertBreak(LineIndex, TailOffset: 0, Split: Split(1, BreakLength), /*ContentIndent=*/0,
789 Whitespaces);
790 }
791 }
792 return;
793 }
794 // Here no reflow with the previous line will happen.
795 // Fix the decoration of the line at LineIndex.
796 StringRef Prefix = Decoration;
797 if (Content[LineIndex].empty()) {
798 if (LineIndex + 1 == Lines.size()) {
799 if (!LastLineNeedsDecoration) {
800 // If the last line was empty, we don't need a prefix, as the */ will
801 // line up with the decoration (if it exists).
802 Prefix = "";
803 }
804 } else if (!Decoration.empty()) {
805 // For other empty lines, if we do have a decoration, adapt it to not
806 // contain a trailing whitespace.
807 Prefix = Prefix.substr(Start: 0, N: 1);
808 }
809 } else if (ContentColumn[LineIndex] == 1) {
810 // This line starts immediately after the decorating *.
811 Prefix = Prefix.substr(Start: 0, N: 1);
812 }
813 // This is the offset of the end of the last line relative to the start of the
814 // token text in the token.
815 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
816 Content[LineIndex - 1].size() -
817 tokenAt(LineIndex).TokenText.data();
818 unsigned WhitespaceLength = Content[LineIndex].data() -
819 tokenAt(LineIndex).TokenText.data() -
820 WhitespaceOffsetInToken;
821 Whitespaces.replaceWhitespaceInToken(
822 Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken, ReplaceChars: WhitespaceLength, PreviousPostfix: "", CurrentPrefix: Prefix,
823 InPPDirective, /*Newlines=*/1, Spaces: ContentColumn[LineIndex] - Prefix.size());
824}
825
826BreakableToken::Split
827BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
828 if (DelimitersOnNewline) {
829 // Replace the trailing whitespace of the last line with a newline.
830 // In case the last line is empty, the ending '*/' is already on its own
831 // line.
832 StringRef Line = Content.back().substr(Start: TailOffset);
833 StringRef TrimmedLine = Line.rtrim(Chars: Blanks);
834 if (!TrimmedLine.empty())
835 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
836 }
837 return Split(StringRef::npos, 0);
838}
839
840bool BreakableBlockComment::mayReflow(
841 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
842 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
843 // case, we compute the start of the comment pragma manually.
844 StringRef IndentContent = Content[LineIndex];
845 if (Lines[LineIndex].ltrim(Chars: Blanks).starts_with(Prefix: "*"))
846 IndentContent = Lines[LineIndex].ltrim(Chars: Blanks).substr(Start: 1);
847 return LineIndex > 0 && AlwaysReflow &&
848 !CommentPragmasRegex.match(String: IndentContent) &&
849 mayReflowContent(Content: Content[LineIndex]) && !Tok.Finalized &&
850 !switchesFormatting(Token: tokenAt(LineIndex));
851}
852
/// Builds a breakable section out of the line comment \p Token and the line
/// comment tokens chained after it via Next.
///
/// Every token's text is split at '\n' into Lines. For each line the
/// constructor records the original indent prefix (OriginalPrefix), the prefix
/// to emit after applying Style.SpacesInLineCommentPrefix (Prefix and
/// PrefixSpaceChange), the content with the prefix and trailing blanks
/// stripped (Content), the column at which that content starts
/// (ContentColumn), and the token pointer for the line (Tokens). Collection
/// stops at the first following token that is not a line comment or that does
/// not continue the line comment section.
BreakableLineCommentSection::BreakableLineCommentSection(
    const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
  assert(Tok.is(TT_LineComment) &&
         "line comment section must start with a line comment");
  // LineTok is null while the first token of the section is processed; at the
  // end of every iteration it is advanced to the token handled next.
  FormatToken *LineTok = nullptr;
  const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
  // How many spaces we changed in the first line of the section, this will be
  // applied in all following lines
  int FirstLineSpaceChange = 0;
  for (const FormatToken *CurrentTok = &Tok;
       CurrentTok && CurrentTok->is(TT: TT_LineComment);
       CurrentTok = CurrentTok->Next) {
    // Remember the token pointer used for the iteration that is starting; the
    // value left after the loop is what updateNextToken() reads.
    LastLineTok = LineTok;
    StringRef TokenText(CurrentTok->TokenText);
    assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
           "unsupported line comment prefix, '//' and '#' are supported");
    // Lines of this token are appended after the lines collected so far; all
    // per-line vectors are grown to the new line count.
    size_t FirstLineIndex = Lines.size();
    TokenText.split(A&: Lines, Separator: "\n");
    Content.resize(N: Lines.size());
    ContentColumn.resize(N: Lines.size());
    PrefixSpaceChange.resize(N: Lines.size());
    Tokens.resize(N: Lines.size());
    Prefix.resize(N: Lines.size());
    OriginalPrefix.resize(N: Lines.size());
    for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
      Lines[i] = Lines[i].ltrim(Chars: Blanks);
      StringRef IndentPrefix = getLineCommentIndentPrefix(Comment: Lines[i], Style);
      OriginalPrefix[i] = IndentPrefix;
      const int SpacesInPrefix = llvm::count(Range&: IndentPrefix, Element: ' ');

      // This lambda also considers multibyte character that is not handled in
      // functions like isPunctuation provided by CharInfo.
      const auto NoSpaceBeforeFirstCommentChar = [&]() {
        assert(Lines[i].size() > IndentPrefix.size());
        const char FirstCommentChar = Lines[i][IndentPrefix.size()];
        const unsigned FirstCharByteSize =
            encoding::getCodePointNumBytes(FirstChar: FirstCommentChar, Encoding);
        // A first character that is not exactly one column wide never
        // suppresses the space.
        if (encoding::columnWidth(
                Text: Lines[i].substr(Start: IndentPrefix.size(), N: FirstCharByteSize),
                Encoding) != 1) {
          return false;
        }
        // In C-like comments, add a space before #. For example this is useful
        // to preserve the relative indentation when commenting out code with
        // #includes.
        //
        // In languages using # as the comment leader such as proto, don't
        // add a space to support patterns like:
        // #########
        // # section
        // #########
        if (FirstCommentChar == '#' && !TokenText.starts_with(Prefix: "#"))
          return false;
        return FirstCommentChar == '\\' || isPunctuation(c: FirstCommentChar) ||
               isHorizontalWhitespace(c: FirstCommentChar);
      };

      // On the first line of the comment section we calculate how many spaces
      // are to be added or removed, all lines after that just get only the
      // change and we will not look at the maximum anymore. Additionally to the
      // actual first line, we calculate that when the non space Prefix changes,
      // e.g. from "///" to "//".
      if (i == 0 || OriginalPrefix[i].rtrim(Chars: Blanks) !=
                        OriginalPrefix[i - 1].rtrim(Chars: Blanks)) {
        if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
            !NoSpaceBeforeFirstCommentChar()) {
          FirstLineSpaceChange = Minimum - SpacesInPrefix;
        } else if (static_cast<unsigned>(SpacesInPrefix) >
                   Style.SpacesInLineCommentPrefix.Maximum) {
          FirstLineSpaceChange =
              Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
        } else {
          FirstLineSpaceChange = 0;
        }
      }

      if (Lines[i].size() != IndentPrefix.size()) {
        assert(Lines[i].size() > IndentPrefix.size());

        // Apply the section-wide change, but never let this line end up with
        // fewer than Minimum spaces.
        PrefixSpaceChange[i] = SpacesInPrefix + FirstLineSpaceChange < Minimum
                                   ? Minimum - SpacesInPrefix
                                   : FirstLineSpaceChange;

        const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
        const bool IsFormatComment = LineTok && switchesFormatting(Token: *LineTok);
        const bool LineRequiresLeadingSpace =
            !NoSpaceBeforeFirstCommentChar() ||
            (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
        const bool AllowsSpaceChange =
            !IsFormatComment &&
            (SpacesInPrefix != 0 || LineRequiresLeadingSpace);

        if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
          // Pad the prefix with the additional spaces.
          Prefix[i] = IndentPrefix.str();
          Prefix[i].append(n: PrefixSpaceChange[i], c: ' ');
        } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
          // Drop spaces from the end of the prefix, but never more than the
          // prefix actually contains.
          Prefix[i] = IndentPrefix
                          .drop_back(N: std::min<std::size_t>(
                              a: -PrefixSpaceChange[i], b: SpacesInPrefix))
                          .str();
        } else {
          Prefix[i] = IndentPrefix.str();
        }
      } else {
        // If the IndentPrefix is the whole line, there is no content and we
        // simply drop all spaces.
        Prefix[i] = IndentPrefix.drop_back(N: SpacesInPrefix).str();
      }

      Tokens[i] = LineTok;
      Content[i] = Lines[i].substr(Start: IndentPrefix.size());
      ContentColumn[i] =
          StartColumn + encoding::columnWidthWithTabs(Text: Prefix[i], StartColumn,
                                                      TabWidth: Style.TabWidth, Encoding);

      // Calculate the end of the non-whitespace text in this line.
      size_t EndOfLine = Content[i].find_last_not_of(Chars: Blanks);
      if (EndOfLine == StringRef::npos)
        EndOfLine = Content[i].size();
      else
        ++EndOfLine;
      Content[i] = Content[i].substr(Start: 0, N: EndOfLine);
    }
    LineTok = CurrentTok->Next;
    if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
      // A line comment section needs to be broken by a line comment that is
      // preceded by at least two newlines. Note that we put this break here
      // instead of breaking at a previous stage during parsing, since that
      // would split the contents of the enum into two unwrapped lines in this
      // example, which is undesirable:
      // enum A {
      //   a, // comment about a
      //
      //   // comment about b
      //   b
      // };
      //
      // FIXME: Consider putting separate line comment sections as children to
      //        the unwrapped line instead.
      break;
    }
  }
}
998
999unsigned
1000BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1001 StringRef::size_type Length,
1002 unsigned StartColumn) const {
1003 return encoding::columnWidthWithTabs(
1004 Text: Content[LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
1005 Encoding);
1006}
1007
/// \returns the column recorded in ContentColumn for line \p LineIndex, i.e.
/// where the content of that line starts once its prefix has been adjusted.
/// The unnamed Break parameter is not used: the result is the same whether or
/// not a break precedes the content.
unsigned
BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
                                                   bool /*Break*/) const {
  return ContentColumn[LineIndex];
}
1013
1014void BreakableLineCommentSection::insertBreak(
1015 unsigned LineIndex, unsigned TailOffset, Split Split,
1016 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1017 StringRef Text = Content[LineIndex].substr(Start: TailOffset);
1018 // Compute the offset of the split relative to the beginning of the token
1019 // text.
1020 unsigned BreakOffsetInToken =
1021 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1022 unsigned CharsToRemove = Split.second;
1023 Whitespaces.replaceWhitespaceInToken(
1024 Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
1025 CurrentPrefix: Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1026 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1027}
1028
1029BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
1030 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1031 if (!mayReflow(LineIndex, CommentPragmasRegex))
1032 return Split(StringRef::npos, 0);
1033
1034 size_t Trimmed = Content[LineIndex].find_first_not_of(Chars: Blanks);
1035
1036 // In a line comment section each line is a separate token; thus, after a
1037 // split we replace all whitespace before the current line comment token
1038 // (which does not need to be included in the split), plus the start of the
1039 // line up to where the content starts.
1040 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1041}
1042
1043void BreakableLineCommentSection::reflow(unsigned LineIndex,
1044 WhitespaceManager &Whitespaces) const {
1045 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1046 // Reflow happens between tokens. Replace the whitespace between the
1047 // tokens by the empty string.
1048 Whitespaces.replaceWhitespace(
1049 Tok&: *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1050 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1051 /*InPPDirective=*/false);
1052 } else if (LineIndex > 0) {
1053 // In case we're reflowing after the '\' in:
1054 //
1055 // // line comment \
1056 // // line 2
1057 //
1058 // the reflow happens inside the single comment token (it is a single line
1059 // comment with an unescaped newline).
1060 // Replace the whitespace between the '\' and '//' with the empty string.
1061 //
1062 // Offset points to after the '\' relative to start of the token.
1063 unsigned Offset = Lines[LineIndex - 1].data() +
1064 Lines[LineIndex - 1].size() -
1065 tokenAt(LineIndex: LineIndex - 1).TokenText.data();
1066 // WhitespaceLength is the number of chars between the '\' and the '//' on
1067 // the next line.
1068 unsigned WhitespaceLength =
1069 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1070 Whitespaces.replaceWhitespaceInToken(Tok: *Tokens[LineIndex], Offset,
1071 /*ReplaceChars=*/WhitespaceLength,
1072 /*PreviousPostfix=*/"",
1073 /*CurrentPrefix=*/"",
1074 /*InPPDirective=*/false,
1075 /*Newlines=*/0,
1076 /*Spaces=*/0);
1077 }
1078 // Replace the indent and prefix of the token with the reflow prefix.
1079 unsigned Offset =
1080 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1081 unsigned WhitespaceLength =
1082 Content[LineIndex].data() - Lines[LineIndex].data();
1083 Whitespaces.replaceWhitespaceInToken(Tok: *Tokens[LineIndex], Offset,
1084 /*ReplaceChars=*/WhitespaceLength,
1085 /*PreviousPostfix=*/"",
1086 /*CurrentPrefix=*/ReflowPrefix,
1087 /*InPPDirective=*/false,
1088 /*Newlines=*/0,
1089 /*Spaces=*/0);
1090}
1091
1092void BreakableLineCommentSection::adaptStartOfLine(
1093 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1094 // If this is the first line of a token, we need to inform Whitespace Manager
1095 // about it: either adapt the whitespace range preceding it, or mark it as an
1096 // untouchable token.
1097 // This happens for instance here:
1098 // // line 1 \
1099 // // line 2
1100 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1101 // This is the first line for the current token, but no reflow with the
1102 // previous token is necessary. However, we still may need to adjust the
1103 // start column. Note that ContentColumn[LineIndex] is the expected
1104 // content column after a possible update to the prefix, hence the prefix
1105 // length change is included.
1106 unsigned LineColumn =
1107 ContentColumn[LineIndex] -
1108 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1109 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1110
1111 // We always want to create a replacement instead of adding an untouchable
1112 // token, even if LineColumn is the same as the original column of the
1113 // token. This is because WhitespaceManager doesn't align trailing
1114 // comments if they are untouchable.
1115 Whitespaces.replaceWhitespace(Tok&: *Tokens[LineIndex],
1116 /*Newlines=*/1,
1117 /*Spaces=*/LineColumn,
1118 /*StartOfTokenColumn=*/LineColumn,
1119 /*IsAligned=*/true,
1120 /*InPPDirective=*/false);
1121 }
1122 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1123 // Adjust the prefix if necessary.
1124 const auto SpacesToRemove = -std::min(a: PrefixSpaceChange[LineIndex], b: 0);
1125 const auto SpacesToAdd = std::max(a: PrefixSpaceChange[LineIndex], b: 0);
1126 Whitespaces.replaceWhitespaceInToken(
1127 Tok: tokenAt(LineIndex), Offset: OriginalPrefix[LineIndex].size() - SpacesToRemove,
1128 /*ReplaceChars=*/SpacesToRemove, PreviousPostfix: "", CurrentPrefix: "", /*InPPDirective=*/false,
1129 /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1130 }
1131}
1132
1133void BreakableLineCommentSection::updateNextToken(LineState &State) const {
1134 if (LastLineTok)
1135 State.NextToken = LastLineTok->Next;
1136}
1137
1138bool BreakableLineCommentSection::mayReflow(
1139 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1140 // Line comments have the indent as part of the prefix, so we need to
1141 // recompute the start of the line.
1142 StringRef IndentContent = Content[LineIndex];
1143 if (Lines[LineIndex].starts_with(Prefix: "//"))
1144 IndentContent = Lines[LineIndex].substr(Start: 2);
1145 // FIXME: Decide whether we want to reflow non-regular indents:
1146 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1147 // OriginalPrefix[LineIndex-1]. That means we don't reflow
1148 // // text that protrudes
1149 // // into text with different indent
1150 // We do reflow in that case in block comments.
1151 return LineIndex > 0 && AlwaysReflow &&
1152 !CommentPragmasRegex.match(String: IndentContent) &&
1153 mayReflowContent(Content: Content[LineIndex]) && !Tok.Finalized &&
1154 !switchesFormatting(Token: tokenAt(LineIndex)) &&
1155 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1156}
1157
1158} // namespace format
1159} // namespace clang
1160