1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
26class AnnotatedLine;
27class BreakableToken;
28struct FormatToken;
29struct LineState;
30struct ParenState;
31struct RawStringFormatStyleManager;
32class WhitespaceManager;
33
34struct RawStringFormatStyleManager {
35 llvm::StringMap<FormatStyle> DelimiterStyle;
36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
38 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39
40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
42 std::optional<FormatStyle>
43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44};
45
/// Describes the leading spaces of a line while remembering what those spaces
/// stand for.
struct IndentationAndAlignment {
  /// Overall number of spaces at the start of the line.
  unsigned Total;

  /// The column from which the position of the line start is computed. This
  /// value may exceed \c Total.
  unsigned IndentedFrom;

  /// Creates a value with both fields given explicitly.
  IndentationAndAlignment(unsigned Total, unsigned IndentedFrom);

  /// Creates a value from a single space count. The constructor is
  /// deliberately not \c explicit, so a plain \c unsigned converts implicitly.
  /// NOTE(review): presumably sets both fields to \p Spaces -- the definition
  /// is out of line; confirm there.
  IndentationAndAlignment(unsigned Spaces);

  /// Returns a copy widened by \p Spaces for right-justifying a token, leaving
  /// \c IndentedFrom untouched.
  ///
  /// Why \c IndentedFrom has to stay put is illustrated by this Objective-C
  /// snippet. The token `xx` is right-justified through this method so that
  /// the `:` symbols line up. A later step aligns assignments and relies on
  /// \c IndentedFrom to decide which lines to shift; because the field is
  /// unchanged here, both lines are shifted together and the `:` symbols stay
  /// aligned.
  ///
  ///   [x //
  ///       xxxx:0
  ///         xx:0];
  IndentationAndAlignment addPadding(unsigned Spaces) const;

  /// Indentation is added far more often than padding, hence the operator
  /// performs the former.
  IndentationAndAlignment operator+(unsigned Spaces) const;

  /// NOTE(review): presumably the counterpart of \c operator+ that removes
  /// \p Spaces of indentation -- defined out of line; confirm there.
  IndentationAndAlignment operator-(unsigned Spaces) const;

  /// NOTE(review): presumably the in-place form of \c operator+ -- defined out
  /// of line; confirm there.
  IndentationAndAlignment &operator+=(unsigned Spaces);

  /// Ordering predicate; its exact key is defined out of line.
  bool operator<(const IndentationAndAlignment &Other) const;
};
80
81class ContinuationIndenter {
82public:
83 /// Constructs a \c ContinuationIndenter to format \p Line starting in
84 /// column \p FirstIndent.
85 ContinuationIndenter(const FormatStyle &Style,
86 const AdditionalKeywords &Keywords,
87 const SourceManager &SourceMgr,
88 WhitespaceManager &Whitespaces,
89 encoding::Encoding Encoding,
90 bool BinPackInconclusiveFunctions);
91
92 /// Get the initial state, i.e. the state after placing \p Line's
93 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
94 /// the case of formatting inside raw string literals, \p FirstStartColumn is
95 /// the column at which the state of the parent formatter is.
96 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
97 const AnnotatedLine *Line, bool DryRun);
98
99 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
100 // better home.
101 /// Returns \c true, if a line break after \p State is allowed.
102 bool canBreak(const LineState &State);
103
104 /// Returns \c true, if a line break after \p State is mandatory.
105 bool mustBreak(const LineState &State);
106
107 /// Appends the next token to \p State and updates information
108 /// necessary for indentation.
109 ///
110 /// Puts the token on the current line if \p Newline is \c false and adds a
111 /// line break and necessary indentation otherwise.
112 ///
113 /// If \p DryRun is \c false, also creates and stores the required
114 /// \c Replacement.
115 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
116 unsigned ExtraSpaces = 0);
117
118 /// Get the column limit for this line. This is the style's column
119 /// limit, potentially reduced for preprocessor definitions.
120 unsigned getColumnLimit(const LineState &State) const;
121
122private:
123 /// Mark the next token as consumed in \p State and modify its stacks
124 /// accordingly.
125 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
126
127 /// Update 'State' according to the next token's fake left parentheses.
128 void moveStatePastFakeLParens(LineState &State, bool Newline);
129 /// Update 'State' according to the next token's fake r_parens.
130 void moveStatePastFakeRParens(LineState &State);
131
132 /// Update 'State' according to the next token being one of "(<{[".
133 void moveStatePastScopeOpener(LineState &State, bool Newline);
134 /// Update 'State' according to the next token being one of ")>}]".
135 void moveStatePastScopeCloser(LineState &State);
136 /// Update 'State' with the next token opening a nested block.
137 void moveStateToNewBlock(LineState &State, bool NewLine);
138
139 /// Reformats a raw string literal.
140 ///
141 /// \returns An extra penalty induced by reformatting the token.
142 unsigned reformatRawStringLiteral(const FormatToken &Current,
143 LineState &State,
144 const FormatStyle &RawStringStyle,
145 bool DryRun, bool Newline);
146
147 /// If the current token is at the end of the current line, handle
148 /// the transition to the next line.
149 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
150 bool DryRun, bool AllowBreak, bool Newline);
151
152 /// If \p Current is a raw string that is configured to be reformatted,
153 /// return the style to be used.
154 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
155 const LineState &State);
156
157 /// If the current token sticks out over the end of the line, break
158 /// it if possible.
159 ///
160 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
161 /// when tokens are broken or lines exceed the column limit, and exceeded
162 /// indicates whether the algorithm purposefully left lines exceeding the
163 /// column limit.
164 ///
165 /// The returned penalty will cover the cost of the additional line breaks
166 /// and column limit violation in all lines except for the last one. The
167 /// penalty for the column limit violation in the last line (and in single
168 /// line tokens) is handled in \c addNextStateToQueue.
169 ///
170 /// \p Strict indicates whether reflowing is allowed to leave characters
171 /// protruding the column limit; if true, lines will be split strictly within
172 /// the column limit where possible; if false, words are allowed to protrude
173 /// over the column limit as long as the penalty is less than the penalty
174 /// of a break.
175 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
176 LineState &State,
177 bool AllowBreak, bool DryRun,
178 bool Strict);
179
180 /// Returns the \c BreakableToken starting at \p Current, or nullptr
181 /// if the current token cannot be broken.
182 std::unique_ptr<BreakableToken>
183 createBreakableToken(const FormatToken &Current, LineState &State,
184 bool AllowBreak);
185
186 /// Appends the next token to \p State and updates information
187 /// necessary for indentation.
188 ///
189 /// Puts the token on the current line.
190 ///
191 /// If \p DryRun is \c false, also creates and stores the required
192 /// \c Replacement.
193 void addTokenOnCurrentLine(LineState &State, bool DryRun,
194 unsigned ExtraSpaces);
195
196 /// Appends the next token to \p State and updates information
197 /// necessary for indentation.
198 ///
199 /// Adds a line break and necessary indentation.
200 ///
201 /// If \p DryRun is \c false, also creates and stores the required
202 /// \c Replacement.
203 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
204
205 /// Calculate the new column for a line wrap before the next token.
206 IndentationAndAlignment getNewLineColumn(const LineState &State);
207
208 /// Adds a multiline token to the \p State.
209 ///
210 /// \returns Extra penalty for the first line of the literal: last line is
211 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
212 /// matter, as we don't change them.
213 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
214
215 /// Returns \c true if the next token starts a multiline string
216 /// literal.
217 ///
218 /// This includes implicitly concatenated strings, strings that will be broken
219 /// by clang-format and string literals with escaped newlines.
220 bool nextIsMultilineString(const LineState &State);
221
222 FormatStyle Style;
223 const AdditionalKeywords &Keywords;
224 const SourceManager &SourceMgr;
225 WhitespaceManager &Whitespaces;
226 encoding::Encoding Encoding;
227 bool BinPackInconclusiveFunctions;
228 llvm::Regex CommentPragmasRegex;
229 const RawStringFormatStyleManager RawStringFormats;
230};
231
232struct ParenState {
233 ParenState(const FormatToken *Tok, IndentationAndAlignment Indent,
234 unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
235 : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
236 NestedBlockIndent(Indent.Total), IsAligned(false),
237 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
238 BreakBeforeClosingAngle(false), AvoidBinPacking(AvoidBinPacking),
239 BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
240 NoLineBreakInOperand(false), LastOperatorWrapped(true),
241 ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
242 AlignColons(true), ObjCSelectorNameFound(false),
243 HasMultipleNestedBlocks(false), NestedBlockInlined(false),
244 IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false),
245 IsChainedConditional(false), IsWrappedConditional(false),
246 UnindentOperator(false) {}
247
248 /// The token opening this parenthesis level, or nullptr if this level is
249 /// opened by fake parenthesis.
250 ///
251 /// Not considered for memoization as it will always have the same value at
252 /// the same token.
253 const FormatToken *Tok;
254
255 /// The position to which a specific parenthesis level needs to be
256 /// indented.
257 IndentationAndAlignment Indent;
258
259 /// The position of the last space on each level.
260 ///
261 /// Used e.g. to break like:
262 /// functionCall(Parameter, otherCall(
263 /// OtherParameter));
264 unsigned LastSpace;
265
266 /// If a block relative to this parenthesis level gets wrapped, indent
267 /// it this much.
268 unsigned NestedBlockIndent;
269
270 /// The position the first "<<" operator encountered on each level.
271 ///
272 /// Used to align "<<" operators. 0 if no such operator has been encountered
273 /// on a level.
274 unsigned FirstLessLess = 0;
275
276 /// The column of a \c ? in a conditional expression;
277 unsigned QuestionColumn = 0;
278
279 /// The position of the colon in an ObjC method declaration/call.
280 unsigned ColonPos = 0;
281
282 /// The start of the most recent function in a builder-type call.
283 unsigned StartOfFunctionCall = 0;
284
285 /// Contains the start of array subscript expressions, so that they
286 /// can be aligned.
287 unsigned StartOfArraySubscripts = 0;
288
289 /// If a nested name specifier was broken over multiple lines, this
290 /// contains the start column of the second line. Otherwise 0.
291 unsigned NestedNameSpecifierContinuation = 0;
292
293 /// If a call expression was broken over multiple lines, this
294 /// contains the start column of the second line. Otherwise 0.
295 unsigned CallContinuation = 0;
296
297 /// The column of the first variable name in a variable declaration.
298 ///
299 /// Used to align further variables if necessary.
300 unsigned VariablePos = 0;
301
302 /// Whether this block's indentation is used for alignment.
303 bool IsAligned : 1;
304
305 /// Whether a newline needs to be inserted before the block's closing
306 /// brace.
307 ///
308 /// We only want to insert a newline before the closing brace if there also
309 /// was a newline after the beginning left brace.
310 bool BreakBeforeClosingBrace : 1;
311
312 /// Whether a newline needs to be inserted before the block's closing
313 /// paren.
314 ///
315 /// We only want to insert a newline before the closing paren if there also
316 /// was a newline after the beginning left paren.
317 bool BreakBeforeClosingParen : 1;
318
319 /// Whether a newline needs to be inserted before a closing angle `>`.
320 bool BreakBeforeClosingAngle : 1;
321
322 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
323 /// lines, in this context.
324 bool AvoidBinPacking : 1;
325
326 /// Break after the next comma (or all the commas in this context if
327 /// \c AvoidBinPacking is \c true).
328 bool BreakBeforeParameter : 1;
329
330 /// Line breaking in this context would break a formatting rule.
331 bool NoLineBreak : 1;
332
333 /// Same as \c NoLineBreak, but is restricted until the end of the
334 /// operand (including the next ",").
335 bool NoLineBreakInOperand : 1;
336
337 /// True if the last binary operator on this level was wrapped to the
338 /// next line.
339 bool LastOperatorWrapped : 1;
340
341 /// \c true if this \c ParenState already contains a line-break.
342 ///
343 /// The first line break in a certain \c ParenState causes extra penalty so
344 /// that clang-format prefers similar breaks, i.e. breaks in the same
345 /// parenthesis.
346 bool ContainsLineBreak : 1;
347
348 /// \c true if this \c ParenState contains multiple segments of a
349 /// builder-type call on one line.
350 bool ContainsUnwrappedBuilder : 1;
351
352 /// \c true if the colons of the curren ObjC method expression should
353 /// be aligned.
354 ///
355 /// Not considered for memoization as it will always have the same value at
356 /// the same token.
357 bool AlignColons : 1;
358
359 /// \c true if at least one selector name was found in the current
360 /// ObjC method expression.
361 ///
362 /// Not considered for memoization as it will always have the same value at
363 /// the same token.
364 bool ObjCSelectorNameFound : 1;
365
366 /// \c true if there are multiple nested blocks inside these parens.
367 ///
368 /// Not considered for memoization as it will always have the same value at
369 /// the same token.
370 bool HasMultipleNestedBlocks : 1;
371
372 /// The start of a nested block (e.g. lambda introducer in C++ or
373 /// "function" in JavaScript) is not wrapped to a new line.
374 bool NestedBlockInlined : 1;
375
376 /// \c true if the current \c ParenState represents an Objective-C
377 /// array literal.
378 bool IsInsideObjCArrayLiteral : 1;
379
380 bool IsCSharpGenericTypeConstraint : 1;
381
382 /// true if the current \c ParenState represents the false branch of a chained
383 /// conditional expression (e.g. else-if)
384 bool IsChainedConditional : 1;
385
386 /// true if there conditionnal was wrapped on the first operator (the question
387 /// mark)
388 bool IsWrappedConditional : 1;
389
390 /// Indicates the indent should be reduced by the length of the operator.
391 bool UnindentOperator : 1;
392
393 bool operator<(const ParenState &Other) const {
394 if (Indent.Total != Other.Indent.Total)
395 return Indent.Total < Other.Indent.Total;
396 if (LastSpace != Other.LastSpace)
397 return LastSpace < Other.LastSpace;
398 if (NestedBlockIndent != Other.NestedBlockIndent)
399 return NestedBlockIndent < Other.NestedBlockIndent;
400 if (FirstLessLess != Other.FirstLessLess)
401 return FirstLessLess < Other.FirstLessLess;
402 if (IsAligned != Other.IsAligned)
403 return IsAligned;
404 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
405 return BreakBeforeClosingBrace;
406 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
407 return BreakBeforeClosingParen;
408 if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle)
409 return BreakBeforeClosingAngle;
410 if (QuestionColumn != Other.QuestionColumn)
411 return QuestionColumn < Other.QuestionColumn;
412 if (AvoidBinPacking != Other.AvoidBinPacking)
413 return AvoidBinPacking;
414 if (BreakBeforeParameter != Other.BreakBeforeParameter)
415 return BreakBeforeParameter;
416 if (NoLineBreak != Other.NoLineBreak)
417 return NoLineBreak;
418 if (LastOperatorWrapped != Other.LastOperatorWrapped)
419 return LastOperatorWrapped;
420 if (ColonPos != Other.ColonPos)
421 return ColonPos < Other.ColonPos;
422 if (StartOfFunctionCall != Other.StartOfFunctionCall)
423 return StartOfFunctionCall < Other.StartOfFunctionCall;
424 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
425 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
426 if (CallContinuation != Other.CallContinuation)
427 return CallContinuation < Other.CallContinuation;
428 if (VariablePos != Other.VariablePos)
429 return VariablePos < Other.VariablePos;
430 if (ContainsLineBreak != Other.ContainsLineBreak)
431 return ContainsLineBreak;
432 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
433 return ContainsUnwrappedBuilder;
434 if (NestedBlockInlined != Other.NestedBlockInlined)
435 return NestedBlockInlined;
436 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
437 return IsCSharpGenericTypeConstraint;
438 if (IsChainedConditional != Other.IsChainedConditional)
439 return IsChainedConditional;
440 if (IsWrappedConditional != Other.IsWrappedConditional)
441 return IsWrappedConditional;
442 if (UnindentOperator != Other.UnindentOperator)
443 return UnindentOperator;
444 return Indent < Other.Indent;
445 }
446};
447
448/// The current state when indenting a unwrapped line.
449///
450/// As the indenting tries different combinations this is copied by value.
451struct LineState {
452 /// The number of used columns in the current line.
453 unsigned Column;
454
455 /// The token that needs to be next formatted.
456 FormatToken *NextToken;
457
458 /// \c true if \p NextToken should not continue this line.
459 bool NoContinuation;
460
461 /// The \c NestingLevel at the start of this line.
462 unsigned StartOfLineLevel;
463
464 /// The lowest \c NestingLevel on the current line.
465 unsigned LowestLevelOnLine;
466
467 /// The start column of the string literal, if we're in a string
468 /// literal sequence, 0 otherwise.
469 unsigned StartOfStringLiteral;
470
471 /// Disallow line breaks for this line.
472 bool NoLineBreak;
473
474 /// A stack keeping track of properties applying to parenthesis
475 /// levels.
476 SmallVector<ParenState> Stack;
477
478 /// Ignore the stack of \c ParenStates for state comparison.
479 ///
480 /// In long and deeply nested unwrapped lines, the current algorithm can
481 /// be insufficient for finding the best formatting with a reasonable amount
482 /// of time and memory. Setting this flag will effectively lead to the
483 /// algorithm not analyzing some combinations. However, these combinations
484 /// rarely contain the optimal solution: In short, accepting a higher
485 /// penalty early would need to lead to different values in the \c
486 /// ParenState stack (in an otherwise identical state) and these different
487 /// values would need to lead to a significant amount of avoided penalty
488 /// later.
489 ///
490 /// FIXME: Come up with a better algorithm instead.
491 bool IgnoreStackForComparison;
492
493 /// The indent of the first token.
494 unsigned FirstIndent;
495
496 /// The line that is being formatted.
497 ///
498 /// Does not need to be considered for memoization because it doesn't change.
499 const AnnotatedLine *Line;
500
501 /// Comparison operator to be able to used \c LineState in \c map.
502 bool operator<(const LineState &Other) const {
503 if (NextToken != Other.NextToken)
504 return NextToken < Other.NextToken;
505 if (Column != Other.Column)
506 return Column < Other.Column;
507 if (NoContinuation != Other.NoContinuation)
508 return NoContinuation;
509 if (StartOfLineLevel != Other.StartOfLineLevel)
510 return StartOfLineLevel < Other.StartOfLineLevel;
511 if (LowestLevelOnLine != Other.LowestLevelOnLine)
512 return LowestLevelOnLine < Other.LowestLevelOnLine;
513 if (StartOfStringLiteral != Other.StartOfStringLiteral)
514 return StartOfStringLiteral < Other.StartOfStringLiteral;
515 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
516 return false;
517 return Stack < Other.Stack;
518 }
519};
520
521} // end namespace format
522} // end namespace clang
523
524#endif
525