//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
// Forward declarations of the types this header only names; several are
// used purely through pointers or references in the declarations that follow.
class AnnotatedLine;
class BreakableToken;
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
class WhitespaceManager;
33
34struct RawStringFormatStyleManager {
35 llvm::StringMap<FormatStyle> DelimiterStyle;
36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
38 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39
40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
42 std::optional<FormatStyle>
43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44};
45
/// Represents the spaces at the start of a line, keeping track of what the
/// spaces are for.
struct IndentationAndAlignment {
  /// NOTE(review): presumably the total number of spaces at the start of the
  /// line (indentation plus any padding); confirm against the definitions.
  unsigned Total;

  /// The column that the position of the start of the line is calculated
  /// from. It can be more than Total.
  unsigned IndentedFrom;

  /// Add spaces for right-justifying the token. The IndentedFrom field does not
  /// change.
  ///
  /// This example in Objective-C shows why the field should not change. The
  /// token `xx` is right-justified with this method to align the `:`
  /// symbols. The `:` symbols should remain aligned through the step that
  /// aligns assignments. That step uses the IndentedFrom field to tell what
  /// lines to move. Not changing the field in this method ensures that the 2
  /// lines move together.
  ///
  ///    [x //
  ///        xxxx:0
  ///          xx:0];
  IndentationAndAlignment addPadding(unsigned Spaces) const;
  /// Adding indentation is more common than padding. So the operator does that.
  IndentationAndAlignment operator+(unsigned Spaces) const;
  /// Counterpart of \c operator+ taking away \p Spaces. Returns a new value;
  /// the object itself is not modified (the operator is \c const).
  IndentationAndAlignment operator-(unsigned Spaces) const;
  /// In-place form of \c operator+. Returns a reference so calls can be
  /// chained.
  IndentationAndAlignment &operator+=(unsigned Spaces);

  /// Initializes both fields explicitly.
  IndentationAndAlignment(unsigned Total, unsigned IndentedFrom);

  /// Builds a value from a plain number of spaces. The constructor is not
  /// marked \c explicit, so an \c unsigned converts implicitly wherever an
  /// \c IndentationAndAlignment is expected.
  IndentationAndAlignment(unsigned Spaces);

  /// Ordering used when values of this type are compared, e.g. as the final
  /// tie-breaker in \c ParenState::operator<.
  bool operator<(const IndentationAndAlignment &Other) const;
};
80
81class ContinuationIndenter {
82public:
83 /// Constructs a \c ContinuationIndenter to format \p Line starting in
84 /// column \p FirstIndent.
85 ContinuationIndenter(const FormatStyle &Style,
86 const AdditionalKeywords &Keywords,
87 const SourceManager &SourceMgr,
88 WhitespaceManager &Whitespaces,
89 encoding::Encoding Encoding,
90 bool BinPackInconclusiveFunctions);
91
92 /// Get the initial state, i.e. the state after placing \p Line's
93 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
94 /// the case of formatting inside raw string literals, \p FirstStartColumn is
95 /// the column at which the state of the parent formatter is.
96 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
97 const AnnotatedLine *Line, bool DryRun);
98
99 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
100 // better home.
101 /// Returns \c true, if a line break after \p State is allowed.
102 bool canBreak(const LineState &State);
103
104 /// Returns \c true, if a line break after \p State is mandatory.
105 bool mustBreak(const LineState &State);
106
107 /// Appends the next token to \p State and updates information
108 /// necessary for indentation.
109 ///
110 /// Puts the token on the current line if \p Newline is \c false and adds a
111 /// line break and necessary indentation otherwise.
112 ///
113 /// If \p DryRun is \c false, also creates and stores the required
114 /// \c Replacement.
115 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
116 unsigned ExtraSpaces = 0);
117
118 /// Get the column limit for this line. This is the style's column
119 /// limit, potentially reduced for preprocessor definitions.
120 unsigned getColumnLimit(const LineState &State) const;
121
122private:
123 /// Mark the next token as consumed in \p State and modify its stacks
124 /// accordingly.
125 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
126
127 /// Update 'State' according to the next token's fake left parentheses.
128 void moveStatePastFakeLParens(LineState &State, bool Newline);
129 /// Update 'State' according to the next token's fake r_parens.
130 void moveStatePastFakeRParens(LineState &State);
131
132 /// Update 'State' according to the next token being one of "(<{[".
133 void moveStatePastScopeOpener(LineState &State, bool Newline);
134 /// Update 'State' according to the next token being one of ")>}]".
135 void moveStatePastScopeCloser(LineState &State);
136 /// Update 'State' with the next token opening a nested block.
137 void moveStateToNewBlock(LineState &State, bool NewLine);
138
139 /// Reformats a raw string literal.
140 ///
141 /// \returns An extra penalty induced by reformatting the token.
142 unsigned reformatRawStringLiteral(const FormatToken &Current,
143 LineState &State,
144 const FormatStyle &RawStringStyle,
145 bool DryRun, bool Newline);
146
147 /// If the current token is at the end of the current line, handle
148 /// the transition to the next line.
149 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
150 bool DryRun, bool AllowBreak, bool Newline);
151
152 /// If \p Current is a raw string that is configured to be reformatted,
153 /// return the style to be used.
154 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
155 const LineState &State);
156
157 /// If the current token sticks out over the end of the line, break
158 /// it if possible.
159 ///
160 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
161 /// when tokens are broken or lines exceed the column limit, and exceeded
162 /// indicates whether the algorithm purposefully left lines exceeding the
163 /// column limit.
164 ///
165 /// The returned penalty will cover the cost of the additional line breaks
166 /// and column limit violation in all lines except for the last one. The
167 /// penalty for the column limit violation in the last line (and in single
168 /// line tokens) is handled in \c addNextStateToQueue.
169 ///
170 /// \p Strict indicates whether reflowing is allowed to leave characters
171 /// protruding the column limit; if true, lines will be split strictly within
172 /// the column limit where possible; if false, words are allowed to protrude
173 /// over the column limit as long as the penalty is less than the penalty
174 /// of a break.
175 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
176 LineState &State,
177 bool AllowBreak, bool DryRun,
178 bool Strict);
179
180 /// Returns the \c BreakableToken starting at \p Current, or nullptr
181 /// if the current token cannot be broken.
182 std::unique_ptr<BreakableToken>
183 createBreakableToken(const FormatToken &Current, LineState &State,
184 bool AllowBreak);
185
186 /// Appends the next token to \p State and updates information
187 /// necessary for indentation.
188 ///
189 /// Puts the token on the current line.
190 ///
191 /// If \p DryRun is \c false, also creates and stores the required
192 /// \c Replacement.
193 void addTokenOnCurrentLine(LineState &State, bool DryRun,
194 unsigned ExtraSpaces);
195
196 /// Appends the next token to \p State and updates information
197 /// necessary for indentation.
198 ///
199 /// Adds a line break and necessary indentation.
200 ///
201 /// If \p DryRun is \c false, also creates and stores the required
202 /// \c Replacement.
203 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
204
205 /// Calculate the new column for a line wrap before the next token.
206 IndentationAndAlignment getNewLineColumn(const LineState &State);
207
208 /// Adds a multiline token to the \p State.
209 ///
210 /// \returns Extra penalty for the first line of the literal: last line is
211 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
212 /// matter, as we don't change them.
213 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
214
215 /// Returns \c true if the next token starts a multiline string
216 /// literal.
217 ///
218 /// This includes implicitly concatenated strings, strings that will be broken
219 /// by clang-format and string literals with escaped newlines.
220 bool nextIsMultilineString(const LineState &State);
221
222 FormatStyle Style;
223 const AdditionalKeywords &Keywords;
224 const SourceManager &SourceMgr;
225 WhitespaceManager &Whitespaces;
226 encoding::Encoding Encoding;
227 bool BinPackInconclusiveFunctions;
228 llvm::Regex CommentPragmasRegex;
229 const RawStringFormatStyleManager RawStringFormats;
230};
231
232struct ParenState {
233 ParenState(const FormatToken *Tok, IndentationAndAlignment Indent,
234 unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
235 : Tok(Tok), Indent(Indent), AlignedTo(nullptr), LastSpace(LastSpace),
236 NestedBlockIndent(Indent.Total), BreakBeforeClosingBrace(false),
237 BreakBeforeClosingParen(false), BreakBeforeClosingAngle(false),
238 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
239 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
240 LastOperatorWrapped(true), ContainsLineBreak(false),
241 ContainsUnwrappedBuilder(false), AlignColons(true),
242 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
243 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
244 IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
245 IsWrappedConditional(false), UnindentOperator(false) {}
246
247 /// The token opening this parenthesis level, or nullptr if this level is
248 /// opened by fake parenthesis.
249 ///
250 /// Not considered for memoization as it will always have the same value at
251 /// the same token.
252 const FormatToken *Tok;
253
254 /// The position to which a specific parenthesis level needs to be
255 /// indented.
256 IndentationAndAlignment Indent;
257
258 /// The token in one of the previous lines this state wants to align to.
259 const FormatToken *AlignedTo;
260
261 /// The position of the last space on each level.
262 ///
263 /// Used e.g. to break like:
264 /// functionCall(Parameter, otherCall(
265 /// OtherParameter));
266 unsigned LastSpace;
267
268 /// If a block relative to this parenthesis level gets wrapped, indent
269 /// it this much.
270 unsigned NestedBlockIndent;
271
272 /// The position the first "<<" operator encountered on each level.
273 ///
274 /// Used to align "<<" operators. 0 if no such operator has been encountered
275 /// on a level.
276 unsigned FirstLessLess = 0;
277
278 /// The column of a \c ? in a conditional expression;
279 unsigned QuestionColumn = 0;
280
281 /// The position of the colon in an ObjC method declaration/call.
282 unsigned ColonPos = 0;
283
284 /// The start of the most recent function in a builder-type call.
285 unsigned StartOfFunctionCall = 0;
286
287 /// Contains the start of array subscript expressions, so that they
288 /// can be aligned.
289 unsigned StartOfArraySubscripts = 0;
290
291 /// If a nested name specifier was broken over multiple lines, this
292 /// contains the start column of the second line. Otherwise 0.
293 unsigned NestedNameSpecifierContinuation = 0;
294
295 /// If a call expression was broken over multiple lines, this
296 /// contains the start column of the second line. Otherwise 0.
297 unsigned CallContinuation = 0;
298
299 /// The column of the first variable name in a variable declaration.
300 ///
301 /// Used to align further variables if necessary.
302 unsigned VariablePos = 0;
303
304 /// The precedence. The outermost level and the levels corresponding to tokens
305 /// have prec::Unknown.
306 prec::Level Precedence = prec::Unknown;
307
308 /// Whether a newline needs to be inserted before the block's closing
309 /// brace.
310 ///
311 /// We only want to insert a newline before the closing brace if there also
312 /// was a newline after the beginning left brace.
313 bool BreakBeforeClosingBrace : 1;
314
315 /// Whether a newline needs to be inserted before the block's closing
316 /// paren.
317 ///
318 /// We only want to insert a newline before the closing paren if there also
319 /// was a newline after the beginning left paren.
320 bool BreakBeforeClosingParen : 1;
321
322 /// Whether a newline needs to be inserted before a closing angle `>`.
323 bool BreakBeforeClosingAngle : 1;
324
325 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
326 /// lines, in this context.
327 bool AvoidBinPacking : 1;
328
329 /// Break after the next comma (or all the commas in this context if
330 /// \c AvoidBinPacking is \c true).
331 bool BreakBeforeParameter : 1;
332
333 /// Line breaking in this context would break a formatting rule.
334 bool NoLineBreak : 1;
335
336 /// Same as \c NoLineBreak, but is restricted until the end of the
337 /// operand (including the next ",").
338 bool NoLineBreakInOperand : 1;
339
340 /// True if the last binary operator on this level was wrapped to the
341 /// next line.
342 bool LastOperatorWrapped : 1;
343
344 /// \c true if this \c ParenState already contains a line-break.
345 ///
346 /// The first line break in a certain \c ParenState causes extra penalty so
347 /// that clang-format prefers similar breaks, i.e. breaks in the same
348 /// parenthesis.
349 bool ContainsLineBreak : 1;
350
351 /// \c true if this \c ParenState contains multiple segments of a
352 /// builder-type call on one line.
353 bool ContainsUnwrappedBuilder : 1;
354
355 /// \c true if the colons of the curren ObjC method expression should
356 /// be aligned.
357 ///
358 /// Not considered for memoization as it will always have the same value at
359 /// the same token.
360 bool AlignColons : 1;
361
362 /// \c true if at least one selector name was found in the current
363 /// ObjC method expression.
364 ///
365 /// Not considered for memoization as it will always have the same value at
366 /// the same token.
367 bool ObjCSelectorNameFound : 1;
368
369 /// \c true if there are multiple nested blocks inside these parens.
370 ///
371 /// Not considered for memoization as it will always have the same value at
372 /// the same token.
373 bool HasMultipleNestedBlocks : 1;
374
375 /// The start of a nested block (e.g. lambda introducer in C++ or
376 /// "function" in JavaScript) is not wrapped to a new line.
377 bool NestedBlockInlined : 1;
378
379 /// \c true if the current \c ParenState represents an Objective-C
380 /// array literal.
381 bool IsInsideObjCArrayLiteral : 1;
382
383 bool IsCSharpGenericTypeConstraint : 1;
384
385 /// true if the current \c ParenState represents the false branch of a chained
386 /// conditional expression (e.g. else-if)
387 bool IsChainedConditional : 1;
388
389 /// true if there conditionnal was wrapped on the first operator (the question
390 /// mark)
391 bool IsWrappedConditional : 1;
392
393 /// Indicates the indent should be reduced by the length of the operator.
394 bool UnindentOperator : 1;
395
396 bool operator<(const ParenState &Other) const {
397 if (Indent.Total != Other.Indent.Total)
398 return Indent.Total < Other.Indent.Total;
399 if (LastSpace != Other.LastSpace)
400 return LastSpace < Other.LastSpace;
401 if (NestedBlockIndent != Other.NestedBlockIndent)
402 return NestedBlockIndent < Other.NestedBlockIndent;
403 if (FirstLessLess != Other.FirstLessLess)
404 return FirstLessLess < Other.FirstLessLess;
405 if (AlignedTo != Other.AlignedTo)
406 return AlignedTo < Other.AlignedTo;
407 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
408 return BreakBeforeClosingBrace;
409 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
410 return BreakBeforeClosingParen;
411 if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle)
412 return BreakBeforeClosingAngle;
413 if (QuestionColumn != Other.QuestionColumn)
414 return QuestionColumn < Other.QuestionColumn;
415 if (AvoidBinPacking != Other.AvoidBinPacking)
416 return AvoidBinPacking;
417 if (BreakBeforeParameter != Other.BreakBeforeParameter)
418 return BreakBeforeParameter;
419 if (NoLineBreak != Other.NoLineBreak)
420 return NoLineBreak;
421 if (LastOperatorWrapped != Other.LastOperatorWrapped)
422 return LastOperatorWrapped;
423 if (ColonPos != Other.ColonPos)
424 return ColonPos < Other.ColonPos;
425 if (StartOfFunctionCall != Other.StartOfFunctionCall)
426 return StartOfFunctionCall < Other.StartOfFunctionCall;
427 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
428 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
429 if (CallContinuation != Other.CallContinuation)
430 return CallContinuation < Other.CallContinuation;
431 if (VariablePos != Other.VariablePos)
432 return VariablePos < Other.VariablePos;
433 if (ContainsLineBreak != Other.ContainsLineBreak)
434 return ContainsLineBreak;
435 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
436 return ContainsUnwrappedBuilder;
437 if (NestedBlockInlined != Other.NestedBlockInlined)
438 return NestedBlockInlined;
439 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
440 return IsCSharpGenericTypeConstraint;
441 if (IsChainedConditional != Other.IsChainedConditional)
442 return IsChainedConditional;
443 if (IsWrappedConditional != Other.IsWrappedConditional)
444 return IsWrappedConditional;
445 if (UnindentOperator != Other.UnindentOperator)
446 return UnindentOperator;
447 return Indent < Other.Indent;
448 }
449};
450
451/// The current state when indenting a unwrapped line.
452///
453/// As the indenting tries different combinations this is copied by value.
454struct LineState {
455 /// The number of used columns in the current line.
456 unsigned Column;
457
458 /// The token that needs to be next formatted.
459 FormatToken *NextToken;
460
461 /// \c true if \p NextToken should not continue this line.
462 bool NoContinuation;
463
464 /// The \c NestingLevel at the start of this line.
465 unsigned StartOfLineLevel;
466
467 /// The lowest \c NestingLevel on the current line.
468 unsigned LowestLevelOnLine;
469
470 /// The start column of the string literal, if we're in a string
471 /// literal sequence, 0 otherwise.
472 unsigned StartOfStringLiteral;
473
474 /// Disallow line breaks for this line.
475 bool NoLineBreak;
476
477 /// A stack keeping track of properties applying to parenthesis
478 /// levels.
479 SmallVector<ParenState> Stack;
480
481 /// Ignore the stack of \c ParenStates for state comparison.
482 ///
483 /// In long and deeply nested unwrapped lines, the current algorithm can
484 /// be insufficient for finding the best formatting with a reasonable amount
485 /// of time and memory. Setting this flag will effectively lead to the
486 /// algorithm not analyzing some combinations. However, these combinations
487 /// rarely contain the optimal solution: In short, accepting a higher
488 /// penalty early would need to lead to different values in the \c
489 /// ParenState stack (in an otherwise identical state) and these different
490 /// values would need to lead to a significant amount of avoided penalty
491 /// later.
492 ///
493 /// FIXME: Come up with a better algorithm instead.
494 bool IgnoreStackForComparison;
495
496 /// The indent of the first token.
497 unsigned FirstIndent;
498
499 /// The line that is being formatted.
500 ///
501 /// Does not need to be considered for memoization because it doesn't change.
502 const AnnotatedLine *Line;
503
504 /// Comparison operator to be able to used \c LineState in \c map.
505 bool operator<(const LineState &Other) const {
506 if (NextToken != Other.NextToken)
507 return NextToken < Other.NextToken;
508 if (Column != Other.Column)
509 return Column < Other.Column;
510 if (NoContinuation != Other.NoContinuation)
511 return NoContinuation;
512 if (StartOfLineLevel != Other.StartOfLineLevel)
513 return StartOfLineLevel < Other.StartOfLineLevel;
514 if (LowestLevelOnLine != Other.LowestLevelOnLine)
515 return LowestLevelOnLine < Other.LowestLevelOnLine;
516 if (StartOfStringLiteral != Other.StartOfStringLiteral)
517 return StartOfStringLiteral < Other.StartOfStringLiteral;
518 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
519 return false;
520 return Stack < Other.Stack;
521 }
522};
523
524} // end namespace format
525} // end namespace clang
526
527#endif
528