1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
// Forward declarations of types named by the declarations in this header.
// LineState, ParenState and RawStringFormatStyleManager are defined further
// down in this file.
class AnnotatedLine;
class BreakableToken;
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
class WhitespaceManager;
33
/// Holds two string-keyed tables of \c FormatStyle objects together with one
/// optional-returning lookup function per table.
///
/// NOTE(review): judging by the name, these are presumably the styles used
/// when reformatting the contents of raw string literals. How the tables are
/// filled from \c CodeStyle is defined out of line -- confirm there.
struct RawStringFormatStyleManager {
  /// Styles keyed by string; presumably the table consulted by
  /// \c getDelimiterStyle (confirm in the implementation).
  llvm::StringMap<FormatStyle> DelimiterStyle;
  /// Styles keyed by string; presumably the table consulted by
  /// \c getEnclosingFunctionStyle (confirm in the implementation).
  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;

  /// Builds the manager from \p CodeStyle. The constructor is not marked
  /// \c explicit, so a \c FormatStyle converts implicitly to this type.
  RawStringFormatStyleManager(const FormatStyle &CodeStyle);

  /// Looks up a style for \p Delimiter.
  ///
  /// \returns The style, or an empty optional; presumably empty when nothing
  /// is registered for \p Delimiter (confirm in the implementation).
  std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;

  /// Looks up a style for \p EnclosingFunction.
  ///
  /// \returns The style, or an empty optional; presumably empty when nothing
  /// is registered for \p EnclosingFunction (confirm in the implementation).
  std::optional<FormatStyle>
  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
};
45
/// Manages the indentation of continuations while tokens of a line are placed
/// one by one into a \c LineState.
///
/// The public interface creates the initial state for a line, answers whether
/// a break at the current position is allowed or required, and appends the
/// next token either on the current line or on a new one.
class ContinuationIndenter {
public:
  /// Constructs a \c ContinuationIndenter.
  ///
  /// The line to format and its first indent are not passed here; they are
  /// supplied later through \c getInitialState.
  ///
  /// \p Keywords, \p SourceMgr and \p Whitespaces match reference members of
  /// this class, so the referenced objects presumably need to outlive the
  /// indenter (the constructor is defined out of line -- confirm).
  ContinuationIndenter(const FormatStyle &Style,
                       const AdditionalKeywords &Keywords,
                       const SourceManager &SourceMgr,
                       WhitespaceManager &Whitespaces,
                       encoding::Encoding Encoding,
                       bool BinPackInconclusiveFunctions);

  /// Get the initial state, i.e. the state after placing \p Line's
  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
  /// the case of formatting inside raw string literals, \p FirstStartColumn is
  /// the column at which the state of the parent formatter is.
  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
                            const AnnotatedLine *Line, bool DryRun);

  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
  // better home.
  /// Returns \c true, if a line break after \p State is allowed.
  bool canBreak(const LineState &State);

  /// Returns \c true, if a line break after \p State is mandatory.
  bool mustBreak(const LineState &State);

  /// Appends the next token to \p State and updates information
  /// necessary for indentation.
  ///
  /// Puts the token on the current line if \p Newline is \c false and adds a
  /// line break and necessary indentation otherwise.
  ///
  /// If \p DryRun is \c false, also creates and stores the required
  /// \c Replacement.
  ///
  /// \p ExtraSpaces defaults to 0.
  ///
  /// NOTE(review): the meaning of the \c unsigned result is not stated in this
  /// header; presumably it is a penalty, like the results of the private
  /// helpers below -- confirm against the implementation.
  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
                           unsigned ExtraSpaces = 0);

  /// Get the column limit for this line. This is the style's column
  /// limit, potentially reduced for preprocessor definitions.
  unsigned getColumnLimit(const LineState &State) const;

private:
  /// Mark the next token as consumed in \p State and modify its stacks
  /// accordingly.
  ///
  /// NOTE(review): the \c unsigned result is undocumented here; presumably a
  /// penalty -- confirm against the implementation.
  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);

  /// Update 'State' according to the next token's fake left parentheses.
  void moveStatePastFakeLParens(LineState &State, bool Newline);
  /// Update 'State' according to the next token's fake r_parens.
  void moveStatePastFakeRParens(LineState &State);

  /// Update 'State' according to the next token being one of "(<{[".
  void moveStatePastScopeOpener(LineState &State, bool Newline);
  /// Update 'State' according to the next token being one of ")>}]".
  void moveStatePastScopeCloser(LineState &State);
  /// Update 'State' with the next token opening a nested block.
  void moveStateToNewBlock(LineState &State, bool NewLine);

  /// Reformats a raw string literal.
  ///
  /// \p RawStringStyle is the style to apply to the literal.
  ///
  /// \returns An extra penalty induced by reformatting the token.
  unsigned reformatRawStringLiteral(const FormatToken &Current,
                                    LineState &State,
                                    const FormatStyle &RawStringStyle,
                                    bool DryRun, bool Newline);

  /// If the current token is at the end of the current line, handle
  /// the transition to the next line.
  ///
  /// NOTE(review): the \c unsigned result is undocumented here; presumably a
  /// penalty -- confirm against the implementation.
  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
                           bool DryRun, bool AllowBreak, bool Newline);

  /// If \p Current is a raw string that is configured to be reformatted,
  /// return the style to be used.
  std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
                                               const LineState &State);

  /// If the current token sticks out over the end of the line, break
  /// it if possible.
  ///
  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
  /// when tokens are broken or lines exceed the column limit, and exceeded
  /// indicates whether the algorithm purposefully left lines exceeding the
  /// column limit.
  ///
  /// The returned penalty will cover the cost of the additional line breaks
  /// and column limit violation in all lines except for the last one. The
  /// penalty for the column limit violation in the last line (and in single
  /// line tokens) is handled in \c addNextStateToQueue.
  ///
  /// \p Strict indicates whether reflowing is allowed to leave characters
  /// protruding the column limit; if true, lines will be split strictly within
  /// the column limit where possible; if false, words are allowed to protrude
  /// over the column limit as long as the penalty is less than the penalty
  /// of a break.
  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
                                                 LineState &State,
                                                 bool AllowBreak, bool DryRun,
                                                 bool Strict);

  /// Returns the \c BreakableToken starting at \p Current, or nullptr
  /// if the current token cannot be broken.
  std::unique_ptr<BreakableToken>
  createBreakableToken(const FormatToken &Current, LineState &State,
                       bool AllowBreak);

  /// Appends the next token to \p State and updates information
  /// necessary for indentation.
  ///
  /// Puts the token on the current line.
  ///
  /// If \p DryRun is \c false, also creates and stores the required
  /// \c Replacement.
  void addTokenOnCurrentLine(LineState &State, bool DryRun,
                             unsigned ExtraSpaces);

  /// Appends the next token to \p State and updates information
  /// necessary for indentation.
  ///
  /// Adds a line break and necessary indentation.
  ///
  /// If \p DryRun is \c false, also creates and stores the required
  /// \c Replacement.
  ///
  /// NOTE(review): the \c unsigned result is undocumented here; presumably a
  /// penalty -- confirm against the implementation.
  unsigned addTokenOnNewLine(LineState &State, bool DryRun);

  /// Calculate the new column for a line wrap before the next token.
  unsigned getNewLineColumn(const LineState &State);

  /// Adds a multiline token to the \p State.
  ///
  /// \returns Extra penalty for the first line of the literal: last line is
  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
  /// matter, as we don't change them.
  unsigned addMultilineToken(const FormatToken &Current, LineState &State);

  /// Returns \c true if the next token starts a multiline string
  /// literal.
  ///
  /// This includes implicitly concatenated strings, strings that will be broken
  /// by clang-format and string literals with escaped newlines.
  bool nextIsMultilineString(const LineState &State);

  // Configuration and collaborators. \c Style, \c Encoding and
  // \c BinPackInconclusiveFunctions are stored by value; \c Keywords,
  // \c SourceMgr and \c Whitespaces are reference members, so the objects they
  // refer to have to stay alive for as long as this indenter is used.
  //
  // NOTE(review): presumably each of these mirrors the constructor parameter
  // of the same name; the constructor is defined out of line -- confirm.
  FormatStyle Style;
  const AdditionalKeywords &Keywords;
  const SourceManager &SourceMgr;
  WhitespaceManager &Whitespaces;
  encoding::Encoding Encoding;
  bool BinPackInconclusiveFunctions;
  // NOTE(review): no initializer is visible in this header; presumably a
  // pattern for comments that must be treated as pragmas -- confirm in the
  // implementation.
  llvm::Regex CommentPragmasRegex;
  // Immutable after construction (declared \c const). Presumably the source
  // of the styles handed out by getRawStringStyle -- confirm.
  const RawStringFormatStyleManager RawStringFormats;
};
196
197struct ParenState {
198 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
199 bool AvoidBinPacking, bool NoLineBreak)
200 : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
201 NestedBlockIndent(Indent), IsAligned(false),
202 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
203 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
204 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
205 LastOperatorWrapped(true), ContainsLineBreak(false),
206 ContainsUnwrappedBuilder(false), AlignColons(true),
207 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
208 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
209 IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
210 IsWrappedConditional(false), UnindentOperator(false) {}
211
212 /// \brief The token opening this parenthesis level, or nullptr if this level
213 /// is opened by fake parenthesis.
214 ///
215 /// Not considered for memoization as it will always have the same value at
216 /// the same token.
217 const FormatToken *Tok;
218
219 /// The position to which a specific parenthesis level needs to be
220 /// indented.
221 unsigned Indent;
222
223 /// The position of the last space on each level.
224 ///
225 /// Used e.g. to break like:
226 /// functionCall(Parameter, otherCall(
227 /// OtherParameter));
228 unsigned LastSpace;
229
230 /// If a block relative to this parenthesis level gets wrapped, indent
231 /// it this much.
232 unsigned NestedBlockIndent;
233
234 /// The position the first "<<" operator encountered on each level.
235 ///
236 /// Used to align "<<" operators. 0 if no such operator has been encountered
237 /// on a level.
238 unsigned FirstLessLess = 0;
239
240 /// The column of a \c ? in a conditional expression;
241 unsigned QuestionColumn = 0;
242
243 /// The position of the colon in an ObjC method declaration/call.
244 unsigned ColonPos = 0;
245
246 /// The start of the most recent function in a builder-type call.
247 unsigned StartOfFunctionCall = 0;
248
249 /// Contains the start of array subscript expressions, so that they
250 /// can be aligned.
251 unsigned StartOfArraySubscripts = 0;
252
253 /// If a nested name specifier was broken over multiple lines, this
254 /// contains the start column of the second line. Otherwise 0.
255 unsigned NestedNameSpecifierContinuation = 0;
256
257 /// If a call expression was broken over multiple lines, this
258 /// contains the start column of the second line. Otherwise 0.
259 unsigned CallContinuation = 0;
260
261 /// The column of the first variable name in a variable declaration.
262 ///
263 /// Used to align further variables if necessary.
264 unsigned VariablePos = 0;
265
266 /// Whether this block's indentation is used for alignment.
267 bool IsAligned : 1;
268
269 /// Whether a newline needs to be inserted before the block's closing
270 /// brace.
271 ///
272 /// We only want to insert a newline before the closing brace if there also
273 /// was a newline after the beginning left brace.
274 bool BreakBeforeClosingBrace : 1;
275
276 /// Whether a newline needs to be inserted before the block's closing
277 /// paren.
278 ///
279 /// We only want to insert a newline before the closing paren if there also
280 /// was a newline after the beginning left paren.
281 bool BreakBeforeClosingParen : 1;
282
283 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
284 /// lines, in this context.
285 bool AvoidBinPacking : 1;
286
287 /// Break after the next comma (or all the commas in this context if
288 /// \c AvoidBinPacking is \c true).
289 bool BreakBeforeParameter : 1;
290
291 /// Line breaking in this context would break a formatting rule.
292 bool NoLineBreak : 1;
293
294 /// Same as \c NoLineBreak, but is restricted until the end of the
295 /// operand (including the next ",").
296 bool NoLineBreakInOperand : 1;
297
298 /// True if the last binary operator on this level was wrapped to the
299 /// next line.
300 bool LastOperatorWrapped : 1;
301
302 /// \c true if this \c ParenState already contains a line-break.
303 ///
304 /// The first line break in a certain \c ParenState causes extra penalty so
305 /// that clang-format prefers similar breaks, i.e. breaks in the same
306 /// parenthesis.
307 bool ContainsLineBreak : 1;
308
309 /// \c true if this \c ParenState contains multiple segments of a
310 /// builder-type call on one line.
311 bool ContainsUnwrappedBuilder : 1;
312
313 /// \c true if the colons of the curren ObjC method expression should
314 /// be aligned.
315 ///
316 /// Not considered for memoization as it will always have the same value at
317 /// the same token.
318 bool AlignColons : 1;
319
320 /// \c true if at least one selector name was found in the current
321 /// ObjC method expression.
322 ///
323 /// Not considered for memoization as it will always have the same value at
324 /// the same token.
325 bool ObjCSelectorNameFound : 1;
326
327 /// \c true if there are multiple nested blocks inside these parens.
328 ///
329 /// Not considered for memoization as it will always have the same value at
330 /// the same token.
331 bool HasMultipleNestedBlocks : 1;
332
333 /// The start of a nested block (e.g. lambda introducer in C++ or
334 /// "function" in JavaScript) is not wrapped to a new line.
335 bool NestedBlockInlined : 1;
336
337 /// \c true if the current \c ParenState represents an Objective-C
338 /// array literal.
339 bool IsInsideObjCArrayLiteral : 1;
340
341 bool IsCSharpGenericTypeConstraint : 1;
342
343 /// \brief true if the current \c ParenState represents the false branch of
344 /// a chained conditional expression (e.g. else-if)
345 bool IsChainedConditional : 1;
346
347 /// \brief true if there conditionnal was wrapped on the first operator (the
348 /// question mark)
349 bool IsWrappedConditional : 1;
350
351 /// \brief Indicates the indent should be reduced by the length of the
352 /// operator.
353 bool UnindentOperator : 1;
354
355 bool operator<(const ParenState &Other) const {
356 if (Indent != Other.Indent)
357 return Indent < Other.Indent;
358 if (LastSpace != Other.LastSpace)
359 return LastSpace < Other.LastSpace;
360 if (NestedBlockIndent != Other.NestedBlockIndent)
361 return NestedBlockIndent < Other.NestedBlockIndent;
362 if (FirstLessLess != Other.FirstLessLess)
363 return FirstLessLess < Other.FirstLessLess;
364 if (IsAligned != Other.IsAligned)
365 return IsAligned;
366 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
367 return BreakBeforeClosingBrace;
368 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
369 return BreakBeforeClosingParen;
370 if (QuestionColumn != Other.QuestionColumn)
371 return QuestionColumn < Other.QuestionColumn;
372 if (AvoidBinPacking != Other.AvoidBinPacking)
373 return AvoidBinPacking;
374 if (BreakBeforeParameter != Other.BreakBeforeParameter)
375 return BreakBeforeParameter;
376 if (NoLineBreak != Other.NoLineBreak)
377 return NoLineBreak;
378 if (LastOperatorWrapped != Other.LastOperatorWrapped)
379 return LastOperatorWrapped;
380 if (ColonPos != Other.ColonPos)
381 return ColonPos < Other.ColonPos;
382 if (StartOfFunctionCall != Other.StartOfFunctionCall)
383 return StartOfFunctionCall < Other.StartOfFunctionCall;
384 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
385 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
386 if (CallContinuation != Other.CallContinuation)
387 return CallContinuation < Other.CallContinuation;
388 if (VariablePos != Other.VariablePos)
389 return VariablePos < Other.VariablePos;
390 if (ContainsLineBreak != Other.ContainsLineBreak)
391 return ContainsLineBreak;
392 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
393 return ContainsUnwrappedBuilder;
394 if (NestedBlockInlined != Other.NestedBlockInlined)
395 return NestedBlockInlined;
396 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
397 return IsCSharpGenericTypeConstraint;
398 if (IsChainedConditional != Other.IsChainedConditional)
399 return IsChainedConditional;
400 if (IsWrappedConditional != Other.IsWrappedConditional)
401 return IsWrappedConditional;
402 if (UnindentOperator != Other.UnindentOperator)
403 return UnindentOperator;
404 return false;
405 }
406};
407
408/// The current state when indenting a unwrapped line.
409///
410/// As the indenting tries different combinations this is copied by value.
411struct LineState {
412 /// The number of used columns in the current line.
413 unsigned Column;
414
415 /// The token that needs to be next formatted.
416 FormatToken *NextToken;
417
418 /// \c true if \p NextToken should not continue this line.
419 bool NoContinuation;
420
421 /// The \c NestingLevel at the start of this line.
422 unsigned StartOfLineLevel;
423
424 /// The lowest \c NestingLevel on the current line.
425 unsigned LowestLevelOnLine;
426
427 /// The start column of the string literal, if we're in a string
428 /// literal sequence, 0 otherwise.
429 unsigned StartOfStringLiteral;
430
431 /// Disallow line breaks for this line.
432 bool NoLineBreak;
433
434 /// A stack keeping track of properties applying to parenthesis
435 /// levels.
436 SmallVector<ParenState> Stack;
437
438 /// Ignore the stack of \c ParenStates for state comparison.
439 ///
440 /// In long and deeply nested unwrapped lines, the current algorithm can
441 /// be insufficient for finding the best formatting with a reasonable amount
442 /// of time and memory. Setting this flag will effectively lead to the
443 /// algorithm not analyzing some combinations. However, these combinations
444 /// rarely contain the optimal solution: In short, accepting a higher
445 /// penalty early would need to lead to different values in the \c
446 /// ParenState stack (in an otherwise identical state) and these different
447 /// values would need to lead to a significant amount of avoided penalty
448 /// later.
449 ///
450 /// FIXME: Come up with a better algorithm instead.
451 bool IgnoreStackForComparison;
452
453 /// The indent of the first token.
454 unsigned FirstIndent;
455
456 /// The line that is being formatted.
457 ///
458 /// Does not need to be considered for memoization because it doesn't change.
459 const AnnotatedLine *Line;
460
461 /// Comparison operator to be able to used \c LineState in \c map.
462 bool operator<(const LineState &Other) const {
463 if (NextToken != Other.NextToken)
464 return NextToken < Other.NextToken;
465 if (Column != Other.Column)
466 return Column < Other.Column;
467 if (NoContinuation != Other.NoContinuation)
468 return NoContinuation;
469 if (StartOfLineLevel != Other.StartOfLineLevel)
470 return StartOfLineLevel < Other.StartOfLineLevel;
471 if (LowestLevelOnLine != Other.LowestLevelOnLine)
472 return LowestLevelOnLine < Other.LowestLevelOnLine;
473 if (StartOfStringLiteral != Other.StartOfStringLiteral)
474 return StartOfStringLiteral < Other.StartOfStringLiteral;
475 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
476 return false;
477 return Stack < Other.Stack;
478 }
479};
480
481} // end namespace format
482} // end namespace clang
483
484#endif
485