1//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// WhitespaceManager class manages whitespace around tokens and their
11/// replacements.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17
18#include "TokenAnnotator.h"
19#include "clang/Basic/SourceManager.h"
20
21namespace clang {
22namespace format {
23
24/// Manages the whitespaces around tokens and their replacements.
25///
26/// This includes special handling for certain constructs, e.g. the alignment of
27/// trailing line comments.
28///
29/// To guarantee correctness of alignment operations, the \c WhitespaceManager
30/// must be informed about every token in the source file; for each token, there
31/// must be exactly one call to either \c replaceWhitespace or
32/// \c addUntouchableToken.
33///
34/// There may be multiple calls to \c breakToken for a given token.
35class WhitespaceManager {
36public:
37 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38 bool UseCRLF)
39 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40
41 bool useCRLF() const { return UseCRLF; }
42
43 /// Infers whether the input is using CRLF.
44 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45
46 /// Replaces the whitespace in front of \p Tok. Only call once for
47 /// each \c AnnotatedToken.
48 ///
49 /// \p StartOfTokenColumn is the column at which the token will start after
50 /// this replacement. It is needed for determining how \p Spaces is turned
51 /// into tabs and spaces for some format styles.
52 ///
53 /// \p IndentedFromColumn is only used when the replacement starts a new
54 /// line. It should be the column that the position of the line is derived
55 /// from. It is used for determining what lines the alignment process should
56 /// move.
57 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
58 unsigned StartOfTokenColumn, bool IsAligned = false,
59 bool InPPDirective = false,
60 unsigned IndentedFromColumn = 0);
61
62 /// Adds information about an unchangeable token's whitespace.
63 ///
64 /// Needs to be called for every token for which \c replaceWhitespace
65 /// was not called.
66 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
67
68 llvm::Error addReplacement(const tooling::Replacement &Replacement);
69
70 /// Inserts or replaces whitespace in the middle of a token.
71 ///
72 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
73 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
74 /// characters.
75 ///
76 /// Note: \p Spaces can be negative to retain information about initial
77 /// relative column offset between a line of a block comment and the start of
78 /// the comment. This negative offset may be compensated by trailing comment
79 /// alignment here. In all other cases negative \p Spaces will be truncated to
80 /// 0.
81 ///
82 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
83 /// used to align backslashes correctly.
84 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
85 unsigned ReplaceChars,
86 StringRef PreviousPostfix,
87 StringRef CurrentPrefix, bool InPPDirective,
88 unsigned Newlines, int Spaces);
89
90 /// Returns all the \c Replacements created during formatting.
91 const tooling::Replacements &generateReplacements();
92
93 /// Represents a change before a token, a break inside a token,
94 /// or the layout of an unchanged token (or whitespace within).
95 struct Change {
96 /// Functor to sort changes in original source order.
97 class IsBeforeInFile {
98 public:
99 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
100 bool operator()(const Change &C1, const Change &C2) const;
101
102 private:
103 const SourceManager &SourceMgr;
104 };
105
106 /// Creates a \c Change.
107 ///
108 /// The generated \c Change will replace the characters at
109 /// \p OriginalWhitespaceRange with a concatenation of
110 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
111 /// and \p CurrentLinePrefix.
112 ///
113 /// \p StartOfTokenColumn and \p ContinuesPPDirective will be used to lay
114 /// out trailing comments and escaped newlines. \p IndentedFromColumn will
115 /// be used to continue aligned lines.
116 Change(const FormatToken &Tok, bool CreateReplacement,
117 SourceRange OriginalWhitespaceRange, int Spaces,
118 unsigned StartOfTokenColumn, unsigned IndentedFromColumn,
119 unsigned NewlinesBefore, StringRef PreviousLinePostfix,
120 StringRef CurrentLinePrefix, bool IsAligned,
121 bool ContinuesPPDirective, bool IsInsideToken);
122
123 // The kind of the token whose whitespace this change replaces, or in which
124 // this change inserts whitespace.
125 // FIXME: Currently this is not set correctly for breaks inside comments, as
126 // the \c BreakableToken is still doing its own alignment.
127 const FormatToken *Tok;
128
129 bool CreateReplacement;
130 // Changes might be in the middle of a token, so we cannot just keep the
131 // FormatToken around to query its information.
132 SourceRange OriginalWhitespaceRange;
133 unsigned StartOfTokenColumn;
134 // Only used when the token is at the start of a line. The column that the
135 // position of the line is derived from. The alignment procedure moves the
136 // line when it moves a token in the same unwrapped line that is to the left
137 // of said column.
138 unsigned IndentedFromColumn;
139 unsigned NewlinesBefore;
140 std::string PreviousLinePostfix;
141 std::string CurrentLinePrefix;
142 bool IsAligned;
143 bool ContinuesPPDirective;
144
145 // The number of spaces in front of the token or broken part of the token.
146 // This will be adapted when aligning tokens.
147 // Can be negative to retain information about the initial relative offset
148 // of the lines in a block comment. This is used when aligning trailing
149 // comments. Uncompensated negative offset is truncated to 0.
150 int Spaces;
151
152 // If this change is inside of a token but not at the start of the token or
153 // directly after a newline.
154 bool IsInsideToken;
155
156 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
157 // \c EscapedNewlineColumn will be calculated in
158 // \c calculateLineBreakInformation.
159 bool IsTrailingComment;
160 unsigned TokenLength;
161 unsigned PreviousEndOfTokenColumn;
162 unsigned EscapedNewlineColumn;
163
164 // These fields are used to retain correct relative line indentation in a
165 // block comment when aligning trailing comments.
166 //
167 // If this Change represents a continuation of a block comment,
168 // \c StartOfBlockComment is pointer to the first Change in the block
169 // comment. \c IndentationOffset is a relative column offset to this
170 // change, so that the correct column can be reconstructed at the end of
171 // the alignment process.
172 const Change *StartOfBlockComment;
173 int IndentationOffset;
174
175 // Depth of conditionals. Computed from tracking fake parenthesis, except
176 // it does not increase the indent for "chained" conditionals.
177 int ConditionalsLevel;
178
179 // A combination of indent, nesting and conditionals levels, which are used
180 // in tandem to compute lexical scope, for the purposes of deciding
181 // when to stop consecutive alignment runs.
182 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
183 return std::make_tuple(args: Tok->IndentLevel, args: Tok->NestingLevel,
184 args: ConditionalsLevel);
185 }
186 };
187
188private:
189 struct CellDescription {
190 unsigned Index = 0;
191 unsigned Cell = 0;
192 unsigned EndIndex = 0;
193 bool HasSplit = false;
194 CellDescription *NextColumnElement = nullptr;
195
196 constexpr bool operator==(const CellDescription &Other) const {
197 return Index == Other.Index && Cell == Other.Cell &&
198 EndIndex == Other.EndIndex;
199 }
200 constexpr bool operator!=(const CellDescription &Other) const {
201 return !(*this == Other);
202 }
203 };
204
205 struct CellDescriptions {
206 SmallVector<CellDescription> Cells;
207 SmallVector<unsigned> CellCounts;
208 unsigned InitialSpaces = 0;
209
210 // Determine if every row in the array
211 // has the same number of columns.
212 bool isRectangular() const {
213 if (CellCounts.size() < 2)
214 return false;
215
216 for (auto NumberOfColumns : CellCounts)
217 if (NumberOfColumns != CellCounts[0])
218 return false;
219 return true;
220 }
221 };
222
223 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
224 /// or token parts in a line and \c PreviousEndOfTokenColumn and
225 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
226 void calculateLineBreakInformation();
227
228 /// Align consecutive C/C++ preprocessor macros over all \c Changes.
229 void alignConsecutiveMacros();
230
231 /// Align consecutive assignments over all \c Changes.
232 void alignConsecutiveAssignments();
233
234 /// Align consecutive bitfields over all \c Changes.
235 void alignConsecutiveBitFields();
236
237 /// Align consecutive colon. For bitfields, TableGen DAGArgs and definitions.
238 void
239 alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
240 TokenType Type);
241
242 /// Align consecutive declarations over all \c Changes.
243 void alignConsecutiveDeclarations();
244
245 /// Align consecutive declarations over all \c Changes.
246 void alignChainedConditionals();
247
248 /// Align consecutive short case statements over all \c Changes.
249 void alignConsecutiveShortCaseStatements(bool IsExpr);
250
251 /// Align consecutive TableGen DAGArg colon over all \c Changes.
252 void alignConsecutiveTableGenBreakingDAGArgColons();
253
254 /// Align consecutive TableGen cond operator colon over all \c Changes.
255 void alignConsecutiveTableGenCondOperatorColons();
256
257 /// Align consecutive TableGen definitions over all \c Changes.
258 void alignConsecutiveTableGenDefinitions();
259
260 /// Align trailing comments over all \c Changes.
261 void alignTrailingComments();
262
263 /// Align trailing comments from change \p Start to change \p End at
264 /// the specified \p Column.
265 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
266
267 /// Align escaped newlines over all \c Changes.
268 void alignEscapedNewlines();
269
270 /// Align escaped newlines from change \p Start to change \p End at
271 /// the specified \p Column.
272 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
273
274 /// Align Array Initializers over all \c Changes.
275 void alignArrayInitializers();
276
277 /// Align Array Initializers from change \p Start to change \p End at
278 /// the specified \p Column.
279 void alignArrayInitializers(unsigned Start, unsigned End);
280
281 /// Align Array Initializers being careful to right justify the columns
282 /// as described by \p CellDescs.
283 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
284
285 /// Align Array Initializers being careful to left justify the columns
286 /// as described by \p CellDescs.
287 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
288
289 /// Calculate the cell width between two indexes.
290 unsigned calculateCellWidth(unsigned Start, unsigned End,
291 bool WithSpaces = false) const;
292
293 /// Get a set of fully specified CellDescriptions between \p Start and
294 /// \p End of the change list.
295 CellDescriptions getCells(unsigned Start, unsigned End);
296
297 /// Does this \p Cell contain a split element?
298 static bool isSplitCell(const CellDescription &Cell);
299
300 /// Get the width of the preceding cells from \p Start to \p End.
301 template <typename I>
302 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
303 auto NetWidth = InitialSpaces;
304 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
305 // If we broke the line the initial spaces are already
306 // accounted for.
307 assert(PrevIter->Index < Changes.size());
308 if (Changes[PrevIter->Index].NewlinesBefore > 0)
309 NetWidth = 0;
310 NetWidth +=
311 calculateCellWidth(Start: PrevIter->Index, End: PrevIter->EndIndex, WithSpaces: true) + 1;
312 }
313 return NetWidth;
314 }
315
316 /// Get the maximum width of a cell in a sequence of columns.
317 template <typename I>
318 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
319 unsigned CellWidth =
320 calculateCellWidth(Start: CellIter->Index, End: CellIter->EndIndex, WithSpaces: true);
321 if (Changes[CellIter->Index].NewlinesBefore == 0)
322 CellWidth += NetWidth;
323 for (const auto *Next = CellIter->NextColumnElement; Next;
324 Next = Next->NextColumnElement) {
325 auto ThisWidth = calculateCellWidth(Start: Next->Index, End: Next->EndIndex, WithSpaces: true);
326 if (Changes[Next->Index].NewlinesBefore == 0)
327 ThisWidth += NetWidth;
328 CellWidth = std::max(CellWidth, ThisWidth);
329 }
330 return CellWidth;
331 }
332
333 /// Get The maximum width of all columns to a given cell.
334 template <typename I>
335 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
336 unsigned InitialSpaces, unsigned CellCount,
337 unsigned MaxRowCount) const {
338 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
339 auto RowCount = 1U;
340 auto Offset = std::distance(CellStart, CellStop);
341 for (const auto *Next = CellStop->NextColumnElement; Next;
342 Next = Next->NextColumnElement) {
343 if (RowCount >= MaxRowCount)
344 break;
345 auto Start = (CellStart + RowCount * CellCount);
346 auto End = Start + Offset;
347 MaxNetWidth =
348 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
349 ++RowCount;
350 }
351 return MaxNetWidth;
352 }
353
354 /// Align a split cell with a newline to the first element in the cell.
355 void alignToStartOfCell(unsigned Start, unsigned End);
356
357 /// Link the Cell pointers in the list of Cells.
358 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
359
360 /// Fill \c Replaces with the replacements for all effective changes.
361 void generateChanges();
362
363 /// Stores \p Text as the replacement for the whitespace in \p Range.
364 void storeReplacement(SourceRange Range, StringRef Text);
365 void appendNewlineText(std::string &Text, const Change &C);
366 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
367 unsigned PreviousEndOfTokenColumn,
368 unsigned EscapedNewlineColumn);
369 void appendIndentText(std::string &Text, unsigned IndentLevel,
370 unsigned Spaces, unsigned WhitespaceStartColumn,
371 bool IsAligned);
372 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
373 unsigned Indentation);
374
375 SmallVector<Change, 16> Changes;
376 const SourceManager &SourceMgr;
377 tooling::Replacements Replaces;
378 const FormatStyle &Style;
379 bool UseCRLF;
380};
381
382} // namespace format
383} // namespace clang
384
385#endif
386