//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WhitespaceManager class manages whitespace around tokens and their
/// replacements.
///
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17
18#include "TokenAnnotator.h"
19#include "clang/Basic/SourceManager.h"
20
21namespace clang {
22namespace format {
23
/// Manages the whitespaces around tokens and their replacements.
///
/// This includes special handling for certain constructs, e.g. the alignment of
/// trailing line comments.
///
/// To guarantee correctness of alignment operations, the \c WhitespaceManager
/// must be informed about every token in the source file; for each token, there
/// must be exactly one call to either \c replaceWhitespace or
/// \c addUntouchableToken.
///
/// There may be multiple calls to \c replaceWhitespaceInToken for a given
/// token.
35class WhitespaceManager {
36public:
37 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38 bool UseCRLF)
39 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40
41 bool useCRLF() const { return UseCRLF; }
42
43 /// Infers whether the input is using CRLF.
44 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45
46 /// Replaces the whitespace in front of \p Tok. Only call once for
47 /// each \c AnnotatedToken.
48 ///
49 /// \p StartOfTokenColumn is the column at which the token will start after
50 /// this replacement. It is needed for determining how \p Spaces is turned
51 /// into tabs and spaces for some format styles.
52 ///
53 /// \p IndentedFromColumn is only used when the replacement starts a new
54 /// line. It should be the column that the position of the line is derived
55 /// from. It is used for determining what lines the alignment process should
56 /// move.
57 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
58 unsigned StartOfTokenColumn,
59 const FormatToken *AlignedTo = nullptr,
60 bool InPPDirective = false,
61 unsigned IndentedFromColumn = 0);
62
63 /// Adds information about an unchangeable token's whitespace.
64 ///
65 /// Needs to be called for every token for which \c replaceWhitespace
66 /// was not called.
67 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
68
69 llvm::Error addReplacement(const tooling::Replacement &Replacement);
70
71 /// Inserts or replaces whitespace in the middle of a token.
72 ///
73 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
74 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
75 /// characters.
76 ///
77 /// Note: \p Spaces can be negative to retain information about initial
78 /// relative column offset between a line of a block comment and the start of
79 /// the comment. This negative offset may be compensated by trailing comment
80 /// alignment here. In all other cases negative \p Spaces will be truncated to
81 /// 0.
82 ///
83 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
84 /// used to align backslashes correctly.
85 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
86 unsigned ReplaceChars,
87 StringRef PreviousPostfix,
88 StringRef CurrentPrefix, bool InPPDirective,
89 unsigned Newlines, int Spaces);
90
91 /// Returns all the \c Replacements created during formatting.
92 const tooling::Replacements &generateReplacements();
93
94 /// Represents a change before a token, a break inside a token,
95 /// or the layout of an unchanged token (or whitespace within).
96 struct Change {
97 /// Functor to sort changes in original source order.
98 class IsBeforeInFile {
99 public:
100 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
101 bool operator()(const Change &C1, const Change &C2) const;
102
103 private:
104 const SourceManager &SourceMgr;
105 };
106
107 /// Creates a \c Change.
108 ///
109 /// The generated \c Change will replace the characters at
110 /// \p OriginalWhitespaceRange with a concatenation of
111 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
112 /// and \p CurrentLinePrefix.
113 ///
114 /// \p StartOfTokenColumn and \p ContinuesPPDirective will be used to lay
115 /// out trailing comments and escaped newlines. \p IndentedFromColumn will
116 /// be used to continue aligned lines.
117 Change(const FormatToken &Tok, bool CreateReplacement,
118 SourceRange OriginalWhitespaceRange, int Spaces,
119 unsigned StartOfTokenColumn, unsigned IndentedFromColumn,
120 unsigned NewlinesBefore, StringRef PreviousLinePostfix,
121 StringRef CurrentLinePrefix, const FormatToken *AlignedTo,
122 bool ContinuesPPDirective, bool IsInsideToken);
123
124 // The kind of the token whose whitespace this change replaces, or in which
125 // this change inserts whitespace.
126 // FIXME: Currently this is not set correctly for breaks inside comments, as
127 // the \c BreakableToken is still doing its own alignment.
128 const FormatToken *Tok;
129
130 bool CreateReplacement;
131 // Changes might be in the middle of a token, so we cannot just keep the
132 // FormatToken around to query its information.
133 SourceRange OriginalWhitespaceRange;
134 unsigned StartOfTokenColumn;
135 // Only used when the token is at the start of a line. The column that the
136 // position of the line is derived from. The alignment procedure moves the
137 // line when it moves a token in the same unwrapped line that is to the left
138 // of said column.
139 unsigned IndentedFromColumn;
140 unsigned NewlinesBefore;
141 std::string PreviousLinePostfix;
142 std::string CurrentLinePrefix;
143 const FormatToken *AlignedTo;
144 bool ContinuesPPDirective;
145
146 // The number of spaces in front of the token or broken part of the token.
147 // This will be adapted when aligning tokens.
148 // Can be negative to retain information about the initial relative offset
149 // of the lines in a block comment. This is used when aligning trailing
150 // comments. Uncompensated negative offset is truncated to 0.
151 int Spaces;
152
153 // If this change is inside of a token but not at the start of the token or
154 // directly after a newline.
155 bool IsInsideToken;
156
157 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
158 // \c EscapedNewlineColumn will be calculated in
159 // \c calculateLineBreakInformation.
160 bool IsTrailingComment;
161 unsigned TokenLength;
162 unsigned PreviousEndOfTokenColumn;
163 unsigned EscapedNewlineColumn;
164
165 // These fields are used to retain correct relative line indentation in a
166 // block comment when aligning trailing comments.
167 //
168 // If this Change represents a continuation of a block comment,
169 // \c StartOfBlockComment is pointer to the first Change in the block
170 // comment. \c IndentationOffset is a relative column offset to this
171 // change, so that the correct column can be reconstructed at the end of
172 // the alignment process.
173 const Change *StartOfBlockComment;
174 int IndentationOffset;
175
176 // Depth of conditionals. Computed from tracking fake parenthesis, except
177 // it does not increase the indent for "chained" conditionals.
178 int ConditionalsLevel;
179
180 // A combination of indent, nesting and conditionals levels, which are used
181 // in tandem to compute lexical scope, for the purposes of deciding
182 // when to stop consecutive alignment runs.
183 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
184 return std::make_tuple(args: Tok->IndentLevel, args: Tok->NestingLevel,
185 args: ConditionalsLevel);
186 }
187 };
188
189private:
190 struct CellDescription {
191 unsigned Index = 0;
192 unsigned Cell = 0;
193 unsigned EndIndex = 0;
194 bool HasSplit = false;
195 CellDescription *NextColumnElement = nullptr;
196
197 constexpr bool operator==(const CellDescription &Other) const {
198 return Index == Other.Index && Cell == Other.Cell &&
199 EndIndex == Other.EndIndex;
200 }
201 constexpr bool operator!=(const CellDescription &Other) const {
202 return !(*this == Other);
203 }
204 };
205
206 struct CellDescriptions {
207 SmallVector<CellDescription> Cells;
208 SmallVector<unsigned> CellCounts;
209 unsigned InitialSpaces = 0;
210
211 // Determine if every row in the array
212 // has the same number of columns.
213 bool isRectangular() const {
214 if (CellCounts.size() < 2)
215 return false;
216
217 for (auto NumberOfColumns : CellCounts)
218 if (NumberOfColumns != CellCounts[0])
219 return false;
220 return true;
221 }
222 };
223
224 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
225 /// or token parts in a line and \c PreviousEndOfTokenColumn and
226 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
227 void calculateLineBreakInformation();
228
229 /// Align consecutive C/C++ preprocessor macros over all \c Changes.
230 void alignConsecutiveMacros();
231
232 /// Align consecutive assignments over all \c Changes.
233 void alignConsecutiveAssignments();
234
235 /// Align consecutive bitfields over all \c Changes.
236 void alignConsecutiveBitFields();
237
238 /// Align consecutive colon. For bitfields, TableGen DAGArgs and definitions.
239 void
240 alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
241 TokenType Type);
242
243 /// Align consecutive declarations over all \c Changes.
244 void alignConsecutiveDeclarations();
245
246 /// Align consecutive declarations over all \c Changes.
247 void alignChainedConditionals();
248
249 /// Align consecutive short case statements over all \c Changes.
250 void alignConsecutiveShortCaseStatements(bool IsExpr);
251
252 /// Align consecutive TableGen DAGArg colon over all \c Changes.
253 void alignConsecutiveTableGenBreakingDAGArgColons();
254
255 /// Align consecutive TableGen cond operator colon over all \c Changes.
256 void alignConsecutiveTableGenCondOperatorColons();
257
258 /// Align consecutive TableGen definitions over all \c Changes.
259 void alignConsecutiveTableGenDefinitions();
260
261 /// Align trailing comments over all \c Changes.
262 void alignTrailingComments();
263
264 /// Align trailing comments from change \p Start to change \p End at
265 /// the specified \p Column.
266 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
267
268 /// Align escaped newlines over all \c Changes.
269 void alignEscapedNewlines();
270
271 /// Align escaped newlines from change \p Start to change \p End at
272 /// the specified \p Column.
273 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
274
275 /// Align Array Initializers over all \c Changes.
276 void alignArrayInitializers();
277
278 /// Align Array Initializers from change \p Start to change \p End at
279 /// the specified \p Column.
280 void alignArrayInitializers(unsigned Start, unsigned End);
281
282 /// Align Array Initializers being careful to right justify the columns
283 /// as described by \p CellDescs.
284 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
285
286 /// Align Array Initializers being careful to left justify the columns
287 /// as described by \p CellDescs.
288 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
289
290 /// Calculate the cell width between two indexes.
291 unsigned calculateCellWidth(unsigned Start, unsigned End,
292 bool WithSpaces = false) const;
293
294 /// Get a set of fully specified CellDescriptions between \p Start and
295 /// \p End of the change list.
296 CellDescriptions getCells(unsigned Start, unsigned End);
297
298 /// Does this \p Cell contain a split element?
299 static bool isSplitCell(const CellDescription &Cell);
300
301 /// Get the width of the preceding cells from \p Start to \p End.
302 template <typename I>
303 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
304 auto NetWidth = InitialSpaces;
305 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
306 // If we broke the line the initial spaces are already
307 // accounted for.
308 assert(PrevIter->Index < Changes.size());
309 if (Changes[PrevIter->Index].NewlinesBefore > 0)
310 NetWidth = 0;
311 NetWidth +=
312 calculateCellWidth(Start: PrevIter->Index, End: PrevIter->EndIndex, WithSpaces: true) + 1;
313 }
314 return NetWidth;
315 }
316
317 /// Get the maximum width of a cell in a sequence of columns.
318 template <typename I>
319 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
320 unsigned CellWidth =
321 calculateCellWidth(Start: CellIter->Index, End: CellIter->EndIndex, WithSpaces: true);
322 if (Changes[CellIter->Index].NewlinesBefore == 0)
323 CellWidth += NetWidth;
324 for (const auto *Next = CellIter->NextColumnElement; Next;
325 Next = Next->NextColumnElement) {
326 auto ThisWidth = calculateCellWidth(Start: Next->Index, End: Next->EndIndex, WithSpaces: true);
327 if (Changes[Next->Index].NewlinesBefore == 0)
328 ThisWidth += NetWidth;
329 CellWidth = std::max(CellWidth, ThisWidth);
330 }
331 return CellWidth;
332 }
333
334 /// Get The maximum width of all columns to a given cell.
335 template <typename I>
336 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
337 unsigned InitialSpaces, unsigned CellCount,
338 unsigned MaxRowCount) const {
339 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
340 auto RowCount = 1U;
341 auto Offset = std::distance(CellStart, CellStop);
342 for (const auto *Next = CellStop->NextColumnElement; Next;
343 Next = Next->NextColumnElement) {
344 if (RowCount >= MaxRowCount)
345 break;
346 auto Start = (CellStart + RowCount * CellCount);
347 auto End = Start + Offset;
348 MaxNetWidth =
349 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
350 ++RowCount;
351 }
352 return MaxNetWidth;
353 }
354
355 /// Align a split cell with a newline to the first element in the cell.
356 void alignToStartOfCell(unsigned Start, unsigned End);
357
358 /// Link the Cell pointers in the list of Cells.
359 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
360
361 void setChangeSpaces(unsigned Start, unsigned Spaces);
362
363 /// Fill \c Replaces with the replacements for all effective changes.
364 void generateChanges();
365
366 /// Stores \p Text as the replacement for the whitespace in \p Range.
367 void storeReplacement(SourceRange Range, StringRef Text);
368 void appendNewlineText(std::string &Text, const Change &C);
369 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
370 unsigned PreviousEndOfTokenColumn,
371 unsigned EscapedNewlineColumn);
372 void appendIndentText(std::string &Text, unsigned IndentLevel,
373 unsigned Spaces, unsigned WhitespaceStartColumn,
374 bool IsAligned);
375 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
376 unsigned Indentation);
377
378 SmallVector<Change, 16> Changes;
379 const SourceManager &SourceMgr;
380 tooling::Replacements Replaces;
381 const FormatStyle &Style;
382 bool UseCRLF;
383};
384
385} // namespace format
386} // namespace clang
387
388#endif
389