//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WhitespaceManager class manages whitespace around tokens and their
/// replacements.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H

#include "TokenAnnotator.h"
#include "clang/Basic/SourceManager.h"

namespace clang {
namespace format {

| 24 | /// Manages the whitespaces around tokens and their replacements. |
| 25 | /// |
| 26 | /// This includes special handling for certain constructs, e.g. the alignment of |
| 27 | /// trailing line comments. |
| 28 | /// |
| 29 | /// To guarantee correctness of alignment operations, the \c WhitespaceManager |
| 30 | /// must be informed about every token in the source file; for each token, there |
| 31 | /// must be exactly one call to either \c replaceWhitespace or |
| 32 | /// \c addUntouchableToken. |
| 33 | /// |
| 34 | /// There may be multiple calls to \c breakToken for a given token. |
| 35 | class WhitespaceManager { |
| 36 | public: |
| 37 | WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, |
| 38 | bool UseCRLF) |
| 39 | : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} |
| 40 | |
| 41 | bool useCRLF() const { return UseCRLF; } |
| 42 | |
| 43 | /// Infers whether the input is using CRLF. |
| 44 | static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF); |
| 45 | |
| 46 | /// Replaces the whitespace in front of \p Tok. Only call once for |
| 47 | /// each \c AnnotatedToken. |
| 48 | /// |
| 49 | /// \p StartOfTokenColumn is the column at which the token will start after |
| 50 | /// this replacement. It is needed for determining how \p Spaces is turned |
| 51 | /// into tabs and spaces for some format styles. |
| 52 | void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, |
| 53 | unsigned StartOfTokenColumn, bool IsAligned = false, |
| 54 | bool InPPDirective = false); |
| 55 | |
| 56 | /// Adds information about an unchangeable token's whitespace. |
| 57 | /// |
| 58 | /// Needs to be called for every token for which \c replaceWhitespace |
| 59 | /// was not called. |
| 60 | void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); |
| 61 | |
| 62 | llvm::Error addReplacement(const tooling::Replacement &Replacement); |
| 63 | |
| 64 | /// Inserts or replaces whitespace in the middle of a token. |
| 65 | /// |
| 66 | /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix |
| 67 | /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars |
| 68 | /// characters. |
| 69 | /// |
| 70 | /// Note: \p Spaces can be negative to retain information about initial |
| 71 | /// relative column offset between a line of a block comment and the start of |
| 72 | /// the comment. This negative offset may be compensated by trailing comment |
| 73 | /// alignment here. In all other cases negative \p Spaces will be truncated to |
| 74 | /// 0. |
| 75 | /// |
| 76 | /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is |
| 77 | /// used to align backslashes correctly. |
| 78 | void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, |
| 79 | unsigned ReplaceChars, |
| 80 | StringRef PreviousPostfix, |
| 81 | StringRef CurrentPrefix, bool InPPDirective, |
| 82 | unsigned Newlines, int Spaces); |
| 83 | |
| 84 | /// Returns all the \c Replacements created during formatting. |
| 85 | const tooling::Replacements &generateReplacements(); |
| 86 | |
| 87 | /// Represents a change before a token, a break inside a token, |
| 88 | /// or the layout of an unchanged token (or whitespace within). |
| 89 | struct Change { |
| 90 | /// Functor to sort changes in original source order. |
| 91 | class IsBeforeInFile { |
| 92 | public: |
| 93 | IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} |
| 94 | bool operator()(const Change &C1, const Change &C2) const; |
| 95 | |
| 96 | private: |
| 97 | const SourceManager &SourceMgr; |
| 98 | }; |
| 99 | |
| 100 | /// Creates a \c Change. |
| 101 | /// |
| 102 | /// The generated \c Change will replace the characters at |
| 103 | /// \p OriginalWhitespaceRange with a concatenation of |
| 104 | /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces |
| 105 | /// and \p CurrentLinePrefix. |
| 106 | /// |
| 107 | /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out |
| 108 | /// trailing comments and escaped newlines. |
| 109 | Change(const FormatToken &Tok, bool CreateReplacement, |
| 110 | SourceRange OriginalWhitespaceRange, int Spaces, |
| 111 | unsigned StartOfTokenColumn, unsigned NewlinesBefore, |
| 112 | StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, |
| 113 | bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken); |
| 114 | |
| 115 | // The kind of the token whose whitespace this change replaces, or in which |
| 116 | // this change inserts whitespace. |
| 117 | // FIXME: Currently this is not set correctly for breaks inside comments, as |
| 118 | // the \c BreakableToken is still doing its own alignment. |
| 119 | const FormatToken *Tok; |
| 120 | |
| 121 | bool CreateReplacement; |
| 122 | // Changes might be in the middle of a token, so we cannot just keep the |
| 123 | // FormatToken around to query its information. |
| 124 | SourceRange OriginalWhitespaceRange; |
| 125 | unsigned StartOfTokenColumn; |
| 126 | unsigned NewlinesBefore; |
| 127 | std::string PreviousLinePostfix; |
| 128 | std::string CurrentLinePrefix; |
| 129 | bool IsAligned; |
| 130 | bool ContinuesPPDirective; |
| 131 | |
| 132 | // The number of spaces in front of the token or broken part of the token. |
| 133 | // This will be adapted when aligning tokens. |
| 134 | // Can be negative to retain information about the initial relative offset |
| 135 | // of the lines in a block comment. This is used when aligning trailing |
| 136 | // comments. Uncompensated negative offset is truncated to 0. |
| 137 | int Spaces; |
| 138 | |
| 139 | // If this change is inside of a token but not at the start of the token or |
| 140 | // directly after a newline. |
| 141 | bool IsInsideToken; |
| 142 | |
| 143 | // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and |
| 144 | // \c EscapedNewlineColumn will be calculated in |
| 145 | // \c calculateLineBreakInformation. |
| 146 | bool ; |
| 147 | unsigned TokenLength; |
| 148 | unsigned PreviousEndOfTokenColumn; |
| 149 | unsigned EscapedNewlineColumn; |
| 150 | |
| 151 | // These fields are used to retain correct relative line indentation in a |
| 152 | // block comment when aligning trailing comments. |
| 153 | // |
| 154 | // If this Change represents a continuation of a block comment, |
| 155 | // \c StartOfBlockComment is pointer to the first Change in the block |
| 156 | // comment. \c IndentationOffset is a relative column offset to this |
| 157 | // change, so that the correct column can be reconstructed at the end of |
| 158 | // the alignment process. |
| 159 | const Change *; |
| 160 | int IndentationOffset; |
| 161 | |
| 162 | // Depth of conditionals. Computed from tracking fake parenthesis, except |
| 163 | // it does not increase the indent for "chained" conditionals. |
| 164 | int ConditionalsLevel; |
| 165 | |
| 166 | // A combination of indent, nesting and conditionals levels, which are used |
| 167 | // in tandem to compute lexical scope, for the purposes of deciding |
| 168 | // when to stop consecutive alignment runs. |
| 169 | std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const { |
| 170 | return std::make_tuple(args: Tok->IndentLevel, args: Tok->NestingLevel, |
| 171 | args: ConditionalsLevel); |
| 172 | } |
| 173 | }; |
| 174 | |
| 175 | private: |
| 176 | struct CellDescription { |
| 177 | unsigned Index = 0; |
| 178 | unsigned Cell = 0; |
| 179 | unsigned EndIndex = 0; |
| 180 | bool HasSplit = false; |
| 181 | CellDescription *NextColumnElement = nullptr; |
| 182 | |
| 183 | constexpr bool operator==(const CellDescription &Other) const { |
| 184 | return Index == Other.Index && Cell == Other.Cell && |
| 185 | EndIndex == Other.EndIndex; |
| 186 | } |
| 187 | constexpr bool operator!=(const CellDescription &Other) const { |
| 188 | return !(*this == Other); |
| 189 | } |
| 190 | }; |
| 191 | |
| 192 | struct CellDescriptions { |
| 193 | SmallVector<CellDescription> Cells; |
| 194 | SmallVector<unsigned> CellCounts; |
| 195 | unsigned InitialSpaces = 0; |
| 196 | |
| 197 | // Determine if every row in the array |
| 198 | // has the same number of columns. |
| 199 | bool isRectangular() const { |
| 200 | if (CellCounts.size() < 2) |
| 201 | return false; |
| 202 | |
| 203 | for (auto NumberOfColumns : CellCounts) |
| 204 | if (NumberOfColumns != CellCounts[0]) |
| 205 | return false; |
| 206 | return true; |
| 207 | } |
| 208 | }; |
| 209 | |
| 210 | /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens |
| 211 | /// or token parts in a line and \c PreviousEndOfTokenColumn and |
| 212 | /// \c EscapedNewlineColumn for the first tokens or token parts in a line. |
| 213 | void calculateLineBreakInformation(); |
| 214 | |
| 215 | /// Align consecutive C/C++ preprocessor macros over all \c Changes. |
| 216 | void alignConsecutiveMacros(); |
| 217 | |
| 218 | /// Align consecutive assignments over all \c Changes. |
| 219 | void alignConsecutiveAssignments(); |
| 220 | |
| 221 | /// Align consecutive bitfields over all \c Changes. |
| 222 | void alignConsecutiveBitFields(); |
| 223 | |
| 224 | /// Align consecutive colon. For bitfields, TableGen DAGArgs and defintions. |
| 225 | void |
| 226 | alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle, |
| 227 | TokenType Type); |
| 228 | |
| 229 | /// Align consecutive declarations over all \c Changes. |
| 230 | void alignConsecutiveDeclarations(); |
| 231 | |
| 232 | /// Align consecutive declarations over all \c Changes. |
| 233 | void alignChainedConditionals(); |
| 234 | |
| 235 | /// Align consecutive short case statements over all \c Changes. |
| 236 | void alignConsecutiveShortCaseStatements(bool IsExpr); |
| 237 | |
| 238 | /// Align consecutive TableGen DAGArg colon over all \c Changes. |
| 239 | void alignConsecutiveTableGenBreakingDAGArgColons(); |
| 240 | |
| 241 | /// Align consecutive TableGen cond operator colon over all \c Changes. |
| 242 | void alignConsecutiveTableGenCondOperatorColons(); |
| 243 | |
| 244 | /// Align consecutive TableGen definitions over all \c Changes. |
| 245 | void alignConsecutiveTableGenDefinitions(); |
| 246 | |
| 247 | /// Align trailing comments over all \c Changes. |
| 248 | void (); |
| 249 | |
| 250 | /// Align trailing comments from change \p Start to change \p End at |
| 251 | /// the specified \p Column. |
| 252 | void (unsigned Start, unsigned End, unsigned Column); |
| 253 | |
| 254 | /// Align escaped newlines over all \c Changes. |
| 255 | void alignEscapedNewlines(); |
| 256 | |
| 257 | /// Align escaped newlines from change \p Start to change \p End at |
| 258 | /// the specified \p Column. |
| 259 | void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); |
| 260 | |
| 261 | /// Align Array Initializers over all \c Changes. |
| 262 | void alignArrayInitializers(); |
| 263 | |
| 264 | /// Align Array Initializers from change \p Start to change \p End at |
| 265 | /// the specified \p Column. |
| 266 | void alignArrayInitializers(unsigned Start, unsigned End); |
| 267 | |
| 268 | /// Align Array Initializers being careful to right justify the columns |
| 269 | /// as described by \p CellDescs. |
| 270 | void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs); |
| 271 | |
| 272 | /// Align Array Initializers being careful to left justify the columns |
| 273 | /// as described by \p CellDescs. |
| 274 | void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs); |
| 275 | |
| 276 | /// Calculate the cell width between two indexes. |
| 277 | unsigned calculateCellWidth(unsigned Start, unsigned End, |
| 278 | bool WithSpaces = false) const; |
| 279 | |
| 280 | /// Get a set of fully specified CellDescriptions between \p Start and |
| 281 | /// \p End of the change list. |
| 282 | CellDescriptions getCells(unsigned Start, unsigned End); |
| 283 | |
| 284 | /// Does this \p Cell contain a split element? |
| 285 | static bool isSplitCell(const CellDescription &Cell); |
| 286 | |
| 287 | /// Get the width of the preceding cells from \p Start to \p End. |
| 288 | template <typename I> |
| 289 | auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const { |
| 290 | auto NetWidth = InitialSpaces; |
| 291 | for (auto PrevIter = Start; PrevIter != End; ++PrevIter) { |
| 292 | // If we broke the line the initial spaces are already |
| 293 | // accounted for. |
| 294 | assert(PrevIter->Index < Changes.size()); |
| 295 | if (Changes[PrevIter->Index].NewlinesBefore > 0) |
| 296 | NetWidth = 0; |
| 297 | NetWidth += |
| 298 | calculateCellWidth(Start: PrevIter->Index, End: PrevIter->EndIndex, WithSpaces: true) + 1; |
| 299 | } |
| 300 | return NetWidth; |
| 301 | } |
| 302 | |
| 303 | /// Get the maximum width of a cell in a sequence of columns. |
| 304 | template <typename I> |
| 305 | unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const { |
| 306 | unsigned CellWidth = |
| 307 | calculateCellWidth(Start: CellIter->Index, End: CellIter->EndIndex, WithSpaces: true); |
| 308 | if (Changes[CellIter->Index].NewlinesBefore == 0) |
| 309 | CellWidth += NetWidth; |
| 310 | for (const auto *Next = CellIter->NextColumnElement; Next; |
| 311 | Next = Next->NextColumnElement) { |
| 312 | auto ThisWidth = calculateCellWidth(Start: Next->Index, End: Next->EndIndex, WithSpaces: true); |
| 313 | if (Changes[Next->Index].NewlinesBefore == 0) |
| 314 | ThisWidth += NetWidth; |
| 315 | CellWidth = std::max(CellWidth, ThisWidth); |
| 316 | } |
| 317 | return CellWidth; |
| 318 | } |
| 319 | |
| 320 | /// Get The maximum width of all columns to a given cell. |
| 321 | template <typename I> |
| 322 | unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop, |
| 323 | unsigned InitialSpaces, unsigned CellCount, |
| 324 | unsigned MaxRowCount) const { |
| 325 | auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces); |
| 326 | auto RowCount = 1U; |
| 327 | auto Offset = std::distance(CellStart, CellStop); |
| 328 | for (const auto *Next = CellStop->NextColumnElement; Next; |
| 329 | Next = Next->NextColumnElement) { |
| 330 | if (RowCount >= MaxRowCount) |
| 331 | break; |
| 332 | auto Start = (CellStart + RowCount * CellCount); |
| 333 | auto End = Start + Offset; |
| 334 | MaxNetWidth = |
| 335 | std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces)); |
| 336 | ++RowCount; |
| 337 | } |
| 338 | return MaxNetWidth; |
| 339 | } |
| 340 | |
| 341 | /// Align a split cell with a newline to the first element in the cell. |
| 342 | void alignToStartOfCell(unsigned Start, unsigned End); |
| 343 | |
| 344 | /// Link the Cell pointers in the list of Cells. |
| 345 | static CellDescriptions linkCells(CellDescriptions &&CellDesc); |
| 346 | |
| 347 | /// Fill \c Replaces with the replacements for all effective changes. |
| 348 | void generateChanges(); |
| 349 | |
| 350 | /// Stores \p Text as the replacement for the whitespace in \p Range. |
| 351 | void storeReplacement(SourceRange Range, StringRef Text); |
| 352 | void appendNewlineText(std::string &Text, const Change &C); |
| 353 | void appendEscapedNewlineText(std::string &Text, unsigned Newlines, |
| 354 | unsigned PreviousEndOfTokenColumn, |
| 355 | unsigned EscapedNewlineColumn); |
| 356 | void appendIndentText(std::string &Text, unsigned IndentLevel, |
| 357 | unsigned Spaces, unsigned WhitespaceStartColumn, |
| 358 | bool IsAligned); |
| 359 | unsigned appendTabIndent(std::string &Text, unsigned Spaces, |
| 360 | unsigned Indentation); |
| 361 | |
| 362 | SmallVector<Change, 16> Changes; |
| 363 | const SourceManager &SourceMgr; |
| 364 | tooling::Replacements Replaces; |
| 365 | const FormatStyle &Style; |
| 366 | bool UseCRLF; |
| 367 | }; |

} // namespace format
} // namespace clang

#endif
