1//===--- DefinitionBlockSeparator.cpp ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements DefinitionBlockSeparator, a TokenAnalyzer that inserts
11/// or removes empty lines separating definition blocks like classes, structs,
12/// functions, enums, and namespaces in between.
13///
14//===----------------------------------------------------------------------===//
15
#include "DefinitionBlockSeparator.h"
#include <utility>
17#define DEBUG_TYPE "definition-block-separator"
18
19namespace clang {
20namespace format {
21std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze(
22 TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
23 FormatTokenLexer &Tokens) {
24 assert(Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave);
25 AffectedRangeMgr.computeAffectedLines(Lines&: AnnotatedLines);
26 tooling::Replacements Result;
27 separateBlocks(Lines&: AnnotatedLines, Result, Tokens);
28 return {Result, 0};
29}
30
31void DefinitionBlockSeparator::separateBlocks(
32 SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
33 FormatTokenLexer &Tokens) {
34 const bool IsNeverStyle =
35 Style.SeparateDefinitionBlocks == FormatStyle::SDS_Never;
36 const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
37 auto GetBracketLevelChange = [](const FormatToken *Tok) {
38 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::l_paren, Ks: tok::l_square))
39 return 1;
40 if (Tok->isOneOf(K1: tok::r_brace, K2: tok::r_paren, Ks: tok::r_square))
41 return -1;
42 return 0;
43 };
44 auto LikelyDefinition = [&](const AnnotatedLine *Line,
45 bool ExcludeEnum = false) {
46 if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
47 Line->startsWithNamespace()) {
48 return true;
49 }
50 int BracketLevel = 0;
51 for (const FormatToken *CurrentToken = Line->First; CurrentToken;
52 CurrentToken = CurrentToken->Next) {
53 if (BracketLevel == 0) {
54 if (CurrentToken->isOneOf(K1: tok::kw_class, K2: tok::kw_struct,
55 Ks: tok::kw_union) ||
56 (Style.isJavaScript() &&
57 CurrentToken->is(II: ExtraKeywords.kw_function))) {
58 return true;
59 }
60 if (!ExcludeEnum && CurrentToken->is(Kind: tok::kw_enum))
61 return true;
62 }
63 BracketLevel += GetBracketLevelChange(CurrentToken);
64 }
65 return false;
66 };
67 unsigned NewlineCount =
68 (Style.SeparateDefinitionBlocks == FormatStyle::SDS_Always ? 1 : 0) + 1;
69 WhitespaceManager Whitespaces(
70 Env.getSourceManager(), Style,
71 Style.LineEnding > FormatStyle::LE_CRLF
72 ? WhitespaceManager::inputUsesCRLF(
73 Text: Env.getSourceManager().getBufferData(FID: Env.getFileID()),
74 DefaultToCRLF: Style.LineEnding == FormatStyle::LE_DeriveCRLF)
75 : Style.LineEnding == FormatStyle::LE_CRLF);
76 for (unsigned I = 0; I < Lines.size(); ++I) {
77 const auto &CurrentLine = Lines[I];
78 if (CurrentLine->InPPDirective)
79 continue;
80 FormatToken *TargetToken = nullptr;
81 AnnotatedLine *TargetLine;
82 auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
83 AnnotatedLine *OpeningLine = nullptr;
84 const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
85 return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
86 };
87 const auto InsertReplacement = [&](const int NewlineToInsert) {
88 assert(TargetLine);
89 assert(TargetToken);
90
91 // Lines should not be added in the disabled region.
92 if (TargetToken->is(Kind: tok::comment) &&
93 isClangFormatOn(Comment: TargetToken->TokenText)) {
94 return;
95 }
96 // Do not handle EOF newlines.
97 if (TargetToken->is(Kind: tok::eof))
98 return;
99 if (IsAccessSpecifierToken(TargetToken) ||
100 (OpeningLineIndex > 0 &&
101 IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First))) {
102 return;
103 }
104 if (!TargetLine->Affected)
105 return;
106 Whitespaces.replaceWhitespace(Tok&: *TargetToken, Newlines: NewlineToInsert,
107 Spaces: TargetToken->OriginalColumn,
108 StartOfTokenColumn: TargetToken->OriginalColumn);
109 };
110 const auto IsPPConditional = [&](const size_t LineIndex) {
111 const auto &Line = Lines[LineIndex];
112 return Line->First->is(Kind: tok::hash) && Line->First->Next &&
113 Line->First->Next->isOneOf(K1: tok::pp_if, K2: tok::pp_ifdef, Ks: tok::pp_else,
114 Ks: tok::pp_ifndef, Ks: tok::pp_elifndef,
115 Ks: tok::pp_elifdef, Ks: tok::pp_elif,
116 Ks: tok::pp_endif);
117 };
118 const auto FollowingOtherOpening = [&]() {
119 return OpeningLineIndex == 0 ||
120 Lines[OpeningLineIndex - 1]->Last->opensScope() ||
121 IsPPConditional(OpeningLineIndex - 1);
122 };
123 const auto HasEnumOnLine = [&]() {
124 bool FoundEnumKeyword = false;
125 int BracketLevel = 0;
126 for (const FormatToken *CurrentToken = CurrentLine->First; CurrentToken;
127 CurrentToken = CurrentToken->Next) {
128 if (BracketLevel == 0) {
129 if (CurrentToken->is(Kind: tok::kw_enum))
130 FoundEnumKeyword = true;
131 else if (FoundEnumKeyword && CurrentToken->is(Kind: tok::l_brace))
132 return true;
133 }
134 BracketLevel += GetBracketLevelChange(CurrentToken);
135 }
136 return FoundEnumKeyword && I + 1 < Lines.size() &&
137 Lines[I + 1]->First->is(Kind: tok::l_brace);
138 };
139
140 bool IsDefBlock = false;
141 const auto MayPrecedeDefinition = [&](const int Direction = -1) {
142 assert(Direction >= -1);
143 assert(Direction <= 1);
144
145 if (Lines[OpeningLineIndex]->First->is(TT: TT_CSharpGenericTypeConstraint))
146 return true;
147
148 const size_t OperateIndex = OpeningLineIndex + Direction;
149 assert(OperateIndex < Lines.size());
150 const auto &OperateLine = Lines[OperateIndex];
151 if (LikelyDefinition(OperateLine))
152 return false;
153
154 const auto *NextLine =
155 OperateIndex + 1 < Lines.size() ? Lines[OperateIndex + 1] : nullptr;
156
157 if (const auto *Tok = OperateLine->First;
158 Tok->is(Kind: tok::comment) && !isClangFormatOn(Comment: Tok->TokenText)) {
159 const bool IsEndComment = Tok->NewlinesBefore == 1 && NextLine &&
160 NextLine->First->NewlinesBefore > 1;
161 if (!IsEndComment)
162 return true;
163 }
164
165 // A single line identifier that is not in the last line.
166 if (OperateLine->First->is(Kind: tok::identifier) &&
167 OperateLine->First == OperateLine->Last && NextLine) {
168 // UnwrappedLineParser's recognition of free-standing macro like
169 // Q_OBJECT may also recognize some uppercased type names that may be
170 // used as return type as that kind of macros, which is a bit hard to
171 // distinguish one from another purely from token patterns. Here, we
172 // try not to add new lines below those identifiers.
173 if (NextLine->MightBeFunctionDecl &&
174 NextLine->mightBeFunctionDefinition() &&
175 NextLine->First->NewlinesBefore == 1 &&
176 OperateLine->First->is(TT: TT_FunctionLikeOrFreestandingMacro)) {
177 return true;
178 }
179 }
180
181 if (Style.isCSharp() && OperateLine->First->is(TT: TT_AttributeLSquare))
182 return true;
183 return false;
184 };
185
186 if (HasEnumOnLine() &&
187 !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
188 // We have no scope opening/closing information for enum.
189 IsDefBlock = true;
190 OpeningLineIndex = I;
191 while (OpeningLineIndex > 0 && MayPrecedeDefinition())
192 --OpeningLineIndex;
193 OpeningLine = Lines[OpeningLineIndex];
194 TargetLine = OpeningLine;
195 TargetToken = TargetLine->First;
196 if (!FollowingOtherOpening())
197 InsertReplacement(NewlineCount);
198 else if (IsNeverStyle)
199 InsertReplacement(OpeningLineIndex != 0);
200 TargetLine = CurrentLine;
201 TargetToken = TargetLine->First;
202 while (TargetToken && TargetToken->isNot(Kind: tok::r_brace))
203 TargetToken = TargetToken->Next;
204 if (!TargetToken)
205 while (I < Lines.size() && Lines[I]->First->isNot(Kind: tok::r_brace))
206 ++I;
207 } else if (CurrentLine->First->closesScope()) {
208 if (OpeningLineIndex > Lines.size())
209 continue;
210 // A function try block should be together.
211 if (CurrentLine->First->startsSequence(K1: tok::r_brace, Tokens: tok::kw_catch))
212 continue;
213 // Handling the case that opening brace has its own line, with checking
214 // whether the last line already had an opening brace to guard against
215 // misrecognition.
216 if (OpeningLineIndex > 0 &&
217 Lines[OpeningLineIndex]->First->is(Kind: tok::l_brace) &&
218 Lines[OpeningLineIndex - 1]->Last->isNot(Kind: tok::l_brace)) {
219 --OpeningLineIndex;
220 }
221 OpeningLine = Lines[OpeningLineIndex];
222 // Closing a function definition.
223 if (LikelyDefinition(OpeningLine)) {
224 IsDefBlock = true;
225 while (OpeningLineIndex > 0 && MayPrecedeDefinition())
226 --OpeningLineIndex;
227 OpeningLine = Lines[OpeningLineIndex];
228 TargetLine = OpeningLine;
229 TargetToken = TargetLine->First;
230 if (!FollowingOtherOpening()) {
231 // Avoid duplicated replacement.
232 if (TargetToken->isNot(Kind: tok::l_brace))
233 InsertReplacement(NewlineCount);
234 } else if (IsNeverStyle) {
235 InsertReplacement(OpeningLineIndex != 0);
236 }
237 }
238 }
239
240 // Not the last token.
241 if (IsDefBlock && I + 1 < Lines.size()) {
242 OpeningLineIndex = I + 1;
243 TargetLine = Lines[OpeningLineIndex];
244 TargetToken = TargetLine->First;
245
246 // No empty line for continuously closing scopes. The token will be
247 // handled in another case if the line following is opening a
248 // definition.
249 if (!TargetToken->closesScope() && !IsPPConditional(OpeningLineIndex)) {
250 // Check whether current line may precede a definition line.
251 while (OpeningLineIndex + 1 < Lines.size() &&
252 MayPrecedeDefinition(/*Direction=*/0)) {
253 ++OpeningLineIndex;
254 }
255 TargetLine = Lines[OpeningLineIndex];
256 if (!LikelyDefinition(TargetLine)) {
257 OpeningLineIndex = I + 1;
258 TargetLine = Lines[I + 1];
259 TargetToken = TargetLine->First;
260 InsertReplacement(NewlineCount);
261 }
262 } else if (IsNeverStyle) {
263 InsertReplacement(/*NewlineToInsert=*/1);
264 }
265 }
266 }
267 for (const auto &R : Whitespaces.generateReplacements()) {
268 // The add method returns an Error instance which simulates program exit
269 // code through overloading boolean operator, thus false here indicates
270 // success.
271 if (Result.add(R))
272 return;
273 }
274}
275} // namespace format
276} // namespace clang
277