1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, Line: *CI, Prefix: (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63[[maybe_unused]] static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(OS&: llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(Val: MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
96class ScopedLineState {
97public:
98 ScopedLineState(UnwrappedLineParser &Parser,
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
114 ~ScopedLineState() {
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
125 UnwrappedLineParser &Parser;
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
131class CompoundStatementIndenter {
132public:
133 CompoundStatementIndenter(UnwrappedLineParser *Parser,
134 const FormatStyle &Style, unsigned &LineLevel)
135 : CompoundStatementIndenter(Parser, LineLevel,
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
139 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
154UnwrappedLineParser::UnwrappedLineParser(
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
157 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(getIncludeGuardState(Style: Style.IndentPPDirectives)),
166 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
167 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
168
169void UnwrappedLineParser::reset() {
170 PPBranchLevel = -1;
171 IncludeGuard = getIncludeGuardState(Style: Style.IndentPPDirectives);
172 IncludeGuardToken = nullptr;
173 Line.reset(p: new UnwrappedLine);
174 CommentsBeforeNextToken.clear();
175 FormatTok = nullptr;
176 AtEndOfPPLine = false;
177 IsDecltypeAutoFunction = false;
178 PreprocessorDirectives.clear();
179 CurrentLines = &Lines;
180 DeclarationScopeStack.clear();
181 NestedTooDeep.clear();
182 NestedLambdas.clear();
183 PPStack.clear();
184 Line->FirstStartColumn = FirstStartColumn;
185
186 if (!Unexpanded.empty())
187 for (FormatToken *Token : AllTokens)
188 Token->MacroCtx.reset();
189 CurrentExpandedLines.clear();
190 ExpandedLines.clear();
191 Unexpanded.clear();
192 InExpansion = false;
193 Reconstruct.reset();
194}
195
196void UnwrappedLineParser::parse() {
197 IndexedTokenSource TokenSource(AllTokens);
198 Line->FirstStartColumn = FirstStartColumn;
199 do {
200 LLVM_DEBUG(llvm::dbgs() << "----\n");
201 reset();
202 Tokens = &TokenSource;
203 TokenSource.reset();
204
205 readToken();
206 parseFile();
207
208 // If we found an include guard then all preprocessor directives (other than
209 // the guard) are over-indented by one.
210 if (IncludeGuard == IG_Found) {
211 for (auto &Line : Lines)
212 if (Line.InPPDirective && Line.Level > 0)
213 --Line.Level;
214 }
215
216 // Create line with eof token.
217 assert(eof());
218 pushToken(Tok: FormatTok);
219 addUnwrappedLine();
220
221 // In a first run, format everything with the lines containing macro calls
222 // replaced by the expansion.
223 if (!ExpandedLines.empty()) {
224 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
225 for (const auto &Line : Lines) {
226 if (!Line.Tokens.empty()) {
227 auto it = ExpandedLines.find(Val: Line.Tokens.begin()->Tok);
228 if (it != ExpandedLines.end()) {
229 for (const auto &Expanded : it->second) {
230 LLVM_DEBUG(printDebugInfo(Expanded));
231 Callback.consumeUnwrappedLine(Line: Expanded);
232 }
233 continue;
234 }
235 }
236 LLVM_DEBUG(printDebugInfo(Line));
237 Callback.consumeUnwrappedLine(Line);
238 }
239 Callback.finishRun();
240 }
241
242 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
243 for (const UnwrappedLine &Line : Lines) {
244 LLVM_DEBUG(printDebugInfo(Line));
245 Callback.consumeUnwrappedLine(Line);
246 }
247 Callback.finishRun();
248 Lines.clear();
249 while (!PPLevelBranchIndex.empty() &&
250 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
251 PPLevelBranchIndex.resize(N: PPLevelBranchIndex.size() - 1);
252 PPLevelBranchCount.resize(N: PPLevelBranchCount.size() - 1);
253 }
254 if (!PPLevelBranchIndex.empty()) {
255 ++PPLevelBranchIndex.back();
256 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
257 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 }
259 } while (!PPLevelBranchIndex.empty());
260}
261
262void UnwrappedLineParser::parseFile() {
263 // The top-level context in a file always has declarations, except for pre-
264 // processor directives and JavaScript files.
265 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
266 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
267 MustBeDeclaration);
268 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
269 parseBracedList();
270 else
271 parseLevel();
272 // Make sure to format the remaining tokens.
273 //
274 // LK_TextProto is special since its top-level is parsed as the body of a
275 // braced list, which does not necessarily have natural line separators such
276 // as a semicolon. Comments after the last entry that have been determined to
277 // not belong to that line, as in:
278 // key: value
279 // // endfile comment
280 // do not have a chance to be put on a line of their own until this point.
281 // Here we add this newline before end-of-file comments.
282 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
283 addUnwrappedLine();
284 flushComments(NewlineBeforeNext: true);
285 addUnwrappedLine();
286}
287
288void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289 do {
290 switch (FormatTok->Tok.getKind()) {
291 case tok::l_brace:
292 case tok::semi:
293 return;
294 default:
295 if (FormatTok->is(II: Keywords.kw_where)) {
296 addUnwrappedLine();
297 nextToken();
298 parseCSharpGenericTypeConstraint();
299 break;
300 }
301 nextToken();
302 break;
303 }
304 } while (!eof());
305}
306
307void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets = 1;
309 do {
310 switch (FormatTok->Tok.getKind()) {
311 case tok::r_square:
312 nextToken();
313 --UnpairedSquareBrackets;
314 if (UnpairedSquareBrackets == 0) {
315 addUnwrappedLine();
316 return;
317 }
318 break;
319 case tok::l_square:
320 ++UnpairedSquareBrackets;
321 nextToken();
322 break;
323 default:
324 nextToken();
325 break;
326 }
327 } while (!eof());
328}
329
330bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines.empty() && Lines.back().InPPDirective)
332 return true;
333
334 const FormatToken *Previous = Tokens->getPreviousToken();
335 return Previous && Previous->is(Kind: tok::comment) &&
336 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
337}
338
339/// Parses a level, that is ???.
340/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341/// \param IfKind The \p if statement kind in the level.
342/// \param IfLeftBrace The left brace of the \p if block in the level.
343/// \returns true if a simple block of if/else/for/while, or false otherwise.
344/// (A simple block has a single statement.)
345bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
346 IfStmtKind *IfKind,
347 FormatToken **IfLeftBrace) {
348 const bool InRequiresExpression =
349 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
350 const bool IsPrecededByCommentOrPPDirective =
351 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
352 FormatToken *IfLBrace = nullptr;
353 bool HasDoWhile = false;
354 bool HasLabel = false;
355 unsigned StatementCount = 0;
356 bool SwitchLabelEncountered = false;
357
358 do {
359 if (FormatTok->isAttribute()) {
360 nextToken();
361 if (FormatTok->is(Kind: tok::l_paren))
362 parseParens();
363 continue;
364 }
365 tok::TokenKind Kind = FormatTok->Tok.getKind();
366 if (FormatTok->is(TT: TT_MacroBlockBegin))
367 Kind = tok::l_brace;
368 else if (FormatTok->is(TT: TT_MacroBlockEnd))
369 Kind = tok::r_brace;
370
371 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
372 &HasLabel, &StatementCount] {
373 parseStructuralElement(OpeningBrace, IfKind, IfLeftBrace: &IfLBrace,
374 HasDoWhile: HasDoWhile ? nullptr : &HasDoWhile,
375 HasLabel: HasLabel ? nullptr : &HasLabel);
376 ++StatementCount;
377 assert(StatementCount > 0 && "StatementCount overflow!");
378 };
379
380 switch (Kind) {
381 case tok::comment:
382 nextToken();
383 addUnwrappedLine();
384 break;
385 case tok::l_brace:
386 if (InRequiresExpression) {
387 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
388 } else if (FormatTok->Previous &&
389 FormatTok->Previous->ClosesRequiresClause) {
390 // We need the 'default' case here to correctly parse a function
391 // l_brace.
392 ParseDefault();
393 continue;
394 }
395 if (!InRequiresExpression && FormatTok->isNot(Kind: TT_MacroBlockBegin)) {
396 if (tryToParseBracedList())
397 continue;
398 FormatTok->setFinalizedType(TT_BlockLBrace);
399 }
400 parseBlock();
401 ++StatementCount;
402 assert(StatementCount > 0 && "StatementCount overflow!");
403 addUnwrappedLine();
404 break;
405 case tok::r_brace:
406 if (OpeningBrace) {
407 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
408 OpeningBrace->isNoneOf(Ks: TT_ControlStatementLBrace, Ks: TT_ElseLBrace)) {
409 return false;
410 }
411 if (FormatTok->isNot(Kind: tok::r_brace) || StatementCount != 1 || HasLabel ||
412 HasDoWhile || IsPrecededByCommentOrPPDirective ||
413 precededByCommentOrPPDirective()) {
414 return false;
415 }
416 const FormatToken *Next = Tokens->peekNextToken();
417 if (Next->is(Kind: tok::comment) && Next->NewlinesBefore == 0)
418 return false;
419 if (IfLeftBrace)
420 *IfLeftBrace = IfLBrace;
421 return true;
422 }
423 nextToken();
424 addUnwrappedLine();
425 break;
426 case tok::kw_default: {
427 unsigned StoredPosition = Tokens->getPosition();
428 auto *Next = Tokens->getNextNonComment();
429 FormatTok = Tokens->setPosition(StoredPosition);
430 if (Next->isNoneOf(Ks: tok::colon, Ks: tok::arrow)) {
431 // default not followed by `:` or `->` is not a case label; treat it
432 // like an identifier.
433 parseStructuralElement();
434 break;
435 }
436 // Else, if it is 'default:', fall through to the case handling.
437 [[fallthrough]];
438 }
439 case tok::kw_case:
440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
441 (Style.isJavaScript() && Line->MustBeDeclaration)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
446 ParseDefault();
447 break;
448 }
449 if (!SwitchLabelEncountered &&
450 (Style.IndentCaseLabels ||
451 (OpeningBrace && OpeningBrace->is(TT: TT_SwitchExpressionLBrace)) ||
452 (Line->InPPDirective && Line->Level == 1))) {
453 ++Line->Level;
454 }
455 SwitchLabelEncountered = true;
456 parseStructuralElement();
457 break;
458 case tok::l_square:
459 if (Style.isCSharp()) {
460 nextToken();
461 parseCSharpAttribute();
462 break;
463 }
464 if (handleCppAttributes())
465 break;
466 [[fallthrough]];
467 default:
468 ParseDefault();
469 break;
470 }
471 } while (!eof());
472
473 return false;
474}
475
476void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
480 // definitions, too.
481 unsigned StoredPosition = Tokens->getPosition();
482 FormatToken *Tok = FormatTok;
483 const FormatToken *PrevTok = Tok->Previous;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
487 struct StackEntry {
488 FormatToken *Tok;
489 const FormatToken *PrevTok;
490 };
491 SmallVector<StackEntry, 8> LBraceStack;
492 assert(Tok->is(tok::l_brace));
493
494 do {
495 auto *NextTok = Tokens->getNextNonComment();
496
497 if (!Line->InMacroBody && !Style.isTableGen()) {
498 // Skip PPDirective lines (except macro definitions) and comments.
499 while (NextTok->is(Kind: tok::hash)) {
500 NextTok = Tokens->getNextToken();
501 if (NextTok->isOneOf(K1: tok::pp_not_keyword, K2: tok::pp_define))
502 break;
503 do {
504 NextTok = Tokens->getNextToken();
505 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(Kind: tok::eof));
506
507 while (NextTok->is(Kind: tok::comment))
508 NextTok = Tokens->getNextToken();
509 }
510 }
511
512 switch (Tok->Tok.getKind()) {
513 case tok::l_brace:
514 if (Style.isJavaScript() && PrevTok) {
515 if (PrevTok->isOneOf(K1: tok::colon, K2: tok::less)) {
516 // A ':' indicates this code is in a type, or a braced list
517 // following a label in an object literal ({a: {b: 1}}).
518 // A '<' could be an object used in a comparison, but that is nonsense
519 // code (can never return true), so more likely it is a generic type
520 // argument (`X<{a: string; b: number}>`).
521 // The code below could be confused by semicolons between the
522 // individual members in a type member list, which would normally
523 // trigger BK_Block. In both cases, this must be parsed as an inline
524 // braced init.
525 Tok->setBlockKind(BK_BracedInit);
526 } else if (PrevTok->is(Kind: tok::r_paren)) {
527 // `) { }` can only occur in function or method declarations in JS.
528 Tok->setBlockKind(BK_Block);
529 }
530 } else if (Style.isJava() && PrevTok && PrevTok->is(Kind: tok::arrow)) {
531 Tok->setBlockKind(BK_Block);
532 } else {
533 Tok->setBlockKind(BK_Unknown);
534 }
535 LBraceStack.push_back(Elt: {.Tok: Tok, .PrevTok: PrevTok});
536 break;
537 case tok::r_brace:
538 if (LBraceStack.empty())
539 break;
540 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BBK: BK_Unknown)) {
541 bool ProbablyBracedList = false;
542 if (Style.Language == FormatStyle::LK_Proto) {
543 ProbablyBracedList = NextTok->isOneOf(K1: tok::comma, K2: tok::r_square);
544 } else if (LBrace->isNot(Kind: TT_EnumLBrace)) {
545 // Using OriginalColumn to distinguish between ObjC methods and
546 // binary operators is a bit hacky.
547 bool NextIsObjCMethod = NextTok->isOneOf(K1: tok::plus, K2: tok::minus) &&
548 NextTok->OriginalColumn == 0;
549
550 // Try to detect a braced list. Note that regardless how we mark inner
551 // braces here, we will overwrite the BlockKind later if we parse a
552 // braced list (where all blocks inside are by default braced lists),
553 // or when we explicitly detect blocks (for example while parsing
554 // lambdas).
555
556 // If we already marked the opening brace as braced list, the closing
557 // must also be part of it.
558 ProbablyBracedList = LBrace->is(TT: TT_BracedListLBrace);
559
560 ProbablyBracedList = ProbablyBracedList ||
561 (Style.isJavaScript() &&
562 NextTok->isOneOf(K1: Keywords.kw_of, K2: Keywords.kw_in,
563 Ks: Keywords.kw_as));
564 ProbablyBracedList =
565 ProbablyBracedList ||
566 (IsCpp && (PrevTok->Tok.isLiteral() ||
567 NextTok->isOneOf(K1: tok::l_paren, K2: tok::arrow)));
568
569 // If there is a comma, or right paren after the closing brace, we
570 // assume this is a braced initializer list.
571 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
572 // braced list in JS.
573 ProbablyBracedList =
574 ProbablyBracedList ||
575 NextTok->isOneOf(K1: tok::comma, K2: tok::period, Ks: tok::colon,
576 Ks: tok::r_paren, Ks: tok::r_square, Ks: tok::ellipsis);
577
578 // Distinguish between braced list in a constructor initializer list
579 // followed by constructor body, or just adjacent blocks.
580 ProbablyBracedList =
581 ProbablyBracedList ||
582 (NextTok->is(Kind: tok::l_brace) && LBraceStack.back().PrevTok &&
583 LBraceStack.back().PrevTok->isOneOf(K1: tok::identifier,
584 K2: tok::greater));
585
586 ProbablyBracedList =
587 ProbablyBracedList ||
588 (NextTok->is(Kind: tok::identifier) &&
589 PrevTok->isNoneOf(Ks: tok::semi, Ks: tok::r_brace, Ks: tok::l_brace));
590
591 ProbablyBracedList = ProbablyBracedList ||
592 (NextTok->is(Kind: tok::semi) &&
593 (!ExpectClassBody || LBraceStack.size() != 1));
594
595 ProbablyBracedList =
596 ProbablyBracedList ||
597 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
598
599 if (!Style.isCSharp() && NextTok->is(Kind: tok::l_square)) {
600 // We can have an array subscript after a braced init
601 // list, but C++11 attributes are expected after blocks.
602 NextTok = Tokens->getNextToken();
603 ProbablyBracedList = NextTok->isNot(Kind: tok::l_square);
604 }
605
606 // Cpp macro definition body that is a nonempty braced list or block:
607 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
608 !FormatTok->Previous && NextTok->is(Kind: tok::eof) &&
609 // A statement can end with only `;` (simple statement), a block
610 // closing brace (compound statement), or `:` (label statement).
611 // If PrevTok is a block opening brace, Tok ends an empty block.
612 PrevTok->isNoneOf(Ks: tok::semi, Ks: BK_Block, Ks: tok::colon)) {
613 ProbablyBracedList = true;
614 }
615 }
616 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
617 Tok->setBlockKind(BlockKind);
618 LBrace->setBlockKind(BlockKind);
619 }
620 LBraceStack.pop_back();
621 break;
622 case tok::identifier:
623 if (Tok->isNot(Kind: TT_StatementMacro))
624 break;
625 [[fallthrough]];
626 case tok::at:
627 case tok::semi:
628 case tok::kw_if:
629 case tok::kw_while:
630 case tok::kw_for:
631 case tok::kw_switch:
632 case tok::kw_try:
633 case tok::kw___try:
634 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BBK: BK_Unknown))
635 LBraceStack.back().Tok->setBlockKind(BK_Block);
636 break;
637 default:
638 break;
639 }
640
641 PrevTok = Tok;
642 Tok = NextTok;
643 } while (Tok->isNot(Kind: tok::eof) && !LBraceStack.empty());
644
645 // Assume other blocks for all unclosed opening braces.
646 for (const auto &Entry : LBraceStack)
647 if (Entry.Tok->is(BBK: BK_Unknown))
648 Entry.Tok->setBlockKind(BK_Block);
649
650 FormatTok = Tokens->setPosition(StoredPosition);
651}
652
653// Sets the token type of the directly previous right brace.
654void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
655 if (auto Prev = FormatTok->getPreviousNonComment();
656 Prev && Prev->is(Kind: tok::r_brace)) {
657 Prev->setFinalizedType(Type);
658 }
659}
660
/// Mixes the std::hash of \p v into \p seed in place.
///
/// The new seed is `seed ^ (hash(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2))`,
/// so the result depends on both the value and the order of combination.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
666
667size_t UnwrappedLineParser::computePPHash() const {
668 size_t h = 0;
669 for (const auto &i : PPStack) {
670 hash_combine(seed&: h, v: size_t(i.Kind));
671 hash_combine(seed&: h, v: i.Line);
672 }
673 return h;
674}
675
676// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
677// is not null, subtracts its length (plus the preceding space) when computing
678// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
679// running the token annotator on it so that we can restore them afterward.
680bool UnwrappedLineParser::mightFitOnOneLine(
681 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
682 const auto ColumnLimit = Style.ColumnLimit;
683 if (ColumnLimit == 0)
684 return true;
685
686 auto &Tokens = ParsedLine.Tokens;
687 assert(!Tokens.empty());
688
689 const auto *LastToken = Tokens.back().Tok;
690 assert(LastToken);
691
692 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
693
694 int Index = 0;
695 for (const auto &Token : Tokens) {
696 assert(Token.Tok);
697 auto &SavedToken = SavedTokens[Index++];
698 SavedToken.Tok = new FormatToken;
699 SavedToken.Tok->copyFrom(Tok: *Token.Tok);
700 SavedToken.Children = std::move(Token.Children);
701 }
702
703 AnnotatedLine Line(ParsedLine);
704 assert(Line.Last == LastToken);
705
706 TokenAnnotator Annotator(Style, Keywords);
707 Annotator.annotate(Line);
708 Annotator.calculateFormattingInformation(Line);
709
710 auto Length = LastToken->TotalLength;
711 if (OpeningBrace) {
712 assert(OpeningBrace != Tokens.front().Tok);
713 if (auto Prev = OpeningBrace->Previous;
714 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
715 Length -= ColumnLimit;
716 }
717 Length -= OpeningBrace->TokenText.size() + 1;
718 }
719
720 if (const auto *FirstToken = Line.First; FirstToken->is(Kind: tok::r_brace)) {
721 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
722 Length -= FirstToken->TokenText.size() + 1;
723 }
724
725 Index = 0;
726 for (auto &Token : Tokens) {
727 const auto &SavedToken = SavedTokens[Index++];
728 Token.Tok->copyFrom(Tok: *SavedToken.Tok);
729 Token.Children = std::move(SavedToken.Children);
730 delete SavedToken.Tok;
731 }
732
733 // If these change PPLevel needs to be used for get correct indentation.
734 assert(!Line.InMacroBody);
735 assert(!Line.InPPDirective);
736 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
737}
738
739FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
740 unsigned AddLevels, bool MunchSemi,
741 bool KeepBraces,
742 IfStmtKind *IfKind,
743 bool UnindentWhitesmithsBraces) {
744 auto HandleVerilogBlockLabel = [this]() {
745 // ":" name
746 if (Style.isVerilog() && FormatTok->is(Kind: tok::colon)) {
747 nextToken();
748 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
749 nextToken();
750 }
751 };
752
753 // Whether this is a Verilog-specific block that has a special header like a
754 // module.
755 const bool VerilogHierarchy =
756 Style.isVerilog() && Keywords.isVerilogHierarchy(Tok: *FormatTok);
757 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
758 (Style.isVerilog() &&
759 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
760 "'{' or macro block token expected");
761 FormatToken *Tok = FormatTok;
762 const bool FollowedByComment = Tokens->peekNextToken()->is(Kind: tok::comment);
763 auto Index = CurrentLines->size();
764 const bool MacroBlock = FormatTok->is(TT: TT_MacroBlockBegin);
765 FormatTok->setBlockKind(BK_Block);
766
767 const bool IsWhitesmiths =
768 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
769
770 // For Whitesmiths mode, jump to the next level prior to skipping over the
771 // braces.
772 if (!VerilogHierarchy && AddLevels > 0 && IsWhitesmiths)
773 ++Line->Level;
774
775 size_t PPStartHash = computePPHash();
776
777 const unsigned InitialLevel = Line->Level;
778 if (VerilogHierarchy) {
779 AddLevels += parseVerilogHierarchyHeader();
780 } else {
781 nextToken(/*LevelDifference=*/AddLevels);
782 HandleVerilogBlockLabel();
783 }
784
785 // Bail out if there are too many levels. Otherwise, the stack might overflow.
786 if (Line->Level > 300)
787 return nullptr;
788
789 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
790 parseParens();
791
792 size_t NbPreprocessorDirectives =
793 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
794 addUnwrappedLine();
795 size_t OpeningLineIndex =
796 CurrentLines->empty()
797 ? (UnwrappedLine::kInvalidIndex)
798 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
799
800 // Whitesmiths is weird here. The brace needs to be indented for the namespace
801 // block, but the block itself may not be indented depending on the style
802 // settings. This allows the format to back up one level in those cases.
803 if (UnindentWhitesmithsBraces)
804 --Line->Level;
805
806 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
807 MustBeDeclaration);
808
809 // Whitesmiths logic has already added a level by this point, so avoid
810 // adding it twice.
811 if (AddLevels > 0u)
812 Line->Level += AddLevels - (IsWhitesmiths ? 1 : 0);
813
814 FormatToken *IfLBrace = nullptr;
815 const bool SimpleBlock = parseLevel(OpeningBrace: Tok, IfKind, IfLeftBrace: &IfLBrace);
816
817 if (eof())
818 return IfLBrace;
819
820 if (MacroBlock ? FormatTok->isNot(Kind: TT_MacroBlockEnd)
821 : FormatTok->isNot(Kind: tok::r_brace)) {
822 Line->Level = InitialLevel;
823 FormatTok->setBlockKind(BK_Block);
824 return IfLBrace;
825 }
826
827 if (FormatTok->is(Kind: tok::r_brace)) {
828 FormatTok->setBlockKind(BK_Block);
829 if (Tok->is(TT: TT_NamespaceLBrace))
830 FormatTok->setFinalizedType(TT_NamespaceRBrace);
831 }
832
833 const bool IsFunctionRBrace =
834 FormatTok->is(Kind: tok::r_brace) && Tok->is(TT: TT_FunctionLBrace);
835
836 auto RemoveBraces = [=]() mutable {
837 if (!SimpleBlock)
838 return false;
839 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
840 assert(FormatTok->is(tok::r_brace));
841 const bool WrappedOpeningBrace = !Tok->Previous;
842 if (WrappedOpeningBrace && FollowedByComment)
843 return false;
844 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
845 if (KeepBraces && !HasRequiredIfBraces)
846 return false;
847 if (Tok->isNot(Kind: TT_ElseLBrace) || !HasRequiredIfBraces) {
848 const FormatToken *Previous = Tokens->getPreviousToken();
849 assert(Previous);
850 if (Previous->is(Kind: tok::r_brace) && !Previous->Optional)
851 return false;
852 }
853 assert(!CurrentLines->empty());
854 auto &LastLine = CurrentLines->back();
855 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(ParsedLine&: LastLine))
856 return false;
857 if (Tok->is(TT: TT_ElseLBrace))
858 return true;
859 if (WrappedOpeningBrace) {
860 assert(Index > 0);
861 --Index; // The line above the wrapped l_brace.
862 Tok = nullptr;
863 }
864 return mightFitOnOneLine(ParsedLine&: (*CurrentLines)[Index], OpeningBrace: Tok);
865 };
866 if (RemoveBraces()) {
867 Tok->MatchingParen = FormatTok;
868 FormatTok->MatchingParen = Tok;
869 }
870
871 size_t PPEndHash = computePPHash();
872
873 // Munch the closing brace.
874 nextToken(/*LevelDifference=*/-AddLevels);
875
876 // When this is a function block and there is an unnecessary semicolon
877 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
878 // it later).
879 if (Style.RemoveSemicolon && IsFunctionRBrace) {
880 while (FormatTok->is(Kind: tok::semi)) {
881 FormatTok->Optional = true;
882 nextToken();
883 }
884 }
885
886 HandleVerilogBlockLabel();
887
888 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
889 parseParens();
890
891 Line->Level = InitialLevel;
892
893 if (FormatTok->is(Kind: tok::kw_noexcept)) {
894 // A noexcept in a requires expression.
895 nextToken();
896 }
897
898 if (FormatTok->is(Kind: tok::arrow)) {
899 // Following the } or noexcept we can find a trailing return type arrow
900 // as part of an implicit conversion constraint.
901 nextToken();
902 parseStructuralElement();
903 }
904
905 if (MunchSemi && FormatTok->is(Kind: tok::semi))
906 nextToken();
907
908 if (PPStartHash == PPEndHash) {
909 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
910 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
911 // Update the opening line to add the forward reference as well
912 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
913 CurrentLines->size() - 1;
914 }
915 }
916
917 return IfLBrace;
918}
919
920static bool isGoogScope(const UnwrappedLine &Line) {
921 // FIXME: Closure-library specific stuff should not be hard-coded but be
922 // configurable.
923 if (Line.Tokens.size() < 4)
924 return false;
925 auto I = Line.Tokens.begin();
926 if (I->Tok->TokenText != "goog")
927 return false;
928 ++I;
929 if (I->Tok->isNot(Kind: tok::period))
930 return false;
931 ++I;
932 if (I->Tok->TokenText != "scope")
933 return false;
934 ++I;
935 return I->Tok->is(Kind: tok::l_paren);
936}
937
938static bool isIIFE(const UnwrappedLine &Line,
939 const AdditionalKeywords &Keywords) {
940 // Look for the start of an immediately invoked anonymous function.
941 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
942 // This is commonly done in JavaScript to create a new, anonymous scope.
943 // Example: (function() { ... })()
944 if (Line.Tokens.size() < 3)
945 return false;
946 auto I = Line.Tokens.begin();
947 if (I->Tok->isNot(Kind: tok::l_paren))
948 return false;
949 ++I;
950 if (I->Tok->isNot(Kind: Keywords.kw_function))
951 return false;
952 ++I;
953 return I->Tok->is(Kind: tok::l_paren);
954}
955
956static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
957 const FormatToken &InitialToken,
958 bool IsEmptyBlock,
959 bool IsJavaRecord = false) {
960 if (IsJavaRecord)
961 return Style.BraceWrapping.AfterClass;
962
963 tok::TokenKind Kind = InitialToken.Tok.getKind();
964 if (InitialToken.is(TT: TT_NamespaceMacro))
965 Kind = tok::kw_namespace;
966
967 const bool WrapRecordAllowed =
968 !IsEmptyBlock ||
969 Style.AllowShortRecordOnASingleLine < FormatStyle::SRS_Empty ||
970 Style.BraceWrapping.SplitEmptyRecord;
971
972 switch (Kind) {
973 case tok::kw_namespace:
974 return Style.BraceWrapping.AfterNamespace;
975 case tok::kw_class:
976 return Style.BraceWrapping.AfterClass && WrapRecordAllowed;
977 case tok::kw_union:
978 return Style.BraceWrapping.AfterUnion && WrapRecordAllowed;
979 case tok::kw_struct:
980 return Style.BraceWrapping.AfterStruct && WrapRecordAllowed;
981 case tok::kw_enum:
982 return Style.BraceWrapping.AfterEnum;
983 default:
984 return false;
985 }
986}
987
988void UnwrappedLineParser::parseChildBlock() {
989 assert(FormatTok->is(tok::l_brace));
990 FormatTok->setBlockKind(BK_Block);
991 const FormatToken *OpeningBrace = FormatTok;
992 nextToken();
993 {
994 bool SkipIndent = (Style.isJavaScript() &&
995 (isGoogScope(Line: *Line) || isIIFE(Line: *Line, Keywords)));
996 ScopedLineState LineState(*this);
997 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
998 /*MustBeDeclaration=*/false);
999 Line->Level += SkipIndent ? 0 : 1;
1000 parseLevel(OpeningBrace);
1001 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
1002 Line->Level -= SkipIndent ? 0 : 1;
1003 }
1004 nextToken();
1005}
1006
1007void UnwrappedLineParser::parsePPDirective() {
1008 assert(FormatTok->is(tok::hash) && "'#' expected");
1009 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1010
1011 nextToken();
1012
1013 if (!FormatTok->Tok.getIdentifierInfo()) {
1014 parsePPUnknown();
1015 return;
1016 }
1017
1018 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1019 case tok::pp_define:
1020 parsePPDefine();
1021 return;
1022 case tok::pp_if:
1023 parsePPIf(/*IfDef=*/false);
1024 break;
1025 case tok::pp_ifdef:
1026 case tok::pp_ifndef:
1027 parsePPIf(/*IfDef=*/true);
1028 break;
1029 case tok::pp_else:
1030 case tok::pp_elifdef:
1031 case tok::pp_elifndef:
1032 case tok::pp_elif:
1033 parsePPElse();
1034 break;
1035 case tok::pp_endif:
1036 parsePPEndIf();
1037 break;
1038 case tok::pp_pragma:
1039 parsePPPragma();
1040 break;
1041 case tok::pp_error:
1042 case tok::pp_warning:
1043 nextToken();
1044 if (!eof() && Style.isCpp())
1045 FormatTok->setFinalizedType(TT_AfterPPDirective);
1046 [[fallthrough]];
1047 default:
1048 parsePPUnknown();
1049 break;
1050 }
1051}
1052
1053void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1054 size_t Line = CurrentLines->size();
1055 if (CurrentLines == &PreprocessorDirectives)
1056 Line += Lines.size();
1057
1058 if (Unreachable ||
1059 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1060 PPStack.push_back(Elt: {PP_Unreachable, Line});
1061 } else {
1062 PPStack.push_back(Elt: {PP_Conditional, Line});
1063 }
1064}
1065
1066void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1067 ++PPBranchLevel;
1068 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1069 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1070 PPLevelBranchIndex.push_back(Elt: 0);
1071 PPLevelBranchCount.push_back(Elt: 0);
1072 }
1073 PPChainBranchIndex.push(x: Unreachable ? -1 : 0);
1074 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1075 conditionalCompilationCondition(Unreachable: Unreachable || Skip);
1076}
1077
1078void UnwrappedLineParser::conditionalCompilationAlternative() {
1079 if (!PPStack.empty())
1080 PPStack.pop_back();
1081 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1082 if (!PPChainBranchIndex.empty())
1083 ++PPChainBranchIndex.top();
1084 conditionalCompilationCondition(
1085 Unreachable: PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1086 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1087}
1088
1089void UnwrappedLineParser::conditionalCompilationEnd() {
1090 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1091 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1092 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1093 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1094 }
1095 // Guard against #endif's without #if.
1096 if (PPBranchLevel > -1)
1097 --PPBranchLevel;
1098 if (!PPChainBranchIndex.empty())
1099 PPChainBranchIndex.pop();
1100 if (!PPStack.empty())
1101 PPStack.pop_back();
1102}
1103
1104void UnwrappedLineParser::parsePPIf(bool IfDef) {
1105 bool IfNDef = FormatTok->is(Kind: tok::pp_ifndef);
1106 nextToken();
1107 bool Unreachable = false;
1108 if (!IfDef && (FormatTok->is(Kind: tok::kw_false) || FormatTok->TokenText == "0"))
1109 Unreachable = true;
1110 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1111 Unreachable = true;
1112 conditionalCompilationStart(Unreachable);
1113 FormatToken *IfCondition = FormatTok;
1114 // If there's a #ifndef on the first line, and the only lines before it are
1115 // comments, it could be an include guard.
1116 bool MaybeIncludeGuard = IfNDef;
1117 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1118 for (auto &Line : Lines) {
1119 if (Line.Tokens.front().Tok->isNot(Kind: tok::comment)) {
1120 MaybeIncludeGuard = false;
1121 IncludeGuard = IG_Rejected;
1122 break;
1123 }
1124 }
1125 }
1126 --PPBranchLevel;
1127 parsePPUnknown();
1128 ++PPBranchLevel;
1129 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1130 IncludeGuard = IG_IfNdefed;
1131 IncludeGuardToken = IfCondition;
1132 }
1133}
1134
1135void UnwrappedLineParser::parsePPElse() {
1136 // If a potential include guard has an #else, it's not an include guard.
1137 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1138 IncludeGuard = IG_Rejected;
1139 // Don't crash when there is an #else without an #if.
1140 assert(PPBranchLevel >= -1);
1141 if (PPBranchLevel == -1)
1142 conditionalCompilationStart(/*Unreachable=*/true);
1143 conditionalCompilationAlternative();
1144 --PPBranchLevel;
1145 parsePPUnknown();
1146 ++PPBranchLevel;
1147}
1148
1149void UnwrappedLineParser::parsePPEndIf() {
1150 conditionalCompilationEnd();
1151 parsePPUnknown();
1152}
1153
1154void UnwrappedLineParser::parsePPDefine() {
1155 nextToken();
1156
1157 if (!FormatTok->Tok.getIdentifierInfo()) {
1158 IncludeGuard = IG_Rejected;
1159 IncludeGuardToken = nullptr;
1160 parsePPUnknown();
1161 return;
1162 }
1163
1164 bool MaybeIncludeGuard = false;
1165 if (IncludeGuard == IG_IfNdefed &&
1166 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1167 IncludeGuard = IG_Defined;
1168 IncludeGuardToken = nullptr;
1169 for (auto &Line : Lines) {
1170 if (Line.Tokens.front().Tok->isNoneOf(Ks: tok::comment, Ks: tok::hash)) {
1171 IncludeGuard = IG_Rejected;
1172 break;
1173 }
1174 }
1175 MaybeIncludeGuard = IncludeGuard == IG_Defined;
1176 }
1177
1178 // In the context of a define, even keywords should be treated as normal
1179 // identifiers. Setting the kind to identifier is not enough, because we need
1180 // to treat additional keywords like __except as well, which are already
1181 // identifiers. Setting the identifier info to null interferes with include
1182 // guard processing above, and changes preprocessing nesting.
1183 FormatTok->Tok.setKind(tok::identifier);
1184 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1185 nextToken();
1186
1187 // IncludeGuard can't have a non-empty macro definition.
1188 if (MaybeIncludeGuard && !eof())
1189 IncludeGuard = IG_Rejected;
1190
1191 if (FormatTok->is(Kind: tok::l_paren) && !FormatTok->hasWhitespaceBefore())
1192 parseParens();
1193 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1194 Line->Level += PPBranchLevel + 1;
1195 addUnwrappedLine();
1196 ++Line->Level;
1197
1198 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1199 assert((int)Line->PPLevel >= 0);
1200
1201 if (eof())
1202 return;
1203
1204 Line->InMacroBody = true;
1205
1206 if (!Style.SkipMacroDefinitionBody) {
1207 // Errors during a preprocessor directive can only affect the layout of the
1208 // preprocessor directive, and thus we ignore them. An alternative approach
1209 // would be to use the same approach we use on the file level (no
1210 // re-indentation if there was a structural error) within the macro
1211 // definition.
1212 parseFile();
1213 return;
1214 }
1215
1216 for (auto *Comment : CommentsBeforeNextToken)
1217 Comment->Finalized = true;
1218
1219 do {
1220 FormatTok->Finalized = true;
1221 FormatTok = Tokens->getNextToken();
1222 } while (!eof());
1223
1224 addUnwrappedLine();
1225}
1226
1227void UnwrappedLineParser::parsePPPragma() {
1228 Line->InPragmaDirective = true;
1229 parsePPUnknown();
1230}
1231
1232void UnwrappedLineParser::parsePPUnknown() {
1233 while (!eof())
1234 nextToken();
1235 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1236 Line->Level += PPBranchLevel + 1;
1237 addUnwrappedLine();
1238}
1239
1240// Here we exclude certain tokens that are not usually the first token in an
1241// unwrapped line. This is used in attempt to distinguish macro calls without
1242// trailing semicolons from other constructs split to several lines.
1243static bool tokenCanStartNewLine(const FormatToken &Tok) {
1244 // Semicolon can be a null-statement, l_square can be a start of a macro or
1245 // a C++11 attribute, but this doesn't seem to be common.
1246 return Tok.isNoneOf(Ks: tok::semi, Ks: tok::l_brace,
1247 // Tokens that can only be used as binary operators and a
1248 // part of overloaded operator names.
1249 Ks: tok::period, Ks: tok::periodstar, Ks: tok::arrow, Ks: tok::arrowstar,
1250 Ks: tok::less, Ks: tok::greater, Ks: tok::slash, Ks: tok::percent,
1251 Ks: tok::lessless, Ks: tok::greatergreater, Ks: tok::equal,
1252 Ks: tok::plusequal, Ks: tok::minusequal, Ks: tok::starequal,
1253 Ks: tok::slashequal, Ks: tok::percentequal, Ks: tok::ampequal,
1254 Ks: tok::pipeequal, Ks: tok::caretequal, Ks: tok::greatergreaterequal,
1255 Ks: tok::lesslessequal,
1256 // Colon is used in labels, base class lists, initializer
1257 // lists, range-based for loops, ternary operator, but
1258 // should never be the first token in an unwrapped line.
1259 Ks: tok::colon,
1260 // 'noexcept' is a trailing annotation.
1261 Ks: tok::kw_noexcept);
1262}
1263
1264static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1265 const FormatToken *FormatTok) {
1266 // FIXME: This returns true for C/C++ keywords like 'struct'.
1267 return FormatTok->is(Kind: tok::identifier) &&
1268 (!FormatTok->Tok.getIdentifierInfo() ||
1269 FormatTok->isNoneOf(
1270 Ks: Keywords.kw_in, Ks: Keywords.kw_of, Ks: Keywords.kw_as, Ks: Keywords.kw_async,
1271 Ks: Keywords.kw_await, Ks: Keywords.kw_yield, Ks: Keywords.kw_finally,
1272 Ks: Keywords.kw_function, Ks: Keywords.kw_import, Ks: Keywords.kw_is,
1273 Ks: Keywords.kw_let, Ks: Keywords.kw_var, Ks: tok::kw_const,
1274 Ks: Keywords.kw_abstract, Ks: Keywords.kw_extends, Ks: Keywords.kw_implements,
1275 Ks: Keywords.kw_instanceof, Ks: Keywords.kw_interface,
1276 Ks: Keywords.kw_override, Ks: Keywords.kw_throws, Ks: Keywords.kw_from));
1277}
1278
1279static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1280 const FormatToken *FormatTok) {
1281 return FormatTok->Tok.isLiteral() ||
1282 FormatTok->isOneOf(K1: tok::kw_true, K2: tok::kw_false) ||
1283 mustBeJSIdent(Keywords, FormatTok);
1284}
1285
1286// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1287// when encountered after a value (see mustBeJSIdentOrValue).
1288static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1289 const FormatToken *FormatTok) {
1290 return FormatTok->isOneOf(
1291 K1: tok::kw_return, K2: Keywords.kw_yield,
1292 // conditionals
1293 Ks: tok::kw_if, Ks: tok::kw_else,
1294 // loops
1295 Ks: tok::kw_for, Ks: tok::kw_while, Ks: tok::kw_do, Ks: tok::kw_continue, Ks: tok::kw_break,
1296 // switch/case
1297 Ks: tok::kw_switch, Ks: tok::kw_case,
1298 // exceptions
1299 Ks: tok::kw_throw, Ks: tok::kw_try, Ks: tok::kw_catch, Ks: Keywords.kw_finally,
1300 // declaration
1301 Ks: tok::kw_const, Ks: tok::kw_class, Ks: Keywords.kw_var, Ks: Keywords.kw_let,
1302 Ks: Keywords.kw_async, Ks: Keywords.kw_function,
1303 // import/export
1304 Ks: Keywords.kw_import, Ks: tok::kw_export);
1305}
1306
1307// Checks whether a token is a type in K&R C (aka C78).
1308static bool isC78Type(const FormatToken &Tok) {
1309 return Tok.isOneOf(K1: tok::kw_char, K2: tok::kw_short, Ks: tok::kw_int, Ks: tok::kw_long,
1310 Ks: tok::kw_unsigned, Ks: tok::kw_float, Ks: tok::kw_double,
1311 Ks: tok::identifier);
1312}
1313
1314// This function checks whether a token starts the first parameter declaration
1315// in a K&R C (aka C78) function definition, e.g.:
1316// int f(a, b)
1317// short a, b;
1318// {
1319// return a + b;
1320// }
1321static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1322 const FormatToken *FuncName) {
1323 assert(Tok);
1324 assert(Next);
1325 assert(FuncName);
1326
1327 if (FuncName->isNot(Kind: tok::identifier))
1328 return false;
1329
1330 const FormatToken *Prev = FuncName->Previous;
1331 if (!Prev || (Prev->isNot(Kind: tok::star) && !isC78Type(Tok: *Prev)))
1332 return false;
1333
1334 if (!isC78Type(Tok: *Tok) &&
1335 Tok->isNoneOf(Ks: tok::kw_register, Ks: tok::kw_struct, Ks: tok::kw_union)) {
1336 return false;
1337 }
1338
1339 if (Next->isNot(Kind: tok::star) && !Next->Tok.getIdentifierInfo())
1340 return false;
1341
1342 Tok = Tok->Previous;
1343 if (!Tok || Tok->isNot(Kind: tok::r_paren))
1344 return false;
1345
1346 Tok = Tok->Previous;
1347 if (!Tok || Tok->isNot(Kind: tok::identifier))
1348 return false;
1349
1350 return Tok->Previous && Tok->Previous->isOneOf(K1: tok::l_paren, K2: tok::comma);
1351}
1352
1353bool UnwrappedLineParser::parseModuleDecl() {
1354 assert(IsCpp);
1355 assert(FormatTok->is(Keywords.kw_module));
1356
1357 if (Style.Language == FormatStyle::LK_C ||
1358 Style.Standard < FormatStyle::LS_Cpp20) {
1359 return false;
1360 }
1361
1362 nextToken();
1363 if (FormatTok->isNot(Kind: tok::identifier))
1364 return false;
1365
1366 for (nextToken(); FormatTok->isNoneOf(Ks: tok::semi, Ks: tok::eof); nextToken())
1367 if (FormatTok->is(Kind: tok::colon))
1368 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1369
1370 nextToken();
1371 Line->IsModuleOrImportDecl = true;
1372 addUnwrappedLine();
1373 return true;
1374}
1375
1376bool UnwrappedLineParser::parseImportDecl() {
1377 assert(IsCpp);
1378 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1379
1380 if (Style.Language == FormatStyle::LK_C ||
1381 Style.Standard < FormatStyle::LS_Cpp20) {
1382 return false;
1383 }
1384
1385 nextToken();
1386 if (FormatTok->is(Kind: tok::colon)) {
1387 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1388 nextToken();
1389 }
1390 if (FormatTok->isNoneOf(Ks: tok::identifier, Ks: tok::less, Ks: tok::string_literal))
1391 return false;
1392
1393 for (; FormatTok->isNoneOf(Ks: tok::semi, Ks: tok::eof); nextToken()) {
1394 // Handle import <foo/bar.h> as we would an include statement.
1395 if (FormatTok->is(Kind: tok::less)) {
1396 for (nextToken(); FormatTok->isNoneOf(Ks: tok::greater, Ks: tok::semi, Ks: tok::eof);
1397 nextToken()) {
1398 // Mark tokens as implicit string literals, so that import <A/Foo> will
1399 // neither be broken nor have a space added.
1400 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1401 }
1402 }
1403 }
1404
1405 nextToken();
1406 Line->IsModuleOrImportDecl = true;
1407 addUnwrappedLine();
1408 return true;
1409}
1410
1411// readTokenWithJavaScriptASI reads the next token and terminates the current
1412// line if JavaScript Automatic Semicolon Insertion must
1413// happen between the current token and the next token.
1414//
1415// This method is conservative - it cannot cover all edge cases of JavaScript,
1416// but only aims to correctly handle certain well known cases. It *must not*
1417// return true in speculative cases.
1418void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1419 FormatToken *Previous = FormatTok;
1420 readToken();
1421 FormatToken *Next = FormatTok;
1422
1423 bool IsOnSameLine =
1424 CommentsBeforeNextToken.empty()
1425 ? Next->NewlinesBefore == 0
1426 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1427 if (IsOnSameLine)
1428 return;
1429
1430 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Previous);
1431 bool PreviousStartsTemplateExpr =
1432 Previous->is(TT: TT_TemplateString) && Previous->TokenText.ends_with(Suffix: "${");
1433 if (PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) {
1434 // If the line contains an '@' sign, the previous token might be an
1435 // annotation, which can precede another identifier/value.
1436 bool HasAt = llvm::any_of(Range&: Line->Tokens, P: [](UnwrappedLineNode &LineNode) {
1437 return LineNode.Tok->is(Kind: tok::at);
1438 });
1439 if (HasAt)
1440 return;
1441 }
1442 if (Next->is(Kind: tok::exclaim) && PreviousMustBeValue)
1443 return addUnwrappedLine();
1444 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Next);
1445 bool NextEndsTemplateExpr =
1446 Next->is(TT: TT_TemplateString) && Next->TokenText.starts_with(Prefix: "}");
1447 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1448 (PreviousMustBeValue ||
1449 Previous->isOneOf(K1: tok::r_square, K2: tok::r_paren, Ks: tok::plusplus,
1450 Ks: tok::minusminus))) {
1451 return addUnwrappedLine();
1452 }
1453 if ((PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) &&
1454 isJSDeclOrStmt(Keywords, FormatTok: Next)) {
1455 return addUnwrappedLine();
1456 }
1457}
1458
1459void UnwrappedLineParser::parseStructuralElement(
1460 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1461 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1462 if (Style.isTableGen() && FormatTok->is(Kind: tok::pp_include)) {
1463 nextToken();
1464 if (FormatTok->is(Kind: tok::string_literal))
1465 nextToken();
1466 addUnwrappedLine();
1467 return;
1468 }
1469
1470 if (IsCpp) {
1471 while (FormatTok->is(Kind: tok::l_square) && handleCppAttributes()) {
1472 }
1473 } else if (Style.isVerilog()) {
1474 // Skip attributes.
1475 while (FormatTok->is(Kind: tok::l_paren) &&
1476 Tokens->peekNextToken()->is(Kind: tok::star)) {
1477 parseParens();
1478 }
1479 skipVerilogQualifiers();
1480 // Skip things that can exist before keywords like 'if' and 'case'.
1481 if (FormatTok->isOneOf(K1: Keywords.kw_priority, K2: Keywords.kw_unique,
1482 Ks: Keywords.kw_unique0)) {
1483 nextToken();
1484 }
1485
1486 if (Keywords.isVerilogStructuredProcedure(Tok: *FormatTok)) {
1487 parseForOrWhileLoop(/*HasParens=*/false);
1488 return;
1489 }
1490 if (FormatTok->isOneOf(K1: Keywords.kw_foreach, K2: Keywords.kw_repeat)) {
1491 parseForOrWhileLoop();
1492 return;
1493 }
1494 if (FormatTok->isOneOf(K1: tok::kw_restrict, K2: Keywords.kw_assert,
1495 Ks: Keywords.kw_assume, Ks: Keywords.kw_cover)) {
1496 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1497 return;
1498 }
1499 }
1500
1501 // Tokens that only make sense at the beginning of a line.
1502 if (FormatTok->isAccessSpecifierKeyword()) {
1503 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1504 nextToken();
1505 else
1506 parseAccessSpecifier();
1507 return;
1508 }
1509 switch (FormatTok->Tok.getKind()) {
1510 case tok::kw_asm: {
1511 // Track whether to skip formatting inline asm by finalizing the tokens
1512 // in the block. Formatting is skipped inside of braces by default.
1513 // A style option could be added to also skip formatting inside parens.
1514 bool DoNotFormat = false;
1515 tok::TokenKind OpenType;
1516 tok::TokenKind CloseType;
1517 nextToken();
1518 while (FormatTok &&
1519 FormatTok->isOneOf(K1: tok::kw_volatile, K2: tok::kw_inline, Ks: tok::kw_goto)) {
1520 nextToken();
1521 }
1522 if (!FormatTok)
1523 break;
1524 if (FormatTok->is(Kind: tok::l_brace)) {
1525 FormatTok->setFinalizedType(TT_InlineASMBrace);
1526 OpenType = tok::l_brace;
1527 CloseType = tok::r_brace;
1528 DoNotFormat = true;
1529 } else if (FormatTok->is(Kind: tok::l_paren)) {
1530 OpenType = tok::l_paren;
1531 CloseType = tok::r_paren;
1532 FormatTok->setFinalizedType(TT_InlineASMParen);
1533 } else {
1534 break;
1535 }
1536 if (DoNotFormat) {
1537 FormatToken *OpenTok = FormatTok;
1538 int NestLevel = 0;
1539 nextToken();
1540 while (FormatTok && !eof()) {
1541 if (FormatTok->is(Kind: OpenType)) {
1542 ++NestLevel;
1543 } else if (FormatTok->is(Kind: CloseType)) {
1544 --NestLevel;
1545 if (NestLevel < 1) {
1546 FormatTok->setFinalizedType(OpenTok->getType());
1547 nextToken();
1548 addUnwrappedLine();
1549 break;
1550 }
1551 }
1552 FormatTok->Finalized = true;
1553 nextToken();
1554 }
1555 }
1556 break;
1557 }
1558 case tok::kw_namespace:
1559 parseNamespace();
1560 return;
1561 case tok::kw_if: {
1562 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1563 // field/method declaration.
1564 break;
1565 }
1566 FormatToken *Tok = parseIfThenElse(IfKind);
1567 if (IfLeftBrace)
1568 *IfLeftBrace = Tok;
1569 return;
1570 }
1571 case tok::kw_for:
1572 case tok::kw_while:
1573 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1574 // field/method declaration.
1575 break;
1576 }
1577 parseForOrWhileLoop();
1578 return;
1579 case tok::kw_do:
1580 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1581 // field/method declaration.
1582 break;
1583 }
1584 parseDoWhile();
1585 if (HasDoWhile)
1586 *HasDoWhile = true;
1587 return;
1588 case tok::kw_switch:
1589 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1590 // 'switch: string' field declaration.
1591 break;
1592 }
1593 parseSwitch(/*IsExpr=*/false);
1594 return;
1595 case tok::kw_default: {
1596 // In Verilog default along with other labels are handled in the next loop.
1597 if (Style.isVerilog())
1598 break;
1599 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1600 // 'default: string' field declaration.
1601 break;
1602 }
1603 auto *Default = FormatTok;
1604 nextToken();
1605 if (FormatTok->is(Kind: tok::colon)) {
1606 FormatTok->setFinalizedType(TT_CaseLabelColon);
1607 parseLabel();
1608 return;
1609 }
1610 if (FormatTok->is(Kind: tok::arrow)) {
1611 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1612 Default->setFinalizedType(TT_SwitchExpressionLabel);
1613 parseLabel();
1614 return;
1615 }
1616 // e.g. "default void f() {}" in a Java interface.
1617 break;
1618 }
1619 case tok::kw_case:
1620 // Proto: there are no switch/case statements.
1621 if (Style.Language == FormatStyle::LK_Proto) {
1622 nextToken();
1623 return;
1624 }
1625 if (Style.isVerilog()) {
1626 parseBlock();
1627 addUnwrappedLine();
1628 return;
1629 }
1630 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1631 // 'case: string' field declaration.
1632 nextToken();
1633 break;
1634 }
1635 parseCaseLabel();
1636 return;
1637 case tok::kw_goto:
1638 nextToken();
1639 if (FormatTok->is(Kind: tok::kw_case))
1640 nextToken();
1641 break;
1642 case tok::kw_try:
1643 case tok::kw___try:
1644 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1645 // field/method declaration.
1646 break;
1647 }
1648 parseTryCatch();
1649 return;
1650 case tok::kw_extern:
1651 if (Style.isVerilog()) {
1652 // In Verilog an extern module declaration looks like a start of module.
1653 // But there is no body and endmodule. So we handle it separately.
1654 parseVerilogExtern();
1655 return;
1656 }
1657 nextToken();
1658 if (FormatTok->is(Kind: tok::string_literal)) {
1659 nextToken();
1660 if (FormatTok->is(Kind: tok::l_brace)) {
1661 if (Style.BraceWrapping.AfterExternBlock)
1662 addUnwrappedLine();
1663 // Either we indent or for backwards compatibility we follow the
1664 // AfterExternBlock style.
1665 unsigned AddLevels =
1666 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1667 (Style.BraceWrapping.AfterExternBlock &&
1668 Style.IndentExternBlock ==
1669 FormatStyle::IEBS_AfterExternBlock)
1670 ? 1u
1671 : 0u;
1672 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1673 addUnwrappedLine();
1674 return;
1675 }
1676 }
1677 break;
1678 case tok::kw_export:
1679 if (IsCpp) {
1680 nextToken();
1681 if (FormatTok->is(Kind: tok::kw_namespace)) {
1682 parseNamespace();
1683 return;
1684 }
1685 if (FormatTok->is(Kind: tok::l_brace)) {
1686 parseCppExportBlock();
1687 return;
1688 }
1689 if (FormatTok->is(II: Keywords.kw_module) && parseModuleDecl())
1690 return;
1691 if (FormatTok->is(II: Keywords.kw_import) && parseImportDecl())
1692 return;
1693 break;
1694 }
1695 if (Style.isJavaScript()) {
1696 parseJavaScriptEs6ImportExport();
1697 return;
1698 }
1699 if (Style.isVerilog()) {
1700 parseVerilogExtern();
1701 return;
1702 }
1703 break;
1704 case tok::kw_inline:
1705 nextToken();
1706 if (FormatTok->is(Kind: tok::kw_namespace)) {
1707 parseNamespace();
1708 return;
1709 }
1710 break;
1711 case tok::identifier:
1712 if (FormatTok->is(TT: TT_ForEachMacro)) {
1713 parseForOrWhileLoop();
1714 return;
1715 }
1716 if (FormatTok->is(TT: TT_MacroBlockBegin)) {
1717 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1718 /*MunchSemi=*/false);
1719 return;
1720 }
1721 if (FormatTok->is(II: Keywords.kw_import)) {
1722 if (IsCpp && parseImportDecl())
1723 return;
1724 if (Style.isJavaScript()) {
1725 parseJavaScriptEs6ImportExport();
1726 return;
1727 }
1728 if (Style.Language == FormatStyle::LK_Proto) {
1729 nextToken();
1730 if (FormatTok->is(Kind: tok::kw_public))
1731 nextToken();
1732 if (FormatTok->isNot(Kind: tok::string_literal))
1733 return;
1734 nextToken();
1735 if (FormatTok->is(Kind: tok::semi))
1736 nextToken();
1737 addUnwrappedLine();
1738 return;
1739 }
1740 if (Style.isVerilog()) {
1741 parseVerilogExtern();
1742 return;
1743 }
1744 }
1745 if (IsCpp) {
1746 if (FormatTok->is(II: Keywords.kw_module) && parseModuleDecl())
1747 return;
1748 if (FormatTok->isOneOf(K1: Keywords.kw_signals, K2: Keywords.kw_qsignals,
1749 Ks: Keywords.kw_slots, Ks: Keywords.kw_qslots)) {
1750 nextToken();
1751 if (FormatTok->is(Kind: tok::colon)) {
1752 nextToken();
1753 addUnwrappedLine();
1754 return;
1755 }
1756 }
1757 if (FormatTok->is(TT: TT_StatementMacro)) {
1758 parseStatementMacro();
1759 return;
1760 }
1761 if (FormatTok->is(TT: TT_NamespaceMacro)) {
1762 parseNamespace();
1763 return;
1764 }
1765 }
1766 // In Verilog labels can be any expression, so we don't do them here.
1767 // JS doesn't have macros, and within classes colons indicate fields, not
1768 // labels.
1769 // TableGen doesn't have labels.
1770 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1771 Tokens->peekNextToken()->is(Kind: tok::colon) && !Line->MustBeDeclaration) {
1772 nextToken();
1773 if (!Line->InMacroBody || CurrentLines->size() > 1)
1774 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1775 FormatTok->setFinalizedType(TT_GotoLabelColon);
1776 parseLabel(IndentGotoLabels: Style.IndentGotoLabels);
1777 if (HasLabel)
1778 *HasLabel = true;
1779 return;
1780 }
1781 if (Style.isJava() && FormatTok->is(II: Keywords.kw_record)) {
1782 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1783 addUnwrappedLine();
1784 return;
1785 }
1786 // In all other cases, parse the declaration.
1787 break;
1788 default:
1789 break;
1790 }
1791
1792 bool SeenEqual = false;
1793 for (const bool InRequiresExpression =
1794 OpeningBrace && OpeningBrace->isOneOf(K1: TT_RequiresExpressionLBrace,
1795 K2: TT_CompoundRequirementLBrace);
1796 !eof();) {
1797 const FormatToken *Previous = FormatTok->Previous;
1798 switch (FormatTok->Tok.getKind()) {
1799 case tok::at:
1800 nextToken();
1801 if (FormatTok->is(Kind: tok::l_brace)) {
1802 nextToken();
1803 parseBracedList();
1804 break;
1805 }
1806 if (Style.isJava() && FormatTok->is(II: Keywords.kw_interface)) {
1807 nextToken();
1808 break;
1809 }
1810 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1811 case tok::objc_public:
1812 case tok::objc_protected:
1813 case tok::objc_package:
1814 case tok::objc_private:
1815 return parseAccessSpecifier();
1816 case tok::objc_interface:
1817 case tok::objc_implementation:
1818 return parseObjCInterfaceOrImplementation();
1819 case tok::objc_protocol:
1820 if (parseObjCProtocol())
1821 return;
1822 break;
1823 case tok::objc_end:
1824 return; // Handled by the caller.
1825 case tok::objc_optional:
1826 case tok::objc_required:
1827 nextToken();
1828 addUnwrappedLine();
1829 return;
1830 case tok::objc_autoreleasepool:
1831 IsAutoRelease = true;
1832 [[fallthrough]];
1833 case tok::objc_synchronized:
1834 nextToken();
1835 if (!IsAutoRelease && FormatTok->is(Kind: tok::l_paren)) {
1836 // Skip synchronization object
1837 parseParens();
1838 }
1839 if (FormatTok->is(Kind: tok::l_brace)) {
1840 if (Style.BraceWrapping.AfterControlStatement ==
1841 FormatStyle::BWACS_Always) {
1842 addUnwrappedLine();
1843 }
1844 parseBlock();
1845 }
1846 addUnwrappedLine();
1847 return;
1848 case tok::objc_try:
1849 // This branch isn't strictly necessary (the kw_try case below would
1850 // do this too after the tok::at is parsed above). But be explicit.
1851 parseTryCatch();
1852 return;
1853 default:
1854 break;
1855 }
1856 break;
1857 case tok::kw_requires: {
1858 if (IsCpp) {
1859 bool ParsedClause = parseRequires(SeenEqual);
1860 if (ParsedClause)
1861 return;
1862 } else {
1863 nextToken();
1864 }
1865 break;
1866 }
1867 case tok::kw_enum:
1868 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1869 // "template <..., enum ...>".
1870 if (Previous && Previous->isOneOf(K1: tok::less, K2: tok::arrow, Ks: tok::comma)) {
1871 nextToken();
1872 break;
1873 }
1874
1875 // parseEnum falls through and does not yet add an unwrapped line as an
1876 // enum definition can start a structural element.
1877 if (!parseEnum())
1878 break;
1879 // This only applies to C++ and Verilog.
1880 if (!IsCpp && !Style.isVerilog()) {
1881 addUnwrappedLine();
1882 return;
1883 }
1884 break;
1885 case tok::kw_typedef:
1886 nextToken();
1887 if (FormatTok->isOneOf(K1: Keywords.kw_NS_ENUM, K2: Keywords.kw_NS_OPTIONS,
1888 Ks: Keywords.kw_CF_ENUM, Ks: Keywords.kw_CF_OPTIONS,
1889 Ks: Keywords.kw_CF_CLOSED_ENUM,
1890 Ks: Keywords.kw_NS_CLOSED_ENUM)) {
1891 parseEnum();
1892 }
1893 break;
1894 case tok::kw_class:
1895 if (Style.isVerilog()) {
1896 parseBlock();
1897 addUnwrappedLine();
1898 return;
1899 }
1900 if (Style.isTableGen()) {
1901 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1902 // This is same as def and so on.
1903 nextToken();
1904 break;
1905 }
1906 [[fallthrough]];
1907 case tok::kw_struct:
1908 case tok::kw_union:
1909 if (parseStructLike())
1910 return;
1911 break;
1912 case tok::kw_decltype:
1913 nextToken();
1914 if (FormatTok->is(Kind: tok::l_paren)) {
1915 parseParens();
1916 if (FormatTok->Previous &&
1917 FormatTok->Previous->endsSequence(K1: tok::r_paren, Tokens: tok::kw_auto,
1918 Tokens: tok::l_paren)) {
1919 Line->SeenDecltypeAuto = true;
1920 }
1921 }
1922 break;
1923 case tok::period:
1924 nextToken();
1925 // In Java, classes have an implicit static member "class".
1926 if (Style.isJava() && FormatTok && FormatTok->is(Kind: tok::kw_class))
1927 nextToken();
1928 if (Style.isJavaScript() && FormatTok &&
1929 FormatTok->Tok.getIdentifierInfo()) {
1930 // JavaScript only has pseudo keywords, all keywords are allowed to
1931 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1932 nextToken();
1933 }
1934 break;
1935 case tok::semi:
1936 nextToken();
1937 addUnwrappedLine();
1938 return;
1939 case tok::r_brace:
1940 addUnwrappedLine();
1941 return;
1942 case tok::string_literal:
1943 if (Style.isVerilog() && FormatTok->is(TT: TT_VerilogProtected)) {
1944 FormatTok->Finalized = true;
1945 nextToken();
1946 addUnwrappedLine();
1947 return;
1948 }
1949 nextToken();
1950 break;
1951 case tok::l_paren: {
1952 parseParens();
1953 // Break the unwrapped line if a K&R C function definition has a parameter
1954 // declaration.
1955 if (OpeningBrace || !IsCpp || !Previous || eof())
1956 break;
1957 if (isC78ParameterDecl(Tok: FormatTok,
1958 Next: Tokens->peekNextToken(/*SkipComment=*/true),
1959 FuncName: Previous)) {
1960 addUnwrappedLine();
1961 return;
1962 }
1963 break;
1964 }
1965 case tok::kw_operator:
1966 nextToken();
1967 if (FormatTok->isBinaryOperator())
1968 nextToken();
1969 break;
1970 case tok::caret: {
1971 const auto *Prev = FormatTok->getPreviousNonComment();
1972 nextToken();
1973 if (Prev && Prev->is(Kind: tok::identifier))
1974 break;
1975 // Block return type.
1976 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1977 nextToken();
1978 // Return types: pointers are ok too.
1979 while (FormatTok->is(Kind: tok::star))
1980 nextToken();
1981 }
1982 // Block argument list.
1983 if (FormatTok->is(Kind: tok::l_paren))
1984 parseParens();
1985 // Block body.
1986 if (FormatTok->is(Kind: tok::l_brace))
1987 parseChildBlock();
1988 break;
1989 }
1990 case tok::l_brace:
1991 if (InRequiresExpression)
1992 FormatTok->setFinalizedType(TT_BracedListLBrace);
1993 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1994 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1995 // A block outside of parentheses must be the last part of a
1996 // structural element.
1997 // FIXME: Figure out cases where this is not true, and add projections
1998 // for them (the one we know is missing are lambdas).
1999 if (Style.isJava() &&
2000 Line->Tokens.front().Tok->is(II: Keywords.kw_synchronized)) {
2001 // If necessary, we could set the type to something different than
2002 // TT_FunctionLBrace.
2003 if (Style.BraceWrapping.AfterControlStatement ==
2004 FormatStyle::BWACS_Always) {
2005 addUnwrappedLine();
2006 }
2007 } else if (Style.BraceWrapping.AfterFunction) {
2008 addUnwrappedLine();
2009 }
2010 if (!Previous || Previous->isNot(Kind: TT_TypeDeclarationParen))
2011 FormatTok->setFinalizedType(TT_FunctionLBrace);
2012 parseBlock();
2013 IsDecltypeAutoFunction = false;
2014 addUnwrappedLine();
2015 return;
2016 }
2017 // Otherwise this was a braced init list, and the structural
2018 // element continues.
2019 break;
2020 case tok::kw_try:
2021 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2022 // field/method declaration.
2023 nextToken();
2024 break;
2025 }
2026 // We arrive here when parsing function-try blocks.
2027 if (Style.BraceWrapping.AfterFunction)
2028 addUnwrappedLine();
2029 parseTryCatch();
2030 return;
2031 case tok::identifier: {
2032 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where) &&
2033 Line->MustBeDeclaration) {
2034 addUnwrappedLine();
2035 parseCSharpGenericTypeConstraint();
2036 break;
2037 }
2038 if (FormatTok->is(TT: TT_MacroBlockEnd)) {
2039 addUnwrappedLine();
2040 return;
2041 }
2042
2043 // Function declarations (as opposed to function expressions) are parsed
2044 // on their own unwrapped line by continuing this loop. Function
2045 // expressions (functions that are not on their own line) must not create
2046 // a new unwrapped line, so they are special cased below.
2047 size_t TokenCount = Line->Tokens.size();
2048 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_function) &&
2049 (TokenCount > 1 ||
2050 (TokenCount == 1 &&
2051 Line->Tokens.front().Tok->isNot(Kind: Keywords.kw_async)))) {
2052 tryToParseJSFunction();
2053 break;
2054 }
2055 if ((Style.isJavaScript() || Style.isJava()) &&
2056 FormatTok->is(II: Keywords.kw_interface)) {
2057 if (Style.isJavaScript()) {
2058 // In JavaScript/TypeScript, "interface" can be used as a standalone
2059 // identifier, e.g. in `var interface = 1;`. If "interface" is
2060 // followed by another identifier, it is very like to be an actual
2061 // interface declaration.
2062 unsigned StoredPosition = Tokens->getPosition();
2063 FormatToken *Next = Tokens->getNextToken();
2064 FormatTok = Tokens->setPosition(StoredPosition);
2065 if (!mustBeJSIdent(Keywords, FormatTok: Next)) {
2066 nextToken();
2067 break;
2068 }
2069 }
2070 parseRecord();
2071 addUnwrappedLine();
2072 return;
2073 }
2074
2075 if (Style.isVerilog()) {
2076 if (FormatTok->is(II: Keywords.kw_table)) {
2077 parseVerilogTable();
2078 return;
2079 }
2080 if (Keywords.isVerilogBegin(Tok: *FormatTok) ||
2081 Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
2082 parseBlock();
2083 addUnwrappedLine();
2084 return;
2085 }
2086 }
2087
2088 if (!IsCpp && FormatTok->is(II: Keywords.kw_interface)) {
2089 if (parseStructLike())
2090 return;
2091 break;
2092 }
2093
2094 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
2095 parseStatementMacro();
2096 return;
2097 }
2098
2099 // See if the following token should start a new unwrapped line.
2100 StringRef Text = FormatTok->TokenText;
2101
2102 FormatToken *PreviousToken = FormatTok;
2103 nextToken();
2104
2105 // JS doesn't have macros, and within classes colons indicate fields, not
2106 // labels.
2107 if (Style.isJavaScript())
2108 break;
2109
2110 auto OneTokenSoFar = [&]() {
2111 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2112 while (I != E && I->Tok->is(Kind: tok::comment))
2113 ++I;
2114 if (Style.isVerilog())
2115 while (I != E && I->Tok->is(Kind: tok::hash))
2116 ++I;
2117 return I != E && (++I == E);
2118 };
2119 if (OneTokenSoFar()) {
2120 // Recognize function-like macro usages without trailing semicolon as
2121 // well as free-standing macros like Q_OBJECT.
2122 bool FunctionLike = FormatTok->is(Kind: tok::l_paren);
2123 if (FunctionLike)
2124 parseParens();
2125
2126 bool FollowedByNewline =
2127 CommentsBeforeNextToken.empty()
2128 ? FormatTok->NewlinesBefore > 0
2129 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2130
2131 if (FollowedByNewline &&
2132 (Text.size() >= 5 ||
2133 (FunctionLike && FormatTok->isNot(Kind: tok::l_paren))) &&
2134 tokenCanStartNewLine(Tok: *FormatTok) && Text == Text.upper()) {
2135 if (PreviousToken->isNot(Kind: TT_UntouchableMacroFunc))
2136 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2137 addUnwrappedLine();
2138 return;
2139 }
2140 }
2141 break;
2142 }
2143 case tok::equal:
2144 if ((Style.isJavaScript() || Style.isCSharp()) &&
2145 FormatTok->is(TT: TT_FatArrow)) {
2146 tryToParseChildBlock();
2147 break;
2148 }
2149
2150 SeenEqual = true;
2151 nextToken();
2152 if (FormatTok->is(Kind: tok::l_brace)) {
2153 // C# needs this change to ensure that array initialisers and object
2154 // initialisers are indented the same way. In TypeScript, the brace
2155 // can also be an object type definition.
2156 if (!Style.isJavaScript())
2157 FormatTok->setBlockKind(BK_BracedInit);
2158 // TableGen's defset statement has syntax of the form,
2159 // `defset <type> <name> = { <statement>... }`
2160 if (Style.isTableGen() &&
2161 Line->Tokens.begin()->Tok->is(II: Keywords.kw_defset)) {
2162 FormatTok->setFinalizedType(TT_FunctionLBrace);
2163 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2164 /*MunchSemi=*/false);
2165 addUnwrappedLine();
2166 break;
2167 }
2168 nextToken();
2169 parseBracedList();
2170 } else if (Style.Language == FormatStyle::LK_Proto &&
2171 FormatTok->is(Kind: tok::less)) {
2172 nextToken();
2173 parseBracedList(/*IsAngleBracket=*/true);
2174 }
2175 break;
2176 case tok::l_square:
2177 parseSquare();
2178 break;
2179 case tok::kw_new:
2180 if (Style.isCSharp() &&
2181 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2182 (Previous && Previous->isAccessSpecifierKeyword()))) {
2183 nextToken();
2184 } else {
2185 parseNew();
2186 }
2187 break;
2188 case tok::kw_switch:
2189 if (Style.isJava())
2190 parseSwitch(/*IsExpr=*/true);
2191 else
2192 nextToken();
2193 break;
2194 case tok::kw_case:
2195 // Proto: there are no switch/case statements.
2196 if (Style.Language == FormatStyle::LK_Proto) {
2197 nextToken();
2198 return;
2199 }
2200 // In Verilog switch is called case.
2201 if (Style.isVerilog()) {
2202 parseBlock();
2203 addUnwrappedLine();
2204 return;
2205 }
2206 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2207 // 'case: string' field declaration.
2208 nextToken();
2209 break;
2210 }
2211 parseCaseLabel();
2212 break;
2213 case tok::kw_default:
2214 nextToken();
2215 if (Style.isVerilog()) {
2216 if (FormatTok->is(Kind: tok::colon)) {
2217 // The label will be handled in the next iteration.
2218 break;
2219 }
2220 if (FormatTok->is(II: Keywords.kw_clocking)) {
2221 // A default clocking block.
2222 parseBlock();
2223 addUnwrappedLine();
2224 return;
2225 }
2226 parseVerilogCaseLabel();
2227 return;
2228 }
2229 break;
2230 case tok::colon:
2231 nextToken();
2232 if (Style.isVerilog()) {
2233 parseVerilogCaseLabel();
2234 return;
2235 }
2236 break;
2237 case tok::greater:
2238 nextToken();
2239 if (FormatTok->is(Kind: tok::l_brace))
2240 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2241 break;
2242 default:
2243 nextToken();
2244 break;
2245 }
2246 }
2247}
2248
2249bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2250 assert(FormatTok->is(tok::l_brace));
2251 if (!Style.isCSharp())
2252 return false;
2253 // See if it's a property accessor.
2254 if (!FormatTok->Previous || FormatTok->Previous->isNot(Kind: tok::identifier))
2255 return false;
2256
2257 // See if we are inside a property accessor.
2258 //
2259 // Record the current tokenPosition so that we can advance and
2260 // reset the current token. `Next` is not set yet so we need
2261 // another way to advance along the token stream.
2262 unsigned int StoredPosition = Tokens->getPosition();
2263 FormatToken *Tok = Tokens->getNextToken();
2264
2265 // A trivial property accessor is of the form:
2266 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2267 // Track these as they do not require line breaks to be introduced.
2268 bool HasSpecialAccessor = false;
2269 bool IsTrivialPropertyAccessor = true;
2270 bool HasAttribute = false;
2271 while (!eof()) {
2272 if (const bool IsAccessorKeyword =
2273 Tok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init, Ks: Keywords.kw_set);
2274 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2275 Tok->isOneOf(K1: tok::l_square, K2: tok::semi, Ks: Keywords.kw_internal)) {
2276 if (IsAccessorKeyword)
2277 HasSpecialAccessor = true;
2278 else if (Tok->is(Kind: tok::l_square))
2279 HasAttribute = true;
2280 Tok = Tokens->getNextToken();
2281 continue;
2282 }
2283 if (Tok->isNot(Kind: tok::r_brace))
2284 IsTrivialPropertyAccessor = false;
2285 break;
2286 }
2287
2288 if (!HasSpecialAccessor || HasAttribute) {
2289 Tokens->setPosition(StoredPosition);
2290 return false;
2291 }
2292
2293 // Try to parse the property accessor:
2294 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2295 Tokens->setPosition(StoredPosition);
2296 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2297 addUnwrappedLine();
2298 nextToken();
2299 do {
2300 switch (FormatTok->Tok.getKind()) {
2301 case tok::r_brace:
2302 nextToken();
2303 if (FormatTok->is(Kind: tok::equal)) {
2304 while (!eof() && FormatTok->isNot(Kind: tok::semi))
2305 nextToken();
2306 nextToken();
2307 }
2308 addUnwrappedLine();
2309 return true;
2310 case tok::l_brace:
2311 ++Line->Level;
2312 parseBlock(/*MustBeDeclaration=*/true);
2313 addUnwrappedLine();
2314 --Line->Level;
2315 break;
2316 case tok::equal:
2317 if (FormatTok->is(TT: TT_FatArrow)) {
2318 ++Line->Level;
2319 do {
2320 nextToken();
2321 } while (!eof() && FormatTok->isNot(Kind: tok::semi));
2322 nextToken();
2323 addUnwrappedLine();
2324 --Line->Level;
2325 break;
2326 }
2327 nextToken();
2328 break;
2329 default:
2330 if (FormatTok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init,
2331 Ks: Keywords.kw_set) &&
2332 !IsTrivialPropertyAccessor) {
2333 // Non-trivial get/set needs to be on its own line.
2334 addUnwrappedLine();
2335 }
2336 nextToken();
2337 }
2338 } while (!eof());
2339
2340 // Unreachable for well-formed code (paired '{' and '}').
2341 return true;
2342}
2343
2344bool UnwrappedLineParser::tryToParseLambda() {
2345 assert(FormatTok->is(tok::l_square));
2346 if (!IsCpp) {
2347 nextToken();
2348 return false;
2349 }
2350 FormatToken &LSquare = *FormatTok;
2351 if (!tryToParseLambdaIntroducer())
2352 return false;
2353
2354 FormatToken *Arrow = nullptr;
2355 bool InTemplateParameterList = false;
2356
2357 while (FormatTok->isNot(Kind: tok::l_brace)) {
2358 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2359 nextToken();
2360 continue;
2361 }
2362 switch (FormatTok->Tok.getKind()) {
2363 case tok::l_brace:
2364 break;
2365 case tok::l_paren:
2366 parseParens(/*AmpAmpTokenType=*/StarAndAmpTokenType: TT_PointerOrReference);
2367 break;
2368 case tok::l_square:
2369 parseSquare();
2370 break;
2371 case tok::less:
2372 assert(FormatTok->Previous);
2373 if (FormatTok->Previous->is(Kind: tok::r_square))
2374 InTemplateParameterList = true;
2375 nextToken();
2376 break;
2377 case tok::kw_auto:
2378 case tok::kw_class:
2379 case tok::kw_struct:
2380 case tok::kw_union:
2381 case tok::kw_template:
2382 case tok::kw_typename:
2383 case tok::amp:
2384 case tok::star:
2385 case tok::kw_const:
2386 case tok::kw_constexpr:
2387 case tok::kw_consteval:
2388 case tok::comma:
2389 case tok::greater:
2390 case tok::identifier:
2391 case tok::numeric_constant:
2392 case tok::coloncolon:
2393 case tok::kw_mutable:
2394 case tok::kw_noexcept:
2395 case tok::kw_static:
2396 nextToken();
2397 break;
2398 // Specialization of a template with an integer parameter can contain
2399 // arithmetic, logical, comparison and ternary operators.
2400 //
2401 // FIXME: This also accepts sequences of operators that are not in the scope
2402 // of a template argument list.
2403 //
2404 // In a C++ lambda a template type can only occur after an arrow. We use
2405 // this as an heuristic to distinguish between Objective-C expressions
2406 // followed by an `a->b` expression, such as:
2407 // ([obj func:arg] + a->b)
2408 // Otherwise the code below would parse as a lambda.
2409 case tok::plus:
2410 case tok::minus:
2411 case tok::exclaim:
2412 case tok::tilde:
2413 case tok::slash:
2414 case tok::percent:
2415 case tok::lessless:
2416 case tok::pipe:
2417 case tok::pipepipe:
2418 case tok::ampamp:
2419 case tok::caret:
2420 case tok::equalequal:
2421 case tok::exclaimequal:
2422 case tok::greaterequal:
2423 case tok::lessequal:
2424 case tok::question:
2425 case tok::colon:
2426 case tok::ellipsis:
2427 case tok::kw_true:
2428 case tok::kw_false:
2429 if (Arrow || InTemplateParameterList) {
2430 nextToken();
2431 break;
2432 }
2433 return true;
2434 case tok::arrow:
2435 Arrow = FormatTok;
2436 nextToken();
2437 break;
2438 case tok::kw_requires:
2439 parseRequiresClause();
2440 break;
2441 case tok::equal:
2442 if (!InTemplateParameterList)
2443 return true;
2444 nextToken();
2445 break;
2446 default:
2447 return true;
2448 }
2449 }
2450
2451 FormatTok->setFinalizedType(TT_LambdaLBrace);
2452 LSquare.setFinalizedType(TT_LambdaLSquare);
2453
2454 if (Arrow)
2455 Arrow->setFinalizedType(TT_LambdaArrow);
2456
2457 NestedLambdas.push_back(Elt: Line->SeenDecltypeAuto);
2458 parseChildBlock();
2459 assert(!NestedLambdas.empty());
2460 NestedLambdas.pop_back();
2461
2462 return true;
2463}
2464
2465bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2466 const FormatToken *Previous = FormatTok->Previous;
2467 const FormatToken *LeftSquare = FormatTok;
2468 nextToken();
2469 if (Previous) {
2470 const auto *PrevPrev = Previous->getPreviousNonComment();
2471 if (Previous->is(Kind: tok::star) && PrevPrev && PrevPrev->isTypeName(LangOpts))
2472 return false;
2473 if (Previous->closesScope()) {
2474 // Not a potential C-style cast.
2475 if (Previous->isNot(Kind: tok::r_paren))
2476 return false;
2477 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2478 // and `int (*)()`.
2479 if (!PrevPrev || PrevPrev->isNoneOf(Ks: tok::greater, Ks: tok::r_paren))
2480 return false;
2481 }
2482 if (Previous && Previous->Tok.getIdentifierInfo() &&
2483 Previous->isNoneOf(Ks: tok::kw_return, Ks: tok::kw_co_await, Ks: tok::kw_co_yield,
2484 Ks: tok::kw_co_return)) {
2485 return false;
2486 }
2487 }
2488 if (LeftSquare->isCppStructuredBinding(IsCpp))
2489 return false;
2490 if (FormatTok->is(Kind: tok::l_square) || tok::isLiteral(K: FormatTok->Tok.getKind()))
2491 return false;
2492 if (FormatTok->is(Kind: tok::r_square)) {
2493 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2494 if (Next->is(Kind: tok::greater))
2495 return false;
2496 }
2497 parseSquare(/*LambdaIntroducer=*/true);
2498 return true;
2499}
2500
2501void UnwrappedLineParser::tryToParseJSFunction() {
2502 assert(FormatTok->is(Keywords.kw_function));
2503 if (FormatTok->is(II: Keywords.kw_async))
2504 nextToken();
2505 // Consume "function".
2506 nextToken();
2507
2508 // Consume * (generator function). Treat it like C++'s overloaded operators.
2509 if (FormatTok->is(Kind: tok::star)) {
2510 FormatTok->setFinalizedType(TT_OverloadedOperator);
2511 nextToken();
2512 }
2513
2514 // Consume function name.
2515 if (FormatTok->is(Kind: tok::identifier))
2516 nextToken();
2517
2518 if (FormatTok->isNot(Kind: tok::l_paren))
2519 return;
2520
2521 // Parse formal parameter list.
2522 parseParens();
2523
2524 if (FormatTok->is(Kind: tok::colon)) {
2525 // Parse a type definition.
2526 nextToken();
2527
2528 // Eat the type declaration. For braced inline object types, balance braces,
2529 // otherwise just parse until finding an l_brace for the function body.
2530 if (FormatTok->is(Kind: tok::l_brace))
2531 tryToParseBracedList();
2532 else
2533 while (FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::semi) && !eof())
2534 nextToken();
2535 }
2536
2537 if (FormatTok->is(Kind: tok::semi))
2538 return;
2539
2540 parseChildBlock();
2541}
2542
2543bool UnwrappedLineParser::tryToParseBracedList() {
2544 if (FormatTok->is(BBK: BK_Unknown))
2545 calculateBraceTypes();
2546 assert(FormatTok->isNot(BK_Unknown));
2547 if (FormatTok->is(BBK: BK_Block))
2548 return false;
2549 nextToken();
2550 parseBracedList();
2551 return true;
2552}
2553
2554bool UnwrappedLineParser::tryToParseChildBlock() {
2555 assert(Style.isJavaScript() || Style.isCSharp());
2556 assert(FormatTok->is(TT_FatArrow));
2557 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2558 // They always start an expression or a child block if followed by a curly
2559 // brace.
2560 nextToken();
2561 if (FormatTok->isNot(Kind: tok::l_brace))
2562 return false;
2563 parseChildBlock();
2564 return true;
2565}
2566
2567bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2568 assert(!IsAngleBracket || !IsEnum);
2569 bool HasError = false;
2570
2571 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2572 // replace this by using parseAssignmentExpression() inside.
2573 do {
2574 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow) &&
2575 tryToParseChildBlock()) {
2576 continue;
2577 }
2578 if (Style.isJavaScript()) {
2579 if (FormatTok->is(II: Keywords.kw_function)) {
2580 tryToParseJSFunction();
2581 continue;
2582 }
2583 if (FormatTok->is(Kind: tok::l_brace)) {
2584 // Could be a method inside of a braced list `{a() { return 1; }}`.
2585 if (tryToParseBracedList())
2586 continue;
2587 parseChildBlock();
2588 }
2589 }
2590 if (FormatTok->is(Kind: IsAngleBracket ? tok::greater : tok::r_brace)) {
2591 if (IsEnum) {
2592 FormatTok->setBlockKind(BK_Block);
2593 if (!Style.AllowShortEnumsOnASingleLine)
2594 addUnwrappedLine();
2595 }
2596 nextToken();
2597 return !HasError;
2598 }
2599 switch (FormatTok->Tok.getKind()) {
2600 case tok::l_square:
2601 if (Style.isCSharp())
2602 parseSquare();
2603 else
2604 tryToParseLambda();
2605 break;
2606 case tok::l_paren:
2607 parseParens();
2608 // JavaScript can just have free standing methods and getters/setters in
2609 // object literals. Detect them by a "{" following ")".
2610 if (Style.isJavaScript()) {
2611 if (FormatTok->is(Kind: tok::l_brace))
2612 parseChildBlock();
2613 break;
2614 }
2615 break;
2616 case tok::l_brace:
2617 // Assume there are no blocks inside a braced init list apart
2618 // from the ones we explicitly parse out (like lambdas).
2619 FormatTok->setBlockKind(BK_BracedInit);
2620 if (!IsAngleBracket) {
2621 auto *Prev = FormatTok->Previous;
2622 if (Prev && Prev->is(Kind: tok::greater))
2623 Prev->setFinalizedType(TT_TemplateCloser);
2624 }
2625 nextToken();
2626 parseBracedList();
2627 break;
2628 case tok::less:
2629 nextToken();
2630 if (IsAngleBracket)
2631 parseBracedList(/*IsAngleBracket=*/true);
2632 break;
2633 case tok::semi:
2634 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2635 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2636 // used for error recovery if we have otherwise determined that this is
2637 // a braced list.
2638 if (Style.isJavaScript()) {
2639 nextToken();
2640 break;
2641 }
2642 HasError = true;
2643 if (!IsEnum)
2644 return false;
2645 nextToken();
2646 break;
2647 case tok::comma:
2648 nextToken();
2649 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2650 addUnwrappedLine();
2651 break;
2652 case tok::kw_requires:
2653 parseRequiresExpression();
2654 break;
2655 default:
2656 nextToken();
2657 break;
2658 }
2659 } while (!eof());
2660 return false;
2661}
2662
2663/// Parses a pair of parentheses (and everything between them).
2664/// \param StarAndAmpTokenType If different than TT_Unknown sets this type for
2665/// all (double) ampersands and stars. This applies for all nested scopes as
2666/// well.
2667///
2668/// Returns whether there is a `=` token between the parentheses.
2669bool UnwrappedLineParser::parseParens(TokenType StarAndAmpTokenType,
2670 bool InMacroCall) {
2671 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2672 auto *LParen = FormatTok;
2673 auto *Prev = FormatTok->Previous;
2674 bool SeenComma = false;
2675 bool SeenEqual = false;
2676 bool MightBeFoldExpr = false;
2677 nextToken();
2678 const bool MightBeStmtExpr = FormatTok->is(Kind: tok::l_brace);
2679 if (!InMacroCall && Prev && Prev->is(TT: TT_FunctionLikeMacro))
2680 InMacroCall = true;
2681 do {
2682 switch (FormatTok->Tok.getKind()) {
2683 case tok::l_paren:
2684 if (parseParens(StarAndAmpTokenType, InMacroCall))
2685 SeenEqual = true;
2686 if (Style.isJava() && FormatTok->is(Kind: tok::l_brace))
2687 parseChildBlock();
2688 break;
2689 case tok::r_paren: {
2690 auto *RParen = FormatTok;
2691 nextToken();
2692 if (Prev) {
2693 auto OptionalParens = [&] {
2694 if (Style.RemoveParentheses == FormatStyle::RPS_Leave ||
2695 MightBeStmtExpr || MightBeFoldExpr || SeenComma || InMacroCall ||
2696 Line->InMacroBody || RParen->getPreviousNonComment() == LParen) {
2697 return false;
2698 }
2699 const bool DoubleParens =
2700 Prev->is(Kind: tok::l_paren) && FormatTok->is(Kind: tok::r_paren);
2701 if (DoubleParens) {
2702 const auto *PrevPrev = Prev->getPreviousNonComment();
2703 const bool Excluded =
2704 PrevPrev &&
2705 (PrevPrev->isOneOf(K1: tok::kw___attribute, K2: tok::kw_decltype) ||
2706 (SeenEqual &&
2707 (PrevPrev->isOneOf(K1: tok::kw_if, K2: tok::kw_while) ||
2708 PrevPrev->endsSequence(K1: tok::kw_constexpr, Tokens: tok::kw_if))));
2709 if (!Excluded)
2710 return true;
2711 } else {
2712 const bool CommaSeparated =
2713 Prev->isOneOf(K1: tok::l_paren, K2: tok::comma) &&
2714 FormatTok->isOneOf(K1: tok::comma, K2: tok::r_paren);
2715 if (CommaSeparated &&
2716 // LParen is not preceded by ellipsis, comma.
2717 !Prev->endsSequence(K1: tok::comma, Tokens: tok::ellipsis) &&
2718 // RParen is not followed by comma, ellipsis.
2719 !(FormatTok->is(Kind: tok::comma) &&
2720 Tokens->peekNextToken()->is(Kind: tok::ellipsis))) {
2721 return true;
2722 }
2723 const bool ReturnParens =
2724 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2725 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2726 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2727 Prev->isOneOf(K1: tok::kw_return, K2: tok::kw_co_return) &&
2728 FormatTok->is(Kind: tok::semi);
2729 if (ReturnParens)
2730 return true;
2731 }
2732 return false;
2733 };
2734 if (OptionalParens()) {
2735 LParen->Optional = true;
2736 RParen->Optional = true;
2737 } else if (Prev->is(TT: TT_TypenameMacro)) {
2738 LParen->setFinalizedType(TT_TypeDeclarationParen);
2739 RParen->setFinalizedType(TT_TypeDeclarationParen);
2740 } else if (Prev->is(Kind: tok::greater) && RParen->Previous == LParen) {
2741 Prev->setFinalizedType(TT_TemplateCloser);
2742 } else if (FormatTok->is(Kind: tok::l_brace) && Prev->is(Kind: tok::amp) &&
2743 !Prev->Previous) {
2744 FormatTok->setBlockKind(BK_BracedInit);
2745 }
2746 }
2747 return SeenEqual;
2748 }
2749 case tok::r_brace:
2750 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2751 return SeenEqual;
2752 case tok::l_square:
2753 tryToParseLambda();
2754 break;
2755 case tok::l_brace:
2756 if (!tryToParseBracedList())
2757 parseChildBlock();
2758 break;
2759 case tok::at:
2760 nextToken();
2761 if (FormatTok->is(Kind: tok::l_brace)) {
2762 nextToken();
2763 parseBracedList();
2764 }
2765 break;
2766 case tok::comma:
2767 SeenComma = true;
2768 nextToken();
2769 break;
2770 case tok::ellipsis:
2771 MightBeFoldExpr = true;
2772 nextToken();
2773 break;
2774 case tok::equal:
2775 SeenEqual = true;
2776 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow))
2777 tryToParseChildBlock();
2778 else
2779 nextToken();
2780 break;
2781 case tok::kw_class:
2782 if (Style.isJavaScript())
2783 parseRecord(/*ParseAsExpr=*/true);
2784 else
2785 nextToken();
2786 break;
2787 case tok::identifier:
2788 if (Style.isJavaScript() && (FormatTok->is(II: Keywords.kw_function)))
2789 tryToParseJSFunction();
2790 else
2791 nextToken();
2792 break;
2793 case tok::kw_switch:
2794 if (Style.isJava())
2795 parseSwitch(/*IsExpr=*/true);
2796 else
2797 nextToken();
2798 break;
2799 case tok::kw_requires:
2800 parseRequiresExpression();
2801 break;
2802 case tok::star:
2803 case tok::amp:
2804 case tok::ampamp:
2805 if (StarAndAmpTokenType != TT_Unknown)
2806 FormatTok->setFinalizedType(StarAndAmpTokenType);
2807 [[fallthrough]];
2808 default:
2809 nextToken();
2810 break;
2811 }
2812 } while (!eof());
2813 return SeenEqual;
2814}
2815
2816void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2817 if (!LambdaIntroducer) {
2818 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2819 if (tryToParseLambda())
2820 return;
2821 }
2822 do {
2823 switch (FormatTok->Tok.getKind()) {
2824 case tok::l_paren:
2825 parseParens();
2826 break;
2827 case tok::r_square:
2828 nextToken();
2829 return;
2830 case tok::r_brace:
2831 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2832 return;
2833 case tok::l_square:
2834 parseSquare();
2835 break;
2836 case tok::l_brace: {
2837 if (!tryToParseBracedList())
2838 parseChildBlock();
2839 break;
2840 }
2841 case tok::at:
2842 case tok::colon:
2843 nextToken();
2844 if (FormatTok->is(Kind: tok::l_brace)) {
2845 nextToken();
2846 parseBracedList();
2847 }
2848 break;
2849 default:
2850 nextToken();
2851 break;
2852 }
2853 } while (!eof());
2854}
2855
2856void UnwrappedLineParser::keepAncestorBraces() {
2857 if (!Style.RemoveBracesLLVM)
2858 return;
2859
2860 const int MaxNestingLevels = 2;
2861 const int Size = NestedTooDeep.size();
2862 if (Size >= MaxNestingLevels)
2863 NestedTooDeep[Size - MaxNestingLevels] = true;
2864 NestedTooDeep.push_back(Elt: false);
2865}
2866
2867static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2868 for (const auto &Token : llvm::reverse(C: Line.Tokens))
2869 if (Token.Tok->isNot(Kind: tok::comment))
2870 return Token.Tok;
2871
2872 return nullptr;
2873}
2874
2875void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2876 FormatToken *Tok = nullptr;
2877
2878 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2879 PreprocessorDirectives.empty() && FormatTok->isNot(Kind: tok::semi)) {
2880 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2881 ? getLastNonComment(Line: *Line)
2882 : Line->Tokens.back().Tok;
2883 assert(Tok);
2884 if (Tok->BraceCount < 0) {
2885 assert(Tok->BraceCount == -1);
2886 Tok = nullptr;
2887 } else {
2888 Tok->BraceCount = -1;
2889 }
2890 }
2891
2892 addUnwrappedLine();
2893 ++Line->Level;
2894 ++Line->UnbracedBodyLevel;
2895 parseStructuralElement();
2896 --Line->UnbracedBodyLevel;
2897
2898 if (Tok) {
2899 assert(!Line->InPPDirective);
2900 Tok = nullptr;
2901 for (const auto &L : llvm::reverse(C&: *CurrentLines)) {
2902 if (!L.InPPDirective && getLastNonComment(Line: L)) {
2903 Tok = L.Tokens.back().Tok;
2904 break;
2905 }
2906 }
2907 assert(Tok);
2908 ++Tok->BraceCount;
2909 }
2910
2911 if (CheckEOF && eof())
2912 addUnwrappedLine();
2913
2914 --Line->Level;
2915}
2916
2917static void markOptionalBraces(FormatToken *LeftBrace) {
2918 if (!LeftBrace)
2919 return;
2920
2921 assert(LeftBrace->is(tok::l_brace));
2922
2923 FormatToken *RightBrace = LeftBrace->MatchingParen;
2924 if (!RightBrace) {
2925 assert(!LeftBrace->Optional);
2926 return;
2927 }
2928
2929 assert(RightBrace->is(tok::r_brace));
2930 assert(RightBrace->MatchingParen == LeftBrace);
2931 assert(LeftBrace->Optional == RightBrace->Optional);
2932
2933 LeftBrace->Optional = true;
2934 RightBrace->Optional = true;
2935}
2936
2937void UnwrappedLineParser::handleAttributes() {
2938 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2939 if (FormatTok->isAttribute())
2940 nextToken();
2941 else if (FormatTok->is(Kind: tok::l_square))
2942 handleCppAttributes();
2943}
2944
2945bool UnwrappedLineParser::handleCppAttributes() {
2946 // Handle [[likely]] / [[unlikely]] attributes.
2947 assert(FormatTok->is(tok::l_square));
2948 if (!tryToParseSimpleAttribute())
2949 return false;
2950 parseSquare();
2951 return true;
2952}
2953
2954/// Returns whether \c Tok begins a block.
2955bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2956 // FIXME: rename the function or make
2957 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2958 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2959 : Tok.is(Kind: tok::l_brace);
2960}
2961
/// Parses an `if` statement together with its optional `else` / `else if`
/// chain. In Verilog mode the same routine also handles the assert-like
/// keywords accepted by the entry assertion below.
///
/// \param IfKind if non-null, receives the kind of statement that was parsed
/// (IfOnly, IfElse or IfElseIf); it is only written when RemoveBracesLLVM is
/// set.
/// \param KeepBraces when true, the braces of the `if` body are not marked
/// optional by this function.
/// \param IsVerilogAssert enables the Verilog-assert specifics: the `#0` /
/// `final` style prefixes, an optional then-action, and a directly following
/// `else`.
/// \returns the left brace of the `if` body when RemoveBracesLLVM is set and
/// the body is braced; nullptr in every other case.
2962FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2963 bool KeepBraces,
2964 bool IsVerilogAssert) {
2965 assert((FormatTok->is(tok::kw_if) ||
2966 (Style.isVerilog() &&
2967 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2968 Keywords.kw_assume, Keywords.kw_cover))) &&
2969 "'if' expected");
2970 nextToken();
2971
2972 if (IsVerilogAssert) {
2973 // Handle `assert #0` and `assert final`.
2974 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
2975 nextToken();
2976 if (FormatTok->is(Kind: tok::numeric_constant))
2977 nextToken();
2978 } else if (FormatTok->isOneOf(K1: Keywords.kw_final, K2: Keywords.kw_property,
2979 Ks: Keywords.kw_sequence)) {
2980 nextToken();
2981 }
2982 }
2983
2984 // TableGen's if statement has the form of `if <cond> then { ... }`.
2985 if (Style.isTableGen()) {
2986 while (!eof() && FormatTok->isNot(Kind: Keywords.kw_then)) {
2987 // Simply skip until then. This range only contains a value.
2988 nextToken();
2989 }
2990 }
2991
2992 // Handle `if !consteval`.
2993 if (FormatTok->is(Kind: tok::exclaim))
2994 nextToken();
2995
 // KeepIfBraces stays true on the `consteval` path; on every other path it is
 // recomputed from the style option and the caller's request.
2996 bool KeepIfBraces = true;
2997 if (FormatTok->is(Kind: tok::kw_consteval)) {
2998 nextToken();
2999 } else {
3000 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
 // Skip `constexpr` (or a single identifier in that position) before the
 // condition.
3001 if (FormatTok->isOneOf(K1: tok::kw_constexpr, K2: tok::identifier))
3002 nextToken();
3003 if (FormatTok->is(Kind: tok::l_paren)) {
3004 FormatTok->setFinalizedType(TT_ConditionLParen);
3005 parseParens();
3006 }
3007 }
3008 handleAttributes();
3009 // The then action is optional in Verilog assert statements.
3010 if (IsVerilogAssert && FormatTok->is(Kind: tok::semi)) {
3011 nextToken();
3012 addUnwrappedLine();
3013 return nullptr;
3014 }
3015
3016 bool NeedsUnwrappedLine = false;
 // NOTE(review): keepAncestorBraces() is defined elsewhere; the asserts below
 // rely on it having pushed an entry onto NestedTooDeep when RemoveBracesLLVM
 // is set -- confirm.
3017 keepAncestorBraces();
3018
3019 FormatToken *IfLeftBrace = nullptr;
3020 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
3021
3022 if (isBlockBegin(Tok: *FormatTok)) {
3023 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3024 IfLeftBrace = FormatTok;
3025 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3026 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3027 /*MunchSemi=*/true, KeepBraces: KeepIfBraces, IfKind: &IfBlockKind);
3028 setPreviousRBraceType(TT_ControlStatementRBrace);
 // Either end the line right after the block, or remember to end it later
 // (done in the no-`else` branch further down).
3029 if (Style.BraceWrapping.BeforeElse)
3030 addUnwrappedLine();
3031 else
3032 NeedsUnwrappedLine = true;
3033 } else if (IsVerilogAssert && FormatTok->is(Kind: tok::kw_else)) {
3034 addUnwrappedLine();
3035 } else {
3036 parseUnbracedBody();
3037 }
3038
 // Keep the if-braces when the left brace has no matching brace, when the
 // current NestedTooDeep entry is set, or when parseBlock reported the block's
 // kind as IfOnly or IfElseIf.
3039 if (Style.RemoveBracesLLVM) {
3040 assert(!NestedTooDeep.empty());
3041 KeepIfBraces = KeepIfBraces ||
3042 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
3043 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
3044 IfBlockKind == IfStmtKind::IfElseIf;
3045 }
3046
3047 bool KeepElseBraces = KeepIfBraces;
3048 FormatToken *ElseLeftBrace = nullptr;
3049 IfStmtKind Kind = IfStmtKind::IfOnly;
3050
3051 if (FormatTok->is(Kind: tok::kw_else)) {
3052 if (Style.RemoveBracesLLVM) {
3053 NestedTooDeep.back() = false;
3054 Kind = IfStmtKind::IfElse;
3055 }
3056 nextToken();
3057 handleAttributes();
3058 if (isBlockBegin(Tok: *FormatTok)) {
 // Peek past the brace: does the else-block start with `if`?
3059 const bool FollowedByIf = Tokens->peekNextToken()->is(Kind: tok::kw_if);
3060 FormatTok->setFinalizedType(TT_ElseLBrace);
3061 ElseLeftBrace = FormatTok;
3062 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3063 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
3064 FormatToken *IfLBrace =
3065 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3066 /*MunchSemi=*/true, KeepBraces: KeepElseBraces, IfKind: &ElseBlockKind);
3067 setPreviousRBraceType(TT_ElseRBrace);
3068 if (FormatTok->is(Kind: tok::kw_else)) {
3069 KeepElseBraces = KeepElseBraces ||
3070 ElseBlockKind == IfStmtKind::IfOnly ||
3071 ElseBlockKind == IfStmtKind::IfElseIf;
3072 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
3073 KeepElseBraces = true;
3074 assert(ElseLeftBrace->MatchingParen);
3075 markOptionalBraces(LeftBrace: ElseLeftBrace);
3076 }
3077 addUnwrappedLine();
3078 } else if (!IsVerilogAssert && FormatTok->is(Kind: tok::kw_if)) {
3079 const FormatToken *Previous = Tokens->getPreviousToken();
3080 assert(Previous);
3081 const bool IsPrecededByComment = Previous->is(Kind: tok::comment);
 // A comment between `else` and `if` ends the current line; the chained `if`
 // is then parsed one level deeper.
3082 if (IsPrecededByComment) {
3083 addUnwrappedLine();
3084 ++Line->Level;
3085 }
 // Take this statement's NestedTooDeep entry off the stack while the chained
 // `if` is parsed, and put it back afterwards.
3086 bool TooDeep = true;
3087 if (Style.RemoveBracesLLVM) {
3088 Kind = IfStmtKind::IfElseIf;
3089 TooDeep = NestedTooDeep.pop_back_val();
3090 }
3091 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepBraces: KeepIfBraces);
3092 if (Style.RemoveBracesLLVM)
3093 NestedTooDeep.push_back(Elt: TooDeep);
3094 if (IsPrecededByComment)
3095 --Line->Level;
3096 } else {
3097 parseUnbracedBody(/*CheckEOF=*/true);
3098 }
3099 } else {
3100 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3101 if (NeedsUnwrappedLine)
3102 addUnwrappedLine();
3103 }
3104
 // Everything below is brace-removal bookkeeping only.
3105 if (!Style.RemoveBracesLLVM)
3106 return nullptr;
3107
3108 assert(!NestedTooDeep.empty());
3109 KeepElseBraces = KeepElseBraces ||
3110 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3111 NestedTooDeep.back();
3112
3113 NestedTooDeep.pop_back();
3114
 // Braces are only marked optional when both the `if` and the `else` side may
 // drop them. Otherwise the MatchingParen links of the if-block's braces are
 // cleared (presumably so callers see them as non-removable -- confirm).
3115 if (!KeepIfBraces && !KeepElseBraces) {
3116 markOptionalBraces(LeftBrace: IfLeftBrace);
3117 markOptionalBraces(LeftBrace: ElseLeftBrace);
3118 } else if (IfLeftBrace) {
3119 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3120 if (IfRightBrace) {
3121 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3122 assert(!IfLeftBrace->Optional);
3123 assert(!IfRightBrace->Optional);
3124 IfLeftBrace->MatchingParen = nullptr;
3125 IfRightBrace->MatchingParen = nullptr;
3126 }
3127 }
3128
3129 if (IfKind)
3130 *IfKind = Kind;
3131
3132 return IfLeftBrace;
3133}
3134
/// Parses a `try` / `__try` statement: an optional constructor initializer
/// list (function-try-block), optional Java try-with-resources parentheses,
/// the try body, and every handler that follows (`catch`, `__except`,
/// `__finally`, Objective-C `@catch` / `@finally`, and `finally` in Java and
/// JavaScript).
3135void UnwrappedLineParser::parseTryCatch() {
3136 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3137 nextToken();
3138 bool NeedsUnwrappedLine = false;
3139 bool HasCtorInitializer = false;
3140 if (FormatTok->is(Kind: tok::colon)) {
3141 auto *Colon = FormatTok;
3142 // We are in a function try block, what comes is an initializer list.
3143 nextToken();
3144 if (FormatTok->is(Kind: tok::identifier)) {
3145 HasCtorInitializer = true;
3146 Colon->setFinalizedType(TT_CtorInitializerColon);
3147 }
3148
3149 // In case identifiers were removed by clang-tidy, what might follow is
3150 // multiple commas in sequence - before the first identifier.
3151 while (FormatTok->is(Kind: tok::comma))
3152 nextToken();
3153
 // One iteration per initializer: a name followed by either a parenthesized
 // or a braced argument list.
3154 while (FormatTok->is(Kind: tok::identifier)) {
3155 nextToken();
3156 if (FormatTok->is(Kind: tok::l_paren)) {
3157 parseParens();
3158 } else if (FormatTok->is(Kind: tok::l_brace)) {
3159 nextToken();
3160 parseBracedList();
3161 }
3162
3163 // In case identifiers were removed by clang-tidy, what might follow is
3164 // multiple commas in sequence - after the first identifier.
3165 while (FormatTok->is(Kind: tok::comma))
3166 nextToken();
3167 }
3168 }
3169 // Parse try with resource.
3170 if (Style.isJava() && FormatTok->is(Kind: tok::l_paren))
3171 parseParens();
3172
 // NOTE(review): keepAncestorBraces() is defined elsewhere; the pop_back()
 // calls at both exits below presumably balance an entry it pushes onto
 // NestedTooDeep when RemoveBracesLLVM is set -- confirm.
3173 keepAncestorBraces();
3174
3175 if (FormatTok->is(Kind: tok::l_brace)) {
 // With a constructor initializer list, this brace opens the function body.
3176 if (HasCtorInitializer)
3177 FormatTok->setFinalizedType(TT_FunctionLBrace);
3178 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3179 parseBlock();
3180 if (Style.BraceWrapping.BeforeCatch)
3181 addUnwrappedLine();
3182 else
3183 NeedsUnwrappedLine = true;
3184 } else if (FormatTok->isNot(Kind: tok::kw_catch)) {
3185 // The C++ standard requires a compound-statement after a try.
3186 // If there's none, we try to assume there's a structuralElement
3187 // and try to continue.
3188 addUnwrappedLine();
3189 ++Line->Level;
3190 parseStructuralElement();
3191 --Line->Level;
3192 }
 // One iteration per handler. SeenCatch is only used to tag the brace that
 // follows a `catch` as a control-statement brace.
3193 for (bool SeenCatch = false;;) {
 // Skip the `@` of Objective-C handlers.
3194 if (FormatTok->is(Kind: tok::at))
3195 nextToken();
3196 if (FormatTok->isNoneOf(Ks: tok::kw_catch, Ks: Keywords.kw___except,
3197 Ks: tok::kw___finally, Ks: tok::objc_catch,
3198 Ks: tok::objc_finally) &&
3199 !((Style.isJava() || Style.isJavaScript()) &&
3200 FormatTok->is(II: Keywords.kw_finally))) {
3201 break;
3202 }
3203 if (FormatTok->is(Kind: tok::kw_catch))
3204 SeenCatch = true;
3205 nextToken();
 // Skip the handler's header up to its opening brace; give up on the whole
 // statement if it ends before a brace is found.
3206 while (FormatTok->isNot(Kind: tok::l_brace)) {
3207 if (FormatTok->is(Kind: tok::l_paren)) {
3208 parseParens();
3209 continue;
3210 }
3211 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::r_brace) || eof()) {
3212 if (Style.RemoveBracesLLVM)
3213 NestedTooDeep.pop_back();
3214 return;
3215 }
3216 nextToken();
3217 }
3218 if (SeenCatch) {
3219 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3220 SeenCatch = false;
3221 }
3222 NeedsUnwrappedLine = false;
3223 Line->MustBeDeclaration = false;
3224 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3225 parseBlock();
3226 if (Style.BraceWrapping.BeforeCatch)
3227 addUnwrappedLine();
3228 else
3229 NeedsUnwrappedLine = true;
3230 }
3231
3232 if (Style.RemoveBracesLLVM)
3233 NestedTooDeep.pop_back();
3234
 // Finish the line of the last block if that has not happened yet.
3235 if (NeedsUnwrappedLine)
3236 addUnwrappedLine();
3237}
3238
3239void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3240 bool ManageWhitesmithsBraces =
3241 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3242
3243 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3244 // the whole block.
3245 if (ManageWhitesmithsBraces)
3246 ++Line->Level;
3247
3248 // Munch the semicolon after the block. This is more common than one would
3249 // think. Putting the semicolon into its own line is very ugly.
3250 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3251 /*KeepBraces=*/true, /*IfKind=*/nullptr, UnindentWhitesmithsBraces: ManageWhitesmithsBraces);
3252
3253 addUnwrappedLine(AdjustLevel: AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3254
3255 if (ManageWhitesmithsBraces)
3256 --Line->Level;
3257}
3258
3259void UnwrappedLineParser::parseNamespace() {
3260 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3261 "'namespace' expected");
3262
3263 const FormatToken &InitialToken = *FormatTok;
3264 nextToken();
3265 if (InitialToken.is(TT: TT_NamespaceMacro)) {
3266 parseParens();
3267 } else {
3268 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::kw_inline,
3269 Ks: tok::l_square, Ks: tok::period, Ks: tok::l_paren) ||
3270 (Style.isCSharp() && FormatTok->is(Kind: tok::kw_union))) {
3271 if (FormatTok->is(Kind: tok::l_square))
3272 parseSquare();
3273 else if (FormatTok->is(Kind: tok::l_paren))
3274 parseParens();
3275 else
3276 nextToken();
3277 }
3278 }
3279 if (FormatTok->is(Kind: tok::l_brace)) {
3280 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3281
3282 if (ShouldBreakBeforeBrace(Style, InitialToken,
3283 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace))) {
3284 addUnwrappedLine();
3285 }
3286
3287 unsigned AddLevels =
3288 Style.NamespaceIndentation == FormatStyle::NI_All ||
3289 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3290 DeclarationScopeStack.size() > 1)
3291 ? 1u
3292 : 0u;
3293 parseNamespaceOrExportBlock(AddLevels);
3294 }
3295 // FIXME: Add error handling.
3296}
3297
3298void UnwrappedLineParser::parseCppExportBlock() {
3299 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3300}
3301
3302void UnwrappedLineParser::parseNew() {
3303 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3304 nextToken();
3305
3306 if (Style.isCSharp()) {
3307 do {
3308 // Handle constructor invocation, e.g. `new(field: value)`.
3309 if (FormatTok->is(Kind: tok::l_paren))
3310 parseParens();
3311
3312 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3313 if (FormatTok->is(Kind: tok::l_brace))
3314 parseBracedList();
3315
3316 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::comma))
3317 return;
3318
3319 nextToken();
3320 } while (!eof());
3321 }
3322
3323 if (!Style.isJava())
3324 return;
3325
3326 // In Java, we can parse everything up to the parens, which aren't optional.
3327 do {
3328 // There should not be a ;, { or } before the new's open paren.
3329 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::r_brace))
3330 return;
3331
3332 // Consume the parens.
3333 if (FormatTok->is(Kind: tok::l_paren)) {
3334 parseParens();
3335
3336 // If there is a class body of an anonymous class, consume that as child.
3337 if (FormatTok->is(Kind: tok::l_brace))
3338 parseChildBlock();
3339 return;
3340 }
3341 nextToken();
3342 } while (!eof());
3343}
3344
3345void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3346 keepAncestorBraces();
3347
3348 if (isBlockBegin(Tok: *FormatTok)) {
3349 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3350 FormatToken *LeftBrace = FormatTok;
3351 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3352 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3353 /*MunchSemi=*/true, KeepBraces);
3354 setPreviousRBraceType(TT_ControlStatementRBrace);
3355 if (!KeepBraces) {
3356 assert(!NestedTooDeep.empty());
3357 if (!NestedTooDeep.back())
3358 markOptionalBraces(LeftBrace);
3359 }
3360 if (WrapRightBrace)
3361 addUnwrappedLine();
3362 } else {
3363 parseUnbracedBody();
3364 }
3365
3366 if (!KeepBraces)
3367 NestedTooDeep.pop_back();
3368}
3369
3370void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3371 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3372 (Style.isVerilog() &&
3373 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3374 Keywords.kw_always_ff, Keywords.kw_always_latch,
3375 Keywords.kw_final, Keywords.kw_initial,
3376 Keywords.kw_foreach, Keywords.kw_forever,
3377 Keywords.kw_repeat))) &&
3378 "'for', 'while' or foreach macro expected");
3379 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3380 FormatTok->isNoneOf(Ks: tok::kw_for, Ks: tok::kw_while);
3381
3382 nextToken();
3383 // JS' for await ( ...
3384 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_await))
3385 nextToken();
3386 if (IsCpp && FormatTok->is(Kind: tok::kw_co_await))
3387 nextToken();
3388 if (HasParens && FormatTok->is(Kind: tok::l_paren)) {
3389 // The type is only set for Verilog basically because we were afraid to
3390 // change the existing behavior for loops. See the discussion on D121756 for
3391 // details.
3392 if (Style.isVerilog())
3393 FormatTok->setFinalizedType(TT_ConditionLParen);
3394 parseParens();
3395 }
3396
3397 if (Style.isVerilog()) {
3398 // Event control.
3399 parseVerilogSensitivityList();
3400 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(Kind: tok::semi) &&
3401 Tokens->getPreviousToken()->is(Kind: tok::r_paren)) {
3402 nextToken();
3403 addUnwrappedLine();
3404 return;
3405 }
3406
3407 handleAttributes();
3408 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3409}
3410
3411void UnwrappedLineParser::parseDoWhile() {
3412 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3413 nextToken();
3414
3415 parseLoopBody(/*KeepBraces=*/true, WrapRightBrace: Style.BraceWrapping.BeforeWhile);
3416
3417 // FIXME: Add error handling.
3418 if (FormatTok->isNot(Kind: tok::kw_while)) {
3419 addUnwrappedLine();
3420 return;
3421 }
3422
3423 FormatTok->setFinalizedType(TT_DoWhile);
3424
3425 // If in Whitesmiths mode, the line with the while() needs to be indented
3426 // to the same level as the block.
3427 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3428 ++Line->Level;
3429
3430 nextToken();
3431 parseStructuralElement();
3432}
3433
3434void UnwrappedLineParser::parseLabel(
3435 FormatStyle::IndentGotoLabelStyle IndentGotoLabels) {
3436 const bool IsGotoLabel = FormatTok->is(TT: TT_GotoLabelColon);
3437 nextToken();
3438 unsigned OldLineLevel = Line->Level;
3439
3440 switch (IndentGotoLabels) {
3441 case FormatStyle::IGLS_NoIndent:
3442 Line->Level = 0;
3443 break;
3444 case FormatStyle::IGLS_OuterIndent:
3445 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3446 --Line->Level;
3447 break;
3448 case FormatStyle::IGLS_HalfIndent:
3449 case FormatStyle::IGLS_InnerIndent:
3450 break;
3451 }
3452
3453 if (!IsGotoLabel && !Style.IndentCaseBlocks &&
3454 CommentsBeforeNextToken.empty() && FormatTok->is(Kind: tok::l_brace)) {
3455 CompoundStatementIndenter Indenter(this, Line->Level,
3456 Style.BraceWrapping.AfterCaseLabel,
3457 Style.BraceWrapping.IndentBraces);
3458 parseBlock();
3459 if (FormatTok->is(Kind: tok::kw_break)) {
3460 if (Style.BraceWrapping.AfterControlStatement ==
3461 FormatStyle::BWACS_Always) {
3462 addUnwrappedLine();
3463 if (!Style.IndentCaseBlocks &&
3464 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3465 ++Line->Level;
3466 }
3467 }
3468 parseStructuralElement();
3469 }
3470 addUnwrappedLine();
3471 } else {
3472 if (FormatTok->is(Kind: tok::semi))
3473 nextToken();
3474 addUnwrappedLine();
3475 }
3476 Line->Level = OldLineLevel;
3477 if (FormatTok->isNot(Kind: tok::l_brace)) {
3478 parseStructuralElement();
3479 addUnwrappedLine();
3480 }
3481}
3482
3483void UnwrappedLineParser::parseCaseLabel() {
3484 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3485 auto *Case = FormatTok;
3486
3487 // FIXME: fix handling of complex expressions here.
3488 do {
3489 nextToken();
3490 if (FormatTok->is(Kind: tok::colon)) {
3491 FormatTok->setFinalizedType(TT_CaseLabelColon);
3492 break;
3493 }
3494 if (Style.isJava() && FormatTok->is(Kind: tok::arrow)) {
3495 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3496 Case->setFinalizedType(TT_SwitchExpressionLabel);
3497 break;
3498 }
3499 } while (!eof());
3500 parseLabel();
3501}
3502
3503void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3504 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3505 nextToken();
3506 if (FormatTok->is(Kind: tok::l_paren))
3507 parseParens();
3508
3509 keepAncestorBraces();
3510
3511 if (FormatTok->is(Kind: tok::l_brace)) {
3512 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3513 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3514 : TT_ControlStatementLBrace);
3515 if (IsExpr)
3516 parseChildBlock();
3517 else
3518 parseBlock();
3519 setPreviousRBraceType(TT_ControlStatementRBrace);
3520 if (!IsExpr)
3521 addUnwrappedLine();
3522 } else {
3523 addUnwrappedLine();
3524 ++Line->Level;
3525 parseStructuralElement();
3526 --Line->Level;
3527 }
3528
3529 if (Style.RemoveBracesLLVM)
3530 NestedTooDeep.pop_back();
3531}
3532
3533void UnwrappedLineParser::parseAccessSpecifier() {
3534 nextToken();
3535 // Understand Qt's slots.
3536 if (FormatTok->isOneOf(K1: Keywords.kw_slots, K2: Keywords.kw_qslots))
3537 nextToken();
3538 // Otherwise, we don't know what it is, and we'd better keep the next token.
3539 if (FormatTok->is(Kind: tok::colon))
3540 nextToken();
3541 addUnwrappedLine();
3542}
3543
3544/// Parses a requires, decides if it is a clause or an expression.
3545/// \pre The current token has to be the requires keyword.
3546/// \returns true if it parsed a clause.
/// \param SeenEqual only consulted when the token before `requires` is `&&`:
/// if it is false (or a `const` precedes the `&&`), a clause is parsed.
3547bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3548 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3549
3550 // We try to guess if it is a requires clause, or a requires expression. For
3551 // that we first check the next token.
3552 switch (Tokens->peekNextToken(/*SkipComment=*/true)->Tok.getKind()) {
3553 case tok::l_brace:
3554 // This can only be an expression, never a clause.
3555 parseRequiresExpression();
3556 return false;
3557 case tok::l_paren:
3558 // Clauses and expression can start with a paren, it's unclear what we have.
3559 break;
3560 default:
3561 // All other tokens can only be a clause.
3562 parseRequiresClause();
3563 return true;
3564 }
3565
3566 // Looking forward we would have to decide if there are function declaration
3567 // like arguments to the requires expression:
3568 // requires (T t) {
3569 // Or there is a constraint expression for the requires clause:
3570 // requires (C<T> && ...
3571
3572 // But first let's look behind.
3573 auto *PreviousNonComment = FormatTok->getPreviousNonComment();
3574
3575 if (!PreviousNonComment ||
3576 PreviousNonComment->is(TT: TT_RequiresExpressionLBrace)) {
3577 // If there is no token, or an expression left brace, we are a requires
3578 // clause within a requires expression.
3579 parseRequiresClause();
3580 return true;
3581 }
3582
3583 switch (PreviousNonComment->Tok.getKind()) {
3584 case tok::greater:
3585 case tok::r_paren:
3586 case tok::kw_noexcept:
3587 case tok::kw_const:
3588 case tok::star:
3589 case tok::amp:
3590 // This is a requires clause.
3591 parseRequiresClause();
3592 return true;
3593 case tok::ampamp: {
3594 // This can be either:
3595 // if (... && requires (T t) ...)
3596 // Or
3597 // void member(...) && requires (C<T> ...
3598 // We check the one token before that for a const:
3599 // void member(...) const && requires (C<T> ...
3600 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3601 if ((PrevPrev && PrevPrev->is(Kind: tok::kw_const)) || !SeenEqual) {
3602 parseRequiresClause();
3603 return true;
3604 }
 // Still undecided: fall through to the forward scan below.
3605 break;
3606 }
3607 default:
3608 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3609 // This is a requires clause.
3610 parseRequiresClause();
3611 return true;
3612 }
3613 // It's an expression.
3614 parseRequiresExpression();
3615 return false;
3616 }
3617
3618 // Now we look forward and try to check if the paren content is a parameter
3619 // list. The parameters can be cv-qualified and contain references or
3620 // pointers.
3621 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3622 // of stuff: typename, const, *, &, &&, ::, identifiers.
3623
 // Remember where we are; every exit path below rewinds to this position
 // before the actual parse.
3624 unsigned StoredPosition = Tokens->getPosition();
3625 FormatToken *NextToken = Tokens->getNextToken();
3626 int Lookahead = 0;
 // Advances the scan by one token and counts it against the lookahead limit.
3627 auto PeekNext = [&Lookahead, &NextToken, this] {
3628 ++Lookahead;
3629 NextToken = Tokens->getNextToken();
3630 };
3631
 // FoundType: an identifier / decltype has been seen.
 // LastWasColonColon: the previous relevant token was `::`.
 // OpenAngles: running count of `<` minus `>`.
3632 bool FoundType = false;
3633 bool LastWasColonColon = false;
3634 int OpenAngles = 0;
3635
 // The scan is bounded to 50 tokens.
3636 for (; Lookahead < 50; PeekNext()) {
3637 switch (NextToken->Tok.getKind()) {
3638 case tok::kw_volatile:
3639 case tok::kw_const:
3640 case tok::comma:
 // Outside of angle brackets these are taken as evidence of a parameter
 // list, so this is parsed as a requires expression.
3641 if (OpenAngles == 0) {
3642 FormatTok = Tokens->setPosition(StoredPosition);
3643 parseRequiresExpression();
3644 return false;
3645 }
3646 break;
3647 case tok::eof:
3648 // Break out of the loop.
3649 Lookahead = 50;
3650 break;
3651 case tok::coloncolon:
3652 LastWasColonColon = true;
3653 break;
3654 case tok::kw_decltype:
3655 case tok::identifier:
 // A second name that is not qualified by `::` and not inside angle
 // brackets is taken as TYPE NAME, i.e. a parameter declaration.
3656 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3657 FormatTok = Tokens->setPosition(StoredPosition);
3658 parseRequiresExpression();
3659 return false;
3660 }
3661 FoundType = true;
3662 LastWasColonColon = false;
3663 break;
3664 case tok::less:
3665 ++OpenAngles;
3666 break;
3667 case tok::greater:
3668 --OpenAngles;
3669 break;
3670 default:
3671 if (NextToken->isTypeName(LangOpts)) {
3672 FormatTok = Tokens->setPosition(StoredPosition);
3673 parseRequiresExpression();
3674 return false;
3675 }
3676 break;
3677 }
3678 }
3679 // This seems to be a complicated expression, just assume it's a clause.
3680 FormatTok = Tokens->setPosition(StoredPosition);
3681 parseRequiresClause();
3682 return true;
3683}
3684
3685/// Parses a requires clause.
3686/// \sa parseRequiresExpression
3687///
3688/// Returns if it either has finished parsing the clause, or it detects, that
3689/// the clause is incorrect.
3690void UnwrappedLineParser::parseRequiresClause() {
3691 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3692
3693 // If there is no previous token, we are within a requires expression,
3694 // otherwise we will always have the template or function declaration in front
3695 // of it.
3696 bool InRequiresExpression =
3697 !FormatTok->Previous ||
3698 FormatTok->Previous->is(TT: TT_RequiresExpressionLBrace);
3699
3700 FormatTok->setFinalizedType(InRequiresExpression
3701 ? TT_RequiresClauseInARequiresExpression
3702 : TT_RequiresClause);
3703 nextToken();
3704
3705 // NOTE: parseConstraintExpression is only ever called from this function.
3706 // It could be inlined into here.
3707 parseConstraintExpression();
3708
3709 if (!InRequiresExpression && FormatTok->Previous)
3710 FormatTok->Previous->ClosesRequiresClause = true;
3711}
3712
3713/// Parses a requires expression.
3714/// \sa parseRequiresClause
3715///
3716/// Returns if it either has finished parsing the expression, or it detects,
3717/// that the expression is incorrect.
3718void UnwrappedLineParser::parseRequiresExpression() {
3719 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3720
3721 FormatTok->setFinalizedType(TT_RequiresExpression);
3722 nextToken();
3723
3724 if (FormatTok->is(Kind: tok::l_paren)) {
3725 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3726 parseParens();
3727 }
3728
3729 if (FormatTok->is(Kind: tok::l_brace)) {
3730 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3731 parseChildBlock();
3732 }
3733}
3734
3735/// Parses a constraint expression.
3736///
3737/// This is the body of a requires clause. It returns, when the parsing is
3738/// complete, or the expression is incorrect.
3739void UnwrappedLineParser::parseConstraintExpression() {
3740 // The special handling for lambdas is needed since tryToParseLambda() eats a
3741 // token and if a requires expression is the last part of a requires clause
3742 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3743 // not set on the correct token. Thus we need to be aware if we even expect a
3744 // lambda to be possible.
3745 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3746 bool LambdaNextTimeAllowed = true;
3747
3748 // Within lambda declarations, it is permitted to put a requires clause after
3749 // its template parameter list, which would place the requires clause right
3750 // before the parentheses of the parameters of the lambda declaration. Thus,
3751 // we track if we expect to see grouping parentheses at all.
3752 // Without this check, `requires foo<T> (T t)` in the below example would be
3753 // seen as the whole requires clause, accidentally eating the parameters of
3754 // the lambda.
3755 // [&]<typename T> requires foo<T> (T t) { ... };
3756 bool TopLevelParensAllowed = true;
3757
 // Each iteration consumes one token or one balanced group; every `return`
 // below ends the constraint expression at the current token.
3758 do {
 // Read the flag for this iteration and reset it for the next one; the cases
 // below set it again where a lambda may follow.
3759 bool LambdaThisTimeAllowed = std::exchange(obj&: LambdaNextTimeAllowed, new_val: false);
3760
3761 switch (FormatTok->Tok.getKind()) {
3762 case tok::kw_requires:
3763 parseRequiresExpression();
3764 break;
3765
3766 case tok::l_paren:
3767 if (!TopLevelParensAllowed)
3768 return;
3769 parseParens(/*AmpAmpTokenType=*/StarAndAmpTokenType: TT_BinaryOperator);
3770 TopLevelParensAllowed = false;
3771 break;
3772
3773 case tok::l_square:
 // A `[` is only accepted here as the start of a lambda.
3774 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3775 return;
3776 break;
3777
 // These tokens end the constraint expression.
3778 case tok::kw_const:
3779 case tok::semi:
3780 case tok::kw_class:
3781 case tok::kw_struct:
3782 case tok::kw_union:
3783 return;
3784
3785 case tok::l_brace:
3786 // Potential function body.
3787 return;
3788
3789 case tok::ampamp:
3790 case tok::pipepipe:
3791 FormatTok->setFinalizedType(TT_BinaryOperator);
3792 nextToken();
3793 LambdaNextTimeAllowed = true;
3794 TopLevelParensAllowed = true;
3795 break;
3796
3797 case tok::comma:
3798 case tok::comment:
 // Commas and comments carry the lambda permission over unchanged.
3799 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3800 nextToken();
3801 break;
3802
 // Operators: an operand (possibly a lambda or a parenthesized group) may
 // follow.
3803 case tok::kw_sizeof:
3804 case tok::greater:
3805 case tok::greaterequal:
3806 case tok::greatergreater:
3807 case tok::less:
3808 case tok::lessequal:
3809 case tok::lessless:
3810 case tok::equalequal:
3811 case tok::exclaim:
3812 case tok::exclaimequal:
3813 case tok::plus:
3814 case tok::minus:
3815 case tok::star:
3816 case tok::slash:
3817 LambdaNextTimeAllowed = true;
3818 TopLevelParensAllowed = true;
3819 // Just eat them.
3820 nextToken();
3821 break;
3822
3823 case tok::numeric_constant:
3824 case tok::coloncolon:
3825 case tok::kw_true:
3826 case tok::kw_false:
3827 TopLevelParensAllowed = false;
3828 // Just eat them.
3829 nextToken();
3830 break;
3831
3832 case tok::kw_static_cast:
3833 case tok::kw_const_cast:
3834 case tok::kw_reinterpret_cast:
3835 case tok::kw_dynamic_cast:
 // A named cast must be followed by its `<...>` argument, which is consumed
 // as an angle-bracket list.
3836 nextToken();
3837 if (FormatTok->isNot(Kind: tok::less))
3838 return;
3839
3840 nextToken();
3841 parseBracedList(/*IsAngleBracket=*/true);
3842 break;
3843
3844 default:
3845 if (!FormatTok->Tok.getIdentifierInfo()) {
3846 // Identifiers are part of the default case, we check for more than
3847 // tok::identifier to handle builtin type traits.
3848 return;
3849 }
3850
3851 // We need to differentiate identifiers for a template deduction guide,
3852 // variables, or function return types (the constraint expression has
3853 // ended before that), and basically all other cases. But it's easier to
3854 // check the other way around.
3855 assert(FormatTok->Previous);
3856 switch (FormatTok->Previous->Tok.getKind()) {
3857 case tok::coloncolon: // Nested identifier.
3858 case tok::ampamp: // Start of a function or variable for the
3859 case tok::pipepipe: // constraint expression. (binary)
3860 case tok::exclaim: // The same as above, but unary.
3861 case tok::kw_requires: // Initial identifier of a requires clause.
3862 case tok::equal: // Initial identifier of a concept declaration.
3863 break;
3864 default:
3865 return;
3866 }
3867
3868 // Read identifier with optional template declaration.
3869 nextToken();
3870 if (FormatTok->is(Kind: tok::less)) {
3871 nextToken();
3872 parseBracedList(/*IsAngleBracket=*/true);
3873 }
3874 TopLevelParensAllowed = false;
3875 break;
3876 }
3877 } while (!eof());
3878}
3879
/// Parses an enum declaration or definition starting at the current token
/// (`enum`, or another token for NS_ENUM-like macros).
///
/// \returns false when the tokens turn out not to be an enum: a
/// JavaScript/TypeScript property named `enum`, a protobuf field named `enum`,
/// a C++ attribute that could not be parsed, or two identifiers in a row in
/// C++. Returns true otherwise, including for a declaration without a body.
3880bool UnwrappedLineParser::parseEnum() {
3881 const FormatToken &InitialToken = *FormatTok;
3882
3883 // Won't be 'enum' for NS_ENUMs.
3884 if (FormatTok->is(Kind: tok::kw_enum))
3885 nextToken();
3886
3887 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3888 // declarations. An "enum" keyword followed by a colon would be a syntax
3889 // error and thus assume it is just an identifier.
3890 if (Style.isJavaScript() && FormatTok->isOneOf(K1: tok::colon, K2: tok::question))
3891 return false;
3892
3893 // In protobuf, "enum" can be used as a field name.
3894 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(Kind: tok::equal))
3895 return false;
3896
3897 if (IsCpp) {
3898 // Eat up enum class ...
3899 if (FormatTok->isOneOf(K1: tok::kw_class, K2: tok::kw_struct))
3900 nextToken();
3901 while (FormatTok->is(Kind: tok::l_square))
3902 if (!handleCppAttributes())
3903 return false;
3904 }
3905
 // Consume the enum's header (name, underlying type and similar tokens) up to
 // the opening brace or the end of the declaration.
3906 while (FormatTok->Tok.getIdentifierInfo() ||
3907 FormatTok->isOneOf(K1: tok::colon, K2: tok::coloncolon, Ks: tok::less,
3908 Ks: tok::greater, Ks: tok::comma, Ks: tok::question,
3909 Ks: tok::l_square)) {
3910 if (FormatTok->is(Kind: tok::colon))
3911 FormatTok->setFinalizedType(TT_EnumUnderlyingTypeColon);
3912 if (Style.isVerilog()) {
3913 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3914 nextToken();
3915 // In Verilog the base type can have dimensions.
3916 while (FormatTok->is(Kind: tok::l_square))
3917 parseSquare();
3918 } else {
3919 nextToken();
3920 }
3921 // We can have macros or attributes in between 'enum' and the enum name.
3922 if (FormatTok->is(Kind: tok::l_paren))
3923 parseParens();
3924 if (FormatTok->is(Kind: tok::identifier)) {
3925 nextToken();
3926 // If there are two identifiers in a row, this is likely an elaborate
3927 // return type. In Java, this can be "implements", etc.
3928 if (IsCpp && FormatTok->is(Kind: tok::identifier))
3929 return false;
3930 }
3931 }
3932
3933 // Just a declaration or something is wrong.
3934 if (FormatTok->isNot(Kind: tok::l_brace))
3935 return true;
3936 FormatTok->setFinalizedType(TT_EnumLBrace);
3937 FormatTok->setBlockKind(BK_Block);
3938
3939 if (Style.isJava()) {
3940 // Java enums are different.
3941 parseJavaEnumBody();
3942 return true;
3943 }
 // Protobuf enum bodies are parsed as ordinary declaration blocks.
3944 if (Style.Language == FormatStyle::LK_Proto) {
3945 parseBlock(/*MustBeDeclaration=*/true);
3946 return true;
3947 }
3948
3949 const bool ManageWhitesmithsBraces =
3950 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3951
3952 if (!Style.AllowShortEnumsOnASingleLine &&
3953 ShouldBreakBeforeBrace(Style, InitialToken,
3954 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace))) {
3955 addUnwrappedLine();
3956
3957 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3958 // the whole block.
3959 if (ManageWhitesmithsBraces)
3960 ++Line->Level;
3961 }
3962 // Parse enum body.
3963 nextToken();
3964 if (!Style.AllowShortEnumsOnASingleLine) {
3965 addUnwrappedLine();
3966 if (!ManageWhitesmithsBraces)
3967 ++Line->Level;
3968 }
 // Index of the most recent line in CurrentLines (or the invalid index when
 // there is none), captured before the enumerators are parsed; it is only
 // used for Whitesmiths below.
3969 const auto OpeningLineIndex = CurrentLines->empty()
3970 ? UnwrappedLine::kInvalidIndex
3971 : CurrentLines->size() - 1;
3972 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3973 if (!Style.AllowShortEnumsOnASingleLine && !ManageWhitesmithsBraces)
3974 --Line->Level;
 // If the braced list could not be parsed, end the line here, taking a
 // directly following semicolon along.
3975 if (HasError) {
3976 if (FormatTok->is(Kind: tok::semi))
3977 nextToken();
3978 addUnwrappedLine();
3979 }
3980 setPreviousRBraceType(TT_EnumRBrace);
3981 if (ManageWhitesmithsBraces)
3982 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
3983 return true;
3984
3985 // There is no addUnwrappedLine() here so that we fall through to parsing a
3986 // structural element afterwards. Thus, in "enum A {} n, m;",
3987 // "} n, m;" will end up in one unwrapped line.
3988}
3989
3990bool UnwrappedLineParser::parseStructLike() {
3991 // parseRecord falls through and does not yet add an unwrapped line as a
3992 // record declaration or definition can start a structural element.
3993 parseRecord();
3994 // This does not apply to Java, JavaScript and C#.
3995 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3996 if (FormatTok->is(Kind: tok::semi))
3997 nextToken();
3998 addUnwrappedLine();
3999 return true;
4000 }
4001 return false;
4002}
4003
4004namespace {
4005// A class used to set and restore the Token position when peeking
4006// ahead in the token source.
4007class ScopedTokenPosition {
4008 unsigned StoredPosition;
4009 FormatTokenSource *Tokens;
4010
4011public:
4012 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
4013 assert(Tokens && "Tokens expected to not be null");
4014 StoredPosition = Tokens->getPosition();
4015 }
4016
4017 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
4018};
4019} // namespace
4020
4021// Look to see if we have [[ by looking ahead, if
4022// its not then rewind to the original position.
4023bool UnwrappedLineParser::tryToParseSimpleAttribute() {
4024 ScopedTokenPosition AutoPosition(Tokens);
4025 FormatToken *Tok = Tokens->getNextToken();
4026 // We already read the first [ check for the second.
4027 if (Tok->isNot(Kind: tok::l_square))
4028 return false;
4029 // Double check that the attribute is just something
4030 // fairly simple.
4031 while (Tok->isNot(Kind: tok::eof)) {
4032 if (Tok->is(Kind: tok::r_square))
4033 break;
4034 Tok = Tokens->getNextToken();
4035 }
4036 if (Tok->is(Kind: tok::eof))
4037 return false;
4038 Tok = Tokens->getNextToken();
4039 if (Tok->isNot(Kind: tok::r_square))
4040 return false;
4041 Tok = Tokens->getNextToken();
4042 if (Tok->is(Kind: tok::semi))
4043 return false;
4044 return true;
4045}
4046
4047void UnwrappedLineParser::parseJavaEnumBody() {
4048 assert(FormatTok->is(tok::l_brace));
4049 const FormatToken *OpeningBrace = FormatTok;
4050
4051 // Determine whether the enum is simple, i.e. does not have a semicolon or
4052 // constants with class bodies. Simple enums can be formatted like braced
4053 // lists, contracted to a single line, etc.
4054 unsigned StoredPosition = Tokens->getPosition();
4055 bool IsSimple = true;
4056 FormatToken *Tok = Tokens->getNextToken();
4057 while (Tok->isNot(Kind: tok::eof)) {
4058 if (Tok->is(Kind: tok::r_brace))
4059 break;
4060 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::semi)) {
4061 IsSimple = false;
4062 break;
4063 }
4064 // FIXME: This will also mark enums with braces in the arguments to enum
4065 // constants as "not simple". This is probably fine in practice, though.
4066 Tok = Tokens->getNextToken();
4067 }
4068 FormatTok = Tokens->setPosition(StoredPosition);
4069
4070 if (IsSimple) {
4071 nextToken();
4072 parseBracedList();
4073 addUnwrappedLine();
4074 return;
4075 }
4076
4077 // Parse the body of a more complex enum.
4078 // First add a line for everything up to the "{".
4079 nextToken();
4080 addUnwrappedLine();
4081 ++Line->Level;
4082
4083 // Parse the enum constants.
4084 while (!eof()) {
4085 if (FormatTok->is(Kind: tok::l_brace)) {
4086 // Parse the constant's class body.
4087 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
4088 /*MunchSemi=*/false);
4089 } else if (FormatTok->is(Kind: tok::l_paren)) {
4090 parseParens();
4091 } else if (FormatTok->is(Kind: tok::comma)) {
4092 nextToken();
4093 addUnwrappedLine();
4094 } else if (FormatTok->is(Kind: tok::semi)) {
4095 nextToken();
4096 addUnwrappedLine();
4097 break;
4098 } else if (FormatTok->is(Kind: tok::r_brace)) {
4099 addUnwrappedLine();
4100 break;
4101 } else {
4102 nextToken();
4103 }
4104 }
4105
4106 // Parse the class body after the enum's ";" if any.
4107 parseLevel(OpeningBrace);
4108 nextToken();
4109 --Line->Level;
4110 addUnwrappedLine();
4111}
4112
4113void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
4114 assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
4115 const FormatToken &InitialToken = *FormatTok;
4116 nextToken();
4117
4118 FormatToken *ClassName =
4119 IsJavaRecord && FormatTok->is(Kind: tok::identifier) ? FormatTok : nullptr;
4120 bool IsDerived = false;
4121 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4122 return Tok->is(Kind: tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4123 };
4124 // JavaScript/TypeScript supports anonymous classes like:
4125 // a = class extends foo { }
4126 bool JSPastExtendsOrImplements = false;
4127 // The actual identifier can be a nested name specifier, and in macros
4128 // it is often token-pasted.
4129 // An [[attribute]] can be before the identifier.
4130 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::hashhash,
4131 Ks: tok::kw_alignas, Ks: tok::l_square) ||
4132 FormatTok->isAttribute() ||
4133 ((Style.isJava() || Style.isJavaScript()) &&
4134 FormatTok->isOneOf(K1: tok::period, K2: tok::comma))) {
4135 if (Style.isJavaScript() &&
4136 FormatTok->isOneOf(K1: Keywords.kw_extends, K2: Keywords.kw_implements)) {
4137 JSPastExtendsOrImplements = true;
4138 // JavaScript/TypeScript supports inline object types in
4139 // extends/implements positions:
4140 // class Foo implements {bar: number} { }
4141 nextToken();
4142 if (FormatTok->is(Kind: tok::l_brace)) {
4143 tryToParseBracedList();
4144 continue;
4145 }
4146 }
4147 if (FormatTok->is(Kind: tok::l_square) && handleCppAttributes())
4148 continue;
4149 auto *Previous = FormatTok;
4150 nextToken();
4151 switch (FormatTok->Tok.getKind()) {
4152 case tok::l_paren:
4153 // We can have macros in between 'class' and the class name.
4154 if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
4155 // e.g. `struct macro(a) S { int i; };`
4156 Previous->Previous == &InitialToken) {
4157 parseParens();
4158 }
4159 break;
4160 case tok::coloncolon:
4161 case tok::hashhash:
4162 break;
4163 default:
4164 if (JSPastExtendsOrImplements || ClassName ||
4165 Previous->isNot(Kind: tok::identifier) || Previous->is(TT: TT_AttributeMacro)) {
4166 break;
4167 }
4168 if (const auto Text = Previous->TokenText;
4169 Text.size() == 1 || Text != Text.upper()) {
4170 ClassName = Previous;
4171 }
4172 }
4173 }
4174
4175 auto IsListInitialization = [&] {
4176 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4177 return false;
4178 assert(FormatTok->is(tok::l_brace));
4179 const auto *Prev = FormatTok->getPreviousNonComment();
4180 assert(Prev);
4181 return Prev != ClassName && Prev->is(Kind: tok::identifier) &&
4182 Prev->isNot(Kind: Keywords.kw_final) && tryToParseBracedList();
4183 };
4184
4185 if (FormatTok->isOneOf(K1: tok::colon, K2: tok::less)) {
4186 int AngleNestingLevel = 0;
4187 do {
4188 if (FormatTok->is(Kind: tok::less))
4189 ++AngleNestingLevel;
4190 else if (FormatTok->is(Kind: tok::greater))
4191 --AngleNestingLevel;
4192
4193 if (AngleNestingLevel == 0) {
4194 if (FormatTok->is(Kind: tok::colon)) {
4195 IsDerived = true;
4196 } else if (!IsDerived && FormatTok->is(Kind: tok::identifier) &&
4197 FormatTok->Previous->is(Kind: tok::coloncolon)) {
4198 ClassName = FormatTok;
4199 } else if (FormatTok->is(Kind: tok::l_paren) &&
4200 IsNonMacroIdentifier(FormatTok->Previous)) {
4201 break;
4202 }
4203 }
4204 if (FormatTok->is(Kind: tok::l_brace)) {
4205 if (AngleNestingLevel == 0 && IsListInitialization())
4206 return;
4207 calculateBraceTypes(/*ExpectClassBody=*/true);
4208 if (!tryToParseBracedList())
4209 break;
4210 }
4211 if (FormatTok->is(Kind: tok::l_square)) {
4212 FormatToken *Previous = FormatTok->Previous;
4213 if (!Previous || (Previous->isNot(Kind: tok::r_paren) &&
4214 !Previous->isTypeOrIdentifier(LangOpts))) {
4215 // Don't try parsing a lambda if we had a closing parenthesis before,
4216 // it was probably a pointer to an array: int (*)[].
4217 if (!tryToParseLambda())
4218 continue;
4219 } else {
4220 parseSquare();
4221 continue;
4222 }
4223 }
4224 if (FormatTok->is(Kind: tok::semi))
4225 return;
4226 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where)) {
4227 addUnwrappedLine();
4228 nextToken();
4229 parseCSharpGenericTypeConstraint();
4230 break;
4231 }
4232 nextToken();
4233 } while (!eof());
4234 }
4235
4236 auto GetBraceTypes =
4237 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4238 switch (RecordTok.Tok.getKind()) {
4239 case tok::kw_class:
4240 return {TT_ClassLBrace, TT_ClassRBrace};
4241 case tok::kw_struct:
4242 return {TT_StructLBrace, TT_StructRBrace};
4243 case tok::kw_union:
4244 return {TT_UnionLBrace, TT_UnionRBrace};
4245 default:
4246 // Useful for e.g. interface.
4247 return {TT_RecordLBrace, TT_RecordRBrace};
4248 }
4249 };
4250 if (FormatTok->is(Kind: tok::l_brace)) {
4251 if (IsListInitialization())
4252 return;
4253 if (ClassName)
4254 ClassName->setFinalizedType(TT_ClassHeadName);
4255 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4256 FormatTok->setFinalizedType(OpenBraceType);
4257 if (ParseAsExpr) {
4258 parseChildBlock();
4259 } else {
4260 if (ShouldBreakBeforeBrace(Style, InitialToken,
4261 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace),
4262 IsJavaRecord)) {
4263 addUnwrappedLine();
4264 }
4265
4266 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4267 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4268 }
4269 setPreviousRBraceType(ClosingBraceType);
4270 }
4271 // There is no addUnwrappedLine() here so that we fall through to parsing a
4272 // structural element afterwards. Thus, in "class A {} n, m;",
4273 // "} n, m;" will end up in one unwrapped line.
4274}
4275
4276void UnwrappedLineParser::parseObjCMethod() {
4277 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4278 "'(' or identifier expected.");
4279 do {
4280 if (FormatTok->is(Kind: tok::semi)) {
4281 nextToken();
4282 addUnwrappedLine();
4283 return;
4284 } else if (FormatTok->is(Kind: tok::l_brace)) {
4285 if (Style.BraceWrapping.AfterFunction)
4286 addUnwrappedLine();
4287 parseBlock();
4288 addUnwrappedLine();
4289 return;
4290 } else {
4291 nextToken();
4292 }
4293 } while (!eof());
4294}
4295
4296void UnwrappedLineParser::parseObjCProtocolList() {
4297 assert(FormatTok->is(tok::less) && "'<' expected.");
4298 do {
4299 nextToken();
4300 // Early exit in case someone forgot a close angle.
4301 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4302 return;
4303 } while (!eof() && FormatTok->isNot(Kind: tok::greater));
4304 nextToken(); // Skip '>'.
4305}
4306
4307void UnwrappedLineParser::parseObjCUntilAtEnd() {
4308 do {
4309 if (FormatTok->is(Kind: tok::objc_end)) {
4310 nextToken();
4311 addUnwrappedLine();
4312 break;
4313 }
4314 if (FormatTok->is(Kind: tok::l_brace)) {
4315 parseBlock();
4316 // In ObjC interfaces, nothing should be following the "}".
4317 addUnwrappedLine();
4318 } else if (FormatTok->is(Kind: tok::r_brace)) {
4319 // Ignore stray "}". parseStructuralElement doesn't consume them.
4320 nextToken();
4321 addUnwrappedLine();
4322 } else if (FormatTok->isOneOf(K1: tok::minus, K2: tok::plus)) {
4323 nextToken();
4324 if (FormatTok->isOneOf(K1: tok::l_paren, K2: tok::identifier))
4325 parseObjCMethod();
4326 } else {
4327 parseStructuralElement();
4328 }
4329 } while (!eof());
4330}
4331
4332void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4333 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4334 nextToken();
4335 nextToken(); // interface name
4336
4337 // @interface can be followed by a lightweight generic
4338 // specialization list, then either a base class or a category.
4339 if (FormatTok->is(Kind: tok::less))
4340 parseObjCLightweightGenerics();
4341 if (FormatTok->is(Kind: tok::colon)) {
4342 nextToken();
4343 nextToken(); // base class name
4344 // The base class can also have lightweight generics applied to it.
4345 if (FormatTok->is(Kind: tok::less))
4346 parseObjCLightweightGenerics();
4347 } else if (FormatTok->is(Kind: tok::l_paren)) {
4348 // Skip category, if present.
4349 parseParens();
4350 }
4351
4352 if (FormatTok->is(Kind: tok::less))
4353 parseObjCProtocolList();
4354
4355 if (FormatTok->is(Kind: tok::l_brace)) {
4356 if (Style.BraceWrapping.AfterObjCDeclaration)
4357 addUnwrappedLine();
4358 parseBlock(/*MustBeDeclaration=*/true);
4359 }
4360
4361 // With instance variables, this puts '}' on its own line. Without instance
4362 // variables, this ends the @interface line.
4363 addUnwrappedLine();
4364
4365 parseObjCUntilAtEnd();
4366}
4367
4368void UnwrappedLineParser::parseObjCLightweightGenerics() {
4369 assert(FormatTok->is(tok::less));
4370 // Unlike protocol lists, generic parameterizations support
4371 // nested angles:
4372 //
4373 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4374 // NSObject <NSCopying, NSSecureCoding>
4375 //
4376 // so we need to count how many open angles we have left.
4377 unsigned NumOpenAngles = 1;
4378 do {
4379 nextToken();
4380 // Early exit in case someone forgot a close angle.
4381 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4382 break;
4383 if (FormatTok->is(Kind: tok::less)) {
4384 ++NumOpenAngles;
4385 } else if (FormatTok->is(Kind: tok::greater)) {
4386 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4387 --NumOpenAngles;
4388 }
4389 } while (!eof() && NumOpenAngles != 0);
4390 nextToken(); // Skip '>'.
4391}
4392
4393// Returns true for the declaration/definition form of @protocol,
4394// false for the expression form.
4395bool UnwrappedLineParser::parseObjCProtocol() {
4396 assert(FormatTok->is(tok::objc_protocol));
4397 nextToken();
4398
4399 if (FormatTok->is(Kind: tok::l_paren)) {
4400 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4401 return false;
4402 }
4403
4404 // The definition/declaration form,
4405 // @protocol Foo
4406 // - (int)someMethod;
4407 // @end
4408
4409 nextToken(); // protocol name
4410
4411 if (FormatTok->is(Kind: tok::less))
4412 parseObjCProtocolList();
4413
4414 // Check for protocol declaration.
4415 if (FormatTok->is(Kind: tok::semi)) {
4416 nextToken();
4417 addUnwrappedLine();
4418 return true;
4419 }
4420
4421 addUnwrappedLine();
4422 parseObjCUntilAtEnd();
4423 return true;
4424}
4425
4426void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4427 bool IsImport = FormatTok->is(II: Keywords.kw_import);
4428 assert(IsImport || FormatTok->is(tok::kw_export));
4429 nextToken();
4430
4431 // Consume the "default" in "export default class/function".
4432 if (FormatTok->is(Kind: tok::kw_default))
4433 nextToken();
4434
4435 // Consume "async function", "function" and "default function", so that these
4436 // get parsed as free-standing JS functions, i.e. do not require a trailing
4437 // semicolon.
4438 if (FormatTok->is(II: Keywords.kw_async))
4439 nextToken();
4440 if (FormatTok->is(II: Keywords.kw_function)) {
4441 nextToken();
4442 return;
4443 }
4444
4445 // For imports, `export *`, `export {...}`, consume the rest of the line up
4446 // to the terminating `;`. For everything else, just return and continue
4447 // parsing the structural element, i.e. the declaration or expression for
4448 // `export default`.
4449 if (!IsImport && FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::star) &&
4450 !FormatTok->isStringLiteral() &&
4451 !(FormatTok->is(II: Keywords.kw_type) &&
4452 Tokens->peekNextToken()->isOneOf(K1: tok::l_brace, K2: tok::star))) {
4453 return;
4454 }
4455
4456 while (!eof()) {
4457 if (FormatTok->is(Kind: tok::semi))
4458 return;
4459 if (Line->Tokens.empty()) {
4460 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4461 // import statement should terminate.
4462 return;
4463 }
4464 if (FormatTok->is(Kind: tok::l_brace)) {
4465 FormatTok->setBlockKind(BK_Block);
4466 nextToken();
4467 parseBracedList();
4468 } else {
4469 nextToken();
4470 }
4471 }
4472}
4473
4474void UnwrappedLineParser::parseStatementMacro() {
4475 nextToken();
4476 if (FormatTok->is(Kind: tok::l_paren))
4477 parseParens();
4478 if (FormatTok->is(Kind: tok::semi))
4479 nextToken();
4480 addUnwrappedLine();
4481}
4482
4483void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4484 // consume things like a::`b.c[d:e] or a::*
4485 while (true) {
4486 if (FormatTok->isOneOf(K1: tok::star, K2: tok::period, Ks: tok::periodstar,
4487 Ks: tok::coloncolon, Ks: tok::hash) ||
4488 Keywords.isVerilogIdentifier(Tok: *FormatTok)) {
4489 nextToken();
4490 } else if (FormatTok->is(Kind: tok::l_square)) {
4491 parseSquare();
4492 } else {
4493 break;
4494 }
4495 }
4496}
4497
4498void UnwrappedLineParser::parseVerilogSensitivityList() {
4499 if (FormatTok->isNot(Kind: tok::at))
4500 return;
4501 nextToken();
4502 // A block event expression has 2 at signs.
4503 if (FormatTok->is(Kind: tok::at))
4504 nextToken();
4505 switch (FormatTok->Tok.getKind()) {
4506 case tok::star:
4507 nextToken();
4508 break;
4509 case tok::l_paren:
4510 parseParens();
4511 break;
4512 default:
4513 parseVerilogHierarchyIdentifier();
4514 break;
4515 }
4516}
4517
4518unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4519 unsigned AddLevels = 0;
4520
4521 if (FormatTok->is(II: Keywords.kw_clocking)) {
4522 nextToken();
4523 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4524 nextToken();
4525 parseVerilogSensitivityList();
4526 if (FormatTok->is(Kind: tok::semi))
4527 nextToken();
4528 } else if (FormatTok->isOneOf(K1: tok::kw_case, K2: Keywords.kw_casex,
4529 Ks: Keywords.kw_casez, Ks: Keywords.kw_randcase,
4530 Ks: Keywords.kw_randsequence)) {
4531 if (Style.IndentCaseLabels)
4532 AddLevels++;
4533 nextToken();
4534 if (FormatTok->is(Kind: tok::l_paren)) {
4535 FormatTok->setFinalizedType(TT_ConditionLParen);
4536 parseParens();
4537 }
4538 if (FormatTok->isOneOf(K1: Keywords.kw_inside, K2: Keywords.kw_matches))
4539 nextToken();
4540 // The case header has no semicolon.
4541 } else {
4542 // "module" etc.
4543 nextToken();
4544 // all the words like the name of the module and specifiers like
4545 // "automatic" and the width of function return type
4546 while (true) {
4547 if (FormatTok->is(Kind: tok::l_square)) {
4548 auto Prev = FormatTok->getPreviousNonComment();
4549 if (Prev && Keywords.isVerilogIdentifier(Tok: *Prev))
4550 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4551 parseSquare();
4552 } else if (Keywords.isVerilogIdentifier(Tok: *FormatTok) ||
4553 FormatTok->isOneOf(K1: tok::hash, K2: tok::hashhash, Ks: tok::coloncolon,
4554 Ks: Keywords.kw_automatic, Ks: tok::kw_static)) {
4555 nextToken();
4556 } else {
4557 break;
4558 }
4559 }
4560
4561 auto NewLine = [this]() {
4562 addUnwrappedLine();
4563 Line->IsContinuation = true;
4564 };
4565
4566 // package imports
4567 while (FormatTok->is(II: Keywords.kw_import)) {
4568 NewLine();
4569 nextToken();
4570 parseVerilogHierarchyIdentifier();
4571 if (FormatTok->is(Kind: tok::semi))
4572 nextToken();
4573 }
4574
4575 // parameters and ports
4576 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
4577 NewLine();
4578 nextToken();
4579 if (FormatTok->is(Kind: tok::l_paren)) {
4580 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4581 parseParens();
4582 }
4583 }
4584 if (FormatTok->is(Kind: tok::l_paren)) {
4585 NewLine();
4586 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4587 parseParens();
4588 }
4589
4590 // extends and implements
4591 if (FormatTok->is(II: Keywords.kw_extends)) {
4592 NewLine();
4593 nextToken();
4594 parseVerilogHierarchyIdentifier();
4595 if (FormatTok->is(Kind: tok::l_paren))
4596 parseParens();
4597 }
4598 if (FormatTok->is(II: Keywords.kw_implements)) {
4599 NewLine();
4600 do {
4601 nextToken();
4602 parseVerilogHierarchyIdentifier();
4603 } while (FormatTok->is(Kind: tok::comma));
4604 }
4605
4606 // Coverage event for cover groups.
4607 if (FormatTok->is(Kind: tok::at)) {
4608 NewLine();
4609 parseVerilogSensitivityList();
4610 }
4611
4612 if (FormatTok->is(Kind: tok::semi))
4613 nextToken(/*LevelDifference=*/1);
4614 addUnwrappedLine();
4615 }
4616
4617 return AddLevels;
4618}
4619
4620void UnwrappedLineParser::parseVerilogTable() {
4621 assert(FormatTok->is(Keywords.kw_table));
4622 nextToken(/*LevelDifference=*/1);
4623 addUnwrappedLine();
4624
4625 auto InitialLevel = Line->Level++;
4626 while (!eof() && !Keywords.isVerilogEnd(Tok: *FormatTok)) {
4627 FormatToken *Tok = FormatTok;
4628 nextToken();
4629 if (Tok->is(Kind: tok::semi))
4630 addUnwrappedLine();
4631 else if (Tok->isOneOf(K1: tok::star, K2: tok::colon, Ks: tok::question, Ks: tok::minus))
4632 Tok->setFinalizedType(TT_VerilogTableItem);
4633 }
4634 Line->Level = InitialLevel;
4635 nextToken(/*LevelDifference=*/-1);
4636 addUnwrappedLine();
4637}
4638
4639void UnwrappedLineParser::parseVerilogCaseLabel() {
4640 // The label will get unindented in AnnotatingParser. If there are no leading
4641 // spaces, indent the rest here so that things inside the block will be
4642 // indented relative to things outside. We don't use parseLabel because we
4643 // don't know whether this colon is a label or a ternary expression at this
4644 // point.
4645 auto OrigLevel = Line->Level;
4646 auto FirstLine = CurrentLines->size();
4647 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4648 ++Line->Level;
4649 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(Tok: *FormatTok))
4650 --Line->Level;
4651 parseStructuralElement();
4652 // Restore the indentation in both the new line and the line that has the
4653 // label.
4654 if (CurrentLines->size() > FirstLine)
4655 (*CurrentLines)[FirstLine].Level = OrigLevel;
4656 Line->Level = OrigLevel;
4657}
4658
4659void UnwrappedLineParser::parseVerilogExtern() {
4660 assert(
4661 FormatTok->isOneOf(tok::kw_extern, tok::kw_export, Keywords.kw_import));
4662 nextToken();
4663 // "DPI-C"
4664 if (FormatTok->is(Kind: tok::string_literal))
4665 nextToken();
4666 skipVerilogQualifiers();
4667 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4668 nextToken();
4669 if (FormatTok->is(Kind: tok::equal))
4670 nextToken();
4671 if (Keywords.isVerilogHierarchy(Tok: *FormatTok))
4672 parseVerilogHierarchyHeader();
4673}
4674
4675void UnwrappedLineParser::skipVerilogQualifiers() {
4676 while (FormatTok->isOneOf(K1: tok::kw_protected, K2: tok::kw_virtual, Ks: tok::kw_static,
4677 Ks: Keywords.kw_rand, Ks: Keywords.kw_context,
4678 Ks: Keywords.kw_pure, Ks: Keywords.kw_randc,
4679 Ks: Keywords.kw_local)) {
4680 nextToken();
4681 }
4682}
4683
4684bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4685 for (const auto &N : Line.Tokens) {
4686 if (N.Tok->MacroCtx)
4687 return true;
4688 for (const UnwrappedLine &Child : N.Children)
4689 if (containsExpansion(Line: Child))
4690 return true;
4691 }
4692 return false;
4693}
4694
4695void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4696 if (Line->Tokens.empty())
4697 return;
4698 LLVM_DEBUG({
4699 if (!parsingPPDirective()) {
4700 llvm::dbgs() << "Adding unwrapped line:\n";
4701 printDebugInfo(*Line);
4702 }
4703 });
4704
4705 // If this line closes a block when in Whitesmiths mode, remember that
4706 // information so that the level can be decreased after the line is added.
4707 // This has to happen after the addition of the line since the line itself
4708 // needs to be indented.
4709 bool ClosesWhitesmithsBlock =
4710 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4711 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4712
4713 // If the current line was expanded from a macro call, we use it to
4714 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4715 // line and the unexpanded token stream.
4716 if (!parsingPPDirective() && !InExpansion && containsExpansion(Line: *Line)) {
4717 if (!Reconstruct)
4718 Reconstruct.emplace(args&: Line->Level, args&: Unexpanded);
4719 Reconstruct->addLine(Line: *Line);
4720
4721 // While the reconstructed unexpanded lines are stored in the normal
4722 // flow of lines, the expanded lines are stored on the side to be analyzed
4723 // in an extra step.
4724 CurrentExpandedLines.push_back(Elt: std::move(*Line));
4725
4726 if (Reconstruct->finished()) {
4727 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4728 assert(!Reconstructed.Tokens.empty() &&
4729 "Reconstructed must at least contain the macro identifier.");
4730 assert(!parsingPPDirective());
4731 LLVM_DEBUG({
4732 llvm::dbgs() << "Adding unexpanded line:\n";
4733 printDebugInfo(Reconstructed);
4734 });
4735 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4736 Lines.push_back(Elt: std::move(Reconstructed));
4737 CurrentExpandedLines.clear();
4738 Reconstruct.reset();
4739 }
4740 } else {
4741 // At the top level we only get here when no unexpansion is going on, or
4742 // when conditional formatting led to unfinished macro reconstructions.
4743 assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
4744 CurrentLines->push_back(Elt: std::move(*Line));
4745 }
4746 Line->Tokens.clear();
4747 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4748 Line->FirstStartColumn = 0;
4749 Line->IsContinuation = false;
4750 Line->SeenDecltypeAuto = false;
4751 Line->IsModuleOrImportDecl = false;
4752
4753 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4754 --Line->Level;
4755 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4756 CurrentLines->append(
4757 in_start: std::make_move_iterator(i: PreprocessorDirectives.begin()),
4758 in_end: std::make_move_iterator(i: PreprocessorDirectives.end()));
4759 PreprocessorDirectives.clear();
4760 }
4761 // Disconnect the current token from the last token on the previous line.
4762 FormatTok->Previous = nullptr;
4763}
4764
4765bool UnwrappedLineParser::eof() const { return FormatTok->is(Kind: tok::eof); }
4766
4767bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4768 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4769 FormatTok.NewlinesBefore > 0;
4770}
4771
4772// Checks if \p FormatTok is a line comment that continues the line comment
4773// section on \p Line.
4774static bool
4775continuesLineCommentSection(const FormatToken &FormatTok,
4776 const UnwrappedLine &Line, const FormatStyle &Style,
4777 const llvm::Regex &CommentPragmasRegex) {
4778 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4779 return false;
4780
4781 StringRef IndentContent = FormatTok.TokenText;
4782 if (FormatTok.TokenText.starts_with(Prefix: "//") ||
4783 FormatTok.TokenText.starts_with(Prefix: "/*")) {
4784 IndentContent = FormatTok.TokenText.substr(Start: 2);
4785 }
4786 if (CommentPragmasRegex.match(String: IndentContent))
4787 return false;
4788
4789 // If Line starts with a line comment, then FormatTok continues the comment
4790 // section if its original column is greater or equal to the original start
4791 // column of the line.
4792 //
4793 // Define the min column token of a line as follows: if a line ends in '{' or
4794 // contains a '{' followed by a line comment, then the min column token is
4795 // that '{'. Otherwise, the min column token of the line is the first token of
4796 // the line.
4797 //
4798 // If Line starts with a token other than a line comment, then FormatTok
4799 // continues the comment section if its original column is greater than the
4800 // original start column of the min column token of the line.
4801 //
4802 // For example, the second line comment continues the first in these cases:
4803 //
4804 // // first line
4805 // // second line
4806 //
4807 // and:
4808 //
4809 // // first line
4810 // // second line
4811 //
4812 // and:
4813 //
4814 // int i; // first line
4815 // // second line
4816 //
4817 // and:
4818 //
4819 // do { // first line
4820 // // second line
4821 // int i;
4822 // } while (true);
4823 //
4824 // and:
4825 //
4826 // enum {
4827 // a, // first line
4828 // // second line
4829 // b
4830 // };
4831 //
4832 // The second line comment doesn't continue the first in these cases:
4833 //
4834 // // first line
4835 // // second line
4836 //
4837 // and:
4838 //
4839 // int i; // first line
4840 // // second line
4841 //
4842 // and:
4843 //
4844 // do { // first line
4845 // // second line
4846 // int i;
4847 // } while (true);
4848 //
4849 // and:
4850 //
4851 // enum {
4852 // a, // first line
4853 // // second line
4854 // };
4855 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4856
4857 // Scan for '{//'. If found, use the column of '{' as a min column for line
4858 // comment section continuation.
4859 const FormatToken *PreviousToken = nullptr;
4860 for (const UnwrappedLineNode &Node : Line.Tokens) {
4861 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace) &&
4862 isLineComment(FormatTok: *Node.Tok)) {
4863 MinColumnToken = PreviousToken;
4864 break;
4865 }
4866 PreviousToken = Node.Tok;
4867
4868 // Grab the last newline preceding a token in this unwrapped line.
4869 if (Node.Tok->NewlinesBefore > 0)
4870 MinColumnToken = Node.Tok;
4871 }
4872 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace))
4873 MinColumnToken = PreviousToken;
4874
4875 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4876 MinColumnToken);
4877}
4878
4879void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4880 bool JustComments = Line->Tokens.empty();
4881 for (FormatToken *Tok : CommentsBeforeNextToken) {
4882 // Line comments that belong to the same line comment section are put on the
4883 // same line since later we might want to reflow content between them.
4884 // Additional fine-grained breaking of line comment sections is controlled
4885 // by the class BreakableLineCommentSection in case it is desirable to keep
4886 // several line comment sections in the same unwrapped line.
4887 //
4888 // FIXME: Consider putting separate line comment sections as children to the
4889 // unwrapped line instead.
4890 Tok->ContinuesLineCommentSection =
4891 continuesLineCommentSection(FormatTok: *Tok, Line: *Line, Style, CommentPragmasRegex);
4892 if (isOnNewLine(FormatTok: *Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4893 addUnwrappedLine();
4894 pushToken(Tok);
4895 }
4896 if (NewlineBeforeNext && JustComments)
4897 addUnwrappedLine();
4898 CommentsBeforeNextToken.clear();
4899}
4900
4901void UnwrappedLineParser::nextToken(int LevelDifference) {
4902 if (eof())
4903 return;
4904 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4905 pushToken(Tok: FormatTok);
4906 FormatToken *Previous = FormatTok;
4907 if (!Style.isJavaScript())
4908 readToken(LevelDifference);
4909 else
4910 readTokenWithJavaScriptASI();
4911 FormatTok->Previous = Previous;
4912 if (Style.isVerilog()) {
4913 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4914 // keywords like `begin`, we can't treat them the same as left braces
4915 // because some contexts require one of them. For example structs use
4916 // braces and if blocks use keywords, and a left brace can occur in an if
4917 // statement, but it is not a block. For keywords like `end`, we simply
4918 // treat them the same as right braces.
4919 if (Keywords.isVerilogEnd(Tok: *FormatTok))
4920 FormatTok->Tok.setKind(tok::r_brace);
4921 }
4922}
4923
4924void UnwrappedLineParser::distributeComments(
4925 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4926 // Whether or not a line comment token continues a line is controlled by
4927 // the method continuesLineCommentSection, with the following caveat:
4928 //
4929 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4930 // that each comment line from the trail is aligned with the next token, if
4931 // the next token exists. If a trail exists, the beginning of the maximal
4932 // trail is marked as a start of a new comment section.
4933 //
4934 // For example in this code:
4935 //
4936 // int a; // line about a
4937 // // line 1 about b
4938 // // line 2 about b
4939 // int b;
4940 //
4941 // the two lines about b form a maximal trail, so there are two sections, the
4942 // first one consisting of the single comment "// line about a" and the
4943 // second one consisting of the next two comments.
4944 if (Comments.empty())
4945 return;
4946 bool ShouldPushCommentsInCurrentLine = true;
4947 bool HasTrailAlignedWithNextToken = false;
4948 unsigned StartOfTrailAlignedWithNextToken = 0;
4949 if (NextTok) {
4950 // We are skipping the first element intentionally.
4951 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4952 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4953 HasTrailAlignedWithNextToken = true;
4954 StartOfTrailAlignedWithNextToken = i;
4955 }
4956 }
4957 }
4958 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4959 FormatToken *FormatTok = Comments[i];
4960 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4961 FormatTok->ContinuesLineCommentSection = false;
4962 } else {
4963 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4964 FormatTok: *FormatTok, Line: *Line, Style, CommentPragmasRegex);
4965 }
4966 if (!FormatTok->ContinuesLineCommentSection &&
4967 (isOnNewLine(FormatTok: *FormatTok) || FormatTok->IsFirst)) {
4968 ShouldPushCommentsInCurrentLine = false;
4969 }
4970 if (ShouldPushCommentsInCurrentLine)
4971 pushToken(Tok: FormatTok);
4972 else
4973 CommentsBeforeNextToken.push_back(Elt: FormatTok);
4974 }
4975}
4976
4977void UnwrappedLineParser::readToken(int LevelDifference) {
4978 SmallVector<FormatToken *, 1> Comments;
4979 bool PreviousWasComment = false;
4980 bool FirstNonCommentOnLine = false;
4981 do {
4982 FormatTok = Tokens->getNextToken();
4983 assert(FormatTok);
4984 while (FormatTok->isOneOf(K1: TT_ConflictStart, K2: TT_ConflictEnd,
4985 Ks: TT_ConflictAlternative)) {
4986 if (FormatTok->is(TT: TT_ConflictStart))
4987 conditionalCompilationStart(/*Unreachable=*/false);
4988 else if (FormatTok->is(TT: TT_ConflictAlternative))
4989 conditionalCompilationAlternative();
4990 else if (FormatTok->is(TT: TT_ConflictEnd))
4991 conditionalCompilationEnd();
4992 FormatTok = Tokens->getNextToken();
4993 FormatTok->MustBreakBefore = true;
4994 FormatTok->MustBreakBeforeFinalized = true;
4995 }
4996
4997 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4998 const FormatToken &Tok,
4999 bool PreviousWasComment) {
5000 auto IsFirstOnLine = [](const FormatToken &Tok) {
5001 return Tok.HasUnescapedNewline || Tok.IsFirst;
5002 };
5003
5004 // Consider preprocessor directives preceded by block comments as first
5005 // on line.
5006 if (PreviousWasComment)
5007 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
5008 return IsFirstOnLine(Tok);
5009 };
5010
5011 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
5012 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
5013 PreviousWasComment = FormatTok->is(Kind: tok::comment);
5014
5015 while (!Line->InPPDirective && FormatTok->is(Kind: tok::hash) &&
5016 FirstNonCommentOnLine) {
5017 // In Verilog, the backtick is used for macro invocations. In TableGen,
5018 // the single hash is used for the paste operator.
5019 const auto *Next = Tokens->peekNextToken();
5020 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(Tok: *Next)) ||
5021 (Style.isTableGen() &&
5022 Next->isNoneOf(Ks: tok::kw_else, Ks: tok::pp_define, Ks: tok::pp_ifdef,
5023 Ks: tok::pp_ifndef, Ks: tok::pp_endif))) {
5024 break;
5025 }
5026 distributeComments(Comments, NextTok: FormatTok);
5027 Comments.clear();
5028 // If there is an unfinished unwrapped line, we flush the preprocessor
5029 // directives only after that unwrapped line was finished later.
5030 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
5031 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
5032 assert((LevelDifference >= 0 ||
5033 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
5034 "LevelDifference makes Line->Level negative");
5035 Line->Level += LevelDifference;
5036 // Comments stored before the preprocessor directive need to be output
5037 // before the preprocessor directive, at the same level as the
5038 // preprocessor directive, as we consider them to apply to the directive.
5039 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
5040 PPBranchLevel > 0) {
5041 Line->Level += PPBranchLevel;
5042 }
5043 assert(Line->Level >= Line->UnbracedBodyLevel);
5044 Line->Level -= Line->UnbracedBodyLevel;
5045 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
5046 const bool IsEndIf = Tokens->peekNextToken()->is(Kind: tok::pp_endif);
5047 parsePPDirective();
5048 PreviousWasComment = FormatTok->is(Kind: tok::comment);
5049 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
5050 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
5051 // If the #endif of a potential include guard is the last thing in the
5052 // file, then we found an include guard.
5053 if (IsEndIf && IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
5054 getIncludeGuardState(Style: Style.IndentPPDirectives) == IG_Inited &&
5055 (eof() ||
5056 (PreviousWasComment &&
5057 Tokens->peekNextToken(/*SkipComment=*/true)->is(Kind: tok::eof)))) {
5058 IncludeGuard = IG_Found;
5059 }
5060 }
5061
5062 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
5063 !Line->InPPDirective) {
5064 continue;
5065 }
5066
5067 if (FormatTok->is(Kind: tok::identifier) &&
5068 Macros.defined(Name: FormatTok->TokenText) &&
5069 // FIXME: Allow expanding macros in preprocessor directives.
5070 !Line->InPPDirective) {
5071 FormatToken *ID = FormatTok;
5072 unsigned Position = Tokens->getPosition();
5073
5074 // To correctly parse the code, we need to replace the tokens of the macro
5075 // call with its expansion.
5076 auto PreCall = std::move(Line);
5077 Line.reset(p: new UnwrappedLine);
5078 bool OldInExpansion = InExpansion;
5079 InExpansion = true;
5080 // We parse the macro call into a new line.
5081 auto Args = parseMacroCall();
5082 InExpansion = OldInExpansion;
5083 assert(Line->Tokens.front().Tok == ID);
5084 // And remember the unexpanded macro call tokens.
5085 auto UnexpandedLine = std::move(Line);
5086 // Reset to the old line.
5087 Line = std::move(PreCall);
5088
5089 LLVM_DEBUG({
5090 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
5091 if (Args) {
5092 llvm::dbgs() << "(";
5093 for (const auto &Arg : Args.value())
5094 for (const auto &T : Arg)
5095 llvm::dbgs() << T->TokenText << " ";
5096 llvm::dbgs() << ")";
5097 }
5098 llvm::dbgs() << "\n";
5099 });
5100 if (Macros.objectLike(Name: ID->TokenText) && Args &&
5101 !Macros.hasArity(Name: ID->TokenText, Arity: Args->size())) {
5102 // The macro is either
5103 // - object-like, but we got argumnets, or
5104 // - overloaded to be both object-like and function-like, but none of
5105 // the function-like arities match the number of arguments.
5106 // Thus, expand as object-like macro.
5107 LLVM_DEBUG(llvm::dbgs()
5108 << "Macro \"" << ID->TokenText
5109 << "\" not overloaded for arity " << Args->size()
5110 << "or not function-like, using object-like overload.");
5111 Args.reset();
5112 UnexpandedLine->Tokens.resize(new_size: 1);
5113 Tokens->setPosition(Position);
5114 nextToken();
5115 assert(!Args && Macros.objectLike(ID->TokenText));
5116 }
5117 if ((!Args && Macros.objectLike(Name: ID->TokenText)) ||
5118 (Args && Macros.hasArity(Name: ID->TokenText, Arity: Args->size()))) {
5119 // Next, we insert the expanded tokens in the token stream at the
5120 // current position, and continue parsing.
5121 Unexpanded[ID] = std::move(UnexpandedLine);
5122 SmallVector<FormatToken *, 8> Expansion =
5123 Macros.expand(ID, OptionalArgs: std::move(Args));
5124 if (!Expansion.empty())
5125 FormatTok = Tokens->insertTokens(Tokens: Expansion);
5126
5127 LLVM_DEBUG({
5128 llvm::dbgs() << "Expanded: ";
5129 for (const auto &T : Expansion)
5130 llvm::dbgs() << T->TokenText << " ";
5131 llvm::dbgs() << "\n";
5132 });
5133 } else {
5134 LLVM_DEBUG({
5135 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
5136 << "\", because it was used ";
5137 if (Args)
5138 llvm::dbgs() << "with " << Args->size();
5139 else
5140 llvm::dbgs() << "without";
5141 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
5142 });
5143 Tokens->setPosition(Position);
5144 FormatTok = ID;
5145 }
5146 }
5147
5148 if (FormatTok->isNot(Kind: tok::comment)) {
5149 distributeComments(Comments, NextTok: FormatTok);
5150 Comments.clear();
5151 return;
5152 }
5153
5154 Comments.push_back(Elt: FormatTok);
5155 } while (!eof());
5156
5157 distributeComments(Comments, NextTok: nullptr);
5158 Comments.clear();
5159}
5160
5161namespace {
5162template <typename Iterator>
5163void pushTokens(Iterator Begin, Iterator End,
5164 SmallVectorImpl<FormatToken *> &Into) {
5165 for (auto I = Begin; I != End; ++I) {
5166 Into.push_back(Elt: I->Tok);
5167 for (const auto &Child : I->Children)
5168 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5169 }
5170}
5171} // namespace
5172
5173std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5174UnwrappedLineParser::parseMacroCall() {
5175 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5176 assert(Line->Tokens.empty());
5177 nextToken();
5178 if (FormatTok->isNot(Kind: tok::l_paren))
5179 return Args;
5180 unsigned Position = Tokens->getPosition();
5181 FormatToken *Tok = FormatTok;
5182 nextToken();
5183 Args.emplace();
5184 auto ArgStart = std::prev(x: Line->Tokens.end());
5185
5186 int Parens = 0;
5187 do {
5188 switch (FormatTok->Tok.getKind()) {
5189 case tok::l_paren:
5190 ++Parens;
5191 nextToken();
5192 break;
5193 case tok::r_paren: {
5194 if (Parens > 0) {
5195 --Parens;
5196 nextToken();
5197 break;
5198 }
5199 Args->push_back(Elt: {});
5200 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5201 nextToken();
5202 return Args;
5203 }
5204 case tok::comma: {
5205 if (Parens > 0) {
5206 nextToken();
5207 break;
5208 }
5209 Args->push_back(Elt: {});
5210 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5211 nextToken();
5212 ArgStart = std::prev(x: Line->Tokens.end());
5213 break;
5214 }
5215 default:
5216 nextToken();
5217 break;
5218 }
5219 } while (!eof());
5220 Line->Tokens.resize(new_size: 1);
5221 Tokens->setPosition(Position);
5222 FormatTok = Tok;
5223 return {};
5224}
5225
5226void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5227 Line->Tokens.push_back(x: UnwrappedLineNode(Tok));
5228 if (AtEndOfPPLine) {
5229 auto &Tok = *Line->Tokens.back().Tok;
5230 Tok.MustBreakBefore = true;
5231 Tok.MustBreakBeforeFinalized = true;
5232 Tok.FirstAfterPPLine = true;
5233 AtEndOfPPLine = false;
5234 }
5235}
5236
5237} // end namespace format
5238} // end namespace clang
5239