1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, Line: *CI, Prefix: (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63[[maybe_unused]] static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(OS&: llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(Val: MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
96class ScopedLineState {
97public:
98 ScopedLineState(UnwrappedLineParser &Parser,
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
114 ~ScopedLineState() {
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
125 UnwrappedLineParser &Parser;
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
131class CompoundStatementIndenter {
132public:
133 CompoundStatementIndenter(UnwrappedLineParser *Parser,
134 const FormatStyle &Style, unsigned &LineLevel)
135 : CompoundStatementIndenter(Parser, LineLevel,
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
139 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
154UnwrappedLineParser::UnwrappedLineParser(
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
157 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(getIncludeGuardState(Style: Style.IndentPPDirectives)),
166 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
167 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
168
169void UnwrappedLineParser::reset() {
170 PPBranchLevel = -1;
171 IncludeGuard = getIncludeGuardState(Style: Style.IndentPPDirectives);
172 IncludeGuardToken = nullptr;
173 Line.reset(p: new UnwrappedLine);
174 CommentsBeforeNextToken.clear();
175 FormatTok = nullptr;
176 AtEndOfPPLine = false;
177 IsDecltypeAutoFunction = false;
178 PreprocessorDirectives.clear();
179 CurrentLines = &Lines;
180 DeclarationScopeStack.clear();
181 NestedTooDeep.clear();
182 NestedLambdas.clear();
183 PPStack.clear();
184 Line->FirstStartColumn = FirstStartColumn;
185
186 if (!Unexpanded.empty())
187 for (FormatToken *Token : AllTokens)
188 Token->MacroCtx.reset();
189 CurrentExpandedLines.clear();
190 ExpandedLines.clear();
191 Unexpanded.clear();
192 InExpansion = false;
193 Reconstruct.reset();
194}
195
196void UnwrappedLineParser::parse() {
197 IndexedTokenSource TokenSource(AllTokens);
198 Line->FirstStartColumn = FirstStartColumn;
199 do {
200 LLVM_DEBUG(llvm::dbgs() << "----\n");
201 reset();
202 Tokens = &TokenSource;
203 TokenSource.reset();
204
205 readToken();
206 parseFile();
207
208 // If we found an include guard then all preprocessor directives (other than
209 // the guard) are over-indented by one.
210 if (IncludeGuard == IG_Found) {
211 for (auto &Line : Lines)
212 if (Line.InPPDirective && Line.Level > 0)
213 --Line.Level;
214 }
215
216 // Create line with eof token.
217 assert(eof());
218 pushToken(Tok: FormatTok);
219 addUnwrappedLine();
220
221 // In a first run, format everything with the lines containing macro calls
222 // replaced by the expansion.
223 if (!ExpandedLines.empty()) {
224 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
225 for (const auto &Line : Lines) {
226 if (!Line.Tokens.empty()) {
227 auto it = ExpandedLines.find(Val: Line.Tokens.begin()->Tok);
228 if (it != ExpandedLines.end()) {
229 for (const auto &Expanded : it->second) {
230 LLVM_DEBUG(printDebugInfo(Expanded));
231 Callback.consumeUnwrappedLine(Line: Expanded);
232 }
233 continue;
234 }
235 }
236 LLVM_DEBUG(printDebugInfo(Line));
237 Callback.consumeUnwrappedLine(Line);
238 }
239 Callback.finishRun();
240 }
241
242 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
243 for (const UnwrappedLine &Line : Lines) {
244 LLVM_DEBUG(printDebugInfo(Line));
245 Callback.consumeUnwrappedLine(Line);
246 }
247 Callback.finishRun();
248 Lines.clear();
249 while (!PPLevelBranchIndex.empty() &&
250 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
251 PPLevelBranchIndex.resize(N: PPLevelBranchIndex.size() - 1);
252 PPLevelBranchCount.resize(N: PPLevelBranchCount.size() - 1);
253 }
254 if (!PPLevelBranchIndex.empty()) {
255 ++PPLevelBranchIndex.back();
256 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
257 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 }
259 } while (!PPLevelBranchIndex.empty());
260}
261
262void UnwrappedLineParser::parseFile() {
263 // The top-level context in a file always has declarations, except for pre-
264 // processor directives and JavaScript files.
265 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
266 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
267 MustBeDeclaration);
268 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
269 parseBracedList();
270 else
271 parseLevel();
272 // Make sure to format the remaining tokens.
273 //
274 // LK_TextProto is special since its top-level is parsed as the body of a
275 // braced list, which does not necessarily have natural line separators such
276 // as a semicolon. Comments after the last entry that have been determined to
277 // not belong to that line, as in:
278 // key: value
279 // // endfile comment
280 // do not have a chance to be put on a line of their own until this point.
281 // Here we add this newline before end-of-file comments.
282 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
283 addUnwrappedLine();
284 flushComments(NewlineBeforeNext: true);
285 addUnwrappedLine();
286}
287
288void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289 do {
290 switch (FormatTok->Tok.getKind()) {
291 case tok::l_brace:
292 case tok::semi:
293 return;
294 default:
295 if (FormatTok->is(II: Keywords.kw_where)) {
296 addUnwrappedLine();
297 nextToken();
298 parseCSharpGenericTypeConstraint();
299 break;
300 }
301 nextToken();
302 break;
303 }
304 } while (!eof());
305}
306
307void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets = 1;
309 do {
310 switch (FormatTok->Tok.getKind()) {
311 case tok::r_square:
312 nextToken();
313 --UnpairedSquareBrackets;
314 if (UnpairedSquareBrackets == 0) {
315 addUnwrappedLine();
316 return;
317 }
318 break;
319 case tok::l_square:
320 ++UnpairedSquareBrackets;
321 nextToken();
322 break;
323 default:
324 nextToken();
325 break;
326 }
327 } while (!eof());
328}
329
330bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines.empty() && Lines.back().InPPDirective)
332 return true;
333
334 const FormatToken *Previous = Tokens->getPreviousToken();
335 return Previous && Previous->is(Kind: tok::comment) &&
336 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
337}
338
339/// Parses a level, that is ???.
340/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341/// \param IfKind The \p if statement kind in the level.
342/// \param IfLeftBrace The left brace of the \p if block in the level.
343/// \returns true if a simple block of if/else/for/while, or false otherwise.
344/// (A simple block has a single statement.)
345bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
346 IfStmtKind *IfKind,
347 FormatToken **IfLeftBrace) {
348 const bool InRequiresExpression =
349 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
350 const bool IsPrecededByCommentOrPPDirective =
351 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
352 FormatToken *IfLBrace = nullptr;
353 bool HasDoWhile = false;
354 bool HasLabel = false;
355 unsigned StatementCount = 0;
356 bool SwitchLabelEncountered = false;
357
358 do {
359 if (FormatTok->isAttribute()) {
360 nextToken();
361 if (FormatTok->is(Kind: tok::l_paren))
362 parseParens();
363 continue;
364 }
365 tok::TokenKind Kind = FormatTok->Tok.getKind();
366 if (FormatTok->is(TT: TT_MacroBlockBegin))
367 Kind = tok::l_brace;
368 else if (FormatTok->is(TT: TT_MacroBlockEnd))
369 Kind = tok::r_brace;
370
371 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
372 &HasLabel, &StatementCount] {
373 parseStructuralElement(OpeningBrace, IfKind, IfLeftBrace: &IfLBrace,
374 HasDoWhile: HasDoWhile ? nullptr : &HasDoWhile,
375 HasLabel: HasLabel ? nullptr : &HasLabel);
376 ++StatementCount;
377 assert(StatementCount > 0 && "StatementCount overflow!");
378 };
379
380 switch (Kind) {
381 case tok::comment:
382 nextToken();
383 addUnwrappedLine();
384 break;
385 case tok::l_brace:
386 if (InRequiresExpression) {
387 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
388 } else if (FormatTok->Previous &&
389 FormatTok->Previous->ClosesRequiresClause) {
390 // We need the 'default' case here to correctly parse a function
391 // l_brace.
392 ParseDefault();
393 continue;
394 }
395 if (!InRequiresExpression && FormatTok->isNot(Kind: TT_MacroBlockBegin)) {
396 if (tryToParseBracedList())
397 continue;
398 FormatTok->setFinalizedType(TT_BlockLBrace);
399 }
400 parseBlock();
401 ++StatementCount;
402 assert(StatementCount > 0 && "StatementCount overflow!");
403 addUnwrappedLine();
404 break;
405 case tok::r_brace:
406 if (OpeningBrace) {
407 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
408 OpeningBrace->isNoneOf(Ks: TT_ControlStatementLBrace, Ks: TT_ElseLBrace)) {
409 return false;
410 }
411 if (FormatTok->isNot(Kind: tok::r_brace) || StatementCount != 1 || HasLabel ||
412 HasDoWhile || IsPrecededByCommentOrPPDirective ||
413 precededByCommentOrPPDirective()) {
414 return false;
415 }
416 const FormatToken *Next = Tokens->peekNextToken();
417 if (Next->is(Kind: tok::comment) && Next->NewlinesBefore == 0)
418 return false;
419 if (IfLeftBrace)
420 *IfLeftBrace = IfLBrace;
421 return true;
422 }
423 nextToken();
424 addUnwrappedLine();
425 break;
426 case tok::kw_default: {
427 unsigned StoredPosition = Tokens->getPosition();
428 auto *Next = Tokens->getNextNonComment();
429 FormatTok = Tokens->setPosition(StoredPosition);
430 if (Next->isNoneOf(Ks: tok::colon, Ks: tok::arrow)) {
431 // default not followed by `:` or `->` is not a case label; treat it
432 // like an identifier.
433 parseStructuralElement();
434 break;
435 }
436 // Else, if it is 'default:', fall through to the case handling.
437 [[fallthrough]];
438 }
439 case tok::kw_case:
440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
441 (Style.isJavaScript() && Line->MustBeDeclaration)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
446 ParseDefault();
447 break;
448 }
449 if (!SwitchLabelEncountered &&
450 (Style.IndentCaseLabels ||
451 (OpeningBrace && OpeningBrace->is(TT: TT_SwitchExpressionLBrace)) ||
452 (Line->InPPDirective && Line->Level == 1))) {
453 ++Line->Level;
454 }
455 SwitchLabelEncountered = true;
456 parseStructuralElement();
457 break;
458 case tok::l_square:
459 if (Style.isCSharp()) {
460 nextToken();
461 parseCSharpAttribute();
462 break;
463 }
464 if (handleCppAttributes())
465 break;
466 [[fallthrough]];
467 default:
468 ParseDefault();
469 break;
470 }
471 } while (!eof());
472
473 return false;
474}
475
476void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
480 // definitions, too.
481 unsigned StoredPosition = Tokens->getPosition();
482 FormatToken *Tok = FormatTok;
483 const FormatToken *PrevTok = Tok->Previous;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
487 struct StackEntry {
488 FormatToken *Tok;
489 const FormatToken *PrevTok;
490 };
491 SmallVector<StackEntry, 8> LBraceStack;
492 assert(Tok->is(tok::l_brace));
493
494 do {
495 auto *NextTok = Tokens->getNextNonComment();
496
497 if (!Line->InMacroBody && !Style.isTableGen()) {
498 // Skip PPDirective lines (except macro definitions) and comments.
499 while (NextTok->is(Kind: tok::hash)) {
500 NextTok = Tokens->getNextToken();
501 if (NextTok->isOneOf(K1: tok::pp_not_keyword, K2: tok::pp_define))
502 break;
503 do {
504 NextTok = Tokens->getNextToken();
505 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(Kind: tok::eof));
506
507 while (NextTok->is(Kind: tok::comment))
508 NextTok = Tokens->getNextToken();
509 }
510 }
511
512 switch (Tok->Tok.getKind()) {
513 case tok::l_brace:
514 if (Style.isJavaScript() && PrevTok) {
515 if (PrevTok->isOneOf(K1: tok::colon, K2: tok::less)) {
516 // A ':' indicates this code is in a type, or a braced list
517 // following a label in an object literal ({a: {b: 1}}).
518 // A '<' could be an object used in a comparison, but that is nonsense
519 // code (can never return true), so more likely it is a generic type
520 // argument (`X<{a: string; b: number}>`).
521 // The code below could be confused by semicolons between the
522 // individual members in a type member list, which would normally
523 // trigger BK_Block. In both cases, this must be parsed as an inline
524 // braced init.
525 Tok->setBlockKind(BK_BracedInit);
526 } else if (PrevTok->is(Kind: tok::r_paren)) {
527 // `) { }` can only occur in function or method declarations in JS.
528 Tok->setBlockKind(BK_Block);
529 }
530 } else {
531 Tok->setBlockKind(BK_Unknown);
532 }
533 LBraceStack.push_back(Elt: {.Tok: Tok, .PrevTok: PrevTok});
534 break;
535 case tok::r_brace:
536 if (LBraceStack.empty())
537 break;
538 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BBK: BK_Unknown)) {
539 bool ProbablyBracedList = false;
540 if (Style.Language == FormatStyle::LK_Proto) {
541 ProbablyBracedList = NextTok->isOneOf(K1: tok::comma, K2: tok::r_square);
542 } else if (LBrace->isNot(Kind: TT_EnumLBrace)) {
543 // Using OriginalColumn to distinguish between ObjC methods and
544 // binary operators is a bit hacky.
545 bool NextIsObjCMethod = NextTok->isOneOf(K1: tok::plus, K2: tok::minus) &&
546 NextTok->OriginalColumn == 0;
547
548 // Try to detect a braced list. Note that regardless how we mark inner
549 // braces here, we will overwrite the BlockKind later if we parse a
550 // braced list (where all blocks inside are by default braced lists),
551 // or when we explicitly detect blocks (for example while parsing
552 // lambdas).
553
554 // If we already marked the opening brace as braced list, the closing
555 // must also be part of it.
556 ProbablyBracedList = LBrace->is(TT: TT_BracedListLBrace);
557
558 ProbablyBracedList = ProbablyBracedList ||
559 (Style.isJavaScript() &&
560 NextTok->isOneOf(K1: Keywords.kw_of, K2: Keywords.kw_in,
561 Ks: Keywords.kw_as));
562 ProbablyBracedList =
563 ProbablyBracedList ||
564 (IsCpp && (PrevTok->Tok.isLiteral() ||
565 NextTok->isOneOf(K1: tok::l_paren, K2: tok::arrow)));
566
567 // If there is a comma, semicolon or right paren after the closing
568 // brace, we assume this is a braced initializer list.
569 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
570 // braced list in JS.
571 ProbablyBracedList =
572 ProbablyBracedList ||
573 NextTok->isOneOf(K1: tok::comma, K2: tok::period, Ks: tok::colon,
574 Ks: tok::r_paren, Ks: tok::r_square, Ks: tok::ellipsis);
575
576 // Distinguish between braced list in a constructor initializer list
577 // followed by constructor body, or just adjacent blocks.
578 ProbablyBracedList =
579 ProbablyBracedList ||
580 (NextTok->is(Kind: tok::l_brace) && LBraceStack.back().PrevTok &&
581 LBraceStack.back().PrevTok->isOneOf(K1: tok::identifier,
582 K2: tok::greater));
583
584 ProbablyBracedList =
585 ProbablyBracedList ||
586 (NextTok->is(Kind: tok::identifier) &&
587 PrevTok->isNoneOf(Ks: tok::semi, Ks: tok::r_brace, Ks: tok::l_brace));
588
589 ProbablyBracedList = ProbablyBracedList ||
590 (NextTok->is(Kind: tok::semi) &&
591 (!ExpectClassBody || LBraceStack.size() != 1));
592
593 ProbablyBracedList =
594 ProbablyBracedList ||
595 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
596
597 if (!Style.isCSharp() && NextTok->is(Kind: tok::l_square)) {
598 // We can have an array subscript after a braced init
599 // list, but C++11 attributes are expected after blocks.
600 NextTok = Tokens->getNextToken();
601 ProbablyBracedList = NextTok->isNot(Kind: tok::l_square);
602 }
603
604 // Cpp macro definition body that is a nonempty braced list or block:
605 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
606 !FormatTok->Previous && NextTok->is(Kind: tok::eof) &&
607 // A statement can end with only `;` (simple statement), a block
608 // closing brace (compound statement), or `:` (label statement).
609 // If PrevTok is a block opening brace, Tok ends an empty block.
610 PrevTok->isNoneOf(Ks: tok::semi, Ks: BK_Block, Ks: tok::colon)) {
611 ProbablyBracedList = true;
612 }
613 }
614 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
615 Tok->setBlockKind(BlockKind);
616 LBrace->setBlockKind(BlockKind);
617 }
618 LBraceStack.pop_back();
619 break;
620 case tok::identifier:
621 if (Tok->isNot(Kind: TT_StatementMacro))
622 break;
623 [[fallthrough]];
624 case tok::at:
625 case tok::semi:
626 case tok::kw_if:
627 case tok::kw_while:
628 case tok::kw_for:
629 case tok::kw_switch:
630 case tok::kw_try:
631 case tok::kw___try:
632 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BBK: BK_Unknown))
633 LBraceStack.back().Tok->setBlockKind(BK_Block);
634 break;
635 default:
636 break;
637 }
638
639 PrevTok = Tok;
640 Tok = NextTok;
641 } while (Tok->isNot(Kind: tok::eof) && !LBraceStack.empty());
642
643 // Assume other blocks for all unclosed opening braces.
644 for (const auto &Entry : LBraceStack)
645 if (Entry.Tok->is(BBK: BK_Unknown))
646 Entry.Tok->setBlockKind(BK_Block);
647
648 FormatTok = Tokens->setPosition(StoredPosition);
649}
650
651// Sets the token type of the directly previous right brace.
652void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
653 if (auto Prev = FormatTok->getPreviousNonComment();
654 Prev && Prev->is(Kind: tok::r_brace)) {
655 Prev->setFinalizedType(Type);
656 }
657}
658
/// Mixes the \c std::hash of \p v into \p seed, updating \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
664
665size_t UnwrappedLineParser::computePPHash() const {
666 size_t h = 0;
667 for (const auto &i : PPStack) {
668 hash_combine(seed&: h, v: size_t(i.Kind));
669 hash_combine(seed&: h, v: i.Line);
670 }
671 return h;
672}
673
674// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
675// is not null, subtracts its length (plus the preceding space) when computing
676// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
677// running the token annotator on it so that we can restore them afterward.
678bool UnwrappedLineParser::mightFitOnOneLine(
679 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
680 const auto ColumnLimit = Style.ColumnLimit;
681 if (ColumnLimit == 0)
682 return true;
683
684 auto &Tokens = ParsedLine.Tokens;
685 assert(!Tokens.empty());
686
687 const auto *LastToken = Tokens.back().Tok;
688 assert(LastToken);
689
690 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
691
692 int Index = 0;
693 for (const auto &Token : Tokens) {
694 assert(Token.Tok);
695 auto &SavedToken = SavedTokens[Index++];
696 SavedToken.Tok = new FormatToken;
697 SavedToken.Tok->copyFrom(Tok: *Token.Tok);
698 SavedToken.Children = std::move(Token.Children);
699 }
700
701 AnnotatedLine Line(ParsedLine);
702 assert(Line.Last == LastToken);
703
704 TokenAnnotator Annotator(Style, Keywords);
705 Annotator.annotate(Line);
706 Annotator.calculateFormattingInformation(Line);
707
708 auto Length = LastToken->TotalLength;
709 if (OpeningBrace) {
710 assert(OpeningBrace != Tokens.front().Tok);
711 if (auto Prev = OpeningBrace->Previous;
712 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
713 Length -= ColumnLimit;
714 }
715 Length -= OpeningBrace->TokenText.size() + 1;
716 }
717
718 if (const auto *FirstToken = Line.First; FirstToken->is(Kind: tok::r_brace)) {
719 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
720 Length -= FirstToken->TokenText.size() + 1;
721 }
722
723 Index = 0;
724 for (auto &Token : Tokens) {
725 const auto &SavedToken = SavedTokens[Index++];
726 Token.Tok->copyFrom(Tok: *SavedToken.Tok);
727 Token.Children = std::move(SavedToken.Children);
728 delete SavedToken.Tok;
729 }
730
731 // If these change PPLevel needs to be used for get correct indentation.
732 assert(!Line.InMacroBody);
733 assert(!Line.InPPDirective);
734 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
735}
736
737FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
738 unsigned AddLevels, bool MunchSemi,
739 bool KeepBraces,
740 IfStmtKind *IfKind,
741 bool UnindentWhitesmithsBraces) {
742 auto HandleVerilogBlockLabel = [this]() {
743 // ":" name
744 if (Style.isVerilog() && FormatTok->is(Kind: tok::colon)) {
745 nextToken();
746 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
747 nextToken();
748 }
749 };
750
751 // Whether this is a Verilog-specific block that has a special header like a
752 // module.
753 const bool VerilogHierarchy =
754 Style.isVerilog() && Keywords.isVerilogHierarchy(Tok: *FormatTok);
755 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
756 (Style.isVerilog() &&
757 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
758 "'{' or macro block token expected");
759 FormatToken *Tok = FormatTok;
760 const bool FollowedByComment = Tokens->peekNextToken()->is(Kind: tok::comment);
761 auto Index = CurrentLines->size();
762 const bool MacroBlock = FormatTok->is(TT: TT_MacroBlockBegin);
763 FormatTok->setBlockKind(BK_Block);
764
765 const bool IsWhitesmiths =
766 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
767
768 // For Whitesmiths mode, jump to the next level prior to skipping over the
769 // braces.
770 if (!VerilogHierarchy && AddLevels > 0 && IsWhitesmiths)
771 ++Line->Level;
772
773 size_t PPStartHash = computePPHash();
774
775 const unsigned InitialLevel = Line->Level;
776 if (VerilogHierarchy) {
777 AddLevels += parseVerilogHierarchyHeader();
778 } else {
779 nextToken(/*LevelDifference=*/AddLevels);
780 HandleVerilogBlockLabel();
781 }
782
783 // Bail out if there are too many levels. Otherwise, the stack might overflow.
784 if (Line->Level > 300)
785 return nullptr;
786
787 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
788 parseParens();
789
790 size_t NbPreprocessorDirectives =
791 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
792 addUnwrappedLine();
793 size_t OpeningLineIndex =
794 CurrentLines->empty()
795 ? (UnwrappedLine::kInvalidIndex)
796 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
797
798 // Whitesmiths is weird here. The brace needs to be indented for the namespace
799 // block, but the block itself may not be indented depending on the style
800 // settings. This allows the format to back up one level in those cases.
801 if (UnindentWhitesmithsBraces)
802 --Line->Level;
803
804 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
805 MustBeDeclaration);
806
807 // Whitesmiths logic has already added a level by this point, so avoid
808 // adding it twice.
809 if (AddLevels > 0u)
810 Line->Level += AddLevels - (IsWhitesmiths ? 1 : 0);
811
812 FormatToken *IfLBrace = nullptr;
813 const bool SimpleBlock = parseLevel(OpeningBrace: Tok, IfKind, IfLeftBrace: &IfLBrace);
814
815 if (eof())
816 return IfLBrace;
817
818 if (MacroBlock ? FormatTok->isNot(Kind: TT_MacroBlockEnd)
819 : FormatTok->isNot(Kind: tok::r_brace)) {
820 Line->Level = InitialLevel;
821 FormatTok->setBlockKind(BK_Block);
822 return IfLBrace;
823 }
824
825 if (FormatTok->is(Kind: tok::r_brace)) {
826 FormatTok->setBlockKind(BK_Block);
827 if (Tok->is(TT: TT_NamespaceLBrace))
828 FormatTok->setFinalizedType(TT_NamespaceRBrace);
829 }
830
831 const bool IsFunctionRBrace =
832 FormatTok->is(Kind: tok::r_brace) && Tok->is(TT: TT_FunctionLBrace);
833
834 auto RemoveBraces = [=]() mutable {
835 if (!SimpleBlock)
836 return false;
837 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
838 assert(FormatTok->is(tok::r_brace));
839 const bool WrappedOpeningBrace = !Tok->Previous;
840 if (WrappedOpeningBrace && FollowedByComment)
841 return false;
842 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
843 if (KeepBraces && !HasRequiredIfBraces)
844 return false;
845 if (Tok->isNot(Kind: TT_ElseLBrace) || !HasRequiredIfBraces) {
846 const FormatToken *Previous = Tokens->getPreviousToken();
847 assert(Previous);
848 if (Previous->is(Kind: tok::r_brace) && !Previous->Optional)
849 return false;
850 }
851 assert(!CurrentLines->empty());
852 auto &LastLine = CurrentLines->back();
853 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(ParsedLine&: LastLine))
854 return false;
855 if (Tok->is(TT: TT_ElseLBrace))
856 return true;
857 if (WrappedOpeningBrace) {
858 assert(Index > 0);
859 --Index; // The line above the wrapped l_brace.
860 Tok = nullptr;
861 }
862 return mightFitOnOneLine(ParsedLine&: (*CurrentLines)[Index], OpeningBrace: Tok);
863 };
864 if (RemoveBraces()) {
865 Tok->MatchingParen = FormatTok;
866 FormatTok->MatchingParen = Tok;
867 }
868
869 size_t PPEndHash = computePPHash();
870
871 // Munch the closing brace.
872 nextToken(/*LevelDifference=*/-AddLevels);
873
874 // When this is a function block and there is an unnecessary semicolon
875 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
876 // it later).
877 if (Style.RemoveSemicolon && IsFunctionRBrace) {
878 while (FormatTok->is(Kind: tok::semi)) {
879 FormatTok->Optional = true;
880 nextToken();
881 }
882 }
883
884 HandleVerilogBlockLabel();
885
886 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
887 parseParens();
888
889 Line->Level = InitialLevel;
890
891 if (FormatTok->is(Kind: tok::kw_noexcept)) {
892 // A noexcept in a requires expression.
893 nextToken();
894 }
895
896 if (FormatTok->is(Kind: tok::arrow)) {
897 // Following the } or noexcept we can find a trailing return type arrow
898 // as part of an implicit conversion constraint.
899 nextToken();
900 parseStructuralElement();
901 }
902
903 if (MunchSemi && FormatTok->is(Kind: tok::semi))
904 nextToken();
905
906 if (PPStartHash == PPEndHash) {
907 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
908 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
909 // Update the opening line to add the forward reference as well
910 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
911 CurrentLines->size() - 1;
912 }
913 }
914
915 return IfLBrace;
916}
917
918static bool isGoogScope(const UnwrappedLine &Line) {
919 // FIXME: Closure-library specific stuff should not be hard-coded but be
920 // configurable.
921 if (Line.Tokens.size() < 4)
922 return false;
923 auto I = Line.Tokens.begin();
924 if (I->Tok->TokenText != "goog")
925 return false;
926 ++I;
927 if (I->Tok->isNot(Kind: tok::period))
928 return false;
929 ++I;
930 if (I->Tok->TokenText != "scope")
931 return false;
932 ++I;
933 return I->Tok->is(Kind: tok::l_paren);
934}
935
936static bool isIIFE(const UnwrappedLine &Line,
937 const AdditionalKeywords &Keywords) {
938 // Look for the start of an immediately invoked anonymous function.
939 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
940 // This is commonly done in JavaScript to create a new, anonymous scope.
941 // Example: (function() { ... })()
942 if (Line.Tokens.size() < 3)
943 return false;
944 auto I = Line.Tokens.begin();
945 if (I->Tok->isNot(Kind: tok::l_paren))
946 return false;
947 ++I;
948 if (I->Tok->isNot(Kind: Keywords.kw_function))
949 return false;
950 ++I;
951 return I->Tok->is(Kind: tok::l_paren);
952}
953
954static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
955 const FormatToken &InitialToken,
956 bool IsEmptyBlock,
957 bool IsJavaRecord = false) {
958 if (IsJavaRecord)
959 return Style.BraceWrapping.AfterClass;
960
961 tok::TokenKind Kind = InitialToken.Tok.getKind();
962 if (InitialToken.is(TT: TT_NamespaceMacro))
963 Kind = tok::kw_namespace;
964
965 const bool WrapRecordAllowed =
966 !IsEmptyBlock ||
967 Style.AllowShortRecordOnASingleLine < FormatStyle::SRS_Empty ||
968 Style.BraceWrapping.SplitEmptyRecord;
969
970 switch (Kind) {
971 case tok::kw_namespace:
972 return Style.BraceWrapping.AfterNamespace;
973 case tok::kw_class:
974 return Style.BraceWrapping.AfterClass && WrapRecordAllowed;
975 case tok::kw_union:
976 return Style.BraceWrapping.AfterUnion && WrapRecordAllowed;
977 case tok::kw_struct:
978 return Style.BraceWrapping.AfterStruct && WrapRecordAllowed;
979 case tok::kw_enum:
980 return Style.BraceWrapping.AfterEnum;
981 default:
982 return false;
983 }
984}
985
986void UnwrappedLineParser::parseChildBlock() {
987 assert(FormatTok->is(tok::l_brace));
988 FormatTok->setBlockKind(BK_Block);
989 const FormatToken *OpeningBrace = FormatTok;
990 nextToken();
991 {
992 bool SkipIndent = (Style.isJavaScript() &&
993 (isGoogScope(Line: *Line) || isIIFE(Line: *Line, Keywords)));
994 ScopedLineState LineState(*this);
995 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
996 /*MustBeDeclaration=*/false);
997 Line->Level += SkipIndent ? 0 : 1;
998 parseLevel(OpeningBrace);
999 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
1000 Line->Level -= SkipIndent ? 0 : 1;
1001 }
1002 nextToken();
1003}
1004
1005void UnwrappedLineParser::parsePPDirective() {
1006 assert(FormatTok->is(tok::hash) && "'#' expected");
1007 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1008
1009 nextToken();
1010
1011 if (!FormatTok->Tok.getIdentifierInfo()) {
1012 parsePPUnknown();
1013 return;
1014 }
1015
1016 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1017 case tok::pp_define:
1018 parsePPDefine();
1019 return;
1020 case tok::pp_if:
1021 parsePPIf(/*IfDef=*/false);
1022 break;
1023 case tok::pp_ifdef:
1024 case tok::pp_ifndef:
1025 parsePPIf(/*IfDef=*/true);
1026 break;
1027 case tok::pp_else:
1028 case tok::pp_elifdef:
1029 case tok::pp_elifndef:
1030 case tok::pp_elif:
1031 parsePPElse();
1032 break;
1033 case tok::pp_endif:
1034 parsePPEndIf();
1035 break;
1036 case tok::pp_pragma:
1037 parsePPPragma();
1038 break;
1039 case tok::pp_error:
1040 case tok::pp_warning:
1041 nextToken();
1042 if (!eof() && Style.isCpp())
1043 FormatTok->setFinalizedType(TT_AfterPPDirective);
1044 [[fallthrough]];
1045 default:
1046 parsePPUnknown();
1047 break;
1048 }
1049}
1050
1051void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1052 size_t Line = CurrentLines->size();
1053 if (CurrentLines == &PreprocessorDirectives)
1054 Line += Lines.size();
1055
1056 if (Unreachable ||
1057 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1058 PPStack.push_back(Elt: {PP_Unreachable, Line});
1059 } else {
1060 PPStack.push_back(Elt: {PP_Conditional, Line});
1061 }
1062}
1063
1064void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1065 ++PPBranchLevel;
1066 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1067 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1068 PPLevelBranchIndex.push_back(Elt: 0);
1069 PPLevelBranchCount.push_back(Elt: 0);
1070 }
1071 PPChainBranchIndex.push(x: Unreachable ? -1 : 0);
1072 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1073 conditionalCompilationCondition(Unreachable: Unreachable || Skip);
1074}
1075
1076void UnwrappedLineParser::conditionalCompilationAlternative() {
1077 if (!PPStack.empty())
1078 PPStack.pop_back();
1079 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1080 if (!PPChainBranchIndex.empty())
1081 ++PPChainBranchIndex.top();
1082 conditionalCompilationCondition(
1083 Unreachable: PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1084 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1085}
1086
1087void UnwrappedLineParser::conditionalCompilationEnd() {
1088 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1089 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1090 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1091 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1092 }
1093 // Guard against #endif's without #if.
1094 if (PPBranchLevel > -1)
1095 --PPBranchLevel;
1096 if (!PPChainBranchIndex.empty())
1097 PPChainBranchIndex.pop();
1098 if (!PPStack.empty())
1099 PPStack.pop_back();
1100}
1101
1102void UnwrappedLineParser::parsePPIf(bool IfDef) {
1103 bool IfNDef = FormatTok->is(Kind: tok::pp_ifndef);
1104 nextToken();
1105 bool Unreachable = false;
1106 if (!IfDef && (FormatTok->is(Kind: tok::kw_false) || FormatTok->TokenText == "0"))
1107 Unreachable = true;
1108 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1109 Unreachable = true;
1110 conditionalCompilationStart(Unreachable);
1111 FormatToken *IfCondition = FormatTok;
1112 // If there's a #ifndef on the first line, and the only lines before it are
1113 // comments, it could be an include guard.
1114 bool MaybeIncludeGuard = IfNDef;
1115 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1116 for (auto &Line : Lines) {
1117 if (Line.Tokens.front().Tok->isNot(Kind: tok::comment)) {
1118 MaybeIncludeGuard = false;
1119 IncludeGuard = IG_Rejected;
1120 break;
1121 }
1122 }
1123 }
1124 --PPBranchLevel;
1125 parsePPUnknown();
1126 ++PPBranchLevel;
1127 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1128 IncludeGuard = IG_IfNdefed;
1129 IncludeGuardToken = IfCondition;
1130 }
1131}
1132
1133void UnwrappedLineParser::parsePPElse() {
1134 // If a potential include guard has an #else, it's not an include guard.
1135 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1136 IncludeGuard = IG_Rejected;
1137 // Don't crash when there is an #else without an #if.
1138 assert(PPBranchLevel >= -1);
1139 if (PPBranchLevel == -1)
1140 conditionalCompilationStart(/*Unreachable=*/true);
1141 conditionalCompilationAlternative();
1142 --PPBranchLevel;
1143 parsePPUnknown();
1144 ++PPBranchLevel;
1145}
1146
1147void UnwrappedLineParser::parsePPEndIf() {
1148 conditionalCompilationEnd();
1149 parsePPUnknown();
1150}
1151
1152void UnwrappedLineParser::parsePPDefine() {
1153 nextToken();
1154
1155 if (!FormatTok->Tok.getIdentifierInfo()) {
1156 IncludeGuard = IG_Rejected;
1157 IncludeGuardToken = nullptr;
1158 parsePPUnknown();
1159 return;
1160 }
1161
1162 bool MaybeIncludeGuard = false;
1163 if (IncludeGuard == IG_IfNdefed &&
1164 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1165 IncludeGuard = IG_Defined;
1166 IncludeGuardToken = nullptr;
1167 for (auto &Line : Lines) {
1168 if (Line.Tokens.front().Tok->isNoneOf(Ks: tok::comment, Ks: tok::hash)) {
1169 IncludeGuard = IG_Rejected;
1170 break;
1171 }
1172 }
1173 MaybeIncludeGuard = IncludeGuard == IG_Defined;
1174 }
1175
1176 // In the context of a define, even keywords should be treated as normal
1177 // identifiers. Setting the kind to identifier is not enough, because we need
1178 // to treat additional keywords like __except as well, which are already
1179 // identifiers. Setting the identifier info to null interferes with include
1180 // guard processing above, and changes preprocessing nesting.
1181 FormatTok->Tok.setKind(tok::identifier);
1182 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1183 nextToken();
1184
1185 // IncludeGuard can't have a non-empty macro definition.
1186 if (MaybeIncludeGuard && !eof())
1187 IncludeGuard = IG_Rejected;
1188
1189 if (FormatTok->is(Kind: tok::l_paren) && !FormatTok->hasWhitespaceBefore())
1190 parseParens();
1191 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1192 Line->Level += PPBranchLevel + 1;
1193 addUnwrappedLine();
1194 ++Line->Level;
1195
1196 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1197 assert((int)Line->PPLevel >= 0);
1198
1199 if (eof())
1200 return;
1201
1202 Line->InMacroBody = true;
1203
1204 if (!Style.SkipMacroDefinitionBody) {
1205 // Errors during a preprocessor directive can only affect the layout of the
1206 // preprocessor directive, and thus we ignore them. An alternative approach
1207 // would be to use the same approach we use on the file level (no
1208 // re-indentation if there was a structural error) within the macro
1209 // definition.
1210 parseFile();
1211 return;
1212 }
1213
1214 for (auto *Comment : CommentsBeforeNextToken)
1215 Comment->Finalized = true;
1216
1217 do {
1218 FormatTok->Finalized = true;
1219 FormatTok = Tokens->getNextToken();
1220 } while (!eof());
1221
1222 addUnwrappedLine();
1223}
1224
1225void UnwrappedLineParser::parsePPPragma() {
1226 Line->InPragmaDirective = true;
1227 parsePPUnknown();
1228}
1229
1230void UnwrappedLineParser::parsePPUnknown() {
1231 while (!eof())
1232 nextToken();
1233 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1234 Line->Level += PPBranchLevel + 1;
1235 addUnwrappedLine();
1236}
1237
1238// Here we exclude certain tokens that are not usually the first token in an
1239// unwrapped line. This is used in attempt to distinguish macro calls without
1240// trailing semicolons from other constructs split to several lines.
1241static bool tokenCanStartNewLine(const FormatToken &Tok) {
1242 // Semicolon can be a null-statement, l_square can be a start of a macro or
1243 // a C++11 attribute, but this doesn't seem to be common.
1244 return Tok.isNoneOf(Ks: tok::semi, Ks: tok::l_brace,
1245 // Tokens that can only be used as binary operators and a
1246 // part of overloaded operator names.
1247 Ks: tok::period, Ks: tok::periodstar, Ks: tok::arrow, Ks: tok::arrowstar,
1248 Ks: tok::less, Ks: tok::greater, Ks: tok::slash, Ks: tok::percent,
1249 Ks: tok::lessless, Ks: tok::greatergreater, Ks: tok::equal,
1250 Ks: tok::plusequal, Ks: tok::minusequal, Ks: tok::starequal,
1251 Ks: tok::slashequal, Ks: tok::percentequal, Ks: tok::ampequal,
1252 Ks: tok::pipeequal, Ks: tok::caretequal, Ks: tok::greatergreaterequal,
1253 Ks: tok::lesslessequal,
1254 // Colon is used in labels, base class lists, initializer
1255 // lists, range-based for loops, ternary operator, but
1256 // should never be the first token in an unwrapped line.
1257 Ks: tok::colon,
1258 // 'noexcept' is a trailing annotation.
1259 Ks: tok::kw_noexcept);
1260}
1261
1262static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1263 const FormatToken *FormatTok) {
1264 // FIXME: This returns true for C/C++ keywords like 'struct'.
1265 return FormatTok->is(Kind: tok::identifier) &&
1266 (!FormatTok->Tok.getIdentifierInfo() ||
1267 FormatTok->isNoneOf(
1268 Ks: Keywords.kw_in, Ks: Keywords.kw_of, Ks: Keywords.kw_as, Ks: Keywords.kw_async,
1269 Ks: Keywords.kw_await, Ks: Keywords.kw_yield, Ks: Keywords.kw_finally,
1270 Ks: Keywords.kw_function, Ks: Keywords.kw_import, Ks: Keywords.kw_is,
1271 Ks: Keywords.kw_let, Ks: Keywords.kw_var, Ks: tok::kw_const,
1272 Ks: Keywords.kw_abstract, Ks: Keywords.kw_extends, Ks: Keywords.kw_implements,
1273 Ks: Keywords.kw_instanceof, Ks: Keywords.kw_interface,
1274 Ks: Keywords.kw_override, Ks: Keywords.kw_throws, Ks: Keywords.kw_from));
1275}
1276
1277static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1278 const FormatToken *FormatTok) {
1279 return FormatTok->Tok.isLiteral() ||
1280 FormatTok->isOneOf(K1: tok::kw_true, K2: tok::kw_false) ||
1281 mustBeJSIdent(Keywords, FormatTok);
1282}
1283
1284// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1285// when encountered after a value (see mustBeJSIdentOrValue).
1286static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1287 const FormatToken *FormatTok) {
1288 return FormatTok->isOneOf(
1289 K1: tok::kw_return, K2: Keywords.kw_yield,
1290 // conditionals
1291 Ks: tok::kw_if, Ks: tok::kw_else,
1292 // loops
1293 Ks: tok::kw_for, Ks: tok::kw_while, Ks: tok::kw_do, Ks: tok::kw_continue, Ks: tok::kw_break,
1294 // switch/case
1295 Ks: tok::kw_switch, Ks: tok::kw_case,
1296 // exceptions
1297 Ks: tok::kw_throw, Ks: tok::kw_try, Ks: tok::kw_catch, Ks: Keywords.kw_finally,
1298 // declaration
1299 Ks: tok::kw_const, Ks: tok::kw_class, Ks: Keywords.kw_var, Ks: Keywords.kw_let,
1300 Ks: Keywords.kw_async, Ks: Keywords.kw_function,
1301 // import/export
1302 Ks: Keywords.kw_import, Ks: tok::kw_export);
1303}
1304
1305// Checks whether a token is a type in K&R C (aka C78).
1306static bool isC78Type(const FormatToken &Tok) {
1307 return Tok.isOneOf(K1: tok::kw_char, K2: tok::kw_short, Ks: tok::kw_int, Ks: tok::kw_long,
1308 Ks: tok::kw_unsigned, Ks: tok::kw_float, Ks: tok::kw_double,
1309 Ks: tok::identifier);
1310}
1311
1312// This function checks whether a token starts the first parameter declaration
1313// in a K&R C (aka C78) function definition, e.g.:
1314// int f(a, b)
1315// short a, b;
1316// {
1317// return a + b;
1318// }
1319static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1320 const FormatToken *FuncName) {
1321 assert(Tok);
1322 assert(Next);
1323 assert(FuncName);
1324
1325 if (FuncName->isNot(Kind: tok::identifier))
1326 return false;
1327
1328 const FormatToken *Prev = FuncName->Previous;
1329 if (!Prev || (Prev->isNot(Kind: tok::star) && !isC78Type(Tok: *Prev)))
1330 return false;
1331
1332 if (!isC78Type(Tok: *Tok) &&
1333 Tok->isNoneOf(Ks: tok::kw_register, Ks: tok::kw_struct, Ks: tok::kw_union)) {
1334 return false;
1335 }
1336
1337 if (Next->isNot(Kind: tok::star) && !Next->Tok.getIdentifierInfo())
1338 return false;
1339
1340 Tok = Tok->Previous;
1341 if (!Tok || Tok->isNot(Kind: tok::r_paren))
1342 return false;
1343
1344 Tok = Tok->Previous;
1345 if (!Tok || Tok->isNot(Kind: tok::identifier))
1346 return false;
1347
1348 return Tok->Previous && Tok->Previous->isOneOf(K1: tok::l_paren, K2: tok::comma);
1349}
1350
1351bool UnwrappedLineParser::parseModuleImport() {
1352 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1353
1354 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1355 !Token->Tok.getIdentifierInfo() &&
1356 Token->isNoneOf(Ks: tok::colon, Ks: tok::less, Ks: tok::string_literal)) {
1357 return false;
1358 }
1359
1360 nextToken();
1361 while (!eof()) {
1362 if (FormatTok->is(Kind: tok::colon)) {
1363 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1364 }
1365 // Handle import <foo/bar.h> as we would an include statement.
1366 else if (FormatTok->is(Kind: tok::less)) {
1367 nextToken();
1368 while (FormatTok->isNoneOf(Ks: tok::semi, Ks: tok::greater) && !eof()) {
1369 // Mark tokens up to the trailing line comments as implicit string
1370 // literals.
1371 if (FormatTok->isNot(Kind: tok::comment) &&
1372 !FormatTok->TokenText.starts_with(Prefix: "//")) {
1373 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1374 }
1375 nextToken();
1376 }
1377 }
1378 if (FormatTok->is(Kind: tok::semi)) {
1379 nextToken();
1380 break;
1381 }
1382 nextToken();
1383 }
1384
1385 addUnwrappedLine();
1386 return true;
1387}
1388
1389// readTokenWithJavaScriptASI reads the next token and terminates the current
1390// line if JavaScript Automatic Semicolon Insertion must
1391// happen between the current token and the next token.
1392//
1393// This method is conservative - it cannot cover all edge cases of JavaScript,
1394// but only aims to correctly handle certain well known cases. It *must not*
1395// return true in speculative cases.
1396void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1397 FormatToken *Previous = FormatTok;
1398 readToken();
1399 FormatToken *Next = FormatTok;
1400
1401 bool IsOnSameLine =
1402 CommentsBeforeNextToken.empty()
1403 ? Next->NewlinesBefore == 0
1404 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1405 if (IsOnSameLine)
1406 return;
1407
1408 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Previous);
1409 bool PreviousStartsTemplateExpr =
1410 Previous->is(TT: TT_TemplateString) && Previous->TokenText.ends_with(Suffix: "${");
1411 if (PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) {
1412 // If the line contains an '@' sign, the previous token might be an
1413 // annotation, which can precede another identifier/value.
1414 bool HasAt = llvm::any_of(Range&: Line->Tokens, P: [](UnwrappedLineNode &LineNode) {
1415 return LineNode.Tok->is(Kind: tok::at);
1416 });
1417 if (HasAt)
1418 return;
1419 }
1420 if (Next->is(Kind: tok::exclaim) && PreviousMustBeValue)
1421 return addUnwrappedLine();
1422 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Next);
1423 bool NextEndsTemplateExpr =
1424 Next->is(TT: TT_TemplateString) && Next->TokenText.starts_with(Prefix: "}");
1425 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1426 (PreviousMustBeValue ||
1427 Previous->isOneOf(K1: tok::r_square, K2: tok::r_paren, Ks: tok::plusplus,
1428 Ks: tok::minusminus))) {
1429 return addUnwrappedLine();
1430 }
1431 if ((PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) &&
1432 isJSDeclOrStmt(Keywords, FormatTok: Next)) {
1433 return addUnwrappedLine();
1434 }
1435}
1436
1437void UnwrappedLineParser::parseStructuralElement(
1438 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1439 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1440 if (Style.isTableGen() && FormatTok->is(Kind: tok::pp_include)) {
1441 nextToken();
1442 if (FormatTok->is(Kind: tok::string_literal))
1443 nextToken();
1444 addUnwrappedLine();
1445 return;
1446 }
1447
1448 if (IsCpp) {
1449 while (FormatTok->is(Kind: tok::l_square) && handleCppAttributes()) {
1450 }
1451 } else if (Style.isVerilog()) {
1452 if (Keywords.isVerilogStructuredProcedure(Tok: *FormatTok)) {
1453 parseForOrWhileLoop(/*HasParens=*/false);
1454 return;
1455 }
1456 if (FormatTok->isOneOf(K1: Keywords.kw_foreach, K2: Keywords.kw_repeat)) {
1457 parseForOrWhileLoop();
1458 return;
1459 }
1460 if (FormatTok->isOneOf(K1: tok::kw_restrict, K2: Keywords.kw_assert,
1461 Ks: Keywords.kw_assume, Ks: Keywords.kw_cover)) {
1462 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1463 return;
1464 }
1465
1466 // Skip things that can exist before keywords like 'if' and 'case'.
1467 while (true) {
1468 if (FormatTok->isOneOf(K1: Keywords.kw_priority, K2: Keywords.kw_unique,
1469 Ks: Keywords.kw_unique0)) {
1470 nextToken();
1471 } else if (FormatTok->is(Kind: tok::l_paren) &&
1472 Tokens->peekNextToken()->is(Kind: tok::star)) {
1473 parseParens();
1474 } else {
1475 break;
1476 }
1477 }
1478 }
1479
1480 // Tokens that only make sense at the beginning of a line.
1481 if (FormatTok->isAccessSpecifierKeyword()) {
1482 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1483 nextToken();
1484 else
1485 parseAccessSpecifier();
1486 return;
1487 }
1488 switch (FormatTok->Tok.getKind()) {
1489 case tok::kw_asm:
1490 nextToken();
1491 if (FormatTok->is(Kind: tok::l_brace)) {
1492 FormatTok->setFinalizedType(TT_InlineASMBrace);
1493 nextToken();
1494 while (FormatTok && !eof()) {
1495 if (FormatTok->is(Kind: tok::r_brace)) {
1496 FormatTok->setFinalizedType(TT_InlineASMBrace);
1497 nextToken();
1498 addUnwrappedLine();
1499 break;
1500 }
1501 FormatTok->Finalized = true;
1502 nextToken();
1503 }
1504 }
1505 break;
1506 case tok::kw_namespace:
1507 parseNamespace();
1508 return;
1509 case tok::kw_if: {
1510 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1511 // field/method declaration.
1512 break;
1513 }
1514 FormatToken *Tok = parseIfThenElse(IfKind);
1515 if (IfLeftBrace)
1516 *IfLeftBrace = Tok;
1517 return;
1518 }
1519 case tok::kw_for:
1520 case tok::kw_while:
1521 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1522 // field/method declaration.
1523 break;
1524 }
1525 parseForOrWhileLoop();
1526 return;
1527 case tok::kw_do:
1528 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1529 // field/method declaration.
1530 break;
1531 }
1532 parseDoWhile();
1533 if (HasDoWhile)
1534 *HasDoWhile = true;
1535 return;
1536 case tok::kw_switch:
1537 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1538 // 'switch: string' field declaration.
1539 break;
1540 }
1541 parseSwitch(/*IsExpr=*/false);
1542 return;
1543 case tok::kw_default: {
1544 // In Verilog default along with other labels are handled in the next loop.
1545 if (Style.isVerilog())
1546 break;
1547 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1548 // 'default: string' field declaration.
1549 break;
1550 }
1551 auto *Default = FormatTok;
1552 nextToken();
1553 if (FormatTok->is(Kind: tok::colon)) {
1554 FormatTok->setFinalizedType(TT_CaseLabelColon);
1555 parseLabel();
1556 return;
1557 }
1558 if (FormatTok->is(Kind: tok::arrow)) {
1559 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1560 Default->setFinalizedType(TT_SwitchExpressionLabel);
1561 parseLabel();
1562 return;
1563 }
1564 // e.g. "default void f() {}" in a Java interface.
1565 break;
1566 }
1567 case tok::kw_case:
1568 // Proto: there are no switch/case statements.
1569 if (Style.Language == FormatStyle::LK_Proto) {
1570 nextToken();
1571 return;
1572 }
1573 if (Style.isVerilog()) {
1574 parseBlock();
1575 addUnwrappedLine();
1576 return;
1577 }
1578 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1579 // 'case: string' field declaration.
1580 nextToken();
1581 break;
1582 }
1583 parseCaseLabel();
1584 return;
1585 case tok::kw_goto:
1586 nextToken();
1587 if (FormatTok->is(Kind: tok::kw_case))
1588 nextToken();
1589 break;
1590 case tok::kw_try:
1591 case tok::kw___try:
1592 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1593 // field/method declaration.
1594 break;
1595 }
1596 parseTryCatch();
1597 return;
1598 case tok::kw_extern:
1599 if (Style.isVerilog()) {
1600 // In Verilog an extern module declaration looks like a start of module.
1601 // But there is no body and endmodule. So we handle it separately.
1602 parseVerilogExtern();
1603 return;
1604 }
1605 nextToken();
1606 if (FormatTok->is(Kind: tok::string_literal)) {
1607 nextToken();
1608 if (FormatTok->is(Kind: tok::l_brace)) {
1609 if (Style.BraceWrapping.AfterExternBlock)
1610 addUnwrappedLine();
1611 // Either we indent or for backwards compatibility we follow the
1612 // AfterExternBlock style.
1613 unsigned AddLevels =
1614 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1615 (Style.BraceWrapping.AfterExternBlock &&
1616 Style.IndentExternBlock ==
1617 FormatStyle::IEBS_AfterExternBlock)
1618 ? 1u
1619 : 0u;
1620 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1621 addUnwrappedLine();
1622 return;
1623 }
1624 }
1625 break;
1626 case tok::kw_export:
1627 if (Style.isJavaScript()) {
1628 parseJavaScriptEs6ImportExport();
1629 return;
1630 }
1631 if (Style.isVerilog()) {
1632 parseVerilogExtern();
1633 return;
1634 }
1635 if (IsCpp) {
1636 nextToken();
1637 if (FormatTok->is(Kind: tok::kw_namespace)) {
1638 parseNamespace();
1639 return;
1640 }
1641 if (FormatTok->is(Kind: tok::l_brace)) {
1642 parseCppExportBlock();
1643 return;
1644 }
1645 if (FormatTok->is(II: Keywords.kw_import) && parseModuleImport())
1646 return;
1647 }
1648 break;
1649 case tok::kw_inline:
1650 nextToken();
1651 if (FormatTok->is(Kind: tok::kw_namespace)) {
1652 parseNamespace();
1653 return;
1654 }
1655 break;
1656 case tok::identifier:
1657 if (FormatTok->is(TT: TT_ForEachMacro)) {
1658 parseForOrWhileLoop();
1659 return;
1660 }
1661 if (FormatTok->is(TT: TT_MacroBlockBegin)) {
1662 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1663 /*MunchSemi=*/false);
1664 return;
1665 }
1666 if (FormatTok->is(II: Keywords.kw_import)) {
1667 if (Style.isJavaScript()) {
1668 parseJavaScriptEs6ImportExport();
1669 return;
1670 }
1671 if (Style.Language == FormatStyle::LK_Proto) {
1672 nextToken();
1673 if (FormatTok->is(Kind: tok::kw_public))
1674 nextToken();
1675 if (FormatTok->isNot(Kind: tok::string_literal))
1676 return;
1677 nextToken();
1678 if (FormatTok->is(Kind: tok::semi))
1679 nextToken();
1680 addUnwrappedLine();
1681 return;
1682 }
1683 if (Style.isVerilog()) {
1684 parseVerilogExtern();
1685 return;
1686 }
1687 if (IsCpp && parseModuleImport())
1688 return;
1689 }
1690 if (IsCpp && FormatTok->isOneOf(K1: Keywords.kw_signals, K2: Keywords.kw_qsignals,
1691 Ks: Keywords.kw_slots, Ks: Keywords.kw_qslots)) {
1692 nextToken();
1693 if (FormatTok->is(Kind: tok::colon)) {
1694 nextToken();
1695 addUnwrappedLine();
1696 return;
1697 }
1698 }
1699 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
1700 parseStatementMacro();
1701 return;
1702 }
1703 if (IsCpp && FormatTok->is(TT: TT_NamespaceMacro)) {
1704 parseNamespace();
1705 return;
1706 }
1707 // In Verilog labels can be any expression, so we don't do them here.
1708 // JS doesn't have macros, and within classes colons indicate fields, not
1709 // labels.
1710 // TableGen doesn't have labels.
1711 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1712 Tokens->peekNextToken()->is(Kind: tok::colon) && !Line->MustBeDeclaration) {
1713 nextToken();
1714 if (!Line->InMacroBody || CurrentLines->size() > 1)
1715 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1716 FormatTok->setFinalizedType(TT_GotoLabelColon);
1717 parseLabel(IndentGotoLabels: Style.IndentGotoLabels);
1718 if (HasLabel)
1719 *HasLabel = true;
1720 return;
1721 }
1722 if (Style.isJava() && FormatTok->is(II: Keywords.kw_record)) {
1723 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1724 addUnwrappedLine();
1725 return;
1726 }
1727 // In all other cases, parse the declaration.
1728 break;
1729 default:
1730 break;
1731 }
1732
1733 bool SeenEqual = false;
1734 for (const bool InRequiresExpression =
1735 OpeningBrace && OpeningBrace->isOneOf(K1: TT_RequiresExpressionLBrace,
1736 K2: TT_CompoundRequirementLBrace);
1737 !eof();) {
1738 const FormatToken *Previous = FormatTok->Previous;
1739 switch (FormatTok->Tok.getKind()) {
1740 case tok::at:
1741 nextToken();
1742 if (FormatTok->is(Kind: tok::l_brace)) {
1743 nextToken();
1744 parseBracedList();
1745 break;
1746 }
1747 if (Style.isJava() && FormatTok->is(II: Keywords.kw_interface)) {
1748 nextToken();
1749 break;
1750 }
1751 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1752 case tok::objc_public:
1753 case tok::objc_protected:
1754 case tok::objc_package:
1755 case tok::objc_private:
1756 return parseAccessSpecifier();
1757 case tok::objc_interface:
1758 case tok::objc_implementation:
1759 return parseObjCInterfaceOrImplementation();
1760 case tok::objc_protocol:
1761 if (parseObjCProtocol())
1762 return;
1763 break;
1764 case tok::objc_end:
1765 return; // Handled by the caller.
1766 case tok::objc_optional:
1767 case tok::objc_required:
1768 nextToken();
1769 addUnwrappedLine();
1770 return;
1771 case tok::objc_autoreleasepool:
1772 IsAutoRelease = true;
1773 [[fallthrough]];
1774 case tok::objc_synchronized:
1775 nextToken();
1776 if (!IsAutoRelease && FormatTok->is(Kind: tok::l_paren)) {
1777 // Skip synchronization object
1778 parseParens();
1779 }
1780 if (FormatTok->is(Kind: tok::l_brace)) {
1781 if (Style.BraceWrapping.AfterControlStatement ==
1782 FormatStyle::BWACS_Always) {
1783 addUnwrappedLine();
1784 }
1785 parseBlock();
1786 }
1787 addUnwrappedLine();
1788 return;
1789 case tok::objc_try:
1790 // This branch isn't strictly necessary (the kw_try case below would
1791 // do this too after the tok::at is parsed above). But be explicit.
1792 parseTryCatch();
1793 return;
1794 default:
1795 break;
1796 }
1797 break;
1798 case tok::kw_requires: {
1799 if (IsCpp) {
1800 bool ParsedClause = parseRequires(SeenEqual);
1801 if (ParsedClause)
1802 return;
1803 } else {
1804 nextToken();
1805 }
1806 break;
1807 }
1808 case tok::kw_enum:
1809 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1810 // "template <..., enum ...>".
1811 if (Previous && Previous->isOneOf(K1: tok::less, K2: tok::arrow, Ks: tok::comma)) {
1812 nextToken();
1813 break;
1814 }
1815
1816 // parseEnum falls through and does not yet add an unwrapped line as an
1817 // enum definition can start a structural element.
1818 if (!parseEnum())
1819 break;
1820 // This only applies to C++ and Verilog.
1821 if (!IsCpp && !Style.isVerilog()) {
1822 addUnwrappedLine();
1823 return;
1824 }
1825 break;
1826 case tok::kw_typedef:
1827 nextToken();
1828 if (FormatTok->isOneOf(K1: Keywords.kw_NS_ENUM, K2: Keywords.kw_NS_OPTIONS,
1829 Ks: Keywords.kw_CF_ENUM, Ks: Keywords.kw_CF_OPTIONS,
1830 Ks: Keywords.kw_CF_CLOSED_ENUM,
1831 Ks: Keywords.kw_NS_CLOSED_ENUM)) {
1832 parseEnum();
1833 }
1834 break;
1835 case tok::kw_class:
1836 if (Style.isVerilog()) {
1837 parseBlock();
1838 addUnwrappedLine();
1839 return;
1840 }
1841 if (Style.isTableGen()) {
1842 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1843 // This is same as def and so on.
1844 nextToken();
1845 break;
1846 }
1847 [[fallthrough]];
1848 case tok::kw_struct:
1849 case tok::kw_union:
1850 if (parseStructLike())
1851 return;
1852 break;
1853 case tok::kw_decltype:
1854 nextToken();
1855 if (FormatTok->is(Kind: tok::l_paren)) {
1856 parseParens();
1857 if (FormatTok->Previous &&
1858 FormatTok->Previous->endsSequence(K1: tok::r_paren, Tokens: tok::kw_auto,
1859 Tokens: tok::l_paren)) {
1860 Line->SeenDecltypeAuto = true;
1861 }
1862 }
1863 break;
1864 case tok::period:
1865 nextToken();
1866 // In Java, classes have an implicit static member "class".
1867 if (Style.isJava() && FormatTok && FormatTok->is(Kind: tok::kw_class))
1868 nextToken();
1869 if (Style.isJavaScript() && FormatTok &&
1870 FormatTok->Tok.getIdentifierInfo()) {
1871 // JavaScript only has pseudo keywords, all keywords are allowed to
1872 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1873 nextToken();
1874 }
1875 break;
1876 case tok::semi:
1877 nextToken();
1878 addUnwrappedLine();
1879 return;
1880 case tok::r_brace:
1881 addUnwrappedLine();
1882 return;
1883 case tok::l_paren: {
1884 parseParens();
1885 // Break the unwrapped line if a K&R C function definition has a parameter
1886 // declaration.
1887 if (OpeningBrace || !IsCpp || !Previous || eof())
1888 break;
1889 if (isC78ParameterDecl(Tok: FormatTok,
1890 Next: Tokens->peekNextToken(/*SkipComment=*/true),
1891 FuncName: Previous)) {
1892 addUnwrappedLine();
1893 return;
1894 }
1895 break;
1896 }
1897 case tok::kw_operator:
1898 nextToken();
1899 if (FormatTok->isBinaryOperator())
1900 nextToken();
1901 break;
1902 case tok::caret: {
1903 const auto *Prev = FormatTok->getPreviousNonComment();
1904 nextToken();
1905 if (Prev && Prev->is(Kind: tok::identifier))
1906 break;
1907 // Block return type.
1908 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1909 nextToken();
1910 // Return types: pointers are ok too.
1911 while (FormatTok->is(Kind: tok::star))
1912 nextToken();
1913 }
1914 // Block argument list.
1915 if (FormatTok->is(Kind: tok::l_paren))
1916 parseParens();
1917 // Block body.
1918 if (FormatTok->is(Kind: tok::l_brace))
1919 parseChildBlock();
1920 break;
1921 }
1922 case tok::l_brace:
1923 if (InRequiresExpression)
1924 FormatTok->setFinalizedType(TT_BracedListLBrace);
1925 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1926 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1927 // A block outside of parentheses must be the last part of a
1928 // structural element.
1929 // FIXME: Figure out cases where this is not true, and add projections
1930 // for them (the one we know is missing are lambdas).
1931 if (Style.isJava() &&
1932 Line->Tokens.front().Tok->is(II: Keywords.kw_synchronized)) {
1933 // If necessary, we could set the type to something different than
1934 // TT_FunctionLBrace.
1935 if (Style.BraceWrapping.AfterControlStatement ==
1936 FormatStyle::BWACS_Always) {
1937 addUnwrappedLine();
1938 }
1939 } else if (Style.BraceWrapping.AfterFunction) {
1940 addUnwrappedLine();
1941 }
1942 if (!Previous || Previous->isNot(Kind: TT_TypeDeclarationParen))
1943 FormatTok->setFinalizedType(TT_FunctionLBrace);
1944 parseBlock();
1945 IsDecltypeAutoFunction = false;
1946 addUnwrappedLine();
1947 return;
1948 }
1949 // Otherwise this was a braced init list, and the structural
1950 // element continues.
1951 break;
1952 case tok::kw_try:
1953 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1954 // field/method declaration.
1955 nextToken();
1956 break;
1957 }
1958 // We arrive here when parsing function-try blocks.
1959 if (Style.BraceWrapping.AfterFunction)
1960 addUnwrappedLine();
1961 parseTryCatch();
1962 return;
1963 case tok::identifier: {
1964 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where) &&
1965 Line->MustBeDeclaration) {
1966 addUnwrappedLine();
1967 parseCSharpGenericTypeConstraint();
1968 break;
1969 }
1970 if (FormatTok->is(TT: TT_MacroBlockEnd)) {
1971 addUnwrappedLine();
1972 return;
1973 }
1974
1975 // Function declarations (as opposed to function expressions) are parsed
1976 // on their own unwrapped line by continuing this loop. Function
1977 // expressions (functions that are not on their own line) must not create
1978 // a new unwrapped line, so they are special cased below.
1979 size_t TokenCount = Line->Tokens.size();
1980 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_function) &&
1981 (TokenCount > 1 ||
1982 (TokenCount == 1 &&
1983 Line->Tokens.front().Tok->isNot(Kind: Keywords.kw_async)))) {
1984 tryToParseJSFunction();
1985 break;
1986 }
1987 if ((Style.isJavaScript() || Style.isJava()) &&
1988 FormatTok->is(II: Keywords.kw_interface)) {
1989 if (Style.isJavaScript()) {
1990 // In JavaScript/TypeScript, "interface" can be used as a standalone
1991 // identifier, e.g. in `var interface = 1;`. If "interface" is
1992 // followed by another identifier, it is very like to be an actual
1993 // interface declaration.
1994 unsigned StoredPosition = Tokens->getPosition();
1995 FormatToken *Next = Tokens->getNextToken();
1996 FormatTok = Tokens->setPosition(StoredPosition);
1997 if (!mustBeJSIdent(Keywords, FormatTok: Next)) {
1998 nextToken();
1999 break;
2000 }
2001 }
2002 parseRecord();
2003 addUnwrappedLine();
2004 return;
2005 }
2006
2007 if (Style.isVerilog()) {
2008 if (FormatTok->is(II: Keywords.kw_table)) {
2009 parseVerilogTable();
2010 return;
2011 }
2012 if (Keywords.isVerilogBegin(Tok: *FormatTok) ||
2013 Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
2014 parseBlock();
2015 addUnwrappedLine();
2016 return;
2017 }
2018 }
2019
2020 if (!IsCpp && FormatTok->is(II: Keywords.kw_interface)) {
2021 if (parseStructLike())
2022 return;
2023 break;
2024 }
2025
2026 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
2027 parseStatementMacro();
2028 return;
2029 }
2030
2031 // See if the following token should start a new unwrapped line.
2032 StringRef Text = FormatTok->TokenText;
2033
2034 FormatToken *PreviousToken = FormatTok;
2035 nextToken();
2036
2037 // JS doesn't have macros, and within classes colons indicate fields, not
2038 // labels.
2039 if (Style.isJavaScript())
2040 break;
2041
2042 auto OneTokenSoFar = [&]() {
2043 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2044 while (I != E && I->Tok->is(Kind: tok::comment))
2045 ++I;
2046 if (Style.isVerilog())
2047 while (I != E && I->Tok->is(Kind: tok::hash))
2048 ++I;
2049 return I != E && (++I == E);
2050 };
2051 if (OneTokenSoFar()) {
2052 // Recognize function-like macro usages without trailing semicolon as
2053 // well as free-standing macros like Q_OBJECT.
2054 bool FunctionLike = FormatTok->is(Kind: tok::l_paren);
2055 if (FunctionLike)
2056 parseParens();
2057
2058 bool FollowedByNewline =
2059 CommentsBeforeNextToken.empty()
2060 ? FormatTok->NewlinesBefore > 0
2061 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2062
2063 if (FollowedByNewline &&
2064 (Text.size() >= 5 ||
2065 (FunctionLike && FormatTok->isNot(Kind: tok::l_paren))) &&
2066 tokenCanStartNewLine(Tok: *FormatTok) && Text == Text.upper()) {
2067 if (PreviousToken->isNot(Kind: TT_UntouchableMacroFunc))
2068 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2069 addUnwrappedLine();
2070 return;
2071 }
2072 }
2073 break;
2074 }
2075 case tok::equal:
2076 if ((Style.isJavaScript() || Style.isCSharp()) &&
2077 FormatTok->is(TT: TT_FatArrow)) {
2078 tryToParseChildBlock();
2079 break;
2080 }
2081
2082 SeenEqual = true;
2083 nextToken();
2084 if (FormatTok->is(Kind: tok::l_brace)) {
2085 // Block kind should probably be set to BK_BracedInit for any language.
2086 // C# needs this change to ensure that array initialisers and object
2087 // initialisers are indented the same way.
2088 if (Style.isCSharp())
2089 FormatTok->setBlockKind(BK_BracedInit);
2090 // TableGen's defset statement has syntax of the form,
2091 // `defset <type> <name> = { <statement>... }`
2092 if (Style.isTableGen() &&
2093 Line->Tokens.begin()->Tok->is(II: Keywords.kw_defset)) {
2094 FormatTok->setFinalizedType(TT_FunctionLBrace);
2095 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2096 /*MunchSemi=*/false);
2097 addUnwrappedLine();
2098 break;
2099 }
2100 nextToken();
2101 parseBracedList();
2102 } else if (Style.Language == FormatStyle::LK_Proto &&
2103 FormatTok->is(Kind: tok::less)) {
2104 nextToken();
2105 parseBracedList(/*IsAngleBracket=*/true);
2106 }
2107 break;
2108 case tok::l_square:
2109 parseSquare();
2110 break;
2111 case tok::kw_new:
2112 if (Style.isCSharp() &&
2113 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2114 (Previous && Previous->isAccessSpecifierKeyword()))) {
2115 nextToken();
2116 } else {
2117 parseNew();
2118 }
2119 break;
2120 case tok::kw_switch:
2121 if (Style.isJava())
2122 parseSwitch(/*IsExpr=*/true);
2123 else
2124 nextToken();
2125 break;
2126 case tok::kw_case:
2127 // Proto: there are no switch/case statements.
2128 if (Style.Language == FormatStyle::LK_Proto) {
2129 nextToken();
2130 return;
2131 }
2132 // In Verilog switch is called case.
2133 if (Style.isVerilog()) {
2134 parseBlock();
2135 addUnwrappedLine();
2136 return;
2137 }
2138 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2139 // 'case: string' field declaration.
2140 nextToken();
2141 break;
2142 }
2143 parseCaseLabel();
2144 break;
2145 case tok::kw_default:
2146 nextToken();
2147 if (Style.isVerilog()) {
2148 if (FormatTok->is(Kind: tok::colon)) {
2149 // The label will be handled in the next iteration.
2150 break;
2151 }
2152 if (FormatTok->is(II: Keywords.kw_clocking)) {
2153 // A default clocking block.
2154 parseBlock();
2155 addUnwrappedLine();
2156 return;
2157 }
2158 parseVerilogCaseLabel();
2159 return;
2160 }
2161 break;
2162 case tok::colon:
2163 nextToken();
2164 if (Style.isVerilog()) {
2165 parseVerilogCaseLabel();
2166 return;
2167 }
2168 break;
2169 case tok::greater:
2170 nextToken();
2171 if (FormatTok->is(Kind: tok::l_brace))
2172 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2173 break;
2174 default:
2175 nextToken();
2176 break;
2177 }
2178 }
2179}
2180
2181bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2182 assert(FormatTok->is(tok::l_brace));
2183 if (!Style.isCSharp())
2184 return false;
2185 // See if it's a property accessor.
2186 if (!FormatTok->Previous || FormatTok->Previous->isNot(Kind: tok::identifier))
2187 return false;
2188
2189 // See if we are inside a property accessor.
2190 //
2191 // Record the current tokenPosition so that we can advance and
2192 // reset the current token. `Next` is not set yet so we need
2193 // another way to advance along the token stream.
2194 unsigned int StoredPosition = Tokens->getPosition();
2195 FormatToken *Tok = Tokens->getNextToken();
2196
2197 // A trivial property accessor is of the form:
2198 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2199 // Track these as they do not require line breaks to be introduced.
2200 bool HasSpecialAccessor = false;
2201 bool IsTrivialPropertyAccessor = true;
2202 bool HasAttribute = false;
2203 while (!eof()) {
2204 if (const bool IsAccessorKeyword =
2205 Tok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init, Ks: Keywords.kw_set);
2206 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2207 Tok->isOneOf(K1: tok::l_square, K2: tok::semi, Ks: Keywords.kw_internal)) {
2208 if (IsAccessorKeyword)
2209 HasSpecialAccessor = true;
2210 else if (Tok->is(Kind: tok::l_square))
2211 HasAttribute = true;
2212 Tok = Tokens->getNextToken();
2213 continue;
2214 }
2215 if (Tok->isNot(Kind: tok::r_brace))
2216 IsTrivialPropertyAccessor = false;
2217 break;
2218 }
2219
2220 if (!HasSpecialAccessor || HasAttribute) {
2221 Tokens->setPosition(StoredPosition);
2222 return false;
2223 }
2224
2225 // Try to parse the property accessor:
2226 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2227 Tokens->setPosition(StoredPosition);
2228 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2229 addUnwrappedLine();
2230 nextToken();
2231 do {
2232 switch (FormatTok->Tok.getKind()) {
2233 case tok::r_brace:
2234 nextToken();
2235 if (FormatTok->is(Kind: tok::equal)) {
2236 while (!eof() && FormatTok->isNot(Kind: tok::semi))
2237 nextToken();
2238 nextToken();
2239 }
2240 addUnwrappedLine();
2241 return true;
2242 case tok::l_brace:
2243 ++Line->Level;
2244 parseBlock(/*MustBeDeclaration=*/true);
2245 addUnwrappedLine();
2246 --Line->Level;
2247 break;
2248 case tok::equal:
2249 if (FormatTok->is(TT: TT_FatArrow)) {
2250 ++Line->Level;
2251 do {
2252 nextToken();
2253 } while (!eof() && FormatTok->isNot(Kind: tok::semi));
2254 nextToken();
2255 addUnwrappedLine();
2256 --Line->Level;
2257 break;
2258 }
2259 nextToken();
2260 break;
2261 default:
2262 if (FormatTok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init,
2263 Ks: Keywords.kw_set) &&
2264 !IsTrivialPropertyAccessor) {
2265 // Non-trivial get/set needs to be on its own line.
2266 addUnwrappedLine();
2267 }
2268 nextToken();
2269 }
2270 } while (!eof());
2271
2272 // Unreachable for well-formed code (paired '{' and '}').
2273 return true;
2274}
2275
2276bool UnwrappedLineParser::tryToParseLambda() {
2277 assert(FormatTok->is(tok::l_square));
2278 if (!IsCpp) {
2279 nextToken();
2280 return false;
2281 }
2282 FormatToken &LSquare = *FormatTok;
2283 if (!tryToParseLambdaIntroducer())
2284 return false;
2285
2286 FormatToken *Arrow = nullptr;
2287 bool InTemplateParameterList = false;
2288
2289 while (FormatTok->isNot(Kind: tok::l_brace)) {
2290 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2291 nextToken();
2292 continue;
2293 }
2294 switch (FormatTok->Tok.getKind()) {
2295 case tok::l_brace:
2296 break;
2297 case tok::l_paren:
2298 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2299 break;
2300 case tok::l_square:
2301 parseSquare();
2302 break;
2303 case tok::less:
2304 assert(FormatTok->Previous);
2305 if (FormatTok->Previous->is(Kind: tok::r_square))
2306 InTemplateParameterList = true;
2307 nextToken();
2308 break;
2309 case tok::kw_auto:
2310 case tok::kw_class:
2311 case tok::kw_struct:
2312 case tok::kw_union:
2313 case tok::kw_template:
2314 case tok::kw_typename:
2315 case tok::amp:
2316 case tok::star:
2317 case tok::kw_const:
2318 case tok::kw_constexpr:
2319 case tok::kw_consteval:
2320 case tok::comma:
2321 case tok::greater:
2322 case tok::identifier:
2323 case tok::numeric_constant:
2324 case tok::coloncolon:
2325 case tok::kw_mutable:
2326 case tok::kw_noexcept:
2327 case tok::kw_static:
2328 nextToken();
2329 break;
2330 // Specialization of a template with an integer parameter can contain
2331 // arithmetic, logical, comparison and ternary operators.
2332 //
2333 // FIXME: This also accepts sequences of operators that are not in the scope
2334 // of a template argument list.
2335 //
2336 // In a C++ lambda a template type can only occur after an arrow. We use
2337 // this as an heuristic to distinguish between Objective-C expressions
2338 // followed by an `a->b` expression, such as:
2339 // ([obj func:arg] + a->b)
2340 // Otherwise the code below would parse as a lambda.
2341 case tok::plus:
2342 case tok::minus:
2343 case tok::exclaim:
2344 case tok::tilde:
2345 case tok::slash:
2346 case tok::percent:
2347 case tok::lessless:
2348 case tok::pipe:
2349 case tok::pipepipe:
2350 case tok::ampamp:
2351 case tok::caret:
2352 case tok::equalequal:
2353 case tok::exclaimequal:
2354 case tok::greaterequal:
2355 case tok::lessequal:
2356 case tok::question:
2357 case tok::colon:
2358 case tok::ellipsis:
2359 case tok::kw_true:
2360 case tok::kw_false:
2361 if (Arrow || InTemplateParameterList) {
2362 nextToken();
2363 break;
2364 }
2365 return true;
2366 case tok::arrow:
2367 Arrow = FormatTok;
2368 nextToken();
2369 break;
2370 case tok::kw_requires:
2371 parseRequiresClause();
2372 break;
2373 case tok::equal:
2374 if (!InTemplateParameterList)
2375 return true;
2376 nextToken();
2377 break;
2378 default:
2379 return true;
2380 }
2381 }
2382
2383 FormatTok->setFinalizedType(TT_LambdaLBrace);
2384 LSquare.setFinalizedType(TT_LambdaLSquare);
2385
2386 if (Arrow)
2387 Arrow->setFinalizedType(TT_LambdaArrow);
2388
2389 NestedLambdas.push_back(Elt: Line->SeenDecltypeAuto);
2390 parseChildBlock();
2391 assert(!NestedLambdas.empty());
2392 NestedLambdas.pop_back();
2393
2394 return true;
2395}
2396
2397bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2398 const FormatToken *Previous = FormatTok->Previous;
2399 const FormatToken *LeftSquare = FormatTok;
2400 nextToken();
2401 if (Previous) {
2402 const auto *PrevPrev = Previous->getPreviousNonComment();
2403 if (Previous->is(Kind: tok::star) && PrevPrev && PrevPrev->isTypeName(LangOpts))
2404 return false;
2405 if (Previous->closesScope()) {
2406 // Not a potential C-style cast.
2407 if (Previous->isNot(Kind: tok::r_paren))
2408 return false;
2409 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2410 // and `int (*)()`.
2411 if (!PrevPrev || PrevPrev->isNoneOf(Ks: tok::greater, Ks: tok::r_paren))
2412 return false;
2413 }
2414 if (Previous && Previous->Tok.getIdentifierInfo() &&
2415 Previous->isNoneOf(Ks: tok::kw_return, Ks: tok::kw_co_await, Ks: tok::kw_co_yield,
2416 Ks: tok::kw_co_return)) {
2417 return false;
2418 }
2419 }
2420 if (LeftSquare->isCppStructuredBinding(IsCpp))
2421 return false;
2422 if (FormatTok->is(Kind: tok::l_square) || tok::isLiteral(K: FormatTok->Tok.getKind()))
2423 return false;
2424 if (FormatTok->is(Kind: tok::r_square)) {
2425 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2426 if (Next->is(Kind: tok::greater))
2427 return false;
2428 }
2429 parseSquare(/*LambdaIntroducer=*/true);
2430 return true;
2431}
2432
2433void UnwrappedLineParser::tryToParseJSFunction() {
2434 assert(FormatTok->is(Keywords.kw_function));
2435 if (FormatTok->is(II: Keywords.kw_async))
2436 nextToken();
2437 // Consume "function".
2438 nextToken();
2439
2440 // Consume * (generator function). Treat it like C++'s overloaded operators.
2441 if (FormatTok->is(Kind: tok::star)) {
2442 FormatTok->setFinalizedType(TT_OverloadedOperator);
2443 nextToken();
2444 }
2445
2446 // Consume function name.
2447 if (FormatTok->is(Kind: tok::identifier))
2448 nextToken();
2449
2450 if (FormatTok->isNot(Kind: tok::l_paren))
2451 return;
2452
2453 // Parse formal parameter list.
2454 parseParens();
2455
2456 if (FormatTok->is(Kind: tok::colon)) {
2457 // Parse a type definition.
2458 nextToken();
2459
2460 // Eat the type declaration. For braced inline object types, balance braces,
2461 // otherwise just parse until finding an l_brace for the function body.
2462 if (FormatTok->is(Kind: tok::l_brace))
2463 tryToParseBracedList();
2464 else
2465 while (FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::semi) && !eof())
2466 nextToken();
2467 }
2468
2469 if (FormatTok->is(Kind: tok::semi))
2470 return;
2471
2472 parseChildBlock();
2473}
2474
2475bool UnwrappedLineParser::tryToParseBracedList() {
2476 if (FormatTok->is(BBK: BK_Unknown))
2477 calculateBraceTypes();
2478 assert(FormatTok->isNot(BK_Unknown));
2479 if (FormatTok->is(BBK: BK_Block))
2480 return false;
2481 nextToken();
2482 parseBracedList();
2483 return true;
2484}
2485
2486bool UnwrappedLineParser::tryToParseChildBlock() {
2487 assert(Style.isJavaScript() || Style.isCSharp());
2488 assert(FormatTok->is(TT_FatArrow));
2489 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2490 // They always start an expression or a child block if followed by a curly
2491 // brace.
2492 nextToken();
2493 if (FormatTok->isNot(Kind: tok::l_brace))
2494 return false;
2495 parseChildBlock();
2496 return true;
2497}
2498
2499bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2500 assert(!IsAngleBracket || !IsEnum);
2501 bool HasError = false;
2502
2503 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2504 // replace this by using parseAssignmentExpression() inside.
2505 do {
2506 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow) &&
2507 tryToParseChildBlock()) {
2508 continue;
2509 }
2510 if (Style.isJavaScript()) {
2511 if (FormatTok->is(II: Keywords.kw_function)) {
2512 tryToParseJSFunction();
2513 continue;
2514 }
2515 if (FormatTok->is(Kind: tok::l_brace)) {
2516 // Could be a method inside of a braced list `{a() { return 1; }}`.
2517 if (tryToParseBracedList())
2518 continue;
2519 parseChildBlock();
2520 }
2521 }
2522 if (FormatTok->is(Kind: IsAngleBracket ? tok::greater : tok::r_brace)) {
2523 if (IsEnum) {
2524 FormatTok->setBlockKind(BK_Block);
2525 if (!Style.AllowShortEnumsOnASingleLine)
2526 addUnwrappedLine();
2527 }
2528 nextToken();
2529 return !HasError;
2530 }
2531 switch (FormatTok->Tok.getKind()) {
2532 case tok::l_square:
2533 if (Style.isCSharp())
2534 parseSquare();
2535 else
2536 tryToParseLambda();
2537 break;
2538 case tok::l_paren:
2539 parseParens();
2540 // JavaScript can just have free standing methods and getters/setters in
2541 // object literals. Detect them by a "{" following ")".
2542 if (Style.isJavaScript()) {
2543 if (FormatTok->is(Kind: tok::l_brace))
2544 parseChildBlock();
2545 break;
2546 }
2547 break;
2548 case tok::l_brace:
2549 // Assume there are no blocks inside a braced init list apart
2550 // from the ones we explicitly parse out (like lambdas).
2551 FormatTok->setBlockKind(BK_BracedInit);
2552 if (!IsAngleBracket) {
2553 auto *Prev = FormatTok->Previous;
2554 if (Prev && Prev->is(Kind: tok::greater))
2555 Prev->setFinalizedType(TT_TemplateCloser);
2556 }
2557 nextToken();
2558 parseBracedList();
2559 break;
2560 case tok::less:
2561 nextToken();
2562 if (IsAngleBracket)
2563 parseBracedList(/*IsAngleBracket=*/true);
2564 break;
2565 case tok::semi:
2566 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2567 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2568 // used for error recovery if we have otherwise determined that this is
2569 // a braced list.
2570 if (Style.isJavaScript()) {
2571 nextToken();
2572 break;
2573 }
2574 HasError = true;
2575 if (!IsEnum)
2576 return false;
2577 nextToken();
2578 break;
2579 case tok::comma:
2580 nextToken();
2581 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2582 addUnwrappedLine();
2583 break;
2584 case tok::kw_requires:
2585 parseRequiresExpression();
2586 break;
2587 default:
2588 nextToken();
2589 break;
2590 }
2591 } while (!eof());
2592 return false;
2593}
2594
2595/// Parses a pair of parentheses (and everything between them).
2596/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2597/// double ampersands. This applies for all nested scopes as well.
2598///
2599/// Returns whether there is a `=` token between the parentheses.
2600bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType,
2601 bool InMacroCall) {
2602 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2603 auto *LParen = FormatTok;
2604 auto *Prev = FormatTok->Previous;
2605 bool SeenComma = false;
2606 bool SeenEqual = false;
2607 bool MightBeFoldExpr = false;
2608 nextToken();
2609 const bool MightBeStmtExpr = FormatTok->is(Kind: tok::l_brace);
2610 if (!InMacroCall && Prev && Prev->is(TT: TT_FunctionLikeMacro))
2611 InMacroCall = true;
2612 do {
2613 switch (FormatTok->Tok.getKind()) {
2614 case tok::l_paren:
2615 if (parseParens(AmpAmpTokenType, InMacroCall))
2616 SeenEqual = true;
2617 if (Style.isJava() && FormatTok->is(Kind: tok::l_brace))
2618 parseChildBlock();
2619 break;
2620 case tok::r_paren: {
2621 auto *RParen = FormatTok;
2622 nextToken();
2623 if (Prev) {
2624 auto OptionalParens = [&] {
2625 if (Style.RemoveParentheses == FormatStyle::RPS_Leave ||
2626 MightBeStmtExpr || MightBeFoldExpr || SeenComma || InMacroCall ||
2627 Line->InMacroBody || RParen->getPreviousNonComment() == LParen) {
2628 return false;
2629 }
2630 const bool DoubleParens =
2631 Prev->is(Kind: tok::l_paren) && FormatTok->is(Kind: tok::r_paren);
2632 if (DoubleParens) {
2633 const auto *PrevPrev = Prev->getPreviousNonComment();
2634 const bool Excluded =
2635 PrevPrev &&
2636 (PrevPrev->isOneOf(K1: tok::kw___attribute, K2: tok::kw_decltype) ||
2637 (SeenEqual &&
2638 (PrevPrev->isOneOf(K1: tok::kw_if, K2: tok::kw_while) ||
2639 PrevPrev->endsSequence(K1: tok::kw_constexpr, Tokens: tok::kw_if))));
2640 if (!Excluded)
2641 return true;
2642 } else {
2643 const bool CommaSeparated =
2644 Prev->isOneOf(K1: tok::l_paren, K2: tok::comma) &&
2645 FormatTok->isOneOf(K1: tok::comma, K2: tok::r_paren);
2646 if (CommaSeparated &&
2647 // LParen is not preceded by ellipsis, comma.
2648 !Prev->endsSequence(K1: tok::comma, Tokens: tok::ellipsis) &&
2649 // RParen is not followed by comma, ellipsis.
2650 !(FormatTok->is(Kind: tok::comma) &&
2651 Tokens->peekNextToken()->is(Kind: tok::ellipsis))) {
2652 return true;
2653 }
2654 const bool ReturnParens =
2655 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2656 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2657 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2658 Prev->isOneOf(K1: tok::kw_return, K2: tok::kw_co_return) &&
2659 FormatTok->is(Kind: tok::semi);
2660 if (ReturnParens)
2661 return true;
2662 }
2663 return false;
2664 };
2665 if (OptionalParens()) {
2666 LParen->Optional = true;
2667 RParen->Optional = true;
2668 } else if (Prev->is(TT: TT_TypenameMacro)) {
2669 LParen->setFinalizedType(TT_TypeDeclarationParen);
2670 RParen->setFinalizedType(TT_TypeDeclarationParen);
2671 } else if (Prev->is(Kind: tok::greater) && RParen->Previous == LParen) {
2672 Prev->setFinalizedType(TT_TemplateCloser);
2673 } else if (FormatTok->is(Kind: tok::l_brace) && Prev->is(Kind: tok::amp) &&
2674 !Prev->Previous) {
2675 FormatTok->setBlockKind(BK_BracedInit);
2676 }
2677 }
2678 return SeenEqual;
2679 }
2680 case tok::r_brace:
2681 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2682 return SeenEqual;
2683 case tok::l_square:
2684 tryToParseLambda();
2685 break;
2686 case tok::l_brace:
2687 if (!tryToParseBracedList())
2688 parseChildBlock();
2689 break;
2690 case tok::at:
2691 nextToken();
2692 if (FormatTok->is(Kind: tok::l_brace)) {
2693 nextToken();
2694 parseBracedList();
2695 }
2696 break;
2697 case tok::comma:
2698 SeenComma = true;
2699 nextToken();
2700 break;
2701 case tok::ellipsis:
2702 MightBeFoldExpr = true;
2703 nextToken();
2704 break;
2705 case tok::equal:
2706 SeenEqual = true;
2707 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow))
2708 tryToParseChildBlock();
2709 else
2710 nextToken();
2711 break;
2712 case tok::kw_class:
2713 if (Style.isJavaScript())
2714 parseRecord(/*ParseAsExpr=*/true);
2715 else
2716 nextToken();
2717 break;
2718 case tok::identifier:
2719 if (Style.isJavaScript() && (FormatTok->is(II: Keywords.kw_function)))
2720 tryToParseJSFunction();
2721 else
2722 nextToken();
2723 break;
2724 case tok::kw_switch:
2725 if (Style.isJava())
2726 parseSwitch(/*IsExpr=*/true);
2727 else
2728 nextToken();
2729 break;
2730 case tok::kw_requires:
2731 parseRequiresExpression();
2732 break;
2733 case tok::ampamp:
2734 if (AmpAmpTokenType != TT_Unknown)
2735 FormatTok->setFinalizedType(AmpAmpTokenType);
2736 [[fallthrough]];
2737 default:
2738 nextToken();
2739 break;
2740 }
2741 } while (!eof());
2742 return SeenEqual;
2743}
2744
2745void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2746 if (!LambdaIntroducer) {
2747 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2748 if (tryToParseLambda())
2749 return;
2750 }
2751 do {
2752 switch (FormatTok->Tok.getKind()) {
2753 case tok::l_paren:
2754 parseParens();
2755 break;
2756 case tok::r_square:
2757 nextToken();
2758 return;
2759 case tok::r_brace:
2760 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2761 return;
2762 case tok::l_square:
2763 parseSquare();
2764 break;
2765 case tok::l_brace: {
2766 if (!tryToParseBracedList())
2767 parseChildBlock();
2768 break;
2769 }
2770 case tok::at:
2771 case tok::colon:
2772 nextToken();
2773 if (FormatTok->is(Kind: tok::l_brace)) {
2774 nextToken();
2775 parseBracedList();
2776 }
2777 break;
2778 default:
2779 nextToken();
2780 break;
2781 }
2782 } while (!eof());
2783}
2784
2785void UnwrappedLineParser::keepAncestorBraces() {
2786 if (!Style.RemoveBracesLLVM)
2787 return;
2788
2789 const int MaxNestingLevels = 2;
2790 const int Size = NestedTooDeep.size();
2791 if (Size >= MaxNestingLevels)
2792 NestedTooDeep[Size - MaxNestingLevels] = true;
2793 NestedTooDeep.push_back(Elt: false);
2794}
2795
2796static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2797 for (const auto &Token : llvm::reverse(C: Line.Tokens))
2798 if (Token.Tok->isNot(Kind: tok::comment))
2799 return Token.Tok;
2800
2801 return nullptr;
2802}
2803
2804void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2805 FormatToken *Tok = nullptr;
2806
2807 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2808 PreprocessorDirectives.empty() && FormatTok->isNot(Kind: tok::semi)) {
2809 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2810 ? getLastNonComment(Line: *Line)
2811 : Line->Tokens.back().Tok;
2812 assert(Tok);
2813 if (Tok->BraceCount < 0) {
2814 assert(Tok->BraceCount == -1);
2815 Tok = nullptr;
2816 } else {
2817 Tok->BraceCount = -1;
2818 }
2819 }
2820
2821 addUnwrappedLine();
2822 ++Line->Level;
2823 ++Line->UnbracedBodyLevel;
2824 parseStructuralElement();
2825 --Line->UnbracedBodyLevel;
2826
2827 if (Tok) {
2828 assert(!Line->InPPDirective);
2829 Tok = nullptr;
2830 for (const auto &L : llvm::reverse(C&: *CurrentLines)) {
2831 if (!L.InPPDirective && getLastNonComment(Line: L)) {
2832 Tok = L.Tokens.back().Tok;
2833 break;
2834 }
2835 }
2836 assert(Tok);
2837 ++Tok->BraceCount;
2838 }
2839
2840 if (CheckEOF && eof())
2841 addUnwrappedLine();
2842
2843 --Line->Level;
2844}
2845
2846static void markOptionalBraces(FormatToken *LeftBrace) {
2847 if (!LeftBrace)
2848 return;
2849
2850 assert(LeftBrace->is(tok::l_brace));
2851
2852 FormatToken *RightBrace = LeftBrace->MatchingParen;
2853 if (!RightBrace) {
2854 assert(!LeftBrace->Optional);
2855 return;
2856 }
2857
2858 assert(RightBrace->is(tok::r_brace));
2859 assert(RightBrace->MatchingParen == LeftBrace);
2860 assert(LeftBrace->Optional == RightBrace->Optional);
2861
2862 LeftBrace->Optional = true;
2863 RightBrace->Optional = true;
2864}
2865
2866void UnwrappedLineParser::handleAttributes() {
2867 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2868 if (FormatTok->isAttribute())
2869 nextToken();
2870 else if (FormatTok->is(Kind: tok::l_square))
2871 handleCppAttributes();
2872}
2873
2874bool UnwrappedLineParser::handleCppAttributes() {
2875 // Handle [[likely]] / [[unlikely]] attributes.
2876 assert(FormatTok->is(tok::l_square));
2877 if (!tryToParseSimpleAttribute())
2878 return false;
2879 parseSquare();
2880 return true;
2881}
2882
2883/// Returns whether \c Tok begins a block.
2884bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2885 // FIXME: rename the function or make
2886 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2887 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2888 : Tok.is(Kind: tok::l_brace);
2889}
2890
2891FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2892 bool KeepBraces,
2893 bool IsVerilogAssert) {
2894 assert((FormatTok->is(tok::kw_if) ||
2895 (Style.isVerilog() &&
2896 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2897 Keywords.kw_assume, Keywords.kw_cover))) &&
2898 "'if' expected");
2899 nextToken();
2900
2901 if (IsVerilogAssert) {
2902 // Handle `assert #0` and `assert final`.
2903 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
2904 nextToken();
2905 if (FormatTok->is(Kind: tok::numeric_constant))
2906 nextToken();
2907 } else if (FormatTok->isOneOf(K1: Keywords.kw_final, K2: Keywords.kw_property,
2908 Ks: Keywords.kw_sequence)) {
2909 nextToken();
2910 }
2911 }
2912
2913 // TableGen's if statement has the form of `if <cond> then { ... }`.
2914 if (Style.isTableGen()) {
2915 while (!eof() && FormatTok->isNot(Kind: Keywords.kw_then)) {
2916 // Simply skip until then. This range only contains a value.
2917 nextToken();
2918 }
2919 }
2920
2921 // Handle `if !consteval`.
2922 if (FormatTok->is(Kind: tok::exclaim))
2923 nextToken();
2924
2925 bool KeepIfBraces = true;
2926 if (FormatTok->is(Kind: tok::kw_consteval)) {
2927 nextToken();
2928 } else {
2929 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2930 if (FormatTok->isOneOf(K1: tok::kw_constexpr, K2: tok::identifier))
2931 nextToken();
2932 if (FormatTok->is(Kind: tok::l_paren)) {
2933 FormatTok->setFinalizedType(TT_ConditionLParen);
2934 parseParens();
2935 }
2936 }
2937 handleAttributes();
2938 // The then action is optional in Verilog assert statements.
2939 if (IsVerilogAssert && FormatTok->is(Kind: tok::semi)) {
2940 nextToken();
2941 addUnwrappedLine();
2942 return nullptr;
2943 }
2944
2945 bool NeedsUnwrappedLine = false;
2946 keepAncestorBraces();
2947
2948 FormatToken *IfLeftBrace = nullptr;
2949 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2950
2951 if (isBlockBegin(Tok: *FormatTok)) {
2952 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2953 IfLeftBrace = FormatTok;
2954 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2955 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2956 /*MunchSemi=*/true, KeepBraces: KeepIfBraces, IfKind: &IfBlockKind);
2957 setPreviousRBraceType(TT_ControlStatementRBrace);
2958 if (Style.BraceWrapping.BeforeElse)
2959 addUnwrappedLine();
2960 else
2961 NeedsUnwrappedLine = true;
2962 } else if (IsVerilogAssert && FormatTok->is(Kind: tok::kw_else)) {
2963 addUnwrappedLine();
2964 } else {
2965 parseUnbracedBody();
2966 }
2967
2968 if (Style.RemoveBracesLLVM) {
2969 assert(!NestedTooDeep.empty());
2970 KeepIfBraces = KeepIfBraces ||
2971 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2972 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2973 IfBlockKind == IfStmtKind::IfElseIf;
2974 }
2975
2976 bool KeepElseBraces = KeepIfBraces;
2977 FormatToken *ElseLeftBrace = nullptr;
2978 IfStmtKind Kind = IfStmtKind::IfOnly;
2979
2980 if (FormatTok->is(Kind: tok::kw_else)) {
2981 if (Style.RemoveBracesLLVM) {
2982 NestedTooDeep.back() = false;
2983 Kind = IfStmtKind::IfElse;
2984 }
2985 nextToken();
2986 handleAttributes();
2987 if (isBlockBegin(Tok: *FormatTok)) {
2988 const bool FollowedByIf = Tokens->peekNextToken()->is(Kind: tok::kw_if);
2989 FormatTok->setFinalizedType(TT_ElseLBrace);
2990 ElseLeftBrace = FormatTok;
2991 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2992 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2993 FormatToken *IfLBrace =
2994 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2995 /*MunchSemi=*/true, KeepBraces: KeepElseBraces, IfKind: &ElseBlockKind);
2996 setPreviousRBraceType(TT_ElseRBrace);
2997 if (FormatTok->is(Kind: tok::kw_else)) {
2998 KeepElseBraces = KeepElseBraces ||
2999 ElseBlockKind == IfStmtKind::IfOnly ||
3000 ElseBlockKind == IfStmtKind::IfElseIf;
3001 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
3002 KeepElseBraces = true;
3003 assert(ElseLeftBrace->MatchingParen);
3004 markOptionalBraces(LeftBrace: ElseLeftBrace);
3005 }
3006 addUnwrappedLine();
3007 } else if (!IsVerilogAssert && FormatTok->is(Kind: tok::kw_if)) {
3008 const FormatToken *Previous = Tokens->getPreviousToken();
3009 assert(Previous);
3010 const bool IsPrecededByComment = Previous->is(Kind: tok::comment);
3011 if (IsPrecededByComment) {
3012 addUnwrappedLine();
3013 ++Line->Level;
3014 }
3015 bool TooDeep = true;
3016 if (Style.RemoveBracesLLVM) {
3017 Kind = IfStmtKind::IfElseIf;
3018 TooDeep = NestedTooDeep.pop_back_val();
3019 }
3020 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepBraces: KeepIfBraces);
3021 if (Style.RemoveBracesLLVM)
3022 NestedTooDeep.push_back(Elt: TooDeep);
3023 if (IsPrecededByComment)
3024 --Line->Level;
3025 } else {
3026 parseUnbracedBody(/*CheckEOF=*/true);
3027 }
3028 } else {
3029 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3030 if (NeedsUnwrappedLine)
3031 addUnwrappedLine();
3032 }
3033
3034 if (!Style.RemoveBracesLLVM)
3035 return nullptr;
3036
3037 assert(!NestedTooDeep.empty());
3038 KeepElseBraces = KeepElseBraces ||
3039 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3040 NestedTooDeep.back();
3041
3042 NestedTooDeep.pop_back();
3043
3044 if (!KeepIfBraces && !KeepElseBraces) {
3045 markOptionalBraces(LeftBrace: IfLeftBrace);
3046 markOptionalBraces(LeftBrace: ElseLeftBrace);
3047 } else if (IfLeftBrace) {
3048 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3049 if (IfRightBrace) {
3050 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3051 assert(!IfLeftBrace->Optional);
3052 assert(!IfRightBrace->Optional);
3053 IfLeftBrace->MatchingParen = nullptr;
3054 IfRightBrace->MatchingParen = nullptr;
3055 }
3056 }
3057
3058 if (IfKind)
3059 *IfKind = Kind;
3060
3061 return IfLeftBrace;
3062}
3063
3064void UnwrappedLineParser::parseTryCatch() {
3065 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3066 nextToken();
3067 bool NeedsUnwrappedLine = false;
3068 bool HasCtorInitializer = false;
3069 if (FormatTok->is(Kind: tok::colon)) {
3070 auto *Colon = FormatTok;
3071 // We are in a function try block, what comes is an initializer list.
3072 nextToken();
3073 if (FormatTok->is(Kind: tok::identifier)) {
3074 HasCtorInitializer = true;
3075 Colon->setFinalizedType(TT_CtorInitializerColon);
3076 }
3077
3078 // In case identifiers were removed by clang-tidy, what might follow is
3079 // multiple commas in sequence - before the first identifier.
3080 while (FormatTok->is(Kind: tok::comma))
3081 nextToken();
3082
3083 while (FormatTok->is(Kind: tok::identifier)) {
3084 nextToken();
3085 if (FormatTok->is(Kind: tok::l_paren)) {
3086 parseParens();
3087 } else if (FormatTok->is(Kind: tok::l_brace)) {
3088 nextToken();
3089 parseBracedList();
3090 }
3091
3092 // In case identifiers were removed by clang-tidy, what might follow is
3093 // multiple commas in sequence - after the first identifier.
3094 while (FormatTok->is(Kind: tok::comma))
3095 nextToken();
3096 }
3097 }
3098 // Parse try with resource.
3099 if (Style.isJava() && FormatTok->is(Kind: tok::l_paren))
3100 parseParens();
3101
3102 keepAncestorBraces();
3103
3104 if (FormatTok->is(Kind: tok::l_brace)) {
3105 if (HasCtorInitializer)
3106 FormatTok->setFinalizedType(TT_FunctionLBrace);
3107 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3108 parseBlock();
3109 if (Style.BraceWrapping.BeforeCatch)
3110 addUnwrappedLine();
3111 else
3112 NeedsUnwrappedLine = true;
3113 } else if (FormatTok->isNot(Kind: tok::kw_catch)) {
3114 // The C++ standard requires a compound-statement after a try.
3115 // If there's none, we try to assume there's a structuralElement
3116 // and try to continue.
3117 addUnwrappedLine();
3118 ++Line->Level;
3119 parseStructuralElement();
3120 --Line->Level;
3121 }
3122 for (bool SeenCatch = false;;) {
3123 if (FormatTok->is(Kind: tok::at))
3124 nextToken();
3125 if (FormatTok->isNoneOf(Ks: tok::kw_catch, Ks: Keywords.kw___except,
3126 Ks: tok::kw___finally, Ks: tok::objc_catch,
3127 Ks: tok::objc_finally) &&
3128 !((Style.isJava() || Style.isJavaScript()) &&
3129 FormatTok->is(II: Keywords.kw_finally))) {
3130 break;
3131 }
3132 if (FormatTok->is(Kind: tok::kw_catch))
3133 SeenCatch = true;
3134 nextToken();
3135 while (FormatTok->isNot(Kind: tok::l_brace)) {
3136 if (FormatTok->is(Kind: tok::l_paren)) {
3137 parseParens();
3138 continue;
3139 }
3140 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::r_brace) || eof()) {
3141 if (Style.RemoveBracesLLVM)
3142 NestedTooDeep.pop_back();
3143 return;
3144 }
3145 nextToken();
3146 }
3147 if (SeenCatch) {
3148 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3149 SeenCatch = false;
3150 }
3151 NeedsUnwrappedLine = false;
3152 Line->MustBeDeclaration = false;
3153 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3154 parseBlock();
3155 if (Style.BraceWrapping.BeforeCatch)
3156 addUnwrappedLine();
3157 else
3158 NeedsUnwrappedLine = true;
3159 }
3160
3161 if (Style.RemoveBracesLLVM)
3162 NestedTooDeep.pop_back();
3163
3164 if (NeedsUnwrappedLine)
3165 addUnwrappedLine();
3166}
3167
3168void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3169 bool ManageWhitesmithsBraces =
3170 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3171
3172 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3173 // the whole block.
3174 if (ManageWhitesmithsBraces)
3175 ++Line->Level;
3176
3177 // Munch the semicolon after the block. This is more common than one would
3178 // think. Putting the semicolon into its own line is very ugly.
3179 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3180 /*KeepBraces=*/true, /*IfKind=*/nullptr, UnindentWhitesmithsBraces: ManageWhitesmithsBraces);
3181
3182 addUnwrappedLine(AdjustLevel: AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3183
3184 if (ManageWhitesmithsBraces)
3185 --Line->Level;
3186}
3187
3188void UnwrappedLineParser::parseNamespace() {
3189 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3190 "'namespace' expected");
3191
3192 const FormatToken &InitialToken = *FormatTok;
3193 nextToken();
3194 if (InitialToken.is(TT: TT_NamespaceMacro)) {
3195 parseParens();
3196 } else {
3197 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::kw_inline,
3198 Ks: tok::l_square, Ks: tok::period, Ks: tok::l_paren) ||
3199 (Style.isCSharp() && FormatTok->is(Kind: tok::kw_union))) {
3200 if (FormatTok->is(Kind: tok::l_square))
3201 parseSquare();
3202 else if (FormatTok->is(Kind: tok::l_paren))
3203 parseParens();
3204 else
3205 nextToken();
3206 }
3207 }
3208 if (FormatTok->is(Kind: tok::l_brace)) {
3209 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3210
3211 if (ShouldBreakBeforeBrace(Style, InitialToken,
3212 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace))) {
3213 addUnwrappedLine();
3214 }
3215
3216 unsigned AddLevels =
3217 Style.NamespaceIndentation == FormatStyle::NI_All ||
3218 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3219 DeclarationScopeStack.size() > 1)
3220 ? 1u
3221 : 0u;
3222 parseNamespaceOrExportBlock(AddLevels);
3223 }
3224 // FIXME: Add error handling.
3225}
3226
3227void UnwrappedLineParser::parseCppExportBlock() {
3228 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3229}
3230
3231void UnwrappedLineParser::parseNew() {
3232 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3233 nextToken();
3234
3235 if (Style.isCSharp()) {
3236 do {
3237 // Handle constructor invocation, e.g. `new(field: value)`.
3238 if (FormatTok->is(Kind: tok::l_paren))
3239 parseParens();
3240
3241 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3242 if (FormatTok->is(Kind: tok::l_brace))
3243 parseBracedList();
3244
3245 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::comma))
3246 return;
3247
3248 nextToken();
3249 } while (!eof());
3250 }
3251
3252 if (!Style.isJava())
3253 return;
3254
3255 // In Java, we can parse everything up to the parens, which aren't optional.
3256 do {
3257 // There should not be a ;, { or } before the new's open paren.
3258 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::r_brace))
3259 return;
3260
3261 // Consume the parens.
3262 if (FormatTok->is(Kind: tok::l_paren)) {
3263 parseParens();
3264
3265 // If there is a class body of an anonymous class, consume that as child.
3266 if (FormatTok->is(Kind: tok::l_brace))
3267 parseChildBlock();
3268 return;
3269 }
3270 nextToken();
3271 } while (!eof());
3272}
3273
3274void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3275 keepAncestorBraces();
3276
3277 if (isBlockBegin(Tok: *FormatTok)) {
3278 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3279 FormatToken *LeftBrace = FormatTok;
3280 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3281 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3282 /*MunchSemi=*/true, KeepBraces);
3283 setPreviousRBraceType(TT_ControlStatementRBrace);
3284 if (!KeepBraces) {
3285 assert(!NestedTooDeep.empty());
3286 if (!NestedTooDeep.back())
3287 markOptionalBraces(LeftBrace);
3288 }
3289 if (WrapRightBrace)
3290 addUnwrappedLine();
3291 } else {
3292 parseUnbracedBody();
3293 }
3294
3295 if (!KeepBraces)
3296 NestedTooDeep.pop_back();
3297}
3298
3299void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3300 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3301 (Style.isVerilog() &&
3302 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3303 Keywords.kw_always_ff, Keywords.kw_always_latch,
3304 Keywords.kw_final, Keywords.kw_initial,
3305 Keywords.kw_foreach, Keywords.kw_forever,
3306 Keywords.kw_repeat))) &&
3307 "'for', 'while' or foreach macro expected");
3308 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3309 FormatTok->isNoneOf(Ks: tok::kw_for, Ks: tok::kw_while);
3310
3311 nextToken();
3312 // JS' for await ( ...
3313 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_await))
3314 nextToken();
3315 if (IsCpp && FormatTok->is(Kind: tok::kw_co_await))
3316 nextToken();
3317 if (HasParens && FormatTok->is(Kind: tok::l_paren)) {
3318 // The type is only set for Verilog basically because we were afraid to
3319 // change the existing behavior for loops. See the discussion on D121756 for
3320 // details.
3321 if (Style.isVerilog())
3322 FormatTok->setFinalizedType(TT_ConditionLParen);
3323 parseParens();
3324 }
3325
3326 if (Style.isVerilog()) {
3327 // Event control.
3328 parseVerilogSensitivityList();
3329 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(Kind: tok::semi) &&
3330 Tokens->getPreviousToken()->is(Kind: tok::r_paren)) {
3331 nextToken();
3332 addUnwrappedLine();
3333 return;
3334 }
3335
3336 handleAttributes();
3337 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3338}
3339
3340void UnwrappedLineParser::parseDoWhile() {
3341 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3342 nextToken();
3343
3344 parseLoopBody(/*KeepBraces=*/true, WrapRightBrace: Style.BraceWrapping.BeforeWhile);
3345
3346 // FIXME: Add error handling.
3347 if (FormatTok->isNot(Kind: tok::kw_while)) {
3348 addUnwrappedLine();
3349 return;
3350 }
3351
3352 FormatTok->setFinalizedType(TT_DoWhile);
3353
3354 // If in Whitesmiths mode, the line with the while() needs to be indented
3355 // to the same level as the block.
3356 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3357 ++Line->Level;
3358
3359 nextToken();
3360 parseStructuralElement();
3361}
3362
3363void UnwrappedLineParser::parseLabel(
3364 FormatStyle::IndentGotoLabelStyle IndentGotoLabels) {
3365 nextToken();
3366 unsigned OldLineLevel = Line->Level;
3367
3368 switch (IndentGotoLabels) {
3369 case FormatStyle::IGLS_NoIndent:
3370 Line->Level = 0;
3371 break;
3372 case FormatStyle::IGLS_OuterIndent:
3373 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3374 --Line->Level;
3375 break;
3376 case FormatStyle::IGLS_HalfIndent:
3377 case FormatStyle::IGLS_InnerIndent:
3378 break;
3379 }
3380
3381 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3382 FormatTok->is(Kind: tok::l_brace)) {
3383
3384 CompoundStatementIndenter Indenter(this, Line->Level,
3385 Style.BraceWrapping.AfterCaseLabel,
3386 Style.BraceWrapping.IndentBraces);
3387 parseBlock();
3388 if (FormatTok->is(Kind: tok::kw_break)) {
3389 if (Style.BraceWrapping.AfterControlStatement ==
3390 FormatStyle::BWACS_Always) {
3391 addUnwrappedLine();
3392 if (!Style.IndentCaseBlocks &&
3393 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3394 ++Line->Level;
3395 }
3396 }
3397 parseStructuralElement();
3398 }
3399 addUnwrappedLine();
3400 } else {
3401 if (FormatTok->is(Kind: tok::semi))
3402 nextToken();
3403 addUnwrappedLine();
3404 }
3405 Line->Level = OldLineLevel;
3406 if (FormatTok->isNot(Kind: tok::l_brace)) {
3407 parseStructuralElement();
3408 addUnwrappedLine();
3409 }
3410}
3411
3412void UnwrappedLineParser::parseCaseLabel() {
3413 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3414 auto *Case = FormatTok;
3415
3416 // FIXME: fix handling of complex expressions here.
3417 do {
3418 nextToken();
3419 if (FormatTok->is(Kind: tok::colon)) {
3420 FormatTok->setFinalizedType(TT_CaseLabelColon);
3421 break;
3422 }
3423 if (Style.isJava() && FormatTok->is(Kind: tok::arrow)) {
3424 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3425 Case->setFinalizedType(TT_SwitchExpressionLabel);
3426 break;
3427 }
3428 } while (!eof());
3429 parseLabel();
3430}
3431
3432void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3433 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3434 nextToken();
3435 if (FormatTok->is(Kind: tok::l_paren))
3436 parseParens();
3437
3438 keepAncestorBraces();
3439
3440 if (FormatTok->is(Kind: tok::l_brace)) {
3441 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3442 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3443 : TT_ControlStatementLBrace);
3444 if (IsExpr)
3445 parseChildBlock();
3446 else
3447 parseBlock();
3448 setPreviousRBraceType(TT_ControlStatementRBrace);
3449 if (!IsExpr)
3450 addUnwrappedLine();
3451 } else {
3452 addUnwrappedLine();
3453 ++Line->Level;
3454 parseStructuralElement();
3455 --Line->Level;
3456 }
3457
3458 if (Style.RemoveBracesLLVM)
3459 NestedTooDeep.pop_back();
3460}
3461
3462void UnwrappedLineParser::parseAccessSpecifier() {
3463 nextToken();
3464 // Understand Qt's slots.
3465 if (FormatTok->isOneOf(K1: Keywords.kw_slots, K2: Keywords.kw_qslots))
3466 nextToken();
3467 // Otherwise, we don't know what it is, and we'd better keep the next token.
3468 if (FormatTok->is(Kind: tok::colon))
3469 nextToken();
3470 addUnwrappedLine();
3471}
3472
3473/// Parses a requires, decides if it is a clause or an expression.
3474/// \pre The current token has to be the requires keyword.
3475/// \returns true if it parsed a clause.
3476bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3477 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3478
3479 // We try to guess if it is a requires clause, or a requires expression. For
3480 // that we first check the next token.
3481 switch (Tokens->peekNextToken(/*SkipComment=*/true)->Tok.getKind()) {
3482 case tok::l_brace:
3483 // This can only be an expression, never a clause.
3484 parseRequiresExpression();
3485 return false;
3486 case tok::l_paren:
3487 // Clauses and expression can start with a paren, it's unclear what we have.
3488 break;
3489 default:
3490 // All other tokens can only be a clause.
3491 parseRequiresClause();
3492 return true;
3493 }
3494
3495 // Looking forward we would have to decide if there are function declaration
3496 // like arguments to the requires expression:
3497 // requires (T t) {
3498 // Or there is a constraint expression for the requires clause:
3499 // requires (C<T> && ...
3500
3501 // But first let's look behind.
3502 auto *PreviousNonComment = FormatTok->getPreviousNonComment();
3503
3504 if (!PreviousNonComment ||
3505 PreviousNonComment->is(TT: TT_RequiresExpressionLBrace)) {
3506 // If there is no token, or an expression left brace, we are a requires
3507 // clause within a requires expression.
3508 parseRequiresClause();
3509 return true;
3510 }
3511
3512 switch (PreviousNonComment->Tok.getKind()) {
3513 case tok::greater:
3514 case tok::r_paren:
3515 case tok::kw_noexcept:
3516 case tok::kw_const:
3517 case tok::star:
3518 case tok::amp:
3519 // This is a requires clause.
3520 parseRequiresClause();
3521 return true;
3522 case tok::ampamp: {
3523 // This can be either:
3524 // if (... && requires (T t) ...)
3525 // Or
3526 // void member(...) && requires (C<T> ...
3527 // We check the one token before that for a const:
3528 // void member(...) const && requires (C<T> ...
3529 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3530 if ((PrevPrev && PrevPrev->is(Kind: tok::kw_const)) || !SeenEqual) {
3531 parseRequiresClause();
3532 return true;
3533 }
3534 break;
3535 }
3536 default:
3537 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3538 // This is a requires clause.
3539 parseRequiresClause();
3540 return true;
3541 }
3542 // It's an expression.
3543 parseRequiresExpression();
3544 return false;
3545 }
3546
3547 // Now we look forward and try to check if the paren content is a parameter
3548 // list. The parameters can be cv-qualified and contain references or
3549 // pointers.
3550 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3551 // of stuff: typename, const, *, &, &&, ::, identifiers.
3552
3553 unsigned StoredPosition = Tokens->getPosition();
3554 FormatToken *NextToken = Tokens->getNextToken();
3555 int Lookahead = 0;
3556 auto PeekNext = [&Lookahead, &NextToken, this] {
3557 ++Lookahead;
3558 NextToken = Tokens->getNextToken();
3559 };
3560
3561 bool FoundType = false;
3562 bool LastWasColonColon = false;
3563 int OpenAngles = 0;
3564
3565 for (; Lookahead < 50; PeekNext()) {
3566 switch (NextToken->Tok.getKind()) {
3567 case tok::kw_volatile:
3568 case tok::kw_const:
3569 case tok::comma:
3570 if (OpenAngles == 0) {
3571 FormatTok = Tokens->setPosition(StoredPosition);
3572 parseRequiresExpression();
3573 return false;
3574 }
3575 break;
3576 case tok::eof:
3577 // Break out of the loop.
3578 Lookahead = 50;
3579 break;
3580 case tok::coloncolon:
3581 LastWasColonColon = true;
3582 break;
3583 case tok::kw_decltype:
3584 case tok::identifier:
3585 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3586 FormatTok = Tokens->setPosition(StoredPosition);
3587 parseRequiresExpression();
3588 return false;
3589 }
3590 FoundType = true;
3591 LastWasColonColon = false;
3592 break;
3593 case tok::less:
3594 ++OpenAngles;
3595 break;
3596 case tok::greater:
3597 --OpenAngles;
3598 break;
3599 default:
3600 if (NextToken->isTypeName(LangOpts)) {
3601 FormatTok = Tokens->setPosition(StoredPosition);
3602 parseRequiresExpression();
3603 return false;
3604 }
3605 break;
3606 }
3607 }
3608 // This seems to be a complicated expression, just assume it's a clause.
3609 FormatTok = Tokens->setPosition(StoredPosition);
3610 parseRequiresClause();
3611 return true;
3612}
3613
3614/// Parses a requires clause.
3615/// \sa parseRequiresExpression
3616///
3617/// Returns if it either has finished parsing the clause, or it detects, that
3618/// the clause is incorrect.
3619void UnwrappedLineParser::parseRequiresClause() {
3620 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3621
3622 // If there is no previous token, we are within a requires expression,
3623 // otherwise we will always have the template or function declaration in front
3624 // of it.
3625 bool InRequiresExpression =
3626 !FormatTok->Previous ||
3627 FormatTok->Previous->is(TT: TT_RequiresExpressionLBrace);
3628
3629 FormatTok->setFinalizedType(InRequiresExpression
3630 ? TT_RequiresClauseInARequiresExpression
3631 : TT_RequiresClause);
3632 nextToken();
3633
3634 // NOTE: parseConstraintExpression is only ever called from this function.
3635 // It could be inlined into here.
3636 parseConstraintExpression();
3637
3638 if (!InRequiresExpression && FormatTok->Previous)
3639 FormatTok->Previous->ClosesRequiresClause = true;
3640}
3641
3642/// Parses a requires expression.
3643/// \sa parseRequiresClause
3644///
3645/// Returns if it either has finished parsing the expression, or it detects,
3646/// that the expression is incorrect.
3647void UnwrappedLineParser::parseRequiresExpression() {
3648 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3649
3650 FormatTok->setFinalizedType(TT_RequiresExpression);
3651 nextToken();
3652
3653 if (FormatTok->is(Kind: tok::l_paren)) {
3654 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3655 parseParens();
3656 }
3657
3658 if (FormatTok->is(Kind: tok::l_brace)) {
3659 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3660 parseChildBlock();
3661 }
3662}
3663
3664/// Parses a constraint expression.
3665///
3666/// This is the body of a requires clause. It returns, when the parsing is
3667/// complete, or the expression is incorrect.
3668void UnwrappedLineParser::parseConstraintExpression() {
3669 // The special handling for lambdas is needed since tryToParseLambda() eats a
3670 // token and if a requires expression is the last part of a requires clause
3671 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3672 // not set on the correct token. Thus we need to be aware if we even expect a
3673 // lambda to be possible.
3674 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3675 bool LambdaNextTimeAllowed = true;
3676
3677 // Within lambda declarations, it is permitted to put a requires clause after
3678 // its template parameter list, which would place the requires clause right
3679 // before the parentheses of the parameters of the lambda declaration. Thus,
3680 // we track if we expect to see grouping parentheses at all.
3681 // Without this check, `requires foo<T> (T t)` in the below example would be
3682 // seen as the whole requires clause, accidentally eating the parameters of
3683 // the lambda.
3684 // [&]<typename T> requires foo<T> (T t) { ... };
3685 bool TopLevelParensAllowed = true;
3686
3687 do {
3688 bool LambdaThisTimeAllowed = std::exchange(obj&: LambdaNextTimeAllowed, new_val: false);
3689
3690 switch (FormatTok->Tok.getKind()) {
3691 case tok::kw_requires:
3692 parseRequiresExpression();
3693 break;
3694
3695 case tok::l_paren:
3696 if (!TopLevelParensAllowed)
3697 return;
3698 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3699 TopLevelParensAllowed = false;
3700 break;
3701
3702 case tok::l_square:
3703 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3704 return;
3705 break;
3706
3707 case tok::kw_const:
3708 case tok::semi:
3709 case tok::kw_class:
3710 case tok::kw_struct:
3711 case tok::kw_union:
3712 return;
3713
3714 case tok::l_brace:
3715 // Potential function body.
3716 return;
3717
3718 case tok::ampamp:
3719 case tok::pipepipe:
3720 FormatTok->setFinalizedType(TT_BinaryOperator);
3721 nextToken();
3722 LambdaNextTimeAllowed = true;
3723 TopLevelParensAllowed = true;
3724 break;
3725
3726 case tok::comma:
3727 case tok::comment:
3728 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3729 nextToken();
3730 break;
3731
3732 case tok::kw_sizeof:
3733 case tok::greater:
3734 case tok::greaterequal:
3735 case tok::greatergreater:
3736 case tok::less:
3737 case tok::lessequal:
3738 case tok::lessless:
3739 case tok::equalequal:
3740 case tok::exclaim:
3741 case tok::exclaimequal:
3742 case tok::plus:
3743 case tok::minus:
3744 case tok::star:
3745 case tok::slash:
3746 LambdaNextTimeAllowed = true;
3747 TopLevelParensAllowed = true;
3748 // Just eat them.
3749 nextToken();
3750 break;
3751
3752 case tok::numeric_constant:
3753 case tok::coloncolon:
3754 case tok::kw_true:
3755 case tok::kw_false:
3756 TopLevelParensAllowed = false;
3757 // Just eat them.
3758 nextToken();
3759 break;
3760
3761 case tok::kw_static_cast:
3762 case tok::kw_const_cast:
3763 case tok::kw_reinterpret_cast:
3764 case tok::kw_dynamic_cast:
3765 nextToken();
3766 if (FormatTok->isNot(Kind: tok::less))
3767 return;
3768
3769 nextToken();
3770 parseBracedList(/*IsAngleBracket=*/true);
3771 break;
3772
3773 default:
3774 if (!FormatTok->Tok.getIdentifierInfo()) {
3775 // Identifiers are part of the default case, we check for more then
3776 // tok::identifier to handle builtin type traits.
3777 return;
3778 }
3779
3780 // We need to differentiate identifiers for a template deduction guide,
3781 // variables, or function return types (the constraint expression has
3782 // ended before that), and basically all other cases. But it's easier to
3783 // check the other way around.
3784 assert(FormatTok->Previous);
3785 switch (FormatTok->Previous->Tok.getKind()) {
3786 case tok::coloncolon: // Nested identifier.
3787 case tok::ampamp: // Start of a function or variable for the
3788 case tok::pipepipe: // constraint expression. (binary)
3789 case tok::exclaim: // The same as above, but unary.
3790 case tok::kw_requires: // Initial identifier of a requires clause.
3791 case tok::equal: // Initial identifier of a concept declaration.
3792 break;
3793 default:
3794 return;
3795 }
3796
3797 // Read identifier with optional template declaration.
3798 nextToken();
3799 if (FormatTok->is(Kind: tok::less)) {
3800 nextToken();
3801 parseBracedList(/*IsAngleBracket=*/true);
3802 }
3803 TopLevelParensAllowed = false;
3804 break;
3805 }
3806 } while (!eof());
3807}
3808
3809bool UnwrappedLineParser::parseEnum() {
3810 const FormatToken &InitialToken = *FormatTok;
3811
3812 // Won't be 'enum' for NS_ENUMs.
3813 if (FormatTok->is(Kind: tok::kw_enum))
3814 nextToken();
3815
3816 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3817 // declarations. An "enum" keyword followed by a colon would be a syntax
3818 // error and thus assume it is just an identifier.
3819 if (Style.isJavaScript() && FormatTok->isOneOf(K1: tok::colon, K2: tok::question))
3820 return false;
3821
3822 // In protobuf, "enum" can be used as a field name.
3823 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(Kind: tok::equal))
3824 return false;
3825
3826 if (IsCpp) {
3827 // Eat up enum class ...
3828 if (FormatTok->isOneOf(K1: tok::kw_class, K2: tok::kw_struct))
3829 nextToken();
3830 while (FormatTok->is(Kind: tok::l_square))
3831 if (!handleCppAttributes())
3832 return false;
3833 }
3834
3835 while (FormatTok->Tok.getIdentifierInfo() ||
3836 FormatTok->isOneOf(K1: tok::colon, K2: tok::coloncolon, Ks: tok::less,
3837 Ks: tok::greater, Ks: tok::comma, Ks: tok::question,
3838 Ks: tok::l_square)) {
3839 if (Style.isVerilog()) {
3840 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3841 nextToken();
3842 // In Verilog the base type can have dimensions.
3843 while (FormatTok->is(Kind: tok::l_square))
3844 parseSquare();
3845 } else {
3846 nextToken();
3847 }
3848 // We can have macros or attributes in between 'enum' and the enum name.
3849 if (FormatTok->is(Kind: tok::l_paren))
3850 parseParens();
3851 if (FormatTok->is(Kind: tok::identifier)) {
3852 nextToken();
3853 // If there are two identifiers in a row, this is likely an elaborate
3854 // return type. In Java, this can be "implements", etc.
3855 if (IsCpp && FormatTok->is(Kind: tok::identifier))
3856 return false;
3857 }
3858 }
3859
3860 // Just a declaration or something is wrong.
3861 if (FormatTok->isNot(Kind: tok::l_brace))
3862 return true;
3863 FormatTok->setFinalizedType(TT_EnumLBrace);
3864 FormatTok->setBlockKind(BK_Block);
3865
3866 if (Style.isJava()) {
3867 // Java enums are different.
3868 parseJavaEnumBody();
3869 return true;
3870 }
3871 if (Style.Language == FormatStyle::LK_Proto) {
3872 parseBlock(/*MustBeDeclaration=*/true);
3873 return true;
3874 }
3875
3876 if (!Style.AllowShortEnumsOnASingleLine &&
3877 ShouldBreakBeforeBrace(Style, InitialToken,
3878 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace))) {
3879 addUnwrappedLine();
3880 }
3881 // Parse enum body.
3882 nextToken();
3883 if (!Style.AllowShortEnumsOnASingleLine) {
3884 addUnwrappedLine();
3885 Line->Level += 1;
3886 }
3887 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3888 if (!Style.AllowShortEnumsOnASingleLine)
3889 Line->Level -= 1;
3890 if (HasError) {
3891 if (FormatTok->is(Kind: tok::semi))
3892 nextToken();
3893 addUnwrappedLine();
3894 }
3895 setPreviousRBraceType(TT_EnumRBrace);
3896 return true;
3897
3898 // There is no addUnwrappedLine() here so that we fall through to parsing a
3899 // structural element afterwards. Thus, in "enum A {} n, m;",
3900 // "} n, m;" will end up in one unwrapped line.
3901}
3902
3903bool UnwrappedLineParser::parseStructLike() {
3904 // parseRecord falls through and does not yet add an unwrapped line as a
3905 // record declaration or definition can start a structural element.
3906 parseRecord();
3907 // This does not apply to Java, JavaScript and C#.
3908 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3909 if (FormatTok->is(Kind: tok::semi))
3910 nextToken();
3911 addUnwrappedLine();
3912 return true;
3913 }
3914 return false;
3915}
3916
3917namespace {
3918// A class used to set and restore the Token position when peeking
3919// ahead in the token source.
3920class ScopedTokenPosition {
3921 unsigned StoredPosition;
3922 FormatTokenSource *Tokens;
3923
3924public:
3925 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3926 assert(Tokens && "Tokens expected to not be null");
3927 StoredPosition = Tokens->getPosition();
3928 }
3929
3930 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3931};
3932} // namespace
3933
3934// Look to see if we have [[ by looking ahead, if
3935// its not then rewind to the original position.
3936bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3937 ScopedTokenPosition AutoPosition(Tokens);
3938 FormatToken *Tok = Tokens->getNextToken();
3939 // We already read the first [ check for the second.
3940 if (Tok->isNot(Kind: tok::l_square))
3941 return false;
3942 // Double check that the attribute is just something
3943 // fairly simple.
3944 while (Tok->isNot(Kind: tok::eof)) {
3945 if (Tok->is(Kind: tok::r_square))
3946 break;
3947 Tok = Tokens->getNextToken();
3948 }
3949 if (Tok->is(Kind: tok::eof))
3950 return false;
3951 Tok = Tokens->getNextToken();
3952 if (Tok->isNot(Kind: tok::r_square))
3953 return false;
3954 Tok = Tokens->getNextToken();
3955 if (Tok->is(Kind: tok::semi))
3956 return false;
3957 return true;
3958}
3959
3960void UnwrappedLineParser::parseJavaEnumBody() {
3961 assert(FormatTok->is(tok::l_brace));
3962 const FormatToken *OpeningBrace = FormatTok;
3963
3964 // Determine whether the enum is simple, i.e. does not have a semicolon or
3965 // constants with class bodies. Simple enums can be formatted like braced
3966 // lists, contracted to a single line, etc.
3967 unsigned StoredPosition = Tokens->getPosition();
3968 bool IsSimple = true;
3969 FormatToken *Tok = Tokens->getNextToken();
3970 while (Tok->isNot(Kind: tok::eof)) {
3971 if (Tok->is(Kind: tok::r_brace))
3972 break;
3973 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::semi)) {
3974 IsSimple = false;
3975 break;
3976 }
3977 // FIXME: This will also mark enums with braces in the arguments to enum
3978 // constants as "not simple". This is probably fine in practice, though.
3979 Tok = Tokens->getNextToken();
3980 }
3981 FormatTok = Tokens->setPosition(StoredPosition);
3982
3983 if (IsSimple) {
3984 nextToken();
3985 parseBracedList();
3986 addUnwrappedLine();
3987 return;
3988 }
3989
3990 // Parse the body of a more complex enum.
3991 // First add a line for everything up to the "{".
3992 nextToken();
3993 addUnwrappedLine();
3994 ++Line->Level;
3995
3996 // Parse the enum constants.
3997 while (!eof()) {
3998 if (FormatTok->is(Kind: tok::l_brace)) {
3999 // Parse the constant's class body.
4000 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
4001 /*MunchSemi=*/false);
4002 } else if (FormatTok->is(Kind: tok::l_paren)) {
4003 parseParens();
4004 } else if (FormatTok->is(Kind: tok::comma)) {
4005 nextToken();
4006 addUnwrappedLine();
4007 } else if (FormatTok->is(Kind: tok::semi)) {
4008 nextToken();
4009 addUnwrappedLine();
4010 break;
4011 } else if (FormatTok->is(Kind: tok::r_brace)) {
4012 addUnwrappedLine();
4013 break;
4014 } else {
4015 nextToken();
4016 }
4017 }
4018
4019 // Parse the class body after the enum's ";" if any.
4020 parseLevel(OpeningBrace);
4021 nextToken();
4022 --Line->Level;
4023 addUnwrappedLine();
4024}
4025
4026void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
4027 assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
4028 const FormatToken &InitialToken = *FormatTok;
4029 nextToken();
4030
4031 FormatToken *ClassName =
4032 IsJavaRecord && FormatTok->is(Kind: tok::identifier) ? FormatTok : nullptr;
4033 bool IsDerived = false;
4034 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4035 return Tok->is(Kind: tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4036 };
4037 // JavaScript/TypeScript supports anonymous classes like:
4038 // a = class extends foo { }
4039 bool JSPastExtendsOrImplements = false;
4040 // The actual identifier can be a nested name specifier, and in macros
4041 // it is often token-pasted.
4042 // An [[attribute]] can be before the identifier.
4043 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::hashhash,
4044 Ks: tok::kw_alignas, Ks: tok::l_square) ||
4045 FormatTok->isAttribute() ||
4046 ((Style.isJava() || Style.isJavaScript()) &&
4047 FormatTok->isOneOf(K1: tok::period, K2: tok::comma))) {
4048 if (Style.isJavaScript() &&
4049 FormatTok->isOneOf(K1: Keywords.kw_extends, K2: Keywords.kw_implements)) {
4050 JSPastExtendsOrImplements = true;
4051 // JavaScript/TypeScript supports inline object types in
4052 // extends/implements positions:
4053 // class Foo implements {bar: number} { }
4054 nextToken();
4055 if (FormatTok->is(Kind: tok::l_brace)) {
4056 tryToParseBracedList();
4057 continue;
4058 }
4059 }
4060 if (FormatTok->is(Kind: tok::l_square) && handleCppAttributes())
4061 continue;
4062 auto *Previous = FormatTok;
4063 nextToken();
4064 switch (FormatTok->Tok.getKind()) {
4065 case tok::l_paren:
4066 // We can have macros in between 'class' and the class name.
4067 if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
4068 // e.g. `struct macro(a) S { int i; };`
4069 Previous->Previous == &InitialToken) {
4070 parseParens();
4071 }
4072 break;
4073 case tok::coloncolon:
4074 case tok::hashhash:
4075 break;
4076 default:
4077 if (JSPastExtendsOrImplements || ClassName ||
4078 Previous->isNot(Kind: tok::identifier) || Previous->is(TT: TT_AttributeMacro)) {
4079 break;
4080 }
4081 if (const auto Text = Previous->TokenText;
4082 Text.size() == 1 || Text != Text.upper()) {
4083 ClassName = Previous;
4084 }
4085 }
4086 }
4087
4088 auto IsListInitialization = [&] {
4089 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4090 return false;
4091 assert(FormatTok->is(tok::l_brace));
4092 const auto *Prev = FormatTok->getPreviousNonComment();
4093 assert(Prev);
4094 return Prev != ClassName && Prev->is(Kind: tok::identifier) &&
4095 Prev->isNot(Kind: Keywords.kw_final) && tryToParseBracedList();
4096 };
4097
4098 if (FormatTok->isOneOf(K1: tok::colon, K2: tok::less)) {
4099 int AngleNestingLevel = 0;
4100 do {
4101 if (FormatTok->is(Kind: tok::less))
4102 ++AngleNestingLevel;
4103 else if (FormatTok->is(Kind: tok::greater))
4104 --AngleNestingLevel;
4105
4106 if (AngleNestingLevel == 0) {
4107 if (FormatTok->is(Kind: tok::colon)) {
4108 IsDerived = true;
4109 } else if (!IsDerived && FormatTok->is(Kind: tok::identifier) &&
4110 FormatTok->Previous->is(Kind: tok::coloncolon)) {
4111 ClassName = FormatTok;
4112 } else if (FormatTok->is(Kind: tok::l_paren) &&
4113 IsNonMacroIdentifier(FormatTok->Previous)) {
4114 break;
4115 }
4116 }
4117 if (FormatTok->is(Kind: tok::l_brace)) {
4118 if (AngleNestingLevel == 0 && IsListInitialization())
4119 return;
4120 calculateBraceTypes(/*ExpectClassBody=*/true);
4121 if (!tryToParseBracedList())
4122 break;
4123 }
4124 if (FormatTok->is(Kind: tok::l_square)) {
4125 FormatToken *Previous = FormatTok->Previous;
4126 if (!Previous || (Previous->isNot(Kind: tok::r_paren) &&
4127 !Previous->isTypeOrIdentifier(LangOpts))) {
4128 // Don't try parsing a lambda if we had a closing parenthesis before,
4129 // it was probably a pointer to an array: int (*)[].
4130 if (!tryToParseLambda())
4131 continue;
4132 } else {
4133 parseSquare();
4134 continue;
4135 }
4136 }
4137 if (FormatTok->is(Kind: tok::semi))
4138 return;
4139 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where)) {
4140 addUnwrappedLine();
4141 nextToken();
4142 parseCSharpGenericTypeConstraint();
4143 break;
4144 }
4145 nextToken();
4146 } while (!eof());
4147 }
4148
4149 auto GetBraceTypes =
4150 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4151 switch (RecordTok.Tok.getKind()) {
4152 case tok::kw_class:
4153 return {TT_ClassLBrace, TT_ClassRBrace};
4154 case tok::kw_struct:
4155 return {TT_StructLBrace, TT_StructRBrace};
4156 case tok::kw_union:
4157 return {TT_UnionLBrace, TT_UnionRBrace};
4158 default:
4159 // Useful for e.g. interface.
4160 return {TT_RecordLBrace, TT_RecordRBrace};
4161 }
4162 };
4163 if (FormatTok->is(Kind: tok::l_brace)) {
4164 if (IsListInitialization())
4165 return;
4166 if (ClassName)
4167 ClassName->setFinalizedType(TT_ClassHeadName);
4168 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4169 FormatTok->setFinalizedType(OpenBraceType);
4170 if (ParseAsExpr) {
4171 parseChildBlock();
4172 } else {
4173 if (ShouldBreakBeforeBrace(Style, InitialToken,
4174 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace),
4175 IsJavaRecord)) {
4176 addUnwrappedLine();
4177 }
4178
4179 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4180 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4181 }
4182 setPreviousRBraceType(ClosingBraceType);
4183 }
4184 // There is no addUnwrappedLine() here so that we fall through to parsing a
4185 // structural element afterwards. Thus, in "class A {} n, m;",
4186 // "} n, m;" will end up in one unwrapped line.
4187}
4188
4189void UnwrappedLineParser::parseObjCMethod() {
4190 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4191 "'(' or identifier expected.");
4192 do {
4193 if (FormatTok->is(Kind: tok::semi)) {
4194 nextToken();
4195 addUnwrappedLine();
4196 return;
4197 } else if (FormatTok->is(Kind: tok::l_brace)) {
4198 if (Style.BraceWrapping.AfterFunction)
4199 addUnwrappedLine();
4200 parseBlock();
4201 addUnwrappedLine();
4202 return;
4203 } else {
4204 nextToken();
4205 }
4206 } while (!eof());
4207}
4208
4209void UnwrappedLineParser::parseObjCProtocolList() {
4210 assert(FormatTok->is(tok::less) && "'<' expected.");
4211 do {
4212 nextToken();
4213 // Early exit in case someone forgot a close angle.
4214 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4215 return;
4216 } while (!eof() && FormatTok->isNot(Kind: tok::greater));
4217 nextToken(); // Skip '>'.
4218}
4219
4220void UnwrappedLineParser::parseObjCUntilAtEnd() {
4221 do {
4222 if (FormatTok->is(Kind: tok::objc_end)) {
4223 nextToken();
4224 addUnwrappedLine();
4225 break;
4226 }
4227 if (FormatTok->is(Kind: tok::l_brace)) {
4228 parseBlock();
4229 // In ObjC interfaces, nothing should be following the "}".
4230 addUnwrappedLine();
4231 } else if (FormatTok->is(Kind: tok::r_brace)) {
4232 // Ignore stray "}". parseStructuralElement doesn't consume them.
4233 nextToken();
4234 addUnwrappedLine();
4235 } else if (FormatTok->isOneOf(K1: tok::minus, K2: tok::plus)) {
4236 nextToken();
4237 parseObjCMethod();
4238 } else {
4239 parseStructuralElement();
4240 }
4241 } while (!eof());
4242}
4243
4244void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4245 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4246 nextToken();
4247 nextToken(); // interface name
4248
4249 // @interface can be followed by a lightweight generic
4250 // specialization list, then either a base class or a category.
4251 if (FormatTok->is(Kind: tok::less))
4252 parseObjCLightweightGenerics();
4253 if (FormatTok->is(Kind: tok::colon)) {
4254 nextToken();
4255 nextToken(); // base class name
4256 // The base class can also have lightweight generics applied to it.
4257 if (FormatTok->is(Kind: tok::less))
4258 parseObjCLightweightGenerics();
4259 } else if (FormatTok->is(Kind: tok::l_paren)) {
4260 // Skip category, if present.
4261 parseParens();
4262 }
4263
4264 if (FormatTok->is(Kind: tok::less))
4265 parseObjCProtocolList();
4266
4267 if (FormatTok->is(Kind: tok::l_brace)) {
4268 if (Style.BraceWrapping.AfterObjCDeclaration)
4269 addUnwrappedLine();
4270 parseBlock(/*MustBeDeclaration=*/true);
4271 }
4272
4273 // With instance variables, this puts '}' on its own line. Without instance
4274 // variables, this ends the @interface line.
4275 addUnwrappedLine();
4276
4277 parseObjCUntilAtEnd();
4278}
4279
4280void UnwrappedLineParser::parseObjCLightweightGenerics() {
4281 assert(FormatTok->is(tok::less));
4282 // Unlike protocol lists, generic parameterizations support
4283 // nested angles:
4284 //
4285 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4286 // NSObject <NSCopying, NSSecureCoding>
4287 //
4288 // so we need to count how many open angles we have left.
4289 unsigned NumOpenAngles = 1;
4290 do {
4291 nextToken();
4292 // Early exit in case someone forgot a close angle.
4293 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4294 break;
4295 if (FormatTok->is(Kind: tok::less)) {
4296 ++NumOpenAngles;
4297 } else if (FormatTok->is(Kind: tok::greater)) {
4298 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4299 --NumOpenAngles;
4300 }
4301 } while (!eof() && NumOpenAngles != 0);
4302 nextToken(); // Skip '>'.
4303}
4304
4305// Returns true for the declaration/definition form of @protocol,
4306// false for the expression form.
4307bool UnwrappedLineParser::parseObjCProtocol() {
4308 assert(FormatTok->is(tok::objc_protocol));
4309 nextToken();
4310
4311 if (FormatTok->is(Kind: tok::l_paren)) {
4312 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4313 return false;
4314 }
4315
4316 // The definition/declaration form,
4317 // @protocol Foo
4318 // - (int)someMethod;
4319 // @end
4320
4321 nextToken(); // protocol name
4322
4323 if (FormatTok->is(Kind: tok::less))
4324 parseObjCProtocolList();
4325
4326 // Check for protocol declaration.
4327 if (FormatTok->is(Kind: tok::semi)) {
4328 nextToken();
4329 addUnwrappedLine();
4330 return true;
4331 }
4332
4333 addUnwrappedLine();
4334 parseObjCUntilAtEnd();
4335 return true;
4336}
4337
4338void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4339 bool IsImport = FormatTok->is(II: Keywords.kw_import);
4340 assert(IsImport || FormatTok->is(tok::kw_export));
4341 nextToken();
4342
4343 // Consume the "default" in "export default class/function".
4344 if (FormatTok->is(Kind: tok::kw_default))
4345 nextToken();
4346
4347 // Consume "async function", "function" and "default function", so that these
4348 // get parsed as free-standing JS functions, i.e. do not require a trailing
4349 // semicolon.
4350 if (FormatTok->is(II: Keywords.kw_async))
4351 nextToken();
4352 if (FormatTok->is(II: Keywords.kw_function)) {
4353 nextToken();
4354 return;
4355 }
4356
4357 // For imports, `export *`, `export {...}`, consume the rest of the line up
4358 // to the terminating `;`. For everything else, just return and continue
4359 // parsing the structural element, i.e. the declaration or expression for
4360 // `export default`.
4361 if (!IsImport && FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::star) &&
4362 !FormatTok->isStringLiteral() &&
4363 !(FormatTok->is(II: Keywords.kw_type) &&
4364 Tokens->peekNextToken()->isOneOf(K1: tok::l_brace, K2: tok::star))) {
4365 return;
4366 }
4367
4368 while (!eof()) {
4369 if (FormatTok->is(Kind: tok::semi))
4370 return;
4371 if (Line->Tokens.empty()) {
4372 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4373 // import statement should terminate.
4374 return;
4375 }
4376 if (FormatTok->is(Kind: tok::l_brace)) {
4377 FormatTok->setBlockKind(BK_Block);
4378 nextToken();
4379 parseBracedList();
4380 } else {
4381 nextToken();
4382 }
4383 }
4384}
4385
4386void UnwrappedLineParser::parseStatementMacro() {
4387 nextToken();
4388 if (FormatTok->is(Kind: tok::l_paren))
4389 parseParens();
4390 if (FormatTok->is(Kind: tok::semi))
4391 nextToken();
4392 addUnwrappedLine();
4393}
4394
4395void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4396 // consume things like a::`b.c[d:e] or a::*
4397 while (true) {
4398 if (FormatTok->isOneOf(K1: tok::star, K2: tok::period, Ks: tok::periodstar,
4399 Ks: tok::coloncolon, Ks: tok::hash) ||
4400 Keywords.isVerilogIdentifier(Tok: *FormatTok)) {
4401 nextToken();
4402 } else if (FormatTok->is(Kind: tok::l_square)) {
4403 parseSquare();
4404 } else {
4405 break;
4406 }
4407 }
4408}
4409
4410void UnwrappedLineParser::parseVerilogSensitivityList() {
4411 if (FormatTok->isNot(Kind: tok::at))
4412 return;
4413 nextToken();
4414 // A block event expression has 2 at signs.
4415 if (FormatTok->is(Kind: tok::at))
4416 nextToken();
4417 switch (FormatTok->Tok.getKind()) {
4418 case tok::star:
4419 nextToken();
4420 break;
4421 case tok::l_paren:
4422 parseParens();
4423 break;
4424 default:
4425 parseVerilogHierarchyIdentifier();
4426 break;
4427 }
4428}
4429
4430unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4431 unsigned AddLevels = 0;
4432
4433 if (FormatTok->is(II: Keywords.kw_clocking)) {
4434 nextToken();
4435 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4436 nextToken();
4437 parseVerilogSensitivityList();
4438 if (FormatTok->is(Kind: tok::semi))
4439 nextToken();
4440 } else if (FormatTok->isOneOf(K1: tok::kw_case, K2: Keywords.kw_casex,
4441 Ks: Keywords.kw_casez, Ks: Keywords.kw_randcase,
4442 Ks: Keywords.kw_randsequence)) {
4443 if (Style.IndentCaseLabels)
4444 AddLevels++;
4445 nextToken();
4446 if (FormatTok->is(Kind: tok::l_paren)) {
4447 FormatTok->setFinalizedType(TT_ConditionLParen);
4448 parseParens();
4449 }
4450 if (FormatTok->isOneOf(K1: Keywords.kw_inside, K2: Keywords.kw_matches))
4451 nextToken();
4452 // The case header has no semicolon.
4453 } else {
4454 // "module" etc.
4455 nextToken();
4456 // all the words like the name of the module and specifiers like
4457 // "automatic" and the width of function return type
4458 while (true) {
4459 if (FormatTok->is(Kind: tok::l_square)) {
4460 auto Prev = FormatTok->getPreviousNonComment();
4461 if (Prev && Keywords.isVerilogIdentifier(Tok: *Prev))
4462 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4463 parseSquare();
4464 } else if (Keywords.isVerilogIdentifier(Tok: *FormatTok) ||
4465 FormatTok->isOneOf(K1: tok::hash, K2: tok::hashhash, Ks: tok::coloncolon,
4466 Ks: Keywords.kw_automatic, Ks: tok::kw_static)) {
4467 nextToken();
4468 } else {
4469 break;
4470 }
4471 }
4472
4473 auto NewLine = [this]() {
4474 addUnwrappedLine();
4475 Line->IsContinuation = true;
4476 };
4477
4478 // package imports
4479 while (FormatTok->is(II: Keywords.kw_import)) {
4480 NewLine();
4481 nextToken();
4482 parseVerilogHierarchyIdentifier();
4483 if (FormatTok->is(Kind: tok::semi))
4484 nextToken();
4485 }
4486
4487 // parameters and ports
4488 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
4489 NewLine();
4490 nextToken();
4491 if (FormatTok->is(Kind: tok::l_paren)) {
4492 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4493 parseParens();
4494 }
4495 }
4496 if (FormatTok->is(Kind: tok::l_paren)) {
4497 NewLine();
4498 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4499 parseParens();
4500 }
4501
4502 // extends and implements
4503 if (FormatTok->is(II: Keywords.kw_extends)) {
4504 NewLine();
4505 nextToken();
4506 parseVerilogHierarchyIdentifier();
4507 if (FormatTok->is(Kind: tok::l_paren))
4508 parseParens();
4509 }
4510 if (FormatTok->is(II: Keywords.kw_implements)) {
4511 NewLine();
4512 do {
4513 nextToken();
4514 parseVerilogHierarchyIdentifier();
4515 } while (FormatTok->is(Kind: tok::comma));
4516 }
4517
4518 // Coverage event for cover groups.
4519 if (FormatTok->is(Kind: tok::at)) {
4520 NewLine();
4521 parseVerilogSensitivityList();
4522 }
4523
4524 if (FormatTok->is(Kind: tok::semi))
4525 nextToken(/*LevelDifference=*/1);
4526 addUnwrappedLine();
4527 }
4528
4529 return AddLevels;
4530}
4531
4532void UnwrappedLineParser::parseVerilogTable() {
4533 assert(FormatTok->is(Keywords.kw_table));
4534 nextToken(/*LevelDifference=*/1);
4535 addUnwrappedLine();
4536
4537 auto InitialLevel = Line->Level++;
4538 while (!eof() && !Keywords.isVerilogEnd(Tok: *FormatTok)) {
4539 FormatToken *Tok = FormatTok;
4540 nextToken();
4541 if (Tok->is(Kind: tok::semi))
4542 addUnwrappedLine();
4543 else if (Tok->isOneOf(K1: tok::star, K2: tok::colon, Ks: tok::question, Ks: tok::minus))
4544 Tok->setFinalizedType(TT_VerilogTableItem);
4545 }
4546 Line->Level = InitialLevel;
4547 nextToken(/*LevelDifference=*/-1);
4548 addUnwrappedLine();
4549}
4550
4551void UnwrappedLineParser::parseVerilogCaseLabel() {
4552 // The label will get unindented in AnnotatingParser. If there are no leading
4553 // spaces, indent the rest here so that things inside the block will be
4554 // indented relative to things outside. We don't use parseLabel because we
4555 // don't know whether this colon is a label or a ternary expression at this
4556 // point.
4557 auto OrigLevel = Line->Level;
4558 auto FirstLine = CurrentLines->size();
4559 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4560 ++Line->Level;
4561 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(Tok: *FormatTok))
4562 --Line->Level;
4563 parseStructuralElement();
4564 // Restore the indentation in both the new line and the line that has the
4565 // label.
4566 if (CurrentLines->size() > FirstLine)
4567 (*CurrentLines)[FirstLine].Level = OrigLevel;
4568 Line->Level = OrigLevel;
4569}
4570
4571void UnwrappedLineParser::parseVerilogExtern() {
4572 assert(
4573 FormatTok->isOneOf(tok::kw_extern, tok::kw_export, Keywords.kw_import));
4574 nextToken();
4575 // "DPI-C"
4576 if (FormatTok->is(Kind: tok::string_literal))
4577 nextToken();
4578 if (FormatTok->isOneOf(K1: Keywords.kw_context, K2: Keywords.kw_pure))
4579 nextToken();
4580 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4581 nextToken();
4582 if (FormatTok->is(Kind: tok::equal))
4583 nextToken();
4584 if (Keywords.isVerilogHierarchy(Tok: *FormatTok))
4585 parseVerilogHierarchyHeader();
4586}
4587
4588bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4589 for (const auto &N : Line.Tokens) {
4590 if (N.Tok->MacroCtx)
4591 return true;
4592 for (const UnwrappedLine &Child : N.Children)
4593 if (containsExpansion(Line: Child))
4594 return true;
4595 }
4596 return false;
4597}
4598
4599void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4600 if (Line->Tokens.empty())
4601 return;
4602 LLVM_DEBUG({
4603 if (!parsingPPDirective()) {
4604 llvm::dbgs() << "Adding unwrapped line:\n";
4605 printDebugInfo(*Line);
4606 }
4607 });
4608
4609 // If this line closes a block when in Whitesmiths mode, remember that
4610 // information so that the level can be decreased after the line is added.
4611 // This has to happen after the addition of the line since the line itself
4612 // needs to be indented.
4613 bool ClosesWhitesmithsBlock =
4614 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4615 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4616
4617 // If the current line was expanded from a macro call, we use it to
4618 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4619 // line and the unexpanded token stream.
4620 if (!parsingPPDirective() && !InExpansion && containsExpansion(Line: *Line)) {
4621 if (!Reconstruct)
4622 Reconstruct.emplace(args&: Line->Level, args&: Unexpanded);
4623 Reconstruct->addLine(Line: *Line);
4624
4625 // While the reconstructed unexpanded lines are stored in the normal
4626 // flow of lines, the expanded lines are stored on the side to be analyzed
4627 // in an extra step.
4628 CurrentExpandedLines.push_back(Elt: std::move(*Line));
4629
4630 if (Reconstruct->finished()) {
4631 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4632 assert(!Reconstructed.Tokens.empty() &&
4633 "Reconstructed must at least contain the macro identifier.");
4634 assert(!parsingPPDirective());
4635 LLVM_DEBUG({
4636 llvm::dbgs() << "Adding unexpanded line:\n";
4637 printDebugInfo(Reconstructed);
4638 });
4639 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4640 Lines.push_back(Elt: std::move(Reconstructed));
4641 CurrentExpandedLines.clear();
4642 Reconstruct.reset();
4643 }
4644 } else {
4645 // At the top level we only get here when no unexpansion is going on, or
4646 // when conditional formatting led to unfinished macro reconstructions.
4647 assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
4648 CurrentLines->push_back(Elt: std::move(*Line));
4649 }
4650 Line->Tokens.clear();
4651 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4652 Line->FirstStartColumn = 0;
4653 Line->IsContinuation = false;
4654 Line->SeenDecltypeAuto = false;
4655
4656 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4657 --Line->Level;
4658 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4659 CurrentLines->append(
4660 in_start: std::make_move_iterator(i: PreprocessorDirectives.begin()),
4661 in_end: std::make_move_iterator(i: PreprocessorDirectives.end()));
4662 PreprocessorDirectives.clear();
4663 }
4664 // Disconnect the current token from the last token on the previous line.
4665 FormatTok->Previous = nullptr;
4666}
4667
4668bool UnwrappedLineParser::eof() const { return FormatTok->is(Kind: tok::eof); }
4669
4670bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4671 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4672 FormatTok.NewlinesBefore > 0;
4673}
4674
4675// Checks if \p FormatTok is a line comment that continues the line comment
4676// section on \p Line.
4677static bool
4678continuesLineCommentSection(const FormatToken &FormatTok,
4679 const UnwrappedLine &Line, const FormatStyle &Style,
4680 const llvm::Regex &CommentPragmasRegex) {
4681 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4682 return false;
4683
4684 StringRef IndentContent = FormatTok.TokenText;
4685 if (FormatTok.TokenText.starts_with(Prefix: "//") ||
4686 FormatTok.TokenText.starts_with(Prefix: "/*")) {
4687 IndentContent = FormatTok.TokenText.substr(Start: 2);
4688 }
4689 if (CommentPragmasRegex.match(String: IndentContent))
4690 return false;
4691
4692 // If Line starts with a line comment, then FormatTok continues the comment
4693 // section if its original column is greater or equal to the original start
4694 // column of the line.
4695 //
4696 // Define the min column token of a line as follows: if a line ends in '{' or
4697 // contains a '{' followed by a line comment, then the min column token is
4698 // that '{'. Otherwise, the min column token of the line is the first token of
4699 // the line.
4700 //
4701 // If Line starts with a token other than a line comment, then FormatTok
4702 // continues the comment section if its original column is greater than the
4703 // original start column of the min column token of the line.
4704 //
4705 // For example, the second line comment continues the first in these cases:
4706 //
4707 // // first line
4708 // // second line
4709 //
4710 // and:
4711 //
4712 // // first line
4713 // // second line
4714 //
4715 // and:
4716 //
4717 // int i; // first line
4718 // // second line
4719 //
4720 // and:
4721 //
4722 // do { // first line
4723 // // second line
4724 // int i;
4725 // } while (true);
4726 //
4727 // and:
4728 //
4729 // enum {
4730 // a, // first line
4731 // // second line
4732 // b
4733 // };
4734 //
4735 // The second line comment doesn't continue the first in these cases:
4736 //
4737 // // first line
4738 // // second line
4739 //
4740 // and:
4741 //
4742 // int i; // first line
4743 // // second line
4744 //
4745 // and:
4746 //
4747 // do { // first line
4748 // // second line
4749 // int i;
4750 // } while (true);
4751 //
4752 // and:
4753 //
4754 // enum {
4755 // a, // first line
4756 // // second line
4757 // };
4758 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4759
4760 // Scan for '{//'. If found, use the column of '{' as a min column for line
4761 // comment section continuation.
4762 const FormatToken *PreviousToken = nullptr;
4763 for (const UnwrappedLineNode &Node : Line.Tokens) {
4764 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace) &&
4765 isLineComment(FormatTok: *Node.Tok)) {
4766 MinColumnToken = PreviousToken;
4767 break;
4768 }
4769 PreviousToken = Node.Tok;
4770
4771 // Grab the last newline preceding a token in this unwrapped line.
4772 if (Node.Tok->NewlinesBefore > 0)
4773 MinColumnToken = Node.Tok;
4774 }
4775 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace))
4776 MinColumnToken = PreviousToken;
4777
4778 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4779 MinColumnToken);
4780}
4781
4782void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4783 bool JustComments = Line->Tokens.empty();
4784 for (FormatToken *Tok : CommentsBeforeNextToken) {
4785 // Line comments that belong to the same line comment section are put on the
4786 // same line since later we might want to reflow content between them.
4787 // Additional fine-grained breaking of line comment sections is controlled
4788 // by the class BreakableLineCommentSection in case it is desirable to keep
4789 // several line comment sections in the same unwrapped line.
4790 //
4791 // FIXME: Consider putting separate line comment sections as children to the
4792 // unwrapped line instead.
4793 Tok->ContinuesLineCommentSection =
4794 continuesLineCommentSection(FormatTok: *Tok, Line: *Line, Style, CommentPragmasRegex);
4795 if (isOnNewLine(FormatTok: *Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4796 addUnwrappedLine();
4797 pushToken(Tok);
4798 }
4799 if (NewlineBeforeNext && JustComments)
4800 addUnwrappedLine();
4801 CommentsBeforeNextToken.clear();
4802}
4803
4804void UnwrappedLineParser::nextToken(int LevelDifference) {
4805 if (eof())
4806 return;
4807 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4808 pushToken(Tok: FormatTok);
4809 FormatToken *Previous = FormatTok;
4810 if (!Style.isJavaScript())
4811 readToken(LevelDifference);
4812 else
4813 readTokenWithJavaScriptASI();
4814 FormatTok->Previous = Previous;
4815 if (Style.isVerilog()) {
4816 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4817 // keywords like `begin`, we can't treat them the same as left braces
4818 // because some contexts require one of them. For example structs use
4819 // braces and if blocks use keywords, and a left brace can occur in an if
4820 // statement, but it is not a block. For keywords like `end`, we simply
4821 // treat them the same as right braces.
4822 if (Keywords.isVerilogEnd(Tok: *FormatTok))
4823 FormatTok->Tok.setKind(tok::r_brace);
4824 }
4825}
4826
4827void UnwrappedLineParser::distributeComments(
4828 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4829 // Whether or not a line comment token continues a line is controlled by
4830 // the method continuesLineCommentSection, with the following caveat:
4831 //
4832 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4833 // that each comment line from the trail is aligned with the next token, if
4834 // the next token exists. If a trail exists, the beginning of the maximal
4835 // trail is marked as a start of a new comment section.
4836 //
4837 // For example in this code:
4838 //
4839 // int a; // line about a
4840 // // line 1 about b
4841 // // line 2 about b
4842 // int b;
4843 //
4844 // the two lines about b form a maximal trail, so there are two sections, the
4845 // first one consisting of the single comment "// line about a" and the
4846 // second one consisting of the next two comments.
4847 if (Comments.empty())
4848 return;
4849 bool ShouldPushCommentsInCurrentLine = true;
4850 bool HasTrailAlignedWithNextToken = false;
4851 unsigned StartOfTrailAlignedWithNextToken = 0;
4852 if (NextTok) {
4853 // We are skipping the first element intentionally.
4854 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4855 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4856 HasTrailAlignedWithNextToken = true;
4857 StartOfTrailAlignedWithNextToken = i;
4858 }
4859 }
4860 }
4861 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4862 FormatToken *FormatTok = Comments[i];
4863 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4864 FormatTok->ContinuesLineCommentSection = false;
4865 } else {
4866 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4867 FormatTok: *FormatTok, Line: *Line, Style, CommentPragmasRegex);
4868 }
4869 if (!FormatTok->ContinuesLineCommentSection &&
4870 (isOnNewLine(FormatTok: *FormatTok) || FormatTok->IsFirst)) {
4871 ShouldPushCommentsInCurrentLine = false;
4872 }
4873 if (ShouldPushCommentsInCurrentLine)
4874 pushToken(Tok: FormatTok);
4875 else
4876 CommentsBeforeNextToken.push_back(Elt: FormatTok);
4877 }
4878}
4879
4880void UnwrappedLineParser::readToken(int LevelDifference) {
4881 SmallVector<FormatToken *, 1> Comments;
4882 bool PreviousWasComment = false;
4883 bool FirstNonCommentOnLine = false;
4884 do {
4885 FormatTok = Tokens->getNextToken();
4886 assert(FormatTok);
4887 while (FormatTok->isOneOf(K1: TT_ConflictStart, K2: TT_ConflictEnd,
4888 Ks: TT_ConflictAlternative)) {
4889 if (FormatTok->is(TT: TT_ConflictStart))
4890 conditionalCompilationStart(/*Unreachable=*/false);
4891 else if (FormatTok->is(TT: TT_ConflictAlternative))
4892 conditionalCompilationAlternative();
4893 else if (FormatTok->is(TT: TT_ConflictEnd))
4894 conditionalCompilationEnd();
4895 FormatTok = Tokens->getNextToken();
4896 FormatTok->MustBreakBefore = true;
4897 FormatTok->MustBreakBeforeFinalized = true;
4898 }
4899
4900 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4901 const FormatToken &Tok,
4902 bool PreviousWasComment) {
4903 auto IsFirstOnLine = [](const FormatToken &Tok) {
4904 return Tok.HasUnescapedNewline || Tok.IsFirst;
4905 };
4906
4907 // Consider preprocessor directives preceded by block comments as first
4908 // on line.
4909 if (PreviousWasComment)
4910 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4911 return IsFirstOnLine(Tok);
4912 };
4913
4914 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4915 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4916 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4917
4918 while (!Line->InPPDirective && FormatTok->is(Kind: tok::hash) &&
4919 FirstNonCommentOnLine) {
4920 // In Verilog, the backtick is used for macro invocations. In TableGen,
4921 // the single hash is used for the paste operator.
4922 const auto *Next = Tokens->peekNextToken();
4923 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(Tok: *Next)) ||
4924 (Style.isTableGen() &&
4925 Next->isNoneOf(Ks: tok::kw_else, Ks: tok::pp_define, Ks: tok::pp_ifdef,
4926 Ks: tok::pp_ifndef, Ks: tok::pp_endif))) {
4927 break;
4928 }
4929 distributeComments(Comments, NextTok: FormatTok);
4930 Comments.clear();
4931 // If there is an unfinished unwrapped line, we flush the preprocessor
4932 // directives only after that unwrapped line was finished later.
4933 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4934 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4935 assert((LevelDifference >= 0 ||
4936 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4937 "LevelDifference makes Line->Level negative");
4938 Line->Level += LevelDifference;
4939 // Comments stored before the preprocessor directive need to be output
4940 // before the preprocessor directive, at the same level as the
4941 // preprocessor directive, as we consider them to apply to the directive.
4942 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4943 PPBranchLevel > 0) {
4944 Line->Level += PPBranchLevel;
4945 }
4946 assert(Line->Level >= Line->UnbracedBodyLevel);
4947 Line->Level -= Line->UnbracedBodyLevel;
4948 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4949 const bool IsEndIf = Tokens->peekNextToken()->is(Kind: tok::pp_endif);
4950 parsePPDirective();
4951 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4952 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4953 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4954 // If the #endif of a potential include guard is the last thing in the
4955 // file, then we found an include guard.
4956 if (IsEndIf && IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
4957 getIncludeGuardState(Style: Style.IndentPPDirectives) == IG_Inited &&
4958 (eof() ||
4959 (PreviousWasComment &&
4960 Tokens->peekNextToken(/*SkipComment=*/true)->is(Kind: tok::eof)))) {
4961 IncludeGuard = IG_Found;
4962 }
4963 }
4964
4965 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4966 !Line->InPPDirective) {
4967 continue;
4968 }
4969
4970 if (FormatTok->is(Kind: tok::identifier) &&
4971 Macros.defined(Name: FormatTok->TokenText) &&
4972 // FIXME: Allow expanding macros in preprocessor directives.
4973 !Line->InPPDirective) {
4974 FormatToken *ID = FormatTok;
4975 unsigned Position = Tokens->getPosition();
4976
4977 // To correctly parse the code, we need to replace the tokens of the macro
4978 // call with its expansion.
4979 auto PreCall = std::move(Line);
4980 Line.reset(p: new UnwrappedLine);
4981 bool OldInExpansion = InExpansion;
4982 InExpansion = true;
4983 // We parse the macro call into a new line.
4984 auto Args = parseMacroCall();
4985 InExpansion = OldInExpansion;
4986 assert(Line->Tokens.front().Tok == ID);
4987 // And remember the unexpanded macro call tokens.
4988 auto UnexpandedLine = std::move(Line);
4989 // Reset to the old line.
4990 Line = std::move(PreCall);
4991
4992 LLVM_DEBUG({
4993 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4994 if (Args) {
4995 llvm::dbgs() << "(";
4996 for (const auto &Arg : Args.value())
4997 for (const auto &T : Arg)
4998 llvm::dbgs() << T->TokenText << " ";
4999 llvm::dbgs() << ")";
5000 }
5001 llvm::dbgs() << "\n";
5002 });
5003 if (Macros.objectLike(Name: ID->TokenText) && Args &&
5004 !Macros.hasArity(Name: ID->TokenText, Arity: Args->size())) {
5005 // The macro is either
5006 // - object-like, but we got argumnets, or
5007 // - overloaded to be both object-like and function-like, but none of
5008 // the function-like arities match the number of arguments.
5009 // Thus, expand as object-like macro.
5010 LLVM_DEBUG(llvm::dbgs()
5011 << "Macro \"" << ID->TokenText
5012 << "\" not overloaded for arity " << Args->size()
5013 << "or not function-like, using object-like overload.");
5014 Args.reset();
5015 UnexpandedLine->Tokens.resize(new_size: 1);
5016 Tokens->setPosition(Position);
5017 nextToken();
5018 assert(!Args && Macros.objectLike(ID->TokenText));
5019 }
5020 if ((!Args && Macros.objectLike(Name: ID->TokenText)) ||
5021 (Args && Macros.hasArity(Name: ID->TokenText, Arity: Args->size()))) {
5022 // Next, we insert the expanded tokens in the token stream at the
5023 // current position, and continue parsing.
5024 Unexpanded[ID] = std::move(UnexpandedLine);
5025 SmallVector<FormatToken *, 8> Expansion =
5026 Macros.expand(ID, OptionalArgs: std::move(Args));
5027 if (!Expansion.empty())
5028 FormatTok = Tokens->insertTokens(Tokens: Expansion);
5029
5030 LLVM_DEBUG({
5031 llvm::dbgs() << "Expanded: ";
5032 for (const auto &T : Expansion)
5033 llvm::dbgs() << T->TokenText << " ";
5034 llvm::dbgs() << "\n";
5035 });
5036 } else {
5037 LLVM_DEBUG({
5038 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
5039 << "\", because it was used ";
5040 if (Args)
5041 llvm::dbgs() << "with " << Args->size();
5042 else
5043 llvm::dbgs() << "without";
5044 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
5045 });
5046 Tokens->setPosition(Position);
5047 FormatTok = ID;
5048 }
5049 }
5050
5051 if (FormatTok->isNot(Kind: tok::comment)) {
5052 distributeComments(Comments, NextTok: FormatTok);
5053 Comments.clear();
5054 return;
5055 }
5056
5057 Comments.push_back(Elt: FormatTok);
5058 } while (!eof());
5059
5060 distributeComments(Comments, NextTok: nullptr);
5061 Comments.clear();
5062}
5063
5064namespace {
5065template <typename Iterator>
5066void pushTokens(Iterator Begin, Iterator End,
5067 SmallVectorImpl<FormatToken *> &Into) {
5068 for (auto I = Begin; I != End; ++I) {
5069 Into.push_back(Elt: I->Tok);
5070 for (const auto &Child : I->Children)
5071 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5072 }
5073}
5074} // namespace
5075
5076std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5077UnwrappedLineParser::parseMacroCall() {
5078 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5079 assert(Line->Tokens.empty());
5080 nextToken();
5081 if (FormatTok->isNot(Kind: tok::l_paren))
5082 return Args;
5083 unsigned Position = Tokens->getPosition();
5084 FormatToken *Tok = FormatTok;
5085 nextToken();
5086 Args.emplace();
5087 auto ArgStart = std::prev(x: Line->Tokens.end());
5088
5089 int Parens = 0;
5090 do {
5091 switch (FormatTok->Tok.getKind()) {
5092 case tok::l_paren:
5093 ++Parens;
5094 nextToken();
5095 break;
5096 case tok::r_paren: {
5097 if (Parens > 0) {
5098 --Parens;
5099 nextToken();
5100 break;
5101 }
5102 Args->push_back(Elt: {});
5103 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5104 nextToken();
5105 return Args;
5106 }
5107 case tok::comma: {
5108 if (Parens > 0) {
5109 nextToken();
5110 break;
5111 }
5112 Args->push_back(Elt: {});
5113 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5114 nextToken();
5115 ArgStart = std::prev(x: Line->Tokens.end());
5116 break;
5117 }
5118 default:
5119 nextToken();
5120 break;
5121 }
5122 } while (!eof());
5123 Line->Tokens.resize(new_size: 1);
5124 Tokens->setPosition(Position);
5125 FormatTok = Tok;
5126 return {};
5127}
5128
5129void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5130 Line->Tokens.push_back(x: UnwrappedLineNode(Tok));
5131 if (AtEndOfPPLine) {
5132 auto &Tok = *Line->Tokens.back().Tok;
5133 Tok.MustBreakBefore = true;
5134 Tok.MustBreakBeforeFinalized = true;
5135 Tok.FirstAfterPPLine = true;
5136 AtEndOfPPLine = false;
5137 }
5138}
5139
5140} // end namespace format
5141} // end namespace clang
5142