1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, Line: *CI, Prefix: (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63[[maybe_unused]] static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(OS&: llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(Val: MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
96class ScopedLineState {
97public:
98 ScopedLineState(UnwrappedLineParser &Parser,
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
114 ~ScopedLineState() {
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
125 UnwrappedLineParser &Parser;
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
131class CompoundStatementIndenter {
132public:
133 CompoundStatementIndenter(UnwrappedLineParser *Parser,
134 const FormatStyle &Style, unsigned &LineLevel)
135 : CompoundStatementIndenter(Parser, LineLevel,
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
139 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
154UnwrappedLineParser::UnwrappedLineParser(
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
157 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(getIncludeGuardState(Style: Style.IndentPPDirectives)),
166 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
167 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
168
169void UnwrappedLineParser::reset() {
170 PPBranchLevel = -1;
171 IncludeGuard = getIncludeGuardState(Style: Style.IndentPPDirectives);
172 IncludeGuardToken = nullptr;
173 Line.reset(p: new UnwrappedLine);
174 CommentsBeforeNextToken.clear();
175 FormatTok = nullptr;
176 AtEndOfPPLine = false;
177 IsDecltypeAutoFunction = false;
178 PreprocessorDirectives.clear();
179 CurrentLines = &Lines;
180 DeclarationScopeStack.clear();
181 NestedTooDeep.clear();
182 NestedLambdas.clear();
183 PPStack.clear();
184 Line->FirstStartColumn = FirstStartColumn;
185
186 if (!Unexpanded.empty())
187 for (FormatToken *Token : AllTokens)
188 Token->MacroCtx.reset();
189 CurrentExpandedLines.clear();
190 ExpandedLines.clear();
191 Unexpanded.clear();
192 InExpansion = false;
193 Reconstruct.reset();
194}
195
/// Parses all tokens into unwrapped lines and reports them to Callback.
///
/// The do/while body resets the parser, rewinds the token source, parses the
/// file and hands the resulting lines to Callback. It repeats until
/// PPLevelBranchIndex is empty, advancing the deepest remaining branch index
/// after each pass.
196void UnwrappedLineParser::parse() {
197 IndexedTokenSource TokenSource(AllTokens);
198 Line->FirstStartColumn = FirstStartColumn;
199 do {
200 LLVM_DEBUG(llvm::dbgs() << "----\n");
 // Every pass starts from a clean parser state and from the first token.
201 reset();
202 Tokens = &TokenSource;
203 TokenSource.reset();
204
205 readToken();
206 parseFile();
207
208 // If we found an include guard then all preprocessor directives (other than
209 // the guard) are over-indented by one.
210 if (IncludeGuard == IG_Found) {
211 for (auto &Line : Lines)
212 if (Line.InPPDirective && Line.Level > 0)
213 --Line.Level;
214 }
215
216 // Create line with eof token.
217 assert(eof());
218 pushToken(Tok: FormatTok);
219 addUnwrappedLine();
220
221 // In a first run, format everything with the lines containing macro calls
222 // replaced by the expansion.
223 if (!ExpandedLines.empty()) {
224 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
225 for (const auto &Line : Lines) {
226 if (!Line.Tokens.empty()) {
 // ExpandedLines is looked up by the first token of the line.
227 auto it = ExpandedLines.find(Val: Line.Tokens.begin()->Tok);
228 if (it != ExpandedLines.end()) {
229 for (const auto &Expanded : it->second) {
230 LLVM_DEBUG(printDebugInfo(Expanded));
231 Callback.consumeUnwrappedLine(Line: Expanded);
232 }
 // The expansions were reported instead of the original line.
233 continue;
234 }
235 }
236 LLVM_DEBUG(printDebugInfo(Line));
237 Callback.consumeUnwrappedLine(Line);
238 }
239 Callback.finishRun();
240 }
241
 // Report the lines as parsed; this always happens, whether or not the
 // expanded run above took place.
242 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
243 for (const UnwrappedLine &Line : Lines) {
244 LLVM_DEBUG(printDebugInfo(Line));
245 Callback.consumeUnwrappedLine(Line);
246 }
247 Callback.finishRun();
248 Lines.clear();
 // Drop trailing levels whose branch index has reached the last branch
 // (index + 1 >= count) ...
249 while (!PPLevelBranchIndex.empty() &&
250 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
251 PPLevelBranchIndex.resize(N: PPLevelBranchIndex.size() - 1);
252 PPLevelBranchCount.resize(N: PPLevelBranchCount.size() - 1);
253 }
 // ... then advance the deepest remaining level for the next pass.
254 if (!PPLevelBranchIndex.empty()) {
255 ++PPLevelBranchIndex.back();
256 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
257 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 }
259 } while (!PPLevelBranchIndex.empty());
260}
261
262void UnwrappedLineParser::parseFile() {
263 // The top-level context in a file always has declarations, except for pre-
264 // processor directives and JavaScript files.
265 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
266 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
267 MustBeDeclaration);
268 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
269 parseBracedList();
270 else
271 parseLevel();
272 // Make sure to format the remaining tokens.
273 //
274 // LK_TextProto is special since its top-level is parsed as the body of a
275 // braced list, which does not necessarily have natural line separators such
276 // as a semicolon. Comments after the last entry that have been determined to
277 // not belong to that line, as in:
278 // key: value
279 // // endfile comment
280 // do not have a chance to be put on a line of their own until this point.
281 // Here we add this newline before end-of-file comments.
282 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
283 addUnwrappedLine();
284 flushComments(NewlineBeforeNext: true);
285 addUnwrappedLine();
286}
287
288void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289 do {
290 switch (FormatTok->Tok.getKind()) {
291 case tok::l_brace:
292 case tok::semi:
293 return;
294 default:
295 if (FormatTok->is(II: Keywords.kw_where)) {
296 addUnwrappedLine();
297 nextToken();
298 parseCSharpGenericTypeConstraint();
299 break;
300 }
301 nextToken();
302 break;
303 }
304 } while (!eof());
305}
306
307void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets = 1;
309 do {
310 switch (FormatTok->Tok.getKind()) {
311 case tok::r_square:
312 nextToken();
313 --UnpairedSquareBrackets;
314 if (UnpairedSquareBrackets == 0) {
315 addUnwrappedLine();
316 return;
317 }
318 break;
319 case tok::l_square:
320 ++UnpairedSquareBrackets;
321 nextToken();
322 break;
323 default:
324 nextToken();
325 break;
326 }
327 } while (!eof());
328}
329
330bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines.empty() && Lines.back().InPPDirective)
332 return true;
333
334 const FormatToken *Previous = Tokens->getPreviousToken();
335 return Previous && Previous->is(Kind: tok::comment) &&
336 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
337}
338
339/// Parses a level, that is, the sequence of structural elements up to the
/// closing brace (or macro block end) matching \p OpeningBrace. Without an
/// opening brace, closing braces are consumed and parsing runs to end of file.
340/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341/// \param IfKind The \p if statement kind in the level.
342/// \param IfLeftBrace The left brace of the \p if block in the level.
343/// \returns true if a simple block of if/else/for/while, or false otherwise.
344/// (A simple block has a single statement.)
345bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
346 IfStmtKind *IfKind,
347 FormatToken **IfLeftBrace) {
348 const bool InRequiresExpression =
349 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
 // Evaluated once on entry. It is always true when RemoveBracesLLVM is off,
 // in which case the r_brace handling below never returns true.
350 const bool IsPrecededByCommentOrPPDirective =
351 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
352 FormatToken *IfLBrace = nullptr;
353 bool HasDoWhile = false;
354 bool HasLabel = false;
355 unsigned StatementCount = 0;
356 bool SwitchLabelEncountered = false;
357
358 do {
 // Skip an attribute token and its parenthesized arguments, if any.
359 if (FormatTok->isAttribute()) {
360 nextToken();
361 if (FormatTok->is(Kind: tok::l_paren))
362 parseParens();
363 continue;
364 }
 // Macro block begin/end tokens are dispatched like braces.
365 tok::TokenKind Kind = FormatTok->Tok.getKind();
366 if (FormatTok->is(TT: TT_MacroBlockBegin))
367 Kind = tok::l_brace;
368 else if (FormatTok->is(TT: TT_MacroBlockEnd))
369 Kind = tok::r_brace;
370
 // Parses one structural element and counts it as a statement. Once
 // HasDoWhile / HasLabel has been set, nullptr is passed for it instead.
371 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
372 &HasLabel, &StatementCount] {
373 parseStructuralElement(OpeningBrace, IfKind, IfLeftBrace: &IfLBrace,
374 HasDoWhile: HasDoWhile ? nullptr : &HasDoWhile,
375 HasLabel: HasLabel ? nullptr : &HasLabel);
376 ++StatementCount;
377 assert(StatementCount > 0 && "StatementCount overflow!");
378 };
379
380 switch (Kind) {
381 case tok::comment:
382 nextToken();
383 addUnwrappedLine();
384 break;
385 case tok::l_brace:
386 if (InRequiresExpression) {
387 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
388 } else if (FormatTok->Previous &&
389 FormatTok->Previous->ClosesRequiresClause) {
390 // We need the 'default' case here to correctly parse a function
391 // l_brace.
392 ParseDefault();
393 continue;
394 }
395 if (!InRequiresExpression && FormatTok->isNot(Kind: TT_MacroBlockBegin)) {
396 if (tryToParseBracedList())
397 continue;
398 FormatTok->setFinalizedType(TT_BlockLBrace);
399 }
400 parseBlock();
401 ++StatementCount;
402 assert(StatementCount > 0 && "StatementCount overflow!");
403 addUnwrappedLine();
404 break;
405 case tok::r_brace:
 // With an opening brace, the closing token ends this level and is left
 // for the caller to consume. The return value tells the caller whether
 // the block is a "simple block".
406 if (OpeningBrace) {
407 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
408 OpeningBrace->isNoneOf(Ks: TT_ControlStatementLBrace, Ks: TT_ElseLBrace)) {
409 return false;
410 }
 // Kind may have been mapped from a macro block end, hence the check
 // against the actual token here.
411 if (FormatTok->isNot(Kind: tok::r_brace) || StatementCount != 1 || HasLabel ||
412 HasDoWhile || IsPrecededByCommentOrPPDirective ||
413 precededByCommentOrPPDirective()) {
414 return false;
415 }
 // A comment on the same line right after the closing brace also
 // disqualifies the block.
416 const FormatToken *Next = Tokens->peekNextToken();
417 if (Next->is(Kind: tok::comment) && Next->NewlinesBefore == 0)
418 return false;
419 if (IfLeftBrace)
420 *IfLeftBrace = IfLBrace;
421 return true;
422 }
 // No opening brace: consume the stray closing brace and carry on.
423 nextToken();
424 addUnwrappedLine();
425 break;
426 case tok::kw_default: {
 // Peek at the next non-comment token, then rewind to the stored
 // position.
427 unsigned StoredPosition = Tokens->getPosition();
428 auto *Next = Tokens->getNextNonComment();
429 FormatTok = Tokens->setPosition(StoredPosition);
430 if (Next->isNoneOf(Ks: tok::colon, Ks: tok::arrow)) {
431 // default not followed by `:` or `->` is not a case label; treat it
432 // like an identifier.
433 parseStructuralElement();
434 break;
435 }
436 // Else, if it is 'default:', fall through to the case handling.
437 [[fallthrough]];
438 }
439 case tok::kw_case:
440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
441 (Style.isJavaScript() && Line->MustBeDeclaration)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
446 ParseDefault();
447 break;
448 }
 // Only the first switch label in this level may raise the line level.
449 if (!SwitchLabelEncountered &&
450 (Style.IndentCaseLabels ||
451 (OpeningBrace && OpeningBrace->is(TT: TT_SwitchExpressionLBrace)) ||
452 (Line->InPPDirective && Line->Level == 1))) {
453 ++Line->Level;
454 }
455 SwitchLabelEncountered = true;
456 parseStructuralElement();
457 break;
458 case tok::l_square:
459 if (Style.isCSharp()) {
460 nextToken();
461 parseCSharpAttribute();
462 break;
463 }
464 if (handleCppAttributes())
465 break;
466 [[fallthrough]];
467 default:
468 ParseDefault();
469 break;
470 }
471 } while (!eof());
472
473 return false;
474}
475
/// Looks ahead from the current `{` and assigns a block kind (BK_Block or
/// BK_BracedInit) to it and to the braces nested inside it, then rewinds the
/// token source to where it was on entry.
/// \param ExpectClassBody When true, a `;` after the closing brace of the
/// outermost tracked brace pair is not by itself taken as evidence of a
/// braced list.
476void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
480 // definitions, too.
481 unsigned StoredPosition = Tokens->getPosition();
482 FormatToken *Tok = FormatTok;
483 const FormatToken *PrevTok = Tok->Previous;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
487 struct StackEntry {
488 FormatToken *Tok;
489 const FormatToken *PrevTok;
490 };
491 SmallVector<StackEntry, 8> LBraceStack;
492 assert(Tok->is(tok::l_brace));
493
 // Each iteration classifies Tok using PrevTok (the token before it) and
 // NextTok (the next non-comment token after it).
494 do {
495 auto *NextTok = Tokens->getNextNonComment();
496
497 if (!Line->InMacroBody && !Style.isTableGen()) {
498 // Skip PPDirective lines (except macro definitions) and comments.
499 while (NextTok->is(Kind: tok::hash)) {
500 NextTok = Tokens->getNextToken();
501 if (NextTok->isOneOf(K1: tok::pp_not_keyword, K2: tok::pp_define))
502 break;
503 do {
504 NextTok = Tokens->getNextToken();
505 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(Kind: tok::eof));
506
507 while (NextTok->is(Kind: tok::comment))
508 NextTok = Tokens->getNextToken();
509 }
510 }
511
512 switch (Tok->Tok.getKind()) {
513 case tok::l_brace:
514 if (Style.isJavaScript() && PrevTok) {
515 if (PrevTok->isOneOf(K1: tok::colon, K2: tok::less)) {
516 // A ':' indicates this code is in a type, or a braced list
517 // following a label in an object literal ({a: {b: 1}}).
518 // A '<' could be an object used in a comparison, but that is nonsense
519 // code (can never return true), so more likely it is a generic type
520 // argument (`X<{a: string; b: number}>`).
521 // The code below could be confused by semicolons between the
522 // individual members in a type member list, which would normally
523 // trigger BK_Block. In both cases, this must be parsed as an inline
524 // braced init.
525 Tok->setBlockKind(BK_BracedInit);
526 } else if (PrevTok->is(Kind: tok::r_paren)) {
527 // `) { }` can only occur in function or method declarations in JS.
528 Tok->setBlockKind(BK_Block);
529 }
530 } else {
531 Tok->setBlockKind(BK_Unknown);
532 }
533 LBraceStack.push_back(Elt: {.Tok: Tok, .PrevTok: PrevTok});
534 break;
535 case tok::r_brace:
536 if (LBraceStack.empty())
537 break;
 // Only braces whose kind is still unknown are classified here; the
 // heuristics below accumulate evidence for a braced list.
538 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BBK: BK_Unknown)) {
539 bool ProbablyBracedList = false;
540 if (Style.Language == FormatStyle::LK_Proto) {
541 ProbablyBracedList = NextTok->isOneOf(K1: tok::comma, K2: tok::r_square);
542 } else if (LBrace->isNot(Kind: TT_EnumLBrace)) {
543 // Using OriginalColumn to distinguish between ObjC methods and
544 // binary operators is a bit hacky.
545 bool NextIsObjCMethod = NextTok->isOneOf(K1: tok::plus, K2: tok::minus) &&
546 NextTok->OriginalColumn == 0;
547
548 // Try to detect a braced list. Note that regardless how we mark inner
549 // braces here, we will overwrite the BlockKind later if we parse a
550 // braced list (where all blocks inside are by default braced lists),
551 // or when we explicitly detect blocks (for example while parsing
552 // lambdas).
553
554 // If we already marked the opening brace as braced list, the closing
555 // must also be part of it.
556 ProbablyBracedList = LBrace->is(TT: TT_BracedListLBrace);
557
558 ProbablyBracedList = ProbablyBracedList ||
559 (Style.isJavaScript() &&
560 NextTok->isOneOf(K1: Keywords.kw_of, K2: Keywords.kw_in,
561 Ks: Keywords.kw_as));
562 ProbablyBracedList =
563 ProbablyBracedList ||
564 (IsCpp && (PrevTok->Tok.isLiteral() ||
565 NextTok->isOneOf(K1: tok::l_paren, K2: tok::arrow)));
566
567 // If there is a comma, semicolon or right paren after the closing
568 // brace, we assume this is a braced initializer list.
569 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
570 // braced list in JS.
571 ProbablyBracedList =
572 ProbablyBracedList ||
573 NextTok->isOneOf(K1: tok::comma, K2: tok::period, Ks: tok::colon,
574 Ks: tok::r_paren, Ks: tok::r_square, Ks: tok::ellipsis);
575
576 // Distinguish between braced list in a constructor initializer list
577 // followed by constructor body, or just adjacent blocks.
578 ProbablyBracedList =
579 ProbablyBracedList ||
580 (NextTok->is(Kind: tok::l_brace) && LBraceStack.back().PrevTok &&
581 LBraceStack.back().PrevTok->isOneOf(K1: tok::identifier,
582 K2: tok::greater));
583
584 ProbablyBracedList =
585 ProbablyBracedList ||
586 (NextTok->is(Kind: tok::identifier) &&
587 PrevTok->isNoneOf(Ks: tok::semi, Ks: tok::r_brace, Ks: tok::l_brace));
588
 // A following `;` counts as evidence unless a class body is expected
 // and this is the outermost tracked brace.
589 ProbablyBracedList = ProbablyBracedList ||
590 (NextTok->is(Kind: tok::semi) &&
591 (!ExpectClassBody || LBraceStack.size() != 1));
592
593 ProbablyBracedList =
594 ProbablyBracedList ||
595 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
596
 // Note: this case assigns rather than ORs, so it overrides all the
 // evidence gathered above, and it advances NextTok by one token.
597 if (!Style.isCSharp() && NextTok->is(Kind: tok::l_square)) {
598 // We can have an array subscript after a braced init
599 // list, but C++11 attributes are expected after blocks.
600 NextTok = Tokens->getNextToken();
601 ProbablyBracedList = NextTok->isNot(Kind: tok::l_square);
602 }
603
604 // Cpp macro definition body that is a nonempty braced list or block:
605 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
606 !FormatTok->Previous && NextTok->is(Kind: tok::eof) &&
607 // A statement can end with only `;` (simple statement), a block
608 // closing brace (compound statement), or `:` (label statement).
609 // If PrevTok is a block opening brace, Tok ends an empty block.
610 PrevTok->isNoneOf(Ks: tok::semi, Ks: BK_Block, Ks: tok::colon)) {
611 ProbablyBracedList = true;
612 }
613 }
 // Both braces of the pair receive the same kind.
614 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
615 Tok->setBlockKind(BlockKind);
616 LBrace->setBlockKind(BlockKind);
617 }
618 LBraceStack.pop_back();
619 break;
620 case tok::identifier:
621 if (Tok->isNot(Kind: TT_StatementMacro))
622 break;
623 [[fallthrough]];
 // Any of the following tokens (and a statement macro identifier, via the
 // fallthrough above) marks the innermost still-unknown brace as a block.
624 case tok::at:
625 case tok::semi:
626 case tok::kw_if:
627 case tok::kw_while:
628 case tok::kw_for:
629 case tok::kw_switch:
630 case tok::kw_try:
631 case tok::kw___try:
632 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BBK: BK_Unknown))
633 LBraceStack.back().Tok->setBlockKind(BK_Block);
634 break;
635 default:
636 break;
637 }
638
639 PrevTok = Tok;
640 Tok = NextTok;
641 } while (Tok->isNot(Kind: tok::eof) && !LBraceStack.empty());
642
643 // Assume other blocks for all unclosed opening braces.
644 for (const auto &Entry : LBraceStack)
645 if (Entry.Tok->is(BBK: BK_Unknown))
646 Entry.Tok->setBlockKind(BK_Block);
647
 // Undo the lookahead: restore the token source position saved on entry.
648 FormatTok = Tokens->setPosition(StoredPosition);
649}
650
651// Sets the token type of the directly previous right brace.
652void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
653 if (auto Prev = FormatTok->getPreviousNonComment();
654 Prev && Prev->is(Kind: tok::r_brace)) {
655 Prev->setFinalizedType(Type);
656 }
657}
658
/// Mixes the std::hash of \p v into \p seed in place.
/// \param seed Running hash value; updated with the combined result.
/// \param v    Value whose hash is folded into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t hashed = std::hash<T>{}(v);
  // Besides the new hash and a fixed constant, the seed itself contributes in
  // shifted form before the final XOR.
  const std::size_t mixed = hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
664
665size_t UnwrappedLineParser::computePPHash() const {
666 size_t h = 0;
667 for (const auto &i : PPStack) {
668 hash_combine(seed&: h, v: size_t(i.Kind));
669 hash_combine(seed&: h, v: i.Line);
670 }
671 return h;
672}
673
674// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
675// is not null, subtracts its length (plus the preceding space) when computing
676// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
677// running the token annotator on it so that we can restore them afterward.
// Returns true unconditionally when the style has no column limit.
678bool UnwrappedLineParser::mightFitOnOneLine(
679 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
680 const auto ColumnLimit = Style.ColumnLimit;
681 if (ColumnLimit == 0)
682 return true;
683
684 auto &Tokens = ParsedLine.Tokens;
685 assert(!Tokens.empty());
686
687 const auto *LastToken = Tokens.back().Tok;
688 assert(LastToken);
689
 // One snapshot slot per token of the line.
690 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
691
 // Snapshot every token into a heap-allocated copy; the copies are deleted
 // in the restore loop below.
692 int Index = 0;
693 for (const auto &Token : Tokens) {
694 assert(Token.Tok);
695 auto &SavedToken = SavedTokens[Index++];
696 SavedToken.Tok = new FormatToken;
697 SavedToken.Tok->copyFrom(Tok: *Token.Tok);
 // NOTE(review): Token is a const reference, so std::move produces a const
 // rvalue here; presumably this copies the children rather than moving
 // them out of the line - confirm before "fixing" it.
698 SavedToken.Children = std::move(Token.Children);
699 }
700
701 AnnotatedLine Line(ParsedLine);
702 assert(Line.Last == LastToken);
703
 // Run the annotator so that the tokens carry length information.
704 TokenAnnotator Annotator(Style, Keywords);
705 Annotator.annotate(Line);
706 Annotator.calculateFormattingInformation(Line);
707
708 auto Length = LastToken->TotalLength;
709 if (OpeningBrace) {
710 assert(OpeningBrace != Tokens.front().Tok);
 // If the brace's total length exceeds its predecessor's by exactly
 // ColumnLimit, take that amount back out of the length.
711 if (auto Prev = OpeningBrace->Previous;
712 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
713 Length -= ColumnLimit;
714 }
 // Discount the brace itself plus the space in front of it.
715 Length -= OpeningBrace->TokenText.size() + 1;
716 }
717
 // A leading closing brace is discounted in the same way.
718 if (const auto *FirstToken = Line.First; FirstToken->is(Kind: tok::r_brace)) {
719 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
720 Length -= FirstToken->TokenText.size() + 1;
721 }
722
 // Restore the tokens from the snapshots and free the copies.
723 Index = 0;
724 for (auto &Token : Tokens) {
725 const auto &SavedToken = SavedTokens[Index++];
726 Token.Tok->copyFrom(Tok: *SavedToken.Tok);
 // SavedToken is a const reference as well; see the note in the save loop.
727 Token.Children = std::move(SavedToken.Children);
728 delete SavedToken.Tok;
729 }
730
731 // If these change PPLevel needs to be used for get correct indentation.
732 assert(!Line.InMacroBody);
733 assert(!Line.InPPDirective);
734 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
735}
736
/// Parses a block that starts at the current token: a `{`, a macro block
/// begin token, or (for Verilog) a begin keyword or hierarchy header.
/// \param MustBeDeclaration Declaration flag applied to the block's contents.
/// \param AddLevels Number of levels added to the line level inside the block.
/// \param MunchSemi Whether a `;` directly after the block is consumed too.
/// \param KeepBraces When true, braces are only considered removable if the
/// if-block left brace reported by the nested level is not optional.
/// \param IfKind Forwarded to parseLevel.
/// \param UnindentWhitesmithsBraces Whether to decrement the line level once
/// after the opening line has been added.
/// \returns The if-block left brace reported by parseLevel (may be null), or
/// nullptr when the nesting level exceeds the bail-out limit.
737FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
738 unsigned AddLevels, bool MunchSemi,
739 bool KeepBraces,
740 IfStmtKind *IfKind,
741 bool UnindentWhitesmithsBraces) {
742 auto HandleVerilogBlockLabel = [this]() {
743 // ":" name
744 if (Style.isVerilog() && FormatTok->is(Kind: tok::colon)) {
745 nextToken();
746 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
747 nextToken();
748 }
749 };
750
751 // Whether this is a Verilog-specific block that has a special header like a
752 // module.
753 const bool VerilogHierarchy =
754 Style.isVerilog() && Keywords.isVerilogHierarchy(Tok: *FormatTok);
755 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
756 (Style.isVerilog() &&
757 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
758 "'{' or macro block token expected");
 // Tok is the opening token of the block. Index is the number of lines
 // collected so far, i.e. the index the opening line gets once it is added.
759 FormatToken *Tok = FormatTok;
760 const bool FollowedByComment = Tokens->peekNextToken()->is(Kind: tok::comment);
761 auto Index = CurrentLines->size();
762 const bool MacroBlock = FormatTok->is(TT: TT_MacroBlockBegin);
763 FormatTok->setBlockKind(BK_Block);
764
765 // For Whitesmiths mode, jump to the next level prior to skipping over the
766 // braces.
767 if (!VerilogHierarchy && AddLevels > 0 &&
768 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
769 ++Line->Level;
770 }
771
 // Hash of the preprocessor stack at the opening brace; compared with the
 // hash at the closing brace before the block line indices are linked.
772 size_t PPStartHash = computePPHash();
773
774 const unsigned InitialLevel = Line->Level;
775 if (VerilogHierarchy) {
776 AddLevels += parseVerilogHierarchyHeader();
777 } else {
778 nextToken(/*LevelDifference=*/AddLevels);
779 HandleVerilogBlockLabel();
780 }
781
782 // Bail out if there are too many levels. Otherwise, the stack might overflow.
783 if (Line->Level > 300)
784 return nullptr;
785
786 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
787 parseParens();
788
 // Number of preprocessor directive lines collected so far, counted only
 // when not currently parsing a preprocessor directive; it is subtracted
 // when computing the index of the opening line.
789 size_t NbPreprocessorDirectives =
790 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
791 addUnwrappedLine();
792 size_t OpeningLineIndex =
793 CurrentLines->empty()
794 ? (UnwrappedLine::kInvalidIndex)
795 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
796
797 // Whitesmiths is weird here. The brace needs to be indented for the namespace
798 // block, but the block itself may not be indented depending on the style
799 // settings. This allows the format to back up one level in those cases.
800 if (UnindentWhitesmithsBraces)
801 --Line->Level;
802
803 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
804 MustBeDeclaration);
805 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
806 Line->Level += AddLevels;
807
808 FormatToken *IfLBrace = nullptr;
809 const bool SimpleBlock = parseLevel(OpeningBrace: Tok, IfKind, IfLeftBrace: &IfLBrace);
810
811 if (eof())
812 return IfLBrace;
813
 // The level ended on something other than the expected closing token:
 // restore the level and give up on this block.
814 if (MacroBlock ? FormatTok->isNot(Kind: TT_MacroBlockEnd)
815 : FormatTok->isNot(Kind: tok::r_brace)) {
816 Line->Level = InitialLevel;
817 FormatTok->setBlockKind(BK_Block);
818 return IfLBrace;
819 }
820
821 if (FormatTok->is(Kind: tok::r_brace)) {
822 FormatTok->setBlockKind(BK_Block);
823 if (Tok->is(TT: TT_NamespaceLBrace))
824 FormatTok->setFinalizedType(TT_NamespaceRBrace);
825 }
826
827 const bool IsFunctionRBrace =
828 FormatTok->is(Kind: tok::r_brace) && Tok->is(TT: TT_FunctionLBrace);
829
 // Decides whether the braces of this block are removable. The lambda
 // captures by copy and is mutable, so its changes to Index and Tok affect
 // only its own copies, not the variables of the enclosing function.
830 auto RemoveBraces = [=]() mutable {
831 if (!SimpleBlock)
832 return false;
833 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
834 assert(FormatTok->is(tok::r_brace));
835 const bool WrappedOpeningBrace = !Tok->Previous;
836 if (WrappedOpeningBrace && FollowedByComment)
837 return false;
838 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
839 if (KeepBraces && !HasRequiredIfBraces)
840 return false;
841 if (Tok->isNot(Kind: TT_ElseLBrace) || !HasRequiredIfBraces) {
842 const FormatToken *Previous = Tokens->getPreviousToken();
843 assert(Previous);
844 if (Previous->is(Kind: tok::r_brace) && !Previous->Optional)
845 return false;
846 }
847 assert(!CurrentLines->empty());
848 auto &LastLine = CurrentLines->back();
849 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(ParsedLine&: LastLine))
850 return false;
851 if (Tok->is(TT: TT_ElseLBrace))
852 return true;
853 if (WrappedOpeningBrace) {
854 assert(Index > 0);
855 --Index; // The line above the wrapped l_brace.
856 Tok = nullptr;
857 }
858 return mightFitOnOneLine(ParsedLine&: (*CurrentLines)[Index], OpeningBrace: Tok);
859 };
 // Removable braces are marked by linking the two tokens to each other.
860 if (RemoveBraces()) {
861 Tok->MatchingParen = FormatTok;
862 FormatTok->MatchingParen = Tok;
863 }
864
865 size_t PPEndHash = computePPHash();
866
867 // Munch the closing brace.
868 nextToken(/*LevelDifference=*/-AddLevels);
869
870 // When this is a function block and there is an unnecessary semicolon
871 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
872 // it later).
873 if (Style.RemoveSemicolon && IsFunctionRBrace) {
874 while (FormatTok->is(Kind: tok::semi)) {
875 FormatTok->Optional = true;
876 nextToken();
877 }
878 }
879
880 HandleVerilogBlockLabel();
881
882 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
883 parseParens();
884
885 Line->Level = InitialLevel;
886
887 if (FormatTok->is(Kind: tok::kw_noexcept)) {
888 // A noexcept in a requires expression.
889 nextToken();
890 }
891
892 if (FormatTok->is(Kind: tok::arrow)) {
893 // Following the } or noexcept we can find a trailing return type arrow
894 // as part of an implicit conversion constraint.
895 nextToken();
896 parseStructuralElement();
897 }
898
899 if (MunchSemi && FormatTok->is(Kind: tok::semi))
900 nextToken();
901
 // Link the opening and closing lines only when the preprocessor stack
 // hashes to the same value at both braces.
902 if (PPStartHash == PPEndHash) {
903 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
904 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
905 // Update the opening line to add the forward reference as well
906 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
907 CurrentLines->size() - 1;
908 }
909 }
910
911 return IfLBrace;
912}
913
914static bool isGoogScope(const UnwrappedLine &Line) {
915 // FIXME: Closure-library specific stuff should not be hard-coded but be
916 // configurable.
917 if (Line.Tokens.size() < 4)
918 return false;
919 auto I = Line.Tokens.begin();
920 if (I->Tok->TokenText != "goog")
921 return false;
922 ++I;
923 if (I->Tok->isNot(Kind: tok::period))
924 return false;
925 ++I;
926 if (I->Tok->TokenText != "scope")
927 return false;
928 ++I;
929 return I->Tok->is(Kind: tok::l_paren);
930}
931
932static bool isIIFE(const UnwrappedLine &Line,
933 const AdditionalKeywords &Keywords) {
934 // Look for the start of an immediately invoked anonymous function.
935 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
936 // This is commonly done in JavaScript to create a new, anonymous scope.
937 // Example: (function() { ... })()
938 if (Line.Tokens.size() < 3)
939 return false;
940 auto I = Line.Tokens.begin();
941 if (I->Tok->isNot(Kind: tok::l_paren))
942 return false;
943 ++I;
944 if (I->Tok->isNot(Kind: Keywords.kw_function))
945 return false;
946 ++I;
947 return I->Tok->is(Kind: tok::l_paren);
948}
949
950static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
951 const FormatToken &InitialToken,
952 const bool IsJavaRecord) {
953 if (IsJavaRecord)
954 return Style.BraceWrapping.AfterClass;
955
956 tok::TokenKind Kind = InitialToken.Tok.getKind();
957 if (InitialToken.is(TT: TT_NamespaceMacro))
958 Kind = tok::kw_namespace;
959
960 switch (Kind) {
961 case tok::kw_namespace:
962 return Style.BraceWrapping.AfterNamespace;
963 case tok::kw_class:
964 return Style.BraceWrapping.AfterClass;
965 case tok::kw_union:
966 return Style.BraceWrapping.AfterUnion;
967 case tok::kw_struct:
968 return Style.BraceWrapping.AfterStruct;
969 case tok::kw_enum:
970 return Style.BraceWrapping.AfterEnum;
971 default:
972 return false;
973 }
974}
975
976void UnwrappedLineParser::parseChildBlock() {
977 assert(FormatTok->is(tok::l_brace));
978 FormatTok->setBlockKind(BK_Block);
979 const FormatToken *OpeningBrace = FormatTok;
980 nextToken();
981 {
982 bool SkipIndent = (Style.isJavaScript() &&
983 (isGoogScope(Line: *Line) || isIIFE(Line: *Line, Keywords)));
984 ScopedLineState LineState(*this);
985 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
986 /*MustBeDeclaration=*/false);
987 Line->Level += SkipIndent ? 0 : 1;
988 parseLevel(OpeningBrace);
989 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
990 Line->Level -= SkipIndent ? 0 : 1;
991 }
992 nextToken();
993}
994
995void UnwrappedLineParser::parsePPDirective() {
996 assert(FormatTok->is(tok::hash) && "'#' expected");
997 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
998
999 nextToken();
1000
1001 if (!FormatTok->Tok.getIdentifierInfo()) {
1002 parsePPUnknown();
1003 return;
1004 }
1005
1006 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1007 case tok::pp_define:
1008 parsePPDefine();
1009 return;
1010 case tok::pp_if:
1011 parsePPIf(/*IfDef=*/false);
1012 break;
1013 case tok::pp_ifdef:
1014 case tok::pp_ifndef:
1015 parsePPIf(/*IfDef=*/true);
1016 break;
1017 case tok::pp_else:
1018 case tok::pp_elifdef:
1019 case tok::pp_elifndef:
1020 case tok::pp_elif:
1021 parsePPElse();
1022 break;
1023 case tok::pp_endif:
1024 parsePPEndIf();
1025 break;
1026 case tok::pp_pragma:
1027 parsePPPragma();
1028 break;
1029 case tok::pp_error:
1030 case tok::pp_warning:
1031 nextToken();
1032 if (!eof() && Style.isCpp())
1033 FormatTok->setFinalizedType(TT_AfterPPDirective);
1034 [[fallthrough]];
1035 default:
1036 parsePPUnknown();
1037 break;
1038 }
1039}
1040
1041void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1042 size_t Line = CurrentLines->size();
1043 if (CurrentLines == &PreprocessorDirectives)
1044 Line += Lines.size();
1045
1046 if (Unreachable ||
1047 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1048 PPStack.push_back(Elt: {PP_Unreachable, Line});
1049 } else {
1050 PPStack.push_back(Elt: {PP_Conditional, Line});
1051 }
1052}
1053
1054void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1055 ++PPBranchLevel;
1056 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1057 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1058 PPLevelBranchIndex.push_back(Elt: 0);
1059 PPLevelBranchCount.push_back(Elt: 0);
1060 }
1061 PPChainBranchIndex.push(x: Unreachable ? -1 : 0);
1062 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1063 conditionalCompilationCondition(Unreachable: Unreachable || Skip);
1064}
1065
1066void UnwrappedLineParser::conditionalCompilationAlternative() {
1067 if (!PPStack.empty())
1068 PPStack.pop_back();
1069 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1070 if (!PPChainBranchIndex.empty())
1071 ++PPChainBranchIndex.top();
1072 conditionalCompilationCondition(
1073 Unreachable: PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1074 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1075}
1076
1077void UnwrappedLineParser::conditionalCompilationEnd() {
1078 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1079 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1080 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1081 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1082 }
1083 // Guard against #endif's without #if.
1084 if (PPBranchLevel > -1)
1085 --PPBranchLevel;
1086 if (!PPChainBranchIndex.empty())
1087 PPChainBranchIndex.pop();
1088 if (!PPStack.empty())
1089 PPStack.pop_back();
1090}
1091
1092void UnwrappedLineParser::parsePPIf(bool IfDef) {
1093 bool IfNDef = FormatTok->is(Kind: tok::pp_ifndef);
1094 nextToken();
1095 bool Unreachable = false;
1096 if (!IfDef && (FormatTok->is(Kind: tok::kw_false) || FormatTok->TokenText == "0"))
1097 Unreachable = true;
1098 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1099 Unreachable = true;
1100 conditionalCompilationStart(Unreachable);
1101 FormatToken *IfCondition = FormatTok;
1102 // If there's a #ifndef on the first line, and the only lines before it are
1103 // comments, it could be an include guard.
1104 bool MaybeIncludeGuard = IfNDef;
1105 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1106 for (auto &Line : Lines) {
1107 if (Line.Tokens.front().Tok->isNot(Kind: tok::comment)) {
1108 MaybeIncludeGuard = false;
1109 IncludeGuard = IG_Rejected;
1110 break;
1111 }
1112 }
1113 }
1114 --PPBranchLevel;
1115 parsePPUnknown();
1116 ++PPBranchLevel;
1117 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1118 IncludeGuard = IG_IfNdefed;
1119 IncludeGuardToken = IfCondition;
1120 }
1121}
1122
1123void UnwrappedLineParser::parsePPElse() {
1124 // If a potential include guard has an #else, it's not an include guard.
1125 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1126 IncludeGuard = IG_Rejected;
1127 // Don't crash when there is an #else without an #if.
1128 assert(PPBranchLevel >= -1);
1129 if (PPBranchLevel == -1)
1130 conditionalCompilationStart(/*Unreachable=*/true);
1131 conditionalCompilationAlternative();
1132 --PPBranchLevel;
1133 parsePPUnknown();
1134 ++PPBranchLevel;
1135}
1136
1137void UnwrappedLineParser::parsePPEndIf() {
1138 conditionalCompilationEnd();
1139 parsePPUnknown();
1140 // If the #endif of a potential include guard is the last thing in the file,
1141 // then we found an include guard.
1142 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1143 getIncludeGuardState(Style: Style.IndentPPDirectives) == IG_Inited) {
1144 IncludeGuard = IG_Found;
1145 }
1146}
1147
// Parses a #define directive. The macro name (and a directly attached
// parameter list) form one unwrapped line; a non-empty macro body is then
// either parsed like a file or, with Style.SkipMacroDefinitionBody, marked as
// finalized token by token. Also advances the include guard state machine.
void UnwrappedLineParser::parsePPDefine() {
  // Move from the 'define' keyword to the macro name.
  nextToken();

  // Without identifier info the token cannot be a macro name; this also rules
  // out an include guard. Treat the rest of the directive as opaque.
  if (!FormatTok->Tok.getIdentifierInfo()) {
    IncludeGuard = IG_Rejected;
    IncludeGuardToken = nullptr;
    parsePPUnknown();
    return;
  }

  // The guard candidate recorded by a preceding #ifndef survives only if this
  // macro has the same name and every line collected so far starts with a
  // comment or a '#'.
  bool MaybeIncludeGuard = false;
  if (IncludeGuard == IG_IfNdefed &&
      IncludeGuardToken->TokenText == FormatTok->TokenText) {
    IncludeGuard = IG_Defined;
    IncludeGuardToken = nullptr;
    for (auto &Line : Lines) {
      if (Line.Tokens.front().Tok->isNoneOf(Ks: tok::comment, Ks: tok::hash)) {
        IncludeGuard = IG_Rejected;
        break;
      }
    }
    MaybeIncludeGuard = IncludeGuard == IG_Defined;
  }

  // In the context of a define, even keywords should be treated as normal
  // identifiers. Setting the kind to identifier is not enough, because we need
  // to treat additional keywords like __except as well, which are already
  // identifiers. Setting the identifier info to null interferes with include
  // guard processing above, and changes preprocessing nesting.
  FormatTok->Tok.setKind(tok::identifier);
  FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
  nextToken();

  // IncludeGuard can't have a non-empty macro definition.
  if (MaybeIncludeGuard && !eof())
    IncludeGuard = IG_Rejected;

  // Only a '(' with no whitespace before it is parsed as part of the macro
  // head.
  if (FormatTok->is(Kind: tok::l_paren) && !FormatTok->hasWhitespaceBefore())
    parseParens();
  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
    Line->Level += PPBranchLevel + 1;
  // Emit the "#define NAME(params)" head as its own line; the level is then
  // raised by one for whatever follows.
  addUnwrappedLine();
  ++Line->Level;

  // A confirmed include guard's #define does not count as an extra
  // preprocessor level.
  Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
  assert((int)Line->PPLevel >= 0);

  // Nothing left in the directive: the macro has an empty body.
  if (eof())
    return;

  Line->InMacroBody = true;

  if (!Style.SkipMacroDefinitionBody) {
    // Errors during a preprocessor directive can only affect the layout of the
    // preprocessor directive, and thus we ignore them. An alternative approach
    // would be to use the same approach we use on the file level (no
    // re-indentation if there was a structural error) within the macro
    // definition.
    parseFile();
    return;
  }

  // Skipping the body: mark pending comments and every remaining token of the
  // directive as finalized, reading tokens straight from the token source.
  for (auto *Comment : CommentsBeforeNextToken)
    Comment->Finalized = true;

  do {
    FormatTok->Finalized = true;
    FormatTok = Tokens->getNextToken();
  } while (!eof());

  addUnwrappedLine();
}
1220
// Parses a #pragma directive: flags the current line as a pragma and consumes
// the rest of the directive without further interpretation.
void UnwrappedLineParser::parsePPPragma() {
  Line->InPragmaDirective = true;
  parsePPUnknown();
}
1225
1226void UnwrappedLineParser::parsePPUnknown() {
1227 while (!eof())
1228 nextToken();
1229 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1230 Line->Level += PPBranchLevel + 1;
1231 addUnwrappedLine();
1232}
1233
1234// Here we exclude certain tokens that are not usually the first token in an
1235// unwrapped line. This is used in attempt to distinguish macro calls without
1236// trailing semicolons from other constructs split to several lines.
1237static bool tokenCanStartNewLine(const FormatToken &Tok) {
1238 // Semicolon can be a null-statement, l_square can be a start of a macro or
1239 // a C++11 attribute, but this doesn't seem to be common.
1240 return Tok.isNoneOf(Ks: tok::semi, Ks: tok::l_brace,
1241 // Tokens that can only be used as binary operators and a
1242 // part of overloaded operator names.
1243 Ks: tok::period, Ks: tok::periodstar, Ks: tok::arrow, Ks: tok::arrowstar,
1244 Ks: tok::less, Ks: tok::greater, Ks: tok::slash, Ks: tok::percent,
1245 Ks: tok::lessless, Ks: tok::greatergreater, Ks: tok::equal,
1246 Ks: tok::plusequal, Ks: tok::minusequal, Ks: tok::starequal,
1247 Ks: tok::slashequal, Ks: tok::percentequal, Ks: tok::ampequal,
1248 Ks: tok::pipeequal, Ks: tok::caretequal, Ks: tok::greatergreaterequal,
1249 Ks: tok::lesslessequal,
1250 // Colon is used in labels, base class lists, initializer
1251 // lists, range-based for loops, ternary operator, but
1252 // should never be the first token in an unwrapped line.
1253 Ks: tok::colon,
1254 // 'noexcept' is a trailing annotation.
1255 Ks: tok::kw_noexcept);
1256}
1257
1258static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1259 const FormatToken *FormatTok) {
1260 // FIXME: This returns true for C/C++ keywords like 'struct'.
1261 return FormatTok->is(Kind: tok::identifier) &&
1262 (!FormatTok->Tok.getIdentifierInfo() ||
1263 FormatTok->isNoneOf(
1264 Ks: Keywords.kw_in, Ks: Keywords.kw_of, Ks: Keywords.kw_as, Ks: Keywords.kw_async,
1265 Ks: Keywords.kw_await, Ks: Keywords.kw_yield, Ks: Keywords.kw_finally,
1266 Ks: Keywords.kw_function, Ks: Keywords.kw_import, Ks: Keywords.kw_is,
1267 Ks: Keywords.kw_let, Ks: Keywords.kw_var, Ks: tok::kw_const,
1268 Ks: Keywords.kw_abstract, Ks: Keywords.kw_extends, Ks: Keywords.kw_implements,
1269 Ks: Keywords.kw_instanceof, Ks: Keywords.kw_interface,
1270 Ks: Keywords.kw_override, Ks: Keywords.kw_throws, Ks: Keywords.kw_from));
1271}
1272
1273static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1274 const FormatToken *FormatTok) {
1275 return FormatTok->Tok.isLiteral() ||
1276 FormatTok->isOneOf(K1: tok::kw_true, K2: tok::kw_false) ||
1277 mustBeJSIdent(Keywords, FormatTok);
1278}
1279
1280// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1281// when encountered after a value (see mustBeJSIdentOrValue).
1282static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1283 const FormatToken *FormatTok) {
1284 return FormatTok->isOneOf(
1285 K1: tok::kw_return, K2: Keywords.kw_yield,
1286 // conditionals
1287 Ks: tok::kw_if, Ks: tok::kw_else,
1288 // loops
1289 Ks: tok::kw_for, Ks: tok::kw_while, Ks: tok::kw_do, Ks: tok::kw_continue, Ks: tok::kw_break,
1290 // switch/case
1291 Ks: tok::kw_switch, Ks: tok::kw_case,
1292 // exceptions
1293 Ks: tok::kw_throw, Ks: tok::kw_try, Ks: tok::kw_catch, Ks: Keywords.kw_finally,
1294 // declaration
1295 Ks: tok::kw_const, Ks: tok::kw_class, Ks: Keywords.kw_var, Ks: Keywords.kw_let,
1296 Ks: Keywords.kw_async, Ks: Keywords.kw_function,
1297 // import/export
1298 Ks: Keywords.kw_import, Ks: tok::kw_export);
1299}
1300
1301// Checks whether a token is a type in K&R C (aka C78).
1302static bool isC78Type(const FormatToken &Tok) {
1303 return Tok.isOneOf(K1: tok::kw_char, K2: tok::kw_short, Ks: tok::kw_int, Ks: tok::kw_long,
1304 Ks: tok::kw_unsigned, Ks: tok::kw_float, Ks: tok::kw_double,
1305 Ks: tok::identifier);
1306}
1307
1308// This function checks whether a token starts the first parameter declaration
1309// in a K&R C (aka C78) function definition, e.g.:
1310// int f(a, b)
1311// short a, b;
1312// {
1313// return a + b;
1314// }
1315static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1316 const FormatToken *FuncName) {
1317 assert(Tok);
1318 assert(Next);
1319 assert(FuncName);
1320
1321 if (FuncName->isNot(Kind: tok::identifier))
1322 return false;
1323
1324 const FormatToken *Prev = FuncName->Previous;
1325 if (!Prev || (Prev->isNot(Kind: tok::star) && !isC78Type(Tok: *Prev)))
1326 return false;
1327
1328 if (!isC78Type(Tok: *Tok) &&
1329 Tok->isNoneOf(Ks: tok::kw_register, Ks: tok::kw_struct, Ks: tok::kw_union)) {
1330 return false;
1331 }
1332
1333 if (Next->isNot(Kind: tok::star) && !Next->Tok.getIdentifierInfo())
1334 return false;
1335
1336 Tok = Tok->Previous;
1337 if (!Tok || Tok->isNot(Kind: tok::r_paren))
1338 return false;
1339
1340 Tok = Tok->Previous;
1341 if (!Tok || Tok->isNot(Kind: tok::identifier))
1342 return false;
1343
1344 return Tok->Previous && Tok->Previous->isOneOf(K1: tok::l_paren, K2: tok::comma);
1345}
1346
1347bool UnwrappedLineParser::parseModuleImport() {
1348 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1349
1350 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1351 !Token->Tok.getIdentifierInfo() &&
1352 Token->isNoneOf(Ks: tok::colon, Ks: tok::less, Ks: tok::string_literal)) {
1353 return false;
1354 }
1355
1356 nextToken();
1357 while (!eof()) {
1358 if (FormatTok->is(Kind: tok::colon)) {
1359 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1360 }
1361 // Handle import <foo/bar.h> as we would an include statement.
1362 else if (FormatTok->is(Kind: tok::less)) {
1363 nextToken();
1364 while (FormatTok->isNoneOf(Ks: tok::semi, Ks: tok::greater) && !eof()) {
1365 // Mark tokens up to the trailing line comments as implicit string
1366 // literals.
1367 if (FormatTok->isNot(Kind: tok::comment) &&
1368 !FormatTok->TokenText.starts_with(Prefix: "//")) {
1369 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1370 }
1371 nextToken();
1372 }
1373 }
1374 if (FormatTok->is(Kind: tok::semi)) {
1375 nextToken();
1376 break;
1377 }
1378 nextToken();
1379 }
1380
1381 addUnwrappedLine();
1382 return true;
1383}
1384
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // ASI is only considered across a line break. If comments precede the next
  // token, the first comment decides whether there was a break.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Previous);
  // A template string chunk ending in "${" opens an embedded expression.
  bool PreviousStartsTemplateExpr =
      Previous->is(TT: TT_TemplateString) && Previous->TokenText.ends_with(Suffix: "${");
  if (PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Range&: Line->Tokens, P: [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(Kind: tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on a new line: end the line before the '!'.
  if (Next->is(Kind: tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Next);
  // A template string chunk starting with "}" closes an embedded expression.
  bool NextEndsTemplateExpr =
      Next->is(TT: TT_TemplateString) && Next->TokenText.starts_with(Prefix: "}");
  // Two values in a row (or a value after ']', ')', '++' or '--') separated by
  // a line break, outside of a template expression boundary.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(K1: tok::r_square, K2: tok::r_paren, Ks: tok::plusplus,
                         Ks: tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A value or ')' followed by a keyword that starts a declaration or
  // statement.
  if ((PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, FormatTok: Next)) {
    return addUnwrappedLine();
  }
}
1432
1433void UnwrappedLineParser::parseStructuralElement(
1434 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1435 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1436 if (Style.isTableGen() && FormatTok->is(Kind: tok::pp_include)) {
1437 nextToken();
1438 if (FormatTok->is(Kind: tok::string_literal))
1439 nextToken();
1440 addUnwrappedLine();
1441 return;
1442 }
1443
1444 if (IsCpp) {
1445 while (FormatTok->is(Kind: tok::l_square) && handleCppAttributes()) {
1446 }
1447 } else if (Style.isVerilog()) {
1448 if (Keywords.isVerilogStructuredProcedure(Tok: *FormatTok)) {
1449 parseForOrWhileLoop(/*HasParens=*/false);
1450 return;
1451 }
1452 if (FormatTok->isOneOf(K1: Keywords.kw_foreach, K2: Keywords.kw_repeat)) {
1453 parseForOrWhileLoop();
1454 return;
1455 }
1456 if (FormatTok->isOneOf(K1: tok::kw_restrict, K2: Keywords.kw_assert,
1457 Ks: Keywords.kw_assume, Ks: Keywords.kw_cover)) {
1458 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1459 return;
1460 }
1461
1462 // Skip things that can exist before keywords like 'if' and 'case'.
1463 while (true) {
1464 if (FormatTok->isOneOf(K1: Keywords.kw_priority, K2: Keywords.kw_unique,
1465 Ks: Keywords.kw_unique0)) {
1466 nextToken();
1467 } else if (FormatTok->is(Kind: tok::l_paren) &&
1468 Tokens->peekNextToken()->is(Kind: tok::star)) {
1469 parseParens();
1470 } else {
1471 break;
1472 }
1473 }
1474 }
1475
1476 // Tokens that only make sense at the beginning of a line.
1477 if (FormatTok->isAccessSpecifierKeyword()) {
1478 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1479 nextToken();
1480 else
1481 parseAccessSpecifier();
1482 return;
1483 }
1484 switch (FormatTok->Tok.getKind()) {
1485 case tok::kw_asm:
1486 nextToken();
1487 if (FormatTok->is(Kind: tok::l_brace)) {
1488 FormatTok->setFinalizedType(TT_InlineASMBrace);
1489 nextToken();
1490 while (FormatTok && !eof()) {
1491 if (FormatTok->is(Kind: tok::r_brace)) {
1492 FormatTok->setFinalizedType(TT_InlineASMBrace);
1493 nextToken();
1494 addUnwrappedLine();
1495 break;
1496 }
1497 FormatTok->Finalized = true;
1498 nextToken();
1499 }
1500 }
1501 break;
1502 case tok::kw_namespace:
1503 parseNamespace();
1504 return;
1505 case tok::kw_if: {
1506 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1507 // field/method declaration.
1508 break;
1509 }
1510 FormatToken *Tok = parseIfThenElse(IfKind);
1511 if (IfLeftBrace)
1512 *IfLeftBrace = Tok;
1513 return;
1514 }
1515 case tok::kw_for:
1516 case tok::kw_while:
1517 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1518 // field/method declaration.
1519 break;
1520 }
1521 parseForOrWhileLoop();
1522 return;
1523 case tok::kw_do:
1524 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1525 // field/method declaration.
1526 break;
1527 }
1528 parseDoWhile();
1529 if (HasDoWhile)
1530 *HasDoWhile = true;
1531 return;
1532 case tok::kw_switch:
1533 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534 // 'switch: string' field declaration.
1535 break;
1536 }
1537 parseSwitch(/*IsExpr=*/false);
1538 return;
1539 case tok::kw_default: {
1540 // In Verilog default along with other labels are handled in the next loop.
1541 if (Style.isVerilog())
1542 break;
1543 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1544 // 'default: string' field declaration.
1545 break;
1546 }
1547 auto *Default = FormatTok;
1548 nextToken();
1549 if (FormatTok->is(Kind: tok::colon)) {
1550 FormatTok->setFinalizedType(TT_CaseLabelColon);
1551 parseLabel();
1552 return;
1553 }
1554 if (FormatTok->is(Kind: tok::arrow)) {
1555 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1556 Default->setFinalizedType(TT_SwitchExpressionLabel);
1557 parseLabel();
1558 return;
1559 }
1560 // e.g. "default void f() {}" in a Java interface.
1561 break;
1562 }
1563 case tok::kw_case:
1564 // Proto: there are no switch/case statements.
1565 if (Style.Language == FormatStyle::LK_Proto) {
1566 nextToken();
1567 return;
1568 }
1569 if (Style.isVerilog()) {
1570 parseBlock();
1571 addUnwrappedLine();
1572 return;
1573 }
1574 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1575 // 'case: string' field declaration.
1576 nextToken();
1577 break;
1578 }
1579 parseCaseLabel();
1580 return;
1581 case tok::kw_goto:
1582 nextToken();
1583 if (FormatTok->is(Kind: tok::kw_case))
1584 nextToken();
1585 break;
1586 case tok::kw_try:
1587 case tok::kw___try:
1588 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1589 // field/method declaration.
1590 break;
1591 }
1592 parseTryCatch();
1593 return;
1594 case tok::kw_extern:
1595 if (Style.isVerilog()) {
1596 // In Verilog an extern module declaration looks like a start of module.
1597 // But there is no body and endmodule. So we handle it separately.
1598 parseVerilogExtern();
1599 return;
1600 }
1601 nextToken();
1602 if (FormatTok->is(Kind: tok::string_literal)) {
1603 nextToken();
1604 if (FormatTok->is(Kind: tok::l_brace)) {
1605 if (Style.BraceWrapping.AfterExternBlock)
1606 addUnwrappedLine();
1607 // Either we indent or for backwards compatibility we follow the
1608 // AfterExternBlock style.
1609 unsigned AddLevels =
1610 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1611 (Style.BraceWrapping.AfterExternBlock &&
1612 Style.IndentExternBlock ==
1613 FormatStyle::IEBS_AfterExternBlock)
1614 ? 1u
1615 : 0u;
1616 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1617 addUnwrappedLine();
1618 return;
1619 }
1620 }
1621 break;
1622 case tok::kw_export:
1623 if (Style.isJavaScript()) {
1624 parseJavaScriptEs6ImportExport();
1625 return;
1626 }
1627 if (Style.isVerilog()) {
1628 parseVerilogExtern();
1629 return;
1630 }
1631 if (IsCpp) {
1632 nextToken();
1633 if (FormatTok->is(Kind: tok::kw_namespace)) {
1634 parseNamespace();
1635 return;
1636 }
1637 if (FormatTok->is(Kind: tok::l_brace)) {
1638 parseCppExportBlock();
1639 return;
1640 }
1641 if (FormatTok->is(II: Keywords.kw_import) && parseModuleImport())
1642 return;
1643 }
1644 break;
1645 case tok::kw_inline:
1646 nextToken();
1647 if (FormatTok->is(Kind: tok::kw_namespace)) {
1648 parseNamespace();
1649 return;
1650 }
1651 break;
1652 case tok::identifier:
1653 if (FormatTok->is(TT: TT_ForEachMacro)) {
1654 parseForOrWhileLoop();
1655 return;
1656 }
1657 if (FormatTok->is(TT: TT_MacroBlockBegin)) {
1658 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1659 /*MunchSemi=*/false);
1660 return;
1661 }
1662 if (FormatTok->is(II: Keywords.kw_import)) {
1663 if (Style.isJavaScript()) {
1664 parseJavaScriptEs6ImportExport();
1665 return;
1666 }
1667 if (Style.Language == FormatStyle::LK_Proto) {
1668 nextToken();
1669 if (FormatTok->is(Kind: tok::kw_public))
1670 nextToken();
1671 if (FormatTok->isNot(Kind: tok::string_literal))
1672 return;
1673 nextToken();
1674 if (FormatTok->is(Kind: tok::semi))
1675 nextToken();
1676 addUnwrappedLine();
1677 return;
1678 }
1679 if (Style.isVerilog()) {
1680 parseVerilogExtern();
1681 return;
1682 }
1683 if (IsCpp && parseModuleImport())
1684 return;
1685 }
1686 if (IsCpp && FormatTok->isOneOf(K1: Keywords.kw_signals, K2: Keywords.kw_qsignals,
1687 Ks: Keywords.kw_slots, Ks: Keywords.kw_qslots)) {
1688 nextToken();
1689 if (FormatTok->is(Kind: tok::colon)) {
1690 nextToken();
1691 addUnwrappedLine();
1692 return;
1693 }
1694 }
1695 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
1696 parseStatementMacro();
1697 return;
1698 }
1699 if (IsCpp && FormatTok->is(TT: TT_NamespaceMacro)) {
1700 parseNamespace();
1701 return;
1702 }
1703 // In Verilog labels can be any expression, so we don't do them here.
1704 // JS doesn't have macros, and within classes colons indicate fields, not
1705 // labels.
1706 // TableGen doesn't have labels.
1707 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1708 Tokens->peekNextToken()->is(Kind: tok::colon) && !Line->MustBeDeclaration) {
1709 nextToken();
1710 if (!Line->InMacroBody || CurrentLines->size() > 1)
1711 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1712 FormatTok->setFinalizedType(TT_GotoLabelColon);
1713 parseLabel(LeftAlignLabel: !Style.IndentGotoLabels);
1714 if (HasLabel)
1715 *HasLabel = true;
1716 return;
1717 }
1718 if (Style.isJava() && FormatTok->is(II: Keywords.kw_record)) {
1719 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1720 addUnwrappedLine();
1721 return;
1722 }
1723 // In all other cases, parse the declaration.
1724 break;
1725 default:
1726 break;
1727 }
1728
1729 bool SeenEqual = false;
1730 for (const bool InRequiresExpression =
1731 OpeningBrace && OpeningBrace->isOneOf(K1: TT_RequiresExpressionLBrace,
1732 K2: TT_CompoundRequirementLBrace);
1733 !eof();) {
1734 const FormatToken *Previous = FormatTok->Previous;
1735 switch (FormatTok->Tok.getKind()) {
1736 case tok::at:
1737 nextToken();
1738 if (FormatTok->is(Kind: tok::l_brace)) {
1739 nextToken();
1740 parseBracedList();
1741 break;
1742 }
1743 if (Style.isJava() && FormatTok->is(II: Keywords.kw_interface)) {
1744 nextToken();
1745 break;
1746 }
1747 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1748 case tok::objc_public:
1749 case tok::objc_protected:
1750 case tok::objc_package:
1751 case tok::objc_private:
1752 return parseAccessSpecifier();
1753 case tok::objc_interface:
1754 case tok::objc_implementation:
1755 return parseObjCInterfaceOrImplementation();
1756 case tok::objc_protocol:
1757 if (parseObjCProtocol())
1758 return;
1759 break;
1760 case tok::objc_end:
1761 return; // Handled by the caller.
1762 case tok::objc_optional:
1763 case tok::objc_required:
1764 nextToken();
1765 addUnwrappedLine();
1766 return;
1767 case tok::objc_autoreleasepool:
1768 IsAutoRelease = true;
1769 [[fallthrough]];
1770 case tok::objc_synchronized:
1771 nextToken();
1772 if (!IsAutoRelease && FormatTok->is(Kind: tok::l_paren)) {
1773 // Skip synchronization object
1774 parseParens();
1775 }
1776 if (FormatTok->is(Kind: tok::l_brace)) {
1777 if (Style.BraceWrapping.AfterControlStatement ==
1778 FormatStyle::BWACS_Always) {
1779 addUnwrappedLine();
1780 }
1781 parseBlock();
1782 }
1783 addUnwrappedLine();
1784 return;
1785 case tok::objc_try:
1786 // This branch isn't strictly necessary (the kw_try case below would
1787 // do this too after the tok::at is parsed above). But be explicit.
1788 parseTryCatch();
1789 return;
1790 default:
1791 break;
1792 }
1793 break;
1794 case tok::kw_requires: {
1795 if (IsCpp) {
1796 bool ParsedClause = parseRequires(SeenEqual);
1797 if (ParsedClause)
1798 return;
1799 } else {
1800 nextToken();
1801 }
1802 break;
1803 }
1804 case tok::kw_enum:
1805 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1806 // "template <..., enum ...>".
1807 if (Previous && Previous->isOneOf(K1: tok::less, K2: tok::arrow, Ks: tok::comma)) {
1808 nextToken();
1809 break;
1810 }
1811
1812 // parseEnum falls through and does not yet add an unwrapped line as an
1813 // enum definition can start a structural element.
1814 if (!parseEnum())
1815 break;
1816 // This only applies to C++ and Verilog.
1817 if (!IsCpp && !Style.isVerilog()) {
1818 addUnwrappedLine();
1819 return;
1820 }
1821 break;
1822 case tok::kw_typedef:
1823 nextToken();
1824 if (FormatTok->isOneOf(K1: Keywords.kw_NS_ENUM, K2: Keywords.kw_NS_OPTIONS,
1825 Ks: Keywords.kw_CF_ENUM, Ks: Keywords.kw_CF_OPTIONS,
1826 Ks: Keywords.kw_CF_CLOSED_ENUM,
1827 Ks: Keywords.kw_NS_CLOSED_ENUM)) {
1828 parseEnum();
1829 }
1830 break;
1831 case tok::kw_class:
1832 if (Style.isVerilog()) {
1833 parseBlock();
1834 addUnwrappedLine();
1835 return;
1836 }
1837 if (Style.isTableGen()) {
1838 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1839 // This is same as def and so on.
1840 nextToken();
1841 break;
1842 }
1843 [[fallthrough]];
1844 case tok::kw_struct:
1845 case tok::kw_union:
1846 if (parseStructLike())
1847 return;
1848 break;
1849 case tok::kw_decltype:
1850 nextToken();
1851 if (FormatTok->is(Kind: tok::l_paren)) {
1852 parseParens();
1853 if (FormatTok->Previous &&
1854 FormatTok->Previous->endsSequence(K1: tok::r_paren, Tokens: tok::kw_auto,
1855 Tokens: tok::l_paren)) {
1856 Line->SeenDecltypeAuto = true;
1857 }
1858 }
1859 break;
1860 case tok::period:
1861 nextToken();
1862 // In Java, classes have an implicit static member "class".
1863 if (Style.isJava() && FormatTok && FormatTok->is(Kind: tok::kw_class))
1864 nextToken();
1865 if (Style.isJavaScript() && FormatTok &&
1866 FormatTok->Tok.getIdentifierInfo()) {
1867 // JavaScript only has pseudo keywords, all keywords are allowed to
1868 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1869 nextToken();
1870 }
1871 break;
1872 case tok::semi:
1873 nextToken();
1874 addUnwrappedLine();
1875 return;
1876 case tok::r_brace:
1877 addUnwrappedLine();
1878 return;
1879 case tok::l_paren: {
1880 parseParens();
1881 // Break the unwrapped line if a K&R C function definition has a parameter
1882 // declaration.
1883 if (OpeningBrace || !IsCpp || !Previous || eof())
1884 break;
1885 if (isC78ParameterDecl(Tok: FormatTok,
1886 Next: Tokens->peekNextToken(/*SkipComment=*/true),
1887 FuncName: Previous)) {
1888 addUnwrappedLine();
1889 return;
1890 }
1891 break;
1892 }
1893 case tok::kw_operator:
1894 nextToken();
1895 if (FormatTok->isBinaryOperator())
1896 nextToken();
1897 break;
1898 case tok::caret: {
1899 const auto *Prev = FormatTok->getPreviousNonComment();
1900 nextToken();
1901 if (Prev && Prev->is(Kind: tok::identifier))
1902 break;
1903 // Block return type.
1904 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1905 nextToken();
1906 // Return types: pointers are ok too.
1907 while (FormatTok->is(Kind: tok::star))
1908 nextToken();
1909 }
1910 // Block argument list.
1911 if (FormatTok->is(Kind: tok::l_paren))
1912 parseParens();
1913 // Block body.
1914 if (FormatTok->is(Kind: tok::l_brace))
1915 parseChildBlock();
1916 break;
1917 }
1918 case tok::l_brace:
1919 if (InRequiresExpression)
1920 FormatTok->setFinalizedType(TT_BracedListLBrace);
1921 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1922 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1923 // A block outside of parentheses must be the last part of a
1924 // structural element.
1925 // FIXME: Figure out cases where this is not true, and add projections
1926 // for them (the one we know is missing are lambdas).
1927 if (Style.isJava() &&
1928 Line->Tokens.front().Tok->is(II: Keywords.kw_synchronized)) {
1929 // If necessary, we could set the type to something different than
1930 // TT_FunctionLBrace.
1931 if (Style.BraceWrapping.AfterControlStatement ==
1932 FormatStyle::BWACS_Always) {
1933 addUnwrappedLine();
1934 }
1935 } else if (Style.BraceWrapping.AfterFunction) {
1936 addUnwrappedLine();
1937 }
1938 if (!Previous || Previous->isNot(Kind: TT_TypeDeclarationParen))
1939 FormatTok->setFinalizedType(TT_FunctionLBrace);
1940 parseBlock();
1941 IsDecltypeAutoFunction = false;
1942 addUnwrappedLine();
1943 return;
1944 }
1945 // Otherwise this was a braced init list, and the structural
1946 // element continues.
1947 break;
1948 case tok::kw_try:
1949 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1950 // field/method declaration.
1951 nextToken();
1952 break;
1953 }
1954 // We arrive here when parsing function-try blocks.
1955 if (Style.BraceWrapping.AfterFunction)
1956 addUnwrappedLine();
1957 parseTryCatch();
1958 return;
1959 case tok::identifier: {
1960 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where) &&
1961 Line->MustBeDeclaration) {
1962 addUnwrappedLine();
1963 parseCSharpGenericTypeConstraint();
1964 break;
1965 }
1966 if (FormatTok->is(TT: TT_MacroBlockEnd)) {
1967 addUnwrappedLine();
1968 return;
1969 }
1970
1971 // Function declarations (as opposed to function expressions) are parsed
1972 // on their own unwrapped line by continuing this loop. Function
1973 // expressions (functions that are not on their own line) must not create
1974 // a new unwrapped line, so they are special cased below.
1975 size_t TokenCount = Line->Tokens.size();
1976 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_function) &&
1977 (TokenCount > 1 ||
1978 (TokenCount == 1 &&
1979 Line->Tokens.front().Tok->isNot(Kind: Keywords.kw_async)))) {
1980 tryToParseJSFunction();
1981 break;
1982 }
1983 if ((Style.isJavaScript() || Style.isJava()) &&
1984 FormatTok->is(II: Keywords.kw_interface)) {
1985 if (Style.isJavaScript()) {
1986 // In JavaScript/TypeScript, "interface" can be used as a standalone
1987 // identifier, e.g. in `var interface = 1;`. If "interface" is
1988 // followed by another identifier, it is very like to be an actual
1989 // interface declaration.
1990 unsigned StoredPosition = Tokens->getPosition();
1991 FormatToken *Next = Tokens->getNextToken();
1992 FormatTok = Tokens->setPosition(StoredPosition);
1993 if (!mustBeJSIdent(Keywords, FormatTok: Next)) {
1994 nextToken();
1995 break;
1996 }
1997 }
1998 parseRecord();
1999 addUnwrappedLine();
2000 return;
2001 }
2002
2003 if (Style.isVerilog()) {
2004 if (FormatTok->is(II: Keywords.kw_table)) {
2005 parseVerilogTable();
2006 return;
2007 }
2008 if (Keywords.isVerilogBegin(Tok: *FormatTok) ||
2009 Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
2010 parseBlock();
2011 addUnwrappedLine();
2012 return;
2013 }
2014 }
2015
2016 if (!IsCpp && FormatTok->is(II: Keywords.kw_interface)) {
2017 if (parseStructLike())
2018 return;
2019 break;
2020 }
2021
2022 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
2023 parseStatementMacro();
2024 return;
2025 }
2026
2027 // See if the following token should start a new unwrapped line.
2028 StringRef Text = FormatTok->TokenText;
2029
2030 FormatToken *PreviousToken = FormatTok;
2031 nextToken();
2032
2033 // JS doesn't have macros, and within classes colons indicate fields, not
2034 // labels.
2035 if (Style.isJavaScript())
2036 break;
2037
2038 auto OneTokenSoFar = [&]() {
2039 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2040 while (I != E && I->Tok->is(Kind: tok::comment))
2041 ++I;
2042 if (Style.isVerilog())
2043 while (I != E && I->Tok->is(Kind: tok::hash))
2044 ++I;
2045 return I != E && (++I == E);
2046 };
2047 if (OneTokenSoFar()) {
2048 // Recognize function-like macro usages without trailing semicolon as
2049 // well as free-standing macros like Q_OBJECT.
2050 bool FunctionLike = FormatTok->is(Kind: tok::l_paren);
2051 if (FunctionLike)
2052 parseParens();
2053
2054 bool FollowedByNewline =
2055 CommentsBeforeNextToken.empty()
2056 ? FormatTok->NewlinesBefore > 0
2057 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2058
2059 if (FollowedByNewline &&
2060 (Text.size() >= 5 ||
2061 (FunctionLike && FormatTok->isNot(Kind: tok::l_paren))) &&
2062 tokenCanStartNewLine(Tok: *FormatTok) && Text == Text.upper()) {
2063 if (PreviousToken->isNot(Kind: TT_UntouchableMacroFunc))
2064 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2065 addUnwrappedLine();
2066 return;
2067 }
2068 }
2069 break;
2070 }
2071 case tok::equal:
2072 if ((Style.isJavaScript() || Style.isCSharp()) &&
2073 FormatTok->is(TT: TT_FatArrow)) {
2074 tryToParseChildBlock();
2075 break;
2076 }
2077
2078 SeenEqual = true;
2079 nextToken();
2080 if (FormatTok->is(Kind: tok::l_brace)) {
2081 // Block kind should probably be set to BK_BracedInit for any language.
2082 // C# needs this change to ensure that array initialisers and object
2083 // initialisers are indented the same way.
2084 if (Style.isCSharp())
2085 FormatTok->setBlockKind(BK_BracedInit);
2086 // TableGen's defset statement has syntax of the form,
2087 // `defset <type> <name> = { <statement>... }`
2088 if (Style.isTableGen() &&
2089 Line->Tokens.begin()->Tok->is(II: Keywords.kw_defset)) {
2090 FormatTok->setFinalizedType(TT_FunctionLBrace);
2091 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2092 /*MunchSemi=*/false);
2093 addUnwrappedLine();
2094 break;
2095 }
2096 nextToken();
2097 parseBracedList();
2098 } else if (Style.Language == FormatStyle::LK_Proto &&
2099 FormatTok->is(Kind: tok::less)) {
2100 nextToken();
2101 parseBracedList(/*IsAngleBracket=*/true);
2102 }
2103 break;
2104 case tok::l_square:
2105 parseSquare();
2106 break;
2107 case tok::kw_new:
2108 if (Style.isCSharp() &&
2109 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2110 (Previous && Previous->isAccessSpecifierKeyword()))) {
2111 nextToken();
2112 } else {
2113 parseNew();
2114 }
2115 break;
2116 case tok::kw_switch:
2117 if (Style.isJava())
2118 parseSwitch(/*IsExpr=*/true);
2119 else
2120 nextToken();
2121 break;
2122 case tok::kw_case:
2123 // Proto: there are no switch/case statements.
2124 if (Style.Language == FormatStyle::LK_Proto) {
2125 nextToken();
2126 return;
2127 }
2128 // In Verilog switch is called case.
2129 if (Style.isVerilog()) {
2130 parseBlock();
2131 addUnwrappedLine();
2132 return;
2133 }
2134 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2135 // 'case: string' field declaration.
2136 nextToken();
2137 break;
2138 }
2139 parseCaseLabel();
2140 break;
2141 case tok::kw_default:
2142 nextToken();
2143 if (Style.isVerilog()) {
2144 if (FormatTok->is(Kind: tok::colon)) {
2145 // The label will be handled in the next iteration.
2146 break;
2147 }
2148 if (FormatTok->is(II: Keywords.kw_clocking)) {
2149 // A default clocking block.
2150 parseBlock();
2151 addUnwrappedLine();
2152 return;
2153 }
2154 parseVerilogCaseLabel();
2155 return;
2156 }
2157 break;
2158 case tok::colon:
2159 nextToken();
2160 if (Style.isVerilog()) {
2161 parseVerilogCaseLabel();
2162 return;
2163 }
2164 break;
2165 case tok::greater:
2166 nextToken();
2167 if (FormatTok->is(Kind: tok::l_brace))
2168 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2169 break;
2170 default:
2171 nextToken();
2172 break;
2173 }
2174 }
2175}
2176
2177bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2178 assert(FormatTok->is(tok::l_brace));
2179 if (!Style.isCSharp())
2180 return false;
2181 // See if it's a property accessor.
2182 if (!FormatTok->Previous || FormatTok->Previous->isNot(Kind: tok::identifier))
2183 return false;
2184
2185 // See if we are inside a property accessor.
2186 //
2187 // Record the current tokenPosition so that we can advance and
2188 // reset the current token. `Next` is not set yet so we need
2189 // another way to advance along the token stream.
2190 unsigned int StoredPosition = Tokens->getPosition();
2191 FormatToken *Tok = Tokens->getNextToken();
2192
2193 // A trivial property accessor is of the form:
2194 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2195 // Track these as they do not require line breaks to be introduced.
2196 bool HasSpecialAccessor = false;
2197 bool IsTrivialPropertyAccessor = true;
2198 bool HasAttribute = false;
2199 while (!eof()) {
2200 if (const bool IsAccessorKeyword =
2201 Tok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init, Ks: Keywords.kw_set);
2202 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2203 Tok->isOneOf(K1: tok::l_square, K2: tok::semi, Ks: Keywords.kw_internal)) {
2204 if (IsAccessorKeyword)
2205 HasSpecialAccessor = true;
2206 else if (Tok->is(Kind: tok::l_square))
2207 HasAttribute = true;
2208 Tok = Tokens->getNextToken();
2209 continue;
2210 }
2211 if (Tok->isNot(Kind: tok::r_brace))
2212 IsTrivialPropertyAccessor = false;
2213 break;
2214 }
2215
2216 if (!HasSpecialAccessor || HasAttribute) {
2217 Tokens->setPosition(StoredPosition);
2218 return false;
2219 }
2220
2221 // Try to parse the property accessor:
2222 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2223 Tokens->setPosition(StoredPosition);
2224 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2225 addUnwrappedLine();
2226 nextToken();
2227 do {
2228 switch (FormatTok->Tok.getKind()) {
2229 case tok::r_brace:
2230 nextToken();
2231 if (FormatTok->is(Kind: tok::equal)) {
2232 while (!eof() && FormatTok->isNot(Kind: tok::semi))
2233 nextToken();
2234 nextToken();
2235 }
2236 addUnwrappedLine();
2237 return true;
2238 case tok::l_brace:
2239 ++Line->Level;
2240 parseBlock(/*MustBeDeclaration=*/true);
2241 addUnwrappedLine();
2242 --Line->Level;
2243 break;
2244 case tok::equal:
2245 if (FormatTok->is(TT: TT_FatArrow)) {
2246 ++Line->Level;
2247 do {
2248 nextToken();
2249 } while (!eof() && FormatTok->isNot(Kind: tok::semi));
2250 nextToken();
2251 addUnwrappedLine();
2252 --Line->Level;
2253 break;
2254 }
2255 nextToken();
2256 break;
2257 default:
2258 if (FormatTok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init,
2259 Ks: Keywords.kw_set) &&
2260 !IsTrivialPropertyAccessor) {
2261 // Non-trivial get/set needs to be on its own line.
2262 addUnwrappedLine();
2263 }
2264 nextToken();
2265 }
2266 } while (!eof());
2267
2268 // Unreachable for well-formed code (paired '{' and '}').
2269 return true;
2270}
2271
2272bool UnwrappedLineParser::tryToParseLambda() {
2273 assert(FormatTok->is(tok::l_square));
2274 if (!IsCpp) {
2275 nextToken();
2276 return false;
2277 }
2278 FormatToken &LSquare = *FormatTok;
2279 if (!tryToParseLambdaIntroducer())
2280 return false;
2281
2282 FormatToken *Arrow = nullptr;
2283 bool InTemplateParameterList = false;
2284
2285 while (FormatTok->isNot(Kind: tok::l_brace)) {
2286 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2287 nextToken();
2288 continue;
2289 }
2290 switch (FormatTok->Tok.getKind()) {
2291 case tok::l_brace:
2292 break;
2293 case tok::l_paren:
2294 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2295 break;
2296 case tok::l_square:
2297 parseSquare();
2298 break;
2299 case tok::less:
2300 assert(FormatTok->Previous);
2301 if (FormatTok->Previous->is(Kind: tok::r_square))
2302 InTemplateParameterList = true;
2303 nextToken();
2304 break;
2305 case tok::kw_auto:
2306 case tok::kw_class:
2307 case tok::kw_struct:
2308 case tok::kw_union:
2309 case tok::kw_template:
2310 case tok::kw_typename:
2311 case tok::amp:
2312 case tok::star:
2313 case tok::kw_const:
2314 case tok::kw_constexpr:
2315 case tok::kw_consteval:
2316 case tok::comma:
2317 case tok::greater:
2318 case tok::identifier:
2319 case tok::numeric_constant:
2320 case tok::coloncolon:
2321 case tok::kw_mutable:
2322 case tok::kw_noexcept:
2323 case tok::kw_static:
2324 nextToken();
2325 break;
2326 // Specialization of a template with an integer parameter can contain
2327 // arithmetic, logical, comparison and ternary operators.
2328 //
2329 // FIXME: This also accepts sequences of operators that are not in the scope
2330 // of a template argument list.
2331 //
2332 // In a C++ lambda a template type can only occur after an arrow. We use
2333 // this as an heuristic to distinguish between Objective-C expressions
2334 // followed by an `a->b` expression, such as:
2335 // ([obj func:arg] + a->b)
2336 // Otherwise the code below would parse as a lambda.
2337 case tok::plus:
2338 case tok::minus:
2339 case tok::exclaim:
2340 case tok::tilde:
2341 case tok::slash:
2342 case tok::percent:
2343 case tok::lessless:
2344 case tok::pipe:
2345 case tok::pipepipe:
2346 case tok::ampamp:
2347 case tok::caret:
2348 case tok::equalequal:
2349 case tok::exclaimequal:
2350 case tok::greaterequal:
2351 case tok::lessequal:
2352 case tok::question:
2353 case tok::colon:
2354 case tok::ellipsis:
2355 case tok::kw_true:
2356 case tok::kw_false:
2357 if (Arrow || InTemplateParameterList) {
2358 nextToken();
2359 break;
2360 }
2361 return true;
2362 case tok::arrow:
2363 Arrow = FormatTok;
2364 nextToken();
2365 break;
2366 case tok::kw_requires:
2367 parseRequiresClause();
2368 break;
2369 case tok::equal:
2370 if (!InTemplateParameterList)
2371 return true;
2372 nextToken();
2373 break;
2374 default:
2375 return true;
2376 }
2377 }
2378
2379 FormatTok->setFinalizedType(TT_LambdaLBrace);
2380 LSquare.setFinalizedType(TT_LambdaLSquare);
2381
2382 if (Arrow)
2383 Arrow->setFinalizedType(TT_LambdaArrow);
2384
2385 NestedLambdas.push_back(Elt: Line->SeenDecltypeAuto);
2386 parseChildBlock();
2387 assert(!NestedLambdas.empty());
2388 NestedLambdas.pop_back();
2389
2390 return true;
2391}
2392
2393bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2394 const FormatToken *Previous = FormatTok->Previous;
2395 const FormatToken *LeftSquare = FormatTok;
2396 nextToken();
2397 if (Previous) {
2398 const auto *PrevPrev = Previous->getPreviousNonComment();
2399 if (Previous->is(Kind: tok::star) && PrevPrev && PrevPrev->isTypeName(LangOpts))
2400 return false;
2401 if (Previous->closesScope()) {
2402 // Not a potential C-style cast.
2403 if (Previous->isNot(Kind: tok::r_paren))
2404 return false;
2405 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2406 // and `int (*)()`.
2407 if (!PrevPrev || PrevPrev->isNoneOf(Ks: tok::greater, Ks: tok::r_paren))
2408 return false;
2409 }
2410 if (Previous && Previous->Tok.getIdentifierInfo() &&
2411 Previous->isNoneOf(Ks: tok::kw_return, Ks: tok::kw_co_await, Ks: tok::kw_co_yield,
2412 Ks: tok::kw_co_return)) {
2413 return false;
2414 }
2415 }
2416 if (LeftSquare->isCppStructuredBinding(IsCpp))
2417 return false;
2418 if (FormatTok->is(Kind: tok::l_square) || tok::isLiteral(K: FormatTok->Tok.getKind()))
2419 return false;
2420 if (FormatTok->is(Kind: tok::r_square)) {
2421 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2422 if (Next->is(Kind: tok::greater))
2423 return false;
2424 }
2425 parseSquare(/*LambdaIntroducer=*/true);
2426 return true;
2427}
2428
2429void UnwrappedLineParser::tryToParseJSFunction() {
2430 assert(FormatTok->is(Keywords.kw_function));
2431 if (FormatTok->is(II: Keywords.kw_async))
2432 nextToken();
2433 // Consume "function".
2434 nextToken();
2435
2436 // Consume * (generator function). Treat it like C++'s overloaded operators.
2437 if (FormatTok->is(Kind: tok::star)) {
2438 FormatTok->setFinalizedType(TT_OverloadedOperator);
2439 nextToken();
2440 }
2441
2442 // Consume function name.
2443 if (FormatTok->is(Kind: tok::identifier))
2444 nextToken();
2445
2446 if (FormatTok->isNot(Kind: tok::l_paren))
2447 return;
2448
2449 // Parse formal parameter list.
2450 parseParens();
2451
2452 if (FormatTok->is(Kind: tok::colon)) {
2453 // Parse a type definition.
2454 nextToken();
2455
2456 // Eat the type declaration. For braced inline object types, balance braces,
2457 // otherwise just parse until finding an l_brace for the function body.
2458 if (FormatTok->is(Kind: tok::l_brace))
2459 tryToParseBracedList();
2460 else
2461 while (FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::semi) && !eof())
2462 nextToken();
2463 }
2464
2465 if (FormatTok->is(Kind: tok::semi))
2466 return;
2467
2468 parseChildBlock();
2469}
2470
2471bool UnwrappedLineParser::tryToParseBracedList() {
2472 if (FormatTok->is(BBK: BK_Unknown))
2473 calculateBraceTypes();
2474 assert(FormatTok->isNot(BK_Unknown));
2475 if (FormatTok->is(BBK: BK_Block))
2476 return false;
2477 nextToken();
2478 parseBracedList();
2479 return true;
2480}
2481
2482bool UnwrappedLineParser::tryToParseChildBlock() {
2483 assert(Style.isJavaScript() || Style.isCSharp());
2484 assert(FormatTok->is(TT_FatArrow));
2485 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2486 // They always start an expression or a child block if followed by a curly
2487 // brace.
2488 nextToken();
2489 if (FormatTok->isNot(Kind: tok::l_brace))
2490 return false;
2491 parseChildBlock();
2492 return true;
2493}
2494
2495bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2496 assert(!IsAngleBracket || !IsEnum);
2497 bool HasError = false;
2498
2499 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2500 // replace this by using parseAssignmentExpression() inside.
2501 do {
2502 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow) &&
2503 tryToParseChildBlock()) {
2504 continue;
2505 }
2506 if (Style.isJavaScript()) {
2507 if (FormatTok->is(II: Keywords.kw_function)) {
2508 tryToParseJSFunction();
2509 continue;
2510 }
2511 if (FormatTok->is(Kind: tok::l_brace)) {
2512 // Could be a method inside of a braced list `{a() { return 1; }}`.
2513 if (tryToParseBracedList())
2514 continue;
2515 parseChildBlock();
2516 }
2517 }
2518 if (FormatTok->is(Kind: IsAngleBracket ? tok::greater : tok::r_brace)) {
2519 if (IsEnum) {
2520 FormatTok->setBlockKind(BK_Block);
2521 if (!Style.AllowShortEnumsOnASingleLine)
2522 addUnwrappedLine();
2523 }
2524 nextToken();
2525 return !HasError;
2526 }
2527 switch (FormatTok->Tok.getKind()) {
2528 case tok::l_square:
2529 if (Style.isCSharp())
2530 parseSquare();
2531 else
2532 tryToParseLambda();
2533 break;
2534 case tok::l_paren:
2535 parseParens();
2536 // JavaScript can just have free standing methods and getters/setters in
2537 // object literals. Detect them by a "{" following ")".
2538 if (Style.isJavaScript()) {
2539 if (FormatTok->is(Kind: tok::l_brace))
2540 parseChildBlock();
2541 break;
2542 }
2543 break;
2544 case tok::l_brace:
2545 // Assume there are no blocks inside a braced init list apart
2546 // from the ones we explicitly parse out (like lambdas).
2547 FormatTok->setBlockKind(BK_BracedInit);
2548 if (!IsAngleBracket) {
2549 auto *Prev = FormatTok->Previous;
2550 if (Prev && Prev->is(Kind: tok::greater))
2551 Prev->setFinalizedType(TT_TemplateCloser);
2552 }
2553 nextToken();
2554 parseBracedList();
2555 break;
2556 case tok::less:
2557 nextToken();
2558 if (IsAngleBracket)
2559 parseBracedList(/*IsAngleBracket=*/true);
2560 break;
2561 case tok::semi:
2562 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2563 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2564 // used for error recovery if we have otherwise determined that this is
2565 // a braced list.
2566 if (Style.isJavaScript()) {
2567 nextToken();
2568 break;
2569 }
2570 HasError = true;
2571 if (!IsEnum)
2572 return false;
2573 nextToken();
2574 break;
2575 case tok::comma:
2576 nextToken();
2577 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2578 addUnwrappedLine();
2579 break;
2580 case tok::kw_requires:
2581 parseRequiresExpression();
2582 break;
2583 default:
2584 nextToken();
2585 break;
2586 }
2587 } while (!eof());
2588 return false;
2589}
2590
2591/// Parses a pair of parentheses (and everything between them).
2592/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2593/// double ampersands. This applies for all nested scopes as well.
2594///
2595/// Returns whether there is a `=` token between the parentheses.
2596bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType,
2597 bool InMacroCall) {
2598 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2599 auto *LParen = FormatTok;
2600 auto *Prev = FormatTok->Previous;
2601 bool SeenComma = false;
2602 bool SeenEqual = false;
2603 bool MightBeFoldExpr = false;
2604 nextToken();
2605 const bool MightBeStmtExpr = FormatTok->is(Kind: tok::l_brace);
2606 if (!InMacroCall && Prev && Prev->is(TT: TT_FunctionLikeMacro))
2607 InMacroCall = true;
2608 do {
2609 switch (FormatTok->Tok.getKind()) {
2610 case tok::l_paren:
2611 if (parseParens(AmpAmpTokenType, InMacroCall))
2612 SeenEqual = true;
2613 if (Style.isJava() && FormatTok->is(Kind: tok::l_brace))
2614 parseChildBlock();
2615 break;
2616 case tok::r_paren: {
2617 auto *RParen = FormatTok;
2618 nextToken();
2619 if (Prev) {
2620 auto OptionalParens = [&] {
2621 if (Style.RemoveParentheses == FormatStyle::RPS_Leave ||
2622 MightBeStmtExpr || MightBeFoldExpr || SeenComma || InMacroCall ||
2623 Line->InMacroBody || RParen->getPreviousNonComment() == LParen) {
2624 return false;
2625 }
2626 const bool DoubleParens =
2627 Prev->is(Kind: tok::l_paren) && FormatTok->is(Kind: tok::r_paren);
2628 if (DoubleParens) {
2629 const auto *PrevPrev = Prev->getPreviousNonComment();
2630 const bool Excluded =
2631 PrevPrev &&
2632 (PrevPrev->isOneOf(K1: tok::kw___attribute, K2: tok::kw_decltype) ||
2633 (SeenEqual &&
2634 (PrevPrev->isOneOf(K1: tok::kw_if, K2: tok::kw_while) ||
2635 PrevPrev->endsSequence(K1: tok::kw_constexpr, Tokens: tok::kw_if))));
2636 if (!Excluded)
2637 return true;
2638 } else {
2639 const bool CommaSeparated =
2640 Prev->isOneOf(K1: tok::l_paren, K2: tok::comma) &&
2641 FormatTok->isOneOf(K1: tok::comma, K2: tok::r_paren);
2642 if (CommaSeparated &&
2643 // LParen is not preceded by ellipsis, comma.
2644 !Prev->endsSequence(K1: tok::comma, Tokens: tok::ellipsis) &&
2645 // RParen is not followed by comma, ellipsis.
2646 !(FormatTok->is(Kind: tok::comma) &&
2647 Tokens->peekNextToken()->is(Kind: tok::ellipsis))) {
2648 return true;
2649 }
2650 const bool ReturnParens =
2651 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2652 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2653 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2654 Prev->isOneOf(K1: tok::kw_return, K2: tok::kw_co_return) &&
2655 FormatTok->is(Kind: tok::semi);
2656 if (ReturnParens)
2657 return true;
2658 }
2659 return false;
2660 };
2661 if (OptionalParens()) {
2662 LParen->Optional = true;
2663 RParen->Optional = true;
2664 } else if (Prev->is(TT: TT_TypenameMacro)) {
2665 LParen->setFinalizedType(TT_TypeDeclarationParen);
2666 RParen->setFinalizedType(TT_TypeDeclarationParen);
2667 } else if (Prev->is(Kind: tok::greater) && RParen->Previous == LParen) {
2668 Prev->setFinalizedType(TT_TemplateCloser);
2669 } else if (FormatTok->is(Kind: tok::l_brace) && Prev->is(Kind: tok::amp) &&
2670 !Prev->Previous) {
2671 FormatTok->setBlockKind(BK_BracedInit);
2672 }
2673 }
2674 return SeenEqual;
2675 }
2676 case tok::r_brace:
2677 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2678 return SeenEqual;
2679 case tok::l_square:
2680 tryToParseLambda();
2681 break;
2682 case tok::l_brace:
2683 if (!tryToParseBracedList())
2684 parseChildBlock();
2685 break;
2686 case tok::at:
2687 nextToken();
2688 if (FormatTok->is(Kind: tok::l_brace)) {
2689 nextToken();
2690 parseBracedList();
2691 }
2692 break;
2693 case tok::comma:
2694 SeenComma = true;
2695 nextToken();
2696 break;
2697 case tok::ellipsis:
2698 MightBeFoldExpr = true;
2699 nextToken();
2700 break;
2701 case tok::equal:
2702 SeenEqual = true;
2703 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow))
2704 tryToParseChildBlock();
2705 else
2706 nextToken();
2707 break;
2708 case tok::kw_class:
2709 if (Style.isJavaScript())
2710 parseRecord(/*ParseAsExpr=*/true);
2711 else
2712 nextToken();
2713 break;
2714 case tok::identifier:
2715 if (Style.isJavaScript() && (FormatTok->is(II: Keywords.kw_function)))
2716 tryToParseJSFunction();
2717 else
2718 nextToken();
2719 break;
2720 case tok::kw_switch:
2721 if (Style.isJava())
2722 parseSwitch(/*IsExpr=*/true);
2723 else
2724 nextToken();
2725 break;
2726 case tok::kw_requires:
2727 parseRequiresExpression();
2728 break;
2729 case tok::ampamp:
2730 if (AmpAmpTokenType != TT_Unknown)
2731 FormatTok->setFinalizedType(AmpAmpTokenType);
2732 [[fallthrough]];
2733 default:
2734 nextToken();
2735 break;
2736 }
2737 } while (!eof());
2738 return SeenEqual;
2739}
2740
2741void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2742 if (!LambdaIntroducer) {
2743 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2744 if (tryToParseLambda())
2745 return;
2746 }
2747 do {
2748 switch (FormatTok->Tok.getKind()) {
2749 case tok::l_paren:
2750 parseParens();
2751 break;
2752 case tok::r_square:
2753 nextToken();
2754 return;
2755 case tok::r_brace:
2756 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2757 return;
2758 case tok::l_square:
2759 parseSquare();
2760 break;
2761 case tok::l_brace: {
2762 if (!tryToParseBracedList())
2763 parseChildBlock();
2764 break;
2765 }
2766 case tok::at:
2767 case tok::colon:
2768 nextToken();
2769 if (FormatTok->is(Kind: tok::l_brace)) {
2770 nextToken();
2771 parseBracedList();
2772 }
2773 break;
2774 default:
2775 nextToken();
2776 break;
2777 }
2778 } while (!eof());
2779}
2780
2781void UnwrappedLineParser::keepAncestorBraces() {
2782 if (!Style.RemoveBracesLLVM)
2783 return;
2784
2785 const int MaxNestingLevels = 2;
2786 const int Size = NestedTooDeep.size();
2787 if (Size >= MaxNestingLevels)
2788 NestedTooDeep[Size - MaxNestingLevels] = true;
2789 NestedTooDeep.push_back(Elt: false);
2790}
2791
2792static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2793 for (const auto &Token : llvm::reverse(C: Line.Tokens))
2794 if (Token.Tok->isNot(Kind: tok::comment))
2795 return Token.Tok;
2796
2797 return nullptr;
2798}
2799
2800void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2801 FormatToken *Tok = nullptr;
2802
2803 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2804 PreprocessorDirectives.empty() && FormatTok->isNot(Kind: tok::semi)) {
2805 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2806 ? getLastNonComment(Line: *Line)
2807 : Line->Tokens.back().Tok;
2808 assert(Tok);
2809 if (Tok->BraceCount < 0) {
2810 assert(Tok->BraceCount == -1);
2811 Tok = nullptr;
2812 } else {
2813 Tok->BraceCount = -1;
2814 }
2815 }
2816
2817 addUnwrappedLine();
2818 ++Line->Level;
2819 ++Line->UnbracedBodyLevel;
2820 parseStructuralElement();
2821 --Line->UnbracedBodyLevel;
2822
2823 if (Tok) {
2824 assert(!Line->InPPDirective);
2825 Tok = nullptr;
2826 for (const auto &L : llvm::reverse(C&: *CurrentLines)) {
2827 if (!L.InPPDirective && getLastNonComment(Line: L)) {
2828 Tok = L.Tokens.back().Tok;
2829 break;
2830 }
2831 }
2832 assert(Tok);
2833 ++Tok->BraceCount;
2834 }
2835
2836 if (CheckEOF && eof())
2837 addUnwrappedLine();
2838
2839 --Line->Level;
2840}
2841
2842static void markOptionalBraces(FormatToken *LeftBrace) {
2843 if (!LeftBrace)
2844 return;
2845
2846 assert(LeftBrace->is(tok::l_brace));
2847
2848 FormatToken *RightBrace = LeftBrace->MatchingParen;
2849 if (!RightBrace) {
2850 assert(!LeftBrace->Optional);
2851 return;
2852 }
2853
2854 assert(RightBrace->is(tok::r_brace));
2855 assert(RightBrace->MatchingParen == LeftBrace);
2856 assert(LeftBrace->Optional == RightBrace->Optional);
2857
2858 LeftBrace->Optional = true;
2859 RightBrace->Optional = true;
2860}
2861
2862void UnwrappedLineParser::handleAttributes() {
2863 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2864 if (FormatTok->isAttribute())
2865 nextToken();
2866 else if (FormatTok->is(Kind: tok::l_square))
2867 handleCppAttributes();
2868}
2869
2870bool UnwrappedLineParser::handleCppAttributes() {
2871 // Handle [[likely]] / [[unlikely]] attributes.
2872 assert(FormatTok->is(tok::l_square));
2873 if (!tryToParseSimpleAttribute())
2874 return false;
2875 parseSquare();
2876 return true;
2877}
2878
2879/// Returns whether \c Tok begins a block.
2880bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2881 // FIXME: rename the function or make
2882 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2883 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2884 : Tok.is(Kind: tok::l_brace);
2885}
2886
2887FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2888 bool KeepBraces,
2889 bool IsVerilogAssert) {
2890 assert((FormatTok->is(tok::kw_if) ||
2891 (Style.isVerilog() &&
2892 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2893 Keywords.kw_assume, Keywords.kw_cover))) &&
2894 "'if' expected");
2895 nextToken();
2896
2897 if (IsVerilogAssert) {
2898 // Handle `assert #0` and `assert final`.
2899 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
2900 nextToken();
2901 if (FormatTok->is(Kind: tok::numeric_constant))
2902 nextToken();
2903 } else if (FormatTok->isOneOf(K1: Keywords.kw_final, K2: Keywords.kw_property,
2904 Ks: Keywords.kw_sequence)) {
2905 nextToken();
2906 }
2907 }
2908
2909 // TableGen's if statement has the form of `if <cond> then { ... }`.
2910 if (Style.isTableGen()) {
2911 while (!eof() && FormatTok->isNot(Kind: Keywords.kw_then)) {
2912 // Simply skip until then. This range only contains a value.
2913 nextToken();
2914 }
2915 }
2916
2917 // Handle `if !consteval`.
2918 if (FormatTok->is(Kind: tok::exclaim))
2919 nextToken();
2920
2921 bool KeepIfBraces = true;
2922 if (FormatTok->is(Kind: tok::kw_consteval)) {
2923 nextToken();
2924 } else {
2925 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2926 if (FormatTok->isOneOf(K1: tok::kw_constexpr, K2: tok::identifier))
2927 nextToken();
2928 if (FormatTok->is(Kind: tok::l_paren)) {
2929 FormatTok->setFinalizedType(TT_ConditionLParen);
2930 parseParens();
2931 }
2932 }
2933 handleAttributes();
2934 // The then action is optional in Verilog assert statements.
2935 if (IsVerilogAssert && FormatTok->is(Kind: tok::semi)) {
2936 nextToken();
2937 addUnwrappedLine();
2938 return nullptr;
2939 }
2940
2941 bool NeedsUnwrappedLine = false;
2942 keepAncestorBraces();
2943
2944 FormatToken *IfLeftBrace = nullptr;
2945 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2946
2947 if (isBlockBegin(Tok: *FormatTok)) {
2948 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2949 IfLeftBrace = FormatTok;
2950 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2951 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2952 /*MunchSemi=*/true, KeepBraces: KeepIfBraces, IfKind: &IfBlockKind);
2953 setPreviousRBraceType(TT_ControlStatementRBrace);
2954 if (Style.BraceWrapping.BeforeElse)
2955 addUnwrappedLine();
2956 else
2957 NeedsUnwrappedLine = true;
2958 } else if (IsVerilogAssert && FormatTok->is(Kind: tok::kw_else)) {
2959 addUnwrappedLine();
2960 } else {
2961 parseUnbracedBody();
2962 }
2963
2964 if (Style.RemoveBracesLLVM) {
2965 assert(!NestedTooDeep.empty());
2966 KeepIfBraces = KeepIfBraces ||
2967 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2968 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2969 IfBlockKind == IfStmtKind::IfElseIf;
2970 }
2971
2972 bool KeepElseBraces = KeepIfBraces;
2973 FormatToken *ElseLeftBrace = nullptr;
2974 IfStmtKind Kind = IfStmtKind::IfOnly;
2975
2976 if (FormatTok->is(Kind: tok::kw_else)) {
2977 if (Style.RemoveBracesLLVM) {
2978 NestedTooDeep.back() = false;
2979 Kind = IfStmtKind::IfElse;
2980 }
2981 nextToken();
2982 handleAttributes();
2983 if (isBlockBegin(Tok: *FormatTok)) {
2984 const bool FollowedByIf = Tokens->peekNextToken()->is(Kind: tok::kw_if);
2985 FormatTok->setFinalizedType(TT_ElseLBrace);
2986 ElseLeftBrace = FormatTok;
2987 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2988 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2989 FormatToken *IfLBrace =
2990 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2991 /*MunchSemi=*/true, KeepBraces: KeepElseBraces, IfKind: &ElseBlockKind);
2992 setPreviousRBraceType(TT_ElseRBrace);
2993 if (FormatTok->is(Kind: tok::kw_else)) {
2994 KeepElseBraces = KeepElseBraces ||
2995 ElseBlockKind == IfStmtKind::IfOnly ||
2996 ElseBlockKind == IfStmtKind::IfElseIf;
2997 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2998 KeepElseBraces = true;
2999 assert(ElseLeftBrace->MatchingParen);
3000 markOptionalBraces(LeftBrace: ElseLeftBrace);
3001 }
3002 addUnwrappedLine();
3003 } else if (!IsVerilogAssert && FormatTok->is(Kind: tok::kw_if)) {
3004 const FormatToken *Previous = Tokens->getPreviousToken();
3005 assert(Previous);
3006 const bool IsPrecededByComment = Previous->is(Kind: tok::comment);
3007 if (IsPrecededByComment) {
3008 addUnwrappedLine();
3009 ++Line->Level;
3010 }
3011 bool TooDeep = true;
3012 if (Style.RemoveBracesLLVM) {
3013 Kind = IfStmtKind::IfElseIf;
3014 TooDeep = NestedTooDeep.pop_back_val();
3015 }
3016 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepBraces: KeepIfBraces);
3017 if (Style.RemoveBracesLLVM)
3018 NestedTooDeep.push_back(Elt: TooDeep);
3019 if (IsPrecededByComment)
3020 --Line->Level;
3021 } else {
3022 parseUnbracedBody(/*CheckEOF=*/true);
3023 }
3024 } else {
3025 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3026 if (NeedsUnwrappedLine)
3027 addUnwrappedLine();
3028 }
3029
3030 if (!Style.RemoveBracesLLVM)
3031 return nullptr;
3032
3033 assert(!NestedTooDeep.empty());
3034 KeepElseBraces = KeepElseBraces ||
3035 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3036 NestedTooDeep.back();
3037
3038 NestedTooDeep.pop_back();
3039
3040 if (!KeepIfBraces && !KeepElseBraces) {
3041 markOptionalBraces(LeftBrace: IfLeftBrace);
3042 markOptionalBraces(LeftBrace: ElseLeftBrace);
3043 } else if (IfLeftBrace) {
3044 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3045 if (IfRightBrace) {
3046 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3047 assert(!IfLeftBrace->Optional);
3048 assert(!IfRightBrace->Optional);
3049 IfLeftBrace->MatchingParen = nullptr;
3050 IfRightBrace->MatchingParen = nullptr;
3051 }
3052 }
3053
3054 if (IfKind)
3055 *IfKind = Kind;
3056
3057 return IfLeftBrace;
3058}
3059
3060void UnwrappedLineParser::parseTryCatch() {
3061 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3062 nextToken();
3063 bool NeedsUnwrappedLine = false;
3064 bool HasCtorInitializer = false;
3065 if (FormatTok->is(Kind: tok::colon)) {
3066 auto *Colon = FormatTok;
3067 // We are in a function try block, what comes is an initializer list.
3068 nextToken();
3069 if (FormatTok->is(Kind: tok::identifier)) {
3070 HasCtorInitializer = true;
3071 Colon->setFinalizedType(TT_CtorInitializerColon);
3072 }
3073
3074 // In case identifiers were removed by clang-tidy, what might follow is
3075 // multiple commas in sequence - before the first identifier.
3076 while (FormatTok->is(Kind: tok::comma))
3077 nextToken();
3078
3079 while (FormatTok->is(Kind: tok::identifier)) {
3080 nextToken();
3081 if (FormatTok->is(Kind: tok::l_paren)) {
3082 parseParens();
3083 } else if (FormatTok->is(Kind: tok::l_brace)) {
3084 nextToken();
3085 parseBracedList();
3086 }
3087
3088 // In case identifiers were removed by clang-tidy, what might follow is
3089 // multiple commas in sequence - after the first identifier.
3090 while (FormatTok->is(Kind: tok::comma))
3091 nextToken();
3092 }
3093 }
3094 // Parse try with resource.
3095 if (Style.isJava() && FormatTok->is(Kind: tok::l_paren))
3096 parseParens();
3097
3098 keepAncestorBraces();
3099
3100 if (FormatTok->is(Kind: tok::l_brace)) {
3101 if (HasCtorInitializer)
3102 FormatTok->setFinalizedType(TT_FunctionLBrace);
3103 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3104 parseBlock();
3105 if (Style.BraceWrapping.BeforeCatch)
3106 addUnwrappedLine();
3107 else
3108 NeedsUnwrappedLine = true;
3109 } else if (FormatTok->isNot(Kind: tok::kw_catch)) {
3110 // The C++ standard requires a compound-statement after a try.
3111 // If there's none, we try to assume there's a structuralElement
3112 // and try to continue.
3113 addUnwrappedLine();
3114 ++Line->Level;
3115 parseStructuralElement();
3116 --Line->Level;
3117 }
3118 for (bool SeenCatch = false;;) {
3119 if (FormatTok->is(Kind: tok::at))
3120 nextToken();
3121 if (FormatTok->isNoneOf(Ks: tok::kw_catch, Ks: Keywords.kw___except,
3122 Ks: tok::kw___finally, Ks: tok::objc_catch,
3123 Ks: tok::objc_finally) &&
3124 !((Style.isJava() || Style.isJavaScript()) &&
3125 FormatTok->is(II: Keywords.kw_finally))) {
3126 break;
3127 }
3128 if (FormatTok->is(Kind: tok::kw_catch))
3129 SeenCatch = true;
3130 nextToken();
3131 while (FormatTok->isNot(Kind: tok::l_brace)) {
3132 if (FormatTok->is(Kind: tok::l_paren)) {
3133 parseParens();
3134 continue;
3135 }
3136 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::r_brace) || eof()) {
3137 if (Style.RemoveBracesLLVM)
3138 NestedTooDeep.pop_back();
3139 return;
3140 }
3141 nextToken();
3142 }
3143 if (SeenCatch) {
3144 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3145 SeenCatch = false;
3146 }
3147 NeedsUnwrappedLine = false;
3148 Line->MustBeDeclaration = false;
3149 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3150 parseBlock();
3151 if (Style.BraceWrapping.BeforeCatch)
3152 addUnwrappedLine();
3153 else
3154 NeedsUnwrappedLine = true;
3155 }
3156
3157 if (Style.RemoveBracesLLVM)
3158 NestedTooDeep.pop_back();
3159
3160 if (NeedsUnwrappedLine)
3161 addUnwrappedLine();
3162}
3163
3164void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3165 bool ManageWhitesmithsBraces =
3166 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3167
3168 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3169 // the whole block.
3170 if (ManageWhitesmithsBraces)
3171 ++Line->Level;
3172
3173 // Munch the semicolon after the block. This is more common than one would
3174 // think. Putting the semicolon into its own line is very ugly.
3175 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3176 /*KeepBraces=*/true, /*IfKind=*/nullptr, UnindentWhitesmithsBraces: ManageWhitesmithsBraces);
3177
3178 addUnwrappedLine(AdjustLevel: AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3179
3180 if (ManageWhitesmithsBraces)
3181 --Line->Level;
3182}
3183
3184void UnwrappedLineParser::parseNamespace() {
3185 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3186 "'namespace' expected");
3187
3188 const FormatToken &InitialToken = *FormatTok;
3189 nextToken();
3190 if (InitialToken.is(TT: TT_NamespaceMacro)) {
3191 parseParens();
3192 } else {
3193 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::kw_inline,
3194 Ks: tok::l_square, Ks: tok::period, Ks: tok::l_paren) ||
3195 (Style.isCSharp() && FormatTok->is(Kind: tok::kw_union))) {
3196 if (FormatTok->is(Kind: tok::l_square))
3197 parseSquare();
3198 else if (FormatTok->is(Kind: tok::l_paren))
3199 parseParens();
3200 else
3201 nextToken();
3202 }
3203 }
3204 if (FormatTok->is(Kind: tok::l_brace)) {
3205 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3206
3207 if (ShouldBreakBeforeBrace(Style, InitialToken, /*IsJavaRecord=*/false))
3208 addUnwrappedLine();
3209
3210 unsigned AddLevels =
3211 Style.NamespaceIndentation == FormatStyle::NI_All ||
3212 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3213 DeclarationScopeStack.size() > 1)
3214 ? 1u
3215 : 0u;
3216 parseNamespaceOrExportBlock(AddLevels);
3217 }
3218 // FIXME: Add error handling.
3219}
3220
3221void UnwrappedLineParser::parseCppExportBlock() {
3222 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3223}
3224
3225void UnwrappedLineParser::parseNew() {
3226 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3227 nextToken();
3228
3229 if (Style.isCSharp()) {
3230 do {
3231 // Handle constructor invocation, e.g. `new(field: value)`.
3232 if (FormatTok->is(Kind: tok::l_paren))
3233 parseParens();
3234
3235 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3236 if (FormatTok->is(Kind: tok::l_brace))
3237 parseBracedList();
3238
3239 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::comma))
3240 return;
3241
3242 nextToken();
3243 } while (!eof());
3244 }
3245
3246 if (!Style.isJava())
3247 return;
3248
3249 // In Java, we can parse everything up to the parens, which aren't optional.
3250 do {
3251 // There should not be a ;, { or } before the new's open paren.
3252 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::r_brace))
3253 return;
3254
3255 // Consume the parens.
3256 if (FormatTok->is(Kind: tok::l_paren)) {
3257 parseParens();
3258
3259 // If there is a class body of an anonymous class, consume that as child.
3260 if (FormatTok->is(Kind: tok::l_brace))
3261 parseChildBlock();
3262 return;
3263 }
3264 nextToken();
3265 } while (!eof());
3266}
3267
3268void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3269 keepAncestorBraces();
3270
3271 if (isBlockBegin(Tok: *FormatTok)) {
3272 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3273 FormatToken *LeftBrace = FormatTok;
3274 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3275 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3276 /*MunchSemi=*/true, KeepBraces);
3277 setPreviousRBraceType(TT_ControlStatementRBrace);
3278 if (!KeepBraces) {
3279 assert(!NestedTooDeep.empty());
3280 if (!NestedTooDeep.back())
3281 markOptionalBraces(LeftBrace);
3282 }
3283 if (WrapRightBrace)
3284 addUnwrappedLine();
3285 } else {
3286 parseUnbracedBody();
3287 }
3288
3289 if (!KeepBraces)
3290 NestedTooDeep.pop_back();
3291}
3292
3293void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3294 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3295 (Style.isVerilog() &&
3296 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3297 Keywords.kw_always_ff, Keywords.kw_always_latch,
3298 Keywords.kw_final, Keywords.kw_initial,
3299 Keywords.kw_foreach, Keywords.kw_forever,
3300 Keywords.kw_repeat))) &&
3301 "'for', 'while' or foreach macro expected");
3302 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3303 FormatTok->isNoneOf(Ks: tok::kw_for, Ks: tok::kw_while);
3304
3305 nextToken();
3306 // JS' for await ( ...
3307 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_await))
3308 nextToken();
3309 if (IsCpp && FormatTok->is(Kind: tok::kw_co_await))
3310 nextToken();
3311 if (HasParens && FormatTok->is(Kind: tok::l_paren)) {
3312 // The type is only set for Verilog basically because we were afraid to
3313 // change the existing behavior for loops. See the discussion on D121756 for
3314 // details.
3315 if (Style.isVerilog())
3316 FormatTok->setFinalizedType(TT_ConditionLParen);
3317 parseParens();
3318 }
3319
3320 if (Style.isVerilog()) {
3321 // Event control.
3322 parseVerilogSensitivityList();
3323 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(Kind: tok::semi) &&
3324 Tokens->getPreviousToken()->is(Kind: tok::r_paren)) {
3325 nextToken();
3326 addUnwrappedLine();
3327 return;
3328 }
3329
3330 handleAttributes();
3331 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3332}
3333
3334void UnwrappedLineParser::parseDoWhile() {
3335 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3336 nextToken();
3337
3338 parseLoopBody(/*KeepBraces=*/true, WrapRightBrace: Style.BraceWrapping.BeforeWhile);
3339
3340 // FIXME: Add error handling.
3341 if (FormatTok->isNot(Kind: tok::kw_while)) {
3342 addUnwrappedLine();
3343 return;
3344 }
3345
3346 FormatTok->setFinalizedType(TT_DoWhile);
3347
3348 // If in Whitesmiths mode, the line with the while() needs to be indented
3349 // to the same level as the block.
3350 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3351 ++Line->Level;
3352
3353 nextToken();
3354 parseStructuralElement();
3355}
3356
3357void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3358 nextToken();
3359 unsigned OldLineLevel = Line->Level;
3360
3361 if (LeftAlignLabel)
3362 Line->Level = 0;
3363 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3364 --Line->Level;
3365
3366 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3367 FormatTok->is(Kind: tok::l_brace)) {
3368
3369 CompoundStatementIndenter Indenter(this, Line->Level,
3370 Style.BraceWrapping.AfterCaseLabel,
3371 Style.BraceWrapping.IndentBraces);
3372 parseBlock();
3373 if (FormatTok->is(Kind: tok::kw_break)) {
3374 if (Style.BraceWrapping.AfterControlStatement ==
3375 FormatStyle::BWACS_Always) {
3376 addUnwrappedLine();
3377 if (!Style.IndentCaseBlocks &&
3378 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3379 ++Line->Level;
3380 }
3381 }
3382 parseStructuralElement();
3383 }
3384 addUnwrappedLine();
3385 } else {
3386 if (FormatTok->is(Kind: tok::semi))
3387 nextToken();
3388 addUnwrappedLine();
3389 }
3390 Line->Level = OldLineLevel;
3391 if (FormatTok->isNot(Kind: tok::l_brace)) {
3392 parseStructuralElement();
3393 addUnwrappedLine();
3394 }
3395}
3396
3397void UnwrappedLineParser::parseCaseLabel() {
3398 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3399 auto *Case = FormatTok;
3400
3401 // FIXME: fix handling of complex expressions here.
3402 do {
3403 nextToken();
3404 if (FormatTok->is(Kind: tok::colon)) {
3405 FormatTok->setFinalizedType(TT_CaseLabelColon);
3406 break;
3407 }
3408 if (Style.isJava() && FormatTok->is(Kind: tok::arrow)) {
3409 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3410 Case->setFinalizedType(TT_SwitchExpressionLabel);
3411 break;
3412 }
3413 } while (!eof());
3414 parseLabel();
3415}
3416
3417void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3418 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3419 nextToken();
3420 if (FormatTok->is(Kind: tok::l_paren))
3421 parseParens();
3422
3423 keepAncestorBraces();
3424
3425 if (FormatTok->is(Kind: tok::l_brace)) {
3426 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3427 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3428 : TT_ControlStatementLBrace);
3429 if (IsExpr)
3430 parseChildBlock();
3431 else
3432 parseBlock();
3433 setPreviousRBraceType(TT_ControlStatementRBrace);
3434 if (!IsExpr)
3435 addUnwrappedLine();
3436 } else {
3437 addUnwrappedLine();
3438 ++Line->Level;
3439 parseStructuralElement();
3440 --Line->Level;
3441 }
3442
3443 if (Style.RemoveBracesLLVM)
3444 NestedTooDeep.pop_back();
3445}
3446
3447void UnwrappedLineParser::parseAccessSpecifier() {
3448 nextToken();
3449 // Understand Qt's slots.
3450 if (FormatTok->isOneOf(K1: Keywords.kw_slots, K2: Keywords.kw_qslots))
3451 nextToken();
3452 // Otherwise, we don't know what it is, and we'd better keep the next token.
3453 if (FormatTok->is(Kind: tok::colon))
3454 nextToken();
3455 addUnwrappedLine();
3456}
3457
3458/// Parses a requires, decides if it is a clause or an expression.
3459/// \pre The current token has to be the requires keyword.
3460/// \returns true if it parsed a clause.
3461bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3462 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3463
3464 // We try to guess if it is a requires clause, or a requires expression. For
3465 // that we first check the next token.
3466 switch (Tokens->peekNextToken(/*SkipComment=*/true)->Tok.getKind()) {
3467 case tok::l_brace:
3468 // This can only be an expression, never a clause.
3469 parseRequiresExpression();
3470 return false;
3471 case tok::l_paren:
3472 // Clauses and expression can start with a paren, it's unclear what we have.
3473 break;
3474 default:
3475 // All other tokens can only be a clause.
3476 parseRequiresClause();
3477 return true;
3478 }
3479
3480 // Looking forward we would have to decide if there are function declaration
3481 // like arguments to the requires expression:
3482 // requires (T t) {
3483 // Or there is a constraint expression for the requires clause:
3484 // requires (C<T> && ...
3485
3486 // But first let's look behind.
3487 auto *PreviousNonComment = FormatTok->getPreviousNonComment();
3488
3489 if (!PreviousNonComment ||
3490 PreviousNonComment->is(TT: TT_RequiresExpressionLBrace)) {
3491 // If there is no token, or an expression left brace, we are a requires
3492 // clause within a requires expression.
3493 parseRequiresClause();
3494 return true;
3495 }
3496
3497 switch (PreviousNonComment->Tok.getKind()) {
3498 case tok::greater:
3499 case tok::r_paren:
3500 case tok::kw_noexcept:
3501 case tok::kw_const:
3502 case tok::star:
3503 case tok::amp:
3504 // This is a requires clause.
3505 parseRequiresClause();
3506 return true;
3507 case tok::ampamp: {
3508 // This can be either:
3509 // if (... && requires (T t) ...)
3510 // Or
3511 // void member(...) && requires (C<T> ...
3512 // We check the one token before that for a const:
3513 // void member(...) const && requires (C<T> ...
3514 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3515 if ((PrevPrev && PrevPrev->is(Kind: tok::kw_const)) || !SeenEqual) {
3516 parseRequiresClause();
3517 return true;
3518 }
3519 break;
3520 }
3521 default:
3522 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3523 // This is a requires clause.
3524 parseRequiresClause();
3525 return true;
3526 }
3527 // It's an expression.
3528 parseRequiresExpression();
3529 return false;
3530 }
3531
3532 // Now we look forward and try to check if the paren content is a parameter
3533 // list. The parameters can be cv-qualified and contain references or
3534 // pointers.
3535 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3536 // of stuff: typename, const, *, &, &&, ::, identifiers.
3537
3538 unsigned StoredPosition = Tokens->getPosition();
3539 FormatToken *NextToken = Tokens->getNextToken();
3540 int Lookahead = 0;
3541 auto PeekNext = [&Lookahead, &NextToken, this] {
3542 ++Lookahead;
3543 NextToken = Tokens->getNextToken();
3544 };
3545
3546 bool FoundType = false;
3547 bool LastWasColonColon = false;
3548 int OpenAngles = 0;
3549
3550 for (; Lookahead < 50; PeekNext()) {
3551 switch (NextToken->Tok.getKind()) {
3552 case tok::kw_volatile:
3553 case tok::kw_const:
3554 case tok::comma:
3555 if (OpenAngles == 0) {
3556 FormatTok = Tokens->setPosition(StoredPosition);
3557 parseRequiresExpression();
3558 return false;
3559 }
3560 break;
3561 case tok::eof:
3562 // Break out of the loop.
3563 Lookahead = 50;
3564 break;
3565 case tok::coloncolon:
3566 LastWasColonColon = true;
3567 break;
3568 case tok::kw_decltype:
3569 case tok::identifier:
3570 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3571 FormatTok = Tokens->setPosition(StoredPosition);
3572 parseRequiresExpression();
3573 return false;
3574 }
3575 FoundType = true;
3576 LastWasColonColon = false;
3577 break;
3578 case tok::less:
3579 ++OpenAngles;
3580 break;
3581 case tok::greater:
3582 --OpenAngles;
3583 break;
3584 default:
3585 if (NextToken->isTypeName(LangOpts)) {
3586 FormatTok = Tokens->setPosition(StoredPosition);
3587 parseRequiresExpression();
3588 return false;
3589 }
3590 break;
3591 }
3592 }
3593 // This seems to be a complicated expression, just assume it's a clause.
3594 FormatTok = Tokens->setPosition(StoredPosition);
3595 parseRequiresClause();
3596 return true;
3597}
3598
3599/// Parses a requires clause.
3600/// \sa parseRequiresExpression
3601///
3602/// Returns if it either has finished parsing the clause, or it detects, that
3603/// the clause is incorrect.
3604void UnwrappedLineParser::parseRequiresClause() {
3605 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3606
3607 // If there is no previous token, we are within a requires expression,
3608 // otherwise we will always have the template or function declaration in front
3609 // of it.
3610 bool InRequiresExpression =
3611 !FormatTok->Previous ||
3612 FormatTok->Previous->is(TT: TT_RequiresExpressionLBrace);
3613
3614 FormatTok->setFinalizedType(InRequiresExpression
3615 ? TT_RequiresClauseInARequiresExpression
3616 : TT_RequiresClause);
3617 nextToken();
3618
3619 // NOTE: parseConstraintExpression is only ever called from this function.
3620 // It could be inlined into here.
3621 parseConstraintExpression();
3622
3623 if (!InRequiresExpression && FormatTok->Previous)
3624 FormatTok->Previous->ClosesRequiresClause = true;
3625}
3626
3627/// Parses a requires expression.
3628/// \sa parseRequiresClause
3629///
3630/// Returns if it either has finished parsing the expression, or it detects,
3631/// that the expression is incorrect.
3632void UnwrappedLineParser::parseRequiresExpression() {
3633 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3634
3635 FormatTok->setFinalizedType(TT_RequiresExpression);
3636 nextToken();
3637
3638 if (FormatTok->is(Kind: tok::l_paren)) {
3639 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3640 parseParens();
3641 }
3642
3643 if (FormatTok->is(Kind: tok::l_brace)) {
3644 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3645 parseChildBlock();
3646 }
3647}
3648
3649/// Parses a constraint expression.
3650///
3651/// This is the body of a requires clause. It returns, when the parsing is
3652/// complete, or the expression is incorrect.
3653void UnwrappedLineParser::parseConstraintExpression() {
3654 // The special handling for lambdas is needed since tryToParseLambda() eats a
3655 // token and if a requires expression is the last part of a requires clause
3656 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3657 // not set on the correct token. Thus we need to be aware if we even expect a
3658 // lambda to be possible.
3659 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3660 bool LambdaNextTimeAllowed = true;
3661
3662 // Within lambda declarations, it is permitted to put a requires clause after
3663 // its template parameter list, which would place the requires clause right
3664 // before the parentheses of the parameters of the lambda declaration. Thus,
3665 // we track if we expect to see grouping parentheses at all.
3666 // Without this check, `requires foo<T> (T t)` in the below example would be
3667 // seen as the whole requires clause, accidentally eating the parameters of
3668 // the lambda.
3669 // [&]<typename T> requires foo<T> (T t) { ... };
3670 bool TopLevelParensAllowed = true;
3671
3672 do {
3673 bool LambdaThisTimeAllowed = std::exchange(obj&: LambdaNextTimeAllowed, new_val: false);
3674
3675 switch (FormatTok->Tok.getKind()) {
3676 case tok::kw_requires:
3677 parseRequiresExpression();
3678 break;
3679
3680 case tok::l_paren:
3681 if (!TopLevelParensAllowed)
3682 return;
3683 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3684 TopLevelParensAllowed = false;
3685 break;
3686
3687 case tok::l_square:
3688 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3689 return;
3690 break;
3691
3692 case tok::kw_const:
3693 case tok::semi:
3694 case tok::kw_class:
3695 case tok::kw_struct:
3696 case tok::kw_union:
3697 return;
3698
3699 case tok::l_brace:
3700 // Potential function body.
3701 return;
3702
3703 case tok::ampamp:
3704 case tok::pipepipe:
3705 FormatTok->setFinalizedType(TT_BinaryOperator);
3706 nextToken();
3707 LambdaNextTimeAllowed = true;
3708 TopLevelParensAllowed = true;
3709 break;
3710
3711 case tok::comma:
3712 case tok::comment:
3713 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3714 nextToken();
3715 break;
3716
3717 case tok::kw_sizeof:
3718 case tok::greater:
3719 case tok::greaterequal:
3720 case tok::greatergreater:
3721 case tok::less:
3722 case tok::lessequal:
3723 case tok::lessless:
3724 case tok::equalequal:
3725 case tok::exclaim:
3726 case tok::exclaimequal:
3727 case tok::plus:
3728 case tok::minus:
3729 case tok::star:
3730 case tok::slash:
3731 LambdaNextTimeAllowed = true;
3732 TopLevelParensAllowed = true;
3733 // Just eat them.
3734 nextToken();
3735 break;
3736
3737 case tok::numeric_constant:
3738 case tok::coloncolon:
3739 case tok::kw_true:
3740 case tok::kw_false:
3741 TopLevelParensAllowed = false;
3742 // Just eat them.
3743 nextToken();
3744 break;
3745
3746 case tok::kw_static_cast:
3747 case tok::kw_const_cast:
3748 case tok::kw_reinterpret_cast:
3749 case tok::kw_dynamic_cast:
3750 nextToken();
3751 if (FormatTok->isNot(Kind: tok::less))
3752 return;
3753
3754 nextToken();
3755 parseBracedList(/*IsAngleBracket=*/true);
3756 break;
3757
3758 default:
3759 if (!FormatTok->Tok.getIdentifierInfo()) {
3760 // Identifiers are part of the default case, we check for more then
3761 // tok::identifier to handle builtin type traits.
3762 return;
3763 }
3764
3765 // We need to differentiate identifiers for a template deduction guide,
3766 // variables, or function return types (the constraint expression has
3767 // ended before that), and basically all other cases. But it's easier to
3768 // check the other way around.
3769 assert(FormatTok->Previous);
3770 switch (FormatTok->Previous->Tok.getKind()) {
3771 case tok::coloncolon: // Nested identifier.
3772 case tok::ampamp: // Start of a function or variable for the
3773 case tok::pipepipe: // constraint expression. (binary)
3774 case tok::exclaim: // The same as above, but unary.
3775 case tok::kw_requires: // Initial identifier of a requires clause.
3776 case tok::equal: // Initial identifier of a concept declaration.
3777 break;
3778 default:
3779 return;
3780 }
3781
3782 // Read identifier with optional template declaration.
3783 nextToken();
3784 if (FormatTok->is(Kind: tok::less)) {
3785 nextToken();
3786 parseBracedList(/*IsAngleBracket=*/true);
3787 }
3788 TopLevelParensAllowed = false;
3789 break;
3790 }
3791 } while (!eof());
3792}
3793
3794bool UnwrappedLineParser::parseEnum() {
3795 const FormatToken &InitialToken = *FormatTok;
3796
3797 // Won't be 'enum' for NS_ENUMs.
3798 if (FormatTok->is(Kind: tok::kw_enum))
3799 nextToken();
3800
3801 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3802 // declarations. An "enum" keyword followed by a colon would be a syntax
3803 // error and thus assume it is just an identifier.
3804 if (Style.isJavaScript() && FormatTok->isOneOf(K1: tok::colon, K2: tok::question))
3805 return false;
3806
3807 // In protobuf, "enum" can be used as a field name.
3808 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(Kind: tok::equal))
3809 return false;
3810
3811 if (IsCpp) {
3812 // Eat up enum class ...
3813 if (FormatTok->isOneOf(K1: tok::kw_class, K2: tok::kw_struct))
3814 nextToken();
3815 while (FormatTok->is(Kind: tok::l_square))
3816 if (!handleCppAttributes())
3817 return false;
3818 }
3819
3820 while (FormatTok->Tok.getIdentifierInfo() ||
3821 FormatTok->isOneOf(K1: tok::colon, K2: tok::coloncolon, Ks: tok::less,
3822 Ks: tok::greater, Ks: tok::comma, Ks: tok::question,
3823 Ks: tok::l_square)) {
3824 if (Style.isVerilog()) {
3825 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3826 nextToken();
3827 // In Verilog the base type can have dimensions.
3828 while (FormatTok->is(Kind: tok::l_square))
3829 parseSquare();
3830 } else {
3831 nextToken();
3832 }
3833 // We can have macros or attributes in between 'enum' and the enum name.
3834 if (FormatTok->is(Kind: tok::l_paren))
3835 parseParens();
3836 if (FormatTok->is(Kind: tok::identifier)) {
3837 nextToken();
3838 // If there are two identifiers in a row, this is likely an elaborate
3839 // return type. In Java, this can be "implements", etc.
3840 if (IsCpp && FormatTok->is(Kind: tok::identifier))
3841 return false;
3842 }
3843 }
3844
3845 // Just a declaration or something is wrong.
3846 if (FormatTok->isNot(Kind: tok::l_brace))
3847 return true;
3848 FormatTok->setFinalizedType(TT_EnumLBrace);
3849 FormatTok->setBlockKind(BK_Block);
3850
3851 if (Style.isJava()) {
3852 // Java enums are different.
3853 parseJavaEnumBody();
3854 return true;
3855 }
3856 if (Style.Language == FormatStyle::LK_Proto) {
3857 parseBlock(/*MustBeDeclaration=*/true);
3858 return true;
3859 }
3860
3861 if (!Style.AllowShortEnumsOnASingleLine &&
3862 ShouldBreakBeforeBrace(Style, InitialToken, /*IsJavaRecord=*/false)) {
3863 addUnwrappedLine();
3864 }
3865 // Parse enum body.
3866 nextToken();
3867 if (!Style.AllowShortEnumsOnASingleLine) {
3868 addUnwrappedLine();
3869 Line->Level += 1;
3870 }
3871 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3872 if (!Style.AllowShortEnumsOnASingleLine)
3873 Line->Level -= 1;
3874 if (HasError) {
3875 if (FormatTok->is(Kind: tok::semi))
3876 nextToken();
3877 addUnwrappedLine();
3878 }
3879 setPreviousRBraceType(TT_EnumRBrace);
3880 return true;
3881
3882 // There is no addUnwrappedLine() here so that we fall through to parsing a
3883 // structural element afterwards. Thus, in "enum A {} n, m;",
3884 // "} n, m;" will end up in one unwrapped line.
3885}
3886
3887bool UnwrappedLineParser::parseStructLike() {
3888 // parseRecord falls through and does not yet add an unwrapped line as a
3889 // record declaration or definition can start a structural element.
3890 parseRecord();
3891 // This does not apply to Java, JavaScript and C#.
3892 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3893 if (FormatTok->is(Kind: tok::semi))
3894 nextToken();
3895 addUnwrappedLine();
3896 return true;
3897 }
3898 return false;
3899}
3900
3901namespace {
3902// A class used to set and restore the Token position when peeking
3903// ahead in the token source.
3904class ScopedTokenPosition {
3905 unsigned StoredPosition;
3906 FormatTokenSource *Tokens;
3907
3908public:
3909 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3910 assert(Tokens && "Tokens expected to not be null");
3911 StoredPosition = Tokens->getPosition();
3912 }
3913
3914 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3915};
3916} // namespace
3917
3918// Look to see if we have [[ by looking ahead, if
3919// its not then rewind to the original position.
3920bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3921 ScopedTokenPosition AutoPosition(Tokens);
3922 FormatToken *Tok = Tokens->getNextToken();
3923 // We already read the first [ check for the second.
3924 if (Tok->isNot(Kind: tok::l_square))
3925 return false;
3926 // Double check that the attribute is just something
3927 // fairly simple.
3928 while (Tok->isNot(Kind: tok::eof)) {
3929 if (Tok->is(Kind: tok::r_square))
3930 break;
3931 Tok = Tokens->getNextToken();
3932 }
3933 if (Tok->is(Kind: tok::eof))
3934 return false;
3935 Tok = Tokens->getNextToken();
3936 if (Tok->isNot(Kind: tok::r_square))
3937 return false;
3938 Tok = Tokens->getNextToken();
3939 if (Tok->is(Kind: tok::semi))
3940 return false;
3941 return true;
3942}
3943
3944void UnwrappedLineParser::parseJavaEnumBody() {
3945 assert(FormatTok->is(tok::l_brace));
3946 const FormatToken *OpeningBrace = FormatTok;
3947
3948 // Determine whether the enum is simple, i.e. does not have a semicolon or
3949 // constants with class bodies. Simple enums can be formatted like braced
3950 // lists, contracted to a single line, etc.
3951 unsigned StoredPosition = Tokens->getPosition();
3952 bool IsSimple = true;
3953 FormatToken *Tok = Tokens->getNextToken();
3954 while (Tok->isNot(Kind: tok::eof)) {
3955 if (Tok->is(Kind: tok::r_brace))
3956 break;
3957 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::semi)) {
3958 IsSimple = false;
3959 break;
3960 }
3961 // FIXME: This will also mark enums with braces in the arguments to enum
3962 // constants as "not simple". This is probably fine in practice, though.
3963 Tok = Tokens->getNextToken();
3964 }
3965 FormatTok = Tokens->setPosition(StoredPosition);
3966
3967 if (IsSimple) {
3968 nextToken();
3969 parseBracedList();
3970 addUnwrappedLine();
3971 return;
3972 }
3973
3974 // Parse the body of a more complex enum.
3975 // First add a line for everything up to the "{".
3976 nextToken();
3977 addUnwrappedLine();
3978 ++Line->Level;
3979
3980 // Parse the enum constants.
3981 while (!eof()) {
3982 if (FormatTok->is(Kind: tok::l_brace)) {
3983 // Parse the constant's class body.
3984 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3985 /*MunchSemi=*/false);
3986 } else if (FormatTok->is(Kind: tok::l_paren)) {
3987 parseParens();
3988 } else if (FormatTok->is(Kind: tok::comma)) {
3989 nextToken();
3990 addUnwrappedLine();
3991 } else if (FormatTok->is(Kind: tok::semi)) {
3992 nextToken();
3993 addUnwrappedLine();
3994 break;
3995 } else if (FormatTok->is(Kind: tok::r_brace)) {
3996 addUnwrappedLine();
3997 break;
3998 } else {
3999 nextToken();
4000 }
4001 }
4002
4003 // Parse the class body after the enum's ";" if any.
4004 parseLevel(OpeningBrace);
4005 nextToken();
4006 --Line->Level;
4007 addUnwrappedLine();
4008}
4009
4010void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
4011 assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
4012 const FormatToken &InitialToken = *FormatTok;
4013 nextToken();
4014
4015 FormatToken *ClassName =
4016 IsJavaRecord && FormatTok->is(Kind: tok::identifier) ? FormatTok : nullptr;
4017 bool IsDerived = false;
4018 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4019 return Tok->is(Kind: tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4020 };
4021 // JavaScript/TypeScript supports anonymous classes like:
4022 // a = class extends foo { }
4023 bool JSPastExtendsOrImplements = false;
4024 // The actual identifier can be a nested name specifier, and in macros
4025 // it is often token-pasted.
4026 // An [[attribute]] can be before the identifier.
4027 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::hashhash,
4028 Ks: tok::kw_alignas, Ks: tok::l_square) ||
4029 FormatTok->isAttribute() ||
4030 ((Style.isJava() || Style.isJavaScript()) &&
4031 FormatTok->isOneOf(K1: tok::period, K2: tok::comma))) {
4032 if (Style.isJavaScript() &&
4033 FormatTok->isOneOf(K1: Keywords.kw_extends, K2: Keywords.kw_implements)) {
4034 JSPastExtendsOrImplements = true;
4035 // JavaScript/TypeScript supports inline object types in
4036 // extends/implements positions:
4037 // class Foo implements {bar: number} { }
4038 nextToken();
4039 if (FormatTok->is(Kind: tok::l_brace)) {
4040 tryToParseBracedList();
4041 continue;
4042 }
4043 }
4044 if (FormatTok->is(Kind: tok::l_square) && handleCppAttributes())
4045 continue;
4046 auto *Previous = FormatTok;
4047 nextToken();
4048 switch (FormatTok->Tok.getKind()) {
4049 case tok::l_paren:
4050 // We can have macros in between 'class' and the class name.
4051 if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
4052 // e.g. `struct macro(a) S { int i; };`
4053 Previous->Previous == &InitialToken) {
4054 parseParens();
4055 }
4056 break;
4057 case tok::coloncolon:
4058 case tok::hashhash:
4059 break;
4060 default:
4061 if (JSPastExtendsOrImplements || ClassName ||
4062 Previous->isNot(Kind: tok::identifier) || Previous->is(TT: TT_AttributeMacro)) {
4063 break;
4064 }
4065 if (const auto Text = Previous->TokenText;
4066 Text.size() == 1 || Text != Text.upper()) {
4067 ClassName = Previous;
4068 }
4069 }
4070 }
4071
4072 auto IsListInitialization = [&] {
4073 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4074 return false;
4075 assert(FormatTok->is(tok::l_brace));
4076 const auto *Prev = FormatTok->getPreviousNonComment();
4077 assert(Prev);
4078 return Prev != ClassName && Prev->is(Kind: tok::identifier) &&
4079 Prev->isNot(Kind: Keywords.kw_final) && tryToParseBracedList();
4080 };
4081
4082 if (FormatTok->isOneOf(K1: tok::colon, K2: tok::less)) {
4083 int AngleNestingLevel = 0;
4084 do {
4085 if (FormatTok->is(Kind: tok::less))
4086 ++AngleNestingLevel;
4087 else if (FormatTok->is(Kind: tok::greater))
4088 --AngleNestingLevel;
4089
4090 if (AngleNestingLevel == 0) {
4091 if (FormatTok->is(Kind: tok::colon)) {
4092 IsDerived = true;
4093 } else if (!IsDerived && FormatTok->is(Kind: tok::identifier) &&
4094 FormatTok->Previous->is(Kind: tok::coloncolon)) {
4095 ClassName = FormatTok;
4096 } else if (FormatTok->is(Kind: tok::l_paren) &&
4097 IsNonMacroIdentifier(FormatTok->Previous)) {
4098 break;
4099 }
4100 }
4101 if (FormatTok->is(Kind: tok::l_brace)) {
4102 if (AngleNestingLevel == 0 && IsListInitialization())
4103 return;
4104 calculateBraceTypes(/*ExpectClassBody=*/true);
4105 if (!tryToParseBracedList())
4106 break;
4107 }
4108 if (FormatTok->is(Kind: tok::l_square)) {
4109 FormatToken *Previous = FormatTok->Previous;
4110 if (!Previous || (Previous->isNot(Kind: tok::r_paren) &&
4111 !Previous->isTypeOrIdentifier(LangOpts))) {
4112 // Don't try parsing a lambda if we had a closing parenthesis before,
4113 // it was probably a pointer to an array: int (*)[].
4114 if (!tryToParseLambda())
4115 continue;
4116 } else {
4117 parseSquare();
4118 continue;
4119 }
4120 }
4121 if (FormatTok->is(Kind: tok::semi))
4122 return;
4123 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where)) {
4124 addUnwrappedLine();
4125 nextToken();
4126 parseCSharpGenericTypeConstraint();
4127 break;
4128 }
4129 nextToken();
4130 } while (!eof());
4131 }
4132
4133 auto GetBraceTypes =
4134 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4135 switch (RecordTok.Tok.getKind()) {
4136 case tok::kw_class:
4137 return {TT_ClassLBrace, TT_ClassRBrace};
4138 case tok::kw_struct:
4139 return {TT_StructLBrace, TT_StructRBrace};
4140 case tok::kw_union:
4141 return {TT_UnionLBrace, TT_UnionRBrace};
4142 default:
4143 // Useful for e.g. interface.
4144 return {TT_RecordLBrace, TT_RecordRBrace};
4145 }
4146 };
4147 if (FormatTok->is(Kind: tok::l_brace)) {
4148 if (IsListInitialization())
4149 return;
4150 if (ClassName)
4151 ClassName->setFinalizedType(TT_ClassHeadName);
4152 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4153 FormatTok->setFinalizedType(OpenBraceType);
4154 if (ParseAsExpr) {
4155 parseChildBlock();
4156 } else {
4157 if (ShouldBreakBeforeBrace(Style, InitialToken, IsJavaRecord))
4158 addUnwrappedLine();
4159
4160 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4161 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4162 }
4163 setPreviousRBraceType(ClosingBraceType);
4164 }
4165 // There is no addUnwrappedLine() here so that we fall through to parsing a
4166 // structural element afterwards. Thus, in "class A {} n, m;",
4167 // "} n, m;" will end up in one unwrapped line.
4168}
4169
4170void UnwrappedLineParser::parseObjCMethod() {
4171 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4172 "'(' or identifier expected.");
4173 do {
4174 if (FormatTok->is(Kind: tok::semi)) {
4175 nextToken();
4176 addUnwrappedLine();
4177 return;
4178 } else if (FormatTok->is(Kind: tok::l_brace)) {
4179 if (Style.BraceWrapping.AfterFunction)
4180 addUnwrappedLine();
4181 parseBlock();
4182 addUnwrappedLine();
4183 return;
4184 } else {
4185 nextToken();
4186 }
4187 } while (!eof());
4188}
4189
4190void UnwrappedLineParser::parseObjCProtocolList() {
4191 assert(FormatTok->is(tok::less) && "'<' expected.");
4192 do {
4193 nextToken();
4194 // Early exit in case someone forgot a close angle.
4195 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4196 return;
4197 } while (!eof() && FormatTok->isNot(Kind: tok::greater));
4198 nextToken(); // Skip '>'.
4199}
4200
4201void UnwrappedLineParser::parseObjCUntilAtEnd() {
4202 do {
4203 if (FormatTok->is(Kind: tok::objc_end)) {
4204 nextToken();
4205 addUnwrappedLine();
4206 break;
4207 }
4208 if (FormatTok->is(Kind: tok::l_brace)) {
4209 parseBlock();
4210 // In ObjC interfaces, nothing should be following the "}".
4211 addUnwrappedLine();
4212 } else if (FormatTok->is(Kind: tok::r_brace)) {
4213 // Ignore stray "}". parseStructuralElement doesn't consume them.
4214 nextToken();
4215 addUnwrappedLine();
4216 } else if (FormatTok->isOneOf(K1: tok::minus, K2: tok::plus)) {
4217 nextToken();
4218 parseObjCMethod();
4219 } else {
4220 parseStructuralElement();
4221 }
4222 } while (!eof());
4223}
4224
4225void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4226 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4227 nextToken();
4228 nextToken(); // interface name
4229
4230 // @interface can be followed by a lightweight generic
4231 // specialization list, then either a base class or a category.
4232 if (FormatTok->is(Kind: tok::less))
4233 parseObjCLightweightGenerics();
4234 if (FormatTok->is(Kind: tok::colon)) {
4235 nextToken();
4236 nextToken(); // base class name
4237 // The base class can also have lightweight generics applied to it.
4238 if (FormatTok->is(Kind: tok::less))
4239 parseObjCLightweightGenerics();
4240 } else if (FormatTok->is(Kind: tok::l_paren)) {
4241 // Skip category, if present.
4242 parseParens();
4243 }
4244
4245 if (FormatTok->is(Kind: tok::less))
4246 parseObjCProtocolList();
4247
4248 if (FormatTok->is(Kind: tok::l_brace)) {
4249 if (Style.BraceWrapping.AfterObjCDeclaration)
4250 addUnwrappedLine();
4251 parseBlock(/*MustBeDeclaration=*/true);
4252 }
4253
4254 // With instance variables, this puts '}' on its own line. Without instance
4255 // variables, this ends the @interface line.
4256 addUnwrappedLine();
4257
4258 parseObjCUntilAtEnd();
4259}
4260
4261void UnwrappedLineParser::parseObjCLightweightGenerics() {
4262 assert(FormatTok->is(tok::less));
4263 // Unlike protocol lists, generic parameterizations support
4264 // nested angles:
4265 //
4266 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4267 // NSObject <NSCopying, NSSecureCoding>
4268 //
4269 // so we need to count how many open angles we have left.
4270 unsigned NumOpenAngles = 1;
4271 do {
4272 nextToken();
4273 // Early exit in case someone forgot a close angle.
4274 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4275 break;
4276 if (FormatTok->is(Kind: tok::less)) {
4277 ++NumOpenAngles;
4278 } else if (FormatTok->is(Kind: tok::greater)) {
4279 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4280 --NumOpenAngles;
4281 }
4282 } while (!eof() && NumOpenAngles != 0);
4283 nextToken(); // Skip '>'.
4284}
4285
4286// Returns true for the declaration/definition form of @protocol,
4287// false for the expression form.
4288bool UnwrappedLineParser::parseObjCProtocol() {
4289 assert(FormatTok->is(tok::objc_protocol));
4290 nextToken();
4291
4292 if (FormatTok->is(Kind: tok::l_paren)) {
4293 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4294 return false;
4295 }
4296
4297 // The definition/declaration form,
4298 // @protocol Foo
4299 // - (int)someMethod;
4300 // @end
4301
4302 nextToken(); // protocol name
4303
4304 if (FormatTok->is(Kind: tok::less))
4305 parseObjCProtocolList();
4306
4307 // Check for protocol declaration.
4308 if (FormatTok->is(Kind: tok::semi)) {
4309 nextToken();
4310 addUnwrappedLine();
4311 return true;
4312 }
4313
4314 addUnwrappedLine();
4315 parseObjCUntilAtEnd();
4316 return true;
4317}
4318
4319void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4320 bool IsImport = FormatTok->is(II: Keywords.kw_import);
4321 assert(IsImport || FormatTok->is(tok::kw_export));
4322 nextToken();
4323
4324 // Consume the "default" in "export default class/function".
4325 if (FormatTok->is(Kind: tok::kw_default))
4326 nextToken();
4327
4328 // Consume "async function", "function" and "default function", so that these
4329 // get parsed as free-standing JS functions, i.e. do not require a trailing
4330 // semicolon.
4331 if (FormatTok->is(II: Keywords.kw_async))
4332 nextToken();
4333 if (FormatTok->is(II: Keywords.kw_function)) {
4334 nextToken();
4335 return;
4336 }
4337
4338 // For imports, `export *`, `export {...}`, consume the rest of the line up
4339 // to the terminating `;`. For everything else, just return and continue
4340 // parsing the structural element, i.e. the declaration or expression for
4341 // `export default`.
4342 if (!IsImport && FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::star) &&
4343 !FormatTok->isStringLiteral() &&
4344 !(FormatTok->is(II: Keywords.kw_type) &&
4345 Tokens->peekNextToken()->isOneOf(K1: tok::l_brace, K2: tok::star))) {
4346 return;
4347 }
4348
4349 while (!eof()) {
4350 if (FormatTok->is(Kind: tok::semi))
4351 return;
4352 if (Line->Tokens.empty()) {
4353 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4354 // import statement should terminate.
4355 return;
4356 }
4357 if (FormatTok->is(Kind: tok::l_brace)) {
4358 FormatTok->setBlockKind(BK_Block);
4359 nextToken();
4360 parseBracedList();
4361 } else {
4362 nextToken();
4363 }
4364 }
4365}
4366
4367void UnwrappedLineParser::parseStatementMacro() {
4368 nextToken();
4369 if (FormatTok->is(Kind: tok::l_paren))
4370 parseParens();
4371 if (FormatTok->is(Kind: tok::semi))
4372 nextToken();
4373 addUnwrappedLine();
4374}
4375
4376void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4377 // consume things like a::`b.c[d:e] or a::*
4378 while (true) {
4379 if (FormatTok->isOneOf(K1: tok::star, K2: tok::period, Ks: tok::periodstar,
4380 Ks: tok::coloncolon, Ks: tok::hash) ||
4381 Keywords.isVerilogIdentifier(Tok: *FormatTok)) {
4382 nextToken();
4383 } else if (FormatTok->is(Kind: tok::l_square)) {
4384 parseSquare();
4385 } else {
4386 break;
4387 }
4388 }
4389}
4390
4391void UnwrappedLineParser::parseVerilogSensitivityList() {
4392 if (FormatTok->isNot(Kind: tok::at))
4393 return;
4394 nextToken();
4395 // A block event expression has 2 at signs.
4396 if (FormatTok->is(Kind: tok::at))
4397 nextToken();
4398 switch (FormatTok->Tok.getKind()) {
4399 case tok::star:
4400 nextToken();
4401 break;
4402 case tok::l_paren:
4403 parseParens();
4404 break;
4405 default:
4406 parseVerilogHierarchyIdentifier();
4407 break;
4408 }
4409}
4410
4411unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4412 unsigned AddLevels = 0;
4413
4414 if (FormatTok->is(II: Keywords.kw_clocking)) {
4415 nextToken();
4416 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4417 nextToken();
4418 parseVerilogSensitivityList();
4419 if (FormatTok->is(Kind: tok::semi))
4420 nextToken();
4421 } else if (FormatTok->isOneOf(K1: tok::kw_case, K2: Keywords.kw_casex,
4422 Ks: Keywords.kw_casez, Ks: Keywords.kw_randcase,
4423 Ks: Keywords.kw_randsequence)) {
4424 if (Style.IndentCaseLabels)
4425 AddLevels++;
4426 nextToken();
4427 if (FormatTok->is(Kind: tok::l_paren)) {
4428 FormatTok->setFinalizedType(TT_ConditionLParen);
4429 parseParens();
4430 }
4431 if (FormatTok->isOneOf(K1: Keywords.kw_inside, K2: Keywords.kw_matches))
4432 nextToken();
4433 // The case header has no semicolon.
4434 } else {
4435 // "module" etc.
4436 nextToken();
4437 // all the words like the name of the module and specifiers like
4438 // "automatic" and the width of function return type
4439 while (true) {
4440 if (FormatTok->is(Kind: tok::l_square)) {
4441 auto Prev = FormatTok->getPreviousNonComment();
4442 if (Prev && Keywords.isVerilogIdentifier(Tok: *Prev))
4443 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4444 parseSquare();
4445 } else if (Keywords.isVerilogIdentifier(Tok: *FormatTok) ||
4446 FormatTok->isOneOf(K1: tok::hash, K2: tok::hashhash, Ks: tok::coloncolon,
4447 Ks: Keywords.kw_automatic, Ks: tok::kw_static)) {
4448 nextToken();
4449 } else {
4450 break;
4451 }
4452 }
4453
4454 auto NewLine = [this]() {
4455 addUnwrappedLine();
4456 Line->IsContinuation = true;
4457 };
4458
4459 // package imports
4460 while (FormatTok->is(II: Keywords.kw_import)) {
4461 NewLine();
4462 nextToken();
4463 parseVerilogHierarchyIdentifier();
4464 if (FormatTok->is(Kind: tok::semi))
4465 nextToken();
4466 }
4467
4468 // parameters and ports
4469 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
4470 NewLine();
4471 nextToken();
4472 if (FormatTok->is(Kind: tok::l_paren)) {
4473 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4474 parseParens();
4475 }
4476 }
4477 if (FormatTok->is(Kind: tok::l_paren)) {
4478 NewLine();
4479 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4480 parseParens();
4481 }
4482
4483 // extends and implements
4484 if (FormatTok->is(II: Keywords.kw_extends)) {
4485 NewLine();
4486 nextToken();
4487 parseVerilogHierarchyIdentifier();
4488 if (FormatTok->is(Kind: tok::l_paren))
4489 parseParens();
4490 }
4491 if (FormatTok->is(II: Keywords.kw_implements)) {
4492 NewLine();
4493 do {
4494 nextToken();
4495 parseVerilogHierarchyIdentifier();
4496 } while (FormatTok->is(Kind: tok::comma));
4497 }
4498
4499 // Coverage event for cover groups.
4500 if (FormatTok->is(Kind: tok::at)) {
4501 NewLine();
4502 parseVerilogSensitivityList();
4503 }
4504
4505 if (FormatTok->is(Kind: tok::semi))
4506 nextToken(/*LevelDifference=*/1);
4507 addUnwrappedLine();
4508 }
4509
4510 return AddLevels;
4511}
4512
4513void UnwrappedLineParser::parseVerilogTable() {
4514 assert(FormatTok->is(Keywords.kw_table));
4515 nextToken(/*LevelDifference=*/1);
4516 addUnwrappedLine();
4517
4518 auto InitialLevel = Line->Level++;
4519 while (!eof() && !Keywords.isVerilogEnd(Tok: *FormatTok)) {
4520 FormatToken *Tok = FormatTok;
4521 nextToken();
4522 if (Tok->is(Kind: tok::semi))
4523 addUnwrappedLine();
4524 else if (Tok->isOneOf(K1: tok::star, K2: tok::colon, Ks: tok::question, Ks: tok::minus))
4525 Tok->setFinalizedType(TT_VerilogTableItem);
4526 }
4527 Line->Level = InitialLevel;
4528 nextToken(/*LevelDifference=*/-1);
4529 addUnwrappedLine();
4530}
4531
4532void UnwrappedLineParser::parseVerilogCaseLabel() {
4533 // The label will get unindented in AnnotatingParser. If there are no leading
4534 // spaces, indent the rest here so that things inside the block will be
4535 // indented relative to things outside. We don't use parseLabel because we
4536 // don't know whether this colon is a label or a ternary expression at this
4537 // point.
4538 auto OrigLevel = Line->Level;
4539 auto FirstLine = CurrentLines->size();
4540 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4541 ++Line->Level;
4542 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(Tok: *FormatTok))
4543 --Line->Level;
4544 parseStructuralElement();
4545 // Restore the indentation in both the new line and the line that has the
4546 // label.
4547 if (CurrentLines->size() > FirstLine)
4548 (*CurrentLines)[FirstLine].Level = OrigLevel;
4549 Line->Level = OrigLevel;
4550}
4551
4552void UnwrappedLineParser::parseVerilogExtern() {
4553 assert(
4554 FormatTok->isOneOf(tok::kw_extern, tok::kw_export, Keywords.kw_import));
4555 nextToken();
4556 // "DPI-C"
4557 if (FormatTok->is(Kind: tok::string_literal))
4558 nextToken();
4559 if (FormatTok->isOneOf(K1: Keywords.kw_context, K2: Keywords.kw_pure))
4560 nextToken();
4561 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4562 nextToken();
4563 if (FormatTok->is(Kind: tok::equal))
4564 nextToken();
4565 if (Keywords.isVerilogHierarchy(Tok: *FormatTok))
4566 parseVerilogHierarchyHeader();
4567}
4568
4569bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4570 for (const auto &N : Line.Tokens) {
4571 if (N.Tok->MacroCtx)
4572 return true;
4573 for (const UnwrappedLine &Child : N.Children)
4574 if (containsExpansion(Line: Child))
4575 return true;
4576 }
4577 return false;
4578}
4579
4580void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4581 if (Line->Tokens.empty())
4582 return;
4583 LLVM_DEBUG({
4584 if (!parsingPPDirective()) {
4585 llvm::dbgs() << "Adding unwrapped line:\n";
4586 printDebugInfo(*Line);
4587 }
4588 });
4589
4590 // If this line closes a block when in Whitesmiths mode, remember that
4591 // information so that the level can be decreased after the line is added.
4592 // This has to happen after the addition of the line since the line itself
4593 // needs to be indented.
4594 bool ClosesWhitesmithsBlock =
4595 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4596 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4597
4598 // If the current line was expanded from a macro call, we use it to
4599 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4600 // line and the unexpanded token stream.
4601 if (!parsingPPDirective() && !InExpansion && containsExpansion(Line: *Line)) {
4602 if (!Reconstruct)
4603 Reconstruct.emplace(args&: Line->Level, args&: Unexpanded);
4604 Reconstruct->addLine(Line: *Line);
4605
4606 // While the reconstructed unexpanded lines are stored in the normal
4607 // flow of lines, the expanded lines are stored on the side to be analyzed
4608 // in an extra step.
4609 CurrentExpandedLines.push_back(Elt: std::move(*Line));
4610
4611 if (Reconstruct->finished()) {
4612 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4613 assert(!Reconstructed.Tokens.empty() &&
4614 "Reconstructed must at least contain the macro identifier.");
4615 assert(!parsingPPDirective());
4616 LLVM_DEBUG({
4617 llvm::dbgs() << "Adding unexpanded line:\n";
4618 printDebugInfo(Reconstructed);
4619 });
4620 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4621 Lines.push_back(Elt: std::move(Reconstructed));
4622 CurrentExpandedLines.clear();
4623 Reconstruct.reset();
4624 }
4625 } else {
4626 // At the top level we only get here when no unexpansion is going on, or
4627 // when conditional formatting led to unfinished macro reconstructions.
4628 assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
4629 CurrentLines->push_back(Elt: std::move(*Line));
4630 }
4631 Line->Tokens.clear();
4632 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4633 Line->FirstStartColumn = 0;
4634 Line->IsContinuation = false;
4635 Line->SeenDecltypeAuto = false;
4636
4637 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4638 --Line->Level;
4639 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4640 CurrentLines->append(
4641 in_start: std::make_move_iterator(i: PreprocessorDirectives.begin()),
4642 in_end: std::make_move_iterator(i: PreprocessorDirectives.end()));
4643 PreprocessorDirectives.clear();
4644 }
4645 // Disconnect the current token from the last token on the previous line.
4646 FormatTok->Previous = nullptr;
4647}
4648
4649bool UnwrappedLineParser::eof() const { return FormatTok->is(Kind: tok::eof); }
4650
4651bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4652 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4653 FormatTok.NewlinesBefore > 0;
4654}
4655
4656// Checks if \p FormatTok is a line comment that continues the line comment
4657// section on \p Line.
4658static bool
4659continuesLineCommentSection(const FormatToken &FormatTok,
4660 const UnwrappedLine &Line, const FormatStyle &Style,
4661 const llvm::Regex &CommentPragmasRegex) {
4662 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4663 return false;
4664
4665 StringRef IndentContent = FormatTok.TokenText;
4666 if (FormatTok.TokenText.starts_with(Prefix: "//") ||
4667 FormatTok.TokenText.starts_with(Prefix: "/*")) {
4668 IndentContent = FormatTok.TokenText.substr(Start: 2);
4669 }
4670 if (CommentPragmasRegex.match(String: IndentContent))
4671 return false;
4672
4673 // If Line starts with a line comment, then FormatTok continues the comment
4674 // section if its original column is greater or equal to the original start
4675 // column of the line.
4676 //
4677 // Define the min column token of a line as follows: if a line ends in '{' or
4678 // contains a '{' followed by a line comment, then the min column token is
4679 // that '{'. Otherwise, the min column token of the line is the first token of
4680 // the line.
4681 //
4682 // If Line starts with a token other than a line comment, then FormatTok
4683 // continues the comment section if its original column is greater than the
4684 // original start column of the min column token of the line.
4685 //
4686 // For example, the second line comment continues the first in these cases:
4687 //
4688 // // first line
4689 // // second line
4690 //
4691 // and:
4692 //
4693 // // first line
4694 // // second line
4695 //
4696 // and:
4697 //
4698 // int i; // first line
4699 // // second line
4700 //
4701 // and:
4702 //
4703 // do { // first line
4704 // // second line
4705 // int i;
4706 // } while (true);
4707 //
4708 // and:
4709 //
4710 // enum {
4711 // a, // first line
4712 // // second line
4713 // b
4714 // };
4715 //
4716 // The second line comment doesn't continue the first in these cases:
4717 //
4718 // // first line
4719 // // second line
4720 //
4721 // and:
4722 //
4723 // int i; // first line
4724 // // second line
4725 //
4726 // and:
4727 //
4728 // do { // first line
4729 // // second line
4730 // int i;
4731 // } while (true);
4732 //
4733 // and:
4734 //
4735 // enum {
4736 // a, // first line
4737 // // second line
4738 // };
4739 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4740
4741 // Scan for '{//'. If found, use the column of '{' as a min column for line
4742 // comment section continuation.
4743 const FormatToken *PreviousToken = nullptr;
4744 for (const UnwrappedLineNode &Node : Line.Tokens) {
4745 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace) &&
4746 isLineComment(FormatTok: *Node.Tok)) {
4747 MinColumnToken = PreviousToken;
4748 break;
4749 }
4750 PreviousToken = Node.Tok;
4751
4752 // Grab the last newline preceding a token in this unwrapped line.
4753 if (Node.Tok->NewlinesBefore > 0)
4754 MinColumnToken = Node.Tok;
4755 }
4756 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace))
4757 MinColumnToken = PreviousToken;
4758
4759 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4760 MinColumnToken);
4761}
4762
4763void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4764 bool JustComments = Line->Tokens.empty();
4765 for (FormatToken *Tok : CommentsBeforeNextToken) {
4766 // Line comments that belong to the same line comment section are put on the
4767 // same line since later we might want to reflow content between them.
4768 // Additional fine-grained breaking of line comment sections is controlled
4769 // by the class BreakableLineCommentSection in case it is desirable to keep
4770 // several line comment sections in the same unwrapped line.
4771 //
4772 // FIXME: Consider putting separate line comment sections as children to the
4773 // unwrapped line instead.
4774 Tok->ContinuesLineCommentSection =
4775 continuesLineCommentSection(FormatTok: *Tok, Line: *Line, Style, CommentPragmasRegex);
4776 if (isOnNewLine(FormatTok: *Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4777 addUnwrappedLine();
4778 pushToken(Tok);
4779 }
4780 if (NewlineBeforeNext && JustComments)
4781 addUnwrappedLine();
4782 CommentsBeforeNextToken.clear();
4783}
4784
4785void UnwrappedLineParser::nextToken(int LevelDifference) {
4786 if (eof())
4787 return;
4788 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4789 pushToken(Tok: FormatTok);
4790 FormatToken *Previous = FormatTok;
4791 if (!Style.isJavaScript())
4792 readToken(LevelDifference);
4793 else
4794 readTokenWithJavaScriptASI();
4795 FormatTok->Previous = Previous;
4796 if (Style.isVerilog()) {
4797 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4798 // keywords like `begin`, we can't treat them the same as left braces
4799 // because some contexts require one of them. For example structs use
4800 // braces and if blocks use keywords, and a left brace can occur in an if
4801 // statement, but it is not a block. For keywords like `end`, we simply
4802 // treat them the same as right braces.
4803 if (Keywords.isVerilogEnd(Tok: *FormatTok))
4804 FormatTok->Tok.setKind(tok::r_brace);
4805 }
4806}
4807
4808void UnwrappedLineParser::distributeComments(
4809 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4810 // Whether or not a line comment token continues a line is controlled by
4811 // the method continuesLineCommentSection, with the following caveat:
4812 //
4813 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4814 // that each comment line from the trail is aligned with the next token, if
4815 // the next token exists. If a trail exists, the beginning of the maximal
4816 // trail is marked as a start of a new comment section.
4817 //
4818 // For example in this code:
4819 //
4820 // int a; // line about a
4821 // // line 1 about b
4822 // // line 2 about b
4823 // int b;
4824 //
4825 // the two lines about b form a maximal trail, so there are two sections, the
4826 // first one consisting of the single comment "// line about a" and the
4827 // second one consisting of the next two comments.
4828 if (Comments.empty())
4829 return;
4830 bool ShouldPushCommentsInCurrentLine = true;
4831 bool HasTrailAlignedWithNextToken = false;
4832 unsigned StartOfTrailAlignedWithNextToken = 0;
4833 if (NextTok) {
4834 // We are skipping the first element intentionally.
4835 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4836 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4837 HasTrailAlignedWithNextToken = true;
4838 StartOfTrailAlignedWithNextToken = i;
4839 }
4840 }
4841 }
4842 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4843 FormatToken *FormatTok = Comments[i];
4844 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4845 FormatTok->ContinuesLineCommentSection = false;
4846 } else {
4847 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4848 FormatTok: *FormatTok, Line: *Line, Style, CommentPragmasRegex);
4849 }
4850 if (!FormatTok->ContinuesLineCommentSection &&
4851 (isOnNewLine(FormatTok: *FormatTok) || FormatTok->IsFirst)) {
4852 ShouldPushCommentsInCurrentLine = false;
4853 }
4854 if (ShouldPushCommentsInCurrentLine)
4855 pushToken(Tok: FormatTok);
4856 else
4857 CommentsBeforeNextToken.push_back(Elt: FormatTok);
4858 }
4859}
4860
4861void UnwrappedLineParser::readToken(int LevelDifference) {
4862 SmallVector<FormatToken *, 1> Comments;
4863 bool PreviousWasComment = false;
4864 bool FirstNonCommentOnLine = false;
4865 do {
4866 FormatTok = Tokens->getNextToken();
4867 assert(FormatTok);
4868 while (FormatTok->isOneOf(K1: TT_ConflictStart, K2: TT_ConflictEnd,
4869 Ks: TT_ConflictAlternative)) {
4870 if (FormatTok->is(TT: TT_ConflictStart))
4871 conditionalCompilationStart(/*Unreachable=*/false);
4872 else if (FormatTok->is(TT: TT_ConflictAlternative))
4873 conditionalCompilationAlternative();
4874 else if (FormatTok->is(TT: TT_ConflictEnd))
4875 conditionalCompilationEnd();
4876 FormatTok = Tokens->getNextToken();
4877 FormatTok->MustBreakBefore = true;
4878 FormatTok->MustBreakBeforeFinalized = true;
4879 }
4880
4881 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4882 const FormatToken &Tok,
4883 bool PreviousWasComment) {
4884 auto IsFirstOnLine = [](const FormatToken &Tok) {
4885 return Tok.HasUnescapedNewline || Tok.IsFirst;
4886 };
4887
4888 // Consider preprocessor directives preceded by block comments as first
4889 // on line.
4890 if (PreviousWasComment)
4891 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4892 return IsFirstOnLine(Tok);
4893 };
4894
4895 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4896 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4897 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4898
4899 while (!Line->InPPDirective && FormatTok->is(Kind: tok::hash) &&
4900 FirstNonCommentOnLine) {
4901 // In Verilog, the backtick is used for macro invocations. In TableGen,
4902 // the single hash is used for the paste operator.
4903 const auto *Next = Tokens->peekNextToken();
4904 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(Tok: *Next)) ||
4905 (Style.isTableGen() &&
4906 Next->isNoneOf(Ks: tok::kw_else, Ks: tok::pp_define, Ks: tok::pp_ifdef,
4907 Ks: tok::pp_ifndef, Ks: tok::pp_endif))) {
4908 break;
4909 }
4910 distributeComments(Comments, NextTok: FormatTok);
4911 Comments.clear();
4912 // If there is an unfinished unwrapped line, we flush the preprocessor
4913 // directives only after that unwrapped line was finished later.
4914 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4915 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4916 assert((LevelDifference >= 0 ||
4917 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4918 "LevelDifference makes Line->Level negative");
4919 Line->Level += LevelDifference;
4920 // Comments stored before the preprocessor directive need to be output
4921 // before the preprocessor directive, at the same level as the
4922 // preprocessor directive, as we consider them to apply to the directive.
4923 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4924 PPBranchLevel > 0) {
4925 Line->Level += PPBranchLevel;
4926 }
4927 assert(Line->Level >= Line->UnbracedBodyLevel);
4928 Line->Level -= Line->UnbracedBodyLevel;
4929 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4930 parsePPDirective();
4931 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4932 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4933 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4934 }
4935
4936 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4937 !Line->InPPDirective) {
4938 continue;
4939 }
4940
4941 if (FormatTok->is(Kind: tok::identifier) &&
4942 Macros.defined(Name: FormatTok->TokenText) &&
4943 // FIXME: Allow expanding macros in preprocessor directives.
4944 !Line->InPPDirective) {
4945 FormatToken *ID = FormatTok;
4946 unsigned Position = Tokens->getPosition();
4947
4948 // To correctly parse the code, we need to replace the tokens of the macro
4949 // call with its expansion.
4950 auto PreCall = std::move(Line);
4951 Line.reset(p: new UnwrappedLine);
4952 bool OldInExpansion = InExpansion;
4953 InExpansion = true;
4954 // We parse the macro call into a new line.
4955 auto Args = parseMacroCall();
4956 InExpansion = OldInExpansion;
4957 assert(Line->Tokens.front().Tok == ID);
4958 // And remember the unexpanded macro call tokens.
4959 auto UnexpandedLine = std::move(Line);
4960 // Reset to the old line.
4961 Line = std::move(PreCall);
4962
4963 LLVM_DEBUG({
4964 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4965 if (Args) {
4966 llvm::dbgs() << "(";
4967 for (const auto &Arg : Args.value())
4968 for (const auto &T : Arg)
4969 llvm::dbgs() << T->TokenText << " ";
4970 llvm::dbgs() << ")";
4971 }
4972 llvm::dbgs() << "\n";
4973 });
4974 if (Macros.objectLike(Name: ID->TokenText) && Args &&
4975 !Macros.hasArity(Name: ID->TokenText, Arity: Args->size())) {
4976 // The macro is either
4977 // - object-like, but we got argumnets, or
4978 // - overloaded to be both object-like and function-like, but none of
4979 // the function-like arities match the number of arguments.
4980 // Thus, expand as object-like macro.
4981 LLVM_DEBUG(llvm::dbgs()
4982 << "Macro \"" << ID->TokenText
4983 << "\" not overloaded for arity " << Args->size()
4984 << "or not function-like, using object-like overload.");
4985 Args.reset();
4986 UnexpandedLine->Tokens.resize(new_size: 1);
4987 Tokens->setPosition(Position);
4988 nextToken();
4989 assert(!Args && Macros.objectLike(ID->TokenText));
4990 }
4991 if ((!Args && Macros.objectLike(Name: ID->TokenText)) ||
4992 (Args && Macros.hasArity(Name: ID->TokenText, Arity: Args->size()))) {
4993 // Next, we insert the expanded tokens in the token stream at the
4994 // current position, and continue parsing.
4995 Unexpanded[ID] = std::move(UnexpandedLine);
4996 SmallVector<FormatToken *, 8> Expansion =
4997 Macros.expand(ID, OptionalArgs: std::move(Args));
4998 if (!Expansion.empty())
4999 FormatTok = Tokens->insertTokens(Tokens: Expansion);
5000
5001 LLVM_DEBUG({
5002 llvm::dbgs() << "Expanded: ";
5003 for (const auto &T : Expansion)
5004 llvm::dbgs() << T->TokenText << " ";
5005 llvm::dbgs() << "\n";
5006 });
5007 } else {
5008 LLVM_DEBUG({
5009 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
5010 << "\", because it was used ";
5011 if (Args)
5012 llvm::dbgs() << "with " << Args->size();
5013 else
5014 llvm::dbgs() << "without";
5015 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
5016 });
5017 Tokens->setPosition(Position);
5018 FormatTok = ID;
5019 }
5020 }
5021
5022 if (FormatTok->isNot(Kind: tok::comment)) {
5023 distributeComments(Comments, NextTok: FormatTok);
5024 Comments.clear();
5025 return;
5026 }
5027
5028 Comments.push_back(Elt: FormatTok);
5029 } while (!eof());
5030
5031 distributeComments(Comments, NextTok: nullptr);
5032 Comments.clear();
5033}
5034
5035namespace {
5036template <typename Iterator>
5037void pushTokens(Iterator Begin, Iterator End,
5038 SmallVectorImpl<FormatToken *> &Into) {
5039 for (auto I = Begin; I != End; ++I) {
5040 Into.push_back(Elt: I->Tok);
5041 for (const auto &Child : I->Children)
5042 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5043 }
5044}
5045} // namespace
5046
5047std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5048UnwrappedLineParser::parseMacroCall() {
5049 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5050 assert(Line->Tokens.empty());
5051 nextToken();
5052 if (FormatTok->isNot(Kind: tok::l_paren))
5053 return Args;
5054 unsigned Position = Tokens->getPosition();
5055 FormatToken *Tok = FormatTok;
5056 nextToken();
5057 Args.emplace();
5058 auto ArgStart = std::prev(x: Line->Tokens.end());
5059
5060 int Parens = 0;
5061 do {
5062 switch (FormatTok->Tok.getKind()) {
5063 case tok::l_paren:
5064 ++Parens;
5065 nextToken();
5066 break;
5067 case tok::r_paren: {
5068 if (Parens > 0) {
5069 --Parens;
5070 nextToken();
5071 break;
5072 }
5073 Args->push_back(Elt: {});
5074 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5075 nextToken();
5076 return Args;
5077 }
5078 case tok::comma: {
5079 if (Parens > 0) {
5080 nextToken();
5081 break;
5082 }
5083 Args->push_back(Elt: {});
5084 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5085 nextToken();
5086 ArgStart = std::prev(x: Line->Tokens.end());
5087 break;
5088 }
5089 default:
5090 nextToken();
5091 break;
5092 }
5093 } while (!eof());
5094 Line->Tokens.resize(new_size: 1);
5095 Tokens->setPosition(Position);
5096 FormatTok = Tok;
5097 return {};
5098}
5099
5100void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5101 Line->Tokens.push_back(x: UnwrappedLineNode(Tok));
5102 if (AtEndOfPPLine) {
5103 auto &Tok = *Line->Tokens.back().Tok;
5104 Tok.MustBreakBefore = true;
5105 Tok.MustBreakBeforeFinalized = true;
5106 Tok.FirstAfterPPLine = true;
5107 AtEndOfPPLine = false;
5108 }
5109}
5110
5111} // end namespace format
5112} // end namespace clang
5113