1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, Line: *CI, Prefix: (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63[[maybe_unused]] static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(OS&: llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(Val: MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
96class ScopedLineState {
97public:
98 ScopedLineState(UnwrappedLineParser &Parser,
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
114 ~ScopedLineState() {
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
125 UnwrappedLineParser &Parser;
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
131class CompoundStatementIndenter {
132public:
133 CompoundStatementIndenter(UnwrappedLineParser *Parser,
134 const FormatStyle &Style, unsigned &LineLevel)
135 : CompoundStatementIndenter(Parser, LineLevel,
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
139 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
154UnwrappedLineParser::UnwrappedLineParser(
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
157 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(getIncludeGuardState(Style: Style.IndentPPDirectives)),
166 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
167 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
168
169void UnwrappedLineParser::reset() {
170 PPBranchLevel = -1;
171 IncludeGuard = getIncludeGuardState(Style: Style.IndentPPDirectives);
172 IncludeGuardToken = nullptr;
173 Line.reset(p: new UnwrappedLine);
174 CommentsBeforeNextToken.clear();
175 FormatTok = nullptr;
176 AtEndOfPPLine = false;
177 IsDecltypeAutoFunction = false;
178 PreprocessorDirectives.clear();
179 CurrentLines = &Lines;
180 DeclarationScopeStack.clear();
181 NestedTooDeep.clear();
182 NestedLambdas.clear();
183 PPStack.clear();
184 Line->FirstStartColumn = FirstStartColumn;
185
186 if (!Unexpanded.empty())
187 for (FormatToken *Token : AllTokens)
188 Token->MacroCtx.reset();
189 CurrentExpandedLines.clear();
190 ExpandedLines.clear();
191 Unexpanded.clear();
192 InExpansion = false;
193 Reconstruct.reset();
194}
195
196void UnwrappedLineParser::parse() {
197 IndexedTokenSource TokenSource(AllTokens);
198 Line->FirstStartColumn = FirstStartColumn;
199 do {
200 LLVM_DEBUG(llvm::dbgs() << "----\n");
201 reset();
202 Tokens = &TokenSource;
203 TokenSource.reset();
204
205 readToken();
206 parseFile();
207
208 // If we found an include guard then all preprocessor directives (other than
209 // the guard) are over-indented by one.
210 if (IncludeGuard == IG_Found) {
211 for (auto &Line : Lines)
212 if (Line.InPPDirective && Line.Level > 0)
213 --Line.Level;
214 }
215
216 // Create line with eof token.
217 assert(eof());
218 pushToken(Tok: FormatTok);
219 addUnwrappedLine();
220
221 // In a first run, format everything with the lines containing macro calls
222 // replaced by the expansion.
223 if (!ExpandedLines.empty()) {
224 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
225 for (const auto &Line : Lines) {
226 if (!Line.Tokens.empty()) {
227 auto it = ExpandedLines.find(Val: Line.Tokens.begin()->Tok);
228 if (it != ExpandedLines.end()) {
229 for (const auto &Expanded : it->second) {
230 LLVM_DEBUG(printDebugInfo(Expanded));
231 Callback.consumeUnwrappedLine(Line: Expanded);
232 }
233 continue;
234 }
235 }
236 LLVM_DEBUG(printDebugInfo(Line));
237 Callback.consumeUnwrappedLine(Line);
238 }
239 Callback.finishRun();
240 }
241
242 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
243 for (const UnwrappedLine &Line : Lines) {
244 LLVM_DEBUG(printDebugInfo(Line));
245 Callback.consumeUnwrappedLine(Line);
246 }
247 Callback.finishRun();
248 Lines.clear();
249 while (!PPLevelBranchIndex.empty() &&
250 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
251 PPLevelBranchIndex.resize(N: PPLevelBranchIndex.size() - 1);
252 PPLevelBranchCount.resize(N: PPLevelBranchCount.size() - 1);
253 }
254 if (!PPLevelBranchIndex.empty()) {
255 ++PPLevelBranchIndex.back();
256 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
257 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
258 }
259 } while (!PPLevelBranchIndex.empty());
260}
261
262void UnwrappedLineParser::parseFile() {
263 // The top-level context in a file always has declarations, except for pre-
264 // processor directives and JavaScript files.
265 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
266 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
267 MustBeDeclaration);
268 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
269 parseBracedList();
270 else
271 parseLevel();
272 // Make sure to format the remaining tokens.
273 //
274 // LK_TextProto is special since its top-level is parsed as the body of a
275 // braced list, which does not necessarily have natural line separators such
276 // as a semicolon. Comments after the last entry that have been determined to
277 // not belong to that line, as in:
278 // key: value
279 // // endfile comment
280 // do not have a chance to be put on a line of their own until this point.
281 // Here we add this newline before end-of-file comments.
282 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
283 addUnwrappedLine();
284 flushComments(NewlineBeforeNext: true);
285 addUnwrappedLine();
286}
287
288void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289 do {
290 switch (FormatTok->Tok.getKind()) {
291 case tok::l_brace:
292 case tok::semi:
293 return;
294 default:
295 if (FormatTok->is(II: Keywords.kw_where)) {
296 addUnwrappedLine();
297 nextToken();
298 parseCSharpGenericTypeConstraint();
299 break;
300 }
301 nextToken();
302 break;
303 }
304 } while (!eof());
305}
306
307void UnwrappedLineParser::parseCSharpAttribute() {
308 int UnpairedSquareBrackets = 1;
309 do {
310 switch (FormatTok->Tok.getKind()) {
311 case tok::r_square:
312 nextToken();
313 --UnpairedSquareBrackets;
314 if (UnpairedSquareBrackets == 0) {
315 addUnwrappedLine();
316 return;
317 }
318 break;
319 case tok::l_square:
320 ++UnpairedSquareBrackets;
321 nextToken();
322 break;
323 default:
324 nextToken();
325 break;
326 }
327 } while (!eof());
328}
329
330bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
331 if (!Lines.empty() && Lines.back().InPPDirective)
332 return true;
333
334 const FormatToken *Previous = Tokens->getPreviousToken();
335 return Previous && Previous->is(Kind: tok::comment) &&
336 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
337}
338
339/// Parses a level, that is ???.
340/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
341/// \param IfKind The \p if statement kind in the level.
342/// \param IfLeftBrace The left brace of the \p if block in the level.
343/// \returns true if a simple block of if/else/for/while, or false otherwise.
344/// (A simple block has a single statement.)
345bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
346 IfStmtKind *IfKind,
347 FormatToken **IfLeftBrace) {
348 const bool InRequiresExpression =
349 OpeningBrace && OpeningBrace->is(TT: TT_RequiresExpressionLBrace);
350 const bool IsPrecededByCommentOrPPDirective =
351 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
352 FormatToken *IfLBrace = nullptr;
353 bool HasDoWhile = false;
354 bool HasLabel = false;
355 unsigned StatementCount = 0;
356 bool SwitchLabelEncountered = false;
357
358 do {
359 if (FormatTok->isAttribute()) {
360 nextToken();
361 if (FormatTok->is(Kind: tok::l_paren))
362 parseParens();
363 continue;
364 }
365 tok::TokenKind Kind = FormatTok->Tok.getKind();
366 if (FormatTok->is(TT: TT_MacroBlockBegin))
367 Kind = tok::l_brace;
368 else if (FormatTok->is(TT: TT_MacroBlockEnd))
369 Kind = tok::r_brace;
370
371 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
372 &HasLabel, &StatementCount] {
373 parseStructuralElement(OpeningBrace, IfKind, IfLeftBrace: &IfLBrace,
374 HasDoWhile: HasDoWhile ? nullptr : &HasDoWhile,
375 HasLabel: HasLabel ? nullptr : &HasLabel);
376 ++StatementCount;
377 assert(StatementCount > 0 && "StatementCount overflow!");
378 };
379
380 switch (Kind) {
381 case tok::comment:
382 nextToken();
383 addUnwrappedLine();
384 break;
385 case tok::l_brace:
386 if (InRequiresExpression) {
387 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
388 } else if (FormatTok->Previous &&
389 FormatTok->Previous->ClosesRequiresClause) {
390 // We need the 'default' case here to correctly parse a function
391 // l_brace.
392 ParseDefault();
393 continue;
394 }
395 if (!InRequiresExpression && FormatTok->isNot(Kind: TT_MacroBlockBegin)) {
396 if (tryToParseBracedList())
397 continue;
398 FormatTok->setFinalizedType(TT_BlockLBrace);
399 }
400 parseBlock();
401 ++StatementCount;
402 assert(StatementCount > 0 && "StatementCount overflow!");
403 addUnwrappedLine();
404 break;
405 case tok::r_brace:
406 if (OpeningBrace) {
407 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
408 OpeningBrace->isNoneOf(Ks: TT_ControlStatementLBrace, Ks: TT_ElseLBrace)) {
409 return false;
410 }
411 if (FormatTok->isNot(Kind: tok::r_brace) || StatementCount != 1 || HasLabel ||
412 HasDoWhile || IsPrecededByCommentOrPPDirective ||
413 precededByCommentOrPPDirective()) {
414 return false;
415 }
416 const FormatToken *Next = Tokens->peekNextToken();
417 if (Next->is(Kind: tok::comment) && Next->NewlinesBefore == 0)
418 return false;
419 if (IfLeftBrace)
420 *IfLeftBrace = IfLBrace;
421 return true;
422 }
423 nextToken();
424 addUnwrappedLine();
425 break;
426 case tok::kw_default: {
427 unsigned StoredPosition = Tokens->getPosition();
428 auto *Next = Tokens->getNextNonComment();
429 FormatTok = Tokens->setPosition(StoredPosition);
430 if (Next->isNoneOf(Ks: tok::colon, Ks: tok::arrow)) {
431 // default not followed by `:` or `->` is not a case label; treat it
432 // like an identifier.
433 parseStructuralElement();
434 break;
435 }
436 // Else, if it is 'default:', fall through to the case handling.
437 [[fallthrough]];
438 }
439 case tok::kw_case:
440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
441 (Style.isJavaScript() && Line->MustBeDeclaration)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
446 ParseDefault();
447 break;
448 }
449 if (!SwitchLabelEncountered &&
450 (Style.IndentCaseLabels ||
451 (OpeningBrace && OpeningBrace->is(TT: TT_SwitchExpressionLBrace)) ||
452 (Line->InPPDirective && Line->Level == 1))) {
453 ++Line->Level;
454 }
455 SwitchLabelEncountered = true;
456 parseStructuralElement();
457 break;
458 case tok::l_square:
459 if (Style.isCSharp()) {
460 nextToken();
461 parseCSharpAttribute();
462 break;
463 }
464 if (handleCppAttributes())
465 break;
466 [[fallthrough]];
467 default:
468 ParseDefault();
469 break;
470 }
471 } while (!eof());
472
473 return false;
474}
475
476void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
477 // We'll parse forward through the tokens until we hit
478 // a closing brace or eof - note that getNextToken() will
479 // parse macros, so this will magically work inside macro
480 // definitions, too.
481 unsigned StoredPosition = Tokens->getPosition();
482 FormatToken *Tok = FormatTok;
483 const FormatToken *PrevTok = Tok->Previous;
484 // Keep a stack of positions of lbrace tokens. We will
485 // update information about whether an lbrace starts a
486 // braced init list or a different block during the loop.
487 struct StackEntry {
488 FormatToken *Tok;
489 const FormatToken *PrevTok;
490 };
491 SmallVector<StackEntry, 8> LBraceStack;
492 assert(Tok->is(tok::l_brace));
493
494 do {
495 auto *NextTok = Tokens->getNextNonComment();
496
497 if (!Line->InMacroBody && !Style.isTableGen()) {
498 // Skip PPDirective lines (except macro definitions) and comments.
499 while (NextTok->is(Kind: tok::hash)) {
500 NextTok = Tokens->getNextToken();
501 if (NextTok->isOneOf(K1: tok::pp_not_keyword, K2: tok::pp_define))
502 break;
503 do {
504 NextTok = Tokens->getNextToken();
505 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(Kind: tok::eof));
506
507 while (NextTok->is(Kind: tok::comment))
508 NextTok = Tokens->getNextToken();
509 }
510 }
511
512 switch (Tok->Tok.getKind()) {
513 case tok::l_brace:
514 if (Style.isJavaScript() && PrevTok) {
515 if (PrevTok->isOneOf(K1: tok::colon, K2: tok::less)) {
516 // A ':' indicates this code is in a type, or a braced list
517 // following a label in an object literal ({a: {b: 1}}).
518 // A '<' could be an object used in a comparison, but that is nonsense
519 // code (can never return true), so more likely it is a generic type
520 // argument (`X<{a: string; b: number}>`).
521 // The code below could be confused by semicolons between the
522 // individual members in a type member list, which would normally
523 // trigger BK_Block. In both cases, this must be parsed as an inline
524 // braced init.
525 Tok->setBlockKind(BK_BracedInit);
526 } else if (PrevTok->is(Kind: tok::r_paren)) {
527 // `) { }` can only occur in function or method declarations in JS.
528 Tok->setBlockKind(BK_Block);
529 }
530 } else if (Style.isJava() && PrevTok && PrevTok->is(Kind: tok::arrow)) {
531 Tok->setBlockKind(BK_Block);
532 } else {
533 Tok->setBlockKind(BK_Unknown);
534 }
535 LBraceStack.push_back(Elt: {.Tok: Tok, .PrevTok: PrevTok});
536 break;
537 case tok::r_brace:
538 if (LBraceStack.empty())
539 break;
540 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BBK: BK_Unknown)) {
541 bool ProbablyBracedList = false;
542 if (Style.Language == FormatStyle::LK_Proto) {
543 ProbablyBracedList = NextTok->isOneOf(K1: tok::comma, K2: tok::r_square);
544 } else if (LBrace->isNot(Kind: TT_EnumLBrace)) {
545 // Using OriginalColumn to distinguish between ObjC methods and
546 // binary operators is a bit hacky.
547 bool NextIsObjCMethod = NextTok->isOneOf(K1: tok::plus, K2: tok::minus) &&
548 NextTok->OriginalColumn == 0;
549
550 // Try to detect a braced list. Note that regardless how we mark inner
551 // braces here, we will overwrite the BlockKind later if we parse a
552 // braced list (where all blocks inside are by default braced lists),
553 // or when we explicitly detect blocks (for example while parsing
554 // lambdas).
555
556 // If we already marked the opening brace as braced list, the closing
557 // must also be part of it.
558 ProbablyBracedList = LBrace->is(TT: TT_BracedListLBrace);
559
560 ProbablyBracedList = ProbablyBracedList ||
561 (Style.isJavaScript() &&
562 NextTok->isOneOf(K1: Keywords.kw_of, K2: Keywords.kw_in,
563 Ks: Keywords.kw_as));
564 ProbablyBracedList =
565 ProbablyBracedList ||
566 (IsCpp && (PrevTok->Tok.isLiteral() ||
567 NextTok->isOneOf(K1: tok::l_paren, K2: tok::arrow)));
568
569 // If there is a comma, semicolon or right paren after the closing
570 // brace, we assume this is a braced initializer list.
571 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
572 // braced list in JS.
573 ProbablyBracedList =
574 ProbablyBracedList ||
575 NextTok->isOneOf(K1: tok::comma, K2: tok::period, Ks: tok::colon,
576 Ks: tok::r_paren, Ks: tok::r_square, Ks: tok::ellipsis);
577
578 // Distinguish between braced list in a constructor initializer list
579 // followed by constructor body, or just adjacent blocks.
580 ProbablyBracedList =
581 ProbablyBracedList ||
582 (NextTok->is(Kind: tok::l_brace) && LBraceStack.back().PrevTok &&
583 LBraceStack.back().PrevTok->isOneOf(K1: tok::identifier,
584 K2: tok::greater));
585
586 ProbablyBracedList =
587 ProbablyBracedList ||
588 (NextTok->is(Kind: tok::identifier) &&
589 PrevTok->isNoneOf(Ks: tok::semi, Ks: tok::r_brace, Ks: tok::l_brace));
590
591 ProbablyBracedList = ProbablyBracedList ||
592 (NextTok->is(Kind: tok::semi) &&
593 (!ExpectClassBody || LBraceStack.size() != 1));
594
595 ProbablyBracedList =
596 ProbablyBracedList ||
597 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
598
599 if (!Style.isCSharp() && NextTok->is(Kind: tok::l_square)) {
600 // We can have an array subscript after a braced init
601 // list, but C++11 attributes are expected after blocks.
602 NextTok = Tokens->getNextToken();
603 ProbablyBracedList = NextTok->isNot(Kind: tok::l_square);
604 }
605
606 // Cpp macro definition body that is a nonempty braced list or block:
607 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
608 !FormatTok->Previous && NextTok->is(Kind: tok::eof) &&
609 // A statement can end with only `;` (simple statement), a block
610 // closing brace (compound statement), or `:` (label statement).
611 // If PrevTok is a block opening brace, Tok ends an empty block.
612 PrevTok->isNoneOf(Ks: tok::semi, Ks: BK_Block, Ks: tok::colon)) {
613 ProbablyBracedList = true;
614 }
615 }
616 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
617 Tok->setBlockKind(BlockKind);
618 LBrace->setBlockKind(BlockKind);
619 }
620 LBraceStack.pop_back();
621 break;
622 case tok::identifier:
623 if (Tok->isNot(Kind: TT_StatementMacro))
624 break;
625 [[fallthrough]];
626 case tok::at:
627 case tok::semi:
628 case tok::kw_if:
629 case tok::kw_while:
630 case tok::kw_for:
631 case tok::kw_switch:
632 case tok::kw_try:
633 case tok::kw___try:
634 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BBK: BK_Unknown))
635 LBraceStack.back().Tok->setBlockKind(BK_Block);
636 break;
637 default:
638 break;
639 }
640
641 PrevTok = Tok;
642 Tok = NextTok;
643 } while (Tok->isNot(Kind: tok::eof) && !LBraceStack.empty());
644
645 // Assume other blocks for all unclosed opening braces.
646 for (const auto &Entry : LBraceStack)
647 if (Entry.Tok->is(BBK: BK_Unknown))
648 Entry.Tok->setBlockKind(BK_Block);
649
650 FormatTok = Tokens->setPosition(StoredPosition);
651}
652
653// Sets the token type of the directly previous right brace.
654void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
655 if (auto Prev = FormatTok->getPreviousNonComment();
656 Prev && Prev->is(Kind: tok::r_brace)) {
657 Prev->setFinalizedType(Type);
658 }
659}
660
// Mixes the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
666
667size_t UnwrappedLineParser::computePPHash() const {
668 size_t h = 0;
669 for (const auto &i : PPStack) {
670 hash_combine(seed&: h, v: size_t(i.Kind));
671 hash_combine(seed&: h, v: i.Line);
672 }
673 return h;
674}
675
676// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
677// is not null, subtracts its length (plus the preceding space) when computing
678// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
679// running the token annotator on it so that we can restore them afterward.
680bool UnwrappedLineParser::mightFitOnOneLine(
681 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
682 const auto ColumnLimit = Style.ColumnLimit;
683 if (ColumnLimit == 0)
684 return true;
685
686 auto &Tokens = ParsedLine.Tokens;
687 assert(!Tokens.empty());
688
689 const auto *LastToken = Tokens.back().Tok;
690 assert(LastToken);
691
692 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
693
694 int Index = 0;
695 for (const auto &Token : Tokens) {
696 assert(Token.Tok);
697 auto &SavedToken = SavedTokens[Index++];
698 SavedToken.Tok = new FormatToken;
699 SavedToken.Tok->copyFrom(Tok: *Token.Tok);
700 SavedToken.Children = std::move(Token.Children);
701 }
702
703 AnnotatedLine Line(ParsedLine);
704 assert(Line.Last == LastToken);
705
706 TokenAnnotator Annotator(Style, Keywords);
707 Annotator.annotate(Line);
708 Annotator.calculateFormattingInformation(Line);
709
710 auto Length = LastToken->TotalLength;
711 if (OpeningBrace) {
712 assert(OpeningBrace != Tokens.front().Tok);
713 if (auto Prev = OpeningBrace->Previous;
714 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
715 Length -= ColumnLimit;
716 }
717 Length -= OpeningBrace->TokenText.size() + 1;
718 }
719
720 if (const auto *FirstToken = Line.First; FirstToken->is(Kind: tok::r_brace)) {
721 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
722 Length -= FirstToken->TokenText.size() + 1;
723 }
724
725 Index = 0;
726 for (auto &Token : Tokens) {
727 const auto &SavedToken = SavedTokens[Index++];
728 Token.Tok->copyFrom(Tok: *SavedToken.Tok);
729 Token.Children = std::move(SavedToken.Children);
730 delete SavedToken.Tok;
731 }
732
733 // If these change PPLevel needs to be used for get correct indentation.
734 assert(!Line.InMacroBody);
735 assert(!Line.InPPDirective);
736 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
737}
738
739FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
740 unsigned AddLevels, bool MunchSemi,
741 bool KeepBraces,
742 IfStmtKind *IfKind,
743 bool UnindentWhitesmithsBraces) {
744 auto HandleVerilogBlockLabel = [this]() {
745 // ":" name
746 if (Style.isVerilog() && FormatTok->is(Kind: tok::colon)) {
747 nextToken();
748 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
749 nextToken();
750 }
751 };
752
753 // Whether this is a Verilog-specific block that has a special header like a
754 // module.
755 const bool VerilogHierarchy =
756 Style.isVerilog() && Keywords.isVerilogHierarchy(Tok: *FormatTok);
757 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
758 (Style.isVerilog() &&
759 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
760 "'{' or macro block token expected");
761 FormatToken *Tok = FormatTok;
762 const bool FollowedByComment = Tokens->peekNextToken()->is(Kind: tok::comment);
763 auto Index = CurrentLines->size();
764 const bool MacroBlock = FormatTok->is(TT: TT_MacroBlockBegin);
765 FormatTok->setBlockKind(BK_Block);
766
767 const bool IsWhitesmiths =
768 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
769
770 // For Whitesmiths mode, jump to the next level prior to skipping over the
771 // braces.
772 if (!VerilogHierarchy && AddLevels > 0 && IsWhitesmiths)
773 ++Line->Level;
774
775 size_t PPStartHash = computePPHash();
776
777 const unsigned InitialLevel = Line->Level;
778 if (VerilogHierarchy) {
779 AddLevels += parseVerilogHierarchyHeader();
780 } else {
781 nextToken(/*LevelDifference=*/AddLevels);
782 HandleVerilogBlockLabel();
783 }
784
785 // Bail out if there are too many levels. Otherwise, the stack might overflow.
786 if (Line->Level > 300)
787 return nullptr;
788
789 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
790 parseParens();
791
792 size_t NbPreprocessorDirectives =
793 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
794 addUnwrappedLine();
795 size_t OpeningLineIndex =
796 CurrentLines->empty()
797 ? (UnwrappedLine::kInvalidIndex)
798 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
799
800 // Whitesmiths is weird here. The brace needs to be indented for the namespace
801 // block, but the block itself may not be indented depending on the style
802 // settings. This allows the format to back up one level in those cases.
803 if (UnindentWhitesmithsBraces)
804 --Line->Level;
805
806 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
807 MustBeDeclaration);
808
809 // Whitesmiths logic has already added a level by this point, so avoid
810 // adding it twice.
811 if (AddLevels > 0u)
812 Line->Level += AddLevels - (IsWhitesmiths ? 1 : 0);
813
814 FormatToken *IfLBrace = nullptr;
815 const bool SimpleBlock = parseLevel(OpeningBrace: Tok, IfKind, IfLeftBrace: &IfLBrace);
816
817 if (eof())
818 return IfLBrace;
819
820 if (MacroBlock ? FormatTok->isNot(Kind: TT_MacroBlockEnd)
821 : FormatTok->isNot(Kind: tok::r_brace)) {
822 Line->Level = InitialLevel;
823 FormatTok->setBlockKind(BK_Block);
824 return IfLBrace;
825 }
826
827 if (FormatTok->is(Kind: tok::r_brace)) {
828 FormatTok->setBlockKind(BK_Block);
829 if (Tok->is(TT: TT_NamespaceLBrace))
830 FormatTok->setFinalizedType(TT_NamespaceRBrace);
831 }
832
833 const bool IsFunctionRBrace =
834 FormatTok->is(Kind: tok::r_brace) && Tok->is(TT: TT_FunctionLBrace);
835
836 auto RemoveBraces = [=]() mutable {
837 if (!SimpleBlock)
838 return false;
839 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
840 assert(FormatTok->is(tok::r_brace));
841 const bool WrappedOpeningBrace = !Tok->Previous;
842 if (WrappedOpeningBrace && FollowedByComment)
843 return false;
844 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
845 if (KeepBraces && !HasRequiredIfBraces)
846 return false;
847 if (Tok->isNot(Kind: TT_ElseLBrace) || !HasRequiredIfBraces) {
848 const FormatToken *Previous = Tokens->getPreviousToken();
849 assert(Previous);
850 if (Previous->is(Kind: tok::r_brace) && !Previous->Optional)
851 return false;
852 }
853 assert(!CurrentLines->empty());
854 auto &LastLine = CurrentLines->back();
855 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(ParsedLine&: LastLine))
856 return false;
857 if (Tok->is(TT: TT_ElseLBrace))
858 return true;
859 if (WrappedOpeningBrace) {
860 assert(Index > 0);
861 --Index; // The line above the wrapped l_brace.
862 Tok = nullptr;
863 }
864 return mightFitOnOneLine(ParsedLine&: (*CurrentLines)[Index], OpeningBrace: Tok);
865 };
866 if (RemoveBraces()) {
867 Tok->MatchingParen = FormatTok;
868 FormatTok->MatchingParen = Tok;
869 }
870
871 size_t PPEndHash = computePPHash();
872
873 // Munch the closing brace.
874 nextToken(/*LevelDifference=*/-AddLevels);
875
876 // When this is a function block and there is an unnecessary semicolon
877 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
878 // it later).
879 if (Style.RemoveSemicolon && IsFunctionRBrace) {
880 while (FormatTok->is(Kind: tok::semi)) {
881 FormatTok->Optional = true;
882 nextToken();
883 }
884 }
885
886 HandleVerilogBlockLabel();
887
888 if (MacroBlock && FormatTok->is(Kind: tok::l_paren))
889 parseParens();
890
891 Line->Level = InitialLevel;
892
893 if (FormatTok->is(Kind: tok::kw_noexcept)) {
894 // A noexcept in a requires expression.
895 nextToken();
896 }
897
898 if (FormatTok->is(Kind: tok::arrow)) {
899 // Following the } or noexcept we can find a trailing return type arrow
900 // as part of an implicit conversion constraint.
901 nextToken();
902 parseStructuralElement();
903 }
904
905 if (MunchSemi && FormatTok->is(Kind: tok::semi))
906 nextToken();
907
908 if (PPStartHash == PPEndHash) {
909 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
910 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
911 // Update the opening line to add the forward reference as well
912 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
913 CurrentLines->size() - 1;
914 }
915 }
916
917 return IfLBrace;
918}
919
920static bool isGoogScope(const UnwrappedLine &Line) {
921 // FIXME: Closure-library specific stuff should not be hard-coded but be
922 // configurable.
923 if (Line.Tokens.size() < 4)
924 return false;
925 auto I = Line.Tokens.begin();
926 if (I->Tok->TokenText != "goog")
927 return false;
928 ++I;
929 if (I->Tok->isNot(Kind: tok::period))
930 return false;
931 ++I;
932 if (I->Tok->TokenText != "scope")
933 return false;
934 ++I;
935 return I->Tok->is(Kind: tok::l_paren);
936}
937
938static bool isIIFE(const UnwrappedLine &Line,
939 const AdditionalKeywords &Keywords) {
940 // Look for the start of an immediately invoked anonymous function.
941 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
942 // This is commonly done in JavaScript to create a new, anonymous scope.
943 // Example: (function() { ... })()
944 if (Line.Tokens.size() < 3)
945 return false;
946 auto I = Line.Tokens.begin();
947 if (I->Tok->isNot(Kind: tok::l_paren))
948 return false;
949 ++I;
950 if (I->Tok->isNot(Kind: Keywords.kw_function))
951 return false;
952 ++I;
953 return I->Tok->is(Kind: tok::l_paren);
954}
955
956static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
957 const FormatToken &InitialToken,
958 bool IsEmptyBlock,
959 bool IsJavaRecord = false) {
960 if (IsJavaRecord)
961 return Style.BraceWrapping.AfterClass;
962
963 tok::TokenKind Kind = InitialToken.Tok.getKind();
964 if (InitialToken.is(TT: TT_NamespaceMacro))
965 Kind = tok::kw_namespace;
966
967 const bool WrapRecordAllowed =
968 !IsEmptyBlock ||
969 Style.AllowShortRecordOnASingleLine < FormatStyle::SRS_Empty ||
970 Style.BraceWrapping.SplitEmptyRecord;
971
972 switch (Kind) {
973 case tok::kw_namespace:
974 return Style.BraceWrapping.AfterNamespace;
975 case tok::kw_class:
976 return Style.BraceWrapping.AfterClass && WrapRecordAllowed;
977 case tok::kw_union:
978 return Style.BraceWrapping.AfterUnion && WrapRecordAllowed;
979 case tok::kw_struct:
980 return Style.BraceWrapping.AfterStruct && WrapRecordAllowed;
981 case tok::kw_enum:
982 return Style.BraceWrapping.AfterEnum;
983 default:
984 return false;
985 }
986}
987
988void UnwrappedLineParser::parseChildBlock() {
989 assert(FormatTok->is(tok::l_brace));
990 FormatTok->setBlockKind(BK_Block);
991 const FormatToken *OpeningBrace = FormatTok;
992 nextToken();
993 {
994 bool SkipIndent = (Style.isJavaScript() &&
995 (isGoogScope(Line: *Line) || isIIFE(Line: *Line, Keywords)));
996 ScopedLineState LineState(*this);
997 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
998 /*MustBeDeclaration=*/false);
999 Line->Level += SkipIndent ? 0 : 1;
1000 parseLevel(OpeningBrace);
1001 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
1002 Line->Level -= SkipIndent ? 0 : 1;
1003 }
1004 nextToken();
1005}
1006
1007void UnwrappedLineParser::parsePPDirective() {
1008 assert(FormatTok->is(tok::hash) && "'#' expected");
1009 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1010
1011 nextToken();
1012
1013 if (!FormatTok->Tok.getIdentifierInfo()) {
1014 parsePPUnknown();
1015 return;
1016 }
1017
1018 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1019 case tok::pp_define:
1020 parsePPDefine();
1021 return;
1022 case tok::pp_if:
1023 parsePPIf(/*IfDef=*/false);
1024 break;
1025 case tok::pp_ifdef:
1026 case tok::pp_ifndef:
1027 parsePPIf(/*IfDef=*/true);
1028 break;
1029 case tok::pp_else:
1030 case tok::pp_elifdef:
1031 case tok::pp_elifndef:
1032 case tok::pp_elif:
1033 parsePPElse();
1034 break;
1035 case tok::pp_endif:
1036 parsePPEndIf();
1037 break;
1038 case tok::pp_pragma:
1039 parsePPPragma();
1040 break;
1041 case tok::pp_error:
1042 case tok::pp_warning:
1043 nextToken();
1044 if (!eof() && Style.isCpp())
1045 FormatTok->setFinalizedType(TT_AfterPPDirective);
1046 [[fallthrough]];
1047 default:
1048 parsePPUnknown();
1049 break;
1050 }
1051}
1052
1053void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1054 size_t Line = CurrentLines->size();
1055 if (CurrentLines == &PreprocessorDirectives)
1056 Line += Lines.size();
1057
1058 if (Unreachable ||
1059 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1060 PPStack.push_back(Elt: {PP_Unreachable, Line});
1061 } else {
1062 PPStack.push_back(Elt: {PP_Conditional, Line});
1063 }
1064}
1065
1066void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1067 ++PPBranchLevel;
1068 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1069 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1070 PPLevelBranchIndex.push_back(Elt: 0);
1071 PPLevelBranchCount.push_back(Elt: 0);
1072 }
1073 PPChainBranchIndex.push(x: Unreachable ? -1 : 0);
1074 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1075 conditionalCompilationCondition(Unreachable: Unreachable || Skip);
1076}
1077
1078void UnwrappedLineParser::conditionalCompilationAlternative() {
1079 if (!PPStack.empty())
1080 PPStack.pop_back();
1081 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1082 if (!PPChainBranchIndex.empty())
1083 ++PPChainBranchIndex.top();
1084 conditionalCompilationCondition(
1085 Unreachable: PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1086 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1087}
1088
1089void UnwrappedLineParser::conditionalCompilationEnd() {
1090 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1091 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1092 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1093 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1094 }
1095 // Guard against #endif's without #if.
1096 if (PPBranchLevel > -1)
1097 --PPBranchLevel;
1098 if (!PPChainBranchIndex.empty())
1099 PPChainBranchIndex.pop();
1100 if (!PPStack.empty())
1101 PPStack.pop_back();
1102}
1103
1104void UnwrappedLineParser::parsePPIf(bool IfDef) {
1105 bool IfNDef = FormatTok->is(Kind: tok::pp_ifndef);
1106 nextToken();
1107 bool Unreachable = false;
1108 if (!IfDef && (FormatTok->is(Kind: tok::kw_false) || FormatTok->TokenText == "0"))
1109 Unreachable = true;
1110 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1111 Unreachable = true;
1112 conditionalCompilationStart(Unreachable);
1113 FormatToken *IfCondition = FormatTok;
1114 // If there's a #ifndef on the first line, and the only lines before it are
1115 // comments, it could be an include guard.
1116 bool MaybeIncludeGuard = IfNDef;
1117 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1118 for (auto &Line : Lines) {
1119 if (Line.Tokens.front().Tok->isNot(Kind: tok::comment)) {
1120 MaybeIncludeGuard = false;
1121 IncludeGuard = IG_Rejected;
1122 break;
1123 }
1124 }
1125 }
1126 --PPBranchLevel;
1127 parsePPUnknown();
1128 ++PPBranchLevel;
1129 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1130 IncludeGuard = IG_IfNdefed;
1131 IncludeGuardToken = IfCondition;
1132 }
1133}
1134
1135void UnwrappedLineParser::parsePPElse() {
1136 // If a potential include guard has an #else, it's not an include guard.
1137 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1138 IncludeGuard = IG_Rejected;
1139 // Don't crash when there is an #else without an #if.
1140 assert(PPBranchLevel >= -1);
1141 if (PPBranchLevel == -1)
1142 conditionalCompilationStart(/*Unreachable=*/true);
1143 conditionalCompilationAlternative();
1144 --PPBranchLevel;
1145 parsePPUnknown();
1146 ++PPBranchLevel;
1147}
1148
1149void UnwrappedLineParser::parsePPEndIf() {
1150 conditionalCompilationEnd();
1151 parsePPUnknown();
1152}
1153
1154void UnwrappedLineParser::parsePPDefine() {
1155 nextToken();
1156
1157 if (!FormatTok->Tok.getIdentifierInfo()) {
1158 IncludeGuard = IG_Rejected;
1159 IncludeGuardToken = nullptr;
1160 parsePPUnknown();
1161 return;
1162 }
1163
1164 bool MaybeIncludeGuard = false;
1165 if (IncludeGuard == IG_IfNdefed &&
1166 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1167 IncludeGuard = IG_Defined;
1168 IncludeGuardToken = nullptr;
1169 for (auto &Line : Lines) {
1170 if (Line.Tokens.front().Tok->isNoneOf(Ks: tok::comment, Ks: tok::hash)) {
1171 IncludeGuard = IG_Rejected;
1172 break;
1173 }
1174 }
1175 MaybeIncludeGuard = IncludeGuard == IG_Defined;
1176 }
1177
1178 // In the context of a define, even keywords should be treated as normal
1179 // identifiers. Setting the kind to identifier is not enough, because we need
1180 // to treat additional keywords like __except as well, which are already
1181 // identifiers. Setting the identifier info to null interferes with include
1182 // guard processing above, and changes preprocessing nesting.
1183 FormatTok->Tok.setKind(tok::identifier);
1184 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1185 nextToken();
1186
1187 // IncludeGuard can't have a non-empty macro definition.
1188 if (MaybeIncludeGuard && !eof())
1189 IncludeGuard = IG_Rejected;
1190
1191 if (FormatTok->is(Kind: tok::l_paren) && !FormatTok->hasWhitespaceBefore())
1192 parseParens();
1193 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1194 Line->Level += PPBranchLevel + 1;
1195 addUnwrappedLine();
1196 ++Line->Level;
1197
1198 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1199 assert((int)Line->PPLevel >= 0);
1200
1201 if (eof())
1202 return;
1203
1204 Line->InMacroBody = true;
1205
1206 if (!Style.SkipMacroDefinitionBody) {
1207 // Errors during a preprocessor directive can only affect the layout of the
1208 // preprocessor directive, and thus we ignore them. An alternative approach
1209 // would be to use the same approach we use on the file level (no
1210 // re-indentation if there was a structural error) within the macro
1211 // definition.
1212 parseFile();
1213 return;
1214 }
1215
1216 for (auto *Comment : CommentsBeforeNextToken)
1217 Comment->Finalized = true;
1218
1219 do {
1220 FormatTok->Finalized = true;
1221 FormatTok = Tokens->getNextToken();
1222 } while (!eof());
1223
1224 addUnwrappedLine();
1225}
1226
1227void UnwrappedLineParser::parsePPPragma() {
1228 Line->InPragmaDirective = true;
1229 parsePPUnknown();
1230}
1231
1232void UnwrappedLineParser::parsePPUnknown() {
1233 while (!eof())
1234 nextToken();
1235 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1236 Line->Level += PPBranchLevel + 1;
1237 addUnwrappedLine();
1238}
1239
1240// Here we exclude certain tokens that are not usually the first token in an
1241// unwrapped line. This is used in attempt to distinguish macro calls without
1242// trailing semicolons from other constructs split to several lines.
1243static bool tokenCanStartNewLine(const FormatToken &Tok) {
1244 // Semicolon can be a null-statement, l_square can be a start of a macro or
1245 // a C++11 attribute, but this doesn't seem to be common.
1246 return Tok.isNoneOf(Ks: tok::semi, Ks: tok::l_brace,
1247 // Tokens that can only be used as binary operators and a
1248 // part of overloaded operator names.
1249 Ks: tok::period, Ks: tok::periodstar, Ks: tok::arrow, Ks: tok::arrowstar,
1250 Ks: tok::less, Ks: tok::greater, Ks: tok::slash, Ks: tok::percent,
1251 Ks: tok::lessless, Ks: tok::greatergreater, Ks: tok::equal,
1252 Ks: tok::plusequal, Ks: tok::minusequal, Ks: tok::starequal,
1253 Ks: tok::slashequal, Ks: tok::percentequal, Ks: tok::ampequal,
1254 Ks: tok::pipeequal, Ks: tok::caretequal, Ks: tok::greatergreaterequal,
1255 Ks: tok::lesslessequal,
1256 // Colon is used in labels, base class lists, initializer
1257 // lists, range-based for loops, ternary operator, but
1258 // should never be the first token in an unwrapped line.
1259 Ks: tok::colon,
1260 // 'noexcept' is a trailing annotation.
1261 Ks: tok::kw_noexcept);
1262}
1263
1264static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1265 const FormatToken *FormatTok) {
1266 // FIXME: This returns true for C/C++ keywords like 'struct'.
1267 return FormatTok->is(Kind: tok::identifier) &&
1268 (!FormatTok->Tok.getIdentifierInfo() ||
1269 FormatTok->isNoneOf(
1270 Ks: Keywords.kw_in, Ks: Keywords.kw_of, Ks: Keywords.kw_as, Ks: Keywords.kw_async,
1271 Ks: Keywords.kw_await, Ks: Keywords.kw_yield, Ks: Keywords.kw_finally,
1272 Ks: Keywords.kw_function, Ks: Keywords.kw_import, Ks: Keywords.kw_is,
1273 Ks: Keywords.kw_let, Ks: Keywords.kw_var, Ks: tok::kw_const,
1274 Ks: Keywords.kw_abstract, Ks: Keywords.kw_extends, Ks: Keywords.kw_implements,
1275 Ks: Keywords.kw_instanceof, Ks: Keywords.kw_interface,
1276 Ks: Keywords.kw_override, Ks: Keywords.kw_throws, Ks: Keywords.kw_from));
1277}
1278
1279static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1280 const FormatToken *FormatTok) {
1281 return FormatTok->Tok.isLiteral() ||
1282 FormatTok->isOneOf(K1: tok::kw_true, K2: tok::kw_false) ||
1283 mustBeJSIdent(Keywords, FormatTok);
1284}
1285
1286// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1287// when encountered after a value (see mustBeJSIdentOrValue).
1288static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1289 const FormatToken *FormatTok) {
1290 return FormatTok->isOneOf(
1291 K1: tok::kw_return, K2: Keywords.kw_yield,
1292 // conditionals
1293 Ks: tok::kw_if, Ks: tok::kw_else,
1294 // loops
1295 Ks: tok::kw_for, Ks: tok::kw_while, Ks: tok::kw_do, Ks: tok::kw_continue, Ks: tok::kw_break,
1296 // switch/case
1297 Ks: tok::kw_switch, Ks: tok::kw_case,
1298 // exceptions
1299 Ks: tok::kw_throw, Ks: tok::kw_try, Ks: tok::kw_catch, Ks: Keywords.kw_finally,
1300 // declaration
1301 Ks: tok::kw_const, Ks: tok::kw_class, Ks: Keywords.kw_var, Ks: Keywords.kw_let,
1302 Ks: Keywords.kw_async, Ks: Keywords.kw_function,
1303 // import/export
1304 Ks: Keywords.kw_import, Ks: tok::kw_export);
1305}
1306
1307// Checks whether a token is a type in K&R C (aka C78).
1308static bool isC78Type(const FormatToken &Tok) {
1309 return Tok.isOneOf(K1: tok::kw_char, K2: tok::kw_short, Ks: tok::kw_int, Ks: tok::kw_long,
1310 Ks: tok::kw_unsigned, Ks: tok::kw_float, Ks: tok::kw_double,
1311 Ks: tok::identifier);
1312}
1313
1314// This function checks whether a token starts the first parameter declaration
1315// in a K&R C (aka C78) function definition, e.g.:
1316// int f(a, b)
1317// short a, b;
1318// {
1319// return a + b;
1320// }
1321static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1322 const FormatToken *FuncName) {
1323 assert(Tok);
1324 assert(Next);
1325 assert(FuncName);
1326
1327 if (FuncName->isNot(Kind: tok::identifier))
1328 return false;
1329
1330 const FormatToken *Prev = FuncName->Previous;
1331 if (!Prev || (Prev->isNot(Kind: tok::star) && !isC78Type(Tok: *Prev)))
1332 return false;
1333
1334 if (!isC78Type(Tok: *Tok) &&
1335 Tok->isNoneOf(Ks: tok::kw_register, Ks: tok::kw_struct, Ks: tok::kw_union)) {
1336 return false;
1337 }
1338
1339 if (Next->isNot(Kind: tok::star) && !Next->Tok.getIdentifierInfo())
1340 return false;
1341
1342 Tok = Tok->Previous;
1343 if (!Tok || Tok->isNot(Kind: tok::r_paren))
1344 return false;
1345
1346 Tok = Tok->Previous;
1347 if (!Tok || Tok->isNot(Kind: tok::identifier))
1348 return false;
1349
1350 return Tok->Previous && Tok->Previous->isOneOf(K1: tok::l_paren, K2: tok::comma);
1351}
1352
1353bool UnwrappedLineParser::parseModuleImport() {
1354 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1355
1356 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1357 !Token->Tok.getIdentifierInfo() &&
1358 Token->isNoneOf(Ks: tok::colon, Ks: tok::less, Ks: tok::string_literal)) {
1359 return false;
1360 }
1361
1362 nextToken();
1363 while (!eof()) {
1364 if (FormatTok->is(Kind: tok::colon)) {
1365 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1366 }
1367 // Handle import <foo/bar.h> as we would an include statement.
1368 else if (FormatTok->is(Kind: tok::less)) {
1369 nextToken();
1370 while (FormatTok->isNoneOf(Ks: tok::semi, Ks: tok::greater) && !eof()) {
1371 // Mark tokens up to the trailing line comments as implicit string
1372 // literals.
1373 if (FormatTok->isNot(Kind: tok::comment) &&
1374 !FormatTok->TokenText.starts_with(Prefix: "//")) {
1375 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1376 }
1377 nextToken();
1378 }
1379 }
1380 if (FormatTok->is(Kind: tok::semi)) {
1381 nextToken();
1382 break;
1383 }
1384 nextToken();
1385 }
1386
1387 addUnwrappedLine();
1388 return true;
1389}
1390
1391// readTokenWithJavaScriptASI reads the next token and terminates the current
1392// line if JavaScript Automatic Semicolon Insertion must
1393// happen between the current token and the next token.
1394//
1395// This method is conservative - it cannot cover all edge cases of JavaScript,
1396// but only aims to correctly handle certain well known cases. It *must not*
1397// return true in speculative cases.
1398void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1399 FormatToken *Previous = FormatTok;
1400 readToken();
1401 FormatToken *Next = FormatTok;
1402
1403 bool IsOnSameLine =
1404 CommentsBeforeNextToken.empty()
1405 ? Next->NewlinesBefore == 0
1406 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1407 if (IsOnSameLine)
1408 return;
1409
1410 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Previous);
1411 bool PreviousStartsTemplateExpr =
1412 Previous->is(TT: TT_TemplateString) && Previous->TokenText.ends_with(Suffix: "${");
1413 if (PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) {
1414 // If the line contains an '@' sign, the previous token might be an
1415 // annotation, which can precede another identifier/value.
1416 bool HasAt = llvm::any_of(Range&: Line->Tokens, P: [](UnwrappedLineNode &LineNode) {
1417 return LineNode.Tok->is(Kind: tok::at);
1418 });
1419 if (HasAt)
1420 return;
1421 }
1422 if (Next->is(Kind: tok::exclaim) && PreviousMustBeValue)
1423 return addUnwrappedLine();
1424 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, FormatTok: Next);
1425 bool NextEndsTemplateExpr =
1426 Next->is(TT: TT_TemplateString) && Next->TokenText.starts_with(Prefix: "}");
1427 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1428 (PreviousMustBeValue ||
1429 Previous->isOneOf(K1: tok::r_square, K2: tok::r_paren, Ks: tok::plusplus,
1430 Ks: tok::minusminus))) {
1431 return addUnwrappedLine();
1432 }
1433 if ((PreviousMustBeValue || Previous->is(Kind: tok::r_paren)) &&
1434 isJSDeclOrStmt(Keywords, FormatTok: Next)) {
1435 return addUnwrappedLine();
1436 }
1437}
1438
1439void UnwrappedLineParser::parseStructuralElement(
1440 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1441 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1442 if (Style.isTableGen() && FormatTok->is(Kind: tok::pp_include)) {
1443 nextToken();
1444 if (FormatTok->is(Kind: tok::string_literal))
1445 nextToken();
1446 addUnwrappedLine();
1447 return;
1448 }
1449
1450 if (IsCpp) {
1451 while (FormatTok->is(Kind: tok::l_square) && handleCppAttributes()) {
1452 }
1453 } else if (Style.isVerilog()) {
1454 if (Keywords.isVerilogStructuredProcedure(Tok: *FormatTok)) {
1455 parseForOrWhileLoop(/*HasParens=*/false);
1456 return;
1457 }
1458 if (FormatTok->isOneOf(K1: Keywords.kw_foreach, K2: Keywords.kw_repeat)) {
1459 parseForOrWhileLoop();
1460 return;
1461 }
1462 if (FormatTok->isOneOf(K1: tok::kw_restrict, K2: Keywords.kw_assert,
1463 Ks: Keywords.kw_assume, Ks: Keywords.kw_cover)) {
1464 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1465 return;
1466 }
1467
1468 // Skip things that can exist before keywords like 'if' and 'case'.
1469 while (true) {
1470 if (FormatTok->isOneOf(K1: Keywords.kw_priority, K2: Keywords.kw_unique,
1471 Ks: Keywords.kw_unique0)) {
1472 nextToken();
1473 } else if (FormatTok->is(Kind: tok::l_paren) &&
1474 Tokens->peekNextToken()->is(Kind: tok::star)) {
1475 parseParens();
1476 } else {
1477 break;
1478 }
1479 }
1480 }
1481
1482 // Tokens that only make sense at the beginning of a line.
1483 if (FormatTok->isAccessSpecifierKeyword()) {
1484 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1485 nextToken();
1486 else
1487 parseAccessSpecifier();
1488 return;
1489 }
1490 switch (FormatTok->Tok.getKind()) {
1491 case tok::kw_asm:
1492 nextToken();
1493 if (FormatTok->is(Kind: tok::l_brace)) {
1494 FormatTok->setFinalizedType(TT_InlineASMBrace);
1495 nextToken();
1496 while (FormatTok && !eof()) {
1497 if (FormatTok->is(Kind: tok::r_brace)) {
1498 FormatTok->setFinalizedType(TT_InlineASMBrace);
1499 nextToken();
1500 addUnwrappedLine();
1501 break;
1502 }
1503 FormatTok->Finalized = true;
1504 nextToken();
1505 }
1506 }
1507 break;
1508 case tok::kw_namespace:
1509 parseNamespace();
1510 return;
1511 case tok::kw_if: {
1512 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1513 // field/method declaration.
1514 break;
1515 }
1516 FormatToken *Tok = parseIfThenElse(IfKind);
1517 if (IfLeftBrace)
1518 *IfLeftBrace = Tok;
1519 return;
1520 }
1521 case tok::kw_for:
1522 case tok::kw_while:
1523 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1524 // field/method declaration.
1525 break;
1526 }
1527 parseForOrWhileLoop();
1528 return;
1529 case tok::kw_do:
1530 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1531 // field/method declaration.
1532 break;
1533 }
1534 parseDoWhile();
1535 if (HasDoWhile)
1536 *HasDoWhile = true;
1537 return;
1538 case tok::kw_switch:
1539 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1540 // 'switch: string' field declaration.
1541 break;
1542 }
1543 parseSwitch(/*IsExpr=*/false);
1544 return;
1545 case tok::kw_default: {
1546 // In Verilog default along with other labels are handled in the next loop.
1547 if (Style.isVerilog())
1548 break;
1549 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1550 // 'default: string' field declaration.
1551 break;
1552 }
1553 auto *Default = FormatTok;
1554 nextToken();
1555 if (FormatTok->is(Kind: tok::colon)) {
1556 FormatTok->setFinalizedType(TT_CaseLabelColon);
1557 parseLabel();
1558 return;
1559 }
1560 if (FormatTok->is(Kind: tok::arrow)) {
1561 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1562 Default->setFinalizedType(TT_SwitchExpressionLabel);
1563 parseLabel();
1564 return;
1565 }
1566 // e.g. "default void f() {}" in a Java interface.
1567 break;
1568 }
1569 case tok::kw_case:
1570 // Proto: there are no switch/case statements.
1571 if (Style.Language == FormatStyle::LK_Proto) {
1572 nextToken();
1573 return;
1574 }
1575 if (Style.isVerilog()) {
1576 parseBlock();
1577 addUnwrappedLine();
1578 return;
1579 }
1580 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1581 // 'case: string' field declaration.
1582 nextToken();
1583 break;
1584 }
1585 parseCaseLabel();
1586 return;
1587 case tok::kw_goto:
1588 nextToken();
1589 if (FormatTok->is(Kind: tok::kw_case))
1590 nextToken();
1591 break;
1592 case tok::kw_try:
1593 case tok::kw___try:
1594 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1595 // field/method declaration.
1596 break;
1597 }
1598 parseTryCatch();
1599 return;
1600 case tok::kw_extern:
1601 if (Style.isVerilog()) {
1602 // In Verilog an extern module declaration looks like a start of module.
1603 // But there is no body and endmodule. So we handle it separately.
1604 parseVerilogExtern();
1605 return;
1606 }
1607 nextToken();
1608 if (FormatTok->is(Kind: tok::string_literal)) {
1609 nextToken();
1610 if (FormatTok->is(Kind: tok::l_brace)) {
1611 if (Style.BraceWrapping.AfterExternBlock)
1612 addUnwrappedLine();
1613 // Either we indent or for backwards compatibility we follow the
1614 // AfterExternBlock style.
1615 unsigned AddLevels =
1616 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1617 (Style.BraceWrapping.AfterExternBlock &&
1618 Style.IndentExternBlock ==
1619 FormatStyle::IEBS_AfterExternBlock)
1620 ? 1u
1621 : 0u;
1622 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1623 addUnwrappedLine();
1624 return;
1625 }
1626 }
1627 break;
1628 case tok::kw_export:
1629 if (Style.isJavaScript()) {
1630 parseJavaScriptEs6ImportExport();
1631 return;
1632 }
1633 if (Style.isVerilog()) {
1634 parseVerilogExtern();
1635 return;
1636 }
1637 if (IsCpp) {
1638 nextToken();
1639 if (FormatTok->is(Kind: tok::kw_namespace)) {
1640 parseNamespace();
1641 return;
1642 }
1643 if (FormatTok->is(Kind: tok::l_brace)) {
1644 parseCppExportBlock();
1645 return;
1646 }
1647 if (FormatTok->is(II: Keywords.kw_import) && parseModuleImport())
1648 return;
1649 }
1650 break;
1651 case tok::kw_inline:
1652 nextToken();
1653 if (FormatTok->is(Kind: tok::kw_namespace)) {
1654 parseNamespace();
1655 return;
1656 }
1657 break;
1658 case tok::identifier:
1659 if (FormatTok->is(TT: TT_ForEachMacro)) {
1660 parseForOrWhileLoop();
1661 return;
1662 }
1663 if (FormatTok->is(TT: TT_MacroBlockBegin)) {
1664 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1665 /*MunchSemi=*/false);
1666 return;
1667 }
1668 if (FormatTok->is(II: Keywords.kw_import)) {
1669 if (Style.isJavaScript()) {
1670 parseJavaScriptEs6ImportExport();
1671 return;
1672 }
1673 if (Style.Language == FormatStyle::LK_Proto) {
1674 nextToken();
1675 if (FormatTok->is(Kind: tok::kw_public))
1676 nextToken();
1677 if (FormatTok->isNot(Kind: tok::string_literal))
1678 return;
1679 nextToken();
1680 if (FormatTok->is(Kind: tok::semi))
1681 nextToken();
1682 addUnwrappedLine();
1683 return;
1684 }
1685 if (Style.isVerilog()) {
1686 parseVerilogExtern();
1687 return;
1688 }
1689 if (IsCpp && parseModuleImport())
1690 return;
1691 }
1692 if (IsCpp && FormatTok->isOneOf(K1: Keywords.kw_signals, K2: Keywords.kw_qsignals,
1693 Ks: Keywords.kw_slots, Ks: Keywords.kw_qslots)) {
1694 nextToken();
1695 if (FormatTok->is(Kind: tok::colon)) {
1696 nextToken();
1697 addUnwrappedLine();
1698 return;
1699 }
1700 }
1701 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
1702 parseStatementMacro();
1703 return;
1704 }
1705 if (IsCpp && FormatTok->is(TT: TT_NamespaceMacro)) {
1706 parseNamespace();
1707 return;
1708 }
1709 // In Verilog labels can be any expression, so we don't do them here.
1710 // JS doesn't have macros, and within classes colons indicate fields, not
1711 // labels.
1712 // TableGen doesn't have labels.
1713 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1714 Tokens->peekNextToken()->is(Kind: tok::colon) && !Line->MustBeDeclaration) {
1715 nextToken();
1716 if (!Line->InMacroBody || CurrentLines->size() > 1)
1717 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1718 FormatTok->setFinalizedType(TT_GotoLabelColon);
1719 parseLabel(IndentGotoLabels: Style.IndentGotoLabels);
1720 if (HasLabel)
1721 *HasLabel = true;
1722 return;
1723 }
1724 if (Style.isJava() && FormatTok->is(II: Keywords.kw_record)) {
1725 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1726 addUnwrappedLine();
1727 return;
1728 }
1729 // In all other cases, parse the declaration.
1730 break;
1731 default:
1732 break;
1733 }
1734
1735 bool SeenEqual = false;
1736 for (const bool InRequiresExpression =
1737 OpeningBrace && OpeningBrace->isOneOf(K1: TT_RequiresExpressionLBrace,
1738 K2: TT_CompoundRequirementLBrace);
1739 !eof();) {
1740 const FormatToken *Previous = FormatTok->Previous;
1741 switch (FormatTok->Tok.getKind()) {
1742 case tok::at:
1743 nextToken();
1744 if (FormatTok->is(Kind: tok::l_brace)) {
1745 nextToken();
1746 parseBracedList();
1747 break;
1748 }
1749 if (Style.isJava() && FormatTok->is(II: Keywords.kw_interface)) {
1750 nextToken();
1751 break;
1752 }
1753 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1754 case tok::objc_public:
1755 case tok::objc_protected:
1756 case tok::objc_package:
1757 case tok::objc_private:
1758 return parseAccessSpecifier();
1759 case tok::objc_interface:
1760 case tok::objc_implementation:
1761 return parseObjCInterfaceOrImplementation();
1762 case tok::objc_protocol:
1763 if (parseObjCProtocol())
1764 return;
1765 break;
1766 case tok::objc_end:
1767 return; // Handled by the caller.
1768 case tok::objc_optional:
1769 case tok::objc_required:
1770 nextToken();
1771 addUnwrappedLine();
1772 return;
1773 case tok::objc_autoreleasepool:
1774 IsAutoRelease = true;
1775 [[fallthrough]];
1776 case tok::objc_synchronized:
1777 nextToken();
1778 if (!IsAutoRelease && FormatTok->is(Kind: tok::l_paren)) {
1779 // Skip synchronization object
1780 parseParens();
1781 }
1782 if (FormatTok->is(Kind: tok::l_brace)) {
1783 if (Style.BraceWrapping.AfterControlStatement ==
1784 FormatStyle::BWACS_Always) {
1785 addUnwrappedLine();
1786 }
1787 parseBlock();
1788 }
1789 addUnwrappedLine();
1790 return;
1791 case tok::objc_try:
1792 // This branch isn't strictly necessary (the kw_try case below would
1793 // do this too after the tok::at is parsed above). But be explicit.
1794 parseTryCatch();
1795 return;
1796 default:
1797 break;
1798 }
1799 break;
1800 case tok::kw_requires: {
1801 if (IsCpp) {
1802 bool ParsedClause = parseRequires(SeenEqual);
1803 if (ParsedClause)
1804 return;
1805 } else {
1806 nextToken();
1807 }
1808 break;
1809 }
1810 case tok::kw_enum:
1811 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1812 // "template <..., enum ...>".
1813 if (Previous && Previous->isOneOf(K1: tok::less, K2: tok::arrow, Ks: tok::comma)) {
1814 nextToken();
1815 break;
1816 }
1817
1818 // parseEnum falls through and does not yet add an unwrapped line as an
1819 // enum definition can start a structural element.
1820 if (!parseEnum())
1821 break;
1822 // This only applies to C++ and Verilog.
1823 if (!IsCpp && !Style.isVerilog()) {
1824 addUnwrappedLine();
1825 return;
1826 }
1827 break;
1828 case tok::kw_typedef:
1829 nextToken();
1830 if (FormatTok->isOneOf(K1: Keywords.kw_NS_ENUM, K2: Keywords.kw_NS_OPTIONS,
1831 Ks: Keywords.kw_CF_ENUM, Ks: Keywords.kw_CF_OPTIONS,
1832 Ks: Keywords.kw_CF_CLOSED_ENUM,
1833 Ks: Keywords.kw_NS_CLOSED_ENUM)) {
1834 parseEnum();
1835 }
1836 break;
1837 case tok::kw_class:
1838 if (Style.isVerilog()) {
1839 parseBlock();
1840 addUnwrappedLine();
1841 return;
1842 }
1843 if (Style.isTableGen()) {
1844 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1845 // This is same as def and so on.
1846 nextToken();
1847 break;
1848 }
1849 [[fallthrough]];
1850 case tok::kw_struct:
1851 case tok::kw_union:
1852 if (parseStructLike())
1853 return;
1854 break;
1855 case tok::kw_decltype:
1856 nextToken();
1857 if (FormatTok->is(Kind: tok::l_paren)) {
1858 parseParens();
1859 if (FormatTok->Previous &&
1860 FormatTok->Previous->endsSequence(K1: tok::r_paren, Tokens: tok::kw_auto,
1861 Tokens: tok::l_paren)) {
1862 Line->SeenDecltypeAuto = true;
1863 }
1864 }
1865 break;
1866 case tok::period:
1867 nextToken();
1868 // In Java, classes have an implicit static member "class".
1869 if (Style.isJava() && FormatTok && FormatTok->is(Kind: tok::kw_class))
1870 nextToken();
1871 if (Style.isJavaScript() && FormatTok &&
1872 FormatTok->Tok.getIdentifierInfo()) {
1873 // JavaScript only has pseudo keywords, all keywords are allowed to
1874 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1875 nextToken();
1876 }
1877 break;
1878 case tok::semi:
1879 nextToken();
1880 addUnwrappedLine();
1881 return;
1882 case tok::r_brace:
1883 addUnwrappedLine();
1884 return;
1885 case tok::l_paren: {
1886 parseParens();
1887 // Break the unwrapped line if a K&R C function definition has a parameter
1888 // declaration.
1889 if (OpeningBrace || !IsCpp || !Previous || eof())
1890 break;
1891 if (isC78ParameterDecl(Tok: FormatTok,
1892 Next: Tokens->peekNextToken(/*SkipComment=*/true),
1893 FuncName: Previous)) {
1894 addUnwrappedLine();
1895 return;
1896 }
1897 break;
1898 }
1899 case tok::kw_operator:
1900 nextToken();
1901 if (FormatTok->isBinaryOperator())
1902 nextToken();
1903 break;
1904 case tok::caret: {
1905 const auto *Prev = FormatTok->getPreviousNonComment();
1906 nextToken();
1907 if (Prev && Prev->is(Kind: tok::identifier))
1908 break;
1909 // Block return type.
1910 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1911 nextToken();
1912 // Return types: pointers are ok too.
1913 while (FormatTok->is(Kind: tok::star))
1914 nextToken();
1915 }
1916 // Block argument list.
1917 if (FormatTok->is(Kind: tok::l_paren))
1918 parseParens();
1919 // Block body.
1920 if (FormatTok->is(Kind: tok::l_brace))
1921 parseChildBlock();
1922 break;
1923 }
1924 case tok::l_brace:
1925 if (InRequiresExpression)
1926 FormatTok->setFinalizedType(TT_BracedListLBrace);
1927 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1928 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1929 // A block outside of parentheses must be the last part of a
1930 // structural element.
1931 // FIXME: Figure out cases where this is not true, and add projections
1932 // for them (the one we know is missing are lambdas).
1933 if (Style.isJava() &&
1934 Line->Tokens.front().Tok->is(II: Keywords.kw_synchronized)) {
1935 // If necessary, we could set the type to something different than
1936 // TT_FunctionLBrace.
1937 if (Style.BraceWrapping.AfterControlStatement ==
1938 FormatStyle::BWACS_Always) {
1939 addUnwrappedLine();
1940 }
1941 } else if (Style.BraceWrapping.AfterFunction) {
1942 addUnwrappedLine();
1943 }
1944 if (!Previous || Previous->isNot(Kind: TT_TypeDeclarationParen))
1945 FormatTok->setFinalizedType(TT_FunctionLBrace);
1946 parseBlock();
1947 IsDecltypeAutoFunction = false;
1948 addUnwrappedLine();
1949 return;
1950 }
1951 // Otherwise this was a braced init list, and the structural
1952 // element continues.
1953 break;
1954 case tok::kw_try:
1955 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1956 // field/method declaration.
1957 nextToken();
1958 break;
1959 }
1960 // We arrive here when parsing function-try blocks.
1961 if (Style.BraceWrapping.AfterFunction)
1962 addUnwrappedLine();
1963 parseTryCatch();
1964 return;
1965 case tok::identifier: {
1966 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where) &&
1967 Line->MustBeDeclaration) {
1968 addUnwrappedLine();
1969 parseCSharpGenericTypeConstraint();
1970 break;
1971 }
1972 if (FormatTok->is(TT: TT_MacroBlockEnd)) {
1973 addUnwrappedLine();
1974 return;
1975 }
1976
1977 // Function declarations (as opposed to function expressions) are parsed
1978 // on their own unwrapped line by continuing this loop. Function
1979 // expressions (functions that are not on their own line) must not create
1980 // a new unwrapped line, so they are special cased below.
1981 size_t TokenCount = Line->Tokens.size();
1982 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_function) &&
1983 (TokenCount > 1 ||
1984 (TokenCount == 1 &&
1985 Line->Tokens.front().Tok->isNot(Kind: Keywords.kw_async)))) {
1986 tryToParseJSFunction();
1987 break;
1988 }
1989 if ((Style.isJavaScript() || Style.isJava()) &&
1990 FormatTok->is(II: Keywords.kw_interface)) {
1991 if (Style.isJavaScript()) {
1992 // In JavaScript/TypeScript, "interface" can be used as a standalone
1993 // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1995 // interface declaration.
1996 unsigned StoredPosition = Tokens->getPosition();
1997 FormatToken *Next = Tokens->getNextToken();
1998 FormatTok = Tokens->setPosition(StoredPosition);
1999 if (!mustBeJSIdent(Keywords, FormatTok: Next)) {
2000 nextToken();
2001 break;
2002 }
2003 }
2004 parseRecord();
2005 addUnwrappedLine();
2006 return;
2007 }
2008
2009 if (Style.isVerilog()) {
2010 if (FormatTok->is(II: Keywords.kw_table)) {
2011 parseVerilogTable();
2012 return;
2013 }
2014 if (Keywords.isVerilogBegin(Tok: *FormatTok) ||
2015 Keywords.isVerilogHierarchy(Tok: *FormatTok)) {
2016 parseBlock();
2017 addUnwrappedLine();
2018 return;
2019 }
2020 }
2021
2022 if (!IsCpp && FormatTok->is(II: Keywords.kw_interface)) {
2023 if (parseStructLike())
2024 return;
2025 break;
2026 }
2027
2028 if (IsCpp && FormatTok->is(TT: TT_StatementMacro)) {
2029 parseStatementMacro();
2030 return;
2031 }
2032
2033 // See if the following token should start a new unwrapped line.
2034 StringRef Text = FormatTok->TokenText;
2035
2036 FormatToken *PreviousToken = FormatTok;
2037 nextToken();
2038
2039 // JS doesn't have macros, and within classes colons indicate fields, not
2040 // labels.
2041 if (Style.isJavaScript())
2042 break;
2043
2044 auto OneTokenSoFar = [&]() {
2045 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2046 while (I != E && I->Tok->is(Kind: tok::comment))
2047 ++I;
2048 if (Style.isVerilog())
2049 while (I != E && I->Tok->is(Kind: tok::hash))
2050 ++I;
2051 return I != E && (++I == E);
2052 };
2053 if (OneTokenSoFar()) {
2054 // Recognize function-like macro usages without trailing semicolon as
2055 // well as free-standing macros like Q_OBJECT.
2056 bool FunctionLike = FormatTok->is(Kind: tok::l_paren);
2057 if (FunctionLike)
2058 parseParens();
2059
2060 bool FollowedByNewline =
2061 CommentsBeforeNextToken.empty()
2062 ? FormatTok->NewlinesBefore > 0
2063 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2064
2065 if (FollowedByNewline &&
2066 (Text.size() >= 5 ||
2067 (FunctionLike && FormatTok->isNot(Kind: tok::l_paren))) &&
2068 tokenCanStartNewLine(Tok: *FormatTok) && Text == Text.upper()) {
2069 if (PreviousToken->isNot(Kind: TT_UntouchableMacroFunc))
2070 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2071 addUnwrappedLine();
2072 return;
2073 }
2074 }
2075 break;
2076 }
2077 case tok::equal:
2078 if ((Style.isJavaScript() || Style.isCSharp()) &&
2079 FormatTok->is(TT: TT_FatArrow)) {
2080 tryToParseChildBlock();
2081 break;
2082 }
2083
2084 SeenEqual = true;
2085 nextToken();
2086 if (FormatTok->is(Kind: tok::l_brace)) {
2087 // Block kind should probably be set to BK_BracedInit for any language.
2088 // C# needs this change to ensure that array initialisers and object
2089 // initialisers are indented the same way.
2090 if (Style.isCSharp())
2091 FormatTok->setBlockKind(BK_BracedInit);
2092 // TableGen's defset statement has syntax of the form,
2093 // `defset <type> <name> = { <statement>... }`
2094 if (Style.isTableGen() &&
2095 Line->Tokens.begin()->Tok->is(II: Keywords.kw_defset)) {
2096 FormatTok->setFinalizedType(TT_FunctionLBrace);
2097 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2098 /*MunchSemi=*/false);
2099 addUnwrappedLine();
2100 break;
2101 }
2102 nextToken();
2103 parseBracedList();
2104 } else if (Style.Language == FormatStyle::LK_Proto &&
2105 FormatTok->is(Kind: tok::less)) {
2106 nextToken();
2107 parseBracedList(/*IsAngleBracket=*/true);
2108 }
2109 break;
2110 case tok::l_square:
2111 parseSquare();
2112 break;
2113 case tok::kw_new:
2114 if (Style.isCSharp() &&
2115 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2116 (Previous && Previous->isAccessSpecifierKeyword()))) {
2117 nextToken();
2118 } else {
2119 parseNew();
2120 }
2121 break;
2122 case tok::kw_switch:
2123 if (Style.isJava())
2124 parseSwitch(/*IsExpr=*/true);
2125 else
2126 nextToken();
2127 break;
2128 case tok::kw_case:
2129 // Proto: there are no switch/case statements.
2130 if (Style.Language == FormatStyle::LK_Proto) {
2131 nextToken();
2132 return;
2133 }
2134 // In Verilog switch is called case.
2135 if (Style.isVerilog()) {
2136 parseBlock();
2137 addUnwrappedLine();
2138 return;
2139 }
2140 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2141 // 'case: string' field declaration.
2142 nextToken();
2143 break;
2144 }
2145 parseCaseLabel();
2146 break;
2147 case tok::kw_default:
2148 nextToken();
2149 if (Style.isVerilog()) {
2150 if (FormatTok->is(Kind: tok::colon)) {
2151 // The label will be handled in the next iteration.
2152 break;
2153 }
2154 if (FormatTok->is(II: Keywords.kw_clocking)) {
2155 // A default clocking block.
2156 parseBlock();
2157 addUnwrappedLine();
2158 return;
2159 }
2160 parseVerilogCaseLabel();
2161 return;
2162 }
2163 break;
2164 case tok::colon:
2165 nextToken();
2166 if (Style.isVerilog()) {
2167 parseVerilogCaseLabel();
2168 return;
2169 }
2170 break;
2171 case tok::greater:
2172 nextToken();
2173 if (FormatTok->is(Kind: tok::l_brace))
2174 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2175 break;
2176 default:
2177 nextToken();
2178 break;
2179 }
2180 }
2181}
2182
/// Tries to parse the brace-enclosed accessor list of a C# property, e.g.
/// `{ get; set; }` or `{ get { ... } set { ... } }`.
///
/// The current token must be the `{`. Returns false without consuming anything
/// when the style is not C#, when the token before the `{`
/// (`FormatTok->Previous`) is missing or not an identifier, or when the
/// lookahead below sees no get/init/set keyword or sees a `[`. Otherwise the
/// accessor list is consumed and true is returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (!FormatTok->Previous || FormatTok->Previous->isNot(Kind: tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  bool HasAttribute = false;
  // Lookahead only: walk `Tok` over accessor keywords, access specifiers,
  // `[`, `;` and `internal`, recording what was seen. `FormatTok` itself is
  // not advanced here.
  while (!eof()) {
    if (const bool IsAccessorKeyword =
            Tok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init, Ks: Keywords.kw_set);
        IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
        Tok->isOneOf(K1: tok::l_square, K2: tok::semi, Ks: Keywords.kw_internal)) {
      if (IsAccessorKeyword)
        HasSpecialAccessor = true;
      else if (Tok->is(Kind: tok::l_square))
        HasAttribute = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // The scan stops at the first other token; anything but `}` means the
    // accessor list is not of the trivial form.
    if (Tok->isNot(Kind: tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  // Not an accessor list we handle: rewind the token source and give up.
  if (!HasSpecialAccessor || HasAttribute) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  // Consume the opening `{`.
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor list. If a `=` follows, everything up to and
      // including the next `;` is consumed as well.
      nextToken();
      if (FormatTok->is(Kind: tok::equal)) {
        while (!eof() && FormatTok->isNot(Kind: tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // An accessor with a block body; parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      // `=>`: consume through the terminating `;` and emit it as its own
      // line, one level deeper.
      if (FormatTok->is(TT: TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(Kind: tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(K1: Keywords.kw_get, K2: Keywords.kw_init,
                             Ks: Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}
2277
/// Tries to parse a C++ lambda starting at the current `[`.
///
/// For non-C++ input the `[` is consumed and false is returned. If
/// tryToParseLambdaIntroducer() rejects the tokens, false is returned as well.
/// Otherwise the tokens after the introducer are scanned up to the `{`; when
/// the scan reaches it, the `[`, the `{` and the last `->` seen on the way are
/// given their lambda token types and the body is parsed as a child block.
/// Every path after a successful introducer parse returns true, including the
/// ones that stop early at a token the scan does not accept.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!IsCpp) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Last `->` seen while scanning, if any.
  FormatToken *Arrow = nullptr;
  // Set once a `<` directly follows the introducer's `]`.
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(Kind: tok::l_brace)) {
    // Type names and attributes are always skipped.
    if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // Not reached: the loop condition already excludes `{`.
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(Kind: tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    // Tokens that are accepted unconditionally between the introducer and the
    // body.
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      // Only accepted after `->` or inside the template parameter list;
      // otherwise stop scanning without marking a lambda.
      if (Arrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      Arrow = FormatTok;
      nextToken();
      break;
    case tok::kw_requires:
      parseRequiresClause();
      break;
    case tok::equal:
      // `=` is only accepted inside the template parameter list.
      if (!InTemplateParameterList)
        return true;
      nextToken();
      break;
    default:
      return true;
    }
  }

  // The scan reached the `{`: finalize the token types of the lambda.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);

  if (Arrow)
    Arrow->setFinalizedType(TT_LambdaArrow);

  // Record the current line's SeenDecltypeAuto flag for the duration of the
  // body; it is popped again once the child block has been parsed.
  NestedLambdas.push_back(Elt: Line->SeenDecltypeAuto);
  parseChildBlock();
  assert(!NestedLambdas.empty());
  NestedLambdas.pop_back();

  return true;
}
2398
2399bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2400 const FormatToken *Previous = FormatTok->Previous;
2401 const FormatToken *LeftSquare = FormatTok;
2402 nextToken();
2403 if (Previous) {
2404 const auto *PrevPrev = Previous->getPreviousNonComment();
2405 if (Previous->is(Kind: tok::star) && PrevPrev && PrevPrev->isTypeName(LangOpts))
2406 return false;
2407 if (Previous->closesScope()) {
2408 // Not a potential C-style cast.
2409 if (Previous->isNot(Kind: tok::r_paren))
2410 return false;
2411 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2412 // and `int (*)()`.
2413 if (!PrevPrev || PrevPrev->isNoneOf(Ks: tok::greater, Ks: tok::r_paren))
2414 return false;
2415 }
2416 if (Previous && Previous->Tok.getIdentifierInfo() &&
2417 Previous->isNoneOf(Ks: tok::kw_return, Ks: tok::kw_co_await, Ks: tok::kw_co_yield,
2418 Ks: tok::kw_co_return)) {
2419 return false;
2420 }
2421 }
2422 if (LeftSquare->isCppStructuredBinding(IsCpp))
2423 return false;
2424 if (FormatTok->is(Kind: tok::l_square) || tok::isLiteral(K: FormatTok->Tok.getKind()))
2425 return false;
2426 if (FormatTok->is(Kind: tok::r_square)) {
2427 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2428 if (Next->is(Kind: tok::greater))
2429 return false;
2430 }
2431 parseSquare(/*LambdaIntroducer=*/true);
2432 return true;
2433}
2434
2435void UnwrappedLineParser::tryToParseJSFunction() {
2436 assert(FormatTok->is(Keywords.kw_function));
2437 if (FormatTok->is(II: Keywords.kw_async))
2438 nextToken();
2439 // Consume "function".
2440 nextToken();
2441
2442 // Consume * (generator function). Treat it like C++'s overloaded operators.
2443 if (FormatTok->is(Kind: tok::star)) {
2444 FormatTok->setFinalizedType(TT_OverloadedOperator);
2445 nextToken();
2446 }
2447
2448 // Consume function name.
2449 if (FormatTok->is(Kind: tok::identifier))
2450 nextToken();
2451
2452 if (FormatTok->isNot(Kind: tok::l_paren))
2453 return;
2454
2455 // Parse formal parameter list.
2456 parseParens();
2457
2458 if (FormatTok->is(Kind: tok::colon)) {
2459 // Parse a type definition.
2460 nextToken();
2461
2462 // Eat the type declaration. For braced inline object types, balance braces,
2463 // otherwise just parse until finding an l_brace for the function body.
2464 if (FormatTok->is(Kind: tok::l_brace))
2465 tryToParseBracedList();
2466 else
2467 while (FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::semi) && !eof())
2468 nextToken();
2469 }
2470
2471 if (FormatTok->is(Kind: tok::semi))
2472 return;
2473
2474 parseChildBlock();
2475}
2476
2477bool UnwrappedLineParser::tryToParseBracedList() {
2478 if (FormatTok->is(BBK: BK_Unknown))
2479 calculateBraceTypes();
2480 assert(FormatTok->isNot(BK_Unknown));
2481 if (FormatTok->is(BBK: BK_Block))
2482 return false;
2483 nextToken();
2484 parseBracedList();
2485 return true;
2486}
2487
2488bool UnwrappedLineParser::tryToParseChildBlock() {
2489 assert(Style.isJavaScript() || Style.isCSharp());
2490 assert(FormatTok->is(TT_FatArrow));
2491 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2492 // They always start an expression or a child block if followed by a curly
2493 // brace.
2494 nextToken();
2495 if (FormatTok->isNot(Kind: tok::l_brace))
2496 return false;
2497 parseChildBlock();
2498 return true;
2499}
2500
2501bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2502 assert(!IsAngleBracket || !IsEnum);
2503 bool HasError = false;
2504
2505 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2506 // replace this by using parseAssignmentExpression() inside.
2507 do {
2508 if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow) &&
2509 tryToParseChildBlock()) {
2510 continue;
2511 }
2512 if (Style.isJavaScript()) {
2513 if (FormatTok->is(II: Keywords.kw_function)) {
2514 tryToParseJSFunction();
2515 continue;
2516 }
2517 if (FormatTok->is(Kind: tok::l_brace)) {
2518 // Could be a method inside of a braced list `{a() { return 1; }}`.
2519 if (tryToParseBracedList())
2520 continue;
2521 parseChildBlock();
2522 }
2523 }
2524 if (FormatTok->is(Kind: IsAngleBracket ? tok::greater : tok::r_brace)) {
2525 if (IsEnum) {
2526 FormatTok->setBlockKind(BK_Block);
2527 if (!Style.AllowShortEnumsOnASingleLine)
2528 addUnwrappedLine();
2529 }
2530 nextToken();
2531 return !HasError;
2532 }
2533 switch (FormatTok->Tok.getKind()) {
2534 case tok::l_square:
2535 if (Style.isCSharp())
2536 parseSquare();
2537 else
2538 tryToParseLambda();
2539 break;
2540 case tok::l_paren:
2541 parseParens();
2542 // JavaScript can just have free standing methods and getters/setters in
2543 // object literals. Detect them by a "{" following ")".
2544 if (Style.isJavaScript()) {
2545 if (FormatTok->is(Kind: tok::l_brace))
2546 parseChildBlock();
2547 break;
2548 }
2549 break;
2550 case tok::l_brace:
2551 // Assume there are no blocks inside a braced init list apart
2552 // from the ones we explicitly parse out (like lambdas).
2553 FormatTok->setBlockKind(BK_BracedInit);
2554 if (!IsAngleBracket) {
2555 auto *Prev = FormatTok->Previous;
2556 if (Prev && Prev->is(Kind: tok::greater))
2557 Prev->setFinalizedType(TT_TemplateCloser);
2558 }
2559 nextToken();
2560 parseBracedList();
2561 break;
2562 case tok::less:
2563 nextToken();
2564 if (IsAngleBracket)
2565 parseBracedList(/*IsAngleBracket=*/true);
2566 break;
2567 case tok::semi:
2568 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2569 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2570 // used for error recovery if we have otherwise determined that this is
2571 // a braced list.
2572 if (Style.isJavaScript()) {
2573 nextToken();
2574 break;
2575 }
2576 HasError = true;
2577 if (!IsEnum)
2578 return false;
2579 nextToken();
2580 break;
2581 case tok::comma:
2582 nextToken();
2583 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2584 addUnwrappedLine();
2585 break;
2586 case tok::kw_requires:
2587 parseRequiresExpression();
2588 break;
2589 default:
2590 nextToken();
2591 break;
2592 }
2593 } while (!eof());
2594 return false;
2595}
2596
/// Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This applies for all nested scopes as well.
/// \param InMacroCall Whether the parentheses are treated as part of a macro
/// call. It is also switched on when the token before the `(` is a
/// TT_FunctionLikeMacro, is passed down to nested parentheses, and prevents
/// the pair from being marked optional.
///
/// Returns whether there is a `=` token between the parentheses.
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType,
                                      bool InMacroCall) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  auto *LParen = FormatTok;
  // Token before the `(`, if any.
  auto *Prev = FormatTok->Previous;
  bool SeenComma = false;
  bool SeenEqual = false;
  // Set when an `...` is seen directly inside these parentheses.
  bool MightBeFoldExpr = false;
  nextToken();
  // True when the first token inside the parentheses is `{`.
  const bool MightBeStmtExpr = FormatTok->is(Kind: tok::l_brace);
  if (!InMacroCall && Prev && Prev->is(TT: TT_FunctionLikeMacro))
    InMacroCall = true;
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // Nested parentheses; an `=` found inside counts for this level too.
      if (parseParens(AmpAmpTokenType, InMacroCall))
        SeenEqual = true;
      // In Java, a `{` right after the nested parentheses is parsed as a
      // child block.
      if (Style.isJava() && FormatTok->is(Kind: tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren: {
      auto *RParen = FormatTok;
      nextToken();
      if (Prev) {
        // Decides whether this pair of parentheses gets marked as optional.
        // `FormatTok` is already the token after the `)` at this point.
        auto OptionalParens = [&] {
          if (Style.RemoveParentheses == FormatStyle::RPS_Leave ||
              MightBeStmtExpr || MightBeFoldExpr || SeenComma || InMacroCall ||
              Line->InMacroBody || RParen->getPreviousNonComment() == LParen) {
            return false;
          }
          // `((` ... `))`: this pair is directly wrapped in another pair.
          const bool DoubleParens =
              Prev->is(Kind: tok::l_paren) && FormatTok->is(Kind: tok::r_paren);
          if (DoubleParens) {
            const auto *PrevPrev = Prev->getPreviousNonComment();
            // Keep the doubled parentheses after `__attribute__` and
            // `decltype`, and around a condition containing `=` after `if`,
            // `while` or `if constexpr`.
            const bool Excluded =
                PrevPrev &&
                (PrevPrev->isOneOf(K1: tok::kw___attribute, K2: tok::kw_decltype) ||
                 (SeenEqual &&
                  (PrevPrev->isOneOf(K1: tok::kw_if, K2: tok::kw_while) ||
                   PrevPrev->endsSequence(K1: tok::kw_constexpr, Tokens: tok::kw_if))));
            if (!Excluded)
              return true;
          } else {
            // The pair sits between `(` or `,` on the left and `,` or `)` on
            // the right.
            const bool CommaSeparated =
                Prev->isOneOf(K1: tok::l_paren, K2: tok::comma) &&
                FormatTok->isOneOf(K1: tok::comma, K2: tok::r_paren);
            if (CommaSeparated &&
                // LParen is not preceded by ellipsis, comma.
                !Prev->endsSequence(K1: tok::comma, Tokens: tok::ellipsis) &&
                // RParen is not followed by comma, ellipsis.
                !(FormatTok->is(Kind: tok::comma) &&
                  Tokens->peekNextToken()->is(Kind: tok::ellipsis))) {
              return true;
            }
            // `return (...);` / `co_return (...);` under RPS_ReturnStatement,
            // unless the innermost enclosing lambda (or, outside lambdas, the
            // enclosing function) was flagged for `decltype(auto)`.
            const bool ReturnParens =
                Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
                ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
                 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
                Prev->isOneOf(K1: tok::kw_return, K2: tok::kw_co_return) &&
                FormatTok->is(Kind: tok::semi);
            if (ReturnParens)
              return true;
          }
          return false;
        };
        if (OptionalParens()) {
          LParen->Optional = true;
          RParen->Optional = true;
        } else if (Prev->is(TT: TT_TypenameMacro)) {
          // Parentheses following a typename macro.
          LParen->setFinalizedType(TT_TypeDeclarationParen);
          RParen->setFinalizedType(TT_TypeDeclarationParen);
        } else if (Prev->is(Kind: tok::greater) && RParen->Previous == LParen) {
          // Empty parentheses right after `>`: that `>` closes a template.
          Prev->setFinalizedType(TT_TemplateCloser);
        } else if (FormatTok->is(Kind: tok::l_brace) && Prev->is(Kind: tok::amp) &&
                   !Prev->Previous) {
          // `&(...) {` with nothing before the `&`: the `{` is a braced init.
          FormatTok->setBlockKind(BK_BracedInit);
        }
      }
      return SeenEqual;
    }
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return SeenEqual;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // `@{ ... }` is parsed as a braced list.
      nextToken();
      if (FormatTok->is(Kind: tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::comma:
      SeenComma = true;
      nextToken();
      break;
    case tok::ellipsis:
      MightBeFoldExpr = true;
      nextToken();
      break;
    case tok::equal:
      // Note that a fat arrow (kind tok::equal) also sets SeenEqual.
      SeenEqual = true;
      if (Style.isCSharp() && FormatTok->is(TT: TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() && (FormatTok->is(II: Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_switch:
      if (Style.isJava())
        parseSwitch(/*IsExpr=*/true);
      else
        nextToken();
      break;
    case tok::kw_requires:
      parseRequiresExpression();
      break;
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setFinalizedType(AmpAmpTokenType);
      [[fallthrough]];
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // The input ended before the matching `)` was found.
  return SeenEqual;
}
2746
2747void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2748 if (!LambdaIntroducer) {
2749 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2750 if (tryToParseLambda())
2751 return;
2752 }
2753 do {
2754 switch (FormatTok->Tok.getKind()) {
2755 case tok::l_paren:
2756 parseParens();
2757 break;
2758 case tok::r_square:
2759 nextToken();
2760 return;
2761 case tok::r_brace:
2762 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2763 return;
2764 case tok::l_square:
2765 parseSquare();
2766 break;
2767 case tok::l_brace: {
2768 if (!tryToParseBracedList())
2769 parseChildBlock();
2770 break;
2771 }
2772 case tok::at:
2773 case tok::colon:
2774 nextToken();
2775 if (FormatTok->is(Kind: tok::l_brace)) {
2776 nextToken();
2777 parseBracedList();
2778 }
2779 break;
2780 default:
2781 nextToken();
2782 break;
2783 }
2784 } while (!eof());
2785}
2786
2787void UnwrappedLineParser::keepAncestorBraces() {
2788 if (!Style.RemoveBracesLLVM)
2789 return;
2790
2791 const int MaxNestingLevels = 2;
2792 const int Size = NestedTooDeep.size();
2793 if (Size >= MaxNestingLevels)
2794 NestedTooDeep[Size - MaxNestingLevels] = true;
2795 NestedTooDeep.push_back(Elt: false);
2796}
2797
2798static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2799 for (const auto &Token : llvm::reverse(C: Line.Tokens))
2800 if (Token.Tok->isNot(Kind: tok::comment))
2801 return Token.Tok;
2802
2803 return nullptr;
2804}
2805
2806void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2807 FormatToken *Tok = nullptr;
2808
2809 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2810 PreprocessorDirectives.empty() && FormatTok->isNot(Kind: tok::semi)) {
2811 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2812 ? getLastNonComment(Line: *Line)
2813 : Line->Tokens.back().Tok;
2814 assert(Tok);
2815 if (Tok->BraceCount < 0) {
2816 assert(Tok->BraceCount == -1);
2817 Tok = nullptr;
2818 } else {
2819 Tok->BraceCount = -1;
2820 }
2821 }
2822
2823 addUnwrappedLine();
2824 ++Line->Level;
2825 ++Line->UnbracedBodyLevel;
2826 parseStructuralElement();
2827 --Line->UnbracedBodyLevel;
2828
2829 if (Tok) {
2830 assert(!Line->InPPDirective);
2831 Tok = nullptr;
2832 for (const auto &L : llvm::reverse(C&: *CurrentLines)) {
2833 if (!L.InPPDirective && getLastNonComment(Line: L)) {
2834 Tok = L.Tokens.back().Tok;
2835 break;
2836 }
2837 }
2838 assert(Tok);
2839 ++Tok->BraceCount;
2840 }
2841
2842 if (CheckEOF && eof())
2843 addUnwrappedLine();
2844
2845 --Line->Level;
2846}
2847
2848static void markOptionalBraces(FormatToken *LeftBrace) {
2849 if (!LeftBrace)
2850 return;
2851
2852 assert(LeftBrace->is(tok::l_brace));
2853
2854 FormatToken *RightBrace = LeftBrace->MatchingParen;
2855 if (!RightBrace) {
2856 assert(!LeftBrace->Optional);
2857 return;
2858 }
2859
2860 assert(RightBrace->is(tok::r_brace));
2861 assert(RightBrace->MatchingParen == LeftBrace);
2862 assert(LeftBrace->Optional == RightBrace->Optional);
2863
2864 LeftBrace->Optional = true;
2865 RightBrace->Optional = true;
2866}
2867
2868void UnwrappedLineParser::handleAttributes() {
2869 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2870 if (FormatTok->isAttribute())
2871 nextToken();
2872 else if (FormatTok->is(Kind: tok::l_square))
2873 handleCppAttributes();
2874}
2875
2876bool UnwrappedLineParser::handleCppAttributes() {
2877 // Handle [[likely]] / [[unlikely]] attributes.
2878 assert(FormatTok->is(tok::l_square));
2879 if (!tryToParseSimpleAttribute())
2880 return false;
2881 parseSquare();
2882 return true;
2883}
2884
2885/// Returns whether \c Tok begins a block.
2886bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2887 // FIXME: rename the function or make
2888 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2889 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2890 : Tok.is(Kind: tok::l_brace);
2891}
2892
2893FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2894 bool KeepBraces,
2895 bool IsVerilogAssert) {
2896 assert((FormatTok->is(tok::kw_if) ||
2897 (Style.isVerilog() &&
2898 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2899 Keywords.kw_assume, Keywords.kw_cover))) &&
2900 "'if' expected");
2901 nextToken();
2902
2903 if (IsVerilogAssert) {
2904 // Handle `assert #0` and `assert final`.
2905 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
2906 nextToken();
2907 if (FormatTok->is(Kind: tok::numeric_constant))
2908 nextToken();
2909 } else if (FormatTok->isOneOf(K1: Keywords.kw_final, K2: Keywords.kw_property,
2910 Ks: Keywords.kw_sequence)) {
2911 nextToken();
2912 }
2913 }
2914
2915 // TableGen's if statement has the form of `if <cond> then { ... }`.
2916 if (Style.isTableGen()) {
2917 while (!eof() && FormatTok->isNot(Kind: Keywords.kw_then)) {
2918 // Simply skip until then. This range only contains a value.
2919 nextToken();
2920 }
2921 }
2922
2923 // Handle `if !consteval`.
2924 if (FormatTok->is(Kind: tok::exclaim))
2925 nextToken();
2926
2927 bool KeepIfBraces = true;
2928 if (FormatTok->is(Kind: tok::kw_consteval)) {
2929 nextToken();
2930 } else {
2931 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2932 if (FormatTok->isOneOf(K1: tok::kw_constexpr, K2: tok::identifier))
2933 nextToken();
2934 if (FormatTok->is(Kind: tok::l_paren)) {
2935 FormatTok->setFinalizedType(TT_ConditionLParen);
2936 parseParens();
2937 }
2938 }
2939 handleAttributes();
2940 // The then action is optional in Verilog assert statements.
2941 if (IsVerilogAssert && FormatTok->is(Kind: tok::semi)) {
2942 nextToken();
2943 addUnwrappedLine();
2944 return nullptr;
2945 }
2946
2947 bool NeedsUnwrappedLine = false;
2948 keepAncestorBraces();
2949
2950 FormatToken *IfLeftBrace = nullptr;
2951 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2952
2953 if (isBlockBegin(Tok: *FormatTok)) {
2954 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2955 IfLeftBrace = FormatTok;
2956 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2957 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2958 /*MunchSemi=*/true, KeepBraces: KeepIfBraces, IfKind: &IfBlockKind);
2959 setPreviousRBraceType(TT_ControlStatementRBrace);
2960 if (Style.BraceWrapping.BeforeElse)
2961 addUnwrappedLine();
2962 else
2963 NeedsUnwrappedLine = true;
2964 } else if (IsVerilogAssert && FormatTok->is(Kind: tok::kw_else)) {
2965 addUnwrappedLine();
2966 } else {
2967 parseUnbracedBody();
2968 }
2969
2970 if (Style.RemoveBracesLLVM) {
2971 assert(!NestedTooDeep.empty());
2972 KeepIfBraces = KeepIfBraces ||
2973 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2974 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2975 IfBlockKind == IfStmtKind::IfElseIf;
2976 }
2977
2978 bool KeepElseBraces = KeepIfBraces;
2979 FormatToken *ElseLeftBrace = nullptr;
2980 IfStmtKind Kind = IfStmtKind::IfOnly;
2981
2982 if (FormatTok->is(Kind: tok::kw_else)) {
2983 if (Style.RemoveBracesLLVM) {
2984 NestedTooDeep.back() = false;
2985 Kind = IfStmtKind::IfElse;
2986 }
2987 nextToken();
2988 handleAttributes();
2989 if (isBlockBegin(Tok: *FormatTok)) {
2990 const bool FollowedByIf = Tokens->peekNextToken()->is(Kind: tok::kw_if);
2991 FormatTok->setFinalizedType(TT_ElseLBrace);
2992 ElseLeftBrace = FormatTok;
2993 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2994 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2995 FormatToken *IfLBrace =
2996 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2997 /*MunchSemi=*/true, KeepBraces: KeepElseBraces, IfKind: &ElseBlockKind);
2998 setPreviousRBraceType(TT_ElseRBrace);
2999 if (FormatTok->is(Kind: tok::kw_else)) {
3000 KeepElseBraces = KeepElseBraces ||
3001 ElseBlockKind == IfStmtKind::IfOnly ||
3002 ElseBlockKind == IfStmtKind::IfElseIf;
3003 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
3004 KeepElseBraces = true;
3005 assert(ElseLeftBrace->MatchingParen);
3006 markOptionalBraces(LeftBrace: ElseLeftBrace);
3007 }
3008 addUnwrappedLine();
3009 } else if (!IsVerilogAssert && FormatTok->is(Kind: tok::kw_if)) {
3010 const FormatToken *Previous = Tokens->getPreviousToken();
3011 assert(Previous);
3012 const bool IsPrecededByComment = Previous->is(Kind: tok::comment);
3013 if (IsPrecededByComment) {
3014 addUnwrappedLine();
3015 ++Line->Level;
3016 }
3017 bool TooDeep = true;
3018 if (Style.RemoveBracesLLVM) {
3019 Kind = IfStmtKind::IfElseIf;
3020 TooDeep = NestedTooDeep.pop_back_val();
3021 }
3022 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepBraces: KeepIfBraces);
3023 if (Style.RemoveBracesLLVM)
3024 NestedTooDeep.push_back(Elt: TooDeep);
3025 if (IsPrecededByComment)
3026 --Line->Level;
3027 } else {
3028 parseUnbracedBody(/*CheckEOF=*/true);
3029 }
3030 } else {
3031 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3032 if (NeedsUnwrappedLine)
3033 addUnwrappedLine();
3034 }
3035
3036 if (!Style.RemoveBracesLLVM)
3037 return nullptr;
3038
3039 assert(!NestedTooDeep.empty());
3040 KeepElseBraces = KeepElseBraces ||
3041 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3042 NestedTooDeep.back();
3043
3044 NestedTooDeep.pop_back();
3045
3046 if (!KeepIfBraces && !KeepElseBraces) {
3047 markOptionalBraces(LeftBrace: IfLeftBrace);
3048 markOptionalBraces(LeftBrace: ElseLeftBrace);
3049 } else if (IfLeftBrace) {
3050 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3051 if (IfRightBrace) {
3052 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3053 assert(!IfLeftBrace->Optional);
3054 assert(!IfRightBrace->Optional);
3055 IfLeftBrace->MatchingParen = nullptr;
3056 IfRightBrace->MatchingParen = nullptr;
3057 }
3058 }
3059
3060 if (IfKind)
3061 *IfKind = Kind;
3062
3063 return IfLeftBrace;
3064}
3065
3066void UnwrappedLineParser::parseTryCatch() {
3067 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3068 nextToken();
3069 bool NeedsUnwrappedLine = false;
3070 bool HasCtorInitializer = false;
3071 if (FormatTok->is(Kind: tok::colon)) {
3072 auto *Colon = FormatTok;
3073 // We are in a function try block, what comes is an initializer list.
3074 nextToken();
3075 if (FormatTok->is(Kind: tok::identifier)) {
3076 HasCtorInitializer = true;
3077 Colon->setFinalizedType(TT_CtorInitializerColon);
3078 }
3079
3080 // In case identifiers were removed by clang-tidy, what might follow is
3081 // multiple commas in sequence - before the first identifier.
3082 while (FormatTok->is(Kind: tok::comma))
3083 nextToken();
3084
3085 while (FormatTok->is(Kind: tok::identifier)) {
3086 nextToken();
3087 if (FormatTok->is(Kind: tok::l_paren)) {
3088 parseParens();
3089 } else if (FormatTok->is(Kind: tok::l_brace)) {
3090 nextToken();
3091 parseBracedList();
3092 }
3093
3094 // In case identifiers were removed by clang-tidy, what might follow is
3095 // multiple commas in sequence - after the first identifier.
3096 while (FormatTok->is(Kind: tok::comma))
3097 nextToken();
3098 }
3099 }
3100 // Parse try with resource.
3101 if (Style.isJava() && FormatTok->is(Kind: tok::l_paren))
3102 parseParens();
3103
3104 keepAncestorBraces();
3105
3106 if (FormatTok->is(Kind: tok::l_brace)) {
3107 if (HasCtorInitializer)
3108 FormatTok->setFinalizedType(TT_FunctionLBrace);
3109 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3110 parseBlock();
3111 if (Style.BraceWrapping.BeforeCatch)
3112 addUnwrappedLine();
3113 else
3114 NeedsUnwrappedLine = true;
3115 } else if (FormatTok->isNot(Kind: tok::kw_catch)) {
3116 // The C++ standard requires a compound-statement after a try.
3117 // If there's none, we try to assume there's a structuralElement
3118 // and try to continue.
3119 addUnwrappedLine();
3120 ++Line->Level;
3121 parseStructuralElement();
3122 --Line->Level;
3123 }
3124 for (bool SeenCatch = false;;) {
3125 if (FormatTok->is(Kind: tok::at))
3126 nextToken();
3127 if (FormatTok->isNoneOf(Ks: tok::kw_catch, Ks: Keywords.kw___except,
3128 Ks: tok::kw___finally, Ks: tok::objc_catch,
3129 Ks: tok::objc_finally) &&
3130 !((Style.isJava() || Style.isJavaScript()) &&
3131 FormatTok->is(II: Keywords.kw_finally))) {
3132 break;
3133 }
3134 if (FormatTok->is(Kind: tok::kw_catch))
3135 SeenCatch = true;
3136 nextToken();
3137 while (FormatTok->isNot(Kind: tok::l_brace)) {
3138 if (FormatTok->is(Kind: tok::l_paren)) {
3139 parseParens();
3140 continue;
3141 }
3142 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::r_brace) || eof()) {
3143 if (Style.RemoveBracesLLVM)
3144 NestedTooDeep.pop_back();
3145 return;
3146 }
3147 nextToken();
3148 }
3149 if (SeenCatch) {
3150 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3151 SeenCatch = false;
3152 }
3153 NeedsUnwrappedLine = false;
3154 Line->MustBeDeclaration = false;
3155 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3156 parseBlock();
3157 if (Style.BraceWrapping.BeforeCatch)
3158 addUnwrappedLine();
3159 else
3160 NeedsUnwrappedLine = true;
3161 }
3162
3163 if (Style.RemoveBracesLLVM)
3164 NestedTooDeep.pop_back();
3165
3166 if (NeedsUnwrappedLine)
3167 addUnwrappedLine();
3168}
3169
3170void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3171 bool ManageWhitesmithsBraces =
3172 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3173
3174 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3175 // the whole block.
3176 if (ManageWhitesmithsBraces)
3177 ++Line->Level;
3178
3179 // Munch the semicolon after the block. This is more common than one would
3180 // think. Putting the semicolon into its own line is very ugly.
3181 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3182 /*KeepBraces=*/true, /*IfKind=*/nullptr, UnindentWhitesmithsBraces: ManageWhitesmithsBraces);
3183
3184 addUnwrappedLine(AdjustLevel: AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3185
3186 if (ManageWhitesmithsBraces)
3187 --Line->Level;
3188}
3189
3190void UnwrappedLineParser::parseNamespace() {
3191 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3192 "'namespace' expected");
3193
3194 const FormatToken &InitialToken = *FormatTok;
3195 nextToken();
3196 if (InitialToken.is(TT: TT_NamespaceMacro)) {
3197 parseParens();
3198 } else {
3199 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::kw_inline,
3200 Ks: tok::l_square, Ks: tok::period, Ks: tok::l_paren) ||
3201 (Style.isCSharp() && FormatTok->is(Kind: tok::kw_union))) {
3202 if (FormatTok->is(Kind: tok::l_square))
3203 parseSquare();
3204 else if (FormatTok->is(Kind: tok::l_paren))
3205 parseParens();
3206 else
3207 nextToken();
3208 }
3209 }
3210 if (FormatTok->is(Kind: tok::l_brace)) {
3211 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3212
3213 if (ShouldBreakBeforeBrace(Style, InitialToken,
3214 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace))) {
3215 addUnwrappedLine();
3216 }
3217
3218 unsigned AddLevels =
3219 Style.NamespaceIndentation == FormatStyle::NI_All ||
3220 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3221 DeclarationScopeStack.size() > 1)
3222 ? 1u
3223 : 0u;
3224 parseNamespaceOrExportBlock(AddLevels);
3225 }
3226 // FIXME: Add error handling.
3227}
3228
3229void UnwrappedLineParser::parseCppExportBlock() {
3230 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3231}
3232
3233void UnwrappedLineParser::parseNew() {
3234 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3235 nextToken();
3236
3237 if (Style.isCSharp()) {
3238 do {
3239 // Handle constructor invocation, e.g. `new(field: value)`.
3240 if (FormatTok->is(Kind: tok::l_paren))
3241 parseParens();
3242
3243 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3244 if (FormatTok->is(Kind: tok::l_brace))
3245 parseBracedList();
3246
3247 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::comma))
3248 return;
3249
3250 nextToken();
3251 } while (!eof());
3252 }
3253
3254 if (!Style.isJava())
3255 return;
3256
3257 // In Java, we can parse everything up to the parens, which aren't optional.
3258 do {
3259 // There should not be a ;, { or } before the new's open paren.
3260 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::r_brace))
3261 return;
3262
3263 // Consume the parens.
3264 if (FormatTok->is(Kind: tok::l_paren)) {
3265 parseParens();
3266
3267 // If there is a class body of an anonymous class, consume that as child.
3268 if (FormatTok->is(Kind: tok::l_brace))
3269 parseChildBlock();
3270 return;
3271 }
3272 nextToken();
3273 } while (!eof());
3274}
3275
3276void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3277 keepAncestorBraces();
3278
3279 if (isBlockBegin(Tok: *FormatTok)) {
3280 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3281 FormatToken *LeftBrace = FormatTok;
3282 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3283 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3284 /*MunchSemi=*/true, KeepBraces);
3285 setPreviousRBraceType(TT_ControlStatementRBrace);
3286 if (!KeepBraces) {
3287 assert(!NestedTooDeep.empty());
3288 if (!NestedTooDeep.back())
3289 markOptionalBraces(LeftBrace);
3290 }
3291 if (WrapRightBrace)
3292 addUnwrappedLine();
3293 } else {
3294 parseUnbracedBody();
3295 }
3296
3297 if (!KeepBraces)
3298 NestedTooDeep.pop_back();
3299}
3300
3301void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3302 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3303 (Style.isVerilog() &&
3304 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3305 Keywords.kw_always_ff, Keywords.kw_always_latch,
3306 Keywords.kw_final, Keywords.kw_initial,
3307 Keywords.kw_foreach, Keywords.kw_forever,
3308 Keywords.kw_repeat))) &&
3309 "'for', 'while' or foreach macro expected");
3310 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3311 FormatTok->isNoneOf(Ks: tok::kw_for, Ks: tok::kw_while);
3312
3313 nextToken();
3314 // JS' for await ( ...
3315 if (Style.isJavaScript() && FormatTok->is(II: Keywords.kw_await))
3316 nextToken();
3317 if (IsCpp && FormatTok->is(Kind: tok::kw_co_await))
3318 nextToken();
3319 if (HasParens && FormatTok->is(Kind: tok::l_paren)) {
3320 // The type is only set for Verilog basically because we were afraid to
3321 // change the existing behavior for loops. See the discussion on D121756 for
3322 // details.
3323 if (Style.isVerilog())
3324 FormatTok->setFinalizedType(TT_ConditionLParen);
3325 parseParens();
3326 }
3327
3328 if (Style.isVerilog()) {
3329 // Event control.
3330 parseVerilogSensitivityList();
3331 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(Kind: tok::semi) &&
3332 Tokens->getPreviousToken()->is(Kind: tok::r_paren)) {
3333 nextToken();
3334 addUnwrappedLine();
3335 return;
3336 }
3337
3338 handleAttributes();
3339 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3340}
3341
3342void UnwrappedLineParser::parseDoWhile() {
3343 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3344 nextToken();
3345
3346 parseLoopBody(/*KeepBraces=*/true, WrapRightBrace: Style.BraceWrapping.BeforeWhile);
3347
3348 // FIXME: Add error handling.
3349 if (FormatTok->isNot(Kind: tok::kw_while)) {
3350 addUnwrappedLine();
3351 return;
3352 }
3353
3354 FormatTok->setFinalizedType(TT_DoWhile);
3355
3356 // If in Whitesmiths mode, the line with the while() needs to be indented
3357 // to the same level as the block.
3358 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3359 ++Line->Level;
3360
3361 nextToken();
3362 parseStructuralElement();
3363}
3364
3365void UnwrappedLineParser::parseLabel(
3366 FormatStyle::IndentGotoLabelStyle IndentGotoLabels) {
3367 nextToken();
3368 unsigned OldLineLevel = Line->Level;
3369
3370 switch (IndentGotoLabels) {
3371 case FormatStyle::IGLS_NoIndent:
3372 Line->Level = 0;
3373 break;
3374 case FormatStyle::IGLS_OuterIndent:
3375 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3376 --Line->Level;
3377 break;
3378 case FormatStyle::IGLS_HalfIndent:
3379 case FormatStyle::IGLS_InnerIndent:
3380 break;
3381 }
3382
3383 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3384 FormatTok->is(Kind: tok::l_brace)) {
3385
3386 CompoundStatementIndenter Indenter(this, Line->Level,
3387 Style.BraceWrapping.AfterCaseLabel,
3388 Style.BraceWrapping.IndentBraces);
3389 parseBlock();
3390 if (FormatTok->is(Kind: tok::kw_break)) {
3391 if (Style.BraceWrapping.AfterControlStatement ==
3392 FormatStyle::BWACS_Always) {
3393 addUnwrappedLine();
3394 if (!Style.IndentCaseBlocks &&
3395 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3396 ++Line->Level;
3397 }
3398 }
3399 parseStructuralElement();
3400 }
3401 addUnwrappedLine();
3402 } else {
3403 if (FormatTok->is(Kind: tok::semi))
3404 nextToken();
3405 addUnwrappedLine();
3406 }
3407 Line->Level = OldLineLevel;
3408 if (FormatTok->isNot(Kind: tok::l_brace)) {
3409 parseStructuralElement();
3410 addUnwrappedLine();
3411 }
3412}
3413
3414void UnwrappedLineParser::parseCaseLabel() {
3415 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3416 auto *Case = FormatTok;
3417
3418 // FIXME: fix handling of complex expressions here.
3419 do {
3420 nextToken();
3421 if (FormatTok->is(Kind: tok::colon)) {
3422 FormatTok->setFinalizedType(TT_CaseLabelColon);
3423 break;
3424 }
3425 if (Style.isJava() && FormatTok->is(Kind: tok::arrow)) {
3426 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3427 Case->setFinalizedType(TT_SwitchExpressionLabel);
3428 break;
3429 }
3430 } while (!eof());
3431 parseLabel();
3432}
3433
3434void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3435 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3436 nextToken();
3437 if (FormatTok->is(Kind: tok::l_paren))
3438 parseParens();
3439
3440 keepAncestorBraces();
3441
3442 if (FormatTok->is(Kind: tok::l_brace)) {
3443 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3444 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3445 : TT_ControlStatementLBrace);
3446 if (IsExpr)
3447 parseChildBlock();
3448 else
3449 parseBlock();
3450 setPreviousRBraceType(TT_ControlStatementRBrace);
3451 if (!IsExpr)
3452 addUnwrappedLine();
3453 } else {
3454 addUnwrappedLine();
3455 ++Line->Level;
3456 parseStructuralElement();
3457 --Line->Level;
3458 }
3459
3460 if (Style.RemoveBracesLLVM)
3461 NestedTooDeep.pop_back();
3462}
3463
3464void UnwrappedLineParser::parseAccessSpecifier() {
3465 nextToken();
3466 // Understand Qt's slots.
3467 if (FormatTok->isOneOf(K1: Keywords.kw_slots, K2: Keywords.kw_qslots))
3468 nextToken();
3469 // Otherwise, we don't know what it is, and we'd better keep the next token.
3470 if (FormatTok->is(Kind: tok::colon))
3471 nextToken();
3472 addUnwrappedLine();
3473}
3474
3475/// Parses a requires, decides if it is a clause or an expression.
3476/// \pre The current token has to be the requires keyword.
3477/// \returns true if it parsed a clause.
3478bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3479 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3480
3481 // We try to guess if it is a requires clause, or a requires expression. For
3482 // that we first check the next token.
3483 switch (Tokens->peekNextToken(/*SkipComment=*/true)->Tok.getKind()) {
3484 case tok::l_brace:
3485 // This can only be an expression, never a clause.
3486 parseRequiresExpression();
3487 return false;
3488 case tok::l_paren:
3489 // Clauses and expression can start with a paren, it's unclear what we have.
3490 break;
3491 default:
3492 // All other tokens can only be a clause.
3493 parseRequiresClause();
3494 return true;
3495 }
3496
3497 // Looking forward we would have to decide if there are function declaration
3498 // like arguments to the requires expression:
3499 // requires (T t) {
3500 // Or there is a constraint expression for the requires clause:
3501 // requires (C<T> && ...
3502
3503 // But first let's look behind.
3504 auto *PreviousNonComment = FormatTok->getPreviousNonComment();
3505
3506 if (!PreviousNonComment ||
3507 PreviousNonComment->is(TT: TT_RequiresExpressionLBrace)) {
3508 // If there is no token, or an expression left brace, we are a requires
3509 // clause within a requires expression.
3510 parseRequiresClause();
3511 return true;
3512 }
3513
3514 switch (PreviousNonComment->Tok.getKind()) {
3515 case tok::greater:
3516 case tok::r_paren:
3517 case tok::kw_noexcept:
3518 case tok::kw_const:
3519 case tok::star:
3520 case tok::amp:
3521 // This is a requires clause.
3522 parseRequiresClause();
3523 return true;
3524 case tok::ampamp: {
3525 // This can be either:
3526 // if (... && requires (T t) ...)
3527 // Or
3528 // void member(...) && requires (C<T> ...
3529 // We check the one token before that for a const:
3530 // void member(...) const && requires (C<T> ...
3531 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3532 if ((PrevPrev && PrevPrev->is(Kind: tok::kw_const)) || !SeenEqual) {
3533 parseRequiresClause();
3534 return true;
3535 }
3536 break;
3537 }
3538 default:
3539 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3540 // This is a requires clause.
3541 parseRequiresClause();
3542 return true;
3543 }
3544 // It's an expression.
3545 parseRequiresExpression();
3546 return false;
3547 }
3548
3549 // Now we look forward and try to check if the paren content is a parameter
3550 // list. The parameters can be cv-qualified and contain references or
3551 // pointers.
3552 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3553 // of stuff: typename, const, *, &, &&, ::, identifiers.
3554
3555 unsigned StoredPosition = Tokens->getPosition();
3556 FormatToken *NextToken = Tokens->getNextToken();
3557 int Lookahead = 0;
3558 auto PeekNext = [&Lookahead, &NextToken, this] {
3559 ++Lookahead;
3560 NextToken = Tokens->getNextToken();
3561 };
3562
3563 bool FoundType = false;
3564 bool LastWasColonColon = false;
3565 int OpenAngles = 0;
3566
3567 for (; Lookahead < 50; PeekNext()) {
3568 switch (NextToken->Tok.getKind()) {
3569 case tok::kw_volatile:
3570 case tok::kw_const:
3571 case tok::comma:
3572 if (OpenAngles == 0) {
3573 FormatTok = Tokens->setPosition(StoredPosition);
3574 parseRequiresExpression();
3575 return false;
3576 }
3577 break;
3578 case tok::eof:
3579 // Break out of the loop.
3580 Lookahead = 50;
3581 break;
3582 case tok::coloncolon:
3583 LastWasColonColon = true;
3584 break;
3585 case tok::kw_decltype:
3586 case tok::identifier:
3587 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3588 FormatTok = Tokens->setPosition(StoredPosition);
3589 parseRequiresExpression();
3590 return false;
3591 }
3592 FoundType = true;
3593 LastWasColonColon = false;
3594 break;
3595 case tok::less:
3596 ++OpenAngles;
3597 break;
3598 case tok::greater:
3599 --OpenAngles;
3600 break;
3601 default:
3602 if (NextToken->isTypeName(LangOpts)) {
3603 FormatTok = Tokens->setPosition(StoredPosition);
3604 parseRequiresExpression();
3605 return false;
3606 }
3607 break;
3608 }
3609 }
3610 // This seems to be a complicated expression, just assume it's a clause.
3611 FormatTok = Tokens->setPosition(StoredPosition);
3612 parseRequiresClause();
3613 return true;
3614}
3615
3616/// Parses a requires clause.
3617/// \sa parseRequiresExpression
3618///
3619/// Returns if it either has finished parsing the clause, or it detects, that
3620/// the clause is incorrect.
3621void UnwrappedLineParser::parseRequiresClause() {
3622 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3623
3624 // If there is no previous token, we are within a requires expression,
3625 // otherwise we will always have the template or function declaration in front
3626 // of it.
3627 bool InRequiresExpression =
3628 !FormatTok->Previous ||
3629 FormatTok->Previous->is(TT: TT_RequiresExpressionLBrace);
3630
3631 FormatTok->setFinalizedType(InRequiresExpression
3632 ? TT_RequiresClauseInARequiresExpression
3633 : TT_RequiresClause);
3634 nextToken();
3635
3636 // NOTE: parseConstraintExpression is only ever called from this function.
3637 // It could be inlined into here.
3638 parseConstraintExpression();
3639
3640 if (!InRequiresExpression && FormatTok->Previous)
3641 FormatTok->Previous->ClosesRequiresClause = true;
3642}
3643
3644/// Parses a requires expression.
3645/// \sa parseRequiresClause
3646///
3647/// Returns if it either has finished parsing the expression, or it detects,
3648/// that the expression is incorrect.
3649void UnwrappedLineParser::parseRequiresExpression() {
3650 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3651
3652 FormatTok->setFinalizedType(TT_RequiresExpression);
3653 nextToken();
3654
3655 if (FormatTok->is(Kind: tok::l_paren)) {
3656 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3657 parseParens();
3658 }
3659
3660 if (FormatTok->is(Kind: tok::l_brace)) {
3661 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3662 parseChildBlock();
3663 }
3664}
3665
3666/// Parses a constraint expression.
3667///
3668/// This is the body of a requires clause. It returns, when the parsing is
3669/// complete, or the expression is incorrect.
3670void UnwrappedLineParser::parseConstraintExpression() {
3671 // The special handling for lambdas is needed since tryToParseLambda() eats a
3672 // token and if a requires expression is the last part of a requires clause
3673 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3674 // not set on the correct token. Thus we need to be aware if we even expect a
3675 // lambda to be possible.
3676 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3677 bool LambdaNextTimeAllowed = true;
3678
3679 // Within lambda declarations, it is permitted to put a requires clause after
3680 // its template parameter list, which would place the requires clause right
3681 // before the parentheses of the parameters of the lambda declaration. Thus,
3682 // we track if we expect to see grouping parentheses at all.
3683 // Without this check, `requires foo<T> (T t)` in the below example would be
3684 // seen as the whole requires clause, accidentally eating the parameters of
3685 // the lambda.
3686 // [&]<typename T> requires foo<T> (T t) { ... };
3687 bool TopLevelParensAllowed = true;
3688
3689 do {
3690 bool LambdaThisTimeAllowed = std::exchange(obj&: LambdaNextTimeAllowed, new_val: false);
3691
3692 switch (FormatTok->Tok.getKind()) {
3693 case tok::kw_requires:
3694 parseRequiresExpression();
3695 break;
3696
3697 case tok::l_paren:
3698 if (!TopLevelParensAllowed)
3699 return;
3700 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3701 TopLevelParensAllowed = false;
3702 break;
3703
3704 case tok::l_square:
3705 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3706 return;
3707 break;
3708
3709 case tok::kw_const:
3710 case tok::semi:
3711 case tok::kw_class:
3712 case tok::kw_struct:
3713 case tok::kw_union:
3714 return;
3715
3716 case tok::l_brace:
3717 // Potential function body.
3718 return;
3719
3720 case tok::ampamp:
3721 case tok::pipepipe:
3722 FormatTok->setFinalizedType(TT_BinaryOperator);
3723 nextToken();
3724 LambdaNextTimeAllowed = true;
3725 TopLevelParensAllowed = true;
3726 break;
3727
3728 case tok::comma:
3729 case tok::comment:
3730 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3731 nextToken();
3732 break;
3733
3734 case tok::kw_sizeof:
3735 case tok::greater:
3736 case tok::greaterequal:
3737 case tok::greatergreater:
3738 case tok::less:
3739 case tok::lessequal:
3740 case tok::lessless:
3741 case tok::equalequal:
3742 case tok::exclaim:
3743 case tok::exclaimequal:
3744 case tok::plus:
3745 case tok::minus:
3746 case tok::star:
3747 case tok::slash:
3748 LambdaNextTimeAllowed = true;
3749 TopLevelParensAllowed = true;
3750 // Just eat them.
3751 nextToken();
3752 break;
3753
3754 case tok::numeric_constant:
3755 case tok::coloncolon:
3756 case tok::kw_true:
3757 case tok::kw_false:
3758 TopLevelParensAllowed = false;
3759 // Just eat them.
3760 nextToken();
3761 break;
3762
3763 case tok::kw_static_cast:
3764 case tok::kw_const_cast:
3765 case tok::kw_reinterpret_cast:
3766 case tok::kw_dynamic_cast:
3767 nextToken();
3768 if (FormatTok->isNot(Kind: tok::less))
3769 return;
3770
3771 nextToken();
3772 parseBracedList(/*IsAngleBracket=*/true);
3773 break;
3774
3775 default:
3776 if (!FormatTok->Tok.getIdentifierInfo()) {
3777 // Identifiers are part of the default case, we check for more then
3778 // tok::identifier to handle builtin type traits.
3779 return;
3780 }
3781
3782 // We need to differentiate identifiers for a template deduction guide,
3783 // variables, or function return types (the constraint expression has
3784 // ended before that), and basically all other cases. But it's easier to
3785 // check the other way around.
3786 assert(FormatTok->Previous);
3787 switch (FormatTok->Previous->Tok.getKind()) {
3788 case tok::coloncolon: // Nested identifier.
3789 case tok::ampamp: // Start of a function or variable for the
3790 case tok::pipepipe: // constraint expression. (binary)
3791 case tok::exclaim: // The same as above, but unary.
3792 case tok::kw_requires: // Initial identifier of a requires clause.
3793 case tok::equal: // Initial identifier of a concept declaration.
3794 break;
3795 default:
3796 return;
3797 }
3798
3799 // Read identifier with optional template declaration.
3800 nextToken();
3801 if (FormatTok->is(Kind: tok::less)) {
3802 nextToken();
3803 parseBracedList(/*IsAngleBracket=*/true);
3804 }
3805 TopLevelParensAllowed = false;
3806 break;
3807 }
3808 } while (!eof());
3809}
3810
3811bool UnwrappedLineParser::parseEnum() {
3812 const FormatToken &InitialToken = *FormatTok;
3813
3814 // Won't be 'enum' for NS_ENUMs.
3815 if (FormatTok->is(Kind: tok::kw_enum))
3816 nextToken();
3817
3818 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3819 // declarations. An "enum" keyword followed by a colon would be a syntax
3820 // error and thus assume it is just an identifier.
3821 if (Style.isJavaScript() && FormatTok->isOneOf(K1: tok::colon, K2: tok::question))
3822 return false;
3823
3824 // In protobuf, "enum" can be used as a field name.
3825 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(Kind: tok::equal))
3826 return false;
3827
3828 if (IsCpp) {
3829 // Eat up enum class ...
3830 if (FormatTok->isOneOf(K1: tok::kw_class, K2: tok::kw_struct))
3831 nextToken();
3832 while (FormatTok->is(Kind: tok::l_square))
3833 if (!handleCppAttributes())
3834 return false;
3835 }
3836
3837 while (FormatTok->Tok.getIdentifierInfo() ||
3838 FormatTok->isOneOf(K1: tok::colon, K2: tok::coloncolon, Ks: tok::less,
3839 Ks: tok::greater, Ks: tok::comma, Ks: tok::question,
3840 Ks: tok::l_square)) {
3841 if (FormatTok->is(Kind: tok::colon))
3842 FormatTok->setFinalizedType(TT_EnumUnderlyingTypeColon);
3843 if (Style.isVerilog()) {
3844 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3845 nextToken();
3846 // In Verilog the base type can have dimensions.
3847 while (FormatTok->is(Kind: tok::l_square))
3848 parseSquare();
3849 } else {
3850 nextToken();
3851 }
3852 // We can have macros or attributes in between 'enum' and the enum name.
3853 if (FormatTok->is(Kind: tok::l_paren))
3854 parseParens();
3855 if (FormatTok->is(Kind: tok::identifier)) {
3856 nextToken();
3857 // If there are two identifiers in a row, this is likely an elaborate
3858 // return type. In Java, this can be "implements", etc.
3859 if (IsCpp && FormatTok->is(Kind: tok::identifier))
3860 return false;
3861 }
3862 }
3863
3864 // Just a declaration or something is wrong.
3865 if (FormatTok->isNot(Kind: tok::l_brace))
3866 return true;
3867 FormatTok->setFinalizedType(TT_EnumLBrace);
3868 FormatTok->setBlockKind(BK_Block);
3869
3870 if (Style.isJava()) {
3871 // Java enums are different.
3872 parseJavaEnumBody();
3873 return true;
3874 }
3875 if (Style.Language == FormatStyle::LK_Proto) {
3876 parseBlock(/*MustBeDeclaration=*/true);
3877 return true;
3878 }
3879
3880 const bool ManageWhitesmithsBraces =
3881 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3882
3883 if (!Style.AllowShortEnumsOnASingleLine &&
3884 ShouldBreakBeforeBrace(Style, InitialToken,
3885 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace))) {
3886 addUnwrappedLine();
3887
3888 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3889 // the whole block.
3890 if (ManageWhitesmithsBraces)
3891 ++Line->Level;
3892 }
3893 // Parse enum body.
3894 nextToken();
3895 if (!Style.AllowShortEnumsOnASingleLine) {
3896 addUnwrappedLine();
3897 if (!ManageWhitesmithsBraces)
3898 ++Line->Level;
3899 }
3900 const auto OpeningLineIndex = CurrentLines->empty()
3901 ? UnwrappedLine::kInvalidIndex
3902 : CurrentLines->size() - 1;
3903 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3904 if (!Style.AllowShortEnumsOnASingleLine && !ManageWhitesmithsBraces)
3905 --Line->Level;
3906 if (HasError) {
3907 if (FormatTok->is(Kind: tok::semi))
3908 nextToken();
3909 addUnwrappedLine();
3910 }
3911 setPreviousRBraceType(TT_EnumRBrace);
3912 if (ManageWhitesmithsBraces)
3913 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
3914 return true;
3915
3916 // There is no addUnwrappedLine() here so that we fall through to parsing a
3917 // structural element afterwards. Thus, in "enum A {} n, m;",
3918 // "} n, m;" will end up in one unwrapped line.
3919}
3920
3921bool UnwrappedLineParser::parseStructLike() {
3922 // parseRecord falls through and does not yet add an unwrapped line as a
3923 // record declaration or definition can start a structural element.
3924 parseRecord();
3925 // This does not apply to Java, JavaScript and C#.
3926 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3927 if (FormatTok->is(Kind: tok::semi))
3928 nextToken();
3929 addUnwrappedLine();
3930 return true;
3931 }
3932 return false;
3933}
3934
3935namespace {
3936// A class used to set and restore the Token position when peeking
3937// ahead in the token source.
3938class ScopedTokenPosition {
3939 unsigned StoredPosition;
3940 FormatTokenSource *Tokens;
3941
3942public:
3943 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3944 assert(Tokens && "Tokens expected to not be null");
3945 StoredPosition = Tokens->getPosition();
3946 }
3947
3948 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3949};
3950} // namespace
3951
3952// Look to see if we have [[ by looking ahead, if
3953// its not then rewind to the original position.
3954bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3955 ScopedTokenPosition AutoPosition(Tokens);
3956 FormatToken *Tok = Tokens->getNextToken();
3957 // We already read the first [ check for the second.
3958 if (Tok->isNot(Kind: tok::l_square))
3959 return false;
3960 // Double check that the attribute is just something
3961 // fairly simple.
3962 while (Tok->isNot(Kind: tok::eof)) {
3963 if (Tok->is(Kind: tok::r_square))
3964 break;
3965 Tok = Tokens->getNextToken();
3966 }
3967 if (Tok->is(Kind: tok::eof))
3968 return false;
3969 Tok = Tokens->getNextToken();
3970 if (Tok->isNot(Kind: tok::r_square))
3971 return false;
3972 Tok = Tokens->getNextToken();
3973 if (Tok->is(Kind: tok::semi))
3974 return false;
3975 return true;
3976}
3977
3978void UnwrappedLineParser::parseJavaEnumBody() {
3979 assert(FormatTok->is(tok::l_brace));
3980 const FormatToken *OpeningBrace = FormatTok;
3981
3982 // Determine whether the enum is simple, i.e. does not have a semicolon or
3983 // constants with class bodies. Simple enums can be formatted like braced
3984 // lists, contracted to a single line, etc.
3985 unsigned StoredPosition = Tokens->getPosition();
3986 bool IsSimple = true;
3987 FormatToken *Tok = Tokens->getNextToken();
3988 while (Tok->isNot(Kind: tok::eof)) {
3989 if (Tok->is(Kind: tok::r_brace))
3990 break;
3991 if (Tok->isOneOf(K1: tok::l_brace, K2: tok::semi)) {
3992 IsSimple = false;
3993 break;
3994 }
3995 // FIXME: This will also mark enums with braces in the arguments to enum
3996 // constants as "not simple". This is probably fine in practice, though.
3997 Tok = Tokens->getNextToken();
3998 }
3999 FormatTok = Tokens->setPosition(StoredPosition);
4000
4001 if (IsSimple) {
4002 nextToken();
4003 parseBracedList();
4004 addUnwrappedLine();
4005 return;
4006 }
4007
4008 // Parse the body of a more complex enum.
4009 // First add a line for everything up to the "{".
4010 nextToken();
4011 addUnwrappedLine();
4012 ++Line->Level;
4013
4014 // Parse the enum constants.
4015 while (!eof()) {
4016 if (FormatTok->is(Kind: tok::l_brace)) {
4017 // Parse the constant's class body.
4018 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
4019 /*MunchSemi=*/false);
4020 } else if (FormatTok->is(Kind: tok::l_paren)) {
4021 parseParens();
4022 } else if (FormatTok->is(Kind: tok::comma)) {
4023 nextToken();
4024 addUnwrappedLine();
4025 } else if (FormatTok->is(Kind: tok::semi)) {
4026 nextToken();
4027 addUnwrappedLine();
4028 break;
4029 } else if (FormatTok->is(Kind: tok::r_brace)) {
4030 addUnwrappedLine();
4031 break;
4032 } else {
4033 nextToken();
4034 }
4035 }
4036
4037 // Parse the class body after the enum's ";" if any.
4038 parseLevel(OpeningBrace);
4039 nextToken();
4040 --Line->Level;
4041 addUnwrappedLine();
4042}
4043
4044void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
4045 assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
4046 const FormatToken &InitialToken = *FormatTok;
4047 nextToken();
4048
4049 FormatToken *ClassName =
4050 IsJavaRecord && FormatTok->is(Kind: tok::identifier) ? FormatTok : nullptr;
4051 bool IsDerived = false;
4052 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4053 return Tok->is(Kind: tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4054 };
4055 // JavaScript/TypeScript supports anonymous classes like:
4056 // a = class extends foo { }
4057 bool JSPastExtendsOrImplements = false;
4058 // The actual identifier can be a nested name specifier, and in macros
4059 // it is often token-pasted.
4060 // An [[attribute]] can be before the identifier.
4061 while (FormatTok->isOneOf(K1: tok::identifier, K2: tok::coloncolon, Ks: tok::hashhash,
4062 Ks: tok::kw_alignas, Ks: tok::l_square) ||
4063 FormatTok->isAttribute() ||
4064 ((Style.isJava() || Style.isJavaScript()) &&
4065 FormatTok->isOneOf(K1: tok::period, K2: tok::comma))) {
4066 if (Style.isJavaScript() &&
4067 FormatTok->isOneOf(K1: Keywords.kw_extends, K2: Keywords.kw_implements)) {
4068 JSPastExtendsOrImplements = true;
4069 // JavaScript/TypeScript supports inline object types in
4070 // extends/implements positions:
4071 // class Foo implements {bar: number} { }
4072 nextToken();
4073 if (FormatTok->is(Kind: tok::l_brace)) {
4074 tryToParseBracedList();
4075 continue;
4076 }
4077 }
4078 if (FormatTok->is(Kind: tok::l_square) && handleCppAttributes())
4079 continue;
4080 auto *Previous = FormatTok;
4081 nextToken();
4082 switch (FormatTok->Tok.getKind()) {
4083 case tok::l_paren:
4084 // We can have macros in between 'class' and the class name.
4085 if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
4086 // e.g. `struct macro(a) S { int i; };`
4087 Previous->Previous == &InitialToken) {
4088 parseParens();
4089 }
4090 break;
4091 case tok::coloncolon:
4092 case tok::hashhash:
4093 break;
4094 default:
4095 if (JSPastExtendsOrImplements || ClassName ||
4096 Previous->isNot(Kind: tok::identifier) || Previous->is(TT: TT_AttributeMacro)) {
4097 break;
4098 }
4099 if (const auto Text = Previous->TokenText;
4100 Text.size() == 1 || Text != Text.upper()) {
4101 ClassName = Previous;
4102 }
4103 }
4104 }
4105
4106 auto IsListInitialization = [&] {
4107 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4108 return false;
4109 assert(FormatTok->is(tok::l_brace));
4110 const auto *Prev = FormatTok->getPreviousNonComment();
4111 assert(Prev);
4112 return Prev != ClassName && Prev->is(Kind: tok::identifier) &&
4113 Prev->isNot(Kind: Keywords.kw_final) && tryToParseBracedList();
4114 };
4115
4116 if (FormatTok->isOneOf(K1: tok::colon, K2: tok::less)) {
4117 int AngleNestingLevel = 0;
4118 do {
4119 if (FormatTok->is(Kind: tok::less))
4120 ++AngleNestingLevel;
4121 else if (FormatTok->is(Kind: tok::greater))
4122 --AngleNestingLevel;
4123
4124 if (AngleNestingLevel == 0) {
4125 if (FormatTok->is(Kind: tok::colon)) {
4126 IsDerived = true;
4127 } else if (!IsDerived && FormatTok->is(Kind: tok::identifier) &&
4128 FormatTok->Previous->is(Kind: tok::coloncolon)) {
4129 ClassName = FormatTok;
4130 } else if (FormatTok->is(Kind: tok::l_paren) &&
4131 IsNonMacroIdentifier(FormatTok->Previous)) {
4132 break;
4133 }
4134 }
4135 if (FormatTok->is(Kind: tok::l_brace)) {
4136 if (AngleNestingLevel == 0 && IsListInitialization())
4137 return;
4138 calculateBraceTypes(/*ExpectClassBody=*/true);
4139 if (!tryToParseBracedList())
4140 break;
4141 }
4142 if (FormatTok->is(Kind: tok::l_square)) {
4143 FormatToken *Previous = FormatTok->Previous;
4144 if (!Previous || (Previous->isNot(Kind: tok::r_paren) &&
4145 !Previous->isTypeOrIdentifier(LangOpts))) {
4146 // Don't try parsing a lambda if we had a closing parenthesis before,
4147 // it was probably a pointer to an array: int (*)[].
4148 if (!tryToParseLambda())
4149 continue;
4150 } else {
4151 parseSquare();
4152 continue;
4153 }
4154 }
4155 if (FormatTok->is(Kind: tok::semi))
4156 return;
4157 if (Style.isCSharp() && FormatTok->is(II: Keywords.kw_where)) {
4158 addUnwrappedLine();
4159 nextToken();
4160 parseCSharpGenericTypeConstraint();
4161 break;
4162 }
4163 nextToken();
4164 } while (!eof());
4165 }
4166
4167 auto GetBraceTypes =
4168 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4169 switch (RecordTok.Tok.getKind()) {
4170 case tok::kw_class:
4171 return {TT_ClassLBrace, TT_ClassRBrace};
4172 case tok::kw_struct:
4173 return {TT_StructLBrace, TT_StructRBrace};
4174 case tok::kw_union:
4175 return {TT_UnionLBrace, TT_UnionRBrace};
4176 default:
4177 // Useful for e.g. interface.
4178 return {TT_RecordLBrace, TT_RecordRBrace};
4179 }
4180 };
4181 if (FormatTok->is(Kind: tok::l_brace)) {
4182 if (IsListInitialization())
4183 return;
4184 if (ClassName)
4185 ClassName->setFinalizedType(TT_ClassHeadName);
4186 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4187 FormatTok->setFinalizedType(OpenBraceType);
4188 if (ParseAsExpr) {
4189 parseChildBlock();
4190 } else {
4191 if (ShouldBreakBeforeBrace(Style, InitialToken,
4192 IsEmptyBlock: Tokens->peekNextToken()->is(Kind: tok::r_brace),
4193 IsJavaRecord)) {
4194 addUnwrappedLine();
4195 }
4196
4197 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4198 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4199 }
4200 setPreviousRBraceType(ClosingBraceType);
4201 }
4202 // There is no addUnwrappedLine() here so that we fall through to parsing a
4203 // structural element afterwards. Thus, in "class A {} n, m;",
4204 // "} n, m;" will end up in one unwrapped line.
4205}
4206
4207void UnwrappedLineParser::parseObjCMethod() {
4208 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4209 "'(' or identifier expected.");
4210 do {
4211 if (FormatTok->is(Kind: tok::semi)) {
4212 nextToken();
4213 addUnwrappedLine();
4214 return;
4215 } else if (FormatTok->is(Kind: tok::l_brace)) {
4216 if (Style.BraceWrapping.AfterFunction)
4217 addUnwrappedLine();
4218 parseBlock();
4219 addUnwrappedLine();
4220 return;
4221 } else {
4222 nextToken();
4223 }
4224 } while (!eof());
4225}
4226
4227void UnwrappedLineParser::parseObjCProtocolList() {
4228 assert(FormatTok->is(tok::less) && "'<' expected.");
4229 do {
4230 nextToken();
4231 // Early exit in case someone forgot a close angle.
4232 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4233 return;
4234 } while (!eof() && FormatTok->isNot(Kind: tok::greater));
4235 nextToken(); // Skip '>'.
4236}
4237
4238void UnwrappedLineParser::parseObjCUntilAtEnd() {
4239 do {
4240 if (FormatTok->is(Kind: tok::objc_end)) {
4241 nextToken();
4242 addUnwrappedLine();
4243 break;
4244 }
4245 if (FormatTok->is(Kind: tok::l_brace)) {
4246 parseBlock();
4247 // In ObjC interfaces, nothing should be following the "}".
4248 addUnwrappedLine();
4249 } else if (FormatTok->is(Kind: tok::r_brace)) {
4250 // Ignore stray "}". parseStructuralElement doesn't consume them.
4251 nextToken();
4252 addUnwrappedLine();
4253 } else if (FormatTok->isOneOf(K1: tok::minus, K2: tok::plus)) {
4254 nextToken();
4255 parseObjCMethod();
4256 } else {
4257 parseStructuralElement();
4258 }
4259 } while (!eof());
4260}
4261
4262void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4263 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4264 nextToken();
4265 nextToken(); // interface name
4266
4267 // @interface can be followed by a lightweight generic
4268 // specialization list, then either a base class or a category.
4269 if (FormatTok->is(Kind: tok::less))
4270 parseObjCLightweightGenerics();
4271 if (FormatTok->is(Kind: tok::colon)) {
4272 nextToken();
4273 nextToken(); // base class name
4274 // The base class can also have lightweight generics applied to it.
4275 if (FormatTok->is(Kind: tok::less))
4276 parseObjCLightweightGenerics();
4277 } else if (FormatTok->is(Kind: tok::l_paren)) {
4278 // Skip category, if present.
4279 parseParens();
4280 }
4281
4282 if (FormatTok->is(Kind: tok::less))
4283 parseObjCProtocolList();
4284
4285 if (FormatTok->is(Kind: tok::l_brace)) {
4286 if (Style.BraceWrapping.AfterObjCDeclaration)
4287 addUnwrappedLine();
4288 parseBlock(/*MustBeDeclaration=*/true);
4289 }
4290
4291 // With instance variables, this puts '}' on its own line. Without instance
4292 // variables, this ends the @interface line.
4293 addUnwrappedLine();
4294
4295 parseObjCUntilAtEnd();
4296}
4297
4298void UnwrappedLineParser::parseObjCLightweightGenerics() {
4299 assert(FormatTok->is(tok::less));
4300 // Unlike protocol lists, generic parameterizations support
4301 // nested angles:
4302 //
4303 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4304 // NSObject <NSCopying, NSSecureCoding>
4305 //
4306 // so we need to count how many open angles we have left.
4307 unsigned NumOpenAngles = 1;
4308 do {
4309 nextToken();
4310 // Early exit in case someone forgot a close angle.
4311 if (FormatTok->isOneOf(K1: tok::semi, K2: tok::l_brace, Ks: tok::objc_end))
4312 break;
4313 if (FormatTok->is(Kind: tok::less)) {
4314 ++NumOpenAngles;
4315 } else if (FormatTok->is(Kind: tok::greater)) {
4316 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4317 --NumOpenAngles;
4318 }
4319 } while (!eof() && NumOpenAngles != 0);
4320 nextToken(); // Skip '>'.
4321}
4322
4323// Returns true for the declaration/definition form of @protocol,
4324// false for the expression form.
4325bool UnwrappedLineParser::parseObjCProtocol() {
4326 assert(FormatTok->is(tok::objc_protocol));
4327 nextToken();
4328
4329 if (FormatTok->is(Kind: tok::l_paren)) {
4330 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4331 return false;
4332 }
4333
4334 // The definition/declaration form,
4335 // @protocol Foo
4336 // - (int)someMethod;
4337 // @end
4338
4339 nextToken(); // protocol name
4340
4341 if (FormatTok->is(Kind: tok::less))
4342 parseObjCProtocolList();
4343
4344 // Check for protocol declaration.
4345 if (FormatTok->is(Kind: tok::semi)) {
4346 nextToken();
4347 addUnwrappedLine();
4348 return true;
4349 }
4350
4351 addUnwrappedLine();
4352 parseObjCUntilAtEnd();
4353 return true;
4354}
4355
4356void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4357 bool IsImport = FormatTok->is(II: Keywords.kw_import);
4358 assert(IsImport || FormatTok->is(tok::kw_export));
4359 nextToken();
4360
4361 // Consume the "default" in "export default class/function".
4362 if (FormatTok->is(Kind: tok::kw_default))
4363 nextToken();
4364
4365 // Consume "async function", "function" and "default function", so that these
4366 // get parsed as free-standing JS functions, i.e. do not require a trailing
4367 // semicolon.
4368 if (FormatTok->is(II: Keywords.kw_async))
4369 nextToken();
4370 if (FormatTok->is(II: Keywords.kw_function)) {
4371 nextToken();
4372 return;
4373 }
4374
4375 // For imports, `export *`, `export {...}`, consume the rest of the line up
4376 // to the terminating `;`. For everything else, just return and continue
4377 // parsing the structural element, i.e. the declaration or expression for
4378 // `export default`.
4379 if (!IsImport && FormatTok->isNoneOf(Ks: tok::l_brace, Ks: tok::star) &&
4380 !FormatTok->isStringLiteral() &&
4381 !(FormatTok->is(II: Keywords.kw_type) &&
4382 Tokens->peekNextToken()->isOneOf(K1: tok::l_brace, K2: tok::star))) {
4383 return;
4384 }
4385
4386 while (!eof()) {
4387 if (FormatTok->is(Kind: tok::semi))
4388 return;
4389 if (Line->Tokens.empty()) {
4390 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4391 // import statement should terminate.
4392 return;
4393 }
4394 if (FormatTok->is(Kind: tok::l_brace)) {
4395 FormatTok->setBlockKind(BK_Block);
4396 nextToken();
4397 parseBracedList();
4398 } else {
4399 nextToken();
4400 }
4401 }
4402}
4403
4404void UnwrappedLineParser::parseStatementMacro() {
4405 nextToken();
4406 if (FormatTok->is(Kind: tok::l_paren))
4407 parseParens();
4408 if (FormatTok->is(Kind: tok::semi))
4409 nextToken();
4410 addUnwrappedLine();
4411}
4412
4413void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4414 // consume things like a::`b.c[d:e] or a::*
4415 while (true) {
4416 if (FormatTok->isOneOf(K1: tok::star, K2: tok::period, Ks: tok::periodstar,
4417 Ks: tok::coloncolon, Ks: tok::hash) ||
4418 Keywords.isVerilogIdentifier(Tok: *FormatTok)) {
4419 nextToken();
4420 } else if (FormatTok->is(Kind: tok::l_square)) {
4421 parseSquare();
4422 } else {
4423 break;
4424 }
4425 }
4426}
4427
4428void UnwrappedLineParser::parseVerilogSensitivityList() {
4429 if (FormatTok->isNot(Kind: tok::at))
4430 return;
4431 nextToken();
4432 // A block event expression has 2 at signs.
4433 if (FormatTok->is(Kind: tok::at))
4434 nextToken();
4435 switch (FormatTok->Tok.getKind()) {
4436 case tok::star:
4437 nextToken();
4438 break;
4439 case tok::l_paren:
4440 parseParens();
4441 break;
4442 default:
4443 parseVerilogHierarchyIdentifier();
4444 break;
4445 }
4446}
4447
4448unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4449 unsigned AddLevels = 0;
4450
4451 if (FormatTok->is(II: Keywords.kw_clocking)) {
4452 nextToken();
4453 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4454 nextToken();
4455 parseVerilogSensitivityList();
4456 if (FormatTok->is(Kind: tok::semi))
4457 nextToken();
4458 } else if (FormatTok->isOneOf(K1: tok::kw_case, K2: Keywords.kw_casex,
4459 Ks: Keywords.kw_casez, Ks: Keywords.kw_randcase,
4460 Ks: Keywords.kw_randsequence)) {
4461 if (Style.IndentCaseLabels)
4462 AddLevels++;
4463 nextToken();
4464 if (FormatTok->is(Kind: tok::l_paren)) {
4465 FormatTok->setFinalizedType(TT_ConditionLParen);
4466 parseParens();
4467 }
4468 if (FormatTok->isOneOf(K1: Keywords.kw_inside, K2: Keywords.kw_matches))
4469 nextToken();
4470 // The case header has no semicolon.
4471 } else {
4472 // "module" etc.
4473 nextToken();
4474 // all the words like the name of the module and specifiers like
4475 // "automatic" and the width of function return type
4476 while (true) {
4477 if (FormatTok->is(Kind: tok::l_square)) {
4478 auto Prev = FormatTok->getPreviousNonComment();
4479 if (Prev && Keywords.isVerilogIdentifier(Tok: *Prev))
4480 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4481 parseSquare();
4482 } else if (Keywords.isVerilogIdentifier(Tok: *FormatTok) ||
4483 FormatTok->isOneOf(K1: tok::hash, K2: tok::hashhash, Ks: tok::coloncolon,
4484 Ks: Keywords.kw_automatic, Ks: tok::kw_static)) {
4485 nextToken();
4486 } else {
4487 break;
4488 }
4489 }
4490
4491 auto NewLine = [this]() {
4492 addUnwrappedLine();
4493 Line->IsContinuation = true;
4494 };
4495
4496 // package imports
4497 while (FormatTok->is(II: Keywords.kw_import)) {
4498 NewLine();
4499 nextToken();
4500 parseVerilogHierarchyIdentifier();
4501 if (FormatTok->is(Kind: tok::semi))
4502 nextToken();
4503 }
4504
4505 // parameters and ports
4506 if (FormatTok->is(II: Keywords.kw_verilogHash)) {
4507 NewLine();
4508 nextToken();
4509 if (FormatTok->is(Kind: tok::l_paren)) {
4510 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4511 parseParens();
4512 }
4513 }
4514 if (FormatTok->is(Kind: tok::l_paren)) {
4515 NewLine();
4516 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4517 parseParens();
4518 }
4519
4520 // extends and implements
4521 if (FormatTok->is(II: Keywords.kw_extends)) {
4522 NewLine();
4523 nextToken();
4524 parseVerilogHierarchyIdentifier();
4525 if (FormatTok->is(Kind: tok::l_paren))
4526 parseParens();
4527 }
4528 if (FormatTok->is(II: Keywords.kw_implements)) {
4529 NewLine();
4530 do {
4531 nextToken();
4532 parseVerilogHierarchyIdentifier();
4533 } while (FormatTok->is(Kind: tok::comma));
4534 }
4535
4536 // Coverage event for cover groups.
4537 if (FormatTok->is(Kind: tok::at)) {
4538 NewLine();
4539 parseVerilogSensitivityList();
4540 }
4541
4542 if (FormatTok->is(Kind: tok::semi))
4543 nextToken(/*LevelDifference=*/1);
4544 addUnwrappedLine();
4545 }
4546
4547 return AddLevels;
4548}
4549
4550void UnwrappedLineParser::parseVerilogTable() {
4551 assert(FormatTok->is(Keywords.kw_table));
4552 nextToken(/*LevelDifference=*/1);
4553 addUnwrappedLine();
4554
4555 auto InitialLevel = Line->Level++;
4556 while (!eof() && !Keywords.isVerilogEnd(Tok: *FormatTok)) {
4557 FormatToken *Tok = FormatTok;
4558 nextToken();
4559 if (Tok->is(Kind: tok::semi))
4560 addUnwrappedLine();
4561 else if (Tok->isOneOf(K1: tok::star, K2: tok::colon, Ks: tok::question, Ks: tok::minus))
4562 Tok->setFinalizedType(TT_VerilogTableItem);
4563 }
4564 Line->Level = InitialLevel;
4565 nextToken(/*LevelDifference=*/-1);
4566 addUnwrappedLine();
4567}
4568
4569void UnwrappedLineParser::parseVerilogCaseLabel() {
4570 // The label will get unindented in AnnotatingParser. If there are no leading
4571 // spaces, indent the rest here so that things inside the block will be
4572 // indented relative to things outside. We don't use parseLabel because we
4573 // don't know whether this colon is a label or a ternary expression at this
4574 // point.
4575 auto OrigLevel = Line->Level;
4576 auto FirstLine = CurrentLines->size();
4577 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4578 ++Line->Level;
4579 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(Tok: *FormatTok))
4580 --Line->Level;
4581 parseStructuralElement();
4582 // Restore the indentation in both the new line and the line that has the
4583 // label.
4584 if (CurrentLines->size() > FirstLine)
4585 (*CurrentLines)[FirstLine].Level = OrigLevel;
4586 Line->Level = OrigLevel;
4587}
4588
4589void UnwrappedLineParser::parseVerilogExtern() {
4590 assert(
4591 FormatTok->isOneOf(tok::kw_extern, tok::kw_export, Keywords.kw_import));
4592 nextToken();
4593 // "DPI-C"
4594 if (FormatTok->is(Kind: tok::string_literal))
4595 nextToken();
4596 if (FormatTok->isOneOf(K1: Keywords.kw_context, K2: Keywords.kw_pure))
4597 nextToken();
4598 if (Keywords.isVerilogIdentifier(Tok: *FormatTok))
4599 nextToken();
4600 if (FormatTok->is(Kind: tok::equal))
4601 nextToken();
4602 if (Keywords.isVerilogHierarchy(Tok: *FormatTok))
4603 parseVerilogHierarchyHeader();
4604}
4605
4606bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4607 for (const auto &N : Line.Tokens) {
4608 if (N.Tok->MacroCtx)
4609 return true;
4610 for (const UnwrappedLine &Child : N.Children)
4611 if (containsExpansion(Line: Child))
4612 return true;
4613 }
4614 return false;
4615}
4616
4617void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4618 if (Line->Tokens.empty())
4619 return;
4620 LLVM_DEBUG({
4621 if (!parsingPPDirective()) {
4622 llvm::dbgs() << "Adding unwrapped line:\n";
4623 printDebugInfo(*Line);
4624 }
4625 });
4626
4627 // If this line closes a block when in Whitesmiths mode, remember that
4628 // information so that the level can be decreased after the line is added.
4629 // This has to happen after the addition of the line since the line itself
4630 // needs to be indented.
4631 bool ClosesWhitesmithsBlock =
4632 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4633 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4634
4635 // If the current line was expanded from a macro call, we use it to
4636 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4637 // line and the unexpanded token stream.
4638 if (!parsingPPDirective() && !InExpansion && containsExpansion(Line: *Line)) {
4639 if (!Reconstruct)
4640 Reconstruct.emplace(args&: Line->Level, args&: Unexpanded);
4641 Reconstruct->addLine(Line: *Line);
4642
4643 // While the reconstructed unexpanded lines are stored in the normal
4644 // flow of lines, the expanded lines are stored on the side to be analyzed
4645 // in an extra step.
4646 CurrentExpandedLines.push_back(Elt: std::move(*Line));
4647
4648 if (Reconstruct->finished()) {
4649 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4650 assert(!Reconstructed.Tokens.empty() &&
4651 "Reconstructed must at least contain the macro identifier.");
4652 assert(!parsingPPDirective());
4653 LLVM_DEBUG({
4654 llvm::dbgs() << "Adding unexpanded line:\n";
4655 printDebugInfo(Reconstructed);
4656 });
4657 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4658 Lines.push_back(Elt: std::move(Reconstructed));
4659 CurrentExpandedLines.clear();
4660 Reconstruct.reset();
4661 }
4662 } else {
4663 // At the top level we only get here when no unexpansion is going on, or
4664 // when conditional formatting led to unfinished macro reconstructions.
4665 assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
4666 CurrentLines->push_back(Elt: std::move(*Line));
4667 }
4668 Line->Tokens.clear();
4669 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4670 Line->FirstStartColumn = 0;
4671 Line->IsContinuation = false;
4672 Line->SeenDecltypeAuto = false;
4673
4674 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4675 --Line->Level;
4676 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4677 CurrentLines->append(
4678 in_start: std::make_move_iterator(i: PreprocessorDirectives.begin()),
4679 in_end: std::make_move_iterator(i: PreprocessorDirectives.end()));
4680 PreprocessorDirectives.clear();
4681 }
4682 // Disconnect the current token from the last token on the previous line.
4683 FormatTok->Previous = nullptr;
4684}
4685
4686bool UnwrappedLineParser::eof() const { return FormatTok->is(Kind: tok::eof); }
4687
4688bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4689 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4690 FormatTok.NewlinesBefore > 0;
4691}
4692
4693// Checks if \p FormatTok is a line comment that continues the line comment
4694// section on \p Line.
4695static bool
4696continuesLineCommentSection(const FormatToken &FormatTok,
4697 const UnwrappedLine &Line, const FormatStyle &Style,
4698 const llvm::Regex &CommentPragmasRegex) {
4699 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4700 return false;
4701
4702 StringRef IndentContent = FormatTok.TokenText;
4703 if (FormatTok.TokenText.starts_with(Prefix: "//") ||
4704 FormatTok.TokenText.starts_with(Prefix: "/*")) {
4705 IndentContent = FormatTok.TokenText.substr(Start: 2);
4706 }
4707 if (CommentPragmasRegex.match(String: IndentContent))
4708 return false;
4709
4710 // If Line starts with a line comment, then FormatTok continues the comment
4711 // section if its original column is greater or equal to the original start
4712 // column of the line.
4713 //
4714 // Define the min column token of a line as follows: if a line ends in '{' or
4715 // contains a '{' followed by a line comment, then the min column token is
4716 // that '{'. Otherwise, the min column token of the line is the first token of
4717 // the line.
4718 //
4719 // If Line starts with a token other than a line comment, then FormatTok
4720 // continues the comment section if its original column is greater than the
4721 // original start column of the min column token of the line.
4722 //
4723 // For example, the second line comment continues the first in these cases:
4724 //
4725 // // first line
4726 // // second line
4727 //
4728 // and:
4729 //
4730 // // first line
4731 // // second line
4732 //
4733 // and:
4734 //
4735 // int i; // first line
4736 // // second line
4737 //
4738 // and:
4739 //
4740 // do { // first line
4741 // // second line
4742 // int i;
4743 // } while (true);
4744 //
4745 // and:
4746 //
4747 // enum {
4748 // a, // first line
4749 // // second line
4750 // b
4751 // };
4752 //
4753 // The second line comment doesn't continue the first in these cases:
4754 //
4755 // // first line
4756 // // second line
4757 //
4758 // and:
4759 //
4760 // int i; // first line
4761 // // second line
4762 //
4763 // and:
4764 //
4765 // do { // first line
4766 // // second line
4767 // int i;
4768 // } while (true);
4769 //
4770 // and:
4771 //
4772 // enum {
4773 // a, // first line
4774 // // second line
4775 // };
4776 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4777
4778 // Scan for '{//'. If found, use the column of '{' as a min column for line
4779 // comment section continuation.
4780 const FormatToken *PreviousToken = nullptr;
4781 for (const UnwrappedLineNode &Node : Line.Tokens) {
4782 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace) &&
4783 isLineComment(FormatTok: *Node.Tok)) {
4784 MinColumnToken = PreviousToken;
4785 break;
4786 }
4787 PreviousToken = Node.Tok;
4788
4789 // Grab the last newline preceding a token in this unwrapped line.
4790 if (Node.Tok->NewlinesBefore > 0)
4791 MinColumnToken = Node.Tok;
4792 }
4793 if (PreviousToken && PreviousToken->is(Kind: tok::l_brace))
4794 MinColumnToken = PreviousToken;
4795
4796 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4797 MinColumnToken);
4798}
4799
4800void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4801 bool JustComments = Line->Tokens.empty();
4802 for (FormatToken *Tok : CommentsBeforeNextToken) {
4803 // Line comments that belong to the same line comment section are put on the
4804 // same line since later we might want to reflow content between them.
4805 // Additional fine-grained breaking of line comment sections is controlled
4806 // by the class BreakableLineCommentSection in case it is desirable to keep
4807 // several line comment sections in the same unwrapped line.
4808 //
4809 // FIXME: Consider putting separate line comment sections as children to the
4810 // unwrapped line instead.
4811 Tok->ContinuesLineCommentSection =
4812 continuesLineCommentSection(FormatTok: *Tok, Line: *Line, Style, CommentPragmasRegex);
4813 if (isOnNewLine(FormatTok: *Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4814 addUnwrappedLine();
4815 pushToken(Tok);
4816 }
4817 if (NewlineBeforeNext && JustComments)
4818 addUnwrappedLine();
4819 CommentsBeforeNextToken.clear();
4820}
4821
4822void UnwrappedLineParser::nextToken(int LevelDifference) {
4823 if (eof())
4824 return;
4825 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4826 pushToken(Tok: FormatTok);
4827 FormatToken *Previous = FormatTok;
4828 if (!Style.isJavaScript())
4829 readToken(LevelDifference);
4830 else
4831 readTokenWithJavaScriptASI();
4832 FormatTok->Previous = Previous;
4833 if (Style.isVerilog()) {
4834 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4835 // keywords like `begin`, we can't treat them the same as left braces
4836 // because some contexts require one of them. For example structs use
4837 // braces and if blocks use keywords, and a left brace can occur in an if
4838 // statement, but it is not a block. For keywords like `end`, we simply
4839 // treat them the same as right braces.
4840 if (Keywords.isVerilogEnd(Tok: *FormatTok))
4841 FormatTok->Tok.setKind(tok::r_brace);
4842 }
4843}
4844
4845void UnwrappedLineParser::distributeComments(
4846 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4847 // Whether or not a line comment token continues a line is controlled by
4848 // the method continuesLineCommentSection, with the following caveat:
4849 //
4850 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4851 // that each comment line from the trail is aligned with the next token, if
4852 // the next token exists. If a trail exists, the beginning of the maximal
4853 // trail is marked as a start of a new comment section.
4854 //
4855 // For example in this code:
4856 //
4857 // int a; // line about a
4858 // // line 1 about b
4859 // // line 2 about b
4860 // int b;
4861 //
4862 // the two lines about b form a maximal trail, so there are two sections, the
4863 // first one consisting of the single comment "// line about a" and the
4864 // second one consisting of the next two comments.
4865 if (Comments.empty())
4866 return;
4867 bool ShouldPushCommentsInCurrentLine = true;
4868 bool HasTrailAlignedWithNextToken = false;
4869 unsigned StartOfTrailAlignedWithNextToken = 0;
4870 if (NextTok) {
4871 // We are skipping the first element intentionally.
4872 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4873 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4874 HasTrailAlignedWithNextToken = true;
4875 StartOfTrailAlignedWithNextToken = i;
4876 }
4877 }
4878 }
4879 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4880 FormatToken *FormatTok = Comments[i];
4881 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4882 FormatTok->ContinuesLineCommentSection = false;
4883 } else {
4884 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4885 FormatTok: *FormatTok, Line: *Line, Style, CommentPragmasRegex);
4886 }
4887 if (!FormatTok->ContinuesLineCommentSection &&
4888 (isOnNewLine(FormatTok: *FormatTok) || FormatTok->IsFirst)) {
4889 ShouldPushCommentsInCurrentLine = false;
4890 }
4891 if (ShouldPushCommentsInCurrentLine)
4892 pushToken(Tok: FormatTok);
4893 else
4894 CommentsBeforeNextToken.push_back(Elt: FormatTok);
4895 }
4896}
4897
4898void UnwrappedLineParser::readToken(int LevelDifference) {
4899 SmallVector<FormatToken *, 1> Comments;
4900 bool PreviousWasComment = false;
4901 bool FirstNonCommentOnLine = false;
4902 do {
4903 FormatTok = Tokens->getNextToken();
4904 assert(FormatTok);
4905 while (FormatTok->isOneOf(K1: TT_ConflictStart, K2: TT_ConflictEnd,
4906 Ks: TT_ConflictAlternative)) {
4907 if (FormatTok->is(TT: TT_ConflictStart))
4908 conditionalCompilationStart(/*Unreachable=*/false);
4909 else if (FormatTok->is(TT: TT_ConflictAlternative))
4910 conditionalCompilationAlternative();
4911 else if (FormatTok->is(TT: TT_ConflictEnd))
4912 conditionalCompilationEnd();
4913 FormatTok = Tokens->getNextToken();
4914 FormatTok->MustBreakBefore = true;
4915 FormatTok->MustBreakBeforeFinalized = true;
4916 }
4917
4918 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4919 const FormatToken &Tok,
4920 bool PreviousWasComment) {
4921 auto IsFirstOnLine = [](const FormatToken &Tok) {
4922 return Tok.HasUnescapedNewline || Tok.IsFirst;
4923 };
4924
4925 // Consider preprocessor directives preceded by block comments as first
4926 // on line.
4927 if (PreviousWasComment)
4928 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4929 return IsFirstOnLine(Tok);
4930 };
4931
4932 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4933 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4934 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4935
4936 while (!Line->InPPDirective && FormatTok->is(Kind: tok::hash) &&
4937 FirstNonCommentOnLine) {
4938 // In Verilog, the backtick is used for macro invocations. In TableGen,
4939 // the single hash is used for the paste operator.
4940 const auto *Next = Tokens->peekNextToken();
4941 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(Tok: *Next)) ||
4942 (Style.isTableGen() &&
4943 Next->isNoneOf(Ks: tok::kw_else, Ks: tok::pp_define, Ks: tok::pp_ifdef,
4944 Ks: tok::pp_ifndef, Ks: tok::pp_endif))) {
4945 break;
4946 }
4947 distributeComments(Comments, NextTok: FormatTok);
4948 Comments.clear();
4949 // If there is an unfinished unwrapped line, we flush the preprocessor
4950 // directives only after that unwrapped line was finished later.
4951 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4952 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4953 assert((LevelDifference >= 0 ||
4954 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4955 "LevelDifference makes Line->Level negative");
4956 Line->Level += LevelDifference;
4957 // Comments stored before the preprocessor directive need to be output
4958 // before the preprocessor directive, at the same level as the
4959 // preprocessor directive, as we consider them to apply to the directive.
4960 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4961 PPBranchLevel > 0) {
4962 Line->Level += PPBranchLevel;
4963 }
4964 assert(Line->Level >= Line->UnbracedBodyLevel);
4965 Line->Level -= Line->UnbracedBodyLevel;
4966 flushComments(NewlineBeforeNext: isOnNewLine(FormatTok: *FormatTok));
4967 const bool IsEndIf = Tokens->peekNextToken()->is(Kind: tok::pp_endif);
4968 parsePPDirective();
4969 PreviousWasComment = FormatTok->is(Kind: tok::comment);
4970 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4971 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4972 // If the #endif of a potential include guard is the last thing in the
4973 // file, then we found an include guard.
4974 if (IsEndIf && IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
4975 getIncludeGuardState(Style: Style.IndentPPDirectives) == IG_Inited &&
4976 (eof() ||
4977 (PreviousWasComment &&
4978 Tokens->peekNextToken(/*SkipComment=*/true)->is(Kind: tok::eof)))) {
4979 IncludeGuard = IG_Found;
4980 }
4981 }
4982
4983 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4984 !Line->InPPDirective) {
4985 continue;
4986 }
4987
4988 if (FormatTok->is(Kind: tok::identifier) &&
4989 Macros.defined(Name: FormatTok->TokenText) &&
4990 // FIXME: Allow expanding macros in preprocessor directives.
4991 !Line->InPPDirective) {
4992 FormatToken *ID = FormatTok;
4993 unsigned Position = Tokens->getPosition();
4994
4995 // To correctly parse the code, we need to replace the tokens of the macro
4996 // call with its expansion.
4997 auto PreCall = std::move(Line);
4998 Line.reset(p: new UnwrappedLine);
4999 bool OldInExpansion = InExpansion;
5000 InExpansion = true;
5001 // We parse the macro call into a new line.
5002 auto Args = parseMacroCall();
5003 InExpansion = OldInExpansion;
5004 assert(Line->Tokens.front().Tok == ID);
5005 // And remember the unexpanded macro call tokens.
5006 auto UnexpandedLine = std::move(Line);
5007 // Reset to the old line.
5008 Line = std::move(PreCall);
5009
5010 LLVM_DEBUG({
5011 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
5012 if (Args) {
5013 llvm::dbgs() << "(";
5014 for (const auto &Arg : Args.value())
5015 for (const auto &T : Arg)
5016 llvm::dbgs() << T->TokenText << " ";
5017 llvm::dbgs() << ")";
5018 }
5019 llvm::dbgs() << "\n";
5020 });
5021 if (Macros.objectLike(Name: ID->TokenText) && Args &&
5022 !Macros.hasArity(Name: ID->TokenText, Arity: Args->size())) {
5023 // The macro is either
5024 // - object-like, but we got argumnets, or
5025 // - overloaded to be both object-like and function-like, but none of
5026 // the function-like arities match the number of arguments.
5027 // Thus, expand as object-like macro.
5028 LLVM_DEBUG(llvm::dbgs()
5029 << "Macro \"" << ID->TokenText
5030 << "\" not overloaded for arity " << Args->size()
5031 << "or not function-like, using object-like overload.");
5032 Args.reset();
5033 UnexpandedLine->Tokens.resize(new_size: 1);
5034 Tokens->setPosition(Position);
5035 nextToken();
5036 assert(!Args && Macros.objectLike(ID->TokenText));
5037 }
5038 if ((!Args && Macros.objectLike(Name: ID->TokenText)) ||
5039 (Args && Macros.hasArity(Name: ID->TokenText, Arity: Args->size()))) {
5040 // Next, we insert the expanded tokens in the token stream at the
5041 // current position, and continue parsing.
5042 Unexpanded[ID] = std::move(UnexpandedLine);
5043 SmallVector<FormatToken *, 8> Expansion =
5044 Macros.expand(ID, OptionalArgs: std::move(Args));
5045 if (!Expansion.empty())
5046 FormatTok = Tokens->insertTokens(Tokens: Expansion);
5047
5048 LLVM_DEBUG({
5049 llvm::dbgs() << "Expanded: ";
5050 for (const auto &T : Expansion)
5051 llvm::dbgs() << T->TokenText << " ";
5052 llvm::dbgs() << "\n";
5053 });
5054 } else {
5055 LLVM_DEBUG({
5056 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
5057 << "\", because it was used ";
5058 if (Args)
5059 llvm::dbgs() << "with " << Args->size();
5060 else
5061 llvm::dbgs() << "without";
5062 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
5063 });
5064 Tokens->setPosition(Position);
5065 FormatTok = ID;
5066 }
5067 }
5068
5069 if (FormatTok->isNot(Kind: tok::comment)) {
5070 distributeComments(Comments, NextTok: FormatTok);
5071 Comments.clear();
5072 return;
5073 }
5074
5075 Comments.push_back(Elt: FormatTok);
5076 } while (!eof());
5077
5078 distributeComments(Comments, NextTok: nullptr);
5079 Comments.clear();
5080}
5081
5082namespace {
5083template <typename Iterator>
5084void pushTokens(Iterator Begin, Iterator End,
5085 SmallVectorImpl<FormatToken *> &Into) {
5086 for (auto I = Begin; I != End; ++I) {
5087 Into.push_back(Elt: I->Tok);
5088 for (const auto &Child : I->Children)
5089 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5090 }
5091}
5092} // namespace
5093
5094std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5095UnwrappedLineParser::parseMacroCall() {
5096 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5097 assert(Line->Tokens.empty());
5098 nextToken();
5099 if (FormatTok->isNot(Kind: tok::l_paren))
5100 return Args;
5101 unsigned Position = Tokens->getPosition();
5102 FormatToken *Tok = FormatTok;
5103 nextToken();
5104 Args.emplace();
5105 auto ArgStart = std::prev(x: Line->Tokens.end());
5106
5107 int Parens = 0;
5108 do {
5109 switch (FormatTok->Tok.getKind()) {
5110 case tok::l_paren:
5111 ++Parens;
5112 nextToken();
5113 break;
5114 case tok::r_paren: {
5115 if (Parens > 0) {
5116 --Parens;
5117 nextToken();
5118 break;
5119 }
5120 Args->push_back(Elt: {});
5121 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5122 nextToken();
5123 return Args;
5124 }
5125 case tok::comma: {
5126 if (Parens > 0) {
5127 nextToken();
5128 break;
5129 }
5130 Args->push_back(Elt: {});
5131 pushTokens(Begin: std::next(x: ArgStart), End: Line->Tokens.end(), Into&: Args->back());
5132 nextToken();
5133 ArgStart = std::prev(x: Line->Tokens.end());
5134 break;
5135 }
5136 default:
5137 nextToken();
5138 break;
5139 }
5140 } while (!eof());
5141 Line->Tokens.resize(new_size: 1);
5142 Tokens->setPosition(Position);
5143 FormatTok = Tok;
5144 return {};
5145}
5146
5147void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5148 Line->Tokens.push_back(x: UnwrappedLineNode(Tok));
5149 if (AtEndOfPPLine) {
5150 auto &Tok = *Line->Tokens.back().Tok;
5151 Tok.MustBreakBefore = true;
5152 Tok.MustBreakBeforeFinalized = true;
5153 Tok.FirstAfterPPLine = true;
5154 AtEndOfPPLine = false;
5155 }
5156}
5157
5158} // end namespace format
5159} // end namespace clang
5160