1//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/AST/CommentParser.h"
10#include "clang/AST/Comment.h"
11#include "clang/AST/CommentCommandTraits.h"
12#include "clang/AST/CommentSema.h"
13#include "clang/Basic/CharInfo.h"
14#include "clang/Basic/DiagnosticComment.h"
15#include "clang/Basic/SourceManager.h"
16#include "llvm/Support/ErrorHandling.h"
17
18namespace clang {
19
20static inline bool isWhitespace(llvm::StringRef S) {
21 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
22 if (!isWhitespace(c: *I))
23 return false;
24 }
25 return true;
26}
27
28namespace comments {
29
30/// Re-lexes a sequence of tok::text tokens.
31class TextTokenRetokenizer {
32 llvm::BumpPtrAllocator &Allocator;
33 Parser &P;
34
35 /// This flag is set when there are no more tokens we can fetch from lexer.
36 bool NoMoreInterestingTokens;
37
38 /// Token buffer: tokens we have processed and lookahead.
39 SmallVector<Token, 16> Toks;
40
41 /// A position in \c Toks.
42 struct Position {
43 const char *BufferStart;
44 const char *BufferEnd;
45 const char *BufferPtr;
46 SourceLocation BufferStartLoc;
47 unsigned CurToken;
48 };
49
50 /// Current position in Toks.
51 Position Pos;
52
53 bool isEnd() const {
54 return Pos.CurToken >= Toks.size();
55 }
56
57 /// Sets up the buffer pointers to point to current token.
58 void setupBuffer() {
59 assert(!isEnd());
60 const Token &Tok = Toks[Pos.CurToken];
61
62 Pos.BufferStart = Tok.getText().begin();
63 Pos.BufferEnd = Tok.getText().end();
64 Pos.BufferPtr = Pos.BufferStart;
65 Pos.BufferStartLoc = Tok.getLocation();
66 }
67
68 SourceLocation getSourceLocation() const {
69 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
70 return Pos.BufferStartLoc.getLocWithOffset(Offset: CharNo);
71 }
72
73 char peek() const {
74 assert(!isEnd());
75 assert(Pos.BufferPtr != Pos.BufferEnd);
76 return *Pos.BufferPtr;
77 }
78
79 void consumeChar() {
80 assert(!isEnd());
81 assert(Pos.BufferPtr != Pos.BufferEnd);
82 Pos.BufferPtr++;
83 if (Pos.BufferPtr == Pos.BufferEnd) {
84 Pos.CurToken++;
85 if (isEnd() && !addToken())
86 return;
87
88 assert(!isEnd());
89 setupBuffer();
90 }
91 }
92
93 /// Extract a template type
94 bool lexTemplate(SmallString<32> &WordText) {
95 unsigned BracketCount = 0;
96 while (!isEnd()) {
97 const char C = peek();
98 WordText.push_back(Elt: C);
99 consumeChar();
100 switch (C) {
101 case '<': {
102 BracketCount++;
103 break;
104 }
105 case '>': {
106 BracketCount--;
107 if (!BracketCount)
108 return true;
109 break;
110 }
111 default:
112 break;
113 }
114 }
115 return false;
116 }
117
118 /// Add a token.
119 /// Returns true on success, false if there are no interesting tokens to
120 /// fetch from lexer.
121 bool addToken() {
122 if (NoMoreInterestingTokens)
123 return false;
124
125 if (P.Tok.is(K: tok::newline)) {
126 // If we see a single newline token between text tokens, skip it.
127 Token Newline = P.Tok;
128 P.consumeToken();
129 if (P.Tok.isNot(K: tok::text)) {
130 P.putBack(OldTok: Newline);
131 NoMoreInterestingTokens = true;
132 return false;
133 }
134 }
135 if (P.Tok.isNot(K: tok::text)) {
136 NoMoreInterestingTokens = true;
137 return false;
138 }
139
140 Toks.push_back(Elt: P.Tok);
141 P.consumeToken();
142 if (Toks.size() == 1)
143 setupBuffer();
144 return true;
145 }
146
147 void consumeWhitespace() {
148 while (!isEnd()) {
149 if (isWhitespace(c: peek()))
150 consumeChar();
151 else
152 break;
153 }
154 }
155
156 void formTokenWithChars(Token &Result,
157 SourceLocation Loc,
158 const char *TokBegin,
159 unsigned TokLength,
160 StringRef Text) {
161 Result.setLocation(Loc);
162 Result.setKind(tok::text);
163 Result.setLength(TokLength);
164#ifndef NDEBUG
165 Result.TextPtr = "<UNSET>";
166 Result.IntVal = 7;
167#endif
168 Result.setText(Text);
169 }
170
171public:
172 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
173 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
174 Pos.CurToken = 0;
175 addToken();
176 }
177
178 /// Extract a type argument
179 bool lexType(Token &Tok) {
180 if (isEnd())
181 return false;
182
183 // Save current position in case we need to rollback because the type is
184 // empty.
185 Position SavedPos = Pos;
186
187 // Consume any leading whitespace.
188 consumeWhitespace();
189 SmallString<32> WordText;
190 const char *WordBegin = Pos.BufferPtr;
191 SourceLocation Loc = getSourceLocation();
192
193 while (!isEnd()) {
194 const char C = peek();
195 // For non-whitespace characters we check if it's a template or otherwise
196 // continue reading the text into a word.
197 if (!isWhitespace(c: C)) {
198 if (C == '<') {
199 if (!lexTemplate(WordText))
200 return false;
201 } else {
202 WordText.push_back(Elt: C);
203 consumeChar();
204 }
205 } else {
206 consumeChar();
207 break;
208 }
209 }
210
211 const unsigned Length = WordText.size();
212 if (Length == 0) {
213 Pos = SavedPos;
214 return false;
215 }
216
217 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
218
219 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
220 StringRef Text = StringRef(TextPtr, Length);
221
222 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
223 return true;
224 }
225
226 // Check if this line starts with @par or \par
227 bool startsWithParCommand() {
228 unsigned Offset = 1;
229
230 // Skip all whitespace characters at the beginning.
231 // This needs to backtrack because Pos has already advanced past the
232 // actual \par or @par command by the time this function is called.
233 while (isWhitespace(c: *(Pos.BufferPtr - Offset)))
234 Offset++;
235
236 // Once we've reached the whitespace, backtrack and check if the previous
237 // four characters are \par or @par.
238 llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
239 return LineStart.starts_with(Prefix: "\\par") || LineStart.starts_with(Prefix: "@par");
240 }
241
242 /// Extract a par command argument-header.
243 bool lexParHeading(Token &Tok) {
244 if (isEnd())
245 return false;
246
247 Position SavedPos = Pos;
248
249 consumeWhitespace();
250 SmallString<32> WordText;
251 const char *WordBegin = Pos.BufferPtr;
252 SourceLocation Loc = getSourceLocation();
253
254 if (!startsWithParCommand())
255 return false;
256
257 // Read until the end of this token, which is effectively the end of the
258 // line. This gets us the content of the par header, if there is one.
259 while (!isEnd()) {
260 WordText.push_back(Elt: peek());
261 if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
262 consumeChar();
263 break;
264 }
265 consumeChar();
266 }
267
268 unsigned Length = WordText.size();
269 if (Length == 0) {
270 Pos = SavedPos;
271 return false;
272 }
273
274 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
275
276 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
277 StringRef Text = StringRef(TextPtr, Length);
278
279 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
280 return true;
281 }
282
283 /// Extract a word -- sequence of non-whitespace characters.
284 bool lexWord(Token &Tok) {
285 if (isEnd())
286 return false;
287
288 Position SavedPos = Pos;
289
290 consumeWhitespace();
291 SmallString<32> WordText;
292 const char *WordBegin = Pos.BufferPtr;
293 SourceLocation Loc = getSourceLocation();
294 while (!isEnd()) {
295 const char C = peek();
296 if (!isWhitespace(c: C)) {
297 WordText.push_back(Elt: C);
298 consumeChar();
299 } else
300 break;
301 }
302 if (WordText.ends_with(C: ':'))
303 WordText.pop_back();
304 const unsigned Length = WordText.size();
305 if (Length == 0) {
306 Pos = SavedPos;
307 return false;
308 }
309
310 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
311
312 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
313 StringRef Text = StringRef(TextPtr, Length);
314
315 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
316 return true;
317 }
318
319 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
320 if (isEnd())
321 return false;
322
323 Position SavedPos = Pos;
324
325 consumeWhitespace();
326 SmallString<32> WordText;
327 const char *WordBegin = Pos.BufferPtr;
328 SourceLocation Loc = getSourceLocation();
329 bool Error = false;
330 if (!isEnd()) {
331 const char C = peek();
332 if (C == OpenDelim) {
333 WordText.push_back(Elt: C);
334 consumeChar();
335 } else
336 Error = true;
337 }
338 char C = '\0';
339 while (!Error && !isEnd()) {
340 C = peek();
341 WordText.push_back(Elt: C);
342 consumeChar();
343 if (C == CloseDelim)
344 break;
345 }
346 if (!Error && C != CloseDelim)
347 Error = true;
348
349 if (Error) {
350 Pos = SavedPos;
351 return false;
352 }
353
354 const unsigned Length = WordText.size();
355 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
356
357 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
358 StringRef Text = StringRef(TextPtr, Length);
359
360 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin,
361 TokLength: Pos.BufferPtr - WordBegin, Text);
362 return true;
363 }
364
365 /// Put back tokens that we didn't consume.
366 void putBackLeftoverTokens() {
367 if (isEnd())
368 return;
369
370 bool HavePartialTok = false;
371 Token PartialTok;
372 if (Pos.BufferPtr != Pos.BufferStart) {
373 formTokenWithChars(Result&: PartialTok, Loc: getSourceLocation(),
374 TokBegin: Pos.BufferPtr, TokLength: Pos.BufferEnd - Pos.BufferPtr,
375 Text: StringRef(Pos.BufferPtr,
376 Pos.BufferEnd - Pos.BufferPtr));
377 HavePartialTok = true;
378 Pos.CurToken++;
379 }
380
381 P.putBack(Toks: ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
382 Pos.CurToken = Toks.size();
383
384 if (HavePartialTok)
385 P.putBack(OldTok: PartialTok);
386 }
387};
388
389Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
390 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
391 const CommandTraits &Traits):
392 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
393 Traits(Traits) {
394 consumeToken();
395}
396
397void Parser::parseParamCommandArgs(ParamCommandComment *PC,
398 TextTokenRetokenizer &Retokenizer) {
399 Token Arg;
400 // Check if argument looks like direction specification: [dir]
401 // e.g., [in], [out], [in,out]
402 if (Retokenizer.lexDelimitedSeq(Tok&: Arg, OpenDelim: '[', CloseDelim: ']'))
403 S.actOnParamCommandDirectionArg(Command: PC,
404 ArgLocBegin: Arg.getLocation(),
405 ArgLocEnd: Arg.getEndLocation(),
406 Arg: Arg.getText());
407
408 if (Retokenizer.lexWord(Tok&: Arg))
409 S.actOnParamCommandParamNameArg(Command: PC,
410 ArgLocBegin: Arg.getLocation(),
411 ArgLocEnd: Arg.getEndLocation(),
412 Arg: Arg.getText());
413}
414
415void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
416 TextTokenRetokenizer &Retokenizer) {
417 Token Arg;
418 if (Retokenizer.lexWord(Tok&: Arg))
419 S.actOnTParamCommandParamNameArg(Command: TPC,
420 ArgLocBegin: Arg.getLocation(),
421 ArgLocEnd: Arg.getEndLocation(),
422 Arg: Arg.getText());
423}
424
425ArrayRef<Comment::Argument>
426Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
427 auto *Args = new (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
428 Comment::Argument[NumArgs];
429 unsigned ParsedArgs = 0;
430 Token Arg;
431 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Tok&: Arg)) {
432 Args[ParsedArgs] = Comment::Argument{
433 .Range: SourceRange(Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
434 ParsedArgs++;
435 }
436
437 return ArrayRef(Args, ParsedArgs);
438}
439
440ArrayRef<Comment::Argument>
441Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
442 unsigned NumArgs) {
443 auto *Args = new (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
444 Comment::Argument[NumArgs];
445 unsigned ParsedArgs = 0;
446 Token Arg;
447
448 while (ParsedArgs < NumArgs && Retokenizer.lexType(Tok&: Arg)) {
449 Args[ParsedArgs] = Comment::Argument{
450 .Range: SourceRange(Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
451 ParsedArgs++;
452 }
453
454 return ArrayRef(Args, ParsedArgs);
455}
456
457ArrayRef<Comment::Argument>
458Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
459 unsigned NumArgs) {
460 assert(NumArgs > 0);
461 auto *Args = new (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
462 Comment::Argument[NumArgs];
463 unsigned ParsedArgs = 0;
464 Token Arg;
465
466 while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Tok&: Arg)) {
467 Args[ParsedArgs] = Comment::Argument{
468 .Range: SourceRange(Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
469 ParsedArgs++;
470 }
471
472 return ArrayRef(Args, ParsedArgs);
473}
474
475BlockCommandComment *Parser::parseBlockCommand() {
476 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
477
478 ParamCommandComment *PC = nullptr;
479 TParamCommandComment *TPC = nullptr;
480 BlockCommandComment *BC = nullptr;
481 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
482 CommandMarkerKind CommandMarker =
483 Tok.is(K: tok::backslash_command) ? CMK_Backslash : CMK_At;
484 if (Info->IsParamCommand) {
485 PC = S.actOnParamCommandStart(LocBegin: Tok.getLocation(),
486 LocEnd: Tok.getEndLocation(),
487 CommandID: Tok.getCommandID(),
488 CommandMarker);
489 } else if (Info->IsTParamCommand) {
490 TPC = S.actOnTParamCommandStart(LocBegin: Tok.getLocation(),
491 LocEnd: Tok.getEndLocation(),
492 CommandID: Tok.getCommandID(),
493 CommandMarker);
494 } else {
495 BC = S.actOnBlockCommandStart(LocBegin: Tok.getLocation(),
496 LocEnd: Tok.getEndLocation(),
497 CommandID: Tok.getCommandID(),
498 CommandMarker);
499 }
500 consumeToken();
501
502 if (isTokBlockCommand()) {
503 // Block command ahead. We can't nest block commands, so pretend that this
504 // command has an empty argument.
505 ParagraphComment *Paragraph = S.actOnParagraphComment(Content: {});
506 if (PC) {
507 S.actOnParamCommandFinish(Command: PC, Paragraph);
508 return PC;
509 } else if (TPC) {
510 S.actOnTParamCommandFinish(Command: TPC, Paragraph);
511 return TPC;
512 } else {
513 S.actOnBlockCommandFinish(Command: BC, Paragraph);
514 return BC;
515 }
516 }
517
518 if (PC || TPC || Info->NumArgs > 0) {
519 // In order to parse command arguments we need to retokenize a few
520 // following text tokens.
521 TextTokenRetokenizer Retokenizer(Allocator, *this);
522
523 if (PC)
524 parseParamCommandArgs(PC, Retokenizer);
525 else if (TPC)
526 parseTParamCommandArgs(TPC, Retokenizer);
527 else if (Info->IsThrowsCommand)
528 S.actOnBlockCommandArgs(
529 Command: BC, Args: parseThrowCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
530 else if (Info->IsParCommand)
531 S.actOnBlockCommandArgs(Command: BC,
532 Args: parseParCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
533 else
534 S.actOnBlockCommandArgs(Command: BC, Args: parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
535
536 Retokenizer.putBackLeftoverTokens();
537 }
538
539 // If there's a block command ahead, we will attach an empty paragraph to
540 // this command.
541 bool EmptyParagraph = false;
542 if (isTokBlockCommand())
543 EmptyParagraph = true;
544 else if (Tok.is(K: tok::newline)) {
545 Token PrevTok = Tok;
546 consumeToken();
547 EmptyParagraph = isTokBlockCommand();
548 putBack(OldTok: PrevTok);
549 }
550
551 ParagraphComment *Paragraph;
552 if (EmptyParagraph)
553 Paragraph = S.actOnParagraphComment(Content: {});
554 else {
555 BlockContentComment *Block = parseParagraphOrBlockCommand();
556 // Since we have checked for a block command, we should have parsed a
557 // paragraph.
558 Paragraph = cast<ParagraphComment>(Val: Block);
559 }
560
561 if (PC) {
562 S.actOnParamCommandFinish(Command: PC, Paragraph);
563 return PC;
564 } else if (TPC) {
565 S.actOnTParamCommandFinish(Command: TPC, Paragraph);
566 return TPC;
567 } else {
568 S.actOnBlockCommandFinish(Command: BC, Paragraph);
569 return BC;
570 }
571}
572
573InlineCommandComment *Parser::parseInlineCommand() {
574 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
575 CommandMarkerKind CMK =
576 Tok.is(K: tok::backslash_command) ? CMK_Backslash : CMK_At;
577 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
578
579 const Token CommandTok = Tok;
580 consumeToken();
581
582 TextTokenRetokenizer Retokenizer(Allocator, *this);
583 ArrayRef<Comment::Argument> Args =
584 parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs);
585
586 InlineCommandComment *IC = S.actOnInlineCommand(
587 CommandLocBegin: CommandTok.getLocation(), CommandLocEnd: CommandTok.getEndLocation(),
588 CommandID: CommandTok.getCommandID(), CommandMarker: CMK, Args);
589
590 if (Args.size() < Info->NumArgs) {
591 Diag(Loc: CommandTok.getEndLocation().getLocWithOffset(Offset: 1),
592 DiagID: diag::warn_doc_inline_command_not_enough_arguments)
593 << CommandTok.is(K: tok::at_command) << Info->Name << Args.size()
594 << Info->NumArgs
595 << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
596 }
597
598 Retokenizer.putBackLeftoverTokens();
599
600 return IC;
601}
602
603HTMLStartTagComment *Parser::parseHTMLStartTag() {
604 assert(Tok.is(tok::html_start_tag));
605 HTMLStartTagComment *HST =
606 S.actOnHTMLStartTagStart(LocBegin: Tok.getLocation(),
607 TagName: Tok.getHTMLTagStartName());
608 consumeToken();
609
610 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
611 while (true) {
612 switch (Tok.getKind()) {
613 case tok::html_ident: {
614 Token Ident = Tok;
615 consumeToken();
616 if (Tok.isNot(K: tok::html_equals)) {
617 Attrs.push_back(Elt: HTMLStartTagComment::Attribute(Ident.getLocation(),
618 Ident.getHTMLIdent()));
619 continue;
620 }
621 Token Equals = Tok;
622 consumeToken();
623 if (Tok.isNot(K: tok::html_quoted_string)) {
624 Diag(Loc: Tok.getLocation(),
625 DiagID: diag::warn_doc_html_start_tag_expected_quoted_string)
626 << SourceRange(Equals.getLocation());
627 Attrs.push_back(Elt: HTMLStartTagComment::Attribute(Ident.getLocation(),
628 Ident.getHTMLIdent()));
629 while (Tok.is(K: tok::html_equals) ||
630 Tok.is(K: tok::html_quoted_string))
631 consumeToken();
632 continue;
633 }
634 Attrs.push_back(Elt: HTMLStartTagComment::Attribute(
635 Ident.getLocation(),
636 Ident.getHTMLIdent(),
637 Equals.getLocation(),
638 SourceRange(Tok.getLocation(),
639 Tok.getEndLocation()),
640 Tok.getHTMLQuotedString()));
641 consumeToken();
642 continue;
643 }
644
645 case tok::html_greater:
646 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
647 GreaterLoc: Tok.getLocation(),
648 /* IsSelfClosing = */ false);
649 consumeToken();
650 return HST;
651
652 case tok::html_slash_greater:
653 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
654 GreaterLoc: Tok.getLocation(),
655 /* IsSelfClosing = */ true);
656 consumeToken();
657 return HST;
658
659 case tok::html_equals:
660 case tok::html_quoted_string:
661 Diag(Loc: Tok.getLocation(),
662 DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
663 while (Tok.is(K: tok::html_equals) ||
664 Tok.is(K: tok::html_quoted_string))
665 consumeToken();
666 if (Tok.is(K: tok::html_ident) ||
667 Tok.is(K: tok::html_greater) ||
668 Tok.is(K: tok::html_slash_greater))
669 continue;
670
671 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
672 GreaterLoc: SourceLocation(),
673 /* IsSelfClosing = */ false);
674 return HST;
675
676 default:
677 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
678 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
679 GreaterLoc: SourceLocation(),
680 /* IsSelfClosing = */ false);
681 bool StartLineInvalid;
682 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
683 Loc: HST->getLocation(),
684 Invalid: &StartLineInvalid);
685 bool EndLineInvalid;
686 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
687 Loc: Tok.getLocation(),
688 Invalid: &EndLineInvalid);
689 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
690 Diag(Loc: Tok.getLocation(),
691 DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater)
692 << HST->getSourceRange();
693 else {
694 Diag(Loc: Tok.getLocation(),
695 DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
696 Diag(Loc: HST->getLocation(), DiagID: diag::note_doc_html_tag_started_here)
697 << HST->getSourceRange();
698 }
699 return HST;
700 }
701 }
702}
703
704HTMLEndTagComment *Parser::parseHTMLEndTag() {
705 assert(Tok.is(tok::html_end_tag));
706 Token TokEndTag = Tok;
707 consumeToken();
708 SourceLocation Loc;
709 if (Tok.is(K: tok::html_greater)) {
710 Loc = Tok.getLocation();
711 consumeToken();
712 }
713
714 return S.actOnHTMLEndTag(LocBegin: TokEndTag.getLocation(),
715 LocEnd: Loc,
716 TagName: TokEndTag.getHTMLTagEndName());
717}
718
719BlockContentComment *Parser::parseParagraphOrBlockCommand() {
720 SmallVector<InlineContentComment *, 8> Content;
721
722 while (true) {
723 switch (Tok.getKind()) {
724 case tok::verbatim_block_begin:
725 case tok::verbatim_line_name:
726 case tok::eof:
727 break; // Block content or EOF ahead, finish this parapgaph.
728
729 case tok::unknown_backslash_command:
730 case tok::unknown_at_command:
731 Content.push_back(Elt: S.actOnUnknownCommand(
732 LocBegin: Tok.getLocation(), LocEnd: Tok.getEndLocation(), CommandName: Tok.getUnknownCommandName(),
733 CommandMarker: Tok.getKind() == tok::unknown_backslash_command ? CMK_Backslash
734 : CMK_At));
735 consumeToken();
736 continue;
737
738 case tok::backslash_command:
739 case tok::at_command: {
740 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
741 if (Info->IsBlockCommand) {
742 if (Content.size() == 0)
743 return parseBlockCommand();
744 break; // Block command ahead, finish this parapgaph.
745 }
746 if (Info->IsVerbatimBlockEndCommand) {
747 Diag(Loc: Tok.getLocation(),
748 DiagID: diag::warn_verbatim_block_end_without_start)
749 << Tok.is(K: tok::at_command)
750 << Info->Name
751 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
752 consumeToken();
753 continue;
754 }
755 if (Info->IsUnknownCommand) {
756 Content.push_back(Elt: S.actOnUnknownCommand(
757 LocBegin: Tok.getLocation(), LocEnd: Tok.getEndLocation(), CommandID: Info->getID(),
758 CommandMarker: Tok.getKind() == tok::backslash_command ? CMK_Backslash : CMK_At));
759 consumeToken();
760 continue;
761 }
762 assert(Info->IsInlineCommand);
763 Content.push_back(Elt: parseInlineCommand());
764 continue;
765 }
766
767 case tok::newline: {
768 consumeToken();
769 if (Tok.is(K: tok::newline) || Tok.is(K: tok::eof)) {
770 consumeToken();
771 break; // Two newlines -- end of paragraph.
772 }
773 // Also allow [tok::newline, tok::text, tok::newline] if the middle
774 // tok::text is just whitespace.
775 if (Tok.is(K: tok::text) && isWhitespace(S: Tok.getText())) {
776 Token WhitespaceTok = Tok;
777 consumeToken();
778 if (Tok.is(K: tok::newline) || Tok.is(K: tok::eof)) {
779 consumeToken();
780 break;
781 }
782 // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
783 putBack(OldTok: WhitespaceTok);
784 }
785 if (Content.size() > 0)
786 Content.back()->addTrailingNewline();
787 continue;
788 }
789
790 // Don't deal with HTML tag soup now.
791 case tok::html_start_tag:
792 Content.push_back(Elt: parseHTMLStartTag());
793 continue;
794
795 case tok::html_end_tag:
796 Content.push_back(Elt: parseHTMLEndTag());
797 continue;
798
799 case tok::text:
800 Content.push_back(Elt: S.actOnText(LocBegin: Tok.getLocation(),
801 LocEnd: Tok.getEndLocation(),
802 Text: Tok.getText()));
803 consumeToken();
804 continue;
805
806 case tok::verbatim_block_line:
807 case tok::verbatim_block_end:
808 case tok::verbatim_line_text:
809 case tok::html_ident:
810 case tok::html_equals:
811 case tok::html_quoted_string:
812 case tok::html_greater:
813 case tok::html_slash_greater:
814 llvm_unreachable("should not see this token");
815 }
816 break;
817 }
818
819 return S.actOnParagraphComment(Content: S.copyArray(Source: ArrayRef(Content)));
820}
821
822VerbatimBlockComment *Parser::parseVerbatimBlock() {
823 assert(Tok.is(tok::verbatim_block_begin));
824
825 VerbatimBlockComment *VB =
826 S.actOnVerbatimBlockStart(Loc: Tok.getLocation(),
827 CommandID: Tok.getVerbatimBlockID());
828 consumeToken();
829
830 // Don't create an empty line if verbatim opening command is followed
831 // by a newline.
832 if (Tok.is(K: tok::newline))
833 consumeToken();
834
835 SmallVector<VerbatimBlockLineComment *, 8> Lines;
836 while (Tok.is(K: tok::verbatim_block_line) ||
837 Tok.is(K: tok::newline)) {
838 VerbatimBlockLineComment *Line;
839 if (Tok.is(K: tok::verbatim_block_line)) {
840 Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(),
841 Text: Tok.getVerbatimBlockText());
842 consumeToken();
843 if (Tok.is(K: tok::newline)) {
844 consumeToken();
845 }
846 } else {
847 // Empty line, just a tok::newline.
848 Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(), Text: "");
849 consumeToken();
850 }
851 Lines.push_back(Elt: Line);
852 }
853
854 if (Tok.is(K: tok::verbatim_block_end)) {
855 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getVerbatimBlockID());
856 S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: Tok.getLocation(), CloseName: Info->Name,
857 Lines: S.copyArray(Source: ArrayRef(Lines)));
858 consumeToken();
859 } else {
860 // Unterminated \\verbatim block
861 S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: SourceLocation(), CloseName: "",
862 Lines: S.copyArray(Source: ArrayRef(Lines)));
863 }
864
865 return VB;
866}
867
868VerbatimLineComment *Parser::parseVerbatimLine() {
869 assert(Tok.is(tok::verbatim_line_name));
870
871 Token NameTok = Tok;
872 consumeToken();
873
874 SourceLocation TextBegin;
875 StringRef Text;
876 // Next token might not be a tok::verbatim_line_text if verbatim line
877 // starting command comes just before a newline or comment end.
878 if (Tok.is(K: tok::verbatim_line_text)) {
879 TextBegin = Tok.getLocation();
880 Text = Tok.getVerbatimLineText();
881 } else {
882 TextBegin = NameTok.getEndLocation();
883 Text = "";
884 }
885
886 VerbatimLineComment *VL = S.actOnVerbatimLine(LocBegin: NameTok.getLocation(),
887 CommandID: NameTok.getVerbatimLineID(),
888 TextBegin,
889 Text);
890 consumeToken();
891 return VL;
892}
893
894BlockContentComment *Parser::parseBlockContent() {
895 switch (Tok.getKind()) {
896 case tok::text:
897 case tok::unknown_backslash_command:
898 case tok::unknown_at_command:
899 case tok::backslash_command:
900 case tok::at_command:
901 case tok::html_start_tag:
902 case tok::html_end_tag:
903 return parseParagraphOrBlockCommand();
904
905 case tok::verbatim_block_begin:
906 return parseVerbatimBlock();
907
908 case tok::verbatim_line_name:
909 return parseVerbatimLine();
910
911 case tok::eof:
912 case tok::newline:
913 case tok::verbatim_block_line:
914 case tok::verbatim_block_end:
915 case tok::verbatim_line_text:
916 case tok::html_ident:
917 case tok::html_equals:
918 case tok::html_quoted_string:
919 case tok::html_greater:
920 case tok::html_slash_greater:
921 llvm_unreachable("should not see this token");
922 }
923 llvm_unreachable("bogus token kind");
924}
925
926FullComment *Parser::parseFullComment() {
927 // Skip newlines at the beginning of the comment.
928 while (Tok.is(K: tok::newline))
929 consumeToken();
930
931 SmallVector<BlockContentComment *, 8> Blocks;
932 while (Tok.isNot(K: tok::eof)) {
933 Blocks.push_back(Elt: parseBlockContent());
934
935 // Skip extra newlines after paragraph end.
936 while (Tok.is(K: tok::newline))
937 consumeToken();
938 }
939 return S.actOnFullComment(Blocks: S.copyArray(Source: ArrayRef(Blocks)));
940}
941
942} // end namespace comments
943} // end namespace clang
944