1//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/AST/CommentParser.h"
10#include "clang/AST/Comment.h"
11#include "clang/AST/CommentCommandTraits.h"
12#include "clang/AST/CommentSema.h"
13#include "clang/Basic/CharInfo.h"
14#include "clang/Basic/DiagnosticComment.h"
15#include "clang/Basic/SourceManager.h"
16#include "llvm/Support/ErrorHandling.h"
17
18namespace clang {
19
20static inline bool isWhitespace(llvm::StringRef S) {
21 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
22 if (!isWhitespace(c: *I))
23 return false;
24 }
25 return true;
26}
27
28namespace comments {
29
30/// Re-lexes a sequence of tok::text tokens.
31class TextTokenRetokenizer {
32 llvm::BumpPtrAllocator &Allocator;
33 Parser &P;
34
35 /// This flag is set when there are no more tokens we can fetch from lexer.
36 bool NoMoreInterestingTokens;
37
38 /// Token buffer: tokens we have processed and lookahead.
39 SmallVector<Token, 16> Toks;
40
41 /// A position in \c Toks.
42 struct Position {
43 const char *BufferStart;
44 const char *BufferEnd;
45 const char *BufferPtr;
46 SourceLocation BufferStartLoc;
47 unsigned CurToken;
48 };
49
50 /// Current position in Toks.
51 Position Pos;
52
53 bool isEnd() const {
54 return Pos.CurToken >= Toks.size();
55 }
56
57 /// Sets up the buffer pointers to point to current token.
58 void setupBuffer() {
59 assert(!isEnd());
60 const Token &Tok = Toks[Pos.CurToken];
61
62 Pos.BufferStart = Tok.getText().begin();
63 Pos.BufferEnd = Tok.getText().end();
64 Pos.BufferPtr = Pos.BufferStart;
65 Pos.BufferStartLoc = Tok.getLocation();
66 }
67
68 SourceLocation getSourceLocation() const {
69 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
70 return Pos.BufferStartLoc.getLocWithOffset(Offset: CharNo);
71 }
72
73 char peek() const {
74 assert(!isEnd());
75 assert(Pos.BufferPtr != Pos.BufferEnd);
76 return *Pos.BufferPtr;
77 }
78
79 void consumeChar() {
80 assert(!isEnd());
81 assert(Pos.BufferPtr != Pos.BufferEnd);
82 Pos.BufferPtr++;
83 if (Pos.BufferPtr == Pos.BufferEnd) {
84 Pos.CurToken++;
85 if (isEnd() && !addToken())
86 return;
87
88 assert(!isEnd());
89 setupBuffer();
90 }
91 }
92
93 /// Extract a template type
94 bool lexTemplate(SmallString<32> &WordText) {
95 unsigned BracketCount = 0;
96 while (!isEnd()) {
97 const char C = peek();
98 WordText.push_back(Elt: C);
99 consumeChar();
100 switch (C) {
101 case '<': {
102 BracketCount++;
103 break;
104 }
105 case '>': {
106 BracketCount--;
107 if (!BracketCount)
108 return true;
109 break;
110 }
111 default:
112 break;
113 }
114 }
115 return false;
116 }
117
118 /// Add a token.
119 /// Returns true on success, false if there are no interesting tokens to
120 /// fetch from lexer.
121 bool addToken() {
122 if (NoMoreInterestingTokens)
123 return false;
124
125 if (P.Tok.is(K: tok::newline)) {
126 // If we see a single newline token between text tokens, skip it.
127 Token Newline = P.Tok;
128 P.consumeToken();
129 if (P.Tok.isNot(K: tok::text)) {
130 P.putBack(OldTok: Newline);
131 NoMoreInterestingTokens = true;
132 return false;
133 }
134 }
135 if (P.Tok.isNot(K: tok::text)) {
136 NoMoreInterestingTokens = true;
137 return false;
138 }
139
140 Toks.push_back(Elt: P.Tok);
141 P.consumeToken();
142 if (Toks.size() == 1)
143 setupBuffer();
144 return true;
145 }
146
147 void consumeWhitespace() {
148 while (!isEnd()) {
149 if (isWhitespace(c: peek()))
150 consumeChar();
151 else
152 break;
153 }
154 }
155
156 void formTokenWithChars(Token &Result,
157 SourceLocation Loc,
158 const char *TokBegin,
159 unsigned TokLength,
160 StringRef Text) {
161 Result.setLocation(Loc);
162 Result.setKind(tok::text);
163 Result.setLength(TokLength);
164#ifndef NDEBUG
165 Result.TextPtr = "<UNSET>";
166 Result.IntVal = 7;
167#endif
168 Result.setText(Text);
169 }
170
171public:
172 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
173 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
174 Pos.CurToken = 0;
175 addToken();
176 }
177
178 /// Extract a type argument
179 bool lexType(Token &Tok) {
180 if (isEnd())
181 return false;
182
183 // Save current position in case we need to rollback because the type is
184 // empty.
185 Position SavedPos = Pos;
186
187 // Consume any leading whitespace.
188 consumeWhitespace();
189 SmallString<32> WordText;
190 const char *WordBegin = Pos.BufferPtr;
191 SourceLocation Loc = getSourceLocation();
192
193 while (!isEnd()) {
194 const char C = peek();
195 // For non-whitespace characters we check if it's a template or otherwise
196 // continue reading the text into a word.
197 if (!isWhitespace(c: C)) {
198 if (C == '<') {
199 if (!lexTemplate(WordText))
200 return false;
201 } else {
202 WordText.push_back(Elt: C);
203 consumeChar();
204 }
205 } else {
206 consumeChar();
207 break;
208 }
209 }
210
211 const unsigned Length = WordText.size();
212 if (Length == 0) {
213 Pos = SavedPos;
214 return false;
215 }
216
217 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
218
219 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
220 StringRef Text = StringRef(TextPtr, Length);
221
222 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
223 return true;
224 }
225
226 // Check if this line starts with @par or \par
227 bool startsWithParCommand() {
228 unsigned Offset = 1;
229
230 // Skip all whitespace characters at the beginning.
231 // This needs to backtrack because Pos has already advanced past the
232 // actual \par or @par command by the time this function is called.
233 while (isWhitespace(c: *(Pos.BufferPtr - Offset)))
234 Offset++;
235
236 // Once we've reached the whitespace, backtrack and check if the previous
237 // four characters are \par or @par.
238 llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
239 return LineStart.starts_with(Prefix: "\\par") || LineStart.starts_with(Prefix: "@par");
240 }
241
242 /// Extract a par command argument-header.
243 bool lexParHeading(Token &Tok) {
244 if (isEnd())
245 return false;
246
247 Position SavedPos = Pos;
248
249 consumeWhitespace();
250 SmallString<32> WordText;
251 const char *WordBegin = Pos.BufferPtr;
252 SourceLocation Loc = getSourceLocation();
253
254 if (!startsWithParCommand())
255 return false;
256
257 // Read until the end of this token, which is effectively the end of the
258 // line. This gets us the content of the par header, if there is one.
259 while (!isEnd()) {
260 WordText.push_back(Elt: peek());
261 if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
262 consumeChar();
263 break;
264 }
265 consumeChar();
266 }
267
268 unsigned Length = WordText.size();
269 if (Length == 0) {
270 Pos = SavedPos;
271 return false;
272 }
273
274 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
275
276 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
277 StringRef Text = StringRef(TextPtr, Length);
278
279 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
280 return true;
281 }
282
283 /// Extract a word -- sequence of non-whitespace characters.
284 bool lexWord(Token &Tok) {
285 if (isEnd())
286 return false;
287
288 Position SavedPos = Pos;
289
290 consumeWhitespace();
291 SmallString<32> WordText;
292 const char *WordBegin = Pos.BufferPtr;
293 SourceLocation Loc = getSourceLocation();
294 while (!isEnd()) {
295 const char C = peek();
296 if (!isWhitespace(c: C)) {
297 WordText.push_back(Elt: C);
298 consumeChar();
299 } else
300 break;
301 }
302 const unsigned Length = WordText.size();
303 if (Length == 0) {
304 Pos = SavedPos;
305 return false;
306 }
307
308 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
309
310 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
311 StringRef Text = StringRef(TextPtr, Length);
312
313 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
314 return true;
315 }
316
317 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
318 if (isEnd())
319 return false;
320
321 Position SavedPos = Pos;
322
323 consumeWhitespace();
324 SmallString<32> WordText;
325 const char *WordBegin = Pos.BufferPtr;
326 SourceLocation Loc = getSourceLocation();
327 bool Error = false;
328 if (!isEnd()) {
329 const char C = peek();
330 if (C == OpenDelim) {
331 WordText.push_back(Elt: C);
332 consumeChar();
333 } else
334 Error = true;
335 }
336 char C = '\0';
337 while (!Error && !isEnd()) {
338 C = peek();
339 WordText.push_back(Elt: C);
340 consumeChar();
341 if (C == CloseDelim)
342 break;
343 }
344 if (!Error && C != CloseDelim)
345 Error = true;
346
347 if (Error) {
348 Pos = SavedPos;
349 return false;
350 }
351
352 const unsigned Length = WordText.size();
353 char *TextPtr = Allocator.Allocate<char>(Num: Length + 1);
354
355 memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + 1);
356 StringRef Text = StringRef(TextPtr, Length);
357
358 formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin,
359 TokLength: Pos.BufferPtr - WordBegin, Text);
360 return true;
361 }
362
363 /// Put back tokens that we didn't consume.
364 void putBackLeftoverTokens() {
365 if (isEnd())
366 return;
367
368 bool HavePartialTok = false;
369 Token PartialTok;
370 if (Pos.BufferPtr != Pos.BufferStart) {
371 formTokenWithChars(Result&: PartialTok, Loc: getSourceLocation(),
372 TokBegin: Pos.BufferPtr, TokLength: Pos.BufferEnd - Pos.BufferPtr,
373 Text: StringRef(Pos.BufferPtr,
374 Pos.BufferEnd - Pos.BufferPtr));
375 HavePartialTok = true;
376 Pos.CurToken++;
377 }
378
379 P.putBack(Toks: ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
380 Pos.CurToken = Toks.size();
381
382 if (HavePartialTok)
383 P.putBack(OldTok: PartialTok);
384 }
385};
386
387Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
388 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
389 const CommandTraits &Traits):
390 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
391 Traits(Traits) {
392 consumeToken();
393}
394
395void Parser::parseParamCommandArgs(ParamCommandComment *PC,
396 TextTokenRetokenizer &Retokenizer) {
397 Token Arg;
398 // Check if argument looks like direction specification: [dir]
399 // e.g., [in], [out], [in,out]
400 if (Retokenizer.lexDelimitedSeq(Tok&: Arg, OpenDelim: '[', CloseDelim: ']'))
401 S.actOnParamCommandDirectionArg(Command: PC,
402 ArgLocBegin: Arg.getLocation(),
403 ArgLocEnd: Arg.getEndLocation(),
404 Arg: Arg.getText());
405
406 if (Retokenizer.lexWord(Tok&: Arg))
407 S.actOnParamCommandParamNameArg(Command: PC,
408 ArgLocBegin: Arg.getLocation(),
409 ArgLocEnd: Arg.getEndLocation(),
410 Arg: Arg.getText());
411}
412
413void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
414 TextTokenRetokenizer &Retokenizer) {
415 Token Arg;
416 if (Retokenizer.lexWord(Tok&: Arg))
417 S.actOnTParamCommandParamNameArg(Command: TPC,
418 ArgLocBegin: Arg.getLocation(),
419 ArgLocEnd: Arg.getEndLocation(),
420 Arg: Arg.getText());
421}
422
423ArrayRef<Comment::Argument>
424Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
425 auto *Args = new (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
426 Comment::Argument[NumArgs];
427 unsigned ParsedArgs = 0;
428 Token Arg;
429 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Tok&: Arg)) {
430 Args[ParsedArgs] = Comment::Argument{
431 .Range: SourceRange(Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
432 ParsedArgs++;
433 }
434
435 return ArrayRef(Args, ParsedArgs);
436}
437
438ArrayRef<Comment::Argument>
439Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
440 unsigned NumArgs) {
441 auto *Args = new (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
442 Comment::Argument[NumArgs];
443 unsigned ParsedArgs = 0;
444 Token Arg;
445
446 while (ParsedArgs < NumArgs && Retokenizer.lexType(Tok&: Arg)) {
447 Args[ParsedArgs] = Comment::Argument{
448 .Range: SourceRange(Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
449 ParsedArgs++;
450 }
451
452 return ArrayRef(Args, ParsedArgs);
453}
454
455ArrayRef<Comment::Argument>
456Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
457 unsigned NumArgs) {
458 assert(NumArgs > 0);
459 auto *Args = new (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
460 Comment::Argument[NumArgs];
461 unsigned ParsedArgs = 0;
462 Token Arg;
463
464 while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Tok&: Arg)) {
465 Args[ParsedArgs] = Comment::Argument{
466 .Range: SourceRange(Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
467 ParsedArgs++;
468 }
469
470 return ArrayRef(Args, ParsedArgs);
471}
472
473BlockCommandComment *Parser::parseBlockCommand() {
474 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
475
476 ParamCommandComment *PC = nullptr;
477 TParamCommandComment *TPC = nullptr;
478 BlockCommandComment *BC = nullptr;
479 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
480 CommandMarkerKind CommandMarker =
481 Tok.is(K: tok::backslash_command) ? CMK_Backslash : CMK_At;
482 if (Info->IsParamCommand) {
483 PC = S.actOnParamCommandStart(LocBegin: Tok.getLocation(),
484 LocEnd: Tok.getEndLocation(),
485 CommandID: Tok.getCommandID(),
486 CommandMarker);
487 } else if (Info->IsTParamCommand) {
488 TPC = S.actOnTParamCommandStart(LocBegin: Tok.getLocation(),
489 LocEnd: Tok.getEndLocation(),
490 CommandID: Tok.getCommandID(),
491 CommandMarker);
492 } else {
493 BC = S.actOnBlockCommandStart(LocBegin: Tok.getLocation(),
494 LocEnd: Tok.getEndLocation(),
495 CommandID: Tok.getCommandID(),
496 CommandMarker);
497 }
498 consumeToken();
499
500 if (isTokBlockCommand()) {
501 // Block command ahead. We can't nest block commands, so pretend that this
502 // command has an empty argument.
503 ParagraphComment *Paragraph = S.actOnParagraphComment(Content: {});
504 if (PC) {
505 S.actOnParamCommandFinish(Command: PC, Paragraph);
506 return PC;
507 } else if (TPC) {
508 S.actOnTParamCommandFinish(Command: TPC, Paragraph);
509 return TPC;
510 } else {
511 S.actOnBlockCommandFinish(Command: BC, Paragraph);
512 return BC;
513 }
514 }
515
516 if (PC || TPC || Info->NumArgs > 0) {
517 // In order to parse command arguments we need to retokenize a few
518 // following text tokens.
519 TextTokenRetokenizer Retokenizer(Allocator, *this);
520
521 if (PC)
522 parseParamCommandArgs(PC, Retokenizer);
523 else if (TPC)
524 parseTParamCommandArgs(TPC, Retokenizer);
525 else if (Info->IsThrowsCommand)
526 S.actOnBlockCommandArgs(
527 Command: BC, Args: parseThrowCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
528 else if (Info->IsParCommand)
529 S.actOnBlockCommandArgs(Command: BC,
530 Args: parseParCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
531 else
532 S.actOnBlockCommandArgs(Command: BC, Args: parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
533
534 Retokenizer.putBackLeftoverTokens();
535 }
536
537 // If there's a block command ahead, we will attach an empty paragraph to
538 // this command.
539 bool EmptyParagraph = false;
540 if (isTokBlockCommand())
541 EmptyParagraph = true;
542 else if (Tok.is(K: tok::newline)) {
543 Token PrevTok = Tok;
544 consumeToken();
545 EmptyParagraph = isTokBlockCommand();
546 putBack(OldTok: PrevTok);
547 }
548
549 ParagraphComment *Paragraph;
550 if (EmptyParagraph)
551 Paragraph = S.actOnParagraphComment(Content: {});
552 else {
553 BlockContentComment *Block = parseParagraphOrBlockCommand();
554 // Since we have checked for a block command, we should have parsed a
555 // paragraph.
556 Paragraph = cast<ParagraphComment>(Val: Block);
557 }
558
559 if (PC) {
560 S.actOnParamCommandFinish(Command: PC, Paragraph);
561 return PC;
562 } else if (TPC) {
563 S.actOnTParamCommandFinish(Command: TPC, Paragraph);
564 return TPC;
565 } else {
566 S.actOnBlockCommandFinish(Command: BC, Paragraph);
567 return BC;
568 }
569}
570
571InlineCommandComment *Parser::parseInlineCommand() {
572 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
573 CommandMarkerKind CMK =
574 Tok.is(K: tok::backslash_command) ? CMK_Backslash : CMK_At;
575 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
576
577 const Token CommandTok = Tok;
578 consumeToken();
579
580 TextTokenRetokenizer Retokenizer(Allocator, *this);
581 ArrayRef<Comment::Argument> Args =
582 parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs);
583
584 InlineCommandComment *IC = S.actOnInlineCommand(
585 CommandLocBegin: CommandTok.getLocation(), CommandLocEnd: CommandTok.getEndLocation(),
586 CommandID: CommandTok.getCommandID(), CommandMarker: CMK, Args);
587
588 if (Args.size() < Info->NumArgs) {
589 Diag(Loc: CommandTok.getEndLocation().getLocWithOffset(Offset: 1),
590 DiagID: diag::warn_doc_inline_command_not_enough_arguments)
591 << CommandTok.is(K: tok::at_command) << Info->Name << Args.size()
592 << Info->NumArgs
593 << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
594 }
595
596 Retokenizer.putBackLeftoverTokens();
597
598 return IC;
599}
600
601HTMLStartTagComment *Parser::parseHTMLStartTag() {
602 assert(Tok.is(tok::html_start_tag));
603 HTMLStartTagComment *HST =
604 S.actOnHTMLStartTagStart(LocBegin: Tok.getLocation(),
605 TagName: Tok.getHTMLTagStartName());
606 consumeToken();
607
608 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
609 while (true) {
610 switch (Tok.getKind()) {
611 case tok::html_ident: {
612 Token Ident = Tok;
613 consumeToken();
614 if (Tok.isNot(K: tok::html_equals)) {
615 Attrs.push_back(Elt: HTMLStartTagComment::Attribute(Ident.getLocation(),
616 Ident.getHTMLIdent()));
617 continue;
618 }
619 Token Equals = Tok;
620 consumeToken();
621 if (Tok.isNot(K: tok::html_quoted_string)) {
622 Diag(Loc: Tok.getLocation(),
623 DiagID: diag::warn_doc_html_start_tag_expected_quoted_string)
624 << SourceRange(Equals.getLocation());
625 Attrs.push_back(Elt: HTMLStartTagComment::Attribute(Ident.getLocation(),
626 Ident.getHTMLIdent()));
627 while (Tok.is(K: tok::html_equals) ||
628 Tok.is(K: tok::html_quoted_string))
629 consumeToken();
630 continue;
631 }
632 Attrs.push_back(Elt: HTMLStartTagComment::Attribute(
633 Ident.getLocation(),
634 Ident.getHTMLIdent(),
635 Equals.getLocation(),
636 SourceRange(Tok.getLocation(),
637 Tok.getEndLocation()),
638 Tok.getHTMLQuotedString()));
639 consumeToken();
640 continue;
641 }
642
643 case tok::html_greater:
644 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
645 GreaterLoc: Tok.getLocation(),
646 /* IsSelfClosing = */ false);
647 consumeToken();
648 return HST;
649
650 case tok::html_slash_greater:
651 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
652 GreaterLoc: Tok.getLocation(),
653 /* IsSelfClosing = */ true);
654 consumeToken();
655 return HST;
656
657 case tok::html_equals:
658 case tok::html_quoted_string:
659 Diag(Loc: Tok.getLocation(),
660 DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
661 while (Tok.is(K: tok::html_equals) ||
662 Tok.is(K: tok::html_quoted_string))
663 consumeToken();
664 if (Tok.is(K: tok::html_ident) ||
665 Tok.is(K: tok::html_greater) ||
666 Tok.is(K: tok::html_slash_greater))
667 continue;
668
669 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
670 GreaterLoc: SourceLocation(),
671 /* IsSelfClosing = */ false);
672 return HST;
673
674 default:
675 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
676 S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
677 GreaterLoc: SourceLocation(),
678 /* IsSelfClosing = */ false);
679 bool StartLineInvalid;
680 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
681 Loc: HST->getLocation(),
682 Invalid: &StartLineInvalid);
683 bool EndLineInvalid;
684 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
685 Loc: Tok.getLocation(),
686 Invalid: &EndLineInvalid);
687 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
688 Diag(Loc: Tok.getLocation(),
689 DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater)
690 << HST->getSourceRange();
691 else {
692 Diag(Loc: Tok.getLocation(),
693 DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
694 Diag(Loc: HST->getLocation(), DiagID: diag::note_doc_html_tag_started_here)
695 << HST->getSourceRange();
696 }
697 return HST;
698 }
699 }
700}
701
702HTMLEndTagComment *Parser::parseHTMLEndTag() {
703 assert(Tok.is(tok::html_end_tag));
704 Token TokEndTag = Tok;
705 consumeToken();
706 SourceLocation Loc;
707 if (Tok.is(K: tok::html_greater)) {
708 Loc = Tok.getLocation();
709 consumeToken();
710 }
711
712 return S.actOnHTMLEndTag(LocBegin: TokEndTag.getLocation(),
713 LocEnd: Loc,
714 TagName: TokEndTag.getHTMLTagEndName());
715}
716
717BlockContentComment *Parser::parseParagraphOrBlockCommand() {
718 SmallVector<InlineContentComment *, 8> Content;
719
720 while (true) {
721 switch (Tok.getKind()) {
722 case tok::verbatim_block_begin:
723 case tok::verbatim_line_name:
724 case tok::eof:
725 break; // Block content or EOF ahead, finish this parapgaph.
726
727 case tok::unknown_command:
728 Content.push_back(Elt: S.actOnUnknownCommand(LocBegin: Tok.getLocation(),
729 LocEnd: Tok.getEndLocation(),
730 CommandName: Tok.getUnknownCommandName()));
731 consumeToken();
732 continue;
733
734 case tok::backslash_command:
735 case tok::at_command: {
736 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
737 if (Info->IsBlockCommand) {
738 if (Content.size() == 0)
739 return parseBlockCommand();
740 break; // Block command ahead, finish this parapgaph.
741 }
742 if (Info->IsVerbatimBlockEndCommand) {
743 Diag(Loc: Tok.getLocation(),
744 DiagID: diag::warn_verbatim_block_end_without_start)
745 << Tok.is(K: tok::at_command)
746 << Info->Name
747 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
748 consumeToken();
749 continue;
750 }
751 if (Info->IsUnknownCommand) {
752 Content.push_back(Elt: S.actOnUnknownCommand(LocBegin: Tok.getLocation(),
753 LocEnd: Tok.getEndLocation(),
754 CommandID: Info->getID()));
755 consumeToken();
756 continue;
757 }
758 assert(Info->IsInlineCommand);
759 Content.push_back(Elt: parseInlineCommand());
760 continue;
761 }
762
763 case tok::newline: {
764 consumeToken();
765 if (Tok.is(K: tok::newline) || Tok.is(K: tok::eof)) {
766 consumeToken();
767 break; // Two newlines -- end of paragraph.
768 }
769 // Also allow [tok::newline, tok::text, tok::newline] if the middle
770 // tok::text is just whitespace.
771 if (Tok.is(K: tok::text) && isWhitespace(S: Tok.getText())) {
772 Token WhitespaceTok = Tok;
773 consumeToken();
774 if (Tok.is(K: tok::newline) || Tok.is(K: tok::eof)) {
775 consumeToken();
776 break;
777 }
778 // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
779 putBack(OldTok: WhitespaceTok);
780 }
781 if (Content.size() > 0)
782 Content.back()->addTrailingNewline();
783 continue;
784 }
785
786 // Don't deal with HTML tag soup now.
787 case tok::html_start_tag:
788 Content.push_back(Elt: parseHTMLStartTag());
789 continue;
790
791 case tok::html_end_tag:
792 Content.push_back(Elt: parseHTMLEndTag());
793 continue;
794
795 case tok::text:
796 Content.push_back(Elt: S.actOnText(LocBegin: Tok.getLocation(),
797 LocEnd: Tok.getEndLocation(),
798 Text: Tok.getText()));
799 consumeToken();
800 continue;
801
802 case tok::verbatim_block_line:
803 case tok::verbatim_block_end:
804 case tok::verbatim_line_text:
805 case tok::html_ident:
806 case tok::html_equals:
807 case tok::html_quoted_string:
808 case tok::html_greater:
809 case tok::html_slash_greater:
810 llvm_unreachable("should not see this token");
811 }
812 break;
813 }
814
815 return S.actOnParagraphComment(Content: S.copyArray(Source: ArrayRef(Content)));
816}
817
818VerbatimBlockComment *Parser::parseVerbatimBlock() {
819 assert(Tok.is(tok::verbatim_block_begin));
820
821 VerbatimBlockComment *VB =
822 S.actOnVerbatimBlockStart(Loc: Tok.getLocation(),
823 CommandID: Tok.getVerbatimBlockID());
824 consumeToken();
825
826 // Don't create an empty line if verbatim opening command is followed
827 // by a newline.
828 if (Tok.is(K: tok::newline))
829 consumeToken();
830
831 SmallVector<VerbatimBlockLineComment *, 8> Lines;
832 while (Tok.is(K: tok::verbatim_block_line) ||
833 Tok.is(K: tok::newline)) {
834 VerbatimBlockLineComment *Line;
835 if (Tok.is(K: tok::verbatim_block_line)) {
836 Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(),
837 Text: Tok.getVerbatimBlockText());
838 consumeToken();
839 if (Tok.is(K: tok::newline)) {
840 consumeToken();
841 }
842 } else {
843 // Empty line, just a tok::newline.
844 Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(), Text: "");
845 consumeToken();
846 }
847 Lines.push_back(Elt: Line);
848 }
849
850 if (Tok.is(K: tok::verbatim_block_end)) {
851 const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getVerbatimBlockID());
852 S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: Tok.getLocation(), CloseName: Info->Name,
853 Lines: S.copyArray(Source: ArrayRef(Lines)));
854 consumeToken();
855 } else {
856 // Unterminated \\verbatim block
857 S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: SourceLocation(), CloseName: "",
858 Lines: S.copyArray(Source: ArrayRef(Lines)));
859 }
860
861 return VB;
862}
863
864VerbatimLineComment *Parser::parseVerbatimLine() {
865 assert(Tok.is(tok::verbatim_line_name));
866
867 Token NameTok = Tok;
868 consumeToken();
869
870 SourceLocation TextBegin;
871 StringRef Text;
872 // Next token might not be a tok::verbatim_line_text if verbatim line
873 // starting command comes just before a newline or comment end.
874 if (Tok.is(K: tok::verbatim_line_text)) {
875 TextBegin = Tok.getLocation();
876 Text = Tok.getVerbatimLineText();
877 } else {
878 TextBegin = NameTok.getEndLocation();
879 Text = "";
880 }
881
882 VerbatimLineComment *VL = S.actOnVerbatimLine(LocBegin: NameTok.getLocation(),
883 CommandID: NameTok.getVerbatimLineID(),
884 TextBegin,
885 Text);
886 consumeToken();
887 return VL;
888}
889
890BlockContentComment *Parser::parseBlockContent() {
891 switch (Tok.getKind()) {
892 case tok::text:
893 case tok::unknown_command:
894 case tok::backslash_command:
895 case tok::at_command:
896 case tok::html_start_tag:
897 case tok::html_end_tag:
898 return parseParagraphOrBlockCommand();
899
900 case tok::verbatim_block_begin:
901 return parseVerbatimBlock();
902
903 case tok::verbatim_line_name:
904 return parseVerbatimLine();
905
906 case tok::eof:
907 case tok::newline:
908 case tok::verbatim_block_line:
909 case tok::verbatim_block_end:
910 case tok::verbatim_line_text:
911 case tok::html_ident:
912 case tok::html_equals:
913 case tok::html_quoted_string:
914 case tok::html_greater:
915 case tok::html_slash_greater:
916 llvm_unreachable("should not see this token");
917 }
918 llvm_unreachable("bogus token kind");
919}
920
921FullComment *Parser::parseFullComment() {
922 // Skip newlines at the beginning of the comment.
923 while (Tok.is(K: tok::newline))
924 consumeToken();
925
926 SmallVector<BlockContentComment *, 8> Blocks;
927 while (Tok.isNot(K: tok::eof)) {
928 Blocks.push_back(Elt: parseBlockContent());
929
930 // Skip extra newlines after paragraph end.
931 while (Tok.is(K: tok::newline))
932 consumeToken();
933 }
934 return S.actOnFullComment(Blocks: S.copyArray(Source: ArrayRef(Blocks)));
935}
936
937} // end namespace comments
938} // end namespace clang
939