CommentParser.cpp source code [llvm_projects/clang/lib/AST/CommentParser.cpp]

1	//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang/AST/CommentParser.h"
10	#include "clang/AST/Comment.h"
11	#include "clang/AST/CommentCommandTraits.h"
12	#include "clang/AST/CommentSema.h"
13	#include "clang/Basic/CharInfo.h"
14	#include "clang/Basic/DiagnosticComment.h"
15	#include "clang/Basic/SourceManager.h"
16	#include "llvm/Support/ErrorHandling.h"
17
18	namespace clang {
19
20	static inline bool isWhitespace(llvm::StringRef S) {
21	for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
22	if (!isWhitespace(c: *I))
23	return false;
24	}
25	return true;
26	}
27
28	namespace comments {
29
30	/// Re-lexes a sequence of tok::text tokens.
31	class TextTokenRetokenizer {
32	llvm::BumpPtrAllocator &Allocator;
33	Parser &P;
34
35	/// This flag is set when there are no more tokens we can fetch from lexer.
36	bool NoMoreInterestingTokens;
37
38	/// Token buffer: tokens we have processed and lookahead.
39	SmallVector<Token, `16`> Toks;
40
41	/// A position in \c Toks.
42	struct Position {
43	const char *BufferStart;
44	const char *BufferEnd;
45	const char *BufferPtr;
46	SourceLocation BufferStartLoc;
47	unsigned CurToken;
48	};
49
50	/// Current position in Toks.
51	Position Pos;
52
53	bool isEnd() const {
54	return Pos.CurToken >= Toks.size();
55	}
56
57	/// Sets up the buffer pointers to point to current token.
58	void setupBuffer() {
59	assert(!isEnd());
60	const Token &Tok = Toks [Pos.CurToken];
61
62	Pos.BufferStart = Tok.getText().begin();
63	Pos.BufferEnd = Tok.getText().end();
64	Pos.BufferPtr = Pos.BufferStart;
65	Pos.BufferStartLoc = Tok.getLocation();
66	}
67
68	SourceLocation getSourceLocation() const {
69	const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
70	return Pos.BufferStartLoc.getLocWithOffset(Offset: CharNo);
71	}
72
73	char peek() const {
74	assert(!isEnd());
75	assert(Pos.BufferPtr != Pos.BufferEnd);
76	return *Pos.BufferPtr;
77	}
78
79	void consumeChar() {
80	assert(!isEnd());
81	assert(Pos.BufferPtr != Pos.BufferEnd);
82	Pos.BufferPtr++;
83	if (Pos.BufferPtr == Pos.BufferEnd) {
84	Pos.CurToken++;
85	if (isEnd() && !addToken())
86	return;
87
88	assert(!isEnd());
89	setupBuffer();
90	}
91	}
92
93	/// Extract a template type
94	bool lexTemplate(SmallString<`32`> &WordText) {
95	unsigned BracketCount = `0`;
96	while (!isEnd()) {
97	const char C = peek();
98	WordText.push_back(Elt: C);
99	consumeChar();
100	switch (C) {
101	case `'<'`: {
102	BracketCount++;
103	break;
104	}
105	case `'>'`: {
106	BracketCount--;
107	if (!BracketCount)
108	return true;
109	break;
110	}
111	default:
112	break;
113	}
114	}
115	return false;
116	}
117
118	/// Add a token.
119	/// Returns true on success, false if there are no interesting tokens to
120	/// fetch from lexer.
121	bool addToken() {
122	if (NoMoreInterestingTokens)
123	return false;
124
125	if (P.Tok.is(K: tok::newline)) {
126	// If we see a single newline token between text tokens, skip it.
127	Token Newline = P.Tok;
128	P.consumeToken();
129	if (P.Tok.isNot(K: tok::text)) {
130	P.putBack(OldTok: Newline);
131	NoMoreInterestingTokens = true;
132	return false;
133	}
134	}
135	if (P.Tok.isNot(K: tok::text)) {
136	NoMoreInterestingTokens = true;
137	return false;
138	}
139
140	Toks.push_back(Elt: P.Tok);
141	P.consumeToken();
142	if (Toks.size() == `1`)
143	setupBuffer();
144	return true;
145	}
146
147	void consumeWhitespace() {
148	while (!isEnd()) {
149	if (isWhitespace(c: peek()))
150	consumeChar();
151	else
152	break;
153	}
154	}
155
156	void formTokenWithChars(Token &Result,
157	SourceLocation Loc,
158	const char *TokBegin,
159	unsigned TokLength,
160	StringRef Text) {
161	Result.setLocation(Loc);
162	Result.setKind(tok::text);
163	Result.setLength(TokLength);
164	#ifndef NDEBUG
165	Result.TextPtr = "<UNSET>";
166	Result.IntVal = `7`;
167	#endif
168	Result.setText(Text);
169	}
170
171	public:
172	TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
173	Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
174	Pos.CurToken = `0`;
175	addToken();
176	}
177
178	/// Extract a type argument
179	bool lexType(Token &Tok) {
180	if (isEnd())
181	return false;
182
183	// Save current position in case we need to rollback because the type is
184	// empty.
185	Position SavedPos = Pos;
186
187	// Consume any leading whitespace.
188	consumeWhitespace();
189	SmallString<`32`> WordText;
190	const char *WordBegin = Pos.BufferPtr;
191	SourceLocation Loc = getSourceLocation();
192
193	while (!isEnd()) {
194	const char C = peek();
195	// For non-whitespace characters we check if it's a template or otherwise
196	// continue reading the text into a word.
197	if (!isWhitespace(c: C)) {
198	if (C == `'<'`) {
199	if (!lexTemplate(WordText))
200	return false;
201	} else {
202	WordText.push_back(Elt: C);
203	consumeChar();
204	}
205	} else {
206	consumeChar();
207	break;
208	}
209	}
210
211	const unsigned Length = WordText.size();
212	if (Length == `0`) {
213	Pos = SavedPos;
214	return false;
215	}
216
217	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
218
219	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
220	StringRef Text = StringRef(TextPtr, Length);
221
222	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
223	return true;
224	}
225
226	// Check if this line starts with @par or \par
227	bool startsWithParCommand() {
228	unsigned Offset = `1`;
229
230	// Skip all whitespace characters at the beginning.
231	// This needs to backtrack because Pos has already advanced past the
232	// actual \par or @par command by the time this function is called.
233	while (isWhitespace(c: *(Pos.BufferPtr - Offset)))
234	Offset++;
235
236	// Once we've reached the whitespace, backtrack and check if the previous
237	// four characters are \par or @par.
238	llvm::StringRef LineStart(Pos.BufferPtr - Offset - `3`, `4`);
239	return LineStart.starts_with(Prefix: "\\par") \|\| LineStart.starts_with(Prefix: "@par");
240	}
241
242	/// Extract a par command argument-header.
243	bool lexParHeading(Token &Tok) {
244	if (isEnd())
245	return false;
246
247	Position SavedPos = Pos;
248
249	consumeWhitespace();
250	SmallString<`32`> WordText;
251	const char *WordBegin = Pos.BufferPtr;
252	SourceLocation Loc = getSourceLocation();
253
254	if (!startsWithParCommand())
255	return false;
256
257	// Read until the end of this token, which is effectively the end of the
258	// line. This gets us the content of the par header, if there is one.
259	while (!isEnd()) {
260	WordText.push_back(Elt: peek());
261	if (Pos.BufferPtr + `1` == Pos.BufferEnd) {
262	consumeChar();
263	break;
264	}
265	consumeChar();
266	}
267
268	unsigned Length = WordText.size();
269	if (Length == `0`) {
270	Pos = SavedPos;
271	return false;
272	}
273
274	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
275
276	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
277	StringRef Text = StringRef(TextPtr, Length);
278
279	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
280	return true;
281	}
282
283	/// Extract a word -- sequence of non-whitespace characters.
284	bool lexWord(Token &Tok) {
285	if (isEnd())
286	return false;
287
288	Position SavedPos = Pos;
289
290	consumeWhitespace();
291	SmallString<`32`> WordText;
292	const char *WordBegin = Pos.BufferPtr;
293	SourceLocation Loc = getSourceLocation();
294	while (!isEnd()) {
295	const char C = peek();
296	if (!isWhitespace(c: C)) {
297	WordText.push_back(Elt: C);
298	consumeChar();
299	} else
300	break;
301	}
302	const unsigned Length = WordText.size();
303	if (Length == `0`) {
304	Pos = SavedPos;
305	return false;
306	}
307
308	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
309
310	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
311	StringRef Text = StringRef(TextPtr, Length);
312
313	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
314	return true;
315	}
316
317	bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
318	if (isEnd())
319	return false;
320
321	Position SavedPos = Pos;
322
323	consumeWhitespace();
324	SmallString<`32`> WordText;
325	const char *WordBegin = Pos.BufferPtr;
326	SourceLocation Loc = getSourceLocation();
327	bool Error = false;
328	if (!isEnd()) {
329	const char C = peek();
330	if (C == OpenDelim) {
331	WordText.push_back(Elt: C);
332	consumeChar();
333	} else
334	Error = true;
335	}
336	char C = `'\0'`;
337	while (!Error && !isEnd()) {
338	C = peek();
339	WordText.push_back(Elt: C);
340	consumeChar();
341	if (C == CloseDelim)
342	break;
343	}
344	if (!Error && C != CloseDelim)
345	Error = true;
346
347	if (Error) {
348	Pos = SavedPos;
349	return false;
350	}
351
352	const unsigned Length = WordText.size();
353	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
354
355	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
356	StringRef Text = StringRef(TextPtr, Length);
357
358	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin,
359	TokLength: Pos.BufferPtr - WordBegin, Text);
360	return true;
361	}
362
363	/// Put back tokens that we didn't consume.
364	void putBackLeftoverTokens() {
365	if (isEnd())
366	return;
367
368	bool HavePartialTok = false;
369	Token PartialTok;
370	if (Pos.BufferPtr != Pos.BufferStart) {
371	formTokenWithChars(Result&: PartialTok, Loc: getSourceLocation(),
372	TokBegin: Pos.BufferPtr, TokLength: Pos.BufferEnd - Pos.BufferPtr,
373	Text: StringRef(Pos.BufferPtr,
374	Pos.BufferEnd - Pos.BufferPtr));
375	HavePartialTok = true;
376	Pos.CurToken++;
377	}
378
379	P.putBack(Toks: ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
380	Pos.CurToken = Toks.size();
381
382	if (HavePartialTok)
383	P.putBack(OldTok: PartialTok);
384	}
385	};
386
387	Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
388	const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
389	const CommandTraits &Traits):
390	L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
391	Traits(Traits) {
392	consumeToken();
393	}
394
395	void Parser::parseParamCommandArgs(ParamCommandComment *PC,
396	TextTokenRetokenizer &Retokenizer) {
397	Token Arg;
398	// Check if argument looks like direction specification: [dir]
399	// e.g., [in], [out], [in,out]
400	if (Retokenizer.lexDelimitedSeq(Tok&: Arg, OpenDelim: `'['`, CloseDelim: `']'`))
401	S.actOnParamCommandDirectionArg(Command: PC,
402	ArgLocBegin: Arg.getLocation(),
403	ArgLocEnd: Arg.getEndLocation(),
404	Arg: Arg.getText());
405
406	if (Retokenizer.lexWord(Tok&: Arg))
407	S.actOnParamCommandParamNameArg(Command: PC,
408	ArgLocBegin: Arg.getLocation(),
409	ArgLocEnd: Arg.getEndLocation(),
410	Arg: Arg.getText());
411	}
412
413	void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
414	TextTokenRetokenizer &Retokenizer) {
415	Token Arg;
416	if (Retokenizer.lexWord(Tok&: Arg))
417	S.actOnTParamCommandParamNameArg(Command: TPC,
418	ArgLocBegin: Arg.getLocation(),
419	ArgLocEnd: Arg.getEndLocation(),
420	Arg: Arg.getText());
421	}
422
423	ArrayRef<Comment::Argument>
424	Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
425	auto Args = new* (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
426	Comment::Argument[NumArgs];
427	unsigned ParsedArgs = `0`;
428	Token Arg;
429	while (ParsedArgs < NumArgs && Retokenizer.lexWord(Tok&: Arg)) {
430	Args[ParsedArgs] = Comment::Argument{
431	.Range: SourceRange (Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
432	ParsedArgs++;
433	}
434
435	return ArrayRef(Args, ParsedArgs);
436	}
437
438	ArrayRef<Comment::Argument>
439	Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
440	unsigned NumArgs) {
441	auto Args = new* (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
442	Comment::Argument[NumArgs];
443	unsigned ParsedArgs = `0`;
444	Token Arg;
445
446	while (ParsedArgs < NumArgs && Retokenizer.lexType(Tok&: Arg)) {
447	Args[ParsedArgs] = Comment::Argument{
448	.Range: SourceRange (Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
449	ParsedArgs++;
450	}
451
452	return ArrayRef(Args, ParsedArgs);
453	}
454
455	ArrayRef<Comment::Argument>
456	Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
457	unsigned NumArgs) {
458	assert(NumArgs > `0`);
459	auto Args = new* (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
460	Comment::Argument[NumArgs];
461	unsigned ParsedArgs = `0`;
462	Token Arg;
463
464	while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Tok&: Arg)) {
465	Args[ParsedArgs] = Comment::Argument{
466	.Range: SourceRange (Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
467	ParsedArgs++;
468	}
469
470	return ArrayRef(Args, ParsedArgs);
471	}
472
473	BlockCommandComment *Parser::parseBlockCommand() {
474	assert(Tok.is(tok::backslash_command) \|\| Tok.is(tok::at_command));
475
476	ParamCommandComment PC = nullptr*;
477	TParamCommandComment TPC = nullptr*;
478	BlockCommandComment BC = nullptr*;
479	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
480	CommandMarkerKind CommandMarker =
481	Tok.is(K: tok::backslash_command) ? CMK_Backslash : CMK_At;
482	if (Info->IsParamCommand) {
483	PC = S.actOnParamCommandStart(LocBegin: Tok.getLocation(),
484	LocEnd: Tok.getEndLocation(),
485	CommandID: Tok.getCommandID(),
486	CommandMarker);
487	} else if (Info->IsTParamCommand) {
488	TPC = S.actOnTParamCommandStart(LocBegin: Tok.getLocation(),
489	LocEnd: Tok.getEndLocation(),
490	CommandID: Tok.getCommandID(),
491	CommandMarker);
492	} else {
493	BC = S.actOnBlockCommandStart(LocBegin: Tok.getLocation(),
494	LocEnd: Tok.getEndLocation(),
495	CommandID: Tok.getCommandID(),
496	CommandMarker);
497	}
498	consumeToken();
499
500	if (isTokBlockCommand()) {
501	// Block command ahead. We can't nest block commands, so pretend that this
502	// command has an empty argument.
503	ParagraphComment *Paragraph = S.actOnParagraphComment(Content: {});
504	if (PC) {
505	S.actOnParamCommandFinish(Command: PC, Paragraph);
506	return PC;
507	} else if (TPC) {
508	S.actOnTParamCommandFinish(Command: TPC, Paragraph);
509	return TPC;
510	} else {
511	S.actOnBlockCommandFinish(Command: BC, Paragraph);
512	return BC;
513	}
514	}
515
516	if (PC \|\| TPC \|\| Info->NumArgs > `0`) {
517	// In order to parse command arguments we need to retokenize a few
518	// following text tokens.
519	TextTokenRetokenizer Retokenizer(Allocator, *this);
520
521	if (PC)
522	parseParamCommandArgs(PC, Retokenizer);
523	else if (TPC)
524	parseTParamCommandArgs(TPC, Retokenizer);
525	else if (Info->IsThrowsCommand)
526	S.actOnBlockCommandArgs(
527	Command: BC, Args: parseThrowCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
528	else if (Info->IsParCommand)
529	S.actOnBlockCommandArgs(Command: BC,
530	Args: parseParCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
531	else
532	S.actOnBlockCommandArgs(Command: BC, Args: parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
533
534	Retokenizer.putBackLeftoverTokens();
535	}
536
537	// If there's a block command ahead, we will attach an empty paragraph to
538	// this command.
539	bool EmptyParagraph = false;
540	if (isTokBlockCommand())
541	EmptyParagraph = true;
542	else if (Tok.is(K: tok::newline)) {
543	Token PrevTok = Tok;
544	consumeToken();
545	EmptyParagraph = isTokBlockCommand();
546	putBack(OldTok: PrevTok);
547	}
548
549	ParagraphComment *Paragraph;
550	if (EmptyParagraph)
551	Paragraph = S.actOnParagraphComment(Content: {});
552	else {
553	BlockContentComment *Block = parseParagraphOrBlockCommand();
554	// Since we have checked for a block command, we should have parsed a
555	// paragraph.
556	Paragraph = cast<ParagraphComment>(Val: Block);
557	}
558
559	if (PC) {
560	S.actOnParamCommandFinish(Command: PC, Paragraph);
561	return PC;
562	} else if (TPC) {
563	S.actOnTParamCommandFinish(Command: TPC, Paragraph);
564	return TPC;
565	} else {
566	S.actOnBlockCommandFinish(Command: BC, Paragraph);
567	return BC;
568	}
569	}
570
571	InlineCommandComment *Parser::parseInlineCommand() {
572	assert(Tok.is(tok::backslash_command) \|\| Tok.is(tok::at_command));
573	CommandMarkerKind CMK =
574	Tok.is(K: tok::backslash_command) ? CMK_Backslash : CMK_At;
575	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
576
577	const Token CommandTok = Tok;
578	consumeToken();
579
580	TextTokenRetokenizer Retokenizer(Allocator, *this);
581	ArrayRef<Comment::Argument> Args =
582	parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs);
583
584	InlineCommandComment *IC = S.actOnInlineCommand(
585	CommandLocBegin: CommandTok.getLocation(), CommandLocEnd: CommandTok.getEndLocation(),
586	CommandID: CommandTok.getCommandID(), CommandMarker: CMK, Args);
587
588	if (Args.size() < Info->NumArgs) {
589	Diag(Loc: CommandTok.getEndLocation().getLocWithOffset(Offset: `1`),
590	DiagID: diag::warn_doc_inline_command_not_enough_arguments)
591	<< CommandTok.is(K: tok::at_command) << Info->Name << Args.size()
592	<< Info->NumArgs
593	<< SourceRange (CommandTok.getLocation(), CommandTok.getEndLocation());
594	}
595
596	Retokenizer.putBackLeftoverTokens();
597
598	return IC;
599	}
600
601	HTMLStartTagComment *Parser::parseHTMLStartTag() {
602	assert(Tok.is(tok::html_start_tag));
603	HTMLStartTagComment *HST =
604	S.actOnHTMLStartTagStart(LocBegin: Tok.getLocation(),
605	TagName: Tok.getHTMLTagStartName());
606	consumeToken();
607
608	SmallVector<HTMLStartTagComment::Attribute, `2`> Attrs;
609	while (true) {
610	switch (Tok.getKind()) {
611	case tok::html_ident: {
612	Token Ident = Tok;
613	consumeToken();
614	if (Tok.isNot(K: tok::html_equals)) {
615	Attrs.push_back(Elt: HTMLStartTagComment::Attribute (Ident.getLocation(),
616	Ident.getHTMLIdent()));
617	continue;
618	}
619	Token Equals = Tok;
620	consumeToken();
621	if (Tok.isNot(K: tok::html_quoted_string)) {
622	Diag(Loc: Tok.getLocation(),
623	DiagID: diag::warn_doc_html_start_tag_expected_quoted_string)
624	<< SourceRange (Equals.getLocation());
625	Attrs.push_back(Elt: HTMLStartTagComment::Attribute (Ident.getLocation(),
626	Ident.getHTMLIdent()));
627	while (Tok.is(K: tok::html_equals) \|\|
628	Tok.is(K: tok::html_quoted_string))
629	consumeToken();
630	continue;
631	}
632	Attrs.push_back(Elt: HTMLStartTagComment::Attribute (
633	Ident.getLocation(),
634	Ident.getHTMLIdent(),
635	Equals.getLocation(),
636	SourceRange (Tok.getLocation(),
637	Tok.getEndLocation()),
638	Tok.getHTMLQuotedString()));
639	consumeToken();
640	continue;
641	}
642
643	case tok::html_greater:
644	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
645	GreaterLoc: Tok.getLocation(),
646	/ IsSelfClosing = / false);
647	consumeToken();
648	return HST;
649
650	case tok::html_slash_greater:
651	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
652	GreaterLoc: Tok.getLocation(),
653	/ IsSelfClosing = / true);
654	consumeToken();
655	return HST;
656
657	case tok::html_equals:
658	case tok::html_quoted_string:
659	Diag(Loc: Tok.getLocation(),
660	DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
661	while (Tok.is(K: tok::html_equals) \|\|
662	Tok.is(K: tok::html_quoted_string))
663	consumeToken();
664	if (Tok.is(K: tok::html_ident) \|\|
665	Tok.is(K: tok::html_greater) \|\|
666	Tok.is(K: tok::html_slash_greater))
667	continue;
668
669	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
670	GreaterLoc: SourceLocation (),
671	/ IsSelfClosing = / false);
672	return HST;
673
674	default:
675	// Not a token from an HTML start tag. Thus HTML tag prematurely ended.
676	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: ArrayRef(Attrs)),
677	GreaterLoc: SourceLocation (),
678	/ IsSelfClosing = / false);
679	bool StartLineInvalid;
680	const unsigned StartLine = SourceMgr.getPresumedLineNumber(
681	Loc: HST->getLocation(),
682	Invalid: &StartLineInvalid);
683	bool EndLineInvalid;
684	const unsigned EndLine = SourceMgr.getPresumedLineNumber(
685	Loc: Tok.getLocation(),
686	Invalid: &EndLineInvalid);
687	if (StartLineInvalid \|\| EndLineInvalid \|\| StartLine == EndLine)
688	Diag(Loc: Tok.getLocation(),
689	DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater)
690	<< HST->getSourceRange();
691	else {
692	Diag(Loc: Tok.getLocation(),
693	DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
694	Diag(Loc: HST->getLocation(), DiagID: diag::note_doc_html_tag_started_here)
695	<< HST->getSourceRange();
696	}
697	return HST;
698	}
699	}
700	}
701
702	HTMLEndTagComment *Parser::parseHTMLEndTag() {
703	assert(Tok.is(tok::html_end_tag));
704	Token TokEndTag = Tok;
705	consumeToken();
706	SourceLocation Loc;
707	if (Tok.is(K: tok::html_greater)) {
708	Loc = Tok.getLocation();
709	consumeToken();
710	}
711
712	return S.actOnHTMLEndTag(LocBegin: TokEndTag.getLocation(),
713	LocEnd: Loc,
714	TagName: TokEndTag.getHTMLTagEndName());
715	}
716
717	BlockContentComment *Parser::parseParagraphOrBlockCommand() {
718	SmallVector<InlineContentComment *, `8`> Content;
719
720	while (true) {
721	switch (Tok.getKind()) {
722	case tok::verbatim_block_begin:
723	case tok::verbatim_line_name:
724	case tok::eof:
725	break; // Block content or EOF ahead, finish this parapgaph.
726
727	case tok::unknown_command:
728	Content.push_back(Elt: S.actOnUnknownCommand(LocBegin: Tok.getLocation(),
729	LocEnd: Tok.getEndLocation(),
730	CommandName: Tok.getUnknownCommandName()));
731	consumeToken();
732	continue;
733
734	case tok::backslash_command:
735	case tok::at_command: {
736	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
737	if (Info->IsBlockCommand) {
738	if (Content.size() == `0`)
739	return parseBlockCommand();
740	break; // Block command ahead, finish this parapgaph.
741	}
742	if (Info->IsVerbatimBlockEndCommand) {
743	Diag(Loc: Tok.getLocation(),
744	DiagID: diag::warn_verbatim_block_end_without_start)
745	<< Tok.is(K: tok::at_command)
746	<< Info->Name
747	<< SourceRange (Tok.getLocation(), Tok.getEndLocation());
748	consumeToken();
749	continue;
750	}
751	if (Info->IsUnknownCommand) {
752	Content.push_back(Elt: S.actOnUnknownCommand(LocBegin: Tok.getLocation(),
753	LocEnd: Tok.getEndLocation(),
754	CommandID: Info->getID()));
755	consumeToken();
756	continue;
757	}
758	assert(Info->IsInlineCommand);
759	Content.push_back(Elt: parseInlineCommand());
760	continue;
761	}
762
763	case tok::newline: {
764	consumeToken();
765	if (Tok.is(K: tok::newline) \|\| Tok.is(K: tok::eof)) {
766	consumeToken();
767	break; // Two newlines -- end of paragraph.
768	}
769	// Also allow [tok::newline, tok::text, tok::newline] if the middle
770	// tok::text is just whitespace.
771	if (Tok.is(K: tok::text) && isWhitespace(S: Tok.getText())) {
772	Token WhitespaceTok = Tok;
773	consumeToken();
774	if (Tok.is(K: tok::newline) \|\| Tok.is(K: tok::eof)) {
775	consumeToken();
776	break;
777	}
778	// We have [tok::newline, tok::text, non-newline]. Put back tok::text.
779	putBack(OldTok: WhitespaceTok);
780	}
781	if (Content.size() > `0`)
782	Content.back()->addTrailingNewline();
783	continue;
784	}
785
786	// Don't deal with HTML tag soup now.
787	case tok::html_start_tag:
788	Content.push_back(Elt: parseHTMLStartTag());
789	continue;
790
791	case tok::html_end_tag:
792	Content.push_back(Elt: parseHTMLEndTag());
793	continue;
794
795	case tok::text:
796	Content.push_back(Elt: S.actOnText(LocBegin: Tok.getLocation(),
797	LocEnd: Tok.getEndLocation(),
798	Text: Tok.getText()));
799	consumeToken();
800	continue;
801
802	case tok::verbatim_block_line:
803	case tok::verbatim_block_end:
804	case tok::verbatim_line_text:
805	case tok::html_ident:
806	case tok::html_equals:
807	case tok::html_quoted_string:
808	case tok::html_greater:
809	case tok::html_slash_greater:
810	llvm_unreachable("should not see this token");
811	}
812	break;
813	}
814
815	return S.actOnParagraphComment(Content: S.copyArray(Source: ArrayRef(Content)));
816	}
817
818	VerbatimBlockComment *Parser::parseVerbatimBlock() {
819	assert(Tok.is(tok::verbatim_block_begin));
820
821	VerbatimBlockComment *VB =
822	S.actOnVerbatimBlockStart(Loc: Tok.getLocation(),
823	CommandID: Tok.getVerbatimBlockID());
824	consumeToken();
825
826	// Don't create an empty line if verbatim opening command is followed
827	// by a newline.
828	if (Tok.is(K: tok::newline))
829	consumeToken();
830
831	SmallVector<VerbatimBlockLineComment *, `8`> Lines;
832	while (Tok.is(K: tok::verbatim_block_line) \|\|
833	Tok.is(K: tok::newline)) {
834	VerbatimBlockLineComment *Line;
835	if (Tok.is(K: tok::verbatim_block_line)) {
836	Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(),
837	Text: Tok.getVerbatimBlockText());
838	consumeToken();
839	if (Tok.is(K: tok::newline)) {
840	consumeToken();
841	}
842	} else {
843	// Empty line, just a tok::newline.
844	Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(), Text: "");
845	consumeToken();
846	}
847	Lines.push_back(Elt: Line);
848	}
849
850	if (Tok.is(K: tok::verbatim_block_end)) {
851	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getVerbatimBlockID());
852	S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: Tok.getLocation(), CloseName: Info->Name,
853	Lines: S.copyArray(Source: ArrayRef(Lines)));
854	consumeToken();
855	} else {
856	// Unterminated \\verbatim block
857	S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: SourceLocation (), CloseName: "",
858	Lines: S.copyArray(Source: ArrayRef(Lines)));
859	}
860
861	return VB;
862	}
863
864	VerbatimLineComment *Parser::parseVerbatimLine() {
865	assert(Tok.is(tok::verbatim_line_name));
866
867	Token NameTok = Tok;
868	consumeToken();
869
870	SourceLocation TextBegin;
871	StringRef Text;
872	// Next token might not be a tok::verbatim_line_text if verbatim line
873	// starting command comes just before a newline or comment end.
874	if (Tok.is(K: tok::verbatim_line_text)) {
875	TextBegin = Tok.getLocation();
876	Text = Tok.getVerbatimLineText();
877	} else {
878	TextBegin = NameTok.getEndLocation();
879	Text = "";
880	}
881
882	VerbatimLineComment *VL = S.actOnVerbatimLine(LocBegin: NameTok.getLocation(),
883	CommandID: NameTok.getVerbatimLineID(),
884	TextBegin,
885	Text);
886	consumeToken();
887	return VL;
888	}
889
890	BlockContentComment *Parser::parseBlockContent() {
891	switch (Tok.getKind()) {
892	case tok::text:
893	case tok::unknown_command:
894	case tok::backslash_command:
895	case tok::at_command:
896	case tok::html_start_tag:
897	case tok::html_end_tag:
898	return parseParagraphOrBlockCommand();
899
900	case tok::verbatim_block_begin:
901	return parseVerbatimBlock();
902
903	case tok::verbatim_line_name:
904	return parseVerbatimLine();
905
906	case tok::eof:
907	case tok::newline:
908	case tok::verbatim_block_line:
909	case tok::verbatim_block_end:
910	case tok::verbatim_line_text:
911	case tok::html_ident:
912	case tok::html_equals:
913	case tok::html_quoted_string:
914	case tok::html_greater:
915	case tok::html_slash_greater:
916	llvm_unreachable("should not see this token");
917	}
918	llvm_unreachable("bogus token kind");
919	}
920
921	FullComment *Parser::parseFullComment() {
922	// Skip newlines at the beginning of the comment.
923	while (Tok.is(K: tok::newline))
924	consumeToken();
925
926	SmallVector<BlockContentComment *, `8`> Blocks;
927	while (Tok.isNot(K: tok::eof)) {
928	Blocks.push_back(Elt: parseBlockContent());
929
930	// Skip extra newlines after paragraph end.
931	while (Tok.is(K: tok::newline))
932	consumeToken();
933	}
934	return S.actOnFullComment(Blocks: S.copyArray(Source: ArrayRef(Blocks)));
935	}
936
937	} // end namespace comments
938	} // end namespace clang
939

Browse the source code of llvm_projects/clang/lib/AST/CommentParser.cpp