CommentParser.cpp source code [llvm_projects/clang/lib/AST/CommentParser.cpp]

1	//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang/AST/CommentParser.h"
10	#include "clang/AST/CommentCommandTraits.h"
11	#include "clang/AST/CommentDiagnostic.h"
12	#include "clang/AST/CommentSema.h"
13	#include "clang/Basic/CharInfo.h"
14	#include "clang/Basic/SourceManager.h"
15	#include "llvm/Support/ErrorHandling.h"
16
17	namespace clang {
18
19	static inline bool isWhitespace(llvm::StringRef S) {
20	for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21	if (!isWhitespace(c: *I))
22	return false;
23	}
24	return true;
25	}
26
27	namespace comments {
28
29	/// Re-lexes a sequence of tok::text tokens.
30	class TextTokenRetokenizer {
31	llvm::BumpPtrAllocator &Allocator;
32	Parser &P;
33
34	/// This flag is set when there are no more tokens we can fetch from lexer.
35	bool NoMoreInterestingTokens;
36
37	/// Token buffer: tokens we have processed and lookahead.
38	SmallVector<Token, `16`> Toks;
39
40	/// A position in \c Toks.
41	struct Position {
42	const char *BufferStart;
43	const char *BufferEnd;
44	const char *BufferPtr;
45	SourceLocation BufferStartLoc;
46	unsigned CurToken;
47	};
48
49	/// Current position in Toks.
50	Position Pos;
51
52	bool isEnd() const {
53	return Pos.CurToken >= Toks.size();
54	}
55
56	/// Sets up the buffer pointers to point to current token.
57	void setupBuffer() {
58	assert(!isEnd());
59	const Token &Tok = Toks [Pos.CurToken];
60
61	Pos.BufferStart = Tok.getText().begin();
62	Pos.BufferEnd = Tok.getText().end();
63	Pos.BufferPtr = Pos.BufferStart;
64	Pos.BufferStartLoc = Tok.getLocation();
65	}
66
67	SourceLocation getSourceLocation() const {
68	const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69	return Pos.BufferStartLoc.getLocWithOffset(Offset: CharNo);
70	}
71
72	char peek() const {
73	assert(!isEnd());
74	assert(Pos.BufferPtr != Pos.BufferEnd);
75	return *Pos.BufferPtr;
76	}
77
78	void consumeChar() {
79	assert(!isEnd());
80	assert(Pos.BufferPtr != Pos.BufferEnd);
81	Pos.BufferPtr++;
82	if (Pos.BufferPtr == Pos.BufferEnd) {
83	Pos.CurToken++;
84	if (isEnd() && !addToken())
85	return;
86
87	assert(!isEnd());
88	setupBuffer();
89	}
90	}
91
92	/// Extract a template type
93	bool lexTemplate(SmallString<`32`> &WordText) {
94	unsigned BracketCount = `0`;
95	while (!isEnd()) {
96	const char C = peek();
97	WordText.push_back(Elt: C);
98	consumeChar();
99	switch (C) {
100	case `'<'`: {
101	BracketCount++;
102	break;
103	}
104	case `'>'`: {
105	BracketCount--;
106	if (!BracketCount)
107	return true;
108	break;
109	}
110	default:
111	break;
112	}
113	}
114	return false;
115	}
116
117	/// Add a token.
118	/// Returns true on success, false if there are no interesting tokens to
119	/// fetch from lexer.
120	bool addToken() {
121	if (NoMoreInterestingTokens)
122	return false;
123
124	if (P.Tok.is(K: tok::newline)) {
125	// If we see a single newline token between text tokens, skip it.
126	Token Newline = P.Tok;
127	P.consumeToken();
128	if (P.Tok.isNot(K: tok::text)) {
129	P.putBack(OldTok: Newline);
130	NoMoreInterestingTokens = true;
131	return false;
132	}
133	}
134	if (P.Tok.isNot(K: tok::text)) {
135	NoMoreInterestingTokens = true;
136	return false;
137	}
138
139	Toks.push_back(Elt: P.Tok);
140	P.consumeToken();
141	if (Toks.size() == `1`)
142	setupBuffer();
143	return true;
144	}
145
146	void consumeWhitespace() {
147	while (!isEnd()) {
148	if (isWhitespace(c: peek()))
149	consumeChar();
150	else
151	break;
152	}
153	}
154
155	void formTokenWithChars(Token &Result,
156	SourceLocation Loc,
157	const char *TokBegin,
158	unsigned TokLength,
159	StringRef Text) {
160	Result.setLocation(Loc);
161	Result.setKind(tok::text);
162	Result.setLength(TokLength);
163	#ifndef NDEBUG
164	Result.TextPtr = "<UNSET>";
165	Result.IntVal = `7`;
166	#endif
167	Result.setText(Text);
168	}
169
170	public:
171	TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
172	Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
173	Pos.CurToken = `0`;
174	addToken();
175	}
176
177	/// Extract a type argument
178	bool lexType(Token &Tok) {
179	if (isEnd())
180	return false;
181
182	// Save current position in case we need to rollback because the type is
183	// empty.
184	Position SavedPos = Pos;
185
186	// Consume any leading whitespace.
187	consumeWhitespace();
188	SmallString<`32`> WordText;
189	const char *WordBegin = Pos.BufferPtr;
190	SourceLocation Loc = getSourceLocation();
191
192	while (!isEnd()) {
193	const char C = peek();
194	// For non-whitespace characters we check if it's a template or otherwise
195	// continue reading the text into a word.
196	if (!isWhitespace(c: C)) {
197	if (C == `'<'`) {
198	if (!lexTemplate(WordText))
199	return false;
200	} else {
201	WordText.push_back(Elt: C);
202	consumeChar();
203	}
204	} else {
205	consumeChar();
206	break;
207	}
208	}
209
210	const unsigned Length = WordText.size();
211	if (Length == `0`) {
212	Pos = SavedPos;
213	return false;
214	}
215
216	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
217
218	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
219	StringRef Text = StringRef (TextPtr, Length);
220
221	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
222	return true;
223	}
224
225	// Check if this line starts with @par or \par
226	bool startsWithParCommand() {
227	unsigned Offset = `1`;
228
229	// Skip all whitespace characters at the beginning.
230	// This needs to backtrack because Pos has already advanced past the
231	// actual \par or @par command by the time this function is called.
232	while (isWhitespace(c: *(Pos.BufferPtr - Offset)))
233	Offset++;
234
235	// Once we've reached the whitespace, backtrack and check if the previous
236	// four characters are \par or @par.
237	llvm::StringRef LineStart(Pos.BufferPtr - Offset - `3`, `4`);
238	return LineStart.starts_with(Prefix: "\\par") \|\| LineStart.starts_with(Prefix: "@par");
239	}
240
241	/// Extract a par command argument-header.
242	bool lexParHeading(Token &Tok) {
243	if (isEnd())
244	return false;
245
246	Position SavedPos = Pos;
247
248	consumeWhitespace();
249	SmallString<`32`> WordText;
250	const char *WordBegin = Pos.BufferPtr;
251	SourceLocation Loc = getSourceLocation();
252
253	if (!startsWithParCommand())
254	return false;
255
256	// Read until the end of this token, which is effectively the end of the
257	// line. This gets us the content of the par header, if there is one.
258	while (!isEnd()) {
259	WordText.push_back(Elt: peek());
260	if (Pos.BufferPtr + `1` == Pos.BufferEnd) {
261	consumeChar();
262	break;
263	}
264	consumeChar();
265	}
266
267	unsigned Length = WordText.size();
268	if (Length == `0`) {
269	Pos = SavedPos;
270	return false;
271	}
272
273	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
274
275	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
276	StringRef Text = StringRef (TextPtr, Length);
277
278	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
279	return true;
280	}
281
282	/// Extract a word -- sequence of non-whitespace characters.
283	bool lexWord(Token &Tok) {
284	if (isEnd())
285	return false;
286
287	Position SavedPos = Pos;
288
289	consumeWhitespace();
290	SmallString<`32`> WordText;
291	const char *WordBegin = Pos.BufferPtr;
292	SourceLocation Loc = getSourceLocation();
293	while (!isEnd()) {
294	const char C = peek();
295	if (!isWhitespace(c: C)) {
296	WordText.push_back(Elt: C);
297	consumeChar();
298	} else
299	break;
300	}
301	const unsigned Length = WordText.size();
302	if (Length == `0`) {
303	Pos = SavedPos;
304	return false;
305	}
306
307	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
308
309	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
310	StringRef Text = StringRef (TextPtr, Length);
311
312	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin, TokLength: Length, Text);
313	return true;
314	}
315
316	bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
317	if (isEnd())
318	return false;
319
320	Position SavedPos = Pos;
321
322	consumeWhitespace();
323	SmallString<`32`> WordText;
324	const char *WordBegin = Pos.BufferPtr;
325	SourceLocation Loc = getSourceLocation();
326	bool Error = false;
327	if (!isEnd()) {
328	const char C = peek();
329	if (C == OpenDelim) {
330	WordText.push_back(Elt: C);
331	consumeChar();
332	} else
333	Error = true;
334	}
335	char C = `'\0'`;
336	while (!Error && !isEnd()) {
337	C = peek();
338	WordText.push_back(Elt: C);
339	consumeChar();
340	if (C == CloseDelim)
341	break;
342	}
343	if (!Error && C != CloseDelim)
344	Error = true;
345
346	if (Error) {
347	Pos = SavedPos;
348	return false;
349	}
350
351	const unsigned Length = WordText.size();
352	char TextPtr = Allocator.Allocate<char*>(Num: Length + `1`);
353
354	memcpy(dest: TextPtr, src: WordText.c_str(), n: Length + `1`);
355	StringRef Text = StringRef (TextPtr, Length);
356
357	formTokenWithChars(Result&: Tok, Loc, TokBegin: WordBegin,
358	TokLength: Pos.BufferPtr - WordBegin, Text);
359	return true;
360	}
361
362	/// Put back tokens that we didn't consume.
363	void putBackLeftoverTokens() {
364	if (isEnd())
365	return;
366
367	bool HavePartialTok = false;
368	Token PartialTok;
369	if (Pos.BufferPtr != Pos.BufferStart) {
370	formTokenWithChars(Result&: PartialTok, Loc: getSourceLocation(),
371	TokBegin: Pos.BufferPtr, TokLength: Pos.BufferEnd - Pos.BufferPtr,
372	Text: StringRef (Pos.BufferPtr,
373	Pos.BufferEnd - Pos.BufferPtr));
374	HavePartialTok = true;
375	Pos.CurToken++;
376	}
377
378	P.putBack(Toks: llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
379	Pos.CurToken = Toks.size();
380
381	if (HavePartialTok)
382	P.putBack(OldTok: PartialTok);
383	}
384	};
385
386	Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
387	const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
388	const CommandTraits &Traits):
389	L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
390	Traits(Traits) {
391	consumeToken();
392	}
393
394	void Parser::parseParamCommandArgs(ParamCommandComment *PC,
395	TextTokenRetokenizer &Retokenizer) {
396	Token Arg;
397	// Check if argument looks like direction specification: [dir]
398	// e.g., [in], [out], [in,out]
399	if (Retokenizer.lexDelimitedSeq(Tok&: Arg, OpenDelim: `'['`, CloseDelim: `']'`))
400	S.actOnParamCommandDirectionArg(Command: PC,
401	ArgLocBegin: Arg.getLocation(),
402	ArgLocEnd: Arg.getEndLocation(),
403	Arg: Arg.getText());
404
405	if (Retokenizer.lexWord(Tok&: Arg))
406	S.actOnParamCommandParamNameArg(Command: PC,
407	ArgLocBegin: Arg.getLocation(),
408	ArgLocEnd: Arg.getEndLocation(),
409	Arg: Arg.getText());
410	}
411
412	void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
413	TextTokenRetokenizer &Retokenizer) {
414	Token Arg;
415	if (Retokenizer.lexWord(Tok&: Arg))
416	S.actOnTParamCommandParamNameArg(Command: TPC,
417	ArgLocBegin: Arg.getLocation(),
418	ArgLocEnd: Arg.getEndLocation(),
419	Arg: Arg.getText());
420	}
421
422	ArrayRef<Comment::Argument>
423	Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
424	auto Args = new* (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
425	Comment::Argument[NumArgs];
426	unsigned ParsedArgs = `0`;
427	Token Arg;
428	while (ParsedArgs < NumArgs && Retokenizer.lexWord(Tok&: Arg)) {
429	Args[ParsedArgs] = Comment::Argument{
430	.Range: SourceRange (Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
431	ParsedArgs++;
432	}
433
434	return llvm::ArrayRef(Args, ParsedArgs);
435	}
436
437	ArrayRef<Comment::Argument>
438	Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
439	unsigned NumArgs) {
440	auto Args = new* (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
441	Comment::Argument[NumArgs];
442	unsigned ParsedArgs = `0`;
443	Token Arg;
444
445	while (ParsedArgs < NumArgs && Retokenizer.lexType(Tok&: Arg)) {
446	Args[ParsedArgs] = Comment::Argument{
447	.Range: SourceRange (Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
448	ParsedArgs++;
449	}
450
451	return llvm::ArrayRef(Args, ParsedArgs);
452	}
453
454	ArrayRef<Comment::Argument>
455	Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
456	unsigned NumArgs) {
457	assert(NumArgs > `0`);
458	auto Args = new* (Allocator.Allocate<Comment::Argument>(Num: NumArgs))
459	Comment::Argument[NumArgs];
460	unsigned ParsedArgs = `0`;
461	Token Arg;
462
463	while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Tok&: Arg)) {
464	Args[ParsedArgs] = Comment::Argument{
465	.Range: SourceRange (Arg.getLocation(), Arg.getEndLocation()), .Text: Arg.getText()};
466	ParsedArgs++;
467	}
468
469	return llvm::ArrayRef(Args, ParsedArgs);
470	}
471
472	BlockCommandComment *Parser::parseBlockCommand() {
473	assert(Tok.is(tok::backslash_command) \|\| Tok.is(tok::at_command));
474
475	ParamCommandComment PC = nullptr*;
476	TParamCommandComment TPC = nullptr*;
477	BlockCommandComment BC = nullptr*;
478	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
479	CommandMarkerKind CommandMarker =
480	Tok.is(K: tok::backslash_command) ? CMK_Backslash : CMK_At;
481	if (Info->IsParamCommand) {
482	PC = S.actOnParamCommandStart(LocBegin: Tok.getLocation(),
483	LocEnd: Tok.getEndLocation(),
484	CommandID: Tok.getCommandID(),
485	CommandMarker);
486	} else if (Info->IsTParamCommand) {
487	TPC = S.actOnTParamCommandStart(LocBegin: Tok.getLocation(),
488	LocEnd: Tok.getEndLocation(),
489	CommandID: Tok.getCommandID(),
490	CommandMarker);
491	} else {
492	BC = S.actOnBlockCommandStart(LocBegin: Tok.getLocation(),
493	LocEnd: Tok.getEndLocation(),
494	CommandID: Tok.getCommandID(),
495	CommandMarker);
496	}
497	consumeToken();
498
499	if (isTokBlockCommand()) {
500	// Block command ahead. We can't nest block commands, so pretend that this
501	// command has an empty argument.
502	ParagraphComment *Paragraph = S.actOnParagraphComment(Content: std::nullopt);
503	if (PC) {
504	S.actOnParamCommandFinish(Command: PC, Paragraph);
505	return PC;
506	} else if (TPC) {
507	S.actOnTParamCommandFinish(Command: TPC, Paragraph);
508	return TPC;
509	} else {
510	S.actOnBlockCommandFinish(Command: BC, Paragraph);
511	return BC;
512	}
513	}
514
515	if (PC \|\| TPC \|\| Info->NumArgs > `0`) {
516	// In order to parse command arguments we need to retokenize a few
517	// following text tokens.
518	TextTokenRetokenizer Retokenizer(Allocator, *this);
519
520	if (PC)
521	parseParamCommandArgs(PC, Retokenizer);
522	else if (TPC)
523	parseTParamCommandArgs(TPC, Retokenizer);
524	else if (Info->IsThrowsCommand)
525	S.actOnBlockCommandArgs(
526	Command: BC, Args: parseThrowCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
527	else if (Info->IsParCommand)
528	S.actOnBlockCommandArgs(Command: BC,
529	Args: parseParCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
530	else
531	S.actOnBlockCommandArgs(Command: BC, Args: parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs));
532
533	Retokenizer.putBackLeftoverTokens();
534	}
535
536	// If there's a block command ahead, we will attach an empty paragraph to
537	// this command.
538	bool EmptyParagraph = false;
539	if (isTokBlockCommand())
540	EmptyParagraph = true;
541	else if (Tok.is(K: tok::newline)) {
542	Token PrevTok = Tok;
543	consumeToken();
544	EmptyParagraph = isTokBlockCommand();
545	putBack(OldTok: PrevTok);
546	}
547
548	ParagraphComment *Paragraph;
549	if (EmptyParagraph)
550	Paragraph = S.actOnParagraphComment(Content: std::nullopt);
551	else {
552	BlockContentComment *Block = parseParagraphOrBlockCommand();
553	// Since we have checked for a block command, we should have parsed a
554	// paragraph.
555	Paragraph = cast<ParagraphComment>(Val: Block);
556	}
557
558	if (PC) {
559	S.actOnParamCommandFinish(Command: PC, Paragraph);
560	return PC;
561	} else if (TPC) {
562	S.actOnTParamCommandFinish(Command: TPC, Paragraph);
563	return TPC;
564	} else {
565	S.actOnBlockCommandFinish(Command: BC, Paragraph);
566	return BC;
567	}
568	}
569
570	InlineCommandComment *Parser::parseInlineCommand() {
571	assert(Tok.is(tok::backslash_command) \|\| Tok.is(tok::at_command));
572	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
573
574	const Token CommandTok = Tok;
575	consumeToken();
576
577	TextTokenRetokenizer Retokenizer(Allocator, *this);
578	ArrayRef<Comment::Argument> Args =
579	parseCommandArgs(Retokenizer, NumArgs: Info->NumArgs);
580
581	InlineCommandComment *IC = S.actOnInlineCommand(
582	CommandLocBegin: CommandTok.getLocation(), CommandLocEnd: CommandTok.getEndLocation(),
583	CommandID: CommandTok.getCommandID(), Args);
584
585	if (Args.size() < Info->NumArgs) {
586	Diag(Loc: CommandTok.getEndLocation().getLocWithOffset(Offset: `1`),
587	DiagID: diag::warn_doc_inline_command_not_enough_arguments)
588	<< CommandTok.is(K: tok::at_command) << Info->Name << Args.size()
589	<< Info->NumArgs
590	<< SourceRange (CommandTok.getLocation(), CommandTok.getEndLocation());
591	}
592
593	Retokenizer.putBackLeftoverTokens();
594
595	return IC;
596	}
597
598	HTMLStartTagComment *Parser::parseHTMLStartTag() {
599	assert(Tok.is(tok::html_start_tag));
600	HTMLStartTagComment *HST =
601	S.actOnHTMLStartTagStart(LocBegin: Tok.getLocation(),
602	TagName: Tok.getHTMLTagStartName());
603	consumeToken();
604
605	SmallVector<HTMLStartTagComment::Attribute, `2`> Attrs;
606	while (true) {
607	switch (Tok.getKind()) {
608	case tok::html_ident: {
609	Token Ident = Tok;
610	consumeToken();
611	if (Tok.isNot(K: tok::html_equals)) {
612	Attrs.push_back(Elt: HTMLStartTagComment::Attribute (Ident.getLocation(),
613	Ident.getHTMLIdent()));
614	continue;
615	}
616	Token Equals = Tok;
617	consumeToken();
618	if (Tok.isNot(K: tok::html_quoted_string)) {
619	Diag(Loc: Tok.getLocation(),
620	DiagID: diag::warn_doc_html_start_tag_expected_quoted_string)
621	<< SourceRange (Equals.getLocation());
622	Attrs.push_back(Elt: HTMLStartTagComment::Attribute (Ident.getLocation(),
623	Ident.getHTMLIdent()));
624	while (Tok.is(K: tok::html_equals) \|\|
625	Tok.is(K: tok::html_quoted_string))
626	consumeToken();
627	continue;
628	}
629	Attrs.push_back(Elt: HTMLStartTagComment::Attribute (
630	Ident.getLocation(),
631	Ident.getHTMLIdent(),
632	Equals.getLocation(),
633	SourceRange (Tok.getLocation(),
634	Tok.getEndLocation()),
635	Tok.getHTMLQuotedString()));
636	consumeToken();
637	continue;
638	}
639
640	case tok::html_greater:
641	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: llvm::ArrayRef(Attrs)),
642	GreaterLoc: Tok.getLocation(),
643	/ IsSelfClosing = / false);
644	consumeToken();
645	return HST;
646
647	case tok::html_slash_greater:
648	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: llvm::ArrayRef(Attrs)),
649	GreaterLoc: Tok.getLocation(),
650	/ IsSelfClosing = / true);
651	consumeToken();
652	return HST;
653
654	case tok::html_equals:
655	case tok::html_quoted_string:
656	Diag(Loc: Tok.getLocation(),
657	DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
658	while (Tok.is(K: tok::html_equals) \|\|
659	Tok.is(K: tok::html_quoted_string))
660	consumeToken();
661	if (Tok.is(K: tok::html_ident) \|\|
662	Tok.is(K: tok::html_greater) \|\|
663	Tok.is(K: tok::html_slash_greater))
664	continue;
665
666	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: llvm::ArrayRef(Attrs)),
667	GreaterLoc: SourceLocation (),
668	/ IsSelfClosing = / false);
669	return HST;
670
671	default:
672	// Not a token from an HTML start tag. Thus HTML tag prematurely ended.
673	S.actOnHTMLStartTagFinish(Tag: HST, Attrs: S.copyArray(Source: llvm::ArrayRef(Attrs)),
674	GreaterLoc: SourceLocation (),
675	/ IsSelfClosing = / false);
676	bool StartLineInvalid;
677	const unsigned StartLine = SourceMgr.getPresumedLineNumber(
678	Loc: HST->getLocation(),
679	Invalid: &StartLineInvalid);
680	bool EndLineInvalid;
681	const unsigned EndLine = SourceMgr.getPresumedLineNumber(
682	Loc: Tok.getLocation(),
683	Invalid: &EndLineInvalid);
684	if (StartLineInvalid \|\| EndLineInvalid \|\| StartLine == EndLine)
685	Diag(Loc: Tok.getLocation(),
686	DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater)
687	<< HST->getSourceRange();
688	else {
689	Diag(Loc: Tok.getLocation(),
690	DiagID: diag::warn_doc_html_start_tag_expected_ident_or_greater);
691	Diag(Loc: HST->getLocation(), DiagID: diag::note_doc_html_tag_started_here)
692	<< HST->getSourceRange();
693	}
694	return HST;
695	}
696	}
697	}
698
699	HTMLEndTagComment *Parser::parseHTMLEndTag() {
700	assert(Tok.is(tok::html_end_tag));
701	Token TokEndTag = Tok;
702	consumeToken();
703	SourceLocation Loc;
704	if (Tok.is(K: tok::html_greater)) {
705	Loc = Tok.getLocation();
706	consumeToken();
707	}
708
709	return S.actOnHTMLEndTag(LocBegin: TokEndTag.getLocation(),
710	LocEnd: Loc,
711	TagName: TokEndTag.getHTMLTagEndName());
712	}
713
714	BlockContentComment *Parser::parseParagraphOrBlockCommand() {
715	SmallVector<InlineContentComment *, `8`> Content;
716
717	while (true) {
718	switch (Tok.getKind()) {
719	case tok::verbatim_block_begin:
720	case tok::verbatim_line_name:
721	case tok::eof:
722	break; // Block content or EOF ahead, finish this parapgaph.
723
724	case tok::unknown_command:
725	Content.push_back(Elt: S.actOnUnknownCommand(LocBegin: Tok.getLocation(),
726	LocEnd: Tok.getEndLocation(),
727	CommandName: Tok.getUnknownCommandName()));
728	consumeToken();
729	continue;
730
731	case tok::backslash_command:
732	case tok::at_command: {
733	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getCommandID());
734	if (Info->IsBlockCommand) {
735	if (Content.size() == `0`)
736	return parseBlockCommand();
737	break; // Block command ahead, finish this parapgaph.
738	}
739	if (Info->IsVerbatimBlockEndCommand) {
740	Diag(Loc: Tok.getLocation(),
741	DiagID: diag::warn_verbatim_block_end_without_start)
742	<< Tok.is(K: tok::at_command)
743	<< Info->Name
744	<< SourceRange (Tok.getLocation(), Tok.getEndLocation());
745	consumeToken();
746	continue;
747	}
748	if (Info->IsUnknownCommand) {
749	Content.push_back(Elt: S.actOnUnknownCommand(LocBegin: Tok.getLocation(),
750	LocEnd: Tok.getEndLocation(),
751	CommandID: Info->getID()));
752	consumeToken();
753	continue;
754	}
755	assert(Info->IsInlineCommand);
756	Content.push_back(Elt: parseInlineCommand());
757	continue;
758	}
759
760	case tok::newline: {
761	consumeToken();
762	if (Tok.is(K: tok::newline) \|\| Tok.is(K: tok::eof)) {
763	consumeToken();
764	break; // Two newlines -- end of paragraph.
765	}
766	// Also allow [tok::newline, tok::text, tok::newline] if the middle
767	// tok::text is just whitespace.
768	if (Tok.is(K: tok::text) && isWhitespace(S: Tok.getText())) {
769	Token WhitespaceTok = Tok;
770	consumeToken();
771	if (Tok.is(K: tok::newline) \|\| Tok.is(K: tok::eof)) {
772	consumeToken();
773	break;
774	}
775	// We have [tok::newline, tok::text, non-newline]. Put back tok::text.
776	putBack(OldTok: WhitespaceTok);
777	}
778	if (Content.size() > `0`)
779	Content.back()->addTrailingNewline();
780	continue;
781	}
782
783	// Don't deal with HTML tag soup now.
784	case tok::html_start_tag:
785	Content.push_back(Elt: parseHTMLStartTag());
786	continue;
787
788	case tok::html_end_tag:
789	Content.push_back(Elt: parseHTMLEndTag());
790	continue;
791
792	case tok::text:
793	Content.push_back(Elt: S.actOnText(LocBegin: Tok.getLocation(),
794	LocEnd: Tok.getEndLocation(),
795	Text: Tok.getText()));
796	consumeToken();
797	continue;
798
799	case tok::verbatim_block_line:
800	case tok::verbatim_block_end:
801	case tok::verbatim_line_text:
802	case tok::html_ident:
803	case tok::html_equals:
804	case tok::html_quoted_string:
805	case tok::html_greater:
806	case tok::html_slash_greater:
807	llvm_unreachable("should not see this token");
808	}
809	break;
810	}
811
812	return S.actOnParagraphComment(Content: S.copyArray(Source: llvm::ArrayRef(Content)));
813	}
814
815	VerbatimBlockComment *Parser::parseVerbatimBlock() {
816	assert(Tok.is(tok::verbatim_block_begin));
817
818	VerbatimBlockComment *VB =
819	S.actOnVerbatimBlockStart(Loc: Tok.getLocation(),
820	CommandID: Tok.getVerbatimBlockID());
821	consumeToken();
822
823	// Don't create an empty line if verbatim opening command is followed
824	// by a newline.
825	if (Tok.is(K: tok::newline))
826	consumeToken();
827
828	SmallVector<VerbatimBlockLineComment *, `8`> Lines;
829	while (Tok.is(K: tok::verbatim_block_line) \|\|
830	Tok.is(K: tok::newline)) {
831	VerbatimBlockLineComment *Line;
832	if (Tok.is(K: tok::verbatim_block_line)) {
833	Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(),
834	Text: Tok.getVerbatimBlockText());
835	consumeToken();
836	if (Tok.is(K: tok::newline)) {
837	consumeToken();
838	}
839	} else {
840	// Empty line, just a tok::newline.
841	Line = S.actOnVerbatimBlockLine(Loc: Tok.getLocation(), Text: "");
842	consumeToken();
843	}
844	Lines.push_back(Elt: Line);
845	}
846
847	if (Tok.is(K: tok::verbatim_block_end)) {
848	const CommandInfo *Info = Traits.getCommandInfo(CommandID: Tok.getVerbatimBlockID());
849	S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: Tok.getLocation(), CloseName: Info->Name,
850	Lines: S.copyArray(Source: llvm::ArrayRef(Lines)));
851	consumeToken();
852	} else {
853	// Unterminated \\verbatim block
854	S.actOnVerbatimBlockFinish(Block: VB, CloseNameLocBegin: SourceLocation (), CloseName: "",
855	Lines: S.copyArray(Source: llvm::ArrayRef(Lines)));
856	}
857
858	return VB;
859	}
860
861	VerbatimLineComment *Parser::parseVerbatimLine() {
862	assert(Tok.is(tok::verbatim_line_name));
863
864	Token NameTok = Tok;
865	consumeToken();
866
867	SourceLocation TextBegin;
868	StringRef Text;
869	// Next token might not be a tok::verbatim_line_text if verbatim line
870	// starting command comes just before a newline or comment end.
871	if (Tok.is(K: tok::verbatim_line_text)) {
872	TextBegin = Tok.getLocation();
873	Text = Tok.getVerbatimLineText();
874	} else {
875	TextBegin = NameTok.getEndLocation();
876	Text = "";
877	}
878
879	VerbatimLineComment *VL = S.actOnVerbatimLine(LocBegin: NameTok.getLocation(),
880	CommandID: NameTok.getVerbatimLineID(),
881	TextBegin,
882	Text);
883	consumeToken();
884	return VL;
885	}
886
887	BlockContentComment *Parser::parseBlockContent() {
888	switch (Tok.getKind()) {
889	case tok::text:
890	case tok::unknown_command:
891	case tok::backslash_command:
892	case tok::at_command:
893	case tok::html_start_tag:
894	case tok::html_end_tag:
895	return parseParagraphOrBlockCommand();
896
897	case tok::verbatim_block_begin:
898	return parseVerbatimBlock();
899
900	case tok::verbatim_line_name:
901	return parseVerbatimLine();
902
903	case tok::eof:
904	case tok::newline:
905	case tok::verbatim_block_line:
906	case tok::verbatim_block_end:
907	case tok::verbatim_line_text:
908	case tok::html_ident:
909	case tok::html_equals:
910	case tok::html_quoted_string:
911	case tok::html_greater:
912	case tok::html_slash_greater:
913	llvm_unreachable("should not see this token");
914	}
915	llvm_unreachable("bogus token kind");
916	}
917
918	FullComment *Parser::parseFullComment() {
919	// Skip newlines at the beginning of the comment.
920	while (Tok.is(K: tok::newline))
921	consumeToken();
922
923	SmallVector<BlockContentComment *, `8`> Blocks;
924	while (Tok.isNot(K: tok::eof)) {
925	Blocks.push_back(Elt: parseBlockContent());
926
927	// Skip extra newlines after paragraph end.
928	while (Tok.is(K: tok::newline))
929	consumeToken();
930	}
931	return S.actOnFullComment(Blocks: S.copyArray(Source: llvm::ArrayRef(Blocks)));
932	}
933
934	} // end namespace comments
935	} // end namespace clang
936

Browse the source code of llvm_projects/clang/lib/AST/CommentParser.cpp