//===--- CommentParser.cpp - Doxygen comment parser ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentDiagnostic.h"
#include "clang/AST/CommentSema.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/ErrorHandling.h"

namespace clang {

static inline bool isWhitespace(llvm::StringRef S) {
  for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
    if (!isWhitespace(*I))
      return false;
  }
  return true;
}

namespace comments {

/// Re-lexes a sequence of tok::text tokens.
class TextTokenRetokenizer {
  llvm::BumpPtrAllocator &Allocator;
  Parser &P;

  /// This flag is set when there are no more tokens we can fetch from lexer.
  bool NoMoreInterestingTokens;

  /// Token buffer: tokens we have processed and lookahead.
  SmallVector<Token, 16> Toks;

  /// A position in \c Toks.
  struct Position {
    const char *BufferStart;
    const char *BufferEnd;
    const char *BufferPtr;
    SourceLocation BufferStartLoc;
    unsigned CurToken;
  };

  /// Current position in Toks.
  Position Pos;

  bool isEnd() const {
    return Pos.CurToken >= Toks.size();
  }

  /// Sets up the buffer pointers to point to current token.
  void setupBuffer() {
    assert(!isEnd());
    const Token &Tok = Toks[Pos.CurToken];

    Pos.BufferStart = Tok.getText().begin();
    Pos.BufferEnd = Tok.getText().end();
    Pos.BufferPtr = Pos.BufferStart;
    Pos.BufferStartLoc = Tok.getLocation();
  }

  SourceLocation getSourceLocation() const {
    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
  }

  char peek() const {
    assert(!isEnd());
    assert(Pos.BufferPtr != Pos.BufferEnd);
    return *Pos.BufferPtr;
  }

  void consumeChar() {
    assert(!isEnd());
    assert(Pos.BufferPtr != Pos.BufferEnd);
    Pos.BufferPtr++;
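    // If that was the last character of the current text token, move on to
    // the next buffered token, pulling a fresh one from the lexer if the
    // buffer is exhausted.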
    if (Pos.BufferPtr == Pos.BufferEnd) {
      Pos.CurToken++;
      if (isEnd() && !addToken())
        return;

      assert(!isEnd());
      setupBuffer();
    }
  }

  /// Extract a template type
  bool lexTemplate(SmallString<32> &WordText) {
    unsigned BracketCount = 0;
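    // Track '<' / '>' nesting depth so that nested templates such as
    // std::map<int, std::vector<int>> are consumed as a single unit.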
    while (!isEnd()) {
      const char C = peek();
      WordText.push_back(C);
      consumeChar();
      switch (C) {
      case '<': {
        BracketCount++;
        break;
      }
      case '>': {
        BracketCount--;
        if (!BracketCount)
          return true;
        break;
      }
      default:
        break;
      }
    }
    return false;
  }

  /// Add a token.
  /// Returns true on success, false if there are no interesting tokens to
  /// fetch from lexer.
  bool addToken() {
    if (NoMoreInterestingTokens)
      return false;

    if (P.Tok.is(tok::newline)) {
      // If we see a single newline token between text tokens, skip it.
      Token Newline = P.Tok;
      P.consumeToken();
      if (P.Tok.isNot(tok::text)) {
        P.putBack(Newline);
        NoMoreInterestingTokens = true;
        return false;
      }
    }
    if (P.Tok.isNot(tok::text)) {
      NoMoreInterestingTokens = true;
      return false;
    }

    Toks.push_back(P.Tok);
    P.consumeToken();
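    // The first buffered token also initializes the character buffer
    // pointers so that lexing can start immediately.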
    if (Toks.size() == 1)
      setupBuffer();
    return true;
  }

  void consumeWhitespace() {
    while (!isEnd()) {
      if (isWhitespace(peek()))
        consumeChar();
      else
        break;
    }
  }

  void formTokenWithChars(Token &Result,
                          SourceLocation Loc,
                          const char *TokBegin,
                          unsigned TokLength,
                          StringRef Text) {
    Result.setLocation(Loc);
    Result.setKind(tok::text);
    Result.setLength(TokLength);
#ifndef NDEBUG
    Result.TextPtr = "<UNSET>";
    Result.IntVal = 7;
#endif
    Result.setText(Text);
  }

public:
  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
    Pos.CurToken = 0;
    addToken();
  }

  /// Extract a type argument
  bool lexType(Token &Tok) {
    if (isEnd())
      return false;

    // Save current position in case we need to rollback because the type is
    // empty.
    Position SavedPos = Pos;

    // Consume any leading whitespace.
    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();

    while (!isEnd()) {
      const char C = peek();
      // For non-whitespace characters we check if it's a template or otherwise
      // continue reading the text into a word.
      if (!isWhitespace(C)) {
        if (C == '<') {
          if (!lexTemplate(WordText))
            return false;
        } else {
          WordText.push_back(C);
          consumeChar();
        }
      } else {
        consumeChar();
        break;
      }
    }

    const unsigned Length = WordText.size();
    if (Length == 0) {
      Pos = SavedPos;
      return false;
    }

    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
    return true;
  }

  // Check if this line starts with @par or \par
  bool startsWithParCommand() {
    unsigned Offset = 1;

    // Skip all whitespace characters at the beginning.
    // This needs to backtrack because Pos has already advanced past the
    // actual \par or @par command by the time this function is called.
    while (isWhitespace(*(Pos.BufferPtr - Offset)))
      Offset++;

    // Once we've reached the whitespace, backtrack and check if the previous
    // four characters are \par or @par.
    llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
    return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
  }

  /// Extract a par command argument-header.
  bool lexParHeading(Token &Tok) {
    if (isEnd())
      return false;

    Position SavedPos = Pos;

    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();

    if (!startsWithParCommand())
      return false;

    // Read until the end of this token, which is effectively the end of the
    // line. This gets us the content of the par header, if there is one.
    while (!isEnd()) {
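      // Stop at the end of the current text token rather than letting
      // consumeChar() roll over into the next one; text tokens end at the
      // newline, so this keeps the heading to a single line.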
      WordText.push_back(peek());
      if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
        consumeChar();
        break;
      }
      consumeChar();
    }

    unsigned Length = WordText.size();
    if (Length == 0) {
      Pos = SavedPos;
      return false;
    }

    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
    return true;
  }

  /// Extract a word -- sequence of non-whitespace characters.
  bool lexWord(Token &Tok) {
    if (isEnd())
      return false;

    Position SavedPos = Pos;

    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();
    while (!isEnd()) {
      const char C = peek();
      if (!isWhitespace(C)) {
        WordText.push_back(C);
        consumeChar();
      } else
        break;
    }
    const unsigned Length = WordText.size();
    if (Length == 0) {
      Pos = SavedPos;
      return false;
    }

    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
    return true;
  }

  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
    if (isEnd())
      return false;

    Position SavedPos = Pos;

    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();
    bool Error = false;
    if (!isEnd()) {
      const char C = peek();
      if (C == OpenDelim) {
        WordText.push_back(C);
        consumeChar();
      } else
        Error = true;
    }
    char C = '\0';
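    // Copy characters (including the delimiters themselves) until the closing
    // delimiter is reached; everything in between is part of the sequence.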
    while (!Error && !isEnd()) {
      C = peek();
      WordText.push_back(C);
      consumeChar();
      if (C == CloseDelim)
        break;
    }
    if (!Error && C != CloseDelim)
      Error = true;

    if (Error) {
      Pos = SavedPos;
      return false;
    }

    const unsigned Length = WordText.size();
    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin,
                       Pos.BufferPtr - WordBegin, Text);
    return true;
  }

  /// Put back tokens that we didn't consume.
  void putBackLeftoverTokens() {
    if (isEnd())
      return;

    bool HavePartialTok = false;
    Token PartialTok;
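    // If we stopped partway through a text token, form a new token from the
    // unconsumed tail so it can be handed back to the parser separately.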
    if (Pos.BufferPtr != Pos.BufferStart) {
      formTokenWithChars(PartialTok, getSourceLocation(),
                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
                         StringRef(Pos.BufferPtr,
                                   Pos.BufferEnd - Pos.BufferPtr));
      HavePartialTok = true;
      Pos.CurToken++;
    }

    P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
    Pos.CurToken = Toks.size();

    if (HavePartialTok)
      P.putBack(PartialTok);
  }
};

Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
               const CommandTraits &Traits):
    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
    Traits(Traits) {
  consumeToken();
}

void Parser::parseParamCommandArgs(ParamCommandComment *PC,
                                   TextTokenRetokenizer &Retokenizer) {
  Token Arg;
  // Check if argument looks like direction specification: [dir]
  // e.g., [in], [out], [in,out]
  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
    S.actOnParamCommandDirectionArg(PC,
                                    Arg.getLocation(),
                                    Arg.getEndLocation(),
                                    Arg.getText());

  if (Retokenizer.lexWord(Arg))
    S.actOnParamCommandParamNameArg(PC,
                                    Arg.getLocation(),
                                    Arg.getEndLocation(),
                                    Arg.getText());
}

void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
                                    TextTokenRetokenizer &Retokenizer) {
  Token Arg;
  if (Retokenizer.lexWord(Arg))
    S.actOnTParamCommandParamNameArg(TPC,
                                     Arg.getLocation(),
                                     Arg.getEndLocation(),
                                     Arg.getText());
}

ArrayRef<Comment::Argument>
Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];
  unsigned ParsedArgs = 0;
  Token Arg;
  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
    Args[ParsedArgs] = Comment::Argument{
        SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
    ParsedArgs++;
  }

  return llvm::ArrayRef(Args, ParsedArgs);
}

ArrayRef<Comment::Argument>
Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
                              unsigned NumArgs) {
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];
  unsigned ParsedArgs = 0;
  Token Arg;

  while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) {
    Args[ParsedArgs] = Comment::Argument{
        SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
    ParsedArgs++;
  }

  return llvm::ArrayRef(Args, ParsedArgs);
}

ArrayRef<Comment::Argument>
Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
                            unsigned NumArgs) {
  assert(NumArgs > 0);
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];
  unsigned ParsedArgs = 0;
  Token Arg;

  while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) {
    Args[ParsedArgs] = Comment::Argument{
        SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
    ParsedArgs++;
  }

  return llvm::ArrayRef(Args, ParsedArgs);
}

BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  ParamCommandComment *PC = nullptr;
  TParamCommandComment *TPC = nullptr;
  BlockCommandComment *BC = nullptr;
  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
  CommandMarkerKind CommandMarker =
      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
  if (Info->IsParamCommand) {
    PC = S.actOnParamCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandID(),
                                  CommandMarker);
  } else if (Info->IsTParamCommand) {
    TPC = S.actOnTParamCommandStart(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getCommandID(),
                                    CommandMarker);
  } else {
    BC = S.actOnBlockCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandID(),
                                  CommandMarker);
  }
  consumeToken();

  if (isTokBlockCommand()) {
    // Block command ahead. We can't nest block commands, so pretend that this
    // command has an empty argument.
    ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt);
    if (PC) {
      S.actOnParamCommandFinish(PC, Paragraph);
      return PC;
    } else if (TPC) {
      S.actOnTParamCommandFinish(TPC, Paragraph);
      return TPC;
    } else {
      S.actOnBlockCommandFinish(BC, Paragraph);
      return BC;
    }
  }

  if (PC || TPC || Info->NumArgs > 0) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator, *this);

    if (PC)
      parseParamCommandArgs(PC, Retokenizer);
    else if (TPC)
      parseTParamCommandArgs(TPC, Retokenizer);
    else if (Info->IsThrowsCommand)
      S.actOnBlockCommandArgs(
          BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
    else if (Info->IsParCommand)
      S.actOnBlockCommandArgs(BC,
                              parseParCommandArgs(Retokenizer, Info->NumArgs));
    else
      S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));

    Retokenizer.putBackLeftoverTokens();
  }

  // If there's a block command ahead, we will attach an empty paragraph to
  // this command.
  bool EmptyParagraph = false;
  if (isTokBlockCommand())
    EmptyParagraph = true;
  else if (Tok.is(tok::newline)) {
    Token PrevTok = Tok;
    consumeToken();
    EmptyParagraph = isTokBlockCommand();
    putBack(PrevTok);
  }

  ParagraphComment *Paragraph;
  if (EmptyParagraph)
    Paragraph = S.actOnParagraphComment(std::nullopt);
  else {
    BlockContentComment *Block = parseParagraphOrBlockCommand();
    // Since we have checked for a block command, we should have parsed a
    // paragraph.
    Paragraph = cast<ParagraphComment>(Block);
  }

  if (PC) {
    S.actOnParamCommandFinish(PC, Paragraph);
    return PC;
  } else if (TPC) {
    S.actOnTParamCommandFinish(TPC, Paragraph);
    return TPC;
  } else {
    S.actOnBlockCommandFinish(BC, Paragraph);
    return BC;
  }
}

InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());

  const Token CommandTok = Tok;
  consumeToken();

  TextTokenRetokenizer Retokenizer(Allocator, *this);
  ArrayRef<Comment::Argument> Args =
      parseCommandArgs(Retokenizer, Info->NumArgs);

  InlineCommandComment *IC = S.actOnInlineCommand(
      CommandTok.getLocation(), CommandTok.getEndLocation(),
      CommandTok.getCommandID(), Args);

  if (Args.size() < Info->NumArgs) {
    Diag(CommandTok.getEndLocation().getLocWithOffset(1),
         diag::warn_doc_inline_command_not_enough_arguments)
        << CommandTok.is(tok::at_command) << Info->Name << Args.size()
        << Info->NumArgs
        << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
  }

  Retokenizer.putBackLeftoverTokens();

  return IC;
}

HTMLStartTagComment *Parser::parseHTMLStartTag() {
  assert(Tok.is(tok::html_start_tag));
  HTMLStartTagComment *HST =
      S.actOnHTMLStartTagStart(Tok.getLocation(),
                               Tok.getHTMLTagStartName());
  consumeToken();

  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
  while (true) {
    switch (Tok.getKind()) {
    case tok::html_ident: {
      Token Ident = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_equals)) {
        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                       Ident.getHTMLIdent()));
        continue;
      }
      Token Equals = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_quoted_string)) {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_quoted_string)
            << SourceRange(Equals.getLocation());
        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                       Ident.getHTMLIdent()));
        while (Tok.is(tok::html_equals) ||
               Tok.is(tok::html_quoted_string))
          consumeToken();
        continue;
      }
      Attrs.push_back(HTMLStartTagComment::Attribute(
          Ident.getLocation(),
          Ident.getHTMLIdent(),
          Equals.getLocation(),
          SourceRange(Tok.getLocation(),
                      Tok.getEndLocation()),
          Tok.getHTMLQuotedString()));
      consumeToken();
      continue;
    }

    case tok::html_greater:
      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                Tok.getLocation(),
                                /* IsSelfClosing = */ false);
      consumeToken();
      return HST;

    case tok::html_slash_greater:
      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                Tok.getLocation(),
                                /* IsSelfClosing = */ true);
      consumeToken();
      return HST;

    case tok::html_equals:
    case tok::html_quoted_string:
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_ident_or_greater);
      while (Tok.is(tok::html_equals) ||
             Tok.is(tok::html_quoted_string))
        consumeToken();
      if (Tok.is(tok::html_ident) ||
          Tok.is(tok::html_greater) ||
          Tok.is(tok::html_slash_greater))
        continue;

      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                SourceLocation(),
                                /* IsSelfClosing = */ false);
      return HST;

    default:
      // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
      S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                                SourceLocation(),
                                /* IsSelfClosing = */ false);
      bool StartLineInvalid;
      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
          HST->getLocation(),
          &StartLineInvalid);
      bool EndLineInvalid;
      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
          Tok.getLocation(),
          &EndLineInvalid);
      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater)
            << HST->getSourceRange();
      else {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater);
        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
            << HST->getSourceRange();
      }
      return HST;
    }
  }
}

HTMLEndTagComment *Parser::parseHTMLEndTag() {
  assert(Tok.is(tok::html_end_tag));
  Token TokEndTag = Tok;
  consumeToken();
  SourceLocation Loc;
  if (Tok.is(tok::html_greater)) {
    Loc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLEndTag(TokEndTag.getLocation(),
                           Loc,
                           TokEndTag.getHTMLTagEndName());
}

BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Content;

  while (true) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::unknown_command:
      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                               Tok.getEndLocation(),
                                               Tok.getUnknownCommandName()));
      consumeToken();
      continue;

    case tok::backslash_command:
    case tok::at_command: {
      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
      if (Info->IsBlockCommand) {
        if (Content.size() == 0)
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }
      if (Info->IsVerbatimBlockEndCommand) {
        Diag(Tok.getLocation(),
             diag::warn_verbatim_block_end_without_start)
          << Tok.is(tok::at_command)
          << Info->Name
          << SourceRange(Tok.getLocation(), Tok.getEndLocation());
        consumeToken();
        continue;
      }
      if (Info->IsUnknownCommand) {
        Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                                 Tok.getEndLocation(),
                                                 Info->getID()));
        consumeToken();
        continue;
      }
      assert(Info->IsInlineCommand);
      Content.push_back(parseInlineCommand());
      continue;
    }

    case tok::newline: {
      consumeToken();
      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }
      // Also allow [tok::newline, tok::text, tok::newline] if the middle
      // tok::text is just whitespace.
      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
        Token WhitespaceTok = Tok;
        consumeToken();
        if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
          consumeToken();
          break;
        }
        // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
        putBack(WhitespaceTok);
      }
      if (Content.size() > 0)
        Content.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_start_tag:
      Content.push_back(parseHTMLStartTag());
      continue;

    case tok::html_end_tag:
      Content.push_back(parseHTMLEndTag());
      continue;

    case tok::text:
      Content.push_back(S.actOnText(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getText()));
      consumeToken();
      continue;

    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
    case tok::html_slash_greater:
      llvm_unreachable("should not see this token");
    }
    break;
  }

  return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
}

VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(),
                                Tok.getVerbatimBlockID());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  while (Tok.is(tok::verbatim_block_line) ||
         Tok.is(tok::newline)) {
    VerbatimBlockLineComment *Line;
    if (Tok.is(tok::verbatim_block_line)) {
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
                                      Tok.getVerbatimBlockText());
      consumeToken();
      if (Tok.is(tok::newline)) {
        consumeToken();
      }
    } else {
      // Empty line, just a tok::newline.
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
      consumeToken();
    }
    Lines.push_back(Line);
  }

  if (Tok.is(tok::verbatim_block_end)) {
    const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
    S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
                               S.copyArray(llvm::ArrayRef(Lines)));
    consumeToken();
  } else {
    // Unterminated \\verbatim block
    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                               S.copyArray(llvm::ArrayRef(Lines)));
  }

  return VB;
}

VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  Token NameTok = Tok;
  consumeToken();

  SourceLocation TextBegin;
  StringRef Text;
  // Next token might not be a tok::verbatim_line_text if verbatim line
  // starting command comes just before a newline or comment end.
  if (Tok.is(tok::verbatim_line_text)) {
    TextBegin = Tok.getLocation();
    Text = Tok.getVerbatimLineText();
  } else {
    TextBegin = NameTok.getEndLocation();
    Text = "";
  }

  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
                                                NameTok.getVerbatimLineID(),
                                                TextBegin,
                                                Text);
  consumeToken();
  return VL;
}

BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  case tok::text:
  case tok::unknown_command:
  case tok::backslash_command:
  case tok::at_command:
  case tok::html_start_tag:
  case tok::html_end_tag:
    return parseParagraphOrBlockCommand();

  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  case tok::verbatim_line_name:
    return parseVerbatimLine();

  case tok::eof:
  case tok::newline:
  case tok::verbatim_block_line:
  case tok::verbatim_block_end:
  case tok::verbatim_line_text:
  case tok::html_ident:
  case tok::html_equals:
  case tok::html_quoted_string:
  case tok::html_greater:
  case tok::html_slash_greater:
    llvm_unreachable("should not see this token");
  }
  llvm_unreachable("bogus token kind");
}

FullComment *Parser::parseFullComment() {
  // Skip newlines at the beginning of the comment.
  while (Tok.is(tok::newline))
    consumeToken();

  SmallVector<BlockContentComment *, 8> Blocks;
  while (Tok.isNot(tok::eof)) {
    Blocks.push_back(parseBlockContent());

    // Skip extra newlines after paragraph end.
    while (Tok.is(tok::newline))
      consumeToken();
  }
  return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
}

} // end namespace comments
} // end namespace clang
