RawCommentList.cpp source code [llvm_projects/clang/lib/AST/RawCommentList.cpp]

1	//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang/AST/RawCommentList.h"
10	#include "clang/AST/ASTContext.h"
11	#include "clang/AST/Comment.h"
12	#include "clang/AST/CommentBriefParser.h"
13	#include "clang/AST/CommentCommandTraits.h"
14	#include "clang/AST/CommentLexer.h"
15	#include "clang/AST/CommentParser.h"
16	#include "clang/AST/CommentSema.h"
17	#include "clang/Basic/CharInfo.h"
18	#include "llvm/ADT/STLExtras.h"
19	#include "llvm/ADT/StringExtras.h"
20	#include "llvm/Support/Allocator.h"
21
22	using namespace clang;
23
24	namespace {
25	/// Get comment kind and bool describing if it is a trailing comment.
26	std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
27	bool ParseAllComments) {
28	const size_t MinCommentLength = ParseAllComments ? `2` : `3`;
29	if ((Comment.size() < MinCommentLength) \|\| Comment [`0`] != `'/'`)
30	return std::make_pair(x: RawComment::RCK_Invalid, y: false);
31
32	RawComment::CommentKind K;
33	if (Comment [`1`] == `'/'`) {
34	if (Comment.size() < `3`)
35	return std::make_pair(x: RawComment::RCK_OrdinaryBCPL, y: false);
36
37	if (Comment [`2`] == `'/'`)
38	K = RawComment::RCK_BCPLSlash;
39	else if (Comment [`2`] == `'!'`)
40	K = RawComment::RCK_BCPLExcl;
41	else
42	return std::make_pair(x: RawComment::RCK_OrdinaryBCPL, y: false);
43	} else {
44	assert(Comment.size() >= `4`);
45
46	// Comment lexer does not understand escapes in comment markers, so pretend
47	// that this is not a comment.
48	if (Comment [`1`] != `'*'` \|\|
49	Comment [Comment.size() - `2`] != `'*'` \|\|
50	Comment [Comment.size() - `1`] != `'/'`)
51	return std::make_pair(x: RawComment::RCK_Invalid, y: false);
52
53	if (Comment [`2`] == `'*'`)
54	K = RawComment::RCK_JavaDoc;
55	else if (Comment [`2`] == `'!'`)
56	K = RawComment::RCK_Qt;
57	else
58	return std::make_pair(x: RawComment::RCK_OrdinaryC, y: false);
59	}
60	const bool TrailingComment = (Comment.size() > `3`) && (Comment [`3`] == `'<'`);
61	return std::make_pair(x&: K, y: TrailingComment);
62	}
63
64	bool mergedCommentIsTrailingComment(StringRef Comment) {
65	return (Comment.size() > `3`) && (Comment [`3`] == `'<'`);
66	}
67
68	/// Returns true if R1 and R2 both have valid locations that start on the same
69	/// column.
70	bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
71	const RawComment &R2) {
72	SourceLocation L1 = R1.getBeginLoc();
73	SourceLocation L2 = R2.getBeginLoc();
74	bool Invalid = false;
75	unsigned C1 = SM.getPresumedColumnNumber(Loc: L1, Invalid: &Invalid);
76	if (!Invalid) {
77	unsigned C2 = SM.getPresumedColumnNumber(Loc: L2, Invalid: &Invalid);
78	return !Invalid && (C1 == C2);
79	}
80	return false;
81	}
82	} // unnamed namespace
83
84	/// Determines whether there is only whitespace in `Buffer` between `P`
85	/// and the previous line.
86	/// \param Buffer The buffer to search in.
87	/// \param P The offset from the beginning of `Buffer` to start from.
88	/// \return true if all of the characters in `Buffer` ranging from the closest
89	/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
90	/// are whitespace.
91	static bool onlyWhitespaceOnLineBefore(const char Buffer, unsigned* P) {
92	// Search backwards until we see linefeed or carriage return.
93	for (unsigned I = P; I != `0`; --I) {
94	char C = Buffer[I - `1`];
95	if (isVerticalWhitespace(c: C))
96	return true;
97	if (!isHorizontalWhitespace(c: C))
98	return false;
99	}
100	// We hit the beginning of the buffer.
101	return true;
102	}
103
104	/// Returns whether `K` is an ordinary comment kind.
105	static bool isOrdinaryKind(RawComment::CommentKind K) {
106	return (K == RawComment::RCK_OrdinaryBCPL) \|\|
107	(K == RawComment::RCK_OrdinaryC);
108	}
109
110	RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
111	const CommentOptions &CommentOpts, bool Merged) :
112	Range (SR), RawTextValid(false), BriefTextValid(false),
113	IsAttached(false), IsTrailingComment(false),
114	IsAlmostTrailingComment(false) {
115	// Extract raw comment text, if possible.
116	if (SR.getBegin() == SR.getEnd() \|\| getRawText(SourceMgr).empty()) {
117	Kind = RCK_Invalid;
118	return;
119	}
120
121	// Guess comment kind.
122	std::pair<CommentKind, bool> K =
123	getCommentKind(Comment: RawText, ParseAllComments: CommentOpts.ParseAllComments);
124
125	// Guess whether an ordinary comment is trailing.
126	if (CommentOpts.ParseAllComments && isOrdinaryKind(K: K.first)) {
127	FileID BeginFileID;
128	unsigned BeginOffset;
129	std::tie(args&: BeginFileID, args&: BeginOffset) =
130	SourceMgr.getDecomposedLoc(Loc: Range.getBegin());
131	if (BeginOffset != `0`) {
132	bool Invalid = false;
133	const char *Buffer =
134	SourceMgr.getBufferData(FID: BeginFileID, Invalid: &Invalid).data();
135	IsTrailingComment \|=
136	(!Invalid && !onlyWhitespaceOnLineBefore(Buffer, P: BeginOffset));
137	}
138	}
139
140	if (!Merged) {
141	Kind = K.first;
142	IsTrailingComment \|= K.second;
143
144	IsAlmostTrailingComment =
145	RawText.starts_with(Prefix: "//<") \|\| RawText.starts_with(Prefix: "/*<");
146	} else {
147	Kind = RCK_Merged;
148	IsTrailingComment =
149	IsTrailingComment \|\| mergedCommentIsTrailingComment(Comment: RawText);
150	}
151	}
152
153	StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
154	FileID BeginFileID;
155	FileID EndFileID;
156	unsigned BeginOffset;
157	unsigned EndOffset;
158
159	std::tie(args&: BeginFileID, args&: BeginOffset) =
160	SourceMgr.getDecomposedLoc(Loc: Range.getBegin());
161	std::tie(args&: EndFileID, args&: EndOffset) = SourceMgr.getDecomposedLoc(Loc: Range.getEnd());
162
163	const unsigned Length = EndOffset - BeginOffset;
164	if (Length < `2`)
165	return StringRef ();
166
167	// The comment can't begin in one file and end in another.
168	assert(BeginFileID == EndFileID);
169
170	bool Invalid = false;
171	const char *BufferStart = SourceMgr.getBufferData(FID: BeginFileID,
172	Invalid: &Invalid).data();
173	if (Invalid)
174	return StringRef ();
175
176	return StringRef (BufferStart + BeginOffset, Length);
177	}
178
179	const char RawComment::extractBriefText(const* ASTContext &Context) const {
180	// Lazily initialize RawText using the accessor before using it.
181	(void)getRawText(SourceMgr: Context.getSourceManager());
182
183	// Since we will be copying the resulting text, all allocations made during
184	// parsing are garbage after resulting string is formed. Thus we can use
185	// a separate allocator for all temporary stuff.
186	llvm::BumpPtrAllocator Allocator;
187
188	comments::Lexer L(Allocator, Context.getDiagnostics(),
189	Context.getCommentCommandTraits(),
190	Range.getBegin(),
191	RawText.begin(), RawText.end());
192	comments::BriefParser P(L, Context.getCommentCommandTraits());
193
194	const std::string Result = P.Parse();
195	const unsigned BriefTextLength = Result.size();
196	char BriefTextPtr = new* (Context) char[BriefTextLength + `1`];
197	memcpy(dest: BriefTextPtr, src: Result.c_str(), n: BriefTextLength + `1`);
198	BriefText = BriefTextPtr;
199	BriefTextValid = true;
200
201	return BriefTextPtr;
202	}
203
204	comments::FullComment RawComment::parse(const* ASTContext &Context,
205	const Preprocessor *PP,
206	const Decl D) const* {
207	// Lazily initialize RawText using the accessor before using it.
208	(void)getRawText(SourceMgr: Context.getSourceManager());
209
210	comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
211	Context.getCommentCommandTraits(),
212	getSourceRange().getBegin(),
213	RawText.begin(), RawText.end());
214	comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
215	Context.getDiagnostics(),
216	Context.getCommentCommandTraits(),
217	PP);
218	S.setDecl(D);
219	comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
220	Context.getDiagnostics(),
221	Context.getCommentCommandTraits());
222
223	return P.parseFullComment();
224	}
225
226	static bool onlyWhitespaceBetween(SourceManager &SM,
227	SourceLocation Loc1, SourceLocation Loc2,
228	unsigned MaxNewlinesAllowed) {
229	std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc: Loc1);
230	std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc: Loc2);
231
232	// Question does not make sense if locations are in different files.
233	if (Loc1Info.first != Loc2Info.first)
234	return false;
235
236	bool Invalid = false;
237	const char *Buffer = SM.getBufferData(FID: Loc1Info.first, Invalid: &Invalid).data();
238	if (Invalid)
239	return false;
240
241	unsigned NumNewlines = `0`;
242	assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
243	// Look for non-whitespace characters and remember any newlines seen.
244	for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
245	switch (Buffer[I]) {
246	default:
247	return false;
248	case `' '`:
249	case `'\t'`:
250	case `'\f'`:
251	case `'\v'`:
252	break;
253	case `'\r'`:
254	case `'\n'`:
255	++NumNewlines;
256
257	// Check if we have found more than the maximum allowed number of
258	// newlines.
259	if (NumNewlines > MaxNewlinesAllowed)
260	return false;
261
262	// Collapse \r\n and \n\r into a single newline.
263	if (I + `1` != Loc2Info.second &&
264	(Buffer[I + `1`] == `'\n'` \|\| Buffer[I + `1`] == `'\r'`) &&
265	Buffer[I] != Buffer[I + `1`])
266	++I;
267	break;
268	}
269	}
270
271	return true;
272	}
273
274	void RawCommentList::addComment(const RawComment &RC,
275	const CommentOptions &CommentOpts,
276	llvm::BumpPtrAllocator &Allocator) {
277	if (RC.isInvalid())
278	return;
279
280	// Ordinary comments are not interesting for us.
281	if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
282	return;
283
284	std::pair<FileID, unsigned> Loc =
285	SourceMgr.getDecomposedLoc(Loc: RC.getBeginLoc());
286
287	const FileID CommentFile = Loc.first;
288	const unsigned CommentOffset = Loc.second;
289
290	// If this is the first Doxygen comment, save it (because there isn't
291	// anything to merge it with).
292	if (OrderedComments [CommentFile].empty()) {
293	OrderedComments [CommentFile][CommentOffset] =
294	new (Allocator) RawComment (RC);
295	return;
296	}
297
298	const RawComment &C1 = *OrderedComments [CommentFile].rbegin()->second;
299	const RawComment &C2 = RC;
300
301	// Merge comments only if there is only whitespace between them.
302	// Can't merge trailing and non-trailing comments unless the second is
303	// non-trailing ordinary in the same column, as in the case:
304	// int x; // documents x
305	// // more text
306	// versus:
307	// int x; // documents x
308	// int y; // documents y
309	// or:
310	// int x; // documents x
311	// // documents y
312	// int y;
313	// Merge comments if they are on same or consecutive lines.
314	if ((C1.isTrailingComment() == C2.isTrailingComment() \|\|
315	(C1.isTrailingComment() && !C2.isTrailingComment() &&
316	isOrdinaryKind(K: C2.getKind()) &&
317	commentsStartOnSameColumn(SM: SourceMgr, R1: C1, R2: C2))) &&
318	onlyWhitespaceBetween(SM&: SourceMgr, Loc1: C1.getEndLoc(), Loc2: C2.getBeginLoc(),
319	/MaxNewlinesAllowed=/`1`)) {
320	SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
321	*OrderedComments [CommentFile].rbegin()->second =
322	RawComment (SourceMgr, MergedRange, CommentOpts, true);
323	} else {
324	OrderedComments [CommentFile][CommentOffset] =
325	new (Allocator) RawComment (RC);
326	}
327	}
328
329	const std::map<unsigned, RawComment >
330	RawCommentList::getCommentsInFile(FileID File) const {
331	auto CommentsInFile = OrderedComments.find(Val: File);
332	if (CommentsInFile == OrderedComments.end())
333	return nullptr;
334
335	return &CommentsInFile ->second;
336	}
337
338	bool RawCommentList::empty() const { return OrderedComments.empty(); }
339
340	unsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File,
341	unsigned Offset) const {
342	auto Cached = CommentBeginLine.find(Val: C);
343	if (Cached != CommentBeginLine.end())
344	return Cached ->second;
345	const unsigned Line = SourceMgr.getLineNumber(FID: File, FilePos: Offset);
346	CommentBeginLine [C] = Line;
347	return Line;
348	}
349
350	unsigned RawCommentList::getCommentEndOffset(RawComment C) const* {
351	auto Cached = CommentEndOffset.find(Val: C);
352	if (Cached != CommentEndOffset.end())
353	return Cached ->second;
354	const unsigned Offset =
355	SourceMgr.getDecomposedLoc(Loc: C->getSourceRange().getEnd()).second;
356	CommentEndOffset [C] = Offset;
357	return Offset;
358	}
359
360	std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
361	DiagnosticsEngine &Diags) const {
362	llvm::StringRef CommentText = getRawText(SourceMgr);
363	if (CommentText.empty())
364	return "";
365
366	std::string Result;
367	for (const RawComment::CommentLine &Line :
368	getFormattedLines(SourceMgr, Diags))
369	Result += Line.Text + "\n";
370
371	auto LastChar = Result.find_last_not_of(c: `'\n'`);
372	Result.erase(pos: LastChar + `1`, n: Result.size());
373
374	return Result;
375	}
376
377	std::vector<RawComment::CommentLine>
378	RawComment::getFormattedLines(const SourceManager &SourceMgr,
379	DiagnosticsEngine &Diags) const {
380	llvm::StringRef CommentText = getRawText(SourceMgr);
381	if (CommentText.empty())
382	return {};
383
384	llvm::BumpPtrAllocator Allocator;
385	// We do not parse any commands, so CommentOptions are ignored by
386	// comments::Lexer. Therefore, we just use default-constructed options.
387	CommentOptions DefOpts;
388	comments::CommandTraits EmptyTraits(Allocator, DefOpts);
389	comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
390	CommentText.begin(), CommentText.end(),
391	/ParseCommands=/false);
392
393	std::vector<RawComment::CommentLine> Result;
394	// A column number of the first non-whitespace token in the comment text.
395	// We skip whitespace up to this column, but keep the whitespace after this
396	// column. IndentColumn is calculated when lexing the first line and reused
397	// for the rest of lines.
398	unsigned IndentColumn = `0`;
399
400	// Record the line number of the last processed comment line.
401	// For block-style comments, an extra newline token will be produced after
402	// the end-comment marker, e.g.:
403	// /* This is a multi-line comment block.*
404	// The lexer will produce two newline tokens here > /*
405	// previousLine will record the line number when we previously saw a newline
406	// token and recorded a comment line. If we see another newline token on the
407	// same line, don't record anything in between.
408	unsigned PreviousLine = `0`;
409
410	// Processes one line of the comment and adds it to the result.
411	// Handles skipping the indent at the start of the line.
412	// Returns false when eof is reached and true otherwise.
413	auto LexLine = [&](bool IsFirstLine) -> bool {
414	comments::Token Tok;
415	// Lex the first token on the line. We handle it separately, because we to
416	// fix up its indentation.
417	L.lex(T&: Tok);
418	if (Tok.is(K: comments::tok::eof))
419	return false;
420	if (Tok.is(K: comments::tok::newline)) {
421	PresumedLoc Loc = SourceMgr.getPresumedLoc(Loc: Tok.getLocation());
422	if (Loc.getLine() != PreviousLine) {
423	Result.emplace_back(args: "", args&: Loc, args&: Loc);
424	PreviousLine = Loc.getLine();
425	}
426	return true;
427	}
428	SmallString<`124`> Line;
429	llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
430	bool LocInvalid = false;
431	unsigned TokColumn =
432	SourceMgr.getSpellingColumnNumber(Loc: Tok.getLocation(), Invalid: &LocInvalid);
433	assert(!LocInvalid && "getFormattedText for invalid location");
434
435	// Amount of leading whitespace in TokText.
436	size_t WhitespaceLen = TokText.find_first_not_of(Chars: " \t");
437	if (WhitespaceLen == StringRef::npos)
438	WhitespaceLen = TokText.size();
439	// Remember the amount of whitespace we skipped in the first line to remove
440	// indent up to that column in the following lines.
441	if (IsFirstLine)
442	IndentColumn = TokColumn + WhitespaceLen;
443
444	// Amount of leading whitespace we actually want to skip.
445	// For the first line we skip all the whitespace.
446	// For the rest of the lines, we skip whitespace up to IndentColumn.
447	unsigned SkipLen =
448	IsFirstLine
449	? WhitespaceLen
450	: std::min<size_t>(
451	a: WhitespaceLen,
452	b: std::max<int>(a: static_cast<int>(IndentColumn) - TokColumn, b: `0`));
453	llvm::StringRef Trimmed = TokText.drop_front(N: SkipLen);
454	Line += Trimmed;
455	// Get the beginning location of the adjusted comment line.
456	PresumedLoc Begin =
457	SourceMgr.getPresumedLoc(Loc: Tok.getLocation().getLocWithOffset(Offset: SkipLen));
458
459	// Lex all tokens in the rest of the line.
460	for (L.lex(T&: Tok); Tok.isNot(K: comments::tok::eof); L.lex(T&: Tok)) {
461	if (Tok.is(K: comments::tok::newline)) {
462	// Get the ending location of the comment line.
463	PresumedLoc End = SourceMgr.getPresumedLoc(Loc: Tok.getLocation());
464	if (End.getLine() != PreviousLine) {
465	Result.emplace_back(args&: Line, args&: Begin, args&: End);
466	PreviousLine = End.getLine();
467	}
468	return true;
469	}
470	Line += L.getSpelling(Tok, SourceMgr);
471	}
472	PresumedLoc End = SourceMgr.getPresumedLoc(Loc: Tok.getLocation());
473	Result.emplace_back(args&: Line, args&: Begin, args&: End);
474	// We've reached the end of file token.
475	return false;
476	};
477
478	// Process first line separately to remember indent for the following lines.
479	if (!LexLine (/IsFirstLine=/true))
480	return Result;
481	// Process the rest of the lines.
482	while (LexLine (/IsFirstLine=/false))
483	;
484	return Result;
485	}
486

Browse the source code of llvm_projects/clang/lib/AST/RawCommentList.cpp