FormatTokenLexer.h source code [llvm_projects/clang/lib/Format/FormatTokenLexer.h]

1	//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file contains FormatTokenLexer, which tokenizes a source file
11	/// into a token stream suitable for ClangFormat.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
16	#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
17
18	#include "Encoding.h"
19	#include "FormatToken.h"
20	#include "llvm/ADT/MapVector.h"
21	#include "llvm/ADT/SmallPtrSet.h"
22	#include "llvm/ADT/StringSet.h"
23
24	#include <stack>
25
26	namespace clang {
27	namespace format {
28
/// Lexing contexts tracked by FormatTokenLexer (it keeps a
/// std::stack<LexerState> so that contexts can nest).
///
/// NOTE(review): the meaning of the individual states is not spelled out in
/// this header; the notes below are inferred from the names and from the
/// handleTemplateStrings()/getStashedToken() declarations - confirm against
/// FormatTokenLexer.cpp.
enum LexerState {
  /// Default state.
  NORMAL,
  /// Presumably: lexing the body of a JavaScript template string.
  TEMPLATE_STRING,
  /// Presumably: a token has been set aside and is returned by
  /// getStashedToken() before lexing continues.
  TOKEN_STASHED,
};
34
35	class FormatTokenLexer {
36	public:
37	FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
38	const FormatStyle &Style, encoding::Encoding Encoding,
39	llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
40	IdentifierTable &IdentTable);
41
42	ArrayRef<FormatToken *> lex();
43
44	const AdditionalKeywords &getKeywords() { return Keywords; }
45
46	private:
47	void tryMergePreviousTokens();
48
49	bool tryMergeLessLess();
50	bool tryMergeGreaterGreater();
51	bool tryMergeUserDefinedLiteral();
52	bool tryMergeNSStringLiteral();
53	bool tryMergeJSPrivateIdentifier();
54	bool tryMergeCSharpStringLiteral();
55	bool tryMergeCSharpKeywordVariables();
56	bool tryMergeNullishCoalescingEqual();
57	bool tryTransformCSharpForEach();
58	bool tryMergeForEach();
59	bool tryTransformTryUsageForC();
60
61	// Merge the most recently lexed tokens into a single token if their kinds are
62	// correct.
63	bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
64	// Merge without checking their kinds.
65	bool tryMergeTokens(size_t Count, TokenType NewType);
66	// Merge if their kinds match any one of Kinds.
67	bool tryMergeTokensAny(ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
68	TokenType NewType);
69
70	// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
71	bool precedesOperand(FormatToken *Tok);
72
73	bool canPrecedeRegexLiteral(FormatToken *Prev);
74
75	void tryParseJavaTextBlock();
76
77	// Tries to parse a JavaScript Regex literal starting at the current token,
78	// if that begins with a slash and is in a location where JavaScript allows
79	// regex literals. Changes the current token to a regex literal and updates
80	// its text if successful.
81	void tryParseJSRegexLiteral();
82
83	// Handles JavaScript template strings.
84	//
85	// JavaScript template strings use backticks ('`') as delimiters, and allow
86	// embedding expressions nested in ${expr-here}. Template strings can be
87	// nested recursively, i.e. expressions can contain template strings in turn.
88	//
89	// The code below parses starting from a backtick, up to a closing backtick or
90	// an opening ${. It also maintains a stack of lexing contexts to handle
91	// nested template parts by balancing curly braces.
92	void handleTemplateStrings();
93
94	void handleCSharpVerbatimAndInterpolatedStrings();
95
96	// Handles TableGen multiline strings. It has the form [{ ... }].
97	void handleTableGenMultilineString();
98	// Handles TableGen numeric like identifiers.
99	// They have a forms of [0-9][_a-zA-Z]([_a-zA-Z0-9]). But limited to the
100	// case it is not lexed as an integer.
101	void handleTableGenNumericLikeIdentifier();
102
103	void tryParsePythonComment();
104
105	bool tryMerge_TMacro();
106
107	bool tryMergeConflictMarkers();
108
109	void truncateToken(size_t NewLen);
110
111	FormatToken *getStashedToken();
112
113	FormatToken *getNextToken();
114
115	FormatToken *FormatTok;
116	bool IsFirstToken;
117	std::stack<LexerState> StateStack;
118	unsigned Column;
119	unsigned TrailingWhitespace;
120	std::unique_ptr<Lexer> Lex;
121	LangOptions LangOpts;
122	const SourceManager &SourceMgr;
123	FileID ID;
124	const FormatStyle &Style;
125	IdentifierTable &IdentTable;
126	AdditionalKeywords Keywords;
127	encoding::Encoding Encoding;
128	llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
129	// Index (in 'Tokens') of the last token that starts a new line.
130	unsigned FirstInLineIndex;
131	SmallVector<FormatToken *, `16`> Tokens;
132
133	llvm::SmallMapVector<IdentifierInfo *, TokenType, `8`> Macros;
134
135	llvm::SmallPtrSet<IdentifierInfo *, `8`> TemplateNames, TypeNames,
136	VariableTemplates;
137
138	bool FormattingDisabled;
139	llvm::Regex FormatOffRegex; // For one line.
140
141	llvm::Regex MacroBlockBeginRegex;
142	llvm::Regex MacroBlockEndRegex;
143
144	// Targets that may appear inside a C# attribute.
145	static const llvm::StringSet<> CSharpAttributeTargets;
146
147	/// Handle Verilog-specific tokens.
148	bool readRawTokenVerilogSpecific(Token &Tok);
149
150	void readRawToken(FormatToken &Tok);
151
152	void resetLexer(unsigned Offset);
153	};
154
155	} // namespace format
156	} // namespace clang
157
158	#endif
159

Browse the source code of llvm_projects/clang/lib/Format/FormatTokenLexer.h