regex_scanner.h source code [include/c++/12/bits/regex_scanner.h]

1	// class template regex -- C++ --
2
3	// Copyright (C) 2013-2022 Free Software Foundation, Inc.
4	//
5	// This file is part of the GNU ISO C++ Library. This library is free
6	// software; you can redistribute it and/or modify it under the
7	// terms of the GNU General Public License as published by the
8	// Free Software Foundation; either version 3, or (at your option)
9	// any later version.
10
11	// This library is distributed in the hope that it will be useful,
12	// but WITHOUT ANY WARRANTY; without even the implied warranty of
13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	// GNU General Public License for more details.
15
16	// Under Section 7 of GPL version 3, you are granted additional
17	// permissions described in the GCC Runtime Library Exception, version
18	// 3.1, as published by the Free Software Foundation.
19
20	// You should have received a copy of the GNU General Public License and
21	// a copy of the GCC Runtime Library Exception along with this program;
22	// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23	// <http://www.gnu.org/licenses/>.
24
25	/**
26	* @file bits/regex_scanner.h
27	* This is an internal header file, included by other library headers.
28	* Do not attempt to use it directly. @headername{regex}
29	*/
30
31	namespace std _GLIBCXX_VISIBILITY(default)
32	{
33	_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35	namespace __detail
36	{
37	/**
38	* @addtogroup regex-detail
39	* @{
40	*/
41
/**
 * @brief Character-type-independent part of the regex scanner.
 *
 * Holds the token and state enumerations, the grammar flags, and the
 * lookup tables (token table, escape tables, special-character sets).
 * The constructor picks the escape table and the special-character set
 * that match the grammar selected in the flags.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT : unsigned
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_bracket_dash,
    // All bits set: the largest value of the unsigned underlying type.
    _S_token_unknown = -1u
  };

protected:
  typedef regex_constants::syntax_option_type _FlagT;

  /// Scanner modes; one enumerator per lexical context.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Selects the escape table and special-character set for @p __flags.
   * @param __flags Syntax options naming the grammar.
   *
   * The escape table is the ECMAScript one when the ECMAScript flag is
   * set and the awk one otherwise.  The special-character set is chosen
   * by the first flag that is set, tested in this order: ECMAScript,
   * basic, extended, grep, egrep, awk.  The grep and egrep sets are the
   * basic and extended sets with '\n' appended.  If none of those flags
   * is set the pointer is null, which the assertion in the body rejects.
   *
   * @c _M_token is not given a value here.
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    _M_escape_tbl(_M_is_ecma()
                  ? _M_ecma_escape_tbl
                  : _M_awk_escape_tbl),
    _M_spec_char(_M_is_ecma()
                 ? _M_ecma_spec_char
                 : _M_flags & regex_constants::basic
                 ? _M_basic_spec_char
                 : _M_flags & regex_constants::extended
                 ? _M_extended_spec_char
                 : _M_flags & regex_constants::grep
                 ? ".[\\*^$\n"
                 : _M_flags & regex_constants::egrep
                 ? ".[\\()*+?{|^$\n"
                 : _M_flags & regex_constants::awk
                 ? _M_extended_spec_char
                 : nullptr),
    _M_at_bracket_start(false)
  { __glibcxx_assert(_M_spec_char); }

protected:
  /**
   * @brief Looks up the replacement for an escaped character.
   * @param __c The character that followed the backslash.
   * @return Pointer to the translated character inside the active escape
   *         table, or null if @p __c has no entry.
   *
   * The table is scanned linearly up to its terminating entry, whose key
   * is '\0'.  Because the terminator is tested before the comparison,
   * looking up '\0' itself always yields null.
   */
  const char*
  _M_find_escape(char __c)
  {
    auto __it = _M_escape_tbl;
    for (; __it->first != '\0'; ++__it)
      if (__it->first == __c)
        return &__it->second;
    return nullptr;
  }

  /// True if the ECMAScript flag is set.
  bool
  _M_is_ecma() const
  { return _M_flags & regex_constants::ECMAScript; }

  /// True if either the basic or the grep flag is set.
  bool
  _M_is_basic() const
  { return _M_flags & (regex_constants::basic | regex_constants::grep); }

  /// True if any of the extended, egrep or awk flags is set.
  bool
  _M_is_extended() const
  {
    return _M_flags & (regex_constants::extended
                       | regex_constants::egrep
                       | regex_constants::awk);
  }

  /// True if either the grep or the egrep flag is set.
  bool
  _M_is_grep() const
  { return _M_flags & (regex_constants::grep | regex_constants::egrep); }

  /// True if the awk flag is set.
  bool
  _M_is_awk() const
  { return _M_flags & regex_constants::awk; }

protected:
  // TODO: Make them static in the next abi change.

  // Single-character operators and the token each one maps to.  The last
  // entry has key '\0'.  Not read anywhere in this struct; presumably
  // consumed by the derived scanner -- confirm against the .tcc file.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  // Escape letter -> character, used when the ECMAScript flag is set.
  // Terminated by an entry whose key is '\0'.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  // Escape letter -> character, used for every non-ECMAScript grammar.
  // Terminated by an entry whose key is '\0'.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  // Special-character sets for the ECMAScript, basic and extended grammars.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT _M_state;                         // starts as _S_state_normal
  _FlagT _M_flags;                          // grammar flags given to the ctor
  _TokenT _M_token;                         // not set by the constructor
  const std::pair<char, char>* _M_escape_tbl; // active escape table
  const char* _M_spec_char;                 // active special-character set
  bool _M_at_bracket_start;                 // starts as false
};
198
199	/**
200	* @brief Scans an input range for regex tokens.
201	*
202	* The %_Scanner class interprets the regular expression pattern in
203	* the input range passed to its constructor as a sequence of parse
204	* tokens passed to the regular expression compiler. The sequence
205	* of tokens provided depends on the flag settings passed to the
206	* constructor: different regular expression grammars will interpret
207	* the same input pattern in syntactically different ways.
208	*/
209	template<typename _CharT>
210	class _Scanner
211	: public _ScannerBase
212	{
213	public:
214	typedef std::basic_string<_CharT> _StringT;
215	typedef regex_constants::syntax_option_type _FlagT;
216	typedef const std::ctype<_CharT> _CtypeT;
217
218	_Scanner(const _CharT* __begin, const _CharT* __end,
219	_FlagT __flags, std::locale __loc);
220
221	void
222	_M_advance();
223
224	_TokenT
225	_M_get_token() const noexcept
226	{ return _M_token; }
227
228	const _StringT&
229	_M_get_value() const noexcept
230	{ return _M_value; }
231
232	#ifdef _GLIBCXX_DEBUG
233	std::ostream&
234	_M_print(std::ostream&);
235	#endif
236
237	private:
238	void
239	_M_scan_normal();
240
241	void
242	_M_scan_in_bracket();
243
244	void
245	_M_scan_in_brace();
246
247	void
248	_M_eat_escape_ecma();
249
250	void
251	_M_eat_escape_posix();
252
253	void
254	_M_eat_escape_awk();
255
256	void
257	_M_eat_class(char);
258
259	const _CharT* _M_current;
260	const _CharT* _M_end;
261	_CtypeT& _M_ctype;
262	_StringT _M_value;
263	void (_Scanner::* _M_eat_escape)();
264	};
265
266	///@} regex-detail
267	} // namespace __detail
268	_GLIBCXX_END_NAMESPACE_VERSION
269	} // namespace std
270
271	#include <bits/regex_scanner.tcc>
272

Browse the source code of include/c++/12/bits/regex_scanner.h