| 1 | // class template regex -*- C++ -*- | 
|---|
| 2 |  | 
|---|
| 3 | // Copyright (C) 2013-2024 Free Software Foundation, Inc. | 
|---|
| 4 | // | 
|---|
| 5 | // This file is part of the GNU ISO C++ Library.  This library is free | 
|---|
| 6 | // software; you can redistribute it and/or modify it under the | 
|---|
| 7 | // terms of the GNU General Public License as published by the | 
|---|
| 8 | // Free Software Foundation; either version 3, or (at your option) | 
|---|
| 9 | // any later version. | 
|---|
| 10 |  | 
|---|
| 11 | // This library is distributed in the hope that it will be useful, | 
|---|
| 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 14 | // GNU General Public License for more details. | 
|---|
| 15 |  | 
|---|
| 16 | // Under Section 7 of GPL version 3, you are granted additional | 
|---|
| 17 | // permissions described in the GCC Runtime Library Exception, version | 
|---|
| 18 | // 3.1, as published by the Free Software Foundation. | 
|---|
| 19 |  | 
|---|
| 20 | // You should have received a copy of the GNU General Public License and | 
|---|
| 21 | // a copy of the GCC Runtime Library Exception along with this program; | 
|---|
| 22 | // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see | 
|---|
| 23 | // <http://www.gnu.org/licenses/>. | 
|---|
| 24 |  | 
|---|
| 25 | /** | 
|---|
| 26 | *  @file bits/regex_scanner.h | 
|---|
| 27 | *  This is an internal header file, included by other library headers. | 
|---|
| 28 | *  Do not attempt to use it directly. @headername{regex} | 
|---|
| 29 | */ | 
|---|
| 30 |  | 
|---|
| 31 | namespace std _GLIBCXX_VISIBILITY(default) | 
|---|
| 32 | { | 
|---|
| 33 | _GLIBCXX_BEGIN_NAMESPACE_VERSION | 
|---|
| 34 |  | 
|---|
| 35 | namespace __detail | 
|---|
| 36 | { | 
|---|
| 37 | /** | 
|---|
| 38 | * @addtogroup regex-detail | 
|---|
| 39 | * @{ | 
|---|
| 40 | */ | 
|---|
| 41 |  | 
|---|
| 42 | struct _ScannerBase | 
|---|
| 43 | { | 
|---|
| 44 | public: | 
|---|
| 45 | /// Token types returned from the scanner. | 
|---|
| 46 | enum _TokenT : unsigned | 
|---|
| 47 | { | 
|---|
| 48 | _S_token_anychar, | 
|---|
| 49 | _S_token_ord_char, | 
|---|
| 50 | _S_token_oct_num, | 
|---|
| 51 | _S_token_hex_num, | 
|---|
| 52 | _S_token_backref, | 
|---|
| 53 | _S_token_subexpr_begin, | 
|---|
| 54 | _S_token_subexpr_no_group_begin, | 
|---|
| 55 | _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n' | 
|---|
| 56 | _S_token_subexpr_end, | 
|---|
| 57 | _S_token_bracket_begin, | 
|---|
| 58 | _S_token_bracket_neg_begin, | 
|---|
| 59 | _S_token_bracket_end, | 
|---|
| 60 | _S_token_interval_begin, | 
|---|
| 61 | _S_token_interval_end, | 
|---|
| 62 | _S_token_quoted_class, | 
|---|
| 63 | _S_token_char_class_name, | 
|---|
| 64 | _S_token_collsymbol, | 
|---|
| 65 | _S_token_equiv_class_name, | 
|---|
| 66 | _S_token_opt, | 
|---|
| 67 | _S_token_or, | 
|---|
| 68 | _S_token_closure0, | 
|---|
| 69 | _S_token_closure1, | 
|---|
| 70 | _S_token_line_begin, | 
|---|
| 71 | _S_token_line_end, | 
|---|
| 72 | _S_token_word_bound, // neg if _M_value[0] == 'n' | 
|---|
| 73 | _S_token_comma, | 
|---|
| 74 | _S_token_dup_count, | 
|---|
| 75 | _S_token_eof, | 
|---|
| 76 | _S_token_bracket_dash, | 
|---|
| 77 | _S_token_unknown = -1u | 
|---|
| 78 | }; | 
|---|
| 79 |  | 
|---|
| 80 | protected: | 
|---|
| 81 | typedef regex_constants::syntax_option_type _FlagT; | 
|---|
| 82 |  | 
|---|
| 83 | enum _StateT | 
|---|
| 84 | { | 
|---|
| 85 | _S_state_normal, | 
|---|
| 86 | _S_state_in_brace, | 
|---|
| 87 | _S_state_in_bracket, | 
|---|
| 88 | }; | 
|---|
| 89 |  | 
|---|
| 90 | protected: | 
|---|
| 91 | _ScannerBase(_FlagT __flags) | 
|---|
| 92 | : _M_state(_S_state_normal), | 
|---|
| 93 | _M_flags(__flags), | 
|---|
| 94 | _M_escape_tbl(_M_is_ecma() | 
|---|
| 95 | ? _M_ecma_escape_tbl | 
|---|
| 96 | : _M_awk_escape_tbl), | 
|---|
| 97 | _M_spec_char(_M_is_ecma() | 
|---|
| 98 | ? _M_ecma_spec_char | 
|---|
| 99 | : _M_flags & regex_constants::basic | 
|---|
| 100 | ? _M_basic_spec_char | 
|---|
| 101 | : _M_flags & regex_constants::extended | 
|---|
| 102 | ? _M_extended_spec_char | 
|---|
| 103 | : _M_flags & regex_constants::grep | 
|---|
| 104 | ? ".[\\*^$\n" | 
|---|
| 105 | : _M_flags & regex_constants::egrep | 
|---|
| 106 | ? ".[\\()*+?{|^$\n" | 
|---|
| 107 | : _M_flags & regex_constants::awk | 
|---|
| 108 | ? _M_extended_spec_char | 
|---|
| 109 | : nullptr), | 
|---|
| 110 | _M_at_bracket_start(false) | 
|---|
| 111 | { __glibcxx_assert(_M_spec_char); } | 
|---|
| 112 |  | 
|---|
| 113 | protected: | 
|---|
| 114 | const char* | 
|---|
| 115 | _M_find_escape(char __c) | 
|---|
| 116 | { | 
|---|
| 117 | auto __it = _M_escape_tbl; | 
|---|
| 118 | for (; __it->first != '\0'; ++__it) | 
|---|
| 119 | if (__it->first == __c) | 
|---|
| 120 | return &__it->second; | 
|---|
| 121 | return nullptr; | 
|---|
| 122 | } | 
|---|
| 123 |  | 
|---|
| 124 | bool | 
|---|
| 125 | _M_is_ecma() const | 
|---|
| 126 | { return _M_flags & regex_constants::ECMAScript; } | 
|---|
| 127 |  | 
|---|
| 128 | bool | 
|---|
| 129 | _M_is_basic() const | 
|---|
| 130 | { return _M_flags & (regex_constants::basic | regex_constants::grep); } | 
|---|
| 131 |  | 
|---|
| 132 | bool | 
|---|
| 133 | _M_is_extended() const | 
|---|
| 134 | { | 
|---|
| 135 | return _M_flags & (regex_constants::extended | 
|---|
| 136 | | regex_constants::egrep | 
|---|
| 137 | | regex_constants::awk); | 
|---|
| 138 | } | 
|---|
| 139 |  | 
|---|
| 140 | bool | 
|---|
| 141 | _M_is_grep() const | 
|---|
| 142 | { return _M_flags & (regex_constants::grep | regex_constants::egrep); } | 
|---|
| 143 |  | 
|---|
| 144 | bool | 
|---|
| 145 | _M_is_awk() const | 
|---|
| 146 | { return _M_flags & regex_constants::awk; } | 
|---|
| 147 |  | 
|---|
| 148 | protected: | 
|---|
| 149 | // TODO: Make them static in the next abi change. | 
|---|
| 150 | const std::pair<char, _TokenT> _M_token_tbl[9] = | 
|---|
| 151 | { | 
|---|
| 152 | {'^', _S_token_line_begin}, | 
|---|
| 153 | {'$', _S_token_line_end}, | 
|---|
| 154 | {'.', _S_token_anychar}, | 
|---|
| 155 | {'*', _S_token_closure0}, | 
|---|
| 156 | {'+', _S_token_closure1}, | 
|---|
| 157 | {'?', _S_token_opt}, | 
|---|
| 158 | {'|', _S_token_or}, | 
|---|
| 159 | {'\n', _S_token_or}, // grep and egrep | 
|---|
| 160 | {'\0', _S_token_or}, | 
|---|
| 161 | }; | 
|---|
| 162 | const std::pair<char, char> _M_ecma_escape_tbl[8] = | 
|---|
| 163 | { | 
|---|
| 164 | {'0', '\0'}, | 
|---|
| 165 | {'b', '\b'}, | 
|---|
| 166 | {'f', '\f'}, | 
|---|
| 167 | {'n', '\n'}, | 
|---|
| 168 | {'r', '\r'}, | 
|---|
| 169 | {'t', '\t'}, | 
|---|
| 170 | {'v', '\v'}, | 
|---|
| 171 | {'\0', '\0'}, | 
|---|
| 172 | }; | 
|---|
| 173 | const std::pair<char, char> _M_awk_escape_tbl[11] = | 
|---|
| 174 | { | 
|---|
| 175 | {'"', '"'}, | 
|---|
| 176 | {'/', '/'}, | 
|---|
| 177 | {'\\', '\\'}, | 
|---|
| 178 | {'a', '\a'}, | 
|---|
| 179 | {'b', '\b'}, | 
|---|
| 180 | {'f', '\f'}, | 
|---|
| 181 | {'n', '\n'}, | 
|---|
| 182 | {'r', '\r'}, | 
|---|
| 183 | {'t', '\t'}, | 
|---|
| 184 | {'v', '\v'}, | 
|---|
| 185 | {'\0', '\0'}, | 
|---|
| 186 | }; | 
|---|
| 187 | const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|"; | 
|---|
| 188 | const char* _M_basic_spec_char = ".[\\*^$"; | 
|---|
| 189 | const char* _M_extended_spec_char = ".[\\()*+?{|^$"; | 
|---|
| 190 |  | 
|---|
| 191 | _StateT                       _M_state; | 
|---|
| 192 | _FlagT                        _M_flags; | 
|---|
| 193 | _TokenT                       _M_token; | 
|---|
| 194 | const std::pair<char, char>*  _M_escape_tbl; | 
|---|
| 195 | const char*                   _M_spec_char; | 
|---|
| 196 | bool                          _M_at_bracket_start; | 
|---|
| 197 | }; | 
|---|
| 198 |  | 
|---|
| 199 | /** | 
|---|
| 200 | * @brief Scans an input range for regex tokens. | 
|---|
| 201 | * | 
|---|
| 202 | * The %_Scanner class interprets the regular expression pattern in | 
|---|
| 203 | * the input range passed to its constructor as a sequence of parse | 
|---|
| 204 | * tokens passed to the regular expression compiler.  The sequence | 
|---|
| 205 | * of tokens provided depends on the flag settings passed to the | 
|---|
| 206 | * constructor: different regular expression grammars will interpret | 
|---|
| 207 | * the same input pattern in syntactically different ways. | 
|---|
| 208 | */ | 
|---|
| 209 | template<typename _CharT> | 
|---|
| 210 | class _Scanner | 
|---|
| 211 | : public _ScannerBase | 
|---|
| 212 | { | 
|---|
| 213 | public: | 
|---|
| 214 | typedef std::basic_string<_CharT>                           _StringT; | 
|---|
| 215 | typedef regex_constants::syntax_option_type                 _FlagT; | 
|---|
| 216 | typedef const std::ctype<_CharT>                            _CtypeT; | 
|---|
| 217 |  | 
|---|
| 218 | _Scanner(const _CharT* __begin, const _CharT* __end, | 
|---|
| 219 | _FlagT __flags, std::locale __loc); | 
|---|
| 220 |  | 
|---|
| 221 | void | 
|---|
| 222 | _M_advance(); | 
|---|
| 223 |  | 
|---|
| 224 | _TokenT | 
|---|
| 225 | _M_get_token() const noexcept | 
|---|
| 226 | { return _M_token; } | 
|---|
| 227 |  | 
|---|
| 228 | const _StringT& | 
|---|
| 229 | _M_get_value() const noexcept | 
|---|
| 230 | { return _M_value; } | 
|---|
| 231 |  | 
|---|
| 232 | #ifdef _GLIBCXX_DEBUG | 
|---|
| 233 | std::ostream& | 
|---|
| 234 | _M_print(std::ostream&); | 
|---|
| 235 | #endif | 
|---|
| 236 |  | 
|---|
| 237 | private: | 
|---|
| 238 | void | 
|---|
| 239 | _M_scan_normal(); | 
|---|
| 240 |  | 
|---|
| 241 | void | 
|---|
| 242 | _M_scan_in_bracket(); | 
|---|
| 243 |  | 
|---|
| 244 | void | 
|---|
| 245 | _M_scan_in_brace(); | 
|---|
| 246 |  | 
|---|
| 247 | void | 
|---|
| 248 | _M_eat_escape_ecma(); | 
|---|
| 249 |  | 
|---|
| 250 | void | 
|---|
| 251 | _M_eat_escape_posix(); | 
|---|
| 252 |  | 
|---|
| 253 | void | 
|---|
| 254 | _M_eat_escape_awk(); | 
|---|
| 255 |  | 
|---|
| 256 | void | 
|---|
| 257 | _M_eat_class(char); | 
|---|
| 258 |  | 
|---|
| 259 | const _CharT*                 _M_current; | 
|---|
| 260 | const _CharT*                 _M_end; | 
|---|
| 261 | _CtypeT&                      _M_ctype; | 
|---|
| 262 | _StringT                      _M_value; | 
|---|
| 263 | void (_Scanner::* _M_eat_escape)(); | 
|---|
| 264 | }; | 
|---|
| 265 |  | 
|---|
| 266 | ///@} regex-detail | 
|---|
| 267 | } // namespace __detail | 
|---|
| 268 | _GLIBCXX_END_NAMESPACE_VERSION | 
|---|
| 269 | } // namespace std | 
|---|
| 270 |  | 
|---|
| 271 | #include <bits/regex_scanner.tcc> | 
|---|
| 272 |  | 
|---|