| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H |
| 10 | #define _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H |
| 11 | |
| 12 | #include <__config> |
| 13 | #include <__memory/unique_ptr.h> |
| 14 | #include <ios> |
| 15 | |
| 16 | #if _LIBCPP_HAS_LOCALIZATION |
| 17 | |
| 18 | # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 19 | # pragma GCC system_header |
| 20 | # endif |
| 21 | |
| 22 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 23 | |
| 24 | // __scan_keyword |
| 25 | // Scans [__b, __e) until a match is found in the basic_strings range |
| 26 | // [__kb, __ke) or until it can be shown that there is no match in [__kb, __ke). |
| 27 | // __b will be incremented (visibly), consuming CharT until a match is found |
| 28 | // or proved to not exist. A keyword may be "", in which will match anything. |
| 29 | // If one keyword is a prefix of another, and the next CharT in the input |
| 30 | // might match another keyword, the algorithm will attempt to find the longest |
| 31 | // matching keyword. If the longer matching keyword ends up not matching, then |
| 32 | // no keyword match is found. If no keyword match is found, __ke is returned |
| 33 | // and failbit is set in __err. |
| 34 | // Else an iterator pointing to the matching keyword is found. If more than |
| 35 | // one keyword matches, an iterator to the first matching keyword is returned. |
| 36 | // If on exit __b == __e, eofbit is set in __err. If __case_sensitive is false, |
| 37 | // __ct is used to force to lower case before comparing characters. |
| 38 | // Examples: |
| 39 | // Keywords: "a", "abb" |
| 40 | // If the input is "a", the first keyword matches and eofbit is set. |
| 41 | // If the input is "abc", no match is found and "ab" are consumed. |
| 42 | template <class _InputIterator, class _ForwardIterator, class _Ctype> |
| 43 | _LIBCPP_HIDE_FROM_ABI _ForwardIterator __scan_keyword( |
| 44 | _InputIterator& __b, |
| 45 | _InputIterator __e, |
| 46 | _ForwardIterator __kb, |
| 47 | _ForwardIterator __ke, |
| 48 | const _Ctype& __ct, |
| 49 | ios_base::iostate& __err, |
| 50 | bool __case_sensitive = true) { |
| 51 | typedef typename iterator_traits<_InputIterator>::value_type _CharT; |
| 52 | size_t __nkw = static_cast<size_t>(std::distance(__kb, __ke)); |
| 53 | const unsigned char __doesnt_match = '\0'; |
| 54 | const unsigned char __might_match = '\1'; |
| 55 | const unsigned char __does_match = '\2'; |
| 56 | unsigned char __statbuf[100]; |
| 57 | unsigned char* __status = __statbuf; |
| 58 | unique_ptr<unsigned char, void (*)(void*)> __stat_hold(nullptr, free); |
| 59 | if (__nkw > sizeof(__statbuf)) { |
| 60 | __status = (unsigned char*)malloc(size: __nkw); |
| 61 | if (__status == nullptr) |
| 62 | std::__throw_bad_alloc(); |
| 63 | __stat_hold.reset(p: __status); |
| 64 | } |
| 65 | size_t __n_might_match = __nkw; // At this point, any keyword might match |
| 66 | size_t __n_does_match = 0; // but none of them definitely do |
| 67 | // Initialize all statuses to __might_match, except for "" keywords are __does_match |
| 68 | unsigned char* __st = __status; |
| 69 | for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) { |
| 70 | if (!__ky->empty()) |
| 71 | *__st = __might_match; |
| 72 | else { |
| 73 | *__st = __does_match; |
| 74 | --__n_might_match; |
| 75 | ++__n_does_match; |
| 76 | } |
| 77 | } |
| 78 | // While there might be a match, test keywords against the next CharT |
| 79 | for (size_t __indx = 0; __b != __e && __n_might_match > 0; ++__indx) { |
| 80 | // Peek at the next CharT but don't consume it |
| 81 | _CharT __c = *__b; |
| 82 | if (!__case_sensitive) |
| 83 | __c = __ct.toupper(__c); |
| 84 | bool __consume = false; |
| 85 | // For each keyword which might match, see if the __indx character is __c |
| 86 | // If a match if found, consume __c |
| 87 | // If a match is found, and that is the last character in the keyword, |
| 88 | // then that keyword matches. |
| 89 | // If the keyword doesn't match this character, then change the keyword |
| 90 | // to doesn't match |
| 91 | __st = __status; |
| 92 | for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) { |
| 93 | if (*__st == __might_match) { |
| 94 | _CharT __kc = (*__ky)[__indx]; |
| 95 | if (!__case_sensitive) |
| 96 | __kc = __ct.toupper(__kc); |
| 97 | if (__c == __kc) { |
| 98 | __consume = true; |
| 99 | if (__ky->size() == __indx + 1) { |
| 100 | *__st = __does_match; |
| 101 | --__n_might_match; |
| 102 | ++__n_does_match; |
| 103 | } |
| 104 | } else { |
| 105 | *__st = __doesnt_match; |
| 106 | --__n_might_match; |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | // consume if we matched a character |
| 111 | if (__consume) { |
| 112 | ++__b; |
| 113 | // If we consumed a character and there might be a matched keyword that |
| 114 | // was marked matched on a previous iteration, then such keywords |
| 115 | // which are now marked as not matching. |
| 116 | if (__n_might_match + __n_does_match > 1) { |
| 117 | __st = __status; |
| 118 | for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) { |
| 119 | if (*__st == __does_match && __ky->size() != __indx + 1) { |
| 120 | *__st = __doesnt_match; |
| 121 | --__n_does_match; |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | } |
| 126 | } |
| 127 | // We've exited the loop because we hit eof and/or we have no more "might matches". |
| 128 | if (__b == __e) |
| 129 | __err |= ios_base::eofbit; |
| 130 | // Return the first matching result |
| 131 | for (__st = __status; __kb != __ke; ++__kb, (void)++__st) |
| 132 | if (*__st == __does_match) |
| 133 | break; |
| 134 | if (__kb == __ke) |
| 135 | __err |= ios_base::failbit; |
| 136 | return __kb; |
| 137 | } |
| 138 | |
| 139 | _LIBCPP_END_NAMESPACE_STD |
| 140 | |
| 141 | #endif // _LIBCPP_HAS_LOCALIZATION |
| 142 | |
| 143 | #endif // _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H |
| 144 | |