| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
| 10 | #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
| 11 | |
| 12 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 13 | # pragma GCC system_header |
| 14 | #endif |
| 15 | |
| 16 | #include <__algorithm/fill_n.h> |
| 17 | #include <__config> |
| 18 | #include <__functional/hash.h> |
| 19 | #include <__functional/operations.h> |
| 20 | #include <__iterator/iterator_traits.h> |
| 21 | #include <__memory/shared_ptr.h> |
| 22 | #include <__type_traits/make_unsigned.h> |
| 23 | #include <__utility/pair.h> |
| 24 | #include <array> |
| 25 | #include <limits> |
| 26 | #include <unordered_map> |
| 27 | |
| 28 | #if _LIBCPP_STD_VER >= 17 |
| 29 | |
| 30 | _LIBCPP_PUSH_MACROS |
| 31 | # include <__undef_macros> |
| 32 | |
| 33 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 34 | |
| 35 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/> |
| 36 | class _BMSkipTable; |
| 37 | |
| 38 | // General case for BM data searching; use a map |
| 39 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate> |
| 40 | class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { |
| 41 | private: |
| 42 | using value_type = _Value; |
| 43 | using key_type = _Key; |
| 44 | |
| 45 | const value_type __default_value_; |
| 46 | unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; |
| 47 | |
| 48 | public: |
| 49 | _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable( |
| 50 | size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) |
| 51 | : __default_value_(__default_value), __table_(__sz, __hash, __pred) {} |
| 52 | |
| 53 | _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; } |
| 54 | |
| 55 | _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { |
| 56 | auto __it = __table_.find(__key); |
| 57 | return __it == __table_.end() ? __default_value_ : __it->second; |
| 58 | } |
| 59 | }; |
| 60 | |
| 61 | // Special case small numeric values; use an array |
| 62 | template <class _Key, class _Value, class _Hash, class _BinaryPredicate> |
| 63 | class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { |
| 64 | private: |
| 65 | using value_type = _Value; |
| 66 | using key_type = _Key; |
| 67 | |
| 68 | using unsigned_key_type = make_unsigned_t<key_type>; |
| 69 | std::array<value_type, 256> __table_; |
| 70 | static_assert(numeric_limits<unsigned_key_type>::max() < 256); |
| 71 | |
| 72 | public: |
| 73 | _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { |
| 74 | std::fill_n(__table_.data(), __table_.size(), __default_value); |
| 75 | } |
| 76 | |
| 77 | _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { |
| 78 | __table_[static_cast<unsigned_key_type>(__key)] = __val; |
| 79 | } |
| 80 | |
| 81 | _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { |
| 82 | return __table_[static_cast<unsigned_key_type>(__key)]; |
| 83 | } |
| 84 | }; |
| 85 | |
| 86 | template <class _RandomAccessIterator1, |
| 87 | class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, |
| 88 | class _BinaryPredicate = equal_to<>> |
| 89 | class boyer_moore_searcher { |
| 90 | private: |
| 91 | using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; |
| 92 | using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; |
| 93 | using __skip_table_type _LIBCPP_NODEBUG = |
| 94 | _BMSkipTable<value_type, |
| 95 | difference_type, |
| 96 | _Hash, |
| 97 | _BinaryPredicate, |
| 98 | is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && |
| 99 | is_same_v<_BinaryPredicate, equal_to<>>>; |
| 100 | |
| 101 | public: |
| 102 | _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher( |
| 103 | _RandomAccessIterator1 __first, |
| 104 | _RandomAccessIterator1 __last, |
| 105 | _Hash __hash = _Hash(), |
| 106 | _BinaryPredicate __pred = _BinaryPredicate()) |
| 107 | : __first_(__first), |
| 108 | __last_(__last), |
| 109 | __pred_(__pred), |
| 110 | __pattern_length_(__last - __first), |
| 111 | __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), |
| 112 | __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( |
| 113 | allocator<difference_type>(), __pattern_length_ + 1)) { |
| 114 | difference_type __i = 0; |
| 115 | while (__first != __last) { |
| 116 | __skip_table_->insert(*__first, __i); |
| 117 | ++__first; |
| 118 | ++__i; |
| 119 | } |
| 120 | __build_suffix_table(first: __first_, last: __last_, pred: __pred_); |
| 121 | } |
| 122 | |
| 123 | template <class _RandomAccessIterator2> |
| 124 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 125 | operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { |
| 126 | static_assert(is_same_v<__remove_cvref_t<typename iterator_traits<_RandomAccessIterator1>::value_type>, |
| 127 | __remove_cvref_t<typename iterator_traits<_RandomAccessIterator2>::value_type>>, |
| 128 | "Corpus and Pattern iterators must point to the same type" ); |
| 129 | if (__first == __last) |
| 130 | return std::make_pair(__last, __last); |
| 131 | if (__first_ == __last_) |
| 132 | return std::make_pair(__first, __first); |
| 133 | |
| 134 | if (__pattern_length_ > (__last - __first)) |
| 135 | return std::make_pair(__last, __last); |
| 136 | return __search(__first, __last); |
| 137 | } |
| 138 | |
| 139 | private: |
| 140 | _RandomAccessIterator1 __first_; |
| 141 | _RandomAccessIterator1 __last_; |
| 142 | _BinaryPredicate __pred_; |
| 143 | difference_type __pattern_length_; |
| 144 | shared_ptr<__skip_table_type> __skip_table_; |
| 145 | shared_ptr<difference_type[]> __suffix_; |
| 146 | |
| 147 | template <class _RandomAccessIterator2> |
| 148 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 149 | __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { |
| 150 | _RandomAccessIterator2 __current = __f; |
| 151 | const _RandomAccessIterator2 __last = __l - __pattern_length_; |
| 152 | const __skip_table_type& __skip_table = *__skip_table_; |
| 153 | |
| 154 | while (__current <= __last) { |
| 155 | difference_type __j = __pattern_length_; |
| 156 | while (__pred_(__first_[__j - 1], __current[__j - 1])) { |
| 157 | --__j; |
| 158 | if (__j == 0) |
| 159 | return std::make_pair(__current, __current + __pattern_length_); |
| 160 | } |
| 161 | |
| 162 | difference_type __k = __skip_table[__current[__j - 1]]; |
| 163 | difference_type __m = __j - __k - 1; |
| 164 | if (__k < __j && __m > __suffix_[__j]) |
| 165 | __current += __m; |
| 166 | else |
| 167 | __current += __suffix_[__j]; |
| 168 | } |
| 169 | return std::make_pair(__l, __l); |
| 170 | } |
| 171 | |
| 172 | template <class _Iterator, class _Container> |
| 173 | _LIBCPP_HIDE_FROM_ABI void |
| 174 | __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { |
| 175 | const size_t __count = __last - __first; |
| 176 | |
| 177 | __prefix[0] = 0; |
| 178 | size_t __k = 0; |
| 179 | |
| 180 | for (size_t __i = 1; __i != __count; ++__i) { |
| 181 | while (__k > 0 && !__pred(__first[__k], __first[__i])) |
| 182 | __k = __prefix[__k - 1]; |
| 183 | |
| 184 | if (__pred(__first[__k], __first[__i])) |
| 185 | ++__k; |
| 186 | __prefix[__i] = __k; |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | _LIBCPP_HIDE_FROM_ABI void |
| 191 | __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { |
| 192 | const size_t __count = __last - __first; |
| 193 | |
| 194 | if (__count == 0) |
| 195 | return; |
| 196 | |
| 197 | auto __scratch = std::make_unique<difference_type[]>(__count); |
| 198 | |
| 199 | __compute_bm_prefix(__first, __last, __pred, __scratch); |
| 200 | for (size_t __i = 0; __i <= __count; ++__i) |
| 201 | __suffix_[__i] = __count - __scratch[__count - 1]; |
| 202 | |
| 203 | using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; |
| 204 | __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); |
| 205 | |
| 206 | for (size_t __i = 0; __i != __count; ++__i) { |
| 207 | const size_t __j = __count - __scratch[__i]; |
| 208 | const difference_type __k = __i - __scratch[__i] + 1; |
| 209 | |
| 210 | if (__suffix_[__j] > __k) |
| 211 | __suffix_[__j] = __k; |
| 212 | } |
| 213 | } |
| 214 | }; |
| 215 | _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); |
| 216 | |
| 217 | template <class _RandomAccessIterator1, |
| 218 | class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, |
| 219 | class _BinaryPredicate = equal_to<>> |
| 220 | class boyer_moore_horspool_searcher { |
| 221 | private: |
| 222 | using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; |
| 223 | using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; |
| 224 | using __skip_table_type _LIBCPP_NODEBUG = |
| 225 | _BMSkipTable<value_type, |
| 226 | difference_type, |
| 227 | _Hash, |
| 228 | _BinaryPredicate, |
| 229 | is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && |
| 230 | is_same_v<_BinaryPredicate, equal_to<>>>; |
| 231 | |
| 232 | public: |
| 233 | _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher( |
| 234 | _RandomAccessIterator1 __first, |
| 235 | _RandomAccessIterator1 __last, |
| 236 | _Hash __hash = _Hash(), |
| 237 | _BinaryPredicate __pred = _BinaryPredicate()) |
| 238 | : __first_(__first), |
| 239 | __last_(__last), |
| 240 | __pred_(__pred), |
| 241 | __pattern_length_(__last - __first), |
| 242 | __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { |
| 243 | if (__first == __last) |
| 244 | return; |
| 245 | --__last; |
| 246 | difference_type __i = 0; |
| 247 | while (__first != __last) { |
| 248 | __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); |
| 249 | ++__first; |
| 250 | ++__i; |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | template <class _RandomAccessIterator2> |
| 255 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 256 | operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { |
| 257 | static_assert(is_same_v<__remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator1>::value_type>, |
| 258 | __remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator2>::value_type>>, |
| 259 | "Corpus and Pattern iterators must point to the same type" ); |
| 260 | if (__first == __last) |
| 261 | return std::make_pair(__last, __last); |
| 262 | if (__first_ == __last_) |
| 263 | return std::make_pair(__first, __first); |
| 264 | |
| 265 | if (__pattern_length_ > __last - __first) |
| 266 | return std::make_pair(__last, __last); |
| 267 | |
| 268 | return __search(__first, __last); |
| 269 | } |
| 270 | |
| 271 | private: |
| 272 | _RandomAccessIterator1 __first_; |
| 273 | _RandomAccessIterator1 __last_; |
| 274 | _BinaryPredicate __pred_; |
| 275 | difference_type __pattern_length_; |
| 276 | shared_ptr<__skip_table_type> __skip_table_; |
| 277 | |
| 278 | template <class _RandomAccessIterator2> |
| 279 | _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> |
| 280 | __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { |
| 281 | _RandomAccessIterator2 __current = __f; |
| 282 | const _RandomAccessIterator2 __last = __l - __pattern_length_; |
| 283 | const __skip_table_type& __skip_table = *__skip_table_; |
| 284 | |
| 285 | while (__current <= __last) { |
| 286 | difference_type __j = __pattern_length_; |
| 287 | while (__pred_(__first_[__j - 1], __current[__j - 1])) { |
| 288 | --__j; |
| 289 | if (__j == 0) |
| 290 | return std::make_pair(__current, __current + __pattern_length_); |
| 291 | } |
| 292 | __current += __skip_table[__current[__pattern_length_ - 1]]; |
| 293 | } |
| 294 | return std::make_pair(__l, __l); |
| 295 | } |
| 296 | }; |
| 297 | _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); |
| 298 | |
| 299 | _LIBCPP_END_NAMESPACE_STD |
| 300 | |
| 301 | _LIBCPP_POP_MACROS |
| 302 | |
| 303 | #endif // _LIBCPP_STD_VER >= 17 |
| 304 | |
| 305 | #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H |
| 306 | |