| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H |
| 10 | #define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H |
| 11 | |
| 12 | // These headers are in the shared LLVM-libc header library. |
| 13 | #include "shared/fp_bits.h" |
| 14 | #include "shared/str_to_float.h" |
| 15 | #include "shared/str_to_integer.h" |
| 16 | |
| 17 | #include <__assert> |
| 18 | #include <__config> |
| 19 | #include <cctype> |
| 20 | #include <charconv> |
| 21 | #include <concepts> |
| 22 | #include <limits> |
| 23 | |
| 24 | // Included for the _Floating_type_traits class |
| 25 | #include "to_chars_floating_point.h" |
| 26 | |
| 27 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 28 | |
| 29 | // Parses an infinity string. |
| 30 | // Valid strings are case insensitive and contain INF or INFINITY. |
| 31 | // |
| 32 | // - __first is the first argument to std::from_chars. When the string is invalid |
| 33 | // this value is returned as ptr in the result. |
| 34 | // - __last is the last argument of std::from_chars. |
| 35 | // - __value is the value argument of std::from_chars, |
| 36 | // - __ptr is the current position is the input string. This is points beyond |
| 37 | // the initial I character. |
| 38 | // - __negative whether a valid string represents -inf or +inf. |
| 39 | template <floating_point _Fp> |
| 40 | __from_chars_result<_Fp> |
| 41 | __from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) { |
| 42 | if (__last - __ptr < 2) [[unlikely]] |
| 43 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 44 | |
| 45 | if (std::tolower(c: __ptr[0]) != 'n' || std::tolower(c: __ptr[1]) != 'f') [[unlikely]] |
| 46 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 47 | |
| 48 | __ptr += 2; |
| 49 | |
| 50 | // At this point the result is valid and contains INF. |
| 51 | // When the remaining part contains INITY this will be consumed. Otherwise |
| 52 | // only INF is consumed. For example INFINITZ will consume INF and ignore |
| 53 | // INITZ. |
| 54 | |
| 55 | if (__last - __ptr >= 5 // |
| 56 | && std::tolower(c: __ptr[0]) == 'i' // |
| 57 | && std::tolower(c: __ptr[1]) == 'n' // |
| 58 | && std::tolower(c: __ptr[2]) == 'i' // |
| 59 | && std::tolower(c: __ptr[3]) == 't' // |
| 60 | && std::tolower(c: __ptr[4]) == 'y') |
| 61 | __ptr += 5; |
| 62 | |
| 63 | if constexpr (numeric_limits<_Fp>::has_infinity) { |
| 64 | if (__negative) |
| 65 | return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}}; |
| 66 | |
| 67 | return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}}; |
| 68 | } else { |
| 69 | return {_Fp{0}, __ptr - __first, errc::result_out_of_range}; |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | // Parses a nan string. |
| 74 | // Valid strings are case insensitive and contain INF or INFINITY. |
| 75 | // |
| 76 | // - __first is the first argument to std::from_chars. When the string is invalid |
| 77 | // this value is returned as ptr in the result. |
| 78 | // - __last is the last argument of std::from_chars. |
| 79 | // - __value is the value argument of std::from_chars, |
| 80 | // - __ptr is the current position is the input string. This is points beyond |
| 81 | // the initial N character. |
| 82 | // - __negative whether a valid string represents -nan or +nan. |
| 83 | template <floating_point _Fp> |
| 84 | __from_chars_result<_Fp> |
| 85 | __from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) { |
| 86 | if (__last - __ptr < 2) [[unlikely]] |
| 87 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 88 | |
| 89 | if (std::tolower(c: __ptr[0]) != 'a' || std::tolower(c: __ptr[1]) != 'n') [[unlikely]] |
| 90 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 91 | |
| 92 | __ptr += 2; |
| 93 | |
| 94 | // At this point the result is valid and contains NAN. When the remaining |
| 95 | // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise |
| 96 | // only NAN is consumed. For example NAN(abcd will consume NAN and ignore |
| 97 | // (abcd. |
| 98 | if (__last - __ptr >= 2 && __ptr[0] == '(') { |
| 99 | size_t __offset = 1; |
| 100 | do { |
| 101 | if (__ptr[__offset] == ')') { |
| 102 | __ptr += __offset + 1; |
| 103 | break; |
| 104 | } |
| 105 | if (__ptr[__offset] != '_' && !std::isalnum(c: __ptr[__offset])) |
| 106 | break; |
| 107 | ++__offset; |
| 108 | } while (__ptr + __offset != __last); |
| 109 | } |
| 110 | |
| 111 | if (__negative) |
| 112 | return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}}; |
| 113 | |
| 114 | return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}}; |
| 115 | } |
| 116 | |
// The outcome of parsing the digits and optional radix point of a
// floating-point constant.
template <class _Tp>
struct __fractional_constant_result {
  // Index in the input where parsing stopped. Holds size_t(-1) until a parser
  // stores a position.
  size_t __offset = static_cast<size_t>(-1);
  // The digits that fitted in _Tp, accumulated as an integer.
  _Tp __mantissa = 0;
  // The exponent adjustment implied by the position of the radix point and by
  // digits that did not fit in __mantissa.
  int __exponent = 0;
  // True when a non-zero digit was dropped since __mantissa was full.
  bool __truncated = false;
  // True when at least one digit was parsed.
  bool __is_valid = false;
};
| 125 | |
| 126 | // Parses the hex constant part of the hexadecimal floating-point value. |
| 127 | // - input start of buffer given to from_chars |
| 128 | // - __n the number of elements in the buffer |
| 129 | // - __offset where to start parsing. The input can have an optional sign, the |
| 130 | // offset starts after this sign. |
| 131 | template <class _Tp> |
| 132 | __fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) { |
| 133 | __fractional_constant_result<_Tp> __result; |
| 134 | |
| 135 | const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 16; |
| 136 | bool __fraction = false; |
| 137 | for (; __offset < __n; ++__offset) { |
| 138 | if (std::isxdigit(c: __input[__offset])) { |
| 139 | __result.__is_valid = true; |
| 140 | |
| 141 | uint32_t __digit = __input[__offset] - '0'; |
| 142 | switch (std::tolower(c: __input[__offset])) { |
| 143 | case 'a': |
| 144 | __digit = 10; |
| 145 | break; |
| 146 | case 'b': |
| 147 | __digit = 11; |
| 148 | break; |
| 149 | case 'c': |
| 150 | __digit = 12; |
| 151 | break; |
| 152 | case 'd': |
| 153 | __digit = 13; |
| 154 | break; |
| 155 | case 'e': |
| 156 | __digit = 14; |
| 157 | break; |
| 158 | case 'f': |
| 159 | __digit = 15; |
| 160 | break; |
| 161 | } |
| 162 | |
| 163 | if (__result.__mantissa < __mantissa_truncate_threshold) { |
| 164 | __result.__mantissa = (__result.__mantissa * 16) + __digit; |
| 165 | if (__fraction) |
| 166 | __result.__exponent -= 4; |
| 167 | } else { |
| 168 | if (__digit > 0) |
| 169 | __result.__truncated = true; |
| 170 | if (!__fraction) |
| 171 | __result.__exponent += 4; |
| 172 | } |
| 173 | } else if (__input[__offset] == '.') { |
| 174 | if (__fraction) |
| 175 | break; // this means that __input[__offset] points to a second decimal point, ending the number. |
| 176 | |
| 177 | __fraction = true; |
| 178 | } else |
| 179 | break; |
| 180 | } |
| 181 | |
| 182 | __result.__offset = __offset; |
| 183 | return __result; |
| 184 | } |
| 185 | |
// The outcome of parsing the optional exponent part of a floating-point
// constant.
struct __exponent_result {
  // Index in the input where parsing stopped. Holds size_t(-1) until a value
  // is stored.
  size_t __offset = static_cast<size_t>(-1);
  // The parsed exponent, 0 when no exponent is present.
  int __value = 0;
  // Whether an exponent was found in the input.
  bool __present = false;
};
| 191 | |
| 192 | // When the exponent is not present the result of the struct contains |
| 193 | // __offset, 0, false. This allows using the results unconditionally, the |
| 194 | // __present is important for the scientific notation, where the value is |
| 195 | // mandatory. |
| 196 | __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) { |
| 197 | if (__offset + 1 < __n && // an exponent always needs at least one digit. |
| 198 | std::tolower(c: __input[__offset]) == __marker && // |
| 199 | !std::isspace(c: __input[__offset + 1]) // leading whitespace is not allowed. |
| 200 | ) { |
| 201 | ++__offset; |
| 202 | LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e = |
| 203 | LIBC_NAMESPACE::shared::strtointeger<int32_t>(src: __input + __offset, base: 10, src_len: __n - __offset); |
| 204 | // __result.error contains the errno value, 0 or ERANGE these are not interesting. |
| 205 | // If the number of characters parsed is 0 it means there was no number. |
| 206 | if (__e.parsed_len != 0) |
| 207 | return {__offset + __e.parsed_len, __e.value, true}; |
| 208 | else |
| 209 | --__offset; // the assumption of a valid exponent was not true, undo eating the exponent character. |
| 210 | } |
| 211 | |
| 212 | return {__offset, 0, false}; |
| 213 | } |
| 214 | |
// Adds the exponent implied by the fractional constant to the parsed exponent
// and clamps the sum to [-__max_biased_exponent, __max_biased_exponent].
//
// Here we do this operation as int64 to avoid overflow.
//
// The function is inline since it is a non-template function defined in a
// header.
inline int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int64_t __sum = __fractional + __exponent;

  if (__sum > __max_biased_exponent)
    return __max_biased_exponent;

  if (__sum < -__max_biased_exponent)
    return -__max_biased_exponent;

  // The clamping above guarantees the sum fits in an int32_t.
  return static_cast<int32_t>(__sum);
}
| 227 | |
| 228 | template <class _Fp, class _Tp> |
| 229 | __from_chars_result<_Fp> |
| 230 | __calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) { |
| 231 | auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>(); |
| 232 | __r.set_mantissa(__mantissa); |
| 233 | __r.set_biased_exponent(__exponent); |
| 234 | |
| 235 | // C17 7.12.1/6 |
| 236 | // The result underflows if the magnitude of the mathematical result is so |
| 237 | // small that the mathematical result cannot be represented, without |
| 238 | // extraordinary roundoff error, in an object of the specified type.237) If |
| 239 | // the result underflows, the function returns an implementation-defined |
| 240 | // value whose magnitude is no greater than the smallest normalized positive |
| 241 | // number in the specified type; if the integer expression math_errhandling |
| 242 | // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is |
| 243 | // implementation-defined; if the integer expression math_errhandling & |
| 244 | // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point |
| 245 | // exception is raised is implementation-defined. |
| 246 | // |
| 247 | // LLVM-LIBC sets ERAGNE for subnormal values |
| 248 | // |
| 249 | // [charconv.from.chars]/1 |
| 250 | // ... If the parsed value is not in the range representable by the type of |
| 251 | // value, value is unmodified and the member ec of the return value is |
| 252 | // equal to errc::result_out_of_range. ... |
| 253 | // |
| 254 | // Undo the ERANGE for subnormal values. |
| 255 | if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero()) |
| 256 | __result.__ec = errc{}; |
| 257 | |
| 258 | if (__negative) |
| 259 | __result.__value = -__r.get_val(); |
| 260 | else |
| 261 | __result.__value = __r.get_val(); |
| 262 | |
| 263 | return __result; |
| 264 | } |
| 265 | |
| 266 | // Implements from_chars for decimal floating-point values. |
| 267 | // __first forwarded from from_chars |
| 268 | // __last forwarded from from_chars |
| 269 | // __value forwarded from from_chars |
| 270 | // __fmt forwarded from from_chars |
| 271 | // __ptr the start of the buffer to parse. This is after the optional sign character. |
| 272 | // __negative should __value be set to a negative value? |
| 273 | // |
| 274 | // This function and __from_chars_floating_point_decimal are similar. However |
| 275 | // the similar parts are all in helper functions. So the amount of code |
| 276 | // duplication is minimal. |
| 277 | template <floating_point _Fp> |
| 278 | __from_chars_result<_Fp> |
| 279 | __from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) { |
| 280 | size_t __n = __last - __first; |
| 281 | ptrdiff_t __offset = __ptr - __first; |
| 282 | |
| 283 | auto __fractional = |
| 284 | std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset); |
| 285 | if (!__fractional.__is_valid) |
| 286 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 287 | |
| 288 | auto __parsed_exponent = std::__parse_exponent(input: __first, __n, offset: __fractional.__offset, marker: 'p'); |
| 289 | __offset = __parsed_exponent.__offset; |
| 290 | int __exponent = std::__merge_exponents( |
| 291 | fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); |
| 292 | |
| 293 | __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}}; |
| 294 | LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0}; |
| 295 | if (__fractional.__mantissa != 0) { |
| 296 | auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>( |
| 297 | {__fractional.__mantissa, __exponent}, |
| 298 | __fractional.__truncated, |
| 299 | LIBC_NAMESPACE::shared::RoundDirection::Nearest); |
| 300 | __expanded_float = __temp.num; |
| 301 | if (__temp.error == ERANGE) { |
| 302 | __result.__ec = errc::result_out_of_range; |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result); |
| 307 | } |
| 308 | |
| 309 | // Parses the hex constant part of the decimal float value. |
| 310 | // - input start of buffer given to from_chars |
| 311 | // - __n the number of elements in the buffer |
| 312 | // - __offset where to start parsing. The input can have an optional sign, the |
| 313 | // offset starts after this sign. |
| 314 | template <class _Tp> |
| 315 | __fractional_constant_result<_Tp> |
| 316 | __parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) { |
| 317 | __fractional_constant_result<_Tp> __result; |
| 318 | |
| 319 | const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10; |
| 320 | bool __fraction = false; |
| 321 | for (; __offset < __n; ++__offset) { |
| 322 | if (std::isdigit(c: __input[__offset])) { |
| 323 | __result.__is_valid = true; |
| 324 | |
| 325 | uint32_t __digit = __input[__offset] - '0'; |
| 326 | if (__result.__mantissa < __mantissa_truncate_threshold) { |
| 327 | __result.__mantissa = (__result.__mantissa * 10) + __digit; |
| 328 | if (__fraction) |
| 329 | --__result.__exponent; |
| 330 | } else { |
| 331 | if (__digit > 0) |
| 332 | __result.__truncated = true; |
| 333 | if (!__fraction) |
| 334 | ++__result.__exponent; |
| 335 | } |
| 336 | } else if (__input[__offset] == '.') { |
| 337 | if (__fraction) |
| 338 | break; // this means that __input[__offset] points to a second decimal point, ending the number. |
| 339 | |
| 340 | __fraction = true; |
| 341 | } else |
| 342 | break; |
| 343 | } |
| 344 | |
| 345 | __result.__offset = __offset; |
| 346 | return __result; |
| 347 | } |
| 348 | |
| 349 | // Implements from_chars for decimal floating-point values. |
| 350 | // __first forwarded from from_chars |
| 351 | // __last forwarded from from_chars |
| 352 | // __value forwarded from from_chars |
| 353 | // __fmt forwarded from from_chars |
| 354 | // __ptr the start of the buffer to parse. This is after the optional sign character. |
| 355 | // __negative should __value be set to a negative value? |
| 356 | template <floating_point _Fp> |
| 357 | __from_chars_result<_Fp> __from_chars_floating_point_decimal( |
| 358 | const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) { |
| 359 | ptrdiff_t __n = __last - __first; |
| 360 | ptrdiff_t __offset = __ptr - __first; |
| 361 | |
| 362 | auto __fractional = |
| 363 | std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset); |
| 364 | if (!__fractional.__is_valid) |
| 365 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 366 | |
| 367 | __offset = __fractional.__offset; |
| 368 | |
| 369 | // LWG3456 Pattern used by std::from_chars is underspecified |
| 370 | // This changes fixed to ignore a possible exponent instead of making its |
| 371 | // existance an error. |
| 372 | int __exponent; |
| 373 | if (__fmt == chars_format::fixed) { |
| 374 | __exponent = |
| 375 | std::__merge_exponents(fractional: __fractional.__exponent, exponent: 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); |
| 376 | } else { |
| 377 | auto __parsed_exponent = std::__parse_exponent(input: __first, __n, __offset, marker: 'e'); |
| 378 | if (__fmt == chars_format::scientific && !__parsed_exponent.__present) { |
| 379 | // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed, |
| 380 | // the otherwise optional exponent part shall appear; |
| 381 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 382 | } |
| 383 | |
| 384 | __offset = __parsed_exponent.__offset; |
| 385 | __exponent = std::__merge_exponents( |
| 386 | fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); |
| 387 | } |
| 388 | |
| 389 | __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}}; |
| 390 | LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0}; |
| 391 | if (__fractional.__mantissa != 0) { |
| 392 | // This function expects to parse a positive value. This means it does not |
| 393 | // take a __first, __n as arguments, since __first points to '-' for |
| 394 | // negative values. |
| 395 | auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>( |
| 396 | {__fractional.__mantissa, __exponent}, |
| 397 | __fractional.__truncated, |
| 398 | LIBC_NAMESPACE::shared::RoundDirection::Nearest, |
| 399 | __ptr, |
| 400 | __last - __ptr); |
| 401 | __expanded_float = __temp.num; |
| 402 | if (__temp.error == ERANGE) { |
| 403 | __result.__ec = errc::result_out_of_range; |
| 404 | } |
| 405 | } |
| 406 | |
| 407 | return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result); |
| 408 | } |
| 409 | |
| 410 | template <floating_point _Fp> |
| 411 | __from_chars_result<_Fp> |
| 412 | __from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) { |
| 413 | if (__first == __last) [[unlikely]] |
| 414 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 415 | |
| 416 | const char* __ptr = __first; |
| 417 | bool __negative = *__ptr == '-'; |
| 418 | if (__negative) { |
| 419 | ++__ptr; |
| 420 | if (__ptr == __last) [[unlikely]] |
| 421 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 422 | } |
| 423 | |
| 424 | // [charconv.from.chars] |
| 425 | // [Note 1: If the pattern allows for an optional sign, but the string has |
| 426 | // no digit characters following the sign, no characters match the pattern. |
| 427 | // -- end note] |
| 428 | // This is true for integrals, floating point allows -.0 |
| 429 | |
| 430 | // [charconv.from.chars]/6.2 |
| 431 | // if fmt has chars_format::scientific set but not chars_format::fixed, the |
| 432 | // otherwise optional exponent part shall appear; |
| 433 | // Since INF/NAN do not have an exponent this value is not valid. |
| 434 | // |
| 435 | // LWG3456 Pattern used by std::from_chars is underspecified |
| 436 | // Does not address this point, but proposed option B does solve this issue, |
| 437 | // Both MSVC STL and libstdc++ implement this this behaviour. |
| 438 | switch (std::tolower(c: *__ptr)) { |
| 439 | case 'i': |
| 440 | return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative); |
| 441 | case 'n': |
| 442 | if constexpr (numeric_limits<_Fp>::has_quiet_NaN) |
| 443 | // NOTE: The pointer passed here will be parsed in the default C locale. |
| 444 | // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected. |
| 445 | return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative); |
| 446 | return {_Fp{0}, 0, errc::invalid_argument}; |
| 447 | } |
| 448 | |
| 449 | if (__fmt == chars_format::hex) |
| 450 | return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative); |
| 451 | |
| 452 | return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative); |
| 453 | } |
| 454 | |
| 455 | _LIBCPP_END_NAMESPACE_STD |
| 456 | |
| 457 | #endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H |
| 458 | |