1 | //===----------------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H |
10 | #define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H |
11 | |
12 | // These headers are in the shared LLVM-libc header library. |
13 | #include "shared/fp_bits.h" |
14 | #include "shared/str_to_float.h" |
15 | #include "shared/str_to_integer.h" |
16 | |
17 | #include <__assert> |
18 | #include <__config> |
19 | #include <cctype> |
20 | #include <charconv> |
21 | #include <concepts> |
22 | #include <limits> |
23 | |
24 | // Included for the _Floating_type_traits class |
25 | #include "to_chars_floating_point.h" |
26 | |
27 | _LIBCPP_BEGIN_NAMESPACE_STD |
28 | |
29 | // Parses an infinity string. |
30 | // Valid strings are case insensitive and contain INF or INFINITY. |
31 | // |
32 | // - __first is the first argument to std::from_chars. When the string is invalid |
33 | // this value is returned as ptr in the result. |
34 | // - __last is the last argument of std::from_chars. |
35 | // - __value is the value argument of std::from_chars, |
36 | // - __ptr is the current position is the input string. This is points beyond |
37 | // the initial I character. |
38 | // - __negative whether a valid string represents -inf or +inf. |
39 | template <floating_point _Fp> |
40 | __from_chars_result<_Fp> |
41 | __from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) { |
42 | if (__last - __ptr < 2) [[unlikely]] |
43 | return {_Fp{0}, 0, errc::invalid_argument}; |
44 | |
45 | if (std::tolower(c: __ptr[0]) != 'n' || std::tolower(c: __ptr[1]) != 'f') [[unlikely]] |
46 | return {_Fp{0}, 0, errc::invalid_argument}; |
47 | |
48 | __ptr += 2; |
49 | |
50 | // At this point the result is valid and contains INF. |
51 | // When the remaining part contains INITY this will be consumed. Otherwise |
52 | // only INF is consumed. For example INFINITZ will consume INF and ignore |
53 | // INITZ. |
54 | |
55 | if (__last - __ptr >= 5 // |
56 | && std::tolower(c: __ptr[0]) == 'i' // |
57 | && std::tolower(c: __ptr[1]) == 'n' // |
58 | && std::tolower(c: __ptr[2]) == 'i' // |
59 | && std::tolower(c: __ptr[3]) == 't' // |
60 | && std::tolower(c: __ptr[4]) == 'y') |
61 | __ptr += 5; |
62 | |
63 | if constexpr (numeric_limits<_Fp>::has_infinity) { |
64 | if (__negative) |
65 | return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}}; |
66 | |
67 | return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}}; |
68 | } else { |
69 | return {_Fp{0}, __ptr - __first, errc::result_out_of_range}; |
70 | } |
71 | } |
72 | |
73 | // Parses a nan string. |
74 | // Valid strings are case insensitive and contain INF or INFINITY. |
75 | // |
76 | // - __first is the first argument to std::from_chars. When the string is invalid |
77 | // this value is returned as ptr in the result. |
78 | // - __last is the last argument of std::from_chars. |
79 | // - __value is the value argument of std::from_chars, |
80 | // - __ptr is the current position is the input string. This is points beyond |
81 | // the initial N character. |
82 | // - __negative whether a valid string represents -nan or +nan. |
83 | template <floating_point _Fp> |
84 | __from_chars_result<_Fp> |
85 | __from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) { |
86 | if (__last - __ptr < 2) [[unlikely]] |
87 | return {_Fp{0}, 0, errc::invalid_argument}; |
88 | |
89 | if (std::tolower(c: __ptr[0]) != 'a' || std::tolower(c: __ptr[1]) != 'n') [[unlikely]] |
90 | return {_Fp{0}, 0, errc::invalid_argument}; |
91 | |
92 | __ptr += 2; |
93 | |
94 | // At this point the result is valid and contains NAN. When the remaining |
95 | // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise |
96 | // only NAN is consumed. For example NAN(abcd will consume NAN and ignore |
97 | // (abcd. |
98 | if (__last - __ptr >= 2 && __ptr[0] == '(') { |
99 | size_t __offset = 1; |
100 | do { |
101 | if (__ptr[__offset] == ')') { |
102 | __ptr += __offset + 1; |
103 | break; |
104 | } |
105 | if (__ptr[__offset] != '_' && !std::isalnum(c: __ptr[__offset])) |
106 | break; |
107 | ++__offset; |
108 | } while (__ptr + __offset != __last); |
109 | } |
110 | |
111 | if (__negative) |
112 | return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}}; |
113 | |
114 | return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}}; |
115 | } |
116 | |
// The result of parsing the fractional constant (the digits and the optional
// radix point) of a floating-point value.
template <class _Tp>
struct __fractional_constant_result {
  // Position in the input where parsing stopped. Remains size_t(-1) until a
  // parser stores the real position.
  size_t __offset = static_cast<size_t>(-1);
  // The digits that were parsed, accumulated as an integer.
  _Tp __mantissa = 0;
  // The exponent adjustment caused by the position of the radix point and by
  // digits that did not fit in __mantissa.
  int __exponent = 0;
  // Whether a non-zero digit was dropped because __mantissa was full.
  bool __truncated = false;
  // Whether at least one digit was parsed.
  bool __is_valid = false;
};
125 | |
126 | // Parses the hex constant part of the hexadecimal floating-point value. |
127 | // - input start of buffer given to from_chars |
128 | // - __n the number of elements in the buffer |
129 | // - __offset where to start parsing. The input can have an optional sign, the |
130 | // offset starts after this sign. |
131 | template <class _Tp> |
132 | __fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) { |
133 | __fractional_constant_result<_Tp> __result; |
134 | |
135 | const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 16; |
136 | bool __fraction = false; |
137 | for (; __offset < __n; ++__offset) { |
138 | if (std::isxdigit(c: __input[__offset])) { |
139 | __result.__is_valid = true; |
140 | |
141 | uint32_t __digit = __input[__offset] - '0'; |
142 | switch (std::tolower(c: __input[__offset])) { |
143 | case 'a': |
144 | __digit = 10; |
145 | break; |
146 | case 'b': |
147 | __digit = 11; |
148 | break; |
149 | case 'c': |
150 | __digit = 12; |
151 | break; |
152 | case 'd': |
153 | __digit = 13; |
154 | break; |
155 | case 'e': |
156 | __digit = 14; |
157 | break; |
158 | case 'f': |
159 | __digit = 15; |
160 | break; |
161 | } |
162 | |
163 | if (__result.__mantissa < __mantissa_truncate_threshold) { |
164 | __result.__mantissa = (__result.__mantissa * 16) + __digit; |
165 | if (__fraction) |
166 | __result.__exponent -= 4; |
167 | } else { |
168 | if (__digit > 0) |
169 | __result.__truncated = true; |
170 | if (!__fraction) |
171 | __result.__exponent += 4; |
172 | } |
173 | } else if (__input[__offset] == '.') { |
174 | if (__fraction) |
175 | break; // this means that __input[__offset] points to a second decimal point, ending the number. |
176 | |
177 | __fraction = true; |
178 | } else |
179 | break; |
180 | } |
181 | |
182 | __result.__offset = __offset; |
183 | return __result; |
184 | } |
185 | |
// The result of parsing the optional exponent of a floating-point value.
struct __exponent_result {
  // Position in the input after the parsed part. Remains size_t(-1) until a
  // parser stores the real position.
  size_t __offset = static_cast<size_t>(-1);
  // The value of the exponent, 0 when no exponent is present.
  int __value = 0;
  // Whether an exponent was found in the input.
  bool __present = false;
};
191 | |
192 | // When the exponent is not present the result of the struct contains |
193 | // __offset, 0, false. This allows using the results unconditionally, the |
194 | // __present is important for the scientific notation, where the value is |
195 | // mandatory. |
196 | __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) { |
197 | if (__offset + 1 < __n && // an exponent always needs at least one digit. |
198 | std::tolower(c: __input[__offset]) == __marker && // |
199 | !std::isspace(c: __input[__offset + 1]) // leading whitespace is not allowed. |
200 | ) { |
201 | ++__offset; |
202 | LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e = |
203 | LIBC_NAMESPACE::shared::strtointeger<int32_t>(src: __input + __offset, base: 10, src_len: __n - __offset); |
204 | // __result.error contains the errno value, 0 or ERANGE these are not interesting. |
205 | // If the number of characters parsed is 0 it means there was no number. |
206 | if (__e.parsed_len != 0) |
207 | return {__offset + __e.parsed_len, __e.value, true}; |
208 | else |
209 | --__offset; // the assumption of a valid exponent was not true, undo eating the exponent character. |
210 | } |
211 | |
212 | return {__offset, 0, false}; |
213 | } |
214 | |
// Returns the sum of __fractional and __exponent clamped to the range
// [-__max_biased_exponent, __max_biased_exponent].
//
// Here we do this operation as int64 to avoid overflow. After clamping the
// sum fits in an int32_t.
//
// The function is inline since it is a non-template function defined in a
// header.
inline int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int64_t __sum = __fractional + __exponent;

  if (__sum > __max_biased_exponent)
    return __max_biased_exponent;

  if (__sum < -__max_biased_exponent)
    return -__max_biased_exponent;

  return static_cast<int32_t>(__sum);
}
227 | |
228 | template <class _Fp, class _Tp> |
229 | __from_chars_result<_Fp> |
230 | __calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) { |
231 | auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>(); |
232 | __r.set_mantissa(__mantissa); |
233 | __r.set_biased_exponent(__exponent); |
234 | |
235 | // C17 7.12.1/6 |
236 | // The result underflows if the magnitude of the mathematical result is so |
237 | // small that the mathematical result cannot be represented, without |
238 | // extraordinary roundoff error, in an object of the specified type.237) If |
239 | // the result underflows, the function returns an implementation-defined |
240 | // value whose magnitude is no greater than the smallest normalized positive |
241 | // number in the specified type; if the integer expression math_errhandling |
242 | // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is |
243 | // implementation-defined; if the integer expression math_errhandling & |
244 | // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point |
245 | // exception is raised is implementation-defined. |
246 | // |
247 | // LLVM-LIBC sets ERAGNE for subnormal values |
248 | // |
249 | // [charconv.from.chars]/1 |
250 | // ... If the parsed value is not in the range representable by the type of |
251 | // value, value is unmodified and the member ec of the return value is |
252 | // equal to errc::result_out_of_range. ... |
253 | // |
254 | // Undo the ERANGE for subnormal values. |
255 | if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero()) |
256 | __result.__ec = errc{}; |
257 | |
258 | if (__negative) |
259 | __result.__value = -__r.get_val(); |
260 | else |
261 | __result.__value = __r.get_val(); |
262 | |
263 | return __result; |
264 | } |
265 | |
266 | // Implements from_chars for decimal floating-point values. |
267 | // __first forwarded from from_chars |
268 | // __last forwarded from from_chars |
269 | // __value forwarded from from_chars |
270 | // __fmt forwarded from from_chars |
271 | // __ptr the start of the buffer to parse. This is after the optional sign character. |
272 | // __negative should __value be set to a negative value? |
273 | // |
274 | // This function and __from_chars_floating_point_decimal are similar. However |
275 | // the similar parts are all in helper functions. So the amount of code |
276 | // duplication is minimal. |
277 | template <floating_point _Fp> |
278 | __from_chars_result<_Fp> |
279 | __from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) { |
280 | size_t __n = __last - __first; |
281 | ptrdiff_t __offset = __ptr - __first; |
282 | |
283 | auto __fractional = |
284 | std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset); |
285 | if (!__fractional.__is_valid) |
286 | return {_Fp{0}, 0, errc::invalid_argument}; |
287 | |
288 | auto __parsed_exponent = std::__parse_exponent(input: __first, __n, offset: __fractional.__offset, marker: 'p'); |
289 | __offset = __parsed_exponent.__offset; |
290 | int __exponent = std::__merge_exponents( |
291 | fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); |
292 | |
293 | __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}}; |
294 | LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0}; |
295 | if (__fractional.__mantissa != 0) { |
296 | auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>( |
297 | {__fractional.__mantissa, __exponent}, |
298 | __fractional.__truncated, |
299 | LIBC_NAMESPACE::shared::RoundDirection::Nearest); |
300 | __expanded_float = __temp.num; |
301 | if (__temp.error == ERANGE) { |
302 | __result.__ec = errc::result_out_of_range; |
303 | } |
304 | } |
305 | |
306 | return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result); |
307 | } |
308 | |
309 | // Parses the hex constant part of the decimal float value. |
310 | // - input start of buffer given to from_chars |
311 | // - __n the number of elements in the buffer |
312 | // - __offset where to start parsing. The input can have an optional sign, the |
313 | // offset starts after this sign. |
314 | template <class _Tp> |
315 | __fractional_constant_result<_Tp> |
316 | __parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) { |
317 | __fractional_constant_result<_Tp> __result; |
318 | |
319 | const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10; |
320 | bool __fraction = false; |
321 | for (; __offset < __n; ++__offset) { |
322 | if (std::isdigit(c: __input[__offset])) { |
323 | __result.__is_valid = true; |
324 | |
325 | uint32_t __digit = __input[__offset] - '0'; |
326 | if (__result.__mantissa < __mantissa_truncate_threshold) { |
327 | __result.__mantissa = (__result.__mantissa * 10) + __digit; |
328 | if (__fraction) |
329 | --__result.__exponent; |
330 | } else { |
331 | if (__digit > 0) |
332 | __result.__truncated = true; |
333 | if (!__fraction) |
334 | ++__result.__exponent; |
335 | } |
336 | } else if (__input[__offset] == '.') { |
337 | if (__fraction) |
338 | break; // this means that __input[__offset] points to a second decimal point, ending the number. |
339 | |
340 | __fraction = true; |
341 | } else |
342 | break; |
343 | } |
344 | |
345 | __result.__offset = __offset; |
346 | return __result; |
347 | } |
348 | |
349 | // Implements from_chars for decimal floating-point values. |
350 | // __first forwarded from from_chars |
351 | // __last forwarded from from_chars |
352 | // __value forwarded from from_chars |
353 | // __fmt forwarded from from_chars |
354 | // __ptr the start of the buffer to parse. This is after the optional sign character. |
355 | // __negative should __value be set to a negative value? |
356 | template <floating_point _Fp> |
357 | __from_chars_result<_Fp> __from_chars_floating_point_decimal( |
358 | const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) { |
359 | ptrdiff_t __n = __last - __first; |
360 | ptrdiff_t __offset = __ptr - __first; |
361 | |
362 | auto __fractional = |
363 | std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset); |
364 | if (!__fractional.__is_valid) |
365 | return {_Fp{0}, 0, errc::invalid_argument}; |
366 | |
367 | __offset = __fractional.__offset; |
368 | |
369 | // LWG3456 Pattern used by std::from_chars is underspecified |
370 | // This changes fixed to ignore a possible exponent instead of making its |
371 | // existance an error. |
372 | int __exponent; |
373 | if (__fmt == chars_format::fixed) { |
374 | __exponent = |
375 | std::__merge_exponents(fractional: __fractional.__exponent, exponent: 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); |
376 | } else { |
377 | auto __parsed_exponent = std::__parse_exponent(input: __first, __n, __offset, marker: 'e'); |
378 | if (__fmt == chars_format::scientific && !__parsed_exponent.__present) { |
379 | // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed, |
380 | // the otherwise optional exponent part shall appear; |
381 | return {_Fp{0}, 0, errc::invalid_argument}; |
382 | } |
383 | |
384 | __offset = __parsed_exponent.__offset; |
385 | __exponent = std::__merge_exponents( |
386 | fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); |
387 | } |
388 | |
389 | __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}}; |
390 | LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0}; |
391 | if (__fractional.__mantissa != 0) { |
392 | // This function expects to parse a positive value. This means it does not |
393 | // take a __first, __n as arguments, since __first points to '-' for |
394 | // negative values. |
395 | auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>( |
396 | {__fractional.__mantissa, __exponent}, |
397 | __fractional.__truncated, |
398 | LIBC_NAMESPACE::shared::RoundDirection::Nearest, |
399 | __ptr, |
400 | __last - __ptr); |
401 | __expanded_float = __temp.num; |
402 | if (__temp.error == ERANGE) { |
403 | __result.__ec = errc::result_out_of_range; |
404 | } |
405 | } |
406 | |
407 | return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result); |
408 | } |
409 | |
410 | template <floating_point _Fp> |
411 | __from_chars_result<_Fp> |
412 | __from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) { |
413 | if (__first == __last) [[unlikely]] |
414 | return {_Fp{0}, 0, errc::invalid_argument}; |
415 | |
416 | const char* __ptr = __first; |
417 | bool __negative = *__ptr == '-'; |
418 | if (__negative) { |
419 | ++__ptr; |
420 | if (__ptr == __last) [[unlikely]] |
421 | return {_Fp{0}, 0, errc::invalid_argument}; |
422 | } |
423 | |
424 | // [charconv.from.chars] |
425 | // [Note 1: If the pattern allows for an optional sign, but the string has |
426 | // no digit characters following the sign, no characters match the pattern. |
427 | // -- end note] |
428 | // This is true for integrals, floating point allows -.0 |
429 | |
430 | // [charconv.from.chars]/6.2 |
431 | // if fmt has chars_format::scientific set but not chars_format::fixed, the |
432 | // otherwise optional exponent part shall appear; |
433 | // Since INF/NAN do not have an exponent this value is not valid. |
434 | // |
435 | // LWG3456 Pattern used by std::from_chars is underspecified |
436 | // Does not address this point, but proposed option B does solve this issue, |
437 | // Both MSVC STL and libstdc++ implement this this behaviour. |
438 | switch (std::tolower(c: *__ptr)) { |
439 | case 'i': |
440 | return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative); |
441 | case 'n': |
442 | if constexpr (numeric_limits<_Fp>::has_quiet_NaN) |
443 | // NOTE: The pointer passed here will be parsed in the default C locale. |
444 | // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected. |
445 | return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative); |
446 | return {_Fp{0}, 0, errc::invalid_argument}; |
447 | } |
448 | |
449 | if (__fmt == chars_format::hex) |
450 | return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative); |
451 | |
452 | return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative); |
453 | } |
454 | |
455 | _LIBCPP_END_NAMESPACE_STD |
456 | |
457 | #endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H |
458 | |