1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
10#define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
11
12#include <__assert>
13#include <__config>
14#include <cctype>
15#include <charconv>
16#include <concepts>
17#include <limits>
18
19// Make sure we use libc++'s assertion machinery within the shared code we use
20// from LLVM libc.
21#define LIBC_ASSERT(cond) _LIBCPP_ASSERT((cond), _LIBCPP_TOSTRING(cond))
22
23// These headers are in the shared LLVM-libc header library.
24#include "shared/fp_bits.h"
25#include "shared/str_to_float.h"
26#include "shared/str_to_integer.h"
27
28// Included for the _Floating_type_traits class
29#include "to_chars_floating_point.h"
30
31_LIBCPP_BEGIN_NAMESPACE_STD
32
33// Parses an infinity string.
34// Valid strings are case insensitive and contain INF or INFINITY.
35//
36// - __first is the first argument to std::from_chars. When the string is invalid
37// this value is returned as ptr in the result.
38// - __last is the last argument of std::from_chars.
39// - __value is the value argument of std::from_chars,
40// - __ptr is the current position is the input string. This is points beyond
41// the initial I character.
42// - __negative whether a valid string represents -inf or +inf.
43template <floating_point _Fp>
44__from_chars_result<_Fp>
45__from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
46 if (__last - __ptr < 2) [[unlikely]]
47 return {_Fp{0}, 0, errc::invalid_argument};
48
49 if (std::tolower(c: __ptr[0]) != 'n' || std::tolower(c: __ptr[1]) != 'f') [[unlikely]]
50 return {_Fp{0}, 0, errc::invalid_argument};
51
52 __ptr += 2;
53
54 // At this point the result is valid and contains INF.
55 // When the remaining part contains INITY this will be consumed. Otherwise
56 // only INF is consumed. For example INFINITZ will consume INF and ignore
57 // INITZ.
58
59 if (__last - __ptr >= 5 //
60 && std::tolower(c: __ptr[0]) == 'i' //
61 && std::tolower(c: __ptr[1]) == 'n' //
62 && std::tolower(c: __ptr[2]) == 'i' //
63 && std::tolower(c: __ptr[3]) == 't' //
64 && std::tolower(c: __ptr[4]) == 'y')
65 __ptr += 5;
66
67 if constexpr (numeric_limits<_Fp>::has_infinity) {
68 if (__negative)
69 return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
70
71 return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
72 } else {
73 return {_Fp{0}, __ptr - __first, errc::result_out_of_range};
74 }
75}
76
77// Parses a nan string.
78// Valid strings are case insensitive and contain INF or INFINITY.
79//
80// - __first is the first argument to std::from_chars. When the string is invalid
81// this value is returned as ptr in the result.
82// - __last is the last argument of std::from_chars.
83// - __value is the value argument of std::from_chars,
84// - __ptr is the current position is the input string. This is points beyond
85// the initial N character.
86// - __negative whether a valid string represents -nan or +nan.
87template <floating_point _Fp>
88__from_chars_result<_Fp>
89__from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
90 if (__last - __ptr < 2) [[unlikely]]
91 return {_Fp{0}, 0, errc::invalid_argument};
92
93 if (std::tolower(c: __ptr[0]) != 'a' || std::tolower(c: __ptr[1]) != 'n') [[unlikely]]
94 return {_Fp{0}, 0, errc::invalid_argument};
95
96 __ptr += 2;
97
98 // At this point the result is valid and contains NAN. When the remaining
99 // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
100 // only NAN is consumed. For example NAN(abcd will consume NAN and ignore
101 // (abcd.
102 if (__last - __ptr >= 2 && __ptr[0] == '(') {
103 size_t __offset = 1;
104 do {
105 if (__ptr[__offset] == ')') {
106 __ptr += __offset + 1;
107 break;
108 }
109 if (__ptr[__offset] != '_' && !std::isalnum(c: __ptr[__offset]))
110 break;
111 ++__offset;
112 } while (__ptr + __offset != __last);
113 }
114
115 if (__negative)
116 return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
117
118 return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
119}
120
// The outcome of parsing the fractional-constant part of a floating-point
// string. A default constructed object represents "nothing parsed yet".
template <class _Tp>
struct __fractional_constant_result {
  // Position in the input where parsing stopped; size_t(-1) until it is set.
  size_t __offset = static_cast<size_t>(-1);
  // The digits that were accumulated, as an integer.
  _Tp __mantissa = 0;
  // Exponent adjustment that belongs to __mantissa.
  int __exponent = 0;
  // Set when a non-zero digit did not fit in __mantissa and was dropped.
  bool __truncated = false;
  // Set when at least one digit was seen.
  bool __is_valid = false;
};
129
130// Parses the hex constant part of the hexadecimal floating-point value.
131// - input start of buffer given to from_chars
132// - __n the number of elements in the buffer
133// - __offset where to start parsing. The input can have an optional sign, the
134// offset starts after this sign.
135template <class _Tp>
136__fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) {
137 __fractional_constant_result<_Tp> __result;
138
139 const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 16;
140 bool __fraction = false;
141 for (; __offset < __n; ++__offset) {
142 if (std::isxdigit(c: __input[__offset])) {
143 __result.__is_valid = true;
144
145 uint32_t __digit = __input[__offset] - '0';
146 switch (std::tolower(c: __input[__offset])) {
147 case 'a':
148 __digit = 10;
149 break;
150 case 'b':
151 __digit = 11;
152 break;
153 case 'c':
154 __digit = 12;
155 break;
156 case 'd':
157 __digit = 13;
158 break;
159 case 'e':
160 __digit = 14;
161 break;
162 case 'f':
163 __digit = 15;
164 break;
165 }
166
167 if (__result.__mantissa < __mantissa_truncate_threshold) {
168 __result.__mantissa = (__result.__mantissa * 16) + __digit;
169 if (__fraction)
170 __result.__exponent -= 4;
171 } else {
172 if (__digit > 0)
173 __result.__truncated = true;
174 if (!__fraction)
175 __result.__exponent += 4;
176 }
177 } else if (__input[__offset] == '.') {
178 if (__fraction)
179 break; // this means that __input[__offset] points to a second decimal point, ending the number.
180
181 __fraction = true;
182 } else
183 break;
184 }
185
186 __result.__offset = __offset;
187 return __result;
188}
189
// The outcome of parsing the optional exponent part of a floating-point
// string.
struct __exponent_result {
  // Position in the input where parsing stopped; size_t(-1) until it is set.
  size_t __offset = static_cast<size_t>(-1);
  // The parsed exponent, 0 when no exponent is present.
  int __value = 0;
  // Whether an exponent was found in the input.
  bool __present = false;
};
195
196// When the exponent is not present the result of the struct contains
197// __offset, 0, false. This allows using the results unconditionally, the
198// __present is important for the scientific notation, where the value is
199// mandatory.
200static __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
201 if (__offset + 1 < __n && // an exponent always needs at least one digit.
202 std::tolower(c: __input[__offset]) == __marker && //
203 !std::isspace(c: __input[__offset + 1]) // leading whitespace is not allowed.
204 ) {
205 ++__offset;
206 LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e =
207 LIBC_NAMESPACE::shared::strtointeger<int32_t>(src: __input + __offset, base: 10, src_len: __n - __offset);
208 // __result.error contains the errno value, 0 or ERANGE these are not interesting.
209 // If the number of characters parsed is 0 it means there was no number.
210 if (__e.parsed_len != 0)
211 return {__offset + __e.parsed_len, __e.value, true};
212 else
213 --__offset; // the assumption of a valid exponent was not true, undo eating the exponent character.
214 }
215
216 return {__offset, 0, false};
217}
218
// Adds the exponent adjustment of the fractional constant to the parsed
// exponent and clamps the sum to the range
// [-__max_biased_exponent, __max_biased_exponent].
//
// The addition is done as int64 to avoid overflow.
static int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int64_t __total = __fractional + __exponent;

  const bool __too_large = __total > __max_biased_exponent;
  if (__too_large)
    return __max_biased_exponent;

  const bool __too_small = __total < -__max_biased_exponent;
  if (__too_small)
    return -__max_biased_exponent;

  // After clamping the value is known to fit.
  return static_cast<int32_t>(__total);
}
231
232template <class _Fp, class _Tp>
233__from_chars_result<_Fp>
234__calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) {
235 auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>();
236 __r.set_mantissa(__mantissa);
237 __r.set_biased_exponent(__exponent);
238
239 // C17 7.12.1/6
240 // The result underflows if the magnitude of the mathematical result is so
241 // small that the mathematical result cannot be represented, without
242 // extraordinary roundoff error, in an object of the specified type.237) If
243 // the result underflows, the function returns an implementation-defined
244 // value whose magnitude is no greater than the smallest normalized positive
245 // number in the specified type; if the integer expression math_errhandling
246 // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
247 // implementation-defined; if the integer expression math_errhandling &
248 // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
249 // exception is raised is implementation-defined.
250 //
251 // LLVM-LIBC sets ERAGNE for subnormal values
252 //
253 // [charconv.from.chars]/1
254 // ... If the parsed value is not in the range representable by the type of
255 // value, value is unmodified and the member ec of the return value is
256 // equal to errc::result_out_of_range. ...
257 //
258 // Undo the ERANGE for subnormal values.
259 if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero())
260 __result.__ec = errc{};
261
262 if (__negative)
263 __result.__value = -__r.get_val();
264 else
265 __result.__value = __r.get_val();
266
267 return __result;
268}
269
270// Implements from_chars for decimal floating-point values.
271// __first forwarded from from_chars
272// __last forwarded from from_chars
273// __value forwarded from from_chars
274// __fmt forwarded from from_chars
275// __ptr the start of the buffer to parse. This is after the optional sign character.
276// __negative should __value be set to a negative value?
277//
278// This function and __from_chars_floating_point_decimal are similar. However
279// the similar parts are all in helper functions. So the amount of code
280// duplication is minimal.
281template <floating_point _Fp>
282__from_chars_result<_Fp>
283__from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
284 size_t __n = __last - __first;
285 ptrdiff_t __offset = __ptr - __first;
286
287 auto __fractional =
288 std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
289 if (!__fractional.__is_valid)
290 return {_Fp{0}, 0, errc::invalid_argument};
291
292 auto __parsed_exponent = std::__parse_exponent(input: __first, __n, offset: __fractional.__offset, marker: 'p');
293 __offset = __parsed_exponent.__offset;
294 int __exponent = std::__merge_exponents(
295 fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
296
297 __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
298 LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
299 if (__fractional.__mantissa != 0) {
300 auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>(
301 {__fractional.__mantissa, __exponent},
302 __fractional.__truncated,
303 LIBC_NAMESPACE::shared::RoundDirection::Nearest);
304 __expanded_float = __temp.num;
305 if (__temp.error == ERANGE) {
306 __result.__ec = errc::result_out_of_range;
307 }
308 }
309
310 return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
311}
312
313// Parses the hex constant part of the decimal float value.
314// - input start of buffer given to from_chars
315// - __n the number of elements in the buffer
316// - __offset where to start parsing. The input can have an optional sign, the
317// offset starts after this sign.
318template <class _Tp>
319__fractional_constant_result<_Tp>
320__parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) {
321 __fractional_constant_result<_Tp> __result;
322
323 const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10;
324 bool __fraction = false;
325 for (; __offset < __n; ++__offset) {
326 if (std::isdigit(c: __input[__offset])) {
327 __result.__is_valid = true;
328
329 uint32_t __digit = __input[__offset] - '0';
330 if (__result.__mantissa < __mantissa_truncate_threshold) {
331 __result.__mantissa = (__result.__mantissa * 10) + __digit;
332 if (__fraction)
333 --__result.__exponent;
334 } else {
335 if (__digit > 0)
336 __result.__truncated = true;
337 if (!__fraction)
338 ++__result.__exponent;
339 }
340 } else if (__input[__offset] == '.') {
341 if (__fraction)
342 break; // this means that __input[__offset] points to a second decimal point, ending the number.
343
344 __fraction = true;
345 } else
346 break;
347 }
348
349 __result.__offset = __offset;
350 return __result;
351}
352
353// Implements from_chars for decimal floating-point values.
354// __first forwarded from from_chars
355// __last forwarded from from_chars
356// __value forwarded from from_chars
357// __fmt forwarded from from_chars
358// __ptr the start of the buffer to parse. This is after the optional sign character.
359// __negative should __value be set to a negative value?
360template <floating_point _Fp>
361__from_chars_result<_Fp> __from_chars_floating_point_decimal(
362 const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) {
363 ptrdiff_t __n = __last - __first;
364 ptrdiff_t __offset = __ptr - __first;
365
366 auto __fractional =
367 std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
368 if (!__fractional.__is_valid)
369 return {_Fp{0}, 0, errc::invalid_argument};
370
371 __offset = __fractional.__offset;
372
373 // LWG3456 Pattern used by std::from_chars is underspecified
374 // This changes fixed to ignore a possible exponent instead of making its
375 // existance an error.
376 int __exponent;
377 if (__fmt == chars_format::fixed) {
378 __exponent =
379 std::__merge_exponents(fractional: __fractional.__exponent, exponent: 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
380 } else {
381 auto __parsed_exponent = std::__parse_exponent(input: __first, __n, __offset, marker: 'e');
382 if (__fmt == chars_format::scientific && !__parsed_exponent.__present) {
383 // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
384 // the otherwise optional exponent part shall appear;
385 return {_Fp{0}, 0, errc::invalid_argument};
386 }
387
388 __offset = __parsed_exponent.__offset;
389 __exponent = std::__merge_exponents(
390 fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
391 }
392
393 __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
394 LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
395 if (__fractional.__mantissa != 0) {
396 // This function expects to parse a positive value. This means it does not
397 // take a __first, __n as arguments, since __first points to '-' for
398 // negative values.
399 auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
400 {__fractional.__mantissa, __exponent},
401 __fractional.__truncated,
402 LIBC_NAMESPACE::shared::RoundDirection::Nearest,
403 __ptr,
404 __last - __ptr);
405 __expanded_float = __temp.num;
406 if (__temp.error == ERANGE) {
407 __result.__ec = errc::result_out_of_range;
408 }
409 }
410
411 return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
412}
413
414template <floating_point _Fp>
415__from_chars_result<_Fp>
416__from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) {
417 if (__first == __last) [[unlikely]]
418 return {_Fp{0}, 0, errc::invalid_argument};
419
420 const char* __ptr = __first;
421 bool __negative = *__ptr == '-';
422 if (__negative) {
423 ++__ptr;
424 if (__ptr == __last) [[unlikely]]
425 return {_Fp{0}, 0, errc::invalid_argument};
426 }
427
428 // [charconv.from.chars]
429 // [Note 1: If the pattern allows for an optional sign, but the string has
430 // no digit characters following the sign, no characters match the pattern.
431 // -- end note]
432 // This is true for integrals, floating point allows -.0
433
434 // [charconv.from.chars]/6.2
435 // if fmt has chars_format::scientific set but not chars_format::fixed, the
436 // otherwise optional exponent part shall appear;
437 // Since INF/NAN do not have an exponent this value is not valid.
438 //
439 // LWG3456 Pattern used by std::from_chars is underspecified
440 // Does not address this point, but proposed option B does solve this issue,
441 // Both MSVC STL and libstdc++ implement this this behaviour.
442 switch (std::tolower(c: *__ptr)) {
443 case 'i':
444 return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative);
445 case 'n':
446 if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
447 // NOTE: The pointer passed here will be parsed in the default C locale.
448 // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected.
449 return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative);
450 return {_Fp{0}, 0, errc::invalid_argument};
451 }
452
453 if (__fmt == chars_format::hex)
454 return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative);
455
456 return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative);
457}
458
459_LIBCPP_END_NAMESPACE_STD
460
461#endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
462