from_chars_floating_point.h source code [llvm_projects/libcxx/src/include/from_chars_floating_point.h]

1	//===----------------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
10	#define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
11
12	#include <__assert>
13	#include <__config>
14	#include <cctype>
15	#include <charconv>
16	#include <concepts>
17	#include <limits>
18
19	// Make sure we use libc++'s assertion machinery within the shared code we use
20	// from LLVM libc.
21	#define LIBC_ASSERT(cond) _LIBCPP_ASSERT((cond), _LIBCPP_TOSTRING(cond))
22
23	// These headers are in the shared LLVM-libc header library.
24	#include "shared/fp_bits.h"
25	#include "shared/str_to_float.h"
26	#include "shared/str_to_integer.h"
27
28	// Included for the _Floating_type_traits class
29	#include "to_chars_floating_point.h"
30
31	_LIBCPP_BEGIN_NAMESPACE_STD
32
33	// Parses an infinity string.
34	// Valid strings are case insensitive and contain INF or INFINITY.
35	//
36	// - __first is the first argument to std::from_chars. When the string is invalid
37	// this value is returned as ptr in the result.
38	// - __last is the last argument of std::from_chars.
39	// - __value is the value argument of std::from_chars,
40	// - __ptr is the current position is the input string. This is points beyond
41	// the initial I character.
42	// - __negative whether a valid string represents -inf or +inf.
43	template <floating_point _Fp>
44	__from_chars_result<_Fp>
45	__from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
46	if (__last - __ptr < `2`) [[unlikely]]
47	return {_Fp{`0`}, `0`, errc::invalid_argument};
48
49	if (std::tolower(c: __ptr[`0`]) != `'n'` \|\| std::tolower(c: __ptr[`1`]) != `'f'`) [[unlikely]]
50	return {_Fp{`0`}, `0`, errc::invalid_argument};
51
52	__ptr += `2`;
53
54	// At this point the result is valid and contains INF.
55	// When the remaining part contains INITY this will be consumed. Otherwise
56	// only INF is consumed. For example INFINITZ will consume INF and ignore
57	// INITZ.
58
59	if (__last - __ptr >= `5` //
60	&& std::tolower(c: __ptr[`0`]) == `'i'` //
61	&& std::tolower(c: __ptr[`1`]) == `'n'` //
62	&& std::tolower(c: __ptr[`2`]) == `'i'` //
63	&& std::tolower(c: __ptr[`3`]) == `'t'` //
64	&& std::tolower(c: __ptr[`4`]) == `'y'`)
65	__ptr += `5`;
66
67	if constexpr (numeric_limits<_Fp>::has_infinity) {
68	if (__negative)
69	return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
70
71	return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
72	} else {
73	return {_Fp{`0`}, __ptr - __first, errc::result_out_of_range};
74	}
75	}
76
77	// Parses a nan string.
78	// Valid strings are case insensitive and contain INF or INFINITY.
79	//
80	// - __first is the first argument to std::from_chars. When the string is invalid
81	// this value is returned as ptr in the result.
82	// - __last is the last argument of std::from_chars.
83	// - __value is the value argument of std::from_chars,
84	// - __ptr is the current position is the input string. This is points beyond
85	// the initial N character.
86	// - __negative whether a valid string represents -nan or +nan.
87	template <floating_point _Fp>
88	__from_chars_result<_Fp>
89	__from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
90	if (__last - __ptr < `2`) [[unlikely]]
91	return {_Fp{`0`}, `0`, errc::invalid_argument};
92
93	if (std::tolower(c: __ptr[`0`]) != `'a'` \|\| std::tolower(c: __ptr[`1`]) != `'n'`) [[unlikely]]
94	return {_Fp{`0`}, `0`, errc::invalid_argument};
95
96	__ptr += `2`;
97
98	// At this point the result is valid and contains NAN. When the remaining
99	// part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
100	// only NAN is consumed. For example NAN(abcd will consume NAN and ignore
101	// (abcd.
102	if (__last - __ptr >= `2` && __ptr[`0`] == `'('`) {
103	size_t __offset = `1`;
104	do {
105	if (__ptr[__offset] == `')'`) {
106	__ptr += __offset + `1`;
107	break;
108	}
109	if (__ptr[__offset] != `'_'` && !std::isalnum(c: __ptr[__offset]))
110	break;
111	++__offset;
112	} while (__ptr + __offset != __last);
113	}
114
115	if (__negative)
116	return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
117
118	return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
119	}
120
// Result of parsing the fractional constant of a floating-point string.
template <class _Tp>
struct __fractional_constant_result {
  // Index in the input where parsing stopped; size_t(-1) until it is set.
  size_t __offset = static_cast<size_t>(-1);
  // The digits that were accumulated.
  _Tp __mantissa = 0;
  // Exponent adjustment caused by fractional or dropped digits.
  int __exponent = 0;
  // Whether a non-zero digit was dropped because the mantissa was full.
  bool __truncated = false;
  // Whether at least one digit was parsed.
  bool __is_valid = false;
};
129
130	// Parses the hex constant part of the hexadecimal floating-point value.
131	// - input start of buffer given to from_chars
132	// - __n the number of elements in the buffer
133	// - __offset where to start parsing. The input can have an optional sign, the
134	// offset starts after this sign.
135	template <class _Tp>
136	__fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) {
137	__fractional_constant_result<_Tp> __result;
138
139	const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / `16`;
140	bool __fraction = false;
141	for (; __offset < __n; ++__offset) {
142	if (std::isxdigit(c: __input[__offset])) {
143	__result.__is_valid = true;
144
145	uint32_t __digit = __input[__offset] - `'0'`;
146	switch (std::tolower(c: __input[__offset])) {
147	case `'a'`:
148	__digit = `10`;
149	break;
150	case `'b'`:
151	__digit = `11`;
152	break;
153	case `'c'`:
154	__digit = `12`;
155	break;
156	case `'d'`:
157	__digit = `13`;
158	break;
159	case `'e'`:
160	__digit = `14`;
161	break;
162	case `'f'`:
163	__digit = `15`;
164	break;
165	}
166
167	if (__result.__mantissa < __mantissa_truncate_threshold) {
168	__result.__mantissa = (__result.__mantissa * `16`) + __digit;
169	if (__fraction)
170	__result.__exponent -= `4`;
171	} else {
172	if (__digit > `0`)
173	__result.__truncated = true;
174	if (!__fraction)
175	__result.__exponent += `4`;
176	}
177	} else if (__input[__offset] == `'.'`) {
178	if (__fraction)
179	break; // this means that __input[__offset] points to a second decimal point, ending the number.
180
181	__fraction = true;
182	} else
183	break;
184	}
185
186	__result.__offset = __offset;
187	return __result;
188	}
189
// Result of parsing an optional exponent.
struct __exponent_result {
  // Index in the input after the exponent; size_t(-1) until it is set.
  size_t __offset = static_cast<size_t>(-1);
  // The parsed exponent value.
  int __value = 0;
  // Whether an exponent was found in the input.
  bool __present = false;
};
195
196	// When the exponent is not present the result of the struct contains
197	// __offset, 0, false. This allows using the results unconditionally, the
198	// __present is important for the scientific notation, where the value is
199	// mandatory.
200	static __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
201	if (__offset + `1` < __n && // an exponent always needs at least one digit.
202	std::tolower(c: __input[__offset]) == __marker && //
203	!std::isspace(c: __input[__offset + `1`]) // leading whitespace is not allowed.
204	) {
205	++__offset;
206	LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e =
207	LIBC_NAMESPACE::shared::strtointeger<int32_t>(src: __input + __offset, base: `10`, src_len: __n - __offset);
208	// __result.error contains the errno value, 0 or ERANGE these are not interesting.
209	// If the number of characters parsed is 0 it means there was no number.
210	if (__e.parsed_len != `0`)
211	return {__offset + __e.parsed_len, __e.value, true};
212	else
213	--__offset; // the assumption of a valid exponent was not true, undo eating the exponent character.
214	}
215
216	return {__offset, `0`, false};
217	}
218
// Adds the exponent adjustment of the fractional constant to the parsed
// exponent and clamps the sum to
// [-__max_biased_exponent, __max_biased_exponent].
// The addition is done as int64 to avoid overflow.
static int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int32_t __upper = __max_biased_exponent;
  const int32_t __lower = -__max_biased_exponent;
  const int64_t __total = __fractional + __exponent;

  if (__total > __upper)
    return __upper;

  if (__total < __lower)
    return __lower;

  // The sum is within the clamping range, so it fits in an int32_t.
  return static_cast<int32_t>(__total);
}
231
232	template <class _Fp, class _Tp>
233	__from_chars_result<_Fp>
234	__calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) {
235	auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>();
236	__r.set_mantissa(__mantissa);
237	__r.set_biased_exponent(__exponent);
238
239	// C17 7.12.1/6
240	// The result underflows if the magnitude of the mathematical result is so
241	// small that the mathematical result cannot be represented, without
242	// extraordinary roundoff error, in an object of the specified type.237) If
243	// the result underflows, the function returns an implementation-defined
244	// value whose magnitude is no greater than the smallest normalized positive
245	// number in the specified type; if the integer expression math_errhandling
246	// & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
247	// implementation-defined; if the integer expression math_errhandling &
248	// MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
249	// exception is raised is implementation-defined.
250	//
251	// LLVM-LIBC sets ERAGNE for subnormal values
252	//
253	// [charconv.from.chars]/1
254	// ... If the parsed value is not in the range representable by the type of
255	// value, value is unmodified and the member ec of the return value is
256	// equal to errc::result_out_of_range. ...
257	//
258	// Undo the ERANGE for subnormal values.
259	if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero())
260	__result.__ec = errc{};
261
262	if (__negative)
263	__result.__value = -__r.get_val();
264	else
265	__result.__value = __r.get_val();
266
267	return __result;
268	}
269
270	// Implements from_chars for decimal floating-point values.
271	// __first forwarded from from_chars
272	// __last forwarded from from_chars
273	// __value forwarded from from_chars
274	// __fmt forwarded from from_chars
275	// __ptr the start of the buffer to parse. This is after the optional sign character.
276	// __negative should __value be set to a negative value?
277	//
278	// This function and __from_chars_floating_point_decimal are similar. However
279	// the similar parts are all in helper functions. So the amount of code
280	// duplication is minimal.
281	template <floating_point _Fp>
282	__from_chars_result<_Fp>
283	__from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
284	size_t __n = __last - __first;
285	ptrdiff_t __offset = __ptr - __first;
286
287	auto __fractional =
288	std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
289	if (!__fractional.__is_valid)
290	return {_Fp{`0`}, `0`, errc::invalid_argument};
291
292	auto __parsed_exponent = std::__parse_exponent(input: __first, __n, offset: __fractional.__offset, marker: `'p'`);
293	__offset = __parsed_exponent.__offset;
294	int __exponent = std::__merge_exponents(
295	fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
296
297	__from_chars_result<_Fp> __result{_Fp{`0`}, __offset, {}};
298	LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {`0`, `0`};
299	if (__fractional.__mantissa != `0`) {
300	auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>(
301	{__fractional.__mantissa, __exponent},
302	__fractional.__truncated,
303	LIBC_NAMESPACE::shared::RoundDirection::Nearest);
304	__expanded_float = __temp.num;
305	if (__temp.error == ERANGE) {
306	__result.__ec = errc::result_out_of_range;
307	}
308	}
309
310	return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
311	}
312
313	// Parses the hex constant part of the decimal float value.
314	// - input start of buffer given to from_chars
315	// - __n the number of elements in the buffer
316	// - __offset where to start parsing. The input can have an optional sign, the
317	// offset starts after this sign.
318	template <class _Tp>
319	__fractional_constant_result<_Tp>
320	__parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) {
321	__fractional_constant_result<_Tp> __result;
322
323	const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / `10`;
324	bool __fraction = false;
325	for (; __offset < __n; ++__offset) {
326	if (std::isdigit(c: __input[__offset])) {
327	__result.__is_valid = true;
328
329	uint32_t __digit = __input[__offset] - `'0'`;
330	if (__result.__mantissa < __mantissa_truncate_threshold) {
331	__result.__mantissa = (__result.__mantissa * `10`) + __digit;
332	if (__fraction)
333	--__result.__exponent;
334	} else {
335	if (__digit > `0`)
336	__result.__truncated = true;
337	if (!__fraction)
338	++__result.__exponent;
339	}
340	} else if (__input[__offset] == `'.'`) {
341	if (__fraction)
342	break; // this means that __input[__offset] points to a second decimal point, ending the number.
343
344	__fraction = true;
345	} else
346	break;
347	}
348
349	__result.__offset = __offset;
350	return __result;
351	}
352
353	// Implements from_chars for decimal floating-point values.
354	// __first forwarded from from_chars
355	// __last forwarded from from_chars
356	// __value forwarded from from_chars
357	// __fmt forwarded from from_chars
358	// __ptr the start of the buffer to parse. This is after the optional sign character.
359	// __negative should __value be set to a negative value?
360	template <floating_point _Fp>
361	__from_chars_result<_Fp> __from_chars_floating_point_decimal(
362	const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) {
363	ptrdiff_t __n = __last - __first;
364	ptrdiff_t __offset = __ptr - __first;
365
366	auto __fractional =
367	std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
368	if (!__fractional.__is_valid)
369	return {_Fp{`0`}, `0`, errc::invalid_argument};
370
371	__offset = __fractional.__offset;
372
373	// LWG3456 Pattern used by std::from_chars is underspecified
374	// This changes fixed to ignore a possible exponent instead of making its
375	// existance an error.
376	int __exponent;
377	if (__fmt == chars_format::fixed) {
378	__exponent =
379	std::__merge_exponents(fractional: __fractional.__exponent, exponent: `0`, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
380	} else {
381	auto __parsed_exponent = std::__parse_exponent(input: __first, __n, __offset, marker: `'e'`);
382	if (__fmt == chars_format::scientific && !__parsed_exponent.__present) {
383	// [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
384	// the otherwise optional exponent part shall appear;
385	return {_Fp{`0`}, `0`, errc::invalid_argument};
386	}
387
388	__offset = __parsed_exponent.__offset;
389	__exponent = std::__merge_exponents(
390	fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
391	}
392
393	__from_chars_result<_Fp> __result{_Fp{`0`}, __offset, {}};
394	LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {`0`, `0`};
395	if (__fractional.__mantissa != `0`) {
396	// This function expects to parse a positive value. This means it does not
397	// take a __first, __n as arguments, since __first points to '-' for
398	// negative values.
399	auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
400	{__fractional.__mantissa, __exponent},
401	__fractional.__truncated,
402	LIBC_NAMESPACE::shared::RoundDirection::Nearest,
403	__ptr,
404	__last - __ptr);
405	__expanded_float = __temp.num;
406	if (__temp.error == ERANGE) {
407	__result.__ec = errc::result_out_of_range;
408	}
409	}
410
411	return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
412	}
413
414	template <floating_point _Fp>
415	__from_chars_result<_Fp>
416	__from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) {
417	if (__first == __last) [[unlikely]]
418	return {_Fp{`0`}, `0`, errc::invalid_argument};
419
420	const char* __ptr = __first;
421	bool __negative = *__ptr == `'-'`;
422	if (__negative) {
423	++__ptr;
424	if (__ptr == __last) [[unlikely]]
425	return {_Fp{`0`}, `0`, errc::invalid_argument};
426	}
427
428	// [charconv.from.chars]
429	// [Note 1: If the pattern allows for an optional sign, but the string has
430	// no digit characters following the sign, no characters match the pattern.
431	// -- end note]
432	// This is true for integrals, floating point allows -.0
433
434	// [charconv.from.chars]/6.2
435	// if fmt has chars_format::scientific set but not chars_format::fixed, the
436	// otherwise optional exponent part shall appear;
437	// Since INF/NAN do not have an exponent this value is not valid.
438	//
439	// LWG3456 Pattern used by std::from_chars is underspecified
440	// Does not address this point, but proposed option B does solve this issue,
441	// Both MSVC STL and libstdc++ implement this this behaviour.
442	switch (std::tolower(c: *__ptr)) {
443	case `'i'`:
444	return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + `1`, __negative);
445	case `'n'`:
446	if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
447	// NOTE: The pointer passed here will be parsed in the default C locale.
448	// This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected.
449	return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + `1`, __negative);
450	return {_Fp{`0`}, `0`, errc::invalid_argument};
451	}
452
453	if (__fmt == chars_format::hex)
454	return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative);
455
456	return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative);
457	}
458
459	_LIBCPP_END_NAMESPACE_STD
460
461	#endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
462

Browse the source code of llvm_projects/libcxx/src/include/from_chars_floating_point.h