from_chars_floating_point.h source code [llvm_projects/libcxx/src/include/from_chars_floating_point.h]

1	//===----------------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
10	#define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
11
12	// These headers are in the shared LLVM-libc header library.
13	#include "shared/fp_bits.h"
14	#include "shared/str_to_float.h"
15	#include "shared/str_to_integer.h"
16
17	#include <__assert>
18	#include <__config>
19	#include <cctype>
20	#include <charconv>
21	#include <concepts>
22	#include <limits>
23
24	// Included for the _Floating_type_traits class
25	#include "to_chars_floating_point.h"
26
27	_LIBCPP_BEGIN_NAMESPACE_STD
28
29	// Parses an infinity string.
30	// Valid strings are case insensitive and contain INF or INFINITY.
31	//
32	// - __first is the first argument to std::from_chars. When the string is invalid
33	// this value is returned as ptr in the result.
34	// - __last is the last argument of std::from_chars.
35	// - __value is the value argument of std::from_chars,
36	// - __ptr is the current position in the input string. This points beyond
37	// the initial I character.
38	// - __negative whether a valid string represents -inf or +inf.
39	template <floating_point _Fp>
40	__from_chars_result<_Fp>
41	__from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
42	if (__last - __ptr < `2`) [[unlikely]]
43	return {_Fp{`0`}, `0`, errc::invalid_argument};
44
45	if (std::tolower(c: __ptr[`0`]) != `'n'` \|\| std::tolower(c: __ptr[`1`]) != `'f'`) [[unlikely]]
46	return {_Fp{`0`}, `0`, errc::invalid_argument};
47
48	__ptr += `2`;
49
50	// At this point the result is valid and contains INF.
51	// When the remaining part contains INITY this will be consumed. Otherwise
52	// only INF is consumed. For example INFINITZ will consume INF and ignore
53	// INITZ.
54
55	if (__last - __ptr >= `5` //
56	&& std::tolower(c: __ptr[`0`]) == `'i'` //
57	&& std::tolower(c: __ptr[`1`]) == `'n'` //
58	&& std::tolower(c: __ptr[`2`]) == `'i'` //
59	&& std::tolower(c: __ptr[`3`]) == `'t'` //
60	&& std::tolower(c: __ptr[`4`]) == `'y'`)
61	__ptr += `5`;
62
63	if constexpr (numeric_limits<_Fp>::has_infinity) {
64	if (__negative)
65	return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
66
67	return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
68	} else {
69	return {_Fp{`0`}, __ptr - __first, errc::result_out_of_range};
70	}
71	}
72
73	// Parses a nan string.
74	// Valid strings are case insensitive and contain NAN or NAN(n-char-sequence-opt).
75	//
76	// - __first is the first argument to std::from_chars. When the string is invalid
77	// this value is returned as ptr in the result.
78	// - __last is the last argument of std::from_chars.
79	// - __value is the value argument of std::from_chars,
80	// - __ptr is the current position in the input string. This points beyond
81	// the initial N character.
82	// - __negative whether a valid string represents -nan or +nan.
83	template <floating_point _Fp>
84	__from_chars_result<_Fp>
85	__from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
86	if (__last - __ptr < `2`) [[unlikely]]
87	return {_Fp{`0`}, `0`, errc::invalid_argument};
88
89	if (std::tolower(c: __ptr[`0`]) != `'a'` \|\| std::tolower(c: __ptr[`1`]) != `'n'`) [[unlikely]]
90	return {_Fp{`0`}, `0`, errc::invalid_argument};
91
92	__ptr += `2`;
93
94	// At this point the result is valid and contains NAN. When the remaining
95	// part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
96	// only NAN is consumed. For example NAN(abcd will consume NAN and ignore
97	// (abcd.
98	if (__last - __ptr >= `2` && __ptr[`0`] == `'('`) {
99	size_t __offset = `1`;
100	do {
101	if (__ptr[__offset] == `')'`) {
102	__ptr += __offset + `1`;
103	break;
104	}
105	if (__ptr[__offset] != `'_'` && !std::isalnum(c: __ptr[__offset]))
106	break;
107	++__offset;
108	} while (__ptr + __offset != __last);
109	}
110
111	if (__negative)
112	return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
113
114	return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
115	}
116
// Outcome of parsing the digits (and optional radix point) of a
// floating-point constant.
template <class _Tp>
struct __fractional_constant_result {
  // Position in the input where parsing stopped; size_t(-1) until it is set.
  size_t __offset = static_cast<size_t>(-1);
  // The digits accumulated as an integer.
  _Tp __mantissa = _Tp{0};
  // Exponent adjustment gathered while reading the digits.
  int __exponent = 0;
  // True when a non-zero digit was dropped because it no longer fit in __mantissa.
  bool __truncated = false;
  // True when at least one digit was seen.
  bool __is_valid = false;
};
125
126	// Parses the hex constant part of the hexadecimal floating-point value.
127	// - input start of buffer given to from_chars
128	// - __n the number of elements in the buffer
129	// - __offset where to start parsing. The input can have an optional sign, the
130	// offset starts after this sign.
131	template <class _Tp>
132	__fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) {
133	__fractional_constant_result<_Tp> __result;
134
135	const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / `16`;
136	bool __fraction = false;
137	for (; __offset < __n; ++__offset) {
138	if (std::isxdigit(c: __input[__offset])) {
139	__result.__is_valid = true;
140
141	uint32_t __digit = __input[__offset] - `'0'`;
142	switch (std::tolower(c: __input[__offset])) {
143	case `'a'`:
144	__digit = `10`;
145	break;
146	case `'b'`:
147	__digit = `11`;
148	break;
149	case `'c'`:
150	__digit = `12`;
151	break;
152	case `'d'`:
153	__digit = `13`;
154	break;
155	case `'e'`:
156	__digit = `14`;
157	break;
158	case `'f'`:
159	__digit = `15`;
160	break;
161	}
162
163	if (__result.__mantissa < __mantissa_truncate_threshold) {
164	__result.__mantissa = (__result.__mantissa * `16`) + __digit;
165	if (__fraction)
166	__result.__exponent -= `4`;
167	} else {
168	if (__digit > `0`)
169	__result.__truncated = true;
170	if (!__fraction)
171	__result.__exponent += `4`;
172	}
173	} else if (__input[__offset] == `'.'`) {
174	if (__fraction)
175	break; // this means that __input[__offset] points to a second decimal point, ending the number.
176
177	__fraction = true;
178	} else
179	break;
180	}
181
182	__result.__offset = __offset;
183	return __result;
184	}
185
// Outcome of parsing an optional exponent part.
struct __exponent_result {
  // Position in the input where parsing stopped; size_t(-1) until it is set.
  size_t __offset = static_cast<size_t>(-1);
  // The parsed exponent, 0 when no exponent was present.
  int __value = 0;
  // Whether an exponent was found in the input.
  bool __present = false;
};
191
192	// When the exponent is not present the result of the struct contains
193	// __offset, 0, false. This allows using the results unconditionally, the
194	// __present is important for the scientific notation, where the value is
195	// mandatory.
196	__exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
197	if (__offset + `1` < __n && // an exponent always needs at least one digit.
198	std::tolower(c: __input[__offset]) == __marker && //
199	!std::isspace(c: __input[__offset + `1`]) // leading whitespace is not allowed.
200	) {
201	++__offset;
202	LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e =
203	LIBC_NAMESPACE::shared::strtointeger<int32_t>(src: __input + __offset, base: `10`, src_len: __n - __offset);
204	// __result.error contains the errno value, 0 or ERANGE these are not interesting.
205	// If the number of characters parsed is 0 it means there was no number.
206	if (__e.parsed_len != `0`)
207	return {__offset + __e.parsed_len, __e.value, true};
208	else
209	--__offset; // the assumption of a valid exponent was not true, undo eating the exponent character.
210	}
211
212	return {__offset, `0`, false};
213	}
214
215	// Here we do this operation as int64 to avoid overflow.
// Adds the exponent gathered while parsing the digits to the explicitly
// written exponent and clamps the sum to
// [-__max_biased_exponent, __max_biased_exponent].
//
// The parameters are int64_t so that adding two int values cannot overflow.
// The function is inline since it is defined in a header.
inline int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int64_t __upper = __max_biased_exponent;
  const int64_t __lower = -__upper;
  const int64_t __sum   = __fractional + __exponent;

  if (__sum > __upper)
    return __max_biased_exponent;

  if (__sum < __lower)
    return -__max_biased_exponent;

  // The clamping above guarantees that the sum fits in an int32_t.
  return static_cast<int32_t>(__sum);
}
227
228	template <class _Fp, class _Tp>
229	__from_chars_result<_Fp>
230	__calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) {
231	auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>();
232	__r.set_mantissa(__mantissa);
233	__r.set_biased_exponent(__exponent);
234
235	// C17 7.12.1/6
236	// The result underflows if the magnitude of the mathematical result is so
237	// small that the mathematical result cannot be represented, without
238	// extraordinary roundoff error, in an object of the specified type.237) If
239	// the result underflows, the function returns an implementation-defined
240	// value whose magnitude is no greater than the smallest normalized positive
241	// number in the specified type; if the integer expression math_errhandling
242	// & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
243	// implementation-defined; if the integer expression math_errhandling &
244	// MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
245	// exception is raised is implementation-defined.
246	//
247	// LLVM-LIBC sets ERAGNE for subnormal values
248	//
249	// [charconv.from.chars]/1
250	// ... If the parsed value is not in the range representable by the type of
251	// value, value is unmodified and the member ec of the return value is
252	// equal to errc::result_out_of_range. ...
253	//
254	// Undo the ERANGE for subnormal values.
255	if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero())
256	__result.__ec = errc{};
257
258	if (__negative)
259	__result.__value = -__r.get_val();
260	else
261	__result.__value = __r.get_val();
262
263	return __result;
264	}
265
266	// Implements from_chars for hexadecimal floating-point values.
267	// __first forwarded from from_chars
268	// __last forwarded from from_chars
269	// __value forwarded from from_chars
270	// __fmt forwarded from from_chars
271	// __ptr the start of the buffer to parse. This is after the optional sign character.
272	// __negative should __value be set to a negative value?
273	//
274	// This function and __from_chars_floating_point_decimal are similar. However
275	// the similar parts are all in helper functions. So the amount of code
276	// duplication is minimal.
277	template <floating_point _Fp>
278	__from_chars_result<_Fp>
279	__from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
280	size_t __n = __last - __first;
281	ptrdiff_t __offset = __ptr - __first;
282
283	auto __fractional =
284	std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
285	if (!__fractional.__is_valid)
286	return {_Fp{`0`}, `0`, errc::invalid_argument};
287
288	auto __parsed_exponent = std::__parse_exponent(input: __first, __n, offset: __fractional.__offset, marker: `'p'`);
289	__offset = __parsed_exponent.__offset;
290	int __exponent = std::__merge_exponents(
291	fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
292
293	__from_chars_result<_Fp> __result{_Fp{`0`}, __offset, {}};
294	LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {`0`, `0`};
295	if (__fractional.__mantissa != `0`) {
296	auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>(
297	{__fractional.__mantissa, __exponent},
298	__fractional.__truncated,
299	LIBC_NAMESPACE::shared::RoundDirection::Nearest);
300	__expanded_float = __temp.num;
301	if (__temp.error == ERANGE) {
302	__result.__ec = errc::result_out_of_range;
303	}
304	}
305
306	return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
307	}
308
309	// Parses the fractional constant part of the decimal floating-point value.
310	// - input start of buffer given to from_chars
311	// - __n the number of elements in the buffer
312	// - __offset where to start parsing. The input can have an optional sign, the
313	// offset starts after this sign.
314	template <class _Tp>
315	__fractional_constant_result<_Tp>
316	__parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) {
317	__fractional_constant_result<_Tp> __result;
318
319	const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / `10`;
320	bool __fraction = false;
321	for (; __offset < __n; ++__offset) {
322	if (std::isdigit(c: __input[__offset])) {
323	__result.__is_valid = true;
324
325	uint32_t __digit = __input[__offset] - `'0'`;
326	if (__result.__mantissa < __mantissa_truncate_threshold) {
327	__result.__mantissa = (__result.__mantissa * `10`) + __digit;
328	if (__fraction)
329	--__result.__exponent;
330	} else {
331	if (__digit > `0`)
332	__result.__truncated = true;
333	if (!__fraction)
334	++__result.__exponent;
335	}
336	} else if (__input[__offset] == `'.'`) {
337	if (__fraction)
338	break; // this means that __input[__offset] points to a second decimal point, ending the number.
339
340	__fraction = true;
341	} else
342	break;
343	}
344
345	__result.__offset = __offset;
346	return __result;
347	}
348
349	// Implements from_chars for decimal floating-point values.
350	// __first forwarded from from_chars
351	// __last forwarded from from_chars
352	// __value forwarded from from_chars
353	// __fmt forwarded from from_chars
354	// __ptr the start of the buffer to parse. This is after the optional sign character.
355	// __negative should __value be set to a negative value?
356	template <floating_point _Fp>
357	__from_chars_result<_Fp> __from_chars_floating_point_decimal(
358	const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) {
359	ptrdiff_t __n = __last - __first;
360	ptrdiff_t __offset = __ptr - __first;
361
362	auto __fractional =
363	std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
364	if (!__fractional.__is_valid)
365	return {_Fp{`0`}, `0`, errc::invalid_argument};
366
367	__offset = __fractional.__offset;
368
369	// LWG3456 Pattern used by std::from_chars is underspecified
370	// This changes fixed to ignore a possible exponent instead of making its
371	// existance an error.
372	int __exponent;
373	if (__fmt == chars_format::fixed) {
374	__exponent =
375	std::__merge_exponents(fractional: __fractional.__exponent, exponent: `0`, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
376	} else {
377	auto __parsed_exponent = std::__parse_exponent(input: __first, __n, __offset, marker: `'e'`);
378	if (__fmt == chars_format::scientific && !__parsed_exponent.__present) {
379	// [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
380	// the otherwise optional exponent part shall appear;
381	return {_Fp{`0`}, `0`, errc::invalid_argument};
382	}
383
384	__offset = __parsed_exponent.__offset;
385	__exponent = std::__merge_exponents(
386	fractional: __fractional.__exponent, exponent: __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
387	}
388
389	__from_chars_result<_Fp> __result{_Fp{`0`}, __offset, {}};
390	LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {`0`, `0`};
391	if (__fractional.__mantissa != `0`) {
392	// This function expects to parse a positive value. This means it does not
393	// take a __first, __n as arguments, since __first points to '-' for
394	// negative values.
395	auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
396	{__fractional.__mantissa, __exponent},
397	__fractional.__truncated,
398	LIBC_NAMESPACE::shared::RoundDirection::Nearest,
399	__ptr,
400	__last - __ptr);
401	__expanded_float = __temp.num;
402	if (__temp.error == ERANGE) {
403	__result.__ec = errc::result_out_of_range;
404	}
405	}
406
407	return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
408	}
409
410	template <floating_point _Fp>
411	__from_chars_result<_Fp>
412	__from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) {
413	if (__first == __last) [[unlikely]]
414	return {_Fp{`0`}, `0`, errc::invalid_argument};
415
416	const char* __ptr = __first;
417	bool __negative = *__ptr == `'-'`;
418	if (__negative) {
419	++__ptr;
420	if (__ptr == __last) [[unlikely]]
421	return {_Fp{`0`}, `0`, errc::invalid_argument};
422	}
423
424	// [charconv.from.chars]
425	// [Note 1: If the pattern allows for an optional sign, but the string has
426	// no digit characters following the sign, no characters match the pattern.
427	// -- end note]
428	// This is true for integrals, floating point allows -.0
429
430	// [charconv.from.chars]/6.2
431	// if fmt has chars_format::scientific set but not chars_format::fixed, the
432	// otherwise optional exponent part shall appear;
433	// Since INF/NAN do not have an exponent this value is not valid.
434	//
435	// LWG3456 Pattern used by std::from_chars is underspecified
436	// Does not address this point, but proposed option B does solve this issue,
437	// Both MSVC STL and libstdc++ implement this this behaviour.
438	switch (std::tolower(c: *__ptr)) {
439	case `'i'`:
440	return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + `1`, __negative);
441	case `'n'`:
442	if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
443	// NOTE: The pointer passed here will be parsed in the default C locale.
444	// This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected.
445	return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + `1`, __negative);
446	return {_Fp{`0`}, `0`, errc::invalid_argument};
447	}
448
449	if (__fmt == chars_format::hex)
450	return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative);
451
452	return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative);
453	}
454
455	_LIBCPP_END_NAMESPACE_STD
456
457	#endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
458

Browse the source code of llvm_projects/libcxx/src/include/from_chars_floating_point.h