NormalFloat.h source code [llvm_projects/libc/src/__support/FPUtil/NormalFloat.h]

1	//===-- A class to store a normalized floating point number ------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
10	#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
11
12	#include "FPBits.h"
13
14	#include "hdr/stdint_proxy.h"
15	#include "src/__support/CPP/type_traits.h"
16	#include "src/__support/common.h"
17	#include "src/__support/macros/config.h"
18
19	namespace LIBC_NAMESPACE_DECL {
20	namespace fputil {
21
22	// A class which stores the normalized form of a floating point value.
23	// The special IEEE-754 bits patterns of Zero, infinity and NaNs are
24	// are not handled by this class.
25	//
26	// A normalized floating point number is of this form:
27	// (-1)sign * 2^exponent * <mantissa>*
28	// where <mantissa> is of the form 1.<...>.
29	template <typename T> struct NormalFloat {
30	static_assert(
31	cpp::is_floating_point_v<T>,
32	"NormalFloat template parameter has to be a floating point type.");
33
34	using StorageType = typename FPBits<T>::StorageType;
35	static constexpr StorageType ONE =
36	(StorageType(`1`) << FPBits<T>::FRACTION_LEN);
37
38	// Unbiased exponent value.
39	int32_t exponent{};
40
41	StorageType mantissa{};
42	// We want \|StorageType\| to have atleast one bit more than the actual mantissa
43	// bit width to accommodate the implicit 1 value.
44	static_assert(sizeof(StorageType) * `8` >= FPBits<T>::FRACTION_LEN + `1`,
45	"Bad type for mantissa in NormalFloat.");
46
47	Sign sign = Sign::POS;
48
49	LIBC_INLINE constexpr NormalFloat(Sign s, int32_t e, StorageType m)
50	: exponent(e), mantissa(m), sign (s) {
51	if (mantissa >= ONE)
52	return;
53
54	unsigned normalization_shift = evaluate_normalization_shift(m: mantissa);
55	mantissa <<= normalization_shift;
56	exponent -= normalization_shift;
57	}
58
59	LIBC_INLINE constexpr explicit NormalFloat(T x) {
60	init_from_bits(bits: FPBits<T>(x));
61	}
62
63	LIBC_INLINE constexpr explicit NormalFloat(FPBits<T> bits) {
64	init_from_bits(bits);
65	}
66
67	// Compares this normalized number with another normalized number.
68	// Returns -1 is this number is less than \|other\|, 0 if this number is equal
69	// to \|other\|, and 1 if this number is greater than \|other\|.
70	LIBC_INLINE constexpr int cmp(const NormalFloat<T> &other) const {
71	const int result = sign.is_neg() ? -`1` : `1`;
72	if (sign != other.sign)
73	return result;
74
75	if (exponent > other.exponent) {
76	return result;
77	} else if (exponent == other.exponent) {
78	if (mantissa > other.mantissa)
79	return result;
80	else if (mantissa == other.mantissa)
81	return `0`;
82	else
83	return -result;
84	} else {
85	return -result;
86	}
87	}
88
89	// Returns a new normalized floating point number which is equal in value
90	// to this number multiplied by 2^e. That is:
91	// new = this 2^e*
92	LIBC_INLINE constexpr NormalFloat<T> mul2(int e) const {
93	NormalFloat<T> result = *this;
94	result.exponent += e;
95	return result;
96	}
97
98	LIBC_INLINE LIBC_BIT_CAST_CONSTEXPR operator T() const {
99	int biased_exponent = exponent + FPBits<T>::EXP_BIAS;
100	// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
101	constexpr int MAX_EXPONENT_VALUE = (`1` << FPBits<T>::EXP_LEN) - `2`;
102	if (biased_exponent > MAX_EXPONENT_VALUE) {
103	return FPBits<T>::inf(sign).get_val();
104	}
105
106	FPBits<T> result(T(`0.0`));
107	result.set_sign(sign);
108
109	constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXP_BIAS + `1`;
110	if (exponent < SUBNORMAL_EXPONENT) {
111	unsigned shift = static_cast<unsigned>(SUBNORMAL_EXPONENT - exponent);
112	// Since exponent > subnormalExponent, shift is strictly greater than
113	// zero.
114	if (shift <= FPBits<T>::FRACTION_LEN + `1`) {
115	// Generate a subnormal number. Might lead to loss of precision.
116	// We round to nearest and round halfway cases to even.
117	const StorageType shift_out_mask =
118	static_cast<StorageType>(StorageType(`1`) << shift) - `1`;
119	const StorageType shift_out_value = mantissa & shift_out_mask;
120	const StorageType halfway_value =
121	static_cast<StorageType>(StorageType(`1`) << (shift - `1`));
122	result.set_biased_exponent(`0`);
123	result.set_mantissa(mantissa >> shift);
124	StorageType new_mantissa = result.get_mantissa();
125	if (shift_out_value > halfway_value) {
126	new_mantissa += `1`;
127	} else if (shift_out_value == halfway_value) {
128	// Round to even.
129	if (result.get_mantissa() & `0x1`)
130	new_mantissa += `1`;
131	}
132	result.set_mantissa(new_mantissa);
133	// Adding 1 to mantissa can lead to overflow. This can only happen if
134	// mantissa was all ones (0b111..11). For such a case, we will carry
135	// the overflow into the exponent.
136	if (new_mantissa == ONE)
137	result.set_biased_exponent(`1`);
138	return result.get_val();
139	} else {
140	return result.get_val();
141	}
142	}
143
144	result.set_biased_exponent(
145	static_cast<StorageType>(exponent + FPBits<T>::EXP_BIAS));
146	result.set_mantissa(mantissa);
147	return result.get_val();
148	}
149
150	private:
151	LIBC_INLINE constexpr void init_from_bits(FPBits<T> bits) {
152	sign = bits.sign();
153
154	if (bits.is_inf_or_nan() \|\| bits.is_zero()) {
155	// Ignore special bit patterns. Implementations deal with them separately
156	// anyway so this should not be a problem.
157	exponent = `0`;
158	mantissa = `0`;
159	return;
160	}
161
162	// Normalize subnormal numbers.
163	if (bits.is_subnormal()) {
164	unsigned shift = evaluate_normalization_shift(m: bits.get_mantissa());
165	mantissa = static_cast<StorageType>(bits.get_mantissa() << shift);
166	exponent = `1` - FPBits<T>::EXP_BIAS - static_cast<int32_t>(shift);
167	} else {
168	exponent = bits.get_biased_exponent() - FPBits<T>::EXP_BIAS;
169	mantissa = ONE \| bits.get_mantissa();
170	}
171	}
172
173	LIBC_INLINE constexpr unsigned evaluate_normalization_shift(StorageType m) {
174	unsigned shift = `0`;
175	for (; (ONE & m) == `0` && (shift < FPBits<T>::FRACTION_LEN);
176	m <<= `1`, ++shift)
177	;
178	return shift;
179	}
180	};
181
182	#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
183	template <>
184	LIBC_INLINE constexpr void
185	NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
186	sign = bits.sign();
187
188	if (bits.is_inf_or_nan() \|\| bits.is_zero()) {
189	// Ignore special bit patterns. Implementations deal with them separately
190	// anyway so this should not be a problem.
191	exponent = `0`;
192	mantissa = `0`;
193	return;
194	}
195
196	if (bits.is_subnormal()) {
197	if (bits.get_implicit_bit() == `0`) {
198	// Since we ignore zero value, the mantissa in this case is non-zero.
199	int normalization_shift =
200	evaluate_normalization_shift(m: bits.get_mantissa());
201	exponent = -`16382` - normalization_shift;
202	mantissa = (bits.get_mantissa() << normalization_shift);
203	} else {
204	exponent = -`16382`;
205	mantissa = ONE \| bits.get_mantissa();
206	}
207	} else {
208	if (bits.get_implicit_bit() == `0`) {
209	// Invalid number so just store 0 similar to a NaN.
210	exponent = `0`;
211	mantissa = `0`;
212	} else {
213	exponent = bits.get_biased_exponent() - `16383`;
214	mantissa = ONE \| bits.get_mantissa();
215	}
216	}
217	}
218
219	template <>
220	LIBC_INLINE LIBC_BIT_CAST_CONSTEXPR NormalFloat<long double>::
221	operator long double() const {
222	using LDBits = FPBits<long double>;
223	int biased_exponent = exponent + LDBits::EXP_BIAS;
224	// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
225	constexpr int MAX_EXPONENT_VALUE = (`1` << LDBits::EXP_LEN) - `2`;
226	if (biased_exponent > MAX_EXPONENT_VALUE) {
227	return LDBits::inf(sign).get_val();
228	}
229
230	FPBits<long double> result(`0.0l`);
231	result.set_sign(sign);
232
233	constexpr int SUBNORMAL_EXPONENT = -LDBits::EXP_BIAS + `1`;
234	if (exponent < SUBNORMAL_EXPONENT) {
235	unsigned shift = SUBNORMAL_EXPONENT - exponent;
236	if (shift <= LDBits::FRACTION_LEN + `1`) {
237	// Generate a subnormal number. Might lead to loss of precision.
238	// We round to nearest and round halfway cases to even.
239	const StorageType shift_out_mask = (StorageType(`1`) << shift) - `1`;
240	const StorageType shift_out_value = mantissa & shift_out_mask;
241	const StorageType halfway_value = StorageType(`1`) << (shift - `1`);
242	result.set_biased_exponent(`0`);
243	result.set_mantissa(mantissa >> shift);
244	StorageType new_mantissa = result.get_mantissa();
245	if (shift_out_value > halfway_value) {
246	new_mantissa += `1`;
247	} else if (shift_out_value == halfway_value) {
248	// Round to even.
249	if (result.get_mantissa() & `0x1`)
250	new_mantissa += `1`;
251	}
252	result.set_mantissa(new_mantissa);
253	// Adding 1 to mantissa can lead to overflow. This can only happen if
254	// mantissa was all ones (0b111..11). For such a case, we will carry
255	// the overflow into the exponent and set the implicit bit to 1.
256	if (new_mantissa == ONE) {
257	result.set_biased_exponent(`1`);
258	result.set_implicit_bit(`1`);
259	} else {
260	result.set_implicit_bit(`0`);
261	}
262	return result.get_val();
263	} else {
264	return result.get_val();
265	}
266	}
267
268	result.set_biased_exponent(biased_exponent);
269	result.set_mantissa(mantissa);
270	result.set_implicit_bit(`1`);
271	return result.get_val();
272	}
273	#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
274
275	} // namespace fputil
276	} // namespace LIBC_NAMESPACE_DECL
277
278	#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
279

Browse the source code of llvm_projects/libc/src/__support/FPUtil/NormalFloat.h