1//===-- A class to store a normalized floating point number -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
10#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
11
12#include "FPBits.h"
13
14#include "hdr/stdint_proxy.h"
15#include "src/__support/CPP/type_traits.h"
16#include "src/__support/common.h"
17#include "src/__support/macros/config.h"
18
19namespace LIBC_NAMESPACE_DECL {
20namespace fputil {
21
22// A class which stores the normalized form of a floating point value.
23// The special IEEE-754 bits patterns of Zero, infinity and NaNs are
24// are not handled by this class.
25//
26// A normalized floating point number is of this form:
27// (-1)*sign * 2^exponent * <mantissa>
28// where <mantissa> is of the form 1.<...>.
29template <typename T> struct NormalFloat {
30 static_assert(
31 cpp::is_floating_point_v<T>,
32 "NormalFloat template parameter has to be a floating point type.");
33
34 using StorageType = typename FPBits<T>::StorageType;
35 static constexpr StorageType ONE =
36 (StorageType(1) << FPBits<T>::FRACTION_LEN);
37
38 // Unbiased exponent value.
39 int32_t exponent{};
40
41 StorageType mantissa{};
42 // We want |StorageType| to have atleast one bit more than the actual mantissa
43 // bit width to accommodate the implicit 1 value.
44 static_assert(sizeof(StorageType) * 8 >= FPBits<T>::FRACTION_LEN + 1,
45 "Bad type for mantissa in NormalFloat.");
46
47 Sign sign = Sign::POS;
48
49 LIBC_INLINE constexpr NormalFloat(Sign s, int32_t e, StorageType m)
50 : exponent(e), mantissa(m), sign(s) {
51 if (mantissa >= ONE)
52 return;
53
54 unsigned normalization_shift = evaluate_normalization_shift(m: mantissa);
55 mantissa <<= normalization_shift;
56 exponent -= normalization_shift;
57 }
58
59 LIBC_INLINE constexpr explicit NormalFloat(T x) {
60 init_from_bits(bits: FPBits<T>(x));
61 }
62
63 LIBC_INLINE constexpr explicit NormalFloat(FPBits<T> bits) {
64 init_from_bits(bits);
65 }
66
67 // Compares this normalized number with another normalized number.
68 // Returns -1 is this number is less than |other|, 0 if this number is equal
69 // to |other|, and 1 if this number is greater than |other|.
70 LIBC_INLINE constexpr int cmp(const NormalFloat<T> &other) const {
71 const int result = sign.is_neg() ? -1 : 1;
72 if (sign != other.sign)
73 return result;
74
75 if (exponent > other.exponent) {
76 return result;
77 } else if (exponent == other.exponent) {
78 if (mantissa > other.mantissa)
79 return result;
80 else if (mantissa == other.mantissa)
81 return 0;
82 else
83 return -result;
84 } else {
85 return -result;
86 }
87 }
88
89 // Returns a new normalized floating point number which is equal in value
90 // to this number multiplied by 2^e. That is:
91 // new = this * 2^e
92 LIBC_INLINE constexpr NormalFloat<T> mul2(int e) const {
93 NormalFloat<T> result = *this;
94 result.exponent += e;
95 return result;
96 }
97
98 LIBC_INLINE LIBC_BIT_CAST_CONSTEXPR operator T() const {
99 int biased_exponent = exponent + FPBits<T>::EXP_BIAS;
100 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
101 constexpr int MAX_EXPONENT_VALUE = (1 << FPBits<T>::EXP_LEN) - 2;
102 if (biased_exponent > MAX_EXPONENT_VALUE) {
103 return FPBits<T>::inf(sign).get_val();
104 }
105
106 FPBits<T> result(T(0.0));
107 result.set_sign(sign);
108
109 constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXP_BIAS + 1;
110 if (exponent < SUBNORMAL_EXPONENT) {
111 unsigned shift = static_cast<unsigned>(SUBNORMAL_EXPONENT - exponent);
112 // Since exponent > subnormalExponent, shift is strictly greater than
113 // zero.
114 if (shift <= FPBits<T>::FRACTION_LEN + 1) {
115 // Generate a subnormal number. Might lead to loss of precision.
116 // We round to nearest and round halfway cases to even.
117 const StorageType shift_out_mask =
118 static_cast<StorageType>(StorageType(1) << shift) - 1;
119 const StorageType shift_out_value = mantissa & shift_out_mask;
120 const StorageType halfway_value =
121 static_cast<StorageType>(StorageType(1) << (shift - 1));
122 result.set_biased_exponent(0);
123 result.set_mantissa(mantissa >> shift);
124 StorageType new_mantissa = result.get_mantissa();
125 if (shift_out_value > halfway_value) {
126 new_mantissa += 1;
127 } else if (shift_out_value == halfway_value) {
128 // Round to even.
129 if (result.get_mantissa() & 0x1)
130 new_mantissa += 1;
131 }
132 result.set_mantissa(new_mantissa);
133 // Adding 1 to mantissa can lead to overflow. This can only happen if
134 // mantissa was all ones (0b111..11). For such a case, we will carry
135 // the overflow into the exponent.
136 if (new_mantissa == ONE)
137 result.set_biased_exponent(1);
138 return result.get_val();
139 } else {
140 return result.get_val();
141 }
142 }
143
144 result.set_biased_exponent(
145 static_cast<StorageType>(exponent + FPBits<T>::EXP_BIAS));
146 result.set_mantissa(mantissa);
147 return result.get_val();
148 }
149
150private:
151 LIBC_INLINE constexpr void init_from_bits(FPBits<T> bits) {
152 sign = bits.sign();
153
154 if (bits.is_inf_or_nan() || bits.is_zero()) {
155 // Ignore special bit patterns. Implementations deal with them separately
156 // anyway so this should not be a problem.
157 exponent = 0;
158 mantissa = 0;
159 return;
160 }
161
162 // Normalize subnormal numbers.
163 if (bits.is_subnormal()) {
164 unsigned shift = evaluate_normalization_shift(m: bits.get_mantissa());
165 mantissa = static_cast<StorageType>(bits.get_mantissa() << shift);
166 exponent = 1 - FPBits<T>::EXP_BIAS - static_cast<int32_t>(shift);
167 } else {
168 exponent = bits.get_biased_exponent() - FPBits<T>::EXP_BIAS;
169 mantissa = ONE | bits.get_mantissa();
170 }
171 }
172
173 LIBC_INLINE constexpr unsigned evaluate_normalization_shift(StorageType m) {
174 unsigned shift = 0;
175 for (; (ONE & m) == 0 && (shift < FPBits<T>::FRACTION_LEN);
176 m <<= 1, ++shift)
177 ;
178 return shift;
179 }
180};
181
182#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
183template <>
184LIBC_INLINE constexpr void
185NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
186 sign = bits.sign();
187
188 if (bits.is_inf_or_nan() || bits.is_zero()) {
189 // Ignore special bit patterns. Implementations deal with them separately
190 // anyway so this should not be a problem.
191 exponent = 0;
192 mantissa = 0;
193 return;
194 }
195
196 if (bits.is_subnormal()) {
197 if (bits.get_implicit_bit() == 0) {
198 // Since we ignore zero value, the mantissa in this case is non-zero.
199 int normalization_shift =
200 evaluate_normalization_shift(m: bits.get_mantissa());
201 exponent = -16382 - normalization_shift;
202 mantissa = (bits.get_mantissa() << normalization_shift);
203 } else {
204 exponent = -16382;
205 mantissa = ONE | bits.get_mantissa();
206 }
207 } else {
208 if (bits.get_implicit_bit() == 0) {
209 // Invalid number so just store 0 similar to a NaN.
210 exponent = 0;
211 mantissa = 0;
212 } else {
213 exponent = bits.get_biased_exponent() - 16383;
214 mantissa = ONE | bits.get_mantissa();
215 }
216 }
217}
218
219template <>
220LIBC_INLINE LIBC_BIT_CAST_CONSTEXPR NormalFloat<long double>::
221operator long double() const {
222 using LDBits = FPBits<long double>;
223 int biased_exponent = exponent + LDBits::EXP_BIAS;
224 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
225 constexpr int MAX_EXPONENT_VALUE = (1 << LDBits::EXP_LEN) - 2;
226 if (biased_exponent > MAX_EXPONENT_VALUE) {
227 return LDBits::inf(sign).get_val();
228 }
229
230 FPBits<long double> result(0.0l);
231 result.set_sign(sign);
232
233 constexpr int SUBNORMAL_EXPONENT = -LDBits::EXP_BIAS + 1;
234 if (exponent < SUBNORMAL_EXPONENT) {
235 unsigned shift = SUBNORMAL_EXPONENT - exponent;
236 if (shift <= LDBits::FRACTION_LEN + 1) {
237 // Generate a subnormal number. Might lead to loss of precision.
238 // We round to nearest and round halfway cases to even.
239 const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
240 const StorageType shift_out_value = mantissa & shift_out_mask;
241 const StorageType halfway_value = StorageType(1) << (shift - 1);
242 result.set_biased_exponent(0);
243 result.set_mantissa(mantissa >> shift);
244 StorageType new_mantissa = result.get_mantissa();
245 if (shift_out_value > halfway_value) {
246 new_mantissa += 1;
247 } else if (shift_out_value == halfway_value) {
248 // Round to even.
249 if (result.get_mantissa() & 0x1)
250 new_mantissa += 1;
251 }
252 result.set_mantissa(new_mantissa);
253 // Adding 1 to mantissa can lead to overflow. This can only happen if
254 // mantissa was all ones (0b111..11). For such a case, we will carry
255 // the overflow into the exponent and set the implicit bit to 1.
256 if (new_mantissa == ONE) {
257 result.set_biased_exponent(1);
258 result.set_implicit_bit(1);
259 } else {
260 result.set_implicit_bit(0);
261 }
262 return result.get_val();
263 } else {
264 return result.get_val();
265 }
266 }
267
268 result.set_biased_exponent(biased_exponent);
269 result.set_mantissa(mantissa);
270 result.set_implicit_bit(1);
271 return result.get_val();
272}
273#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
274
275} // namespace fputil
276} // namespace LIBC_NAMESPACE_DECL
277
278#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
279