FPBits.h source code [llvm_projects/libc/src/__support/FPUtil/FPBits.h]

1	//===-- Abstract class for bit manipulation of float numbers. ---- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// -----------------------------------------------------------------------------
10	// ** WARNING **
11	// This file is shared with libc++. You should also be careful when adding
12	// dependencies to this file, since it needs to build for all libc++ targets.
13	// -----------------------------------------------------------------------------
14
15	#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
16	#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
17
18	#include "hdr/stdint_proxy.h"
19	#include "src/__support/CPP/bit.h"
20	#include "src/__support/CPP/type_traits.h"
21	#include "src/__support/common.h"
22	#include "src/__support/libc_assert.h" // LIBC_ASSERT
23	#include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR
24	#include "src/__support/macros/config.h"
25	#include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128
26	#include "src/__support/math_extras.h" // mask_trailing_ones
27	#include "src/__support/sign.h" // Sign
28	#include "src/__support/uint128.h"
29
30	namespace LIBC_NAMESPACE_DECL {
31	namespace fputil {
32
33	// The supported floating point types.
// Tags naming every floating point encoding handled by this header. Each tag
// selects one FPLayout specialization.
enum class FPType {
  IEEE754_Binary16,  // 16-bit storage: 1 sign, 5 exponent, 10 significand bits.
  IEEE754_Binary32,  // 32-bit storage: 1 sign, 8 exponent, 23 significand bits.
  IEEE754_Binary64,  // 64-bit storage: 1 sign, 11 exponent, 52 significand bits.
  IEEE754_Binary128, // 128-bit storage: 1 sign, 15 exponent, 112 significand bits.
  X86_Binary80,      // 1 sign, 15 exponent, 64 significand bits (explicit bit).
  BFloat16,          // 16-bit storage: 1 sign, 8 exponent, 7 significand bits.
};
42
43	// The classes hierarchy is as follows:
44	//
45	//             ┌───────────────────┐
46	//             │ FPLayout<FPType>  │
47	//             └─────────▲─────────┘
48	//                       │
49	//             ┌─────────┴─────────┐
50	//             │ FPStorage<FPType> │
51	//             └─────────▲─────────┘
52	//                       │
53	//          ┌────────────┴─────────────┐
54	//          │                          │
55	// ┌────────┴─────────┐ ┌──────────────┴──────────────────┐
56	// │ FPRepSem<FPType> │ │ FPRepSem<FPType::X86_Binary80>  │
57	// └────────▲─────────┘ └──────────────▲──────────────────┘
58	//          │                          │
59	//          └────────────┬─────────────┘
60	//                       │
61	//               ┌───────┴───────┐
62	//               │ FPRepImpl<T>  │
63	//               └───────▲───────┘
64	//                       │
65	//              ┌────────┴────────┐
66	//        ┌─────┴─────┐     ┌─────┴─────┐
67	//        │ FPRep<T>  │     │ FPBits<T> │
68	//        └───────────┘     └───────────┘
69	//
70	// - 'FPLayout' defines only a few constants, namely the 'StorageType' and
71	// length of the sign, the exponent, fraction and significand parts.
72	// - 'FPStorage' builds more constants on top of those from 'FPLayout' like
73	// exponent bias and masks. It also holds the bit representation of the
74	// floating point as a 'StorageType' type and defines tools to assemble or
75	// test these parts.
76	// - 'FPRepSem' defines functions to interact semantically with the floating
77	// point representation. The default implementation is the one for 'IEEE754',
78	// a specialization is provided for X86 Extended Precision.
79	// - 'FPRepImpl' derives from 'FPRepSem' and adds functions that are common to
80	// all implementations or build on the ones in 'FPRepSem'.
81	// - 'FPRep' exposes all functions from 'FPRepImpl' and returns 'FPRep'
82	// instances when using Builders (static functions to create values).
83	// - 'FPBits' exposes all the functions from 'FPRepImpl' but operates on the
84	// native C++ floating point type instead of 'FPType'. An additional 'get_val'
85	// function allows getting the C++ floating point type value back. Builders
86	// called from 'FPBits' return 'FPBits' instances.
87
88	namespace internal {
89
90	// Defines the layout (sign, exponent, significand) of a floating point type in
91	// memory. It also defines its associated StorageType, i.e., the unsigned
92	// integer type used to manipulate its representation.
93	// Additionally we provide the fractional part length, i.e., the number of bits
94	// after the decimal dot when the number is in normal form.
95	template <FPType> struct FPLayout {};
96
97	template <> struct FPLayout<FPType::IEEE754_Binary16> {
98	using StorageType = uint16_t;
99	LIBC_INLINE_VAR static constexpr int SIGN_LEN = `1`;
100	LIBC_INLINE_VAR static constexpr int EXP_LEN = `5`;
101	LIBC_INLINE_VAR static constexpr int SIG_LEN = `10`;
102	LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
103	};
104
105	template <> struct FPLayout<FPType::IEEE754_Binary32> {
106	using StorageType = uint32_t;
107	LIBC_INLINE_VAR static constexpr int SIGN_LEN = `1`;
108	LIBC_INLINE_VAR static constexpr int EXP_LEN = `8`;
109	LIBC_INLINE_VAR static constexpr int SIG_LEN = `23`;
110	LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
111	};
112
113	template <> struct FPLayout<FPType::IEEE754_Binary64> {
114	using StorageType = uint64_t;
115	LIBC_INLINE_VAR static constexpr int SIGN_LEN = `1`;
116	LIBC_INLINE_VAR static constexpr int EXP_LEN = `11`;
117	LIBC_INLINE_VAR static constexpr int SIG_LEN = `52`;
118	LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
119	};
120
121	template <> struct FPLayout<FPType::IEEE754_Binary128> {
122	using StorageType = UInt128;
123	LIBC_INLINE_VAR static constexpr int SIGN_LEN = `1`;
124	LIBC_INLINE_VAR static constexpr int EXP_LEN = `15`;
125	LIBC_INLINE_VAR static constexpr int SIG_LEN = `112`;
126	LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
127	};
128
129	template <> struct FPLayout<FPType::X86_Binary80> {
130	#if __SIZEOF_LONG_DOUBLE__ == 12
131	using StorageType = UInt<__SIZEOF_LONG_DOUBLE__ * CHAR_BIT>;
132	#else
133	using StorageType = UInt128;
134	#endif
135	LIBC_INLINE_VAR static constexpr int SIGN_LEN = `1`;
136	LIBC_INLINE_VAR static constexpr int EXP_LEN = `15`;
137	LIBC_INLINE_VAR static constexpr int SIG_LEN = `64`;
138	LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - `1`;
139	};
140
141	template <> struct FPLayout<FPType::BFloat16> {
142	using StorageType = uint16_t;
143	LIBC_INLINE_VAR static constexpr int SIGN_LEN = `1`;
144	LIBC_INLINE_VAR static constexpr int EXP_LEN = `8`;
145	LIBC_INLINE_VAR static constexpr int SIG_LEN = `7`;
146	LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
147	};
148
149	// FPStorage derives useful constants from the FPLayout above.
150	template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> {
151	using UP = FPLayout<fp_type>;
152
153	using UP::EXP_LEN; // The number of bits for the exponent* part*
154	using UP::SIG_LEN; // The number of bits for the significand* part*
155	using UP::SIGN_LEN; // The number of bits for the sign* part*
156	// For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`.
157	LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN;
158
159	// The number of bits after the decimal dot when the number is in normal form.
160	using UP::FRACTION_LEN;
161
162	// An unsigned integer that is wide enough to contain all of the floating
163	// point bits.
164	using StorageType = typename UP::StorageType;
165
166	// The number of bits in StorageType.
167	LIBC_INLINE_VAR static constexpr int STORAGE_LEN =
168	sizeof(StorageType) * CHAR_BIT;
169	static_assert(STORAGE_LEN >= TOTAL_LEN);
170
171	// The exponent bias. Always positive.
172	LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS =
173	(`1U` << (EXP_LEN - `1U`)) - `1U`;
174	static_assert(EXP_BIAS > `0`);
175
176	// The bit pattern that keeps only the significand* part.*
177	LIBC_INLINE_VAR static constexpr StorageType SIG_MASK =
178	mask_trailing_ones<StorageType, SIG_LEN>();
179	// The bit pattern that keeps only the exponent* part.*
180	LIBC_INLINE_VAR static constexpr StorageType EXP_MASK =
181	mask_trailing_ones<StorageType, EXP_LEN>() << SIG_LEN;
182	// The bit pattern that keeps only the sign* part.*
183	LIBC_INLINE_VAR static constexpr StorageType SIGN_MASK =
184	mask_trailing_ones<StorageType, SIGN_LEN>() << (EXP_LEN + SIG_LEN);
185	// The bit pattern that keeps only the exponent + significand* part.*
186	LIBC_INLINE_VAR static constexpr StorageType EXP_SIG_MASK =
187	mask_trailing_ones<StorageType, EXP_LEN + SIG_LEN>();
188	// The bit pattern that keeps only the sign + exponent + significand* part.*
189	LIBC_INLINE_VAR static constexpr StorageType FP_MASK =
190	mask_trailing_ones<StorageType, TOTAL_LEN>();
191	// The bit pattern that keeps only the fraction* part.*
192	// i.e., the significand* without the leading one.*
193	LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK =
194	mask_trailing_ones<StorageType, FRACTION_LEN>();
195
196	static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == `0`, "masks disjoint");
197	static_assert((SIG_MASK \| EXP_MASK \| SIGN_MASK) == FP_MASK, "masks cover");
198
199	protected:
200	// Merge bits from 'a' and 'b' values according to 'mask'.
201	// Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when
202	// corresponding bits are ones.
203	LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b,
204	StorageType mask) {
205	// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
206	return a ^ ((a ^ b) & mask);
207	}
208
209	// A stongly typed integer that prevents mixing and matching integers with
210	// different semantics.
211	template <typename T> struct TypedInt {
212	using value_type = T;
213	LIBC_INLINE constexpr explicit TypedInt(T value) : value(value) {}
214	LIBC_INLINE constexpr TypedInt(const TypedInt &value) = default;
215	LIBC_INLINE constexpr TypedInt &operator=(const TypedInt &value) = default;
216
217	LIBC_INLINE constexpr explicit operator T() const { return value; }
218
219	LIBC_INLINE constexpr StorageType to_storage_type() const {
220	return StorageType(value);
221	}
222
223	LIBC_INLINE friend constexpr bool operator==(TypedInt a, TypedInt b) {
224	return a.value == b.value;
225	}
226	LIBC_INLINE friend constexpr bool operator!=(TypedInt a, TypedInt b) {
227	return a.value != b.value;
228	}
229
230	protected:
231	T value;
232	};
233
234	// An opaque type to store a floating point exponent.
235	// We define special values but it is valid to create arbitrary values as long
236	// as they are in the range [min, max].
237	struct Exponent : public TypedInt<int32_t> {
238	using UP = TypedInt<int32_t>;
239	using UP::UP;
240	LIBC_INLINE static constexpr auto subnormal() {
241	return Exponent(-EXP_BIAS);
242	}
243	LIBC_INLINE static constexpr auto min() { return Exponent(`1` - EXP_BIAS); }
244	LIBC_INLINE static constexpr auto zero() { return Exponent(`0`); }
245	LIBC_INLINE static constexpr auto max() { return Exponent(EXP_BIAS); }
246	LIBC_INLINE static constexpr auto inf() { return Exponent(EXP_BIAS + `1`); }
247	};
248
249	// An opaque type to store a floating point biased exponent.
250	// We define special values but it is valid to create arbitrary values as long
251	// as they are in the range [zero, bits_all_ones].
252	// Values greater than bits_all_ones are truncated.
253	struct BiasedExponent : public TypedInt<uint32_t> {
254	using UP = TypedInt<uint32_t>;
255	using UP::UP;
256
257	LIBC_INLINE constexpr BiasedExponent(Exponent exp)
258	: UP(static_cast<uint32_t>(static_cast<int32_t>(exp) + EXP_BIAS)) {}
259
260	// Cast operator to get convert from BiasedExponent to Exponent.
261	LIBC_INLINE constexpr operator Exponent() const {
262	return Exponent(static_cast<int32_t>(UP::value - EXP_BIAS));
263	}
264
265	LIBC_INLINE constexpr BiasedExponent &operator++() {
266	LIBC_ASSERT(*this != BiasedExponent(Exponent::inf()));
267	++UP::value;
268	return *this;
269	}
270
271	LIBC_INLINE constexpr BiasedExponent &operator--() {
272	LIBC_ASSERT(*this != BiasedExponent(Exponent::subnormal()));
273	--UP::value;
274	return *this;
275	}
276	};
277
278	// An opaque type to store a floating point significand.
279	// We define special values but it is valid to create arbitrary values as long
280	// as they are in the range [zero, bits_all_ones].
281	// Note that the semantics of the Significand are implementation dependent.
282	// Values greater than bits_all_ones are truncated.
283	struct Significand : public TypedInt<StorageType> {
284	using UP = TypedInt<StorageType>;
285	using UP::UP;
286
287	LIBC_INLINE friend constexpr Significand operator\|(const Significand a,
288	const Significand b) {
289	return Significand(
290	StorageType(a.to_storage_type() \| b.to_storage_type()));
291	}
292	LIBC_INLINE friend constexpr Significand operator^(const Significand a,
293	const Significand b) {
294	return Significand(
295	StorageType(a.to_storage_type() ^ b.to_storage_type()));
296	}
297	LIBC_INLINE friend constexpr Significand operator>>(const Significand a,
298	int shift) {
299	return Significand(StorageType(a.to_storage_type() >> shift));
300	}
301
302	LIBC_INLINE static constexpr auto zero() {
303	return Significand(StorageType(`0`));
304	}
305	LIBC_INLINE static constexpr auto lsb() {
306	return Significand(StorageType(`1`));
307	}
308	LIBC_INLINE static constexpr auto msb() {
309	return Significand(StorageType(`1`) << (SIG_LEN - `1`));
310	}
311	LIBC_INLINE static constexpr auto bits_all_ones() {
312	return Significand(SIG_MASK);
313	}
314	};
315
316	LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp) {
317	return (exp.to_storage_type() << SIG_LEN) & EXP_MASK;
318	}
319
320	LIBC_INLINE static constexpr StorageType encode(Significand value) {
321	return value.to_storage_type() & SIG_MASK;
322	}
323
324	LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp,
325	Significand sig) {
326	return encode(exp) \| encode(sig);
327	}
328
329	LIBC_INLINE static constexpr StorageType encode(Sign sign, BiasedExponent exp,
330	Significand sig) {
331	if (sign.is_neg())
332	return SIGN_MASK \| encode(exp, sig);
333	return encode(exp, sig);
334	}
335
336	// The floating point number representation as an unsigned integer.
337	StorageType bits{};
338
339	LIBC_INLINE constexpr FPStorage() : bits(`0`) {}
340	LIBC_INLINE constexpr FPStorage(StorageType value) : bits(value) {}
341
342	// Observers
343	LIBC_INLINE constexpr StorageType exp_bits() const { return bits & EXP_MASK; }
344	LIBC_INLINE constexpr StorageType sig_bits() const { return bits & SIG_MASK; }
345	LIBC_INLINE constexpr StorageType exp_sig_bits() const {
346	return bits & EXP_SIG_MASK;
347	}
348
349	// Parts
350	LIBC_INLINE constexpr BiasedExponent biased_exponent() const {
351	return BiasedExponent(static_cast<uint32_t>(exp_bits() >> SIG_LEN));
352	}
353	LIBC_INLINE constexpr void set_biased_exponent(BiasedExponent biased) {
354	bits = merge(a: bits, b: encode(biased), mask: EXP_MASK);
355	}
356
357	public:
358	LIBC_INLINE constexpr Sign sign() const {
359	return (bits & SIGN_MASK) ? Sign::NEG : Sign::POS;
360	}
361	LIBC_INLINE constexpr void set_sign(Sign signVal) {
362	if (sign() != signVal)
363	bits ^= SIGN_MASK;
364	}
365	};
366
367	// This layer defines all functions that are specific to how the floating
368	// point type is encoded. It enables constructions, modification and observation
369	// of values manipulated as 'StorageType'.
370	template <FPType fp_type, typename RetT>
371	struct FPRepSem : public FPStorage<fp_type> {
372	using UP = FPStorage<fp_type>;
373	using typename UP::StorageType;
374	using UP::FRACTION_LEN;
375	using UP::FRACTION_MASK;
376
377	protected:
378	using typename UP::Exponent;
379	using typename UP::Significand;
380	using UP::bits;
381	using UP::encode;
382	using UP::exp_bits;
383	using UP::exp_sig_bits;
384	using UP::sig_bits;
385	using UP::UP;
386
387	public:
388	// Builders
389	LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) {
390	return RetT(encode(sign, Exponent::subnormal(), Significand::zero()));
391	}
392	LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) {
393	return RetT(encode(sign, Exponent::zero(), Significand::zero()));
394	}
395	LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) {
396	return RetT(encode(sign, Exponent::subnormal(), Significand::lsb()));
397	}
398	LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) {
399	return RetT(
400	encode(sign, Exponent::subnormal(), Significand::bits_all_ones()));
401	}
402	LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) {
403	return RetT(encode(sign, Exponent::min(), Significand::zero()));
404	}
405	LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) {
406	return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones()));
407	}
408	LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) {
409	return RetT(encode(sign, Exponent::inf(), Significand::zero()));
410	}
411	LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS,
412	StorageType v = `0`) {
413	return RetT(encode(sign, Exponent::inf(),
414	(v ? Significand(v) : (Significand::msb() >> `1`))));
415	}
416	LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS,
417	StorageType v = `0`) {
418	return RetT(
419	encode(sign, Exponent::inf(), Significand::msb() \| Significand(v)));
420	}
421
422	// Observers
423	LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == `0`; }
424	LIBC_INLINE constexpr bool is_nan() const {
425	return exp_sig_bits() > encode(Exponent::inf(), Significand::zero());
426	}
427	LIBC_INLINE constexpr bool is_quiet_nan() const {
428	return exp_sig_bits() >= encode(Exponent::inf(), Significand::msb());
429	}
430	LIBC_INLINE constexpr bool is_signaling_nan() const {
431	return is_nan() && !is_quiet_nan();
432	}
433	LIBC_INLINE constexpr bool is_inf() const {
434	return exp_sig_bits() == encode(Exponent::inf(), Significand::zero());
435	}
436	LIBC_INLINE constexpr bool is_finite() const {
437	return exp_bits() != encode(Exponent::inf());
438	}
439	LIBC_INLINE
440	constexpr bool is_subnormal() const {
441	return exp_bits() == encode(Exponent::subnormal());
442	}
443	LIBC_INLINE constexpr bool is_normal() const {
444	return is_finite() && !is_subnormal();
445	}
446	LIBC_INLINE constexpr RetT next_toward_inf() const {
447	if (is_finite())
448	return RetT(bits + StorageType(`1`));
449	return RetT(bits);
450	}
451
452	// Returns the mantissa with the implicit bit set iff the current
453	// value is a valid normal number.
454	LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
455	if (is_subnormal())
456	return sig_bits();
457	return (StorageType(`1`) << UP::SIG_LEN) \| sig_bits();
458	}
459	};
460
461	// Specialization for the X86 Extended Precision type.
462	template <typename RetT>
463	struct FPRepSem<FPType::X86_Binary80, RetT>
464	: public FPStorage<FPType::X86_Binary80> {
465	using UP = FPStorage<FPType::X86_Binary80>;
466	using typename UP::StorageType;
467	using UP::FRACTION_LEN;
468	using UP::FRACTION_MASK;
469
470	// The x86 80 bit float represents the leading digit of the mantissa
471	// explicitly. This is the mask for that bit.
472	static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(`1`)
473	<< FRACTION_LEN;
474	// The X80 significand is made of an explicit bit and the fractional part.
475	static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == `0`,
476	"the explicit bit and the fractional part should not overlap");
477	static_assert((EXPLICIT_BIT_MASK \| FRACTION_MASK) == SIG_MASK,
478	"the explicit bit and the fractional part should cover the "
479	"whole significand");
480
481	protected:
482	using typename UP::Exponent;
483	using typename UP::Significand;
484	using UP::encode;
485	using UP::UP;
486
487	public:
488	// Builders
489	LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) {
490	return RetT(encode(sign, Exponent::subnormal(), Significand::zero()));
491	}
492	LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) {
493	return RetT(encode(sign, Exponent::zero(), Significand::msb()));
494	}
495	LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) {
496	return RetT(encode(sign, Exponent::subnormal(), Significand::lsb()));
497	}
498	LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) {
499	return RetT(encode(sign, Exponent::subnormal(),
500	Significand::bits_all_ones() ^ Significand::msb()));
501	}
502	LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) {
503	return RetT(encode(sign, Exponent::min(), Significand::msb()));
504	}
505	LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) {
506	return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones()));
507	}
508	LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) {
509	return RetT(encode(sign, Exponent::inf(), Significand::msb()));
510	}
511	LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS,
512	StorageType v = `0`) {
513	return RetT(encode(sign, Exponent::inf(),
514	Significand::msb() \|
515	(v ? Significand(v) : (Significand::msb() >> `2`))));
516	}
517	LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS,
518	StorageType v = `0`) {
519	return RetT(encode(sign, Exponent::inf(),
520	Significand::msb() \| (Significand::msb() >> `1`) \|
521	Significand(v)));
522	}
523
524	// Observers
525	LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == `0`; }
526	LIBC_INLINE constexpr bool is_nan() const {
527	// Most encoding forms from the table found in
528	// https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format
529	// are interpreted as NaN.
530	// More precisely :
531	// - Pseudo-Infinity
532	// - Pseudo Not a Number
533	// - Signalling Not a Number
534	// - Floating-point Indefinite
535	// - Quiet Not a Number
536	// - Unnormal
537	// This can be reduced to the following logic:
538	if (exp_bits() == encode(Exponent::inf()))
539	return !is_inf();
540	if (exp_bits() != encode(Exponent::subnormal()))
541	return (sig_bits() & encode(Significand::msb())) == `0`;
542	return false;
543	}
544	LIBC_INLINE constexpr bool is_quiet_nan() const {
545	return exp_sig_bits() >=
546	encode(Exponent::inf(),
547	Significand::msb() \| (Significand::msb() >> `1`));
548	}
549	LIBC_INLINE constexpr bool is_signaling_nan() const {
550	return is_nan() && !is_quiet_nan();
551	}
552	LIBC_INLINE constexpr bool is_inf() const {
553	return exp_sig_bits() == encode(Exponent::inf(), Significand::msb());
554	}
555	LIBC_INLINE constexpr bool is_finite() const {
556	return !is_inf() && !is_nan();
557	}
558	LIBC_INLINE
559	constexpr bool is_subnormal() const {
560	return exp_bits() == encode(Exponent::subnormal());
561	}
562	LIBC_INLINE constexpr bool is_normal() const {
563	const auto exp = exp_bits();
564	if (exp == encode(Exponent::subnormal()) \|\| exp == encode(Exponent::inf()))
565	return false;
566	return get_implicit_bit();
567	}
568	LIBC_INLINE constexpr RetT next_toward_inf() const {
569	if (is_finite()) {
570	if (exp_sig_bits() == max_normal().uintval()) {
571	return inf(sign: sign());
572	} else if (exp_sig_bits() == max_subnormal().uintval()) {
573	return min_normal(sign: sign());
574	} else if (sig_bits() == SIG_MASK) {
575	return RetT(encode(sign(), ++biased_exponent(), Significand::zero()));
576	} else {
577	return RetT(bits + StorageType(`1`));
578	}
579	}
580	return RetT(bits);
581	}
582
583	LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
584	return sig_bits();
585	}
586
587	// This functions is specific to FPRepSem<FPType::X86_Binary80>.
588	// TODO: Remove if possible.
589	LIBC_INLINE constexpr bool get_implicit_bit() const {
590	return static_cast<bool>(bits & EXPLICIT_BIT_MASK);
591	}
592
593	// This functions is specific to FPRepSem<FPType::X86_Binary80>.
594	// TODO: Remove if possible.
595	LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) {
596	if (get_implicit_bit() != implicitVal)
597	bits ^= EXPLICIT_BIT_MASK;
598	}
599	};
600
601	// 'FPRepImpl' is the bottom of the class hierarchy that only deals with
602	// 'FPType'. The operations dealing with specific float semantics are
603	// implemented by 'FPRepSem' above and specialized when needed.
604	//
605	// The 'RetT' type is being propagated up to 'FPRepSem' so that the functions
606	// creating new values (Builders) can return the appropriate type. That is, when
607	// creating a value through 'FPBits' below the builder will return an 'FPBits'
608	// value.
609	// FPBits<float>::zero(); // returns an FPBits<>
610	//
611	// When we don't care about specific C++ floating point type we can use
612	// 'FPRep' and specify the 'FPType' directly.
613	// FPRep<FPType::IEEE754_Binary32>::zero() // returns an FPRep<>
614	template <FPType fp_type, typename RetT>
615	struct FPRepImpl : public FPRepSem<fp_type, RetT> {
616	using UP = FPRepSem<fp_type, RetT>;
617	using StorageType = typename UP::StorageType;
618
619	protected:
620	using UP::bits;
621	using UP::encode;
622	using UP::exp_bits;
623	using UP::exp_sig_bits;
624
625	using typename UP::BiasedExponent;
626	using typename UP::Exponent;
627	using typename UP::Significand;
628
629	using UP::FP_MASK;
630
631	public:
632	// Constants.
633	using UP::EXP_BIAS;
634	using UP::EXP_MASK;
635	using UP::FRACTION_MASK;
636	using UP::SIG_LEN;
637	using UP::SIG_MASK;
638	using UP::SIGN_MASK;
639	LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
640	(`1` << UP::EXP_LEN) - `1`;
641
642	// CTors
643	LIBC_INLINE constexpr FPRepImpl() = default;
644	LIBC_INLINE constexpr explicit FPRepImpl(StorageType x) : UP(x) {}
645
646	// Comparison
647	LIBC_INLINE constexpr friend bool operator==(FPRepImpl a, FPRepImpl b) {
648	return a.uintval() == b.uintval();
649	}
650	LIBC_INLINE constexpr friend bool operator!=(FPRepImpl a, FPRepImpl b) {
651	return a.uintval() != b.uintval();
652	}
653
654	// Representation
655	LIBC_INLINE constexpr StorageType uintval() const { return bits & FP_MASK; }
656	LIBC_INLINE constexpr void set_uintval(StorageType value) {
657	bits = (value & FP_MASK);
658	}
659
660	// Builders
661	using UP::inf;
662	using UP::max_normal;
663	using UP::max_subnormal;
664	using UP::min_normal;
665	using UP::min_subnormal;
666	using UP::one;
667	using UP::quiet_nan;
668	using UP::signaling_nan;
669	using UP::zero;
670
671	// Modifiers
672	LIBC_INLINE constexpr RetT abs() const {
673	return RetT(static_cast<StorageType>(bits & UP::EXP_SIG_MASK));
674	}
675
676	// Observers
677	using UP::get_explicit_mantissa;
678	using UP::is_finite;
679	using UP::is_inf;
680	using UP::is_nan;
681	using UP::is_normal;
682	using UP::is_quiet_nan;
683	using UP::is_signaling_nan;
684	using UP::is_subnormal;
685	using UP::is_zero;
686	using UP::next_toward_inf;
687	using UP::sign;
688	LIBC_INLINE constexpr bool is_inf_or_nan() const { return !is_finite(); }
689	LIBC_INLINE constexpr bool is_neg() const { return sign().is_neg(); }
690	LIBC_INLINE constexpr bool is_pos() const { return sign().is_pos(); }
691
692	LIBC_INLINE constexpr uint16_t get_biased_exponent() const {
693	return static_cast<uint16_t>(static_cast<uint32_t>(UP::biased_exponent()));
694	}
695
696	LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) {
697	UP::set_biased_exponent(BiasedExponent(static_cast<uint32_t>(biased)));
698	}
699
700	LIBC_INLINE constexpr int get_exponent() const {
701	return static_cast<int32_t>(Exponent(UP::biased_exponent()));
702	}
703
704	// If the number is subnormal, the exponent is treated as if it were the
705	// minimum exponent for a normal number. This is to keep continuity between
706	// the normal and subnormal ranges, but it causes problems for functions where
707	// values are calculated from the exponent, since just subtracting the bias
708	// will give a slightly incorrect result. Additionally, zero has an exponent
709	// of zero, and that should actually be treated as zero.
710	LIBC_INLINE constexpr int get_explicit_exponent() const {
711	Exponent exponent(UP::biased_exponent());
712	if (is_zero())
713	exponent = Exponent::zero();
714	if (exponent == Exponent::subnormal())
715	exponent = Exponent::min();
716	return static_cast<int32_t>(exponent);
717	}
718
719	LIBC_INLINE constexpr StorageType get_mantissa() const {
720	return bits & FRACTION_MASK;
721	}
722
723	LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) {
724	bits = UP::merge(bits, mantVal, FRACTION_MASK);
725	}
726
727	LIBC_INLINE constexpr void set_significand(StorageType sigVal) {
728	bits = UP::merge(bits, sigVal, SIG_MASK);
729	}
730	// Unsafe function to create a floating point representation.
731	// It simply packs the sign, biased exponent and mantissa values without
732	// checking bound nor normalization.
733	//
734	// WARNING: For X86 Extended Precision, implicit bit needs to be set correctly
735	// in the 'mantissa' by the caller. This function will not check for its
736	// validity.
737	//
738	// FIXME: Use an uint32_t for 'biased_exp'.
739	LIBC_INLINE static constexpr RetT
740	create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
741	return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
742	Significand(mantissa)));
743	}
744
745	// The function converts integer number and unbiased exponent to proper
746	// float T type:
747	// Result = number 2^(ep+1 - exponent_bias)*
748	// Be careful!
749	// 1) "ep" is the raw exponent value.
750	// 2) The function adds +1 to ep for seamless normalized to denormalized
751	// transition.
752	// 3) The function does not check exponent high limit.
753	// 4) "number" zero value is not processed correctly.
754	// 5) Number is unsigned, so the result can be only positive.
755	LIBC_INLINE static constexpr RetT make_value(StorageType number, int ep) {
756	FPRepImpl result(`0`);
757	int lz =
758	UP::FRACTION_LEN + `1` - (UP::STORAGE_LEN - cpp::countl_zero(number));
759
760	number <<= lz;
761	ep -= lz;
762
763	if (LIBC_LIKELY(ep >= `0`)) {
764	// Implicit number bit will be removed by mask
765	result.set_significand(number);
766	result.set_biased_exponent(static_cast<StorageType>(ep + `1`));
767	} else {
768	result.set_significand(number >> static_cast<unsigned>(-ep));
769	}
770	return RetT(result.uintval());
771	}
772	};
773
774	// A generic class to manipulate floating point formats.
775	// It derives its functionality from FPRepImpl above.
776	template <FPType fp_type>
777	struct FPRep : public FPRepImpl<fp_type, FPRep<fp_type>> {
778	using UP = FPRepImpl<fp_type, FPRep<fp_type>>;
779	using StorageType = typename UP::StorageType;
780	using UP::UP;
781
782	LIBC_INLINE constexpr explicit operator StorageType() const {
783	return UP::uintval();
784	}
785	};
786
787	} // namespace internal
788
789	// Returns the FPType corresponding to C++ type T on the host.
790	template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
791	using UnqualT = cpp::remove_cv_t<T>;
792	if constexpr (cpp::is_same_v<UnqualT, float> && FLT_MANT_DIG == `24`)
793	return FPType::IEEE754_Binary32;
794	else if constexpr (cpp::is_same_v<UnqualT, double> && DBL_MANT_DIG == `53`)
795	return FPType::IEEE754_Binary64;
796	else if constexpr (cpp::is_same_v<UnqualT, long double>) {
797	if constexpr (LDBL_MANT_DIG == `53`)
798	return FPType::IEEE754_Binary64;
799	else if constexpr (LDBL_MANT_DIG == `64`)
800	return FPType::X86_Binary80;
801	else if constexpr (LDBL_MANT_DIG == `113`)
802	return FPType::IEEE754_Binary128;
803	}
804	#if defined(LIBC_TYPES_HAS_FLOAT16)
805	else if constexpr (cpp::is_same_v<UnqualT, float16>)
806	return FPType::IEEE754_Binary16;
807	#endif
808	#if defined(LIBC_TYPES_HAS_FLOAT128)
809	else if constexpr (cpp::is_same_v<UnqualT, float128>)
810	return FPType::IEEE754_Binary128;
811	#endif
812	else if constexpr (cpp::is_same_v<UnqualT, bfloat16>)
813	return FPType::BFloat16;
814	else
815	static_assert(cpp::always_false<UnqualT>, "Unsupported type");
816	}
817
818	// -----------------------------------------------------------------------------
819	// ** WARNING **
820	// This interface is shared with libc++, if you change this interface you need
821	// to update it in both libc and libc++. You should also be careful when adding
822	// dependencies to this file, since it needs to build for all libc++ targets.
823	// -----------------------------------------------------------------------------
824	// A generic class to manipulate C++ floating point formats.
825	// It derives its functionality from FPRepImpl above.
826	template <typename T>
827	struct FPBits final : public internal::FPRepImpl<get_fp_type<T>(), FPBits<T>> {
828	static_assert(cpp::is_floating_point_v<T>,
829	"FPBits instantiated with invalid type.");
830	using UP = internal::FPRepImpl<get_fp_type<T>(), FPBits<T>>;
831	using StorageType = typename UP::StorageType;
832
833	// Constructors.
834	LIBC_INLINE constexpr FPBits() = default;
835
836	template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) {
837	using Unqual = typename cpp::remove_cv_t<XType>;
838	if constexpr (cpp::is_same_v<Unqual, T>) {
839	UP::bits = cpp::bit_cast<StorageType>(x);
840	} else if constexpr (cpp::is_same_v<Unqual, StorageType>) {
841	UP::bits = x;
842	} else {
843	// We don't want accidental type promotions/conversions, so we require
844	// exact type match.
845	static_assert(cpp::always_false<XType>);
846	}
847	}
848
849	// Floating-point conversions.
850	LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(UP::bits); }
851	};
852
853	} // namespace fputil
854	} // namespace LIBC_NAMESPACE_DECL
855
856	#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
857

Browse the source code of llvm_projects/libc/src/__support/FPUtil/FPBits.h