expbf16.h source code [llvm_projects/libc/src/__support/math/expbf16.h]

1	//===-- Implementation header for expbf16 ------------------------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H
11
12	#include "hdr/errno_macros.h"
13	#include "hdr/fenv_macros.h"
14	#include "src/__support/FPUtil/FEnvImpl.h"
15	#include "src/__support/FPUtil/FPBits.h"
16	#include "src/__support/FPUtil/PolyEval.h"
17	#include "src/__support/FPUtil/bfloat16.h"
18	#include "src/__support/FPUtil/cast.h"
19	#include "src/__support/FPUtil/except_value_utils.h"
20	#include "src/__support/FPUtil/multiply_add.h"
21	#include "src/__support/FPUtil/nearest_integer.h"
22	#include "src/__support/FPUtil/rounding_mode.h"
23	#include "src/__support/macros/config.h"
24	#include "src/__support/macros/optimization.h"
25	#include "src/__support/macros/properties/types.h"
26
27	namespace LIBC_NAMESPACE_DECL {
28
29	namespace math {
30
31	namespace expbf16_internal {
32
33	// Generated by Sollya with the following commands:
34	// > display = hexadecimal;
35	// > for i from -96 to 88 by 8 do print(i, round(exp(i), SG, RN) @ "f,");
36	LIBC_INLINE_VAR constexpr float EXP_HI[`24`] = {
37	`0x1.6a4p-139f`, `0x1.07b71p-127f`, `0x1.7fd974p-116f`, `0x1.175afp-104f`,
38	`0x1.969d48p-93f`, `0x1.27ec46p-81f`, `0x1.aebabap-70f`, `0x1.397924p-58f`,
39	`0x1.c8465p-47f`, `0x1.4c1078p-35f`, `0x1.e355bcp-24f`, `0x1.5fc21p-12f`,
40	`0x1p0f`, `0x1.749ea8p11f`, `0x1.0f2ebep23f`, `0x1.8ab7fcp34f`,
41	`0x1.1f43fcp46f`, `0x1.a220d4p57f`, `0x1.304d6ap69f`, `0x1.baed16p80f`,
42	`0x1.425982p92f`, `0x1.d531d8p103f`, `0x1.55779cp115f`, `0x1.f1056ep126f`,
43	};
44
45	// Generated by Sollya with the following commands:
46	// > display = hexadecimal;
47	// > for i from 0 to 7.75 by 0.25 do print(round(exp(i), SG, RN) @ "f,");
48	LIBC_INLINE_VAR constexpr float EXP_MID[`32`] = {
49	`0x1p0f`, `0x1.48b5e4p0f`, `0x1.a61298p0f`, `0x1.0ef9dcp1f`,
50	`0x1.5bf0a8p1f`, `0x1.bec38ep1f`, `0x1.1ed3fep2f`, `0x1.704b6ap2f`,
51	`0x1.d8e64cp2f`, `0x1.2f9b88p3f`, `0x1.85d6fep3f`, `0x1.f4907p3f`,
52	`0x1.415e5cp4f`, `0x1.9ca53cp4f`, `0x1.08ec72p5f`, `0x1.542b2ep5f`,
53	`0x1.b4c902p5f`, `0x1.186bf2p6f`, `0x1.68118ap6f`, `0x1.ce564ep6f`,
54	`0x1.28d38ap7f`, `0x1.7d21eep7f`, `0x1.e96244p7f`, `0x1.3a30dp8f`,
55	`0x1.936dc6p8f`, `0x1.0301a4p9f`, `0x1.4c9222p9f`, `0x1.ab0786p9f`,
56	`0x1.122886p10f`, `0x1.6006b6p10f`, `0x1.c402b6p10f`, `0x1.223252p11f`,
57	};
58
59	} // namespace expbf16_internal
60
61	LIBC_INLINE bfloat16 expbf16(bfloat16 x) {
62	using FPBits = fputil::FPBits<bfloat16>;
63	FPBits x_bits(x);
64
65	uint16_t x_u = x_bits.uintval();
66	uint16_t x_abs = x_u & `0x7fffU`;
67
68	// 0 <= \|x\| <= 2^(-3), or \|x\| >= 89, or x is NaN
69	if (LIBC_UNLIKELY(x_abs <= `0x3e00U` \|\| x_abs >= `0x42b2U`)) {
70
71	// exp(NaN) = NaN
72	if (x_bits.is_nan()) {
73	if (x_bits.is_signaling_nan()) {
74	fputil::raise_except_if_required(FE_INVALID);
75	return FPBits::quiet_nan().get_val();
76	}
77
78	return x;
79	}
80
81	// if x >= 89
82	if (x_bits.is_pos() && x_abs >= `0x42b2U`) {
83	// exp(inf) = inf
84	if (x_bits.is_inf())
85	return x;
86
87	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
88	fputil::set_errno_if_required(ERANGE);
89	fputil::raise_except_if_required(FE_OVERFLOW);
90	return FPBits::inf().get_val();
91	#else
92	switch (fputil::quick_get_round()) {
93	case FE_TONEAREST:
94	case FE_UPWARD:
95	fputil::set_errno_if_required(ERANGE);
96	fputil::raise_except_if_required(FE_OVERFLOW);
97	return FPBits::inf().get_val();
98	default:
99	return FPBits::max_normal().get_val();
100	}
101	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
102	}
103
104	// x <= -92.5
105	if (x_u >= `0xc2b9U`) {
106	// exp(-inf) = +0
107	if (x_bits.is_inf())
108	return FPBits::zero().get_val();
109
110	fputil::set_errno_if_required(ERANGE);
111	fputil::raise_except_if_required(FE_UNDERFLOW \| FE_INEXACT);
112
113	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
114	return FPBits::zero().get_val();
115	#else
116	switch (fputil::quick_get_round()) {
117	case FE_UPWARD:
118	case FE_TONEAREST:
119	if (LIBC_UNLIKELY(x_u == `0xc2b9U`))
120	return FPBits::min_subnormal().get_val();
121	return FPBits::zero().get_val();
122	default:
123	return FPBits::zero().get_val();
124	}
125	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
126	}
127
128	// exp(0) = 1
129	if (x_bits.is_zero())
130	return bfloat16 (`1.0f`);
131
132	// 0 < \|x\| <= 2^(-3)
133	if (x_abs <= `0x3e00U`) {
134	float xf = static_cast<float>(x);
135	// Degree-3 minimax polynomial generated by Sollya with the following
136	// commands:
137	// > display = hexadecimal;
138	// > P = fpminimax(expm1(x)/x, 2, [\|SG...\|], [-2^-7, 2^-7]);
139	// > 1 + x P;*
140	// 0x1p0 + x (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))*
141	return fputil::cast<bfloat16>(x: fputil::polyeval(
142	x: xf, a0: `0x1p+0f`, a: `0x1p+0f`, a: `0x1.00004p-1f`, a: `0x1.555578p-3f`));
143	}
144	}
145
146	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
147	constexpr fputil::ExceptValues<bfloat16, `4`> EXPBF16_EXCEPTS = {.values: {
148	// (input, RZ output, RU offset, RD offset, RN offset)
149	// x = 0x40DB (6.84375)
150	// MPFR: RU=0x446B, RD=0x446A, RZ=0x446A, RN=0x446B
151	{.input: `0x40DBU`, .rnd_towardzero_result: `0x446AU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
152	// x = 0x419D, keep original
153	{.input: `0x419DU`, .rnd_towardzero_result: `0x4D9FU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
154	// x = 0x41F9 (31.125)
155	// MPFR: RU=0x55F0, RD=0x55EF, RZ=0x55EF, RN=0x55F0
156	{.input: `0x41F9U`, .rnd_towardzero_result: `0x55EFU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
157	// x = 0xC19F (-19.875)
158	// MPFR: RU=0x3121, RD=0x3120, RZ=0x3120, RN=0x3121
159	{.input: `0xC19FU`, .rnd_towardzero_result: `0x3120U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
160	}};
161
162	if (auto r = EXPBF16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
163	return r.value();
164	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
165
166	// For -93 < x < 89, we do the following range reduction:
167	// x = hi + mid + lo
168	// where,
169	// hi is an integer
170	// mid 2^2 is an integer*
171	// -2^3 <= lo <= 2^3
172	// also, hi + mid = round(4 x) / x*
173	// then,
174	// exp(x) = exp(hi + mid + lo)
175	// = exp(hi) exp(mid) * exp(lo)*
176	// we store 184/8 + 1 = 24 values for looking up exp(hi)
177	// from -96 to 88
178	// we store 84 = 32 values for looking up exp(mid) since*
179	// mid will always have the bit pattern \|bbb.bb\| where
180	// b can be either 0 or 1
181
182	float xf = static_cast<float>(x);
183	float kf = fputil::nearest_integer(x: xf * `4.0f`);
184	int x_hi_mid = static_cast<int>(kf);
185	int x_hi = x_hi_mid >> `5`;
186	int x_mid = x_hi_mid & `0b11111`;
187	// lo = x - (hi + mid) = round(x 4) / (-4) + x*
188	float lo = fputil::multiply_add(x: kf, y: -`0.25f`, z: xf);
189
190	float exp_hi = expbf16_internal::EXP_HI[x_hi + `12`];
191	float exp_mid = expbf16_internal::EXP_MID[x_mid];
192
193	// Degree-3 minimax polynomial generated by Sollya with the following
194	// commands:
195	// > display = hexadecimal;
196	// > P = fpminimax(expm1(x)/x, 2, [\|SG...\|], [-2^-7, 2^-7]);
197	// > 1 + x P;*
198	// 0x1p0 + x (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))*
199	float exp_lo =
200	fputil::polyeval(x: lo, a0: `0x1p+0f`, a: `0x1p+0f`, a: `0x1.00004p-1f`, a: `0x1.555578p-3f`);
201
202	return fputil::cast<bfloat16>(x: exp_hi * exp_mid * exp_lo);
203	}
204
205	} // namespace math
206	} // namespace LIBC_NAMESPACE_DECL
207
208	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H
209

Browse the source code of llvm_projects/libc/src/__support/math/expbf16.h