exp10m1f.h source code [llvm_projects/libc/src/__support/math/exp10m1f.h]

//===-- Implementation header for exp10m1f ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F_H
11
12	#include "exp10f_utils.h"
13	#include "src/__support/FPUtil/FEnvImpl.h"
14	#include "src/__support/FPUtil/FPBits.h"
15	#include "src/__support/FPUtil/PolyEval.h"
16	#include "src/__support/FPUtil/except_value_utils.h"
17	#include "src/__support/FPUtil/multiply_add.h"
18	#include "src/__support/FPUtil/rounding_mode.h"
19	#include "src/__support/common.h"
20	#include "src/__support/libc_errno.h"
21	#include "src/__support/macros/config.h"
22	#include "src/__support/macros/optimization.h"
23
24	namespace LIBC_NAMESPACE_DECL {
25
26	namespace math {
27
28	namespace exp10m1f_internal {
29
30	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
31	LIBC_INLINE_VAR constexpr size_t N_EXCEPTS_LO = `11`;
32
33	LIBC_INLINE_VAR constexpr fputil::ExceptValues<float, N_EXCEPTS_LO>
34	EXP10M1F_EXCEPTS_LO = {.values: {
35	// x = 0x1.0fe54ep-11, exp10m1f(x) = 0x1.3937eep-10 (RZ)
36	{.input: `0x3a07'f2a7U`, .rnd_towardzero_result: `0x3a9c'9bf7U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
37	// x = 0x1.80e6eap-11, exp10m1f(x) = 0x1.bb8272p-10 (RZ)
38	{.input: `0x3a40'7375U`, .rnd_towardzero_result: `0x3add'c139U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
39	// x = -0x1.2a33bcp-51, exp10m1f(x) = -0x1.57515ep-50 (RZ)
40	{.input: `0xa615'19deU`, .rnd_towardzero_result: `0xa6ab'a8afU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
41	// x = -0x0p+0, exp10m1f(x) = -0x0p+0 (RZ)
42	{.input: `0x8000'0000U`, .rnd_towardzero_result: `0x8000'0000U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
43	// x = -0x1.b59e08p-31, exp10m1f(x) = -0x1.f7d356p-30 (RZ)
44	{.input: `0xb05a'cf04U`, .rnd_towardzero_result: `0xb0fb'e9abU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
45	// x = -0x1.bf342p-12, exp10m1f(x) = -0x1.014e02p-10 (RZ)
46	{.input: `0xb9df'9a10U`, .rnd_towardzero_result: `0xba80'a701U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
47	// x = -0x1.6207fp-11, exp10m1f(x) = -0x1.9746cap-10 (RZ)
48	{.input: `0xba31'03f8U`, .rnd_towardzero_result: `0xbacb'a365U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
49	// x = -0x1.bd0c66p-11, exp10m1f(x) = -0x1.ffe168p-10 (RZ)
50	{.input: `0xba5e'8633U`, .rnd_towardzero_result: `0xbaff'f0b4U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
51	// x = -0x1.ffd84cp-10, exp10m1f(x) = -0x1.25faf2p-8 (RZ)
52	{.input: `0xbaff'ec26U`, .rnd_towardzero_result: `0xbb92'fd79U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
53	// x = -0x1.a74172p-9, exp10m1f(x) = -0x1.e57be2p-8 (RZ)
54	{.input: `0xbb53'a0b9U`, .rnd_towardzero_result: `0xbbf2'bdf1U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
55	// x = -0x1.cb694cp-9, exp10m1f(x) = -0x1.0764e4p-7 (RZ)
56	{.input: `0xbb65'b4a6U`, .rnd_towardzero_result: `0xbc03'b272U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
57	}};
58
59	LIBC_INLINE_VAR constexpr size_t N_EXCEPTS_HI = `19`;
60
61	LIBC_INLINE_VAR constexpr fputil::ExceptValues<float, N_EXCEPTS_HI>
62	EXP10M1F_EXCEPTS_HI = {.values: {
63	// (input, RZ output, RU offset, RD offset, RN offset)
64	// x = 0x1.8d31eep-8, exp10m1f(x) = 0x1.cc7e4cp-7 (RZ)
65	{.input: `0x3bc6'98f7U`, .rnd_towardzero_result: `0x3c66'3f26U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
66	// x = 0x1.915fcep-8, exp10m1f(x) = 0x1.d15f72p-7 (RZ)
67	{.input: `0x3bc8'afe7U`, .rnd_towardzero_result: `0x3c68'afb9U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
68	// x = 0x1.bcf982p-8, exp10m1f(x) = 0x1.022928p-6 (RZ)
69	{.input: `0x3bde'7cc1U`, .rnd_towardzero_result: `0x3c81'1494U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
70	// x = 0x1.99ff0ap-7, exp10m1f(x) = 0x1.dee416p-6 (RZ)
71	{.input: `0x3c4c'ff85U`, .rnd_towardzero_result: `0x3cef'720bU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
72	// x = 0x1.75ea14p-6, exp10m1f(x) = 0x1.b9ff16p-5 (RZ)
73	{.input: `0x3cba'f50aU`, .rnd_towardzero_result: `0x3d5c'ff8bU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
74	// x = 0x1.f81b64p-6, exp10m1f(x) = 0x1.2cb6bcp-4 (RZ)
75	{.input: `0x3cfc'0db2U`, .rnd_towardzero_result: `0x3d96'5b5eU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
76	// x = 0x1.fafecp+3, exp10m1f(x) = 0x1.8c880ap+52 (RZ)
77	{.input: `0x417d'7f60U`, .rnd_towardzero_result: `0x59c6'4405U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
78	// x = -0x1.3bf094p-8, exp10m1f(x) = -0x1.69ba4ap-7 (RZ)
79	{.input: `0xbb9d'f84aU`, .rnd_towardzero_result: `0xbc34'dd25U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
80	// x = -0x1.4558bcp-8, exp10m1f(x) = -0x1.746fb8p-7 (RZ)
81	{.input: `0xbba2'ac5eU`, .rnd_towardzero_result: `0xbc3a'37dcU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
82	// x = -0x1.4bb43p-8, exp10m1f(x) = -0x1.7babe4p-7 (RZ)
83	{.input: `0xbba5'da18U`, .rnd_towardzero_result: `0xbc3d'd5f2U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
84	// x = -0x1.776cc8p-8, exp10m1f(x) = -0x1.ad62c4p-7 (RZ)
85	{.input: `0xbbbb'b664U`, .rnd_towardzero_result: `0xbc56'b162U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
86	// x = -0x1.f024cp-8, exp10m1f(x) = -0x1.1b20d6p-6 (RZ)
87	{.input: `0xbbf8'1260U`, .rnd_towardzero_result: `0xbc8d'906bU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
88	// x = -0x1.f510eep-8, exp10m1f(x) = -0x1.1de9aap-6 (RZ)
89	{.input: `0xbbfa'8877U`, .rnd_towardzero_result: `0xbc8e'f4d5U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
90	// x = -0x1.0b43c4p-7, exp10m1f(x) = -0x1.30d418p-6 (RZ)
91	{.input: `0xbc05'a1e2U`, .rnd_towardzero_result: `0xbc98'6a0cU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
92	// x = -0x1.245ee4p-7, exp10m1f(x) = -0x1.4d2b86p-6 (RZ)
93	{.input: `0xbc12'2f72U`, .rnd_towardzero_result: `0xbca6'95c3U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
94	// x = -0x1.f9f2dap-7, exp10m1f(x) = -0x1.1e2186p-5 (RZ)
95	{.input: `0xbc7c'f96dU`, .rnd_towardzero_result: `0xbd0f'10c3U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
96	// x = -0x1.08e42p-6, exp10m1f(x) = -0x1.2b5c4p-5 (RZ)
97	{.input: `0xbc84'7210U`, .rnd_towardzero_result: `0xbd15'ae20U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
98	// x = -0x1.0cdc44p-5, exp10m1f(x) = -0x1.2a2152p-4 (RZ)
99	{.input: `0xbd06'6e22U`, .rnd_towardzero_result: `0xbd95'10a9U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
100	// x = -0x1.ca4322p-5, exp10m1f(x) = -0x1.ef073p-4 (RZ)
101	{.input: `0xbd65'2191U`, .rnd_towardzero_result: `0xbdf7'8398U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
102	}};
103	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
104
105	} // namespace exp10m1f_internal
106
107	LIBC_INLINE float exp10m1f(float x) {
108	using namespace exp10m1f_internal;
109	using FPBits = fputil::FPBits<float>;
110	FPBits xbits(x);
111
112	uint32_t x_u = xbits.uintval();
113	uint32_t x_abs = x_u & `0x7fff'ffffU`;
114
115	// When x >= log10(2^128), or x is nan
116	if (LIBC_UNLIKELY(xbits.is_pos() && x_u >= `0x421a'209bU`)) {
117	if (xbits.is_finite()) {
118	#ifndef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
119	int rounding = fputil::quick_get_round();
120	if (rounding == FE_DOWNWARD \|\| rounding == FE_TOWARDZERO)
121	return FPBits::max_normal().get_val();
122	#endif
123
124	fputil::set_errno_if_required(ERANGE);
125	fputil::raise_except_if_required(FE_OVERFLOW);
126	}
127
128	// x >= log10(2^128) and 10^x - 1 rounds to +inf, or x is +inf or nan
129	return x + FPBits::inf().get_val();
130	}
131
132	// When \|x\| <= log10(2) 2^(-6)*
133	if (LIBC_UNLIKELY(x_abs <= `0x3b9a'209bU`)) {
134	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
135	if (auto r = EXP10M1F_EXCEPTS_LO.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
136	return r.value();
137	#else
138	// Even if we're not checking for the misrounded cases in this interval, we
139	// must still check for -0 as input and return -0 as output, rather than +0
140	// as the code below would compute.
141	//
142	// We might as well check for both zeroes at once, in fact, since it's no
143	// slower.
144	if (LIBC_UNLIKELY(x_abs == `0`))
145	return x;
146	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
147
148	double dx = x;
149	double dx_sq = dx * dx;
150	double c0 = dx * Exp10Base::COEFFS[`0`];
151	double c1 =
152	fputil::multiply_add(x: dx, y: Exp10Base::COEFFS[`2`], z: Exp10Base::COEFFS[`1`]);
153	double c2 =
154	fputil::multiply_add(x: dx, y: Exp10Base::COEFFS[`4`], z: Exp10Base::COEFFS[`3`]);
155	// 10^dx - 1 ~ (1 + COEFFS[0] dx + ... + COEFFS[4] * dx^5) - 1*
156	// = COEFFS[0] dx + ... + COEFFS[4] * dx^5*
157	return static_cast<float>(fputil::polyeval(x: dx_sq, a0: c0, a: c1, a: c2));
158	}
159
160	// When x <= log10(2^-25), or x is nan
161	if (LIBC_UNLIKELY(x_u >= `0xc0f0d2f1`)) {
162	// exp10m1(-inf) = -1
163	if (xbits.is_inf())
164	return -`1.0f`;
165	// exp10m1(nan) = nan
166	if (xbits.is_nan())
167	return x;
168
169	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
170	if (x_u == `0xc0f0d2f1`) // x = log10(2^-25)
171	return -`0x1.ffff'fep-1f`; // -1.0f + 0x1.0p-24f
172	#else
173	int rounding = fputil::quick_get_round();
174	if (rounding == FE_UPWARD \|\| rounding == FE_TOWARDZERO \|\|
175	(rounding == FE_TONEAREST && x_u == `0xc0f0d2f1`))
176	return -`0x1.ffff'fep-1f`; // -1.0f + 0x1.0p-24f
177	#endif
178
179	fputil::set_errno_if_required(ERANGE);
180	fputil::raise_except_if_required(FE_UNDERFLOW);
181	return -`1.0f`;
182	}
183
184	// Exact outputs when x = 1, 2, ..., 10.
185	// Quick check mask: 0x800f'ffffU = ~(bits of 1.0f \| ... \| bits of 10.0f)
186	if (LIBC_UNLIKELY((x_u & `0x800f'ffffU`) == `0`)) {
187	switch (x_u) {
188	case `0x3f800000U`: // x = 1.0f
189	return `9.0f`;
190	case `0x40000000U`: // x = 2.0f
191	return `99.0f`;
192	case `0x40400000U`: // x = 3.0f
193	return `999.0f`;
194	case `0x40800000U`: // x = 4.0f
195	return `9'999.0f`;
196	case `0x40a00000U`: // x = 5.0f
197	return `99'999.0f`;
198	case `0x40c00000U`: // x = 6.0f
199	return `999'999.0f`;
200	case `0x40e00000U`: // x = 7.0f
201	return `9'999'999.0f`;
202	case `0x41000000U`: { // x = 8.0f
203	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
204	return `100'000'000.0f`;
205	#else
206	int rounding = fputil::quick_get_round();
207	if (rounding == FE_UPWARD \|\| rounding == FE_TONEAREST)
208	return `100'000'000.0f`;
209	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
210	return `99'999'992.0f`;
211	}
212	case `0x41100000U`: { // x = 9.0f
213	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
214	return `1'000'000'000.0f`;
215	#else
216	int rounding = fputil::quick_get_round();
217	if (rounding == FE_UPWARD \|\| rounding == FE_TONEAREST)
218	return `1'000'000'000.0f`;
219	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
220	return `999'999'936.0f`;
221	}
222	case `0x41200000U`: { // x = 10.0f
223	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
224	return `10'000'000'000.0f`;
225	#else
226	int rounding = fputil::quick_get_round();
227	if (rounding == FE_UPWARD \|\| rounding == FE_TONEAREST)
228	return `10'000'000'000.0f`;
229	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
230	return `9'999'998'976.0f`;
231	}
232	}
233	}
234
235	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
236	if (auto r = EXP10M1F_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
237	return r.value();
238	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
239
240	// Range reduction: 10^x = 2^(mid + hi) 10^lo*
241	// rr = (2^(mid + hi), lo)
242	auto rr = exp_b_range_reduc<Exp10Base>(x);
243
244	// The low part is approximated by a degree-5 minimax polynomial.
245	// 10^lo ~ 1 + COEFFS[0] lo + ... + COEFFS[4] * lo^5*
246	double lo_sq = rr.lo * rr.lo;
247	double c0 = fputil::multiply_add(x: rr.lo, y: Exp10Base::COEFFS[`0`], z: `1.0`);
248	double c1 =
249	fputil::multiply_add(x: rr.lo, y: Exp10Base::COEFFS[`2`], z: Exp10Base::COEFFS[`1`]);
250	double c2 =
251	fputil::multiply_add(x: rr.lo, y: Exp10Base::COEFFS[`4`], z: Exp10Base::COEFFS[`3`]);
252	double exp10_lo = fputil::polyeval(x: lo_sq, a0: c0, a: c1, a: c2);
253	// 10^x - 1 = 2^(mid + hi) 10^lo - 1*
254	// ~ mh exp10_lo - 1*
255	return static_cast<float>(fputil::multiply_add(x: exp10_lo, y: rr.mh, z: -`1.0`));
256	}
257
258	} // namespace math
259
260	} // namespace LIBC_NAMESPACE_DECL
261
262	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F_H
263

Browse the source code of llvm_projects/libc/src/__support/math/exp10m1f.h