exp2m1f.h source code [llvm_projects/libc/src/__support/math/exp2m1f.h]

1	//===-- Implementation header for exp2m1f ------------------------- C++--===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
11
12	#include "exp10f_utils.h"
13	#include "src/__support/FPUtil/FEnvImpl.h"
14	#include "src/__support/FPUtil/FPBits.h"
15	#include "src/__support/FPUtil/PolyEval.h"
16	#include "src/__support/FPUtil/except_value_utils.h"
17	#include "src/__support/FPUtil/multiply_add.h"
18	#include "src/__support/FPUtil/rounding_mode.h"
19	#include "src/__support/common.h"
20	#include "src/__support/libc_errno.h"
21	#include "src/__support/macros/config.h"
22	#include "src/__support/macros/optimization.h"
23	#include "src/__support/macros/properties/cpu_features.h"
24
25	namespace LIBC_NAMESPACE_DECL {
26
27	namespace math {
28
29	LIBC_INLINE float exp2m1f(float x) {
30	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
31	constexpr size_t N_EXCEPTS_LO = `8`;
32
33	constexpr fputil::ExceptValues<float, N_EXCEPTS_LO> EXP2M1F_EXCEPTS_LO = {.values: {
34	// (input, RZ output, RU offset, RD offset, RN offset)
35	// x = 0x1.36dc8ep-36, exp2m1f(x) = 0x1.aef212p-37 (RZ)
36	{.input: `0x2d9b'6e47U`, .rnd_towardzero_result: `0x2d57'7909U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
37	// x = 0x1.224936p-19, exp2m1f(x) = 0x1.926c0ep-20 (RZ)
38	{.input: `0x3611'249bU`, .rnd_towardzero_result: `0x35c9'3607U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
39	// x = 0x1.d16d2p-20, exp2m1f(x) = 0x1.429becp-20 (RZ)
40	{.input: `0x35e8'b690U`, .rnd_towardzero_result: `0x35a1'4df6U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
41	// x = 0x1.17949ep-14, exp2m1f(x) = 0x1.8397p-15 (RZ)
42	{.input: `0x388b'ca4fU`, .rnd_towardzero_result: `0x3841'cb80U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
43	// x = -0x1.9c3e1ep-38, exp2m1f(x) = -0x1.1dbeacp-38 (RZ)
44	{.input: `0xacce'1f0fU`, .rnd_towardzero_result: `0xac8e'df56U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
45	// x = -0x1.4d89b4p-32, exp2m1f(x) = -0x1.ce61b6p-33 (RZ)
46	{.input: `0xafa6'c4daU`, .rnd_towardzero_result: `0xaf67'30dbU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
47	// x = -0x1.a6eac4p-10, exp2m1f(x) = -0x1.24fadap-10 (RZ)
48	{.input: `0xbad3'7562U`, .rnd_towardzero_result: `0xba92'7d6dU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
49	// x = -0x1.e7526ep-6, exp2m1f(x) = -0x1.4e53dep-6 (RZ)
50	{.input: `0xbcf3'a937U`, .rnd_towardzero_result: `0xbca7'29efU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
51	}};
52
53	constexpr size_t N_EXCEPTS_HI = `3`;
54
55	constexpr fputil::ExceptValues<float, N_EXCEPTS_HI> EXP2M1F_EXCEPTS_HI = {.values: {
56	// (input, RZ output, RU offset, RD offset, RN offset)
57	// x = 0x1.16a972p-1, exp2m1f(x) = 0x1.d545b2p-2 (RZ)
58	{.input: `0x3f0b'54b9U`, .rnd_towardzero_result: `0x3eea'a2d9U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
59	// x = -0x1.9f12acp-5, exp2m1f(x) = -0x1.1ab68cp-5 (RZ)
60	{.input: `0xbd4f'8956U`, .rnd_towardzero_result: `0xbd0d'5b46U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
61	// x = -0x1.de7b9cp-5, exp2m1f(x) = -0x1.4508f4p-5 (RZ)
62	{.input: `0xbd6f'3dceU`, .rnd_towardzero_result: `0xbd22'847aU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
63	}};
64	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
65
66	using FPBits = fputil::FPBits<float>;
67	FPBits xbits(x);
68
69	uint32_t x_u = xbits.uintval();
70	uint32_t x_abs = x_u & `0x7fff'ffffU`;
71
72	// When \|x\| >= 128, or x is nan, or \|x\| <= 2^-5
73	if (LIBC_UNLIKELY(x_abs >= `0x4300'0000U` \|\| x_abs <= `0x3d00'0000U`)) {
74	// \|x\| <= 2^-5
75	if (x_abs <= `0x3d00'0000U`) {
76	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
77	if (auto r = EXP2M1F_EXCEPTS_LO.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
78	return r.value();
79	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
80
81	// Minimax polynomial generated by Sollya with:
82	// > display = hexadecimal;
83	// > fpminimax((2^x - 1)/x, 5, [\|D...\|], [-2^-5, 2^-5]);
84	constexpr double COEFFS[] = {
85	`0x1.62e42fefa39f3p-1`, `0x1.ebfbdff82c57bp-3`, `0x1.c6b08d6f2d7aap-5`,
86	`0x1.3b2ab6fc92f5dp-7`, `0x1.5d897cfe27125p-10`, `0x1.43090e61e6af1p-13`};
87	double xd = x;
88	double xsq = xd * xd;
89	double c0 = fputil::multiply_add(x: xd, y: COEFFS[`1`], z: COEFFS[`0`]);
90	double c1 = fputil::multiply_add(x: xd, y: COEFFS[`3`], z: COEFFS[`2`]);
91	double c2 = fputil::multiply_add(x: xd, y: COEFFS[`5`], z: COEFFS[`4`]);
92	double p = fputil::polyeval(x: xsq, a0: c0, a: c1, a: c2);
93	return static_cast<float>(p * xd);
94	}
95
96	// x >= 128, or x is nan
97	if (xbits.is_pos()) {
98	if (xbits.is_finite()) {
99	#ifndef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
100	int rounding = fputil::quick_get_round();
101	if (rounding == FE_DOWNWARD \|\| rounding == FE_TOWARDZERO)
102	return FPBits::max_normal().get_val();
103	#endif
104
105	fputil::set_errno_if_required(ERANGE);
106	fputil::raise_except_if_required(FE_OVERFLOW);
107	}
108
109	// x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan
110	return x + FPBits::inf().get_val();
111	}
112	}
113
114	if (LIBC_UNLIKELY(x <= -`25.0f`)) {
115	// 2^(-inf) - 1 = -1
116	if (xbits.is_inf())
117	return -`1.0f`;
118	// 2^nan - 1 = nan
119	if (xbits.is_nan())
120	return x;
121
122	#ifndef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
123	int rounding = fputil::quick_get_round();
124	if (rounding == FE_UPWARD \|\| rounding == FE_TOWARDZERO)
125	return -`0x1.ffff'fep-1f`; // -1.0f + 0x1.0p-24f
126	#endif
127
128	fputil::set_errno_if_required(ERANGE);
129	fputil::raise_except_if_required(FE_UNDERFLOW);
130	return -`1.0f`;
131	}
132
133	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
134	if (auto r = EXP2M1F_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
135	return r.value();
136	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
137
138	// For -25 < x < 128, to compute 2^x, we perform the following range
139	// reduction: find hi, mid, lo such that:
140	// x = hi + mid + lo, in which:
141	// hi is an integer,
142	// 0 <= mid 2^5 < 32 is an integer,*
143	// -2^(-6) <= lo <= 2^(-6).
144	// In particular,
145	// hi + mid = round(x 2^5) * 2^(-5).*
146	// Then,
147	// 2^x = 2^(hi + mid + lo) = 2^hi 2^mid * 2^lo.*
148	// 2^mid is stored in the lookup table of 32 elements.
149	// 2^lo is computed using a degree-4 minimax polynomial generated by Sollya.
150	// We perform 2^hi 2^mid by simply add hi to the exponent field of 2^mid.*
151
152	// kf = (hi + mid) 2^5 = round(x * 2^5)*
153	float kf = `0`;
154	int k = `0`;
155	#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
156	kf = fputil::nearest_integer(x * `32.0f`);
157	k = static_cast<int>(kf);
158	#else
159	constexpr float HALF[`2`] = {`0.5f`, -`0.5f`};
160	k = static_cast<int>(fputil::multiply_add(x, y: `32.0f`, z: HALF[x < `0.0f`]));
161	kf = static_cast<float>(k);
162	#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
163
164	// lo = x - (hi + mid) = x - kf 2^(-5)*
165	double lo = fputil::multiply_add(x: -`0x1.0p-5f`, y: kf, z: x);
166
167	// hi = floor(kf 2^(-4))*
168	// exp2_hi = shift hi to the exponent field of double precision.
169	int64_t exp2_hi =
170	static_cast<int64_t>(static_cast<uint64_t>(k >> ExpBase::MID_BITS)
171	<< fputil::FPBits<double>::FRACTION_LEN);
172	// mh = 2^hi 2^mid*
173	// mh_bits = bit field of mh
174	int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp2_hi;
175	double mh = fputil::FPBits<double>(static_cast<uint64_t>(mh_bits)).get_val();
176
177	// Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
178	// > display = hexadecimal;
179	// > fpminimax((2^x - 1)/x, 4, [\|D...\|], [-2^-6, 2^-6]);
180	constexpr double COEFFS[`5`] = {`0x1.62e42fefa39efp-1`, `0x1.ebfbdff8131c4p-3`,
181	`0x1.c6b08d7061695p-5`, `0x1.3b2b1bee74b2ap-7`,
182	`0x1.5d88091198529p-10`};
183	double lo_sq = lo * lo;
184	double c1 = fputil::multiply_add(x: lo, y: COEFFS[`0`], z: `1.0`);
185	double c2 = fputil::multiply_add(x: lo, y: COEFFS[`2`], z: COEFFS[`1`]);
186	double c3 = fputil::multiply_add(x: lo, y: COEFFS[`4`], z: COEFFS[`3`]);
187	double exp2_lo = fputil::polyeval(x: lo_sq, a0: c1, a: c2, a: c3);
188	// 2^x - 1 = 2^(hi + mid + lo) - 1
189	// = 2^(hi + mid) 2^lo - 1*
190	// ~ mh (1 + lo * P(lo)) - 1*
191	// = mh exp2_lo - 1*
192	return static_cast<float>(fputil::multiply_add(x: exp2_lo, y: mh, z: -`1.0`));
193	}
194
195	} // namespace math
196
197	} // namespace LIBC_NAMESPACE_DECL
198
199	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F_H
200

Browse the source code of llvm_projects/libc/src/__support/math/exp2m1f.h