expm1f.h source code [llvm_projects/libc/src/__support/math/expm1f.h]

1	//===-- Implementation header for expm1f ------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H
11
12	#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
13	#include "src/__support/FPUtil/BasicOperations.h"
14	#include "src/__support/FPUtil/FEnvImpl.h"
15	#include "src/__support/FPUtil/FMA.h"
16	#include "src/__support/FPUtil/FPBits.h"
17	#include "src/__support/FPUtil/PolyEval.h"
18	#include "src/__support/FPUtil/multiply_add.h"
19	#include "src/__support/FPUtil/nearest_integer.h"
20	#include "src/__support/FPUtil/rounding_mode.h"
21	#include "src/__support/common.h"
22	#include "src/__support/macros/config.h"
23	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
24	#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
25
26	namespace LIBC_NAMESPACE_DECL {
27
28	namespace math {
29
30	LIBC_INLINE float expm1f(float x) {
31	using namespace common_constants_internal;
32	using FPBits = typename fputil::FPBits<float>;
33	FPBits xbits(x);
34
35	uint32_t x_u = xbits.uintval();
36	uint32_t x_abs = x_u & `0x7fff'ffffU`;
37
38	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
39	// Exceptional value
40	if (LIBC_UNLIKELY(x_u == `0x3e35'bec5U`)) { // x = 0x1.6b7d8ap-3f
41	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
42	return `0x1.8dbe64p-3f`;
43	#else
44	int round_mode = fputil::quick_get_round();
45	if (round_mode == FE_TONEAREST \|\| round_mode == FE_UPWARD)
46	return `0x1.8dbe64p-3f`;
47	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
48	return `0x1.8dbe62p-3f`;
49	}
50	#if !defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
51	if (LIBC_UNLIKELY(x_u == `0xbdc1'c6cbU`)) { // x = -0x1.838d96p-4f
52	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
53	return -`0x1.71c884p-4f`;
54	#else
55	int round_mode = fputil::quick_get_round();
56	if (round_mode == FE_TONEAREST \|\| round_mode == FE_DOWNWARD)
57	return -`0x1.71c884p-4f`;
58	return -`0x1.71c882p-4f`;
59	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
60	}
61	#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
62	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
63
64	// When \|x\| > 25log(2), or nan*
65	if (LIBC_UNLIKELY(x_abs >= `0x418a'a123U`)) {
66	// x < log(2^-25)
67	if (xbits.is_neg()) {
68	// exp(-Inf) = 0
69	if (xbits.is_inf())
70	return -`1.0f`;
71	// exp(nan) = nan
72	if (xbits.is_nan())
73	return x;
74	#ifndef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
75	int round_mode = fputil::quick_get_round();
76	if (round_mode == FE_UPWARD \|\| round_mode == FE_TOWARDZERO)
77	return -`0x1.ffff'fep-1f`; // -1.0f + 0x1.0p-24f
78	#endif
79	return -`1.0f`;
80	} else {
81	// x >= 89 or nan
82	if (xbits.uintval() >= `0x42b2'0000`) {
83	if (xbits.uintval() < `0x7f80'0000U`) {
84	#ifndef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
85	int rounding = fputil::quick_get_round();
86	if (rounding == FE_DOWNWARD \|\| rounding == FE_TOWARDZERO)
87	return FPBits::max_normal().get_val();
88	#endif
89
90	fputil::set_errno_if_required(ERANGE);
91	fputil::raise_except_if_required(FE_OVERFLOW);
92	}
93	return x + FPBits::inf().get_val();
94	}
95	}
96	}
97
98	// \|x\| < 2^-4
99	if (x_abs < `0x3d80'0000U`) {
100	// \|x\| < 2^-25
101	if (x_abs < `0x3300'0000U`) {
102	// x = -0.0f
103	if (LIBC_UNLIKELY(xbits.uintval() == `0x8000'0000U`))
104	return x;
105	// When \|x\| < 2^-25, the relative error of the approximation e^x - 1 ~ x
106	// is:
107	// \|(e^x - 1) - x\| / \|e^x - 1\| < \|x^2\| / \|x\|
108	// = \|x\|
109	// < 2^-25
110	// < epsilon(1)/2.
111	// So the correctly rounded values of expm1(x) are:
112	// = x + eps(x) if rounding mode = FE_UPWARD,
113	// or (rounding mode = FE_TOWARDZERO and x is
114	// negative),
115	// = x otherwise.
116	// To simplify the rounding decision and make it more efficient, we use
117	// fma(x, x, x) ~ x + x^2 instead.
118	// Note: to use the formula x + x^2 to decide the correct rounding, we
119	// do need fma(x, x, x) to prevent underflow caused by xx when \|x\| <*
120	// 2^-76. For targets without FMA instructions, we simply use double for
121	// intermediate results as it is more efficient than using an emulated
122	// version of FMA.
123	#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
124	return fputil::multiply_add(x, x, x);
125	#else
126	double xd = x;
127	return static_cast<float>(fputil::multiply_add(x: xd, y: xd, z: xd));
128	#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
129	}
130
131	constexpr double COEFFS[] = {`0x1p-1`,
132	`0x1.55555555557ddp-3`,
133	`0x1.55555555552fap-5`,
134	`0x1.111110fcd58b7p-7`,
135	`0x1.6c16c1717660bp-10`,
136	`0x1.a0241f0006d62p-13`,
137	`0x1.a01e3f8d3c06p-16`};
138
139	// 2^-25 <= \|x\| < 2^-4
140	double xd = static_cast<double>(x);
141	double xsq = xd * xd;
142	// Degree-8 minimax polynomial generated by Sollya with:
143	// > display = hexadecimal;
144	// > P = fpminimax((expm1(x) - x)/x^2, 6, [\|D...\|], [-2^-4, 2^-4]);
145
146	double c0 = fputil::multiply_add(x: xd, y: COEFFS[`1`], z: COEFFS[`0`]);
147	double c1 = fputil::multiply_add(x: xd, y: COEFFS[`3`], z: COEFFS[`2`]);
148	double c2 = fputil::multiply_add(x: xd, y: COEFFS[`5`], z: COEFFS[`4`]);
149
150	double r = fputil::polyeval(x: xsq, a0: c0, a: c1, a: c2, a: COEFFS[`6`]);
151	return static_cast<float>(fputil::multiply_add(x: r, y: xsq, z: xd));
152	}
153
154	// For -18 < x < 89, to compute expm1(x), we perform the following range
155	// reduction: find hi, mid, lo such that:
156	// x = hi + mid + lo, in which
157	// hi is an integer,
158	// mid 2^7 is an integer*
159	// -2^(-8) <= lo < 2^-8.
160	// In particular,
161	// hi + mid = round(x 2^7) * 2^(-7).*
162	// Then,
163	// expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) exp(mid) * exp(lo) - 1.*
164	// We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
165	// respectively. exp(lo) is computed using a degree-4 minimax polynomial
166	// generated by Sollya.
167
168	// x_hi = hi + mid.
169	float kf = fputil::nearest_integer(x: x * `0x1.0p7f`);
170	int x_hi = static_cast<int>(kf);
171	// Subtract (hi + mid) from x to get lo.
172	double xd = static_cast<double>(fputil::multiply_add(x: kf, y: -`0x1.0p-7f`, z: x));
173	x_hi += `104` << `7`;
174	// hi = x_hi >> 7
175	double exp_hi = EXP_M1[x_hi >> `7`];
176	// lo = x_hi & 0x0000'007fU;
177	double exp_mid = EXP_M2[x_hi & `0x7f`];
178	double exp_hi_mid = exp_hi * exp_mid;
179	// Degree-4 minimax polynomial generated by Sollya with the following
180	// commands:
181	// > display = hexadecimal;
182	// > Q = fpminimax(expm1(x)/x, 3, [\|D...\|], [-2^-8, 2^-8]);
183	// > Q;
184	double exp_lo =
185	fputil::polyeval(x: xd, a0: `0x1.0p0`, a: `0x1.ffffffffff777p-1`, a: `0x1.000000000071cp-1`,
186	a: `0x1.555566668e5e7p-3`, a: `0x1.55555555ef243p-5`);
187	return static_cast<float>(fputil::multiply_add(x: exp_hi_mid, y: exp_lo, z: -`1.0`));
188	}
189
190	} // namespace math
191
192	} // namespace LIBC_NAMESPACE_DECL
193
194	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H
195

Browse the source code of llvm_projects/libc/src/__support/math/expm1f.h