cbrt.h source code [llvm_projects/libc/src/__support/math/cbrt.h]

1	//===-- Implementation header for erff --------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H
11
12	#include "src/__support/FPUtil/FEnvImpl.h"
13	#include "src/__support/FPUtil/FPBits.h"
14	#include "src/__support/FPUtil/PolyEval.h"
15	#include "src/__support/FPUtil/double_double.h"
16	#include "src/__support/FPUtil/dyadic_float.h"
17	#include "src/__support/FPUtil/multiply_add.h"
18	#include "src/__support/integer_literals.h"
19	#include "src/__support/macros/config.h"
20	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
21
22	namespace LIBC_NAMESPACE_DECL {
23
24	namespace math {
25
26	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
27	#define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
28	#endif
29
30	namespace cbrt_internal {
31	using namespace fputil;
32
33	// Initial approximation of x^(-2/3) for 1 <= x < 2.
34	// Polynomial generated by Sollya with:
35	// > P = fpminimax(x^(-2/3), 7, [\|D...\|], [1, 2]);
36	// > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]);
37	// 0x1.28...p-21
38	LIBC_INLINE double initial_approximation(double x) {
39	constexpr double COEFFS[`8`] = {
40	`0x1.bc52aedead5c6p1`, -`0x1.b52bfebf110b3p2`, `0x1.1d8d71d53d126p3`,
41	-`0x1.de2db9e81cf87p2`, `0x1.0154ca06153bdp2`, -`0x1.5973c66ee6da7p0`,
42	`0x1.07bf6ac832552p-2`, -`0x1.5e53d9ce41cb8p-6`,
43	};
44
45	double x_sq = x * x;
46
47	double c0 = fputil::multiply_add(x, y: COEFFS[`1`], z: COEFFS[`0`]);
48	double c1 = fputil::multiply_add(x, y: COEFFS[`3`], z: COEFFS[`2`]);
49	double c2 = fputil::multiply_add(x, y: COEFFS[`5`], z: COEFFS[`4`]);
50	double c3 = fputil::multiply_add(x, y: COEFFS[`7`], z: COEFFS[`6`]);
51
52	double x_4 = x_sq * x_sq;
53	double d0 = fputil::multiply_add(x: x_sq, y: c1, z: c0);
54	double d1 = fputil::multiply_add(x: x_sq, y: c3, z: c2);
55
56	return fputil::multiply_add(x: x_4, y: d1, z: d0);
57	}
58
59	// Get the error term for Newton iteration:
60	// h(x) = x^3 a^2 - 1,*
61	#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
62	LIBC_INLINE double get_error(const DoubleDouble &x_3,
63	const DoubleDouble &a_sq) {
64	return fputil::multiply_add(x_3.hi, a_sq.hi, -`1.0`) +
65	fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo);
66	}
67	#else
68	LIBC_INLINE constexpr double get_error(const DoubleDouble &x_3,
69	const DoubleDouble &a_sq) {
70	DoubleDouble x_3_a_sq = fputil::quick_mult(a: a_sq, b: x_3);
71	return (x_3_a_sq.hi - `1.0`) + x_3_a_sq.lo;
72	}
73	#endif
74
75	} // namespace cbrt_internal
76
77	// Correctly rounded cbrt algorithm:
78	//
79	// === Step 1 - Range reduction ===
80	// For x = (-1)^s 2^e * (1.m), we get 2 reduced arguments x_r and a as:*
81	// x_r = 1.m
82	// a = (-1)^s 2^(e % 3) * (1.m)*
83	// Then cbrt(x) = x^(1/3) can be computed as:
84	// x^(1/3) = 2^(e / 3) a^(1/3).*
85	//
86	// In order to avoid division, we compute a^(-2/3) using Newton method and then
87	// multiply the results by a:
88	// a^(1/3) = a a^(-2/3).*
89	//
90	// === Step 2 - First approximation to a^(-2/3) ===
91	// First, we use a degree-7 minimax polynomial generated by Sollya to
92	// approximate x_r^(-2/3) for 1 <= x_r < 2.
93	// p = P(x_r) ~ x_r^(-2/3),
94	// with relative errors bounded by:
95	// \| p / x_r^(-2/3) - 1 \| < 1.16 2^-21.*
96	//
97	// Then we multiply with 2^(e % 3) from a small lookup table to get:
98	// x_0 = 2^(-2(e % 3)/3) * p*
99	// ~ 2^(-2(e % 3)/3) * x_r^(-2/3)*
100	// = a^(-2/3)
101	// With relative errors:
102	// \| x_0 / a^(-2/3) - 1 \| < 1.16 2^-21.*
103	// This step is done in double precision.
104	//
105	// === Step 3 - First Newton iteration ===
106	// We follow the method described in:
107	// Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation
108	// in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf
109	// to derive multiplicative Newton iterations as below:
110	// Let x_n be the nth approximation to a^(-2/3). Define the n^th error as:
111	// h_n = x_n^3 a^2 - 1*
112	// Then:
113	// a^(-2/3) = x_n / (1 + h_n)^(1/3)
114	// = x_n (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...)*
115	// using the Taylor series expansion of (1 + h_n)^(-1/3).
116	//
117	// Apply to x_0 above:
118	// h_0 = x_0^3 a^2 - 1*
119	// = a^2 (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)),*
120	// it's bounded by:
121	// \|h_0\| < 4 3 * 1.16 * 2^-21 * 4 < 2^-17.*
122	// So in the first iteration step, we use:
123	// x_1 = x_0 (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3)*
124	// Its relative error is bounded by:
125	// \| x_1 / a^(-2/3) - 1 \| < 35/242 \|h_0\|^4 < 2^-70.*
126	// Then we perform Ziv's rounding test and check if the answer is exact.
127	// This step is done in double-double precision.
128	//
129	// === Step 4 - Second Newton iteration ===
130	// If the Ziv's rounding test from the previous step fails, we define the error
131	// term:
132	// h_1 = x_1^3 a^2 - 1,*
133	// And perform another iteration:
134	// x_2 = x_1 (1 - h_1 / 3)*
135	// with the relative errors exceed the precision of double-double.
136	// We then check the Ziv's accuracy test with relative errors < 2^-102 to
137	// compensate for rounding errors.
138	//
139	// === Step 5 - Final iteration ===
140	// If the Ziv's accuracy test from the previous step fails, we perform another
141	// iteration in 128-bit precision and check for exact outputs.
142	//
143	// TODO: It is possible to replace this costly computation step with special
144	// exceptional handling, similar to what was done in the CORE-MATH project:
145	// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c
146
147	LIBC_INLINE double cbrt(double x) {
148	using DoubleDouble = fputil::DoubleDouble;
149	using namespace cbrt_internal;
150	using FPBits = fputil::FPBits<double>;
151
152	uint64_t x_abs = FPBits (x).abs().uintval();
153
154	unsigned exp_bias_correction = `682`; // 1023 2/3*
155
156	if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() \|\|
157	x_abs >= FPBits::inf().uintval())) {
158	if (x == `0.0` \|\| x_abs >= FPBits::inf().uintval())
159	// x is 0, Inf, or NaN.
160	// Make sure it works for FTZ/DAZ modes.
161	return static_cast<double>(x + x);
162
163	// x is non-zero denormal number.
164	// Normalize x.
165	x *= `0x1.0p60`;
166	exp_bias_correction -= `20`;
167	}
168
169	FPBits x_bits(x);
170
171	// When using biased exponent of x in double precision,
172	// x_e = real_exponent_of_x + 1023
173	// Then:
174	// x_e / 3 = real_exponent_of_x / 3 + 1023/3
175	// = real_exponent_of_x / 3 + 341
176	// So to make it the correct biased exponent of x^(1/3), we add
177	// 1023 - 341 = 682
178	// to the quotient x_e / 3.
179	unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent());
180	unsigned out_e = (x_e / `3` + exp_bias_correction);
181	unsigned shift_e = x_e % `3`;
182
183	// Set x_r = 1.mantissa
184	double x_r =
185	FPBits (x_bits.get_mantissa() \|
186	(static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN))
187	.get_val();
188
189	// Set a = (-1)^x_sign 2^(x_e % 3) * (1.mantissa)*
190	uint64_t a_bits = x_bits.uintval() & `0x800F'FFFF'FFFF'FFFF`;
191	a_bits \|=
192	(static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS))
193	<< FPBits::FRACTION_LEN);
194	double a = FPBits (a_bits).get_val();
195
196	// Initial approximation of x_r^(-2/3).
197	double p = initial_approximation(x: x_r);
198
199	// Look up for 2^(-2n/3) used for first approximation step.*
200	constexpr double EXP2_M2_OVER_3[`3`] = {`1.0`, `0x1.428a2f98d728bp-1`,
201	`0x1.965fea53d6e3dp-2`};
202
203	// x0 is an initial approximation of a^(-2/3) for 1 <= \|a\| < 8.
204	// Relative error: < 1.16 2^(-21).*
205	double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p);
206
207	// First iteration in double precision.
208	DoubleDouble a_sq = fputil::exact_mult(a, b: a);
209
210	// h0 = x0^3 a^2 - 1*
211	DoubleDouble x0_sq = fputil::exact_mult(a: x0, b: x0);
212	DoubleDouble x0_3 = fputil::quick_mult(a: x0, b: x0_sq);
213
214	double h0 = get_error(x_3: x0_3, a_sq);
215
216	#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
217	constexpr double REL_ERROR = `0`;
218	#else
219	constexpr double REL_ERROR = `0x1.0p-51`;
220	#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
221
222	// Taylor polynomial of (1 + h)^(-1/3):
223	// (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ...
224	constexpr double ERR_COEFFS[`3`] = {
225	-`0x1.5555555555555p-2` - REL_ERROR, // -1/3 - relative_error
226	`0x1.c71c71c71c71cp-3`, // 2/9
227	-`0x1.61f9add3c0ca4p-3`, // -14/81
228	};
229	// e0 = -14 h^2 / 81 + 2 * h / 9 - 1/3 - relative_error.*
230	double e0 = fputil::polyeval(x: h0, a0: ERR_COEFFS[`0`], a: ERR_COEFFS[`1`], a: ERR_COEFFS[`2`]);
231	double x0_h0 = x0 * h0;
232
233	// x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81)
234	// x1 approximate a^(-2/3) with relative errors bounded by:
235	// \| x1 / a^(-2/3) - 1 \| < (34/243) h0^4 < h0 REL_ERROR*
236	DoubleDouble x1_dd{.lo: x0_h0 * e0, .hi: x0};
237
238	// r1 = x1 a ~ a^(-2/3) * a = a^(1/3).*
239	DoubleDouble r1 = fputil::quick_mult(a, b: x1_dd);
240
241	// Lambda function to update the exponent of the result.
242	auto update_exponent = [=](double r) -> double {
243	uint64_t r_m = FPBits (r).uintval() - `0x3FF0'0000'0000'0000`;
244	// Adjust exponent and sign.
245	uint64_t r_bits =
246	r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN);
247	return FPBits (r_bits).get_val();
248	};
249
250	#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
251	// TODO: We probably don't need to use double-double if accurate tests and
252	// passes are skipped.
253	return update_exponent(r1.hi + r1.lo);
254	#else
255	// Accurate checks and passes.
256	double r1_lower = r1.hi + r1.lo;
257	double r1_upper =
258	r1.hi + fputil::multiply_add(x: x0_h0, y: `2.0` * REL_ERROR * a, z: r1.lo);
259
260	// Ziv's accuracy test.
261	if (LIBC_LIKELY(r1_upper == r1_lower)) {
262	// Test for exact outputs.
263	// Check if lower (52 - 17 = 35) bits are 0's.
264	if (LIBC_UNLIKELY((FPBits (r1_lower).uintval() & `0x0000'0007'FFFF'FFFF`) ==
265	`0`)) {
266	double r1_err = (r1_lower - r1.hi) - r1.lo;
267	if (FPBits (r1_err).abs().get_val() < `0x1.0p69`)
268	fputil::clear_except_if_required(FE_INEXACT);
269	}
270
271	return update_exponent(r1_lower);
272	}
273
274	// Accuracy test failed, perform another Newton iteration.
275	double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0;
276
277	// Second iteration in double-double precision.
278	// h1 = x1^3 a^2 - 1.*
279	DoubleDouble x1_sq = fputil::exact_mult(a: x1, b: x1);
280	DoubleDouble x1_3 = fputil::quick_mult(a: x1, b: x1_sq);
281	double h1 = get_error(x_3: x1_3, a_sq);
282
283	// e1 = -x1h1/3.*
284	double e1 = h1 * (x1 * -`0x1.5555555555555p-2`);
285	// x2 = x1(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101.*
286	DoubleDouble x2 = fputil::exact_add(a: x1, b: e1);
287	// r2 = a x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100.*
288	DoubleDouble r2 = fputil::quick_mult(a, b: x2);
289
290	double r2_upper = r2.hi + fputil::multiply_add(x: a, y: `0x1.0p-102`, z: r2.lo);
291	double r2_lower = r2.hi + fputil::multiply_add(x: a, y: -`0x1.0p-102`, z: r2.lo);
292
293	// Ziv's accuracy test.
294	if (LIBC_LIKELY(r2_upper == r2_lower))
295	return update_exponent(r2_upper);
296
297	using DFloat128 = fputil::DyadicFloat<`128`>;
298
299	// TODO: Investigate removing float128 and just list exceptional cases.
300	// Apply another Newton iteration with ~126-bit accuracy.
301	DFloat128 x2_f128 = fputil::quick_add(a: DFloat128 (x2.hi), b: DFloat128 (x2.lo));
302	// x2^3
303	DFloat128 x2_3 =
304	fputil::quick_mul(a: fputil::quick_mul(a: x2_f128, b: x2_f128), b: x2_f128);
305	// a^2
306	DFloat128 a_sq_f128 = fputil::quick_mul(a: DFloat128 (a), b: DFloat128 (a));
307	// x2^3 a^2*
308	DFloat128 x2_3_a_sq = fputil::quick_mul(a: x2_3, b: a_sq_f128);
309	// h2 = x2^3 a^2 - 1*
310	DFloat128 h2_f128 = fputil::quick_add(a: x2_3_a_sq, b: DFloat128 (-`1.0`));
311	double h2 = static_cast<double>(h2_f128);
312	// t2 = 1 - h2 / 3
313	DFloat128 t2 = fputil::quick_add(a: DFloat128 (`1.0`),
314	b: DFloat128 (h2 * (-`0x1.5555555555555p-2`)));
315	// x3 = x2 (1 - h2 / 3) ~ a^(-2/3)*
316	DFloat128 x3 = fputil::quick_mul(a: x2_f128, b: t2);
317	// r3 = a x3 ~ a * a^(-2/3) = a^(1/3)*
318	DFloat128 r3 = fputil::quick_mul(a: DFloat128 (a), b: x3);
319
320	// Check for exact cases:
321	DFloat128::MantissaType rounding_bits =
322	r3.mantissa & `0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128`;
323
324	double result = static_cast<double>(r3);
325	if ((rounding_bits < `0x0000'0000'0000'0000'0000'0000'0000'000F_u128`) \|\|
326	(rounding_bits >= `0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128`)) {
327	// Output is exact.
328	r3.mantissa &= `0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128`;
329
330	if (rounding_bits >= `0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128`) {
331	DFloat128 tmp{r3.sign, r3.exponent - `123`,
332	`0x8000'0000'0000'0000'0000'0000'0000'0000_u128`};
333	DFloat128 r4 = fputil::quick_add(a: r3, b: tmp);
334	result = static_cast<double>(r4);
335	} else {
336	result = static_cast<double>(r3);
337	}
338
339	fputil::clear_except_if_required(FE_INEXACT);
340	}
341
342	return update_exponent(result);
343	#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
344	}
345
346	} // namespace math
347
348	} // namespace LIBC_NAMESPACE_DECL
349
350	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H
351

Browse the source code of llvm_projects/libc/src/__support/math/cbrt.h