1//===-- Implementation header for erff --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H
11
12#include "src/__support/FPUtil/FEnvImpl.h"
13#include "src/__support/FPUtil/FPBits.h"
14#include "src/__support/FPUtil/PolyEval.h"
15#include "src/__support/FPUtil/double_double.h"
16#include "src/__support/FPUtil/dyadic_float.h"
17#include "src/__support/FPUtil/multiply_add.h"
18#include "src/__support/integer_literals.h"
19#include "src/__support/macros/config.h"
20#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
21
22namespace LIBC_NAMESPACE_DECL {
23
24namespace math {
25
26#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
27#define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
28#endif
29
30namespace cbrt_internal {
31using namespace fputil;
32
33// Initial approximation of x^(-2/3) for 1 <= x < 2.
34// Polynomial generated by Sollya with:
35// > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]);
36// > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]);
37// 0x1.28...p-21
38LIBC_INLINE double initial_approximation(double x) {
39 constexpr double COEFFS[8] = {
40 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3,
41 -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0,
42 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6,
43 };
44
45 double x_sq = x * x;
46
47 double c0 = fputil::multiply_add(x, y: COEFFS[1], z: COEFFS[0]);
48 double c1 = fputil::multiply_add(x, y: COEFFS[3], z: COEFFS[2]);
49 double c2 = fputil::multiply_add(x, y: COEFFS[5], z: COEFFS[4]);
50 double c3 = fputil::multiply_add(x, y: COEFFS[7], z: COEFFS[6]);
51
52 double x_4 = x_sq * x_sq;
53 double d0 = fputil::multiply_add(x: x_sq, y: c1, z: c0);
54 double d1 = fputil::multiply_add(x: x_sq, y: c3, z: c2);
55
56 return fputil::multiply_add(x: x_4, y: d1, z: d0);
57}
58
59// Get the error term for Newton iteration:
60// h(x) = x^3 * a^2 - 1,
61#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
62LIBC_INLINE double get_error(const DoubleDouble &x_3,
63 const DoubleDouble &a_sq) {
64 return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) +
65 fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo);
66}
67#else
68LIBC_INLINE constexpr double get_error(const DoubleDouble &x_3,
69 const DoubleDouble &a_sq) {
70 DoubleDouble x_3_a_sq = fputil::quick_mult(a: a_sq, b: x_3);
71 return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo;
72}
73#endif
74
75} // namespace cbrt_internal
76
77// Correctly rounded cbrt algorithm:
78//
79// === Step 1 - Range reduction ===
80// For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as:
81// x_r = 1.m
82// a = (-1)^s * 2^(e % 3) * (1.m)
83// Then cbrt(x) = x^(1/3) can be computed as:
84// x^(1/3) = 2^(e / 3) * a^(1/3).
85//
86// In order to avoid division, we compute a^(-2/3) using Newton method and then
87// multiply the results by a:
88// a^(1/3) = a * a^(-2/3).
89//
90// === Step 2 - First approximation to a^(-2/3) ===
91// First, we use a degree-7 minimax polynomial generated by Sollya to
92// approximate x_r^(-2/3) for 1 <= x_r < 2.
93// p = P(x_r) ~ x_r^(-2/3),
94// with relative errors bounded by:
95// | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21.
96//
97// Then we multiply with 2^(e % 3) from a small lookup table to get:
98// x_0 = 2^(-2*(e % 3)/3) * p
99// ~ 2^(-2*(e % 3)/3) * x_r^(-2/3)
100// = a^(-2/3)
101// With relative errors:
102// | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21.
103// This step is done in double precision.
104//
105// === Step 3 - First Newton iteration ===
106// We follow the method described in:
107// Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation
108// in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf
109// to derive multiplicative Newton iterations as below:
110// Let x_n be the nth approximation to a^(-2/3). Define the n^th error as:
111// h_n = x_n^3 * a^2 - 1
112// Then:
113// a^(-2/3) = x_n / (1 + h_n)^(1/3)
114// = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...)
115// using the Taylor series expansion of (1 + h_n)^(-1/3).
116//
117// Apply to x_0 above:
118// h_0 = x_0^3 * a^2 - 1
119// = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)),
120// it's bounded by:
121// |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17.
122// So in the first iteration step, we use:
123// x_1 = x_0 * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3)
124// Its relative error is bounded by:
125// | x_1 / a^(-2/3) - 1 | < 35/242 * |h_0|^4 < 2^-70.
126// Then we perform Ziv's rounding test and check if the answer is exact.
127// This step is done in double-double precision.
128//
129// === Step 4 - Second Newton iteration ===
130// If the Ziv's rounding test from the previous step fails, we define the error
131// term:
132// h_1 = x_1^3 * a^2 - 1,
133// And perform another iteration:
134// x_2 = x_1 * (1 - h_1 / 3)
135// with the relative errors exceed the precision of double-double.
136// We then check the Ziv's accuracy test with relative errors < 2^-102 to
137// compensate for rounding errors.
138//
139// === Step 5 - Final iteration ===
140// If the Ziv's accuracy test from the previous step fails, we perform another
141// iteration in 128-bit precision and check for exact outputs.
142//
143// TODO: It is possible to replace this costly computation step with special
144// exceptional handling, similar to what was done in the CORE-MATH project:
145// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c
146
147LIBC_INLINE double cbrt(double x) {
148 using DoubleDouble = fputil::DoubleDouble;
149 using namespace cbrt_internal;
150 using FPBits = fputil::FPBits<double>;
151
152 uint64_t x_abs = FPBits(x).abs().uintval();
153
154 unsigned exp_bias_correction = 682; // 1023 * 2/3
155
156 if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() ||
157 x_abs >= FPBits::inf().uintval())) {
158 if (x == 0.0 || x_abs >= FPBits::inf().uintval())
159 // x is 0, Inf, or NaN.
160 // Make sure it works for FTZ/DAZ modes.
161 return static_cast<double>(x + x);
162
163 // x is non-zero denormal number.
164 // Normalize x.
165 x *= 0x1.0p60;
166 exp_bias_correction -= 20;
167 }
168
169 FPBits x_bits(x);
170
171 // When using biased exponent of x in double precision,
172 // x_e = real_exponent_of_x + 1023
173 // Then:
174 // x_e / 3 = real_exponent_of_x / 3 + 1023/3
175 // = real_exponent_of_x / 3 + 341
176 // So to make it the correct biased exponent of x^(1/3), we add
177 // 1023 - 341 = 682
178 // to the quotient x_e / 3.
179 unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent());
180 unsigned out_e = (x_e / 3 + exp_bias_correction);
181 unsigned shift_e = x_e % 3;
182
183 // Set x_r = 1.mantissa
184 double x_r =
185 FPBits(x_bits.get_mantissa() |
186 (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN))
187 .get_val();
188
189 // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa)
190 uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF;
191 a_bits |=
192 (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS))
193 << FPBits::FRACTION_LEN);
194 double a = FPBits(a_bits).get_val();
195
196 // Initial approximation of x_r^(-2/3).
197 double p = initial_approximation(x: x_r);
198
199 // Look up for 2^(-2*n/3) used for first approximation step.
200 constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1,
201 0x1.965fea53d6e3dp-2};
202
203 // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8.
204 // Relative error: < 1.16 * 2^(-21).
205 double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p);
206
207 // First iteration in double precision.
208 DoubleDouble a_sq = fputil::exact_mult(a, b: a);
209
210 // h0 = x0^3 * a^2 - 1
211 DoubleDouble x0_sq = fputil::exact_mult(a: x0, b: x0);
212 DoubleDouble x0_3 = fputil::quick_mult(a: x0, b: x0_sq);
213
214 double h0 = get_error(x_3: x0_3, a_sq);
215
216#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
217 constexpr double REL_ERROR = 0;
218#else
219 constexpr double REL_ERROR = 0x1.0p-51;
220#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
221
222 // Taylor polynomial of (1 + h)^(-1/3):
223 // (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ...
224 constexpr double ERR_COEFFS[3] = {
225 -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error
226 0x1.c71c71c71c71cp-3, // 2/9
227 -0x1.61f9add3c0ca4p-3, // -14/81
228 };
229 // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error.
230 double e0 = fputil::polyeval(x: h0, a0: ERR_COEFFS[0], a: ERR_COEFFS[1], a: ERR_COEFFS[2]);
231 double x0_h0 = x0 * h0;
232
233 // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81)
234 // x1 approximate a^(-2/3) with relative errors bounded by:
235 // | x1 / a^(-2/3) - 1 | < (34/243) h0^4 < h0 * REL_ERROR
236 DoubleDouble x1_dd{.lo: x0_h0 * e0, .hi: x0};
237
238 // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3).
239 DoubleDouble r1 = fputil::quick_mult(a, b: x1_dd);
240
241 // Lambda function to update the exponent of the result.
242 auto update_exponent = [=](double r) -> double {
243 uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000;
244 // Adjust exponent and sign.
245 uint64_t r_bits =
246 r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN);
247 return FPBits(r_bits).get_val();
248 };
249
250#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
251 // TODO: We probably don't need to use double-double if accurate tests and
252 // passes are skipped.
253 return update_exponent(r1.hi + r1.lo);
254#else
255 // Accurate checks and passes.
256 double r1_lower = r1.hi + r1.lo;
257 double r1_upper =
258 r1.hi + fputil::multiply_add(x: x0_h0, y: 2.0 * REL_ERROR * a, z: r1.lo);
259
260 // Ziv's accuracy test.
261 if (LIBC_LIKELY(r1_upper == r1_lower)) {
262 // Test for exact outputs.
263 // Check if lower (52 - 17 = 35) bits are 0's.
264 if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) ==
265 0)) {
266 double r1_err = (r1_lower - r1.hi) - r1.lo;
267 if (FPBits(r1_err).abs().get_val() < 0x1.0p69)
268 fputil::clear_except_if_required(FE_INEXACT);
269 }
270
271 return update_exponent(r1_lower);
272 }
273
274 // Accuracy test failed, perform another Newton iteration.
275 double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0;
276
277 // Second iteration in double-double precision.
278 // h1 = x1^3 * a^2 - 1.
279 DoubleDouble x1_sq = fputil::exact_mult(a: x1, b: x1);
280 DoubleDouble x1_3 = fputil::quick_mult(a: x1, b: x1_sq);
281 double h1 = get_error(x_3: x1_3, a_sq);
282
283 // e1 = -x1*h1/3.
284 double e1 = h1 * (x1 * -0x1.5555555555555p-2);
285 // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101.
286 DoubleDouble x2 = fputil::exact_add(a: x1, b: e1);
287 // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100.
288 DoubleDouble r2 = fputil::quick_mult(a, b: x2);
289
290 double r2_upper = r2.hi + fputil::multiply_add(x: a, y: 0x1.0p-102, z: r2.lo);
291 double r2_lower = r2.hi + fputil::multiply_add(x: a, y: -0x1.0p-102, z: r2.lo);
292
293 // Ziv's accuracy test.
294 if (LIBC_LIKELY(r2_upper == r2_lower))
295 return update_exponent(r2_upper);
296
297 using DFloat128 = fputil::DyadicFloat<128>;
298
299 // TODO: Investigate removing float128 and just list exceptional cases.
300 // Apply another Newton iteration with ~126-bit accuracy.
301 DFloat128 x2_f128 = fputil::quick_add(a: DFloat128(x2.hi), b: DFloat128(x2.lo));
302 // x2^3
303 DFloat128 x2_3 =
304 fputil::quick_mul(a: fputil::quick_mul(a: x2_f128, b: x2_f128), b: x2_f128);
305 // a^2
306 DFloat128 a_sq_f128 = fputil::quick_mul(a: DFloat128(a), b: DFloat128(a));
307 // x2^3 * a^2
308 DFloat128 x2_3_a_sq = fputil::quick_mul(a: x2_3, b: a_sq_f128);
309 // h2 = x2^3 * a^2 - 1
310 DFloat128 h2_f128 = fputil::quick_add(a: x2_3_a_sq, b: DFloat128(-1.0));
311 double h2 = static_cast<double>(h2_f128);
312 // t2 = 1 - h2 / 3
313 DFloat128 t2 = fputil::quick_add(a: DFloat128(1.0),
314 b: DFloat128(h2 * (-0x1.5555555555555p-2)));
315 // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3)
316 DFloat128 x3 = fputil::quick_mul(a: x2_f128, b: t2);
317 // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3)
318 DFloat128 r3 = fputil::quick_mul(a: DFloat128(a), b: x3);
319
320 // Check for exact cases:
321 DFloat128::MantissaType rounding_bits =
322 r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128;
323
324 double result = static_cast<double>(r3);
325 if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) ||
326 (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) {
327 // Output is exact.
328 r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128;
329
330 if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) {
331 DFloat128 tmp{r3.sign, r3.exponent - 123,
332 0x8000'0000'0000'0000'0000'0000'0000'0000_u128};
333 DFloat128 r4 = fputil::quick_add(a: r3, b: tmp);
334 result = static_cast<double>(r4);
335 } else {
336 result = static_cast<double>(r3);
337 }
338
339 fputil::clear_except_if_required(FE_INEXACT);
340 }
341
342 return update_exponent(result);
343#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS
344}
345
346} // namespace math
347
348} // namespace LIBC_NAMESPACE_DECL
349
350#endif // LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H
351