| 1 | //===-- Implementation header for erff --------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H |
| 10 | #define LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H |
| 11 | |
| 12 | #include "src/__support/FPUtil/FEnvImpl.h" |
| 13 | #include "src/__support/FPUtil/FPBits.h" |
| 14 | #include "src/__support/FPUtil/PolyEval.h" |
| 15 | #include "src/__support/FPUtil/double_double.h" |
| 16 | #include "src/__support/FPUtil/dyadic_float.h" |
| 17 | #include "src/__support/FPUtil/multiply_add.h" |
| 18 | #include "src/__support/integer_literals.h" |
| 19 | #include "src/__support/macros/config.h" |
| 20 | #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY |
| 21 | |
| 22 | namespace LIBC_NAMESPACE_DECL { |
| 23 | |
| 24 | namespace math { |
| 25 | |
| 26 | #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 27 | #define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| 28 | #endif |
| 29 | |
| 30 | namespace cbrt_internal { |
| 31 | using namespace fputil; |
| 32 | |
| 33 | // Initial approximation of x^(-2/3) for 1 <= x < 2. |
| 34 | // Polynomial generated by Sollya with: |
| 35 | // > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]); |
| 36 | // > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]); |
| 37 | // 0x1.28...p-21 |
| 38 | LIBC_INLINE double initial_approximation(double x) { |
| 39 | constexpr double COEFFS[8] = { |
| 40 | 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3, |
| 41 | -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0, |
| 42 | 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6, |
| 43 | }; |
| 44 | |
| 45 | double x_sq = x * x; |
| 46 | |
| 47 | double c0 = fputil::multiply_add(x, y: COEFFS[1], z: COEFFS[0]); |
| 48 | double c1 = fputil::multiply_add(x, y: COEFFS[3], z: COEFFS[2]); |
| 49 | double c2 = fputil::multiply_add(x, y: COEFFS[5], z: COEFFS[4]); |
| 50 | double c3 = fputil::multiply_add(x, y: COEFFS[7], z: COEFFS[6]); |
| 51 | |
| 52 | double x_4 = x_sq * x_sq; |
| 53 | double d0 = fputil::multiply_add(x: x_sq, y: c1, z: c0); |
| 54 | double d1 = fputil::multiply_add(x: x_sq, y: c3, z: c2); |
| 55 | |
| 56 | return fputil::multiply_add(x: x_4, y: d1, z: d0); |
| 57 | } |
| 58 | |
| 59 | // Get the error term for Newton iteration: |
| 60 | // h(x) = x^3 * a^2 - 1, |
| 61 | #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| 62 | LIBC_INLINE double get_error(const DoubleDouble &x_3, |
| 63 | const DoubleDouble &a_sq) { |
| 64 | return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) + |
| 65 | fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo); |
| 66 | } |
| 67 | #else |
| 68 | LIBC_INLINE constexpr double get_error(const DoubleDouble &x_3, |
| 69 | const DoubleDouble &a_sq) { |
| 70 | DoubleDouble x_3_a_sq = fputil::quick_mult(a: a_sq, b: x_3); |
| 71 | return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo; |
| 72 | } |
| 73 | #endif |
| 74 | |
| 75 | } // namespace cbrt_internal |
| 76 | |
| 77 | // Correctly rounded cbrt algorithm: |
| 78 | // |
| 79 | // === Step 1 - Range reduction === |
| 80 | // For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as: |
| 81 | // x_r = 1.m |
| 82 | // a = (-1)^s * 2^(e % 3) * (1.m) |
| 83 | // Then cbrt(x) = x^(1/3) can be computed as: |
| 84 | // x^(1/3) = 2^(e / 3) * a^(1/3). |
| 85 | // |
| 86 | // In order to avoid division, we compute a^(-2/3) using Newton method and then |
| 87 | // multiply the results by a: |
| 88 | // a^(1/3) = a * a^(-2/3). |
| 89 | // |
| 90 | // === Step 2 - First approximation to a^(-2/3) === |
| 91 | // First, we use a degree-7 minimax polynomial generated by Sollya to |
| 92 | // approximate x_r^(-2/3) for 1 <= x_r < 2. |
| 93 | // p = P(x_r) ~ x_r^(-2/3), |
| 94 | // with relative errors bounded by: |
| 95 | // | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21. |
| 96 | // |
| 97 | // Then we multiply with 2^(e % 3) from a small lookup table to get: |
| 98 | // x_0 = 2^(-2*(e % 3)/3) * p |
| 99 | // ~ 2^(-2*(e % 3)/3) * x_r^(-2/3) |
| 100 | // = a^(-2/3) |
| 101 | // With relative errors: |
| 102 | // | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21. |
| 103 | // This step is done in double precision. |
| 104 | // |
| 105 | // === Step 3 - First Newton iteration === |
| 106 | // We follow the method described in: |
| 107 | // Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation |
| 108 | // in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf |
| 109 | // to derive multiplicative Newton iterations as below: |
| 110 | // Let x_n be the nth approximation to a^(-2/3). Define the n^th error as: |
| 111 | // h_n = x_n^3 * a^2 - 1 |
| 112 | // Then: |
| 113 | // a^(-2/3) = x_n / (1 + h_n)^(1/3) |
| 114 | // = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...) |
| 115 | // using the Taylor series expansion of (1 + h_n)^(-1/3). |
| 116 | // |
| 117 | // Apply to x_0 above: |
| 118 | // h_0 = x_0^3 * a^2 - 1 |
| 119 | // = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)), |
| 120 | // it's bounded by: |
| 121 | // |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17. |
| 122 | // So in the first iteration step, we use: |
| 123 | // x_1 = x_0 * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3) |
| 124 | // Its relative error is bounded by: |
| 125 | // | x_1 / a^(-2/3) - 1 | < 35/242 * |h_0|^4 < 2^-70. |
| 126 | // Then we perform Ziv's rounding test and check if the answer is exact. |
| 127 | // This step is done in double-double precision. |
| 128 | // |
| 129 | // === Step 4 - Second Newton iteration === |
| 130 | // If the Ziv's rounding test from the previous step fails, we define the error |
| 131 | // term: |
| 132 | // h_1 = x_1^3 * a^2 - 1, |
| 133 | // And perform another iteration: |
| 134 | // x_2 = x_1 * (1 - h_1 / 3) |
| 135 | // with the relative errors exceed the precision of double-double. |
| 136 | // We then check the Ziv's accuracy test with relative errors < 2^-102 to |
| 137 | // compensate for rounding errors. |
| 138 | // |
| 139 | // === Step 5 - Final iteration === |
| 140 | // If the Ziv's accuracy test from the previous step fails, we perform another |
| 141 | // iteration in 128-bit precision and check for exact outputs. |
| 142 | // |
| 143 | // TODO: It is possible to replace this costly computation step with special |
| 144 | // exceptional handling, similar to what was done in the CORE-MATH project: |
| 145 | // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c |
| 146 | |
| 147 | LIBC_INLINE double cbrt(double x) { |
| 148 | using DoubleDouble = fputil::DoubleDouble; |
| 149 | using namespace cbrt_internal; |
| 150 | using FPBits = fputil::FPBits<double>; |
| 151 | |
| 152 | uint64_t x_abs = FPBits(x).abs().uintval(); |
| 153 | |
| 154 | unsigned exp_bias_correction = 682; // 1023 * 2/3 |
| 155 | |
| 156 | if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() || |
| 157 | x_abs >= FPBits::inf().uintval())) { |
| 158 | if (x == 0.0 || x_abs >= FPBits::inf().uintval()) |
| 159 | // x is 0, Inf, or NaN. |
| 160 | // Make sure it works for FTZ/DAZ modes. |
| 161 | return static_cast<double>(x + x); |
| 162 | |
| 163 | // x is non-zero denormal number. |
| 164 | // Normalize x. |
| 165 | x *= 0x1.0p60; |
| 166 | exp_bias_correction -= 20; |
| 167 | } |
| 168 | |
| 169 | FPBits x_bits(x); |
| 170 | |
| 171 | // When using biased exponent of x in double precision, |
| 172 | // x_e = real_exponent_of_x + 1023 |
| 173 | // Then: |
| 174 | // x_e / 3 = real_exponent_of_x / 3 + 1023/3 |
| 175 | // = real_exponent_of_x / 3 + 341 |
| 176 | // So to make it the correct biased exponent of x^(1/3), we add |
| 177 | // 1023 - 341 = 682 |
| 178 | // to the quotient x_e / 3. |
| 179 | unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent()); |
| 180 | unsigned out_e = (x_e / 3 + exp_bias_correction); |
| 181 | unsigned shift_e = x_e % 3; |
| 182 | |
| 183 | // Set x_r = 1.mantissa |
| 184 | double x_r = |
| 185 | FPBits(x_bits.get_mantissa() | |
| 186 | (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN)) |
| 187 | .get_val(); |
| 188 | |
| 189 | // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa) |
| 190 | uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF; |
| 191 | a_bits |= |
| 192 | (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS)) |
| 193 | << FPBits::FRACTION_LEN); |
| 194 | double a = FPBits(a_bits).get_val(); |
| 195 | |
| 196 | // Initial approximation of x_r^(-2/3). |
| 197 | double p = initial_approximation(x: x_r); |
| 198 | |
| 199 | // Look up for 2^(-2*n/3) used for first approximation step. |
| 200 | constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1, |
| 201 | 0x1.965fea53d6e3dp-2}; |
| 202 | |
| 203 | // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8. |
| 204 | // Relative error: < 1.16 * 2^(-21). |
| 205 | double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p); |
| 206 | |
| 207 | // First iteration in double precision. |
| 208 | DoubleDouble a_sq = fputil::exact_mult(a, b: a); |
| 209 | |
| 210 | // h0 = x0^3 * a^2 - 1 |
| 211 | DoubleDouble x0_sq = fputil::exact_mult(a: x0, b: x0); |
| 212 | DoubleDouble x0_3 = fputil::quick_mult(a: x0, b: x0_sq); |
| 213 | |
| 214 | double h0 = get_error(x_3: x0_3, a_sq); |
| 215 | |
| 216 | #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| 217 | constexpr double REL_ERROR = 0; |
| 218 | #else |
| 219 | constexpr double REL_ERROR = 0x1.0p-51; |
| 220 | #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| 221 | |
| 222 | // Taylor polynomial of (1 + h)^(-1/3): |
| 223 | // (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ... |
| 224 | constexpr double ERR_COEFFS[3] = { |
| 225 | -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error |
| 226 | 0x1.c71c71c71c71cp-3, // 2/9 |
| 227 | -0x1.61f9add3c0ca4p-3, // -14/81 |
| 228 | }; |
| 229 | // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error. |
| 230 | double e0 = fputil::polyeval(x: h0, a0: ERR_COEFFS[0], a: ERR_COEFFS[1], a: ERR_COEFFS[2]); |
| 231 | double x0_h0 = x0 * h0; |
| 232 | |
| 233 | // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81) |
| 234 | // x1 approximate a^(-2/3) with relative errors bounded by: |
| 235 | // | x1 / a^(-2/3) - 1 | < (34/243) h0^4 < h0 * REL_ERROR |
| 236 | DoubleDouble x1_dd{.lo: x0_h0 * e0, .hi: x0}; |
| 237 | |
| 238 | // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3). |
| 239 | DoubleDouble r1 = fputil::quick_mult(a, b: x1_dd); |
| 240 | |
| 241 | // Lambda function to update the exponent of the result. |
| 242 | auto update_exponent = [=](double r) -> double { |
| 243 | uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000; |
| 244 | // Adjust exponent and sign. |
| 245 | uint64_t r_bits = |
| 246 | r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN); |
| 247 | return FPBits(r_bits).get_val(); |
| 248 | }; |
| 249 | |
| 250 | #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| 251 | // TODO: We probably don't need to use double-double if accurate tests and |
| 252 | // passes are skipped. |
| 253 | return update_exponent(r1.hi + r1.lo); |
| 254 | #else |
| 255 | // Accurate checks and passes. |
| 256 | double r1_lower = r1.hi + r1.lo; |
| 257 | double r1_upper = |
| 258 | r1.hi + fputil::multiply_add(x: x0_h0, y: 2.0 * REL_ERROR * a, z: r1.lo); |
| 259 | |
| 260 | // Ziv's accuracy test. |
| 261 | if (LIBC_LIKELY(r1_upper == r1_lower)) { |
| 262 | // Test for exact outputs. |
| 263 | // Check if lower (52 - 17 = 35) bits are 0's. |
| 264 | if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) == |
| 265 | 0)) { |
| 266 | double r1_err = (r1_lower - r1.hi) - r1.lo; |
| 267 | if (FPBits(r1_err).abs().get_val() < 0x1.0p69) |
| 268 | fputil::clear_except_if_required(FE_INEXACT); |
| 269 | } |
| 270 | |
| 271 | return update_exponent(r1_lower); |
| 272 | } |
| 273 | |
| 274 | // Accuracy test failed, perform another Newton iteration. |
| 275 | double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0; |
| 276 | |
| 277 | // Second iteration in double-double precision. |
| 278 | // h1 = x1^3 * a^2 - 1. |
| 279 | DoubleDouble x1_sq = fputil::exact_mult(a: x1, b: x1); |
| 280 | DoubleDouble x1_3 = fputil::quick_mult(a: x1, b: x1_sq); |
| 281 | double h1 = get_error(x_3: x1_3, a_sq); |
| 282 | |
| 283 | // e1 = -x1*h1/3. |
| 284 | double e1 = h1 * (x1 * -0x1.5555555555555p-2); |
| 285 | // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101. |
| 286 | DoubleDouble x2 = fputil::exact_add(a: x1, b: e1); |
| 287 | // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100. |
| 288 | DoubleDouble r2 = fputil::quick_mult(a, b: x2); |
| 289 | |
| 290 | double r2_upper = r2.hi + fputil::multiply_add(x: a, y: 0x1.0p-102, z: r2.lo); |
| 291 | double r2_lower = r2.hi + fputil::multiply_add(x: a, y: -0x1.0p-102, z: r2.lo); |
| 292 | |
| 293 | // Ziv's accuracy test. |
| 294 | if (LIBC_LIKELY(r2_upper == r2_lower)) |
| 295 | return update_exponent(r2_upper); |
| 296 | |
| 297 | using DFloat128 = fputil::DyadicFloat<128>; |
| 298 | |
| 299 | // TODO: Investigate removing float128 and just list exceptional cases. |
| 300 | // Apply another Newton iteration with ~126-bit accuracy. |
| 301 | DFloat128 x2_f128 = fputil::quick_add(a: DFloat128(x2.hi), b: DFloat128(x2.lo)); |
| 302 | // x2^3 |
| 303 | DFloat128 x2_3 = |
| 304 | fputil::quick_mul(a: fputil::quick_mul(a: x2_f128, b: x2_f128), b: x2_f128); |
| 305 | // a^2 |
| 306 | DFloat128 a_sq_f128 = fputil::quick_mul(a: DFloat128(a), b: DFloat128(a)); |
| 307 | // x2^3 * a^2 |
| 308 | DFloat128 x2_3_a_sq = fputil::quick_mul(a: x2_3, b: a_sq_f128); |
| 309 | // h2 = x2^3 * a^2 - 1 |
| 310 | DFloat128 h2_f128 = fputil::quick_add(a: x2_3_a_sq, b: DFloat128(-1.0)); |
| 311 | double h2 = static_cast<double>(h2_f128); |
| 312 | // t2 = 1 - h2 / 3 |
| 313 | DFloat128 t2 = fputil::quick_add(a: DFloat128(1.0), |
| 314 | b: DFloat128(h2 * (-0x1.5555555555555p-2))); |
| 315 | // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3) |
| 316 | DFloat128 x3 = fputil::quick_mul(a: x2_f128, b: t2); |
| 317 | // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3) |
| 318 | DFloat128 r3 = fputil::quick_mul(a: DFloat128(a), b: x3); |
| 319 | |
| 320 | // Check for exact cases: |
| 321 | DFloat128::MantissaType rounding_bits = |
| 322 | r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128; |
| 323 | |
| 324 | double result = static_cast<double>(r3); |
| 325 | if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) || |
| 326 | (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) { |
| 327 | // Output is exact. |
| 328 | r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128; |
| 329 | |
| 330 | if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) { |
| 331 | DFloat128 tmp{r3.sign, r3.exponent - 123, |
| 332 | 0x8000'0000'0000'0000'0000'0000'0000'0000_u128}; |
| 333 | DFloat128 r4 = fputil::quick_add(a: r3, b: tmp); |
| 334 | result = static_cast<double>(r4); |
| 335 | } else { |
| 336 | result = static_cast<double>(r3); |
| 337 | } |
| 338 | |
| 339 | fputil::clear_except_if_required(FE_INEXACT); |
| 340 | } |
| 341 | |
| 342 | return update_exponent(result); |
| 343 | #endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| 344 | } |
| 345 | |
| 346 | } // namespace math |
| 347 | |
| 348 | } // namespace LIBC_NAMESPACE_DECL |
| 349 | |
| 350 | #endif // LLVM_LIBC_SRC___SUPPORT_MATH_CBRT_H |
| 351 | |