| 1 | //===-- Collection of utils for acoshf --------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H |
| 10 | #define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H |
| 11 | |
| 12 | #include "acosh_float_constants.h" |
| 13 | #include "src/__support/FPUtil/FPBits.h" |
| 14 | #include "src/__support/FPUtil/PolyEval.h" |
| 15 | #include "src/__support/FPUtil/multiply_add.h" |
| 16 | #include "src/__support/macros/attributes.h" |
| 17 | #include "src/__support/macros/optimization.h" |
| 18 | |
| 19 | namespace LIBC_NAMESPACE_DECL { |
| 20 | |
| 21 | namespace acoshf_internal { |
| 22 | |
| 23 | // Compute log(|x|), use for float functions, so the error requirements are not |
| 24 | // as strict as for double precision, and x is assumed to be normal. |
| 25 | |
| 26 | #if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \ |
| 27 | defined(LIBC_MATH_HAS_SMALL_TABLES) |
| 28 | |
| 29 | LIBC_INLINE LIBC_CONSTEXPR double log_eval(double x) { |
| 30 | using FPBits = fputil::FPBits<double>; |
| 31 | FPBits x_bits(x); |
| 32 | uint64_t x_u = x_bits.uintval(); |
| 33 | // Extract exponent and remove sign bit. |
| 34 | double ex = static_cast<double>( |
| 35 | (static_cast<int>(x_u >> FPBits::FRACTION_LEN) & 0x7ff) - |
| 36 | FPBits::EXP_BIAS); |
| 37 | // Reduce to 1.m |
| 38 | double x_r = |
| 39 | FPBits((x_u & FPBits::FRACTION_MASK) | FPBits::one().uintval()).get_val(); |
| 40 | // x_r = 1 + dx |
| 41 | double dx = x_r - 1.0; |
| 42 | // Minimax polynomial of log(1 + x) generated by Sollya with: |
| 43 | // > P = fpminimax(log(1 + x)/x, 8, [|1, D...|], [0, 1]); |
| 44 | // > dirtyinfnorm((log(1 + x) - x*P)/log(1 + x), [0, 1]); |
| 45 | // 0x1.2d5f0a5f66124e3afefa7a66251d1530e07301ed8p-25 |
| 46 | constexpr double COEFFS[8] = { |
| 47 | -0x1.ffff09a15a555p-2, 0x1.55350257eb492p-2, -0x1.fd0649c8116b3p-3, |
| 48 | 0x1.8814186a6a587p-3, -0x1.194a9c269ae3p-3, 0x1.4389c5fa07e93p-4, |
| 49 | -0x1.ea7bb4f18dbccp-6, 0x1.5d864e41667eep-8, |
| 50 | }; |
| 51 | constexpr double LOG_2 = 0x1.62e42fefa39efp-1; |
| 52 | |
| 53 | double dx2 = dx * dx; |
| 54 | double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); |
| 55 | double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); |
| 56 | double c2 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); |
| 57 | double c3 = fputil::multiply_add(dx, COEFFS[7], COEFFS[6]); |
| 58 | double dx4 = dx2 * dx2; |
| 59 | double d0 = fputil::multiply_add(dx2, c1, c0); |
| 60 | double d1 = fputil::multiply_add(dx2, c3, c2); |
| 61 | double p = fputil::multiply_add(dx4, d1, d0); |
| 62 | |
| 63 | double r_hi = fputil::multiply_add(ex, LOG_2, dx); |
| 64 | double r = fputil::multiply_add(dx2, p, r_hi); |
| 65 | return r; |
| 66 | } |
| 67 | |
| 68 | #else // Accurate evaluation. |
| 69 | |
| 70 | LIBC_INLINE LIBC_CONSTEXPR double log_eval(double x) { |
| 71 | constexpr double LOG_2 = 0x1.62e42fefa39efp-1; |
| 72 | |
| 73 | // For x = 2^ex * (1 + mx) |
| 74 | // log(x) = ex * log(2) + log(1 + mx) |
| 75 | using FPBits = fputil::FPBits<double>; |
| 76 | FPBits x_bits(x); |
| 77 | uint64_t x_u = x_bits.uintval(); |
| 78 | |
| 79 | // log(x) = log(2^x_e * x_m) |
| 80 | // = x_e * log(2) + log(x_m) |
| 81 | |
| 82 | // Range reduction for log(x_m): |
| 83 | // For each x_m, we would like to find r such that: |
| 84 | // -2^-7 <= r * x_m - 1 < 2^-6 |
| 85 | int shifted = static_cast<int>(x_u >> (FPBits::FRACTION_LEN - 6)); |
| 86 | int index = shifted & 0x3F; |
| 87 | double r = R_LOG[index]; |
| 88 | |
| 89 | // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are |
| 90 | // all 1's. |
| 91 | int x_e = static_cast<int>((x_u + (1ULL << (FPBits::FRACTION_LEN - 6))) >> |
| 92 | FPBits::FRACTION_LEN) - |
| 93 | FPBits::EXP_BIAS; |
| 94 | double e_x = static_cast<double>(x_e); |
| 95 | |
| 96 | double r_hi = fputil::multiply_add(x: e_x, y: LOG_2, z: LOG_R[index]); |
| 97 | |
| 98 | // Set m = 1.mantissa. |
| 99 | uint64_t x_m = (x_u & FPBits::FRACTION_MASK) | FPBits::one().uintval(); |
| 100 | double m = FPBits(x_m).get_val(); |
| 101 | |
| 102 | double dx = 0.0; |
| 103 | |
| 104 | // Perform exact range reduction |
| 105 | #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| 106 | dx = fputil::multiply_add(r, m, -1.0); // exact |
| 107 | #else |
| 108 | uint64_t c_m = x_m & 0x3FFF'C000'0000'0000ULL; |
| 109 | double c = FPBits(c_m).get_val(); |
| 110 | dx = fputil::multiply_add(x: r, y: m - c, z: C_LOG[index]); // exact |
| 111 | #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| 112 | |
| 113 | // Minimax polynomial of log(1 + dx) generated by Sollya with: |
| 114 | // > P = fpminimax(log(1 + x)/x, 6, [|1, D...|], [-2^-7, 2^-6]); |
| 115 | // > dirtyinfnorm((log(1 + x) - x*P)/log(1 + x), [-2^-7, 2^-6]); |
| 116 | // 0x1.6e853e8864f29a6f10d50698ee6ef972a79d3a487p-54 |
| 117 | constexpr double COEFFS[6] = {-0x1.ffffffffffe03p-2, 0x1.55555555395f9p-2, |
| 118 | -0x1.0000001be5329p-2, 0x1.9999c1bf8c3afp-3, |
| 119 | -0x1.554f0ba9cee4bp-3, 0x1.1d94cd56b72d7p-3}; |
| 120 | |
| 121 | double dx2 = dx * dx; |
| 122 | double c1 = fputil::multiply_add(x: dx, y: COEFFS[1], z: COEFFS[0]); |
| 123 | double c2 = fputil::multiply_add(x: dx, y: COEFFS[3], z: COEFFS[2]); |
| 124 | double c3 = fputil::multiply_add(x: dx, y: COEFFS[5], z: COEFFS[4]); |
| 125 | double dx4 = dx2 * dx2; |
| 126 | double d1 = fputil::multiply_add(x: dx2, y: c1, z: r_hi + dx); |
| 127 | double d2 = fputil::multiply_add(x: dx2, y: c3, z: c2); |
| 128 | double result = fputil::multiply_add(x: dx4, y: d2, z: d1); |
| 129 | return result; |
| 130 | } |
| 131 | |
| 132 | #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS && LIBC_MATH_HAS_SMALL_TABLES |
| 133 | |
| 134 | } // namespace acoshf_internal |
| 135 | |
| 136 | } // namespace LIBC_NAMESPACE_DECL |
| 137 | |
| 138 | #endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_UTILS_H |
| 139 | |