| 1 | //===-- Implementation header for log10p1f16 --------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H |
| 10 | #define LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H |
| 11 | |
| 12 | #include "include/llvm-libc-macros/float16-macros.h" |
| 13 | |
| 14 | #ifdef LIBC_TYPES_HAS_FLOAT16 |
| 15 | |
| 16 | #include "exp10_float16_constants.h" |
| 17 | #include "expxf16_utils.h" |
| 18 | #include "hdr/errno_macros.h" |
| 19 | #include "hdr/fenv_macros.h" |
| 20 | #include "src/__support/FPUtil/FEnvImpl.h" |
| 21 | #include "src/__support/FPUtil/FPBits.h" |
| 22 | #include "src/__support/FPUtil/PolyEval.h" |
| 23 | #include "src/__support/FPUtil/cast.h" |
| 24 | #include "src/__support/FPUtil/except_value_utils.h" |
| 25 | #include "src/__support/FPUtil/multiply_add.h" |
| 26 | #include "src/__support/common.h" |
| 27 | #include "src/__support/macros/config.h" |
| 28 | #include "src/__support/macros/optimization.h" |
| 29 | #include "src/__support/macros/properties/cpu_features.h" |
| 30 | |
| 31 | namespace LIBC_NAMESPACE_DECL { |
| 32 | |
| 33 | namespace math { |
| 34 | |
| 35 | LIBC_INLINE float16 log10p1f16(float16 x) { |
| 36 | using namespace math::expxf16_internal; |
| 37 | using FPBits = fputil::FPBits<float16>; |
| 38 | FPBits x_bits(x); |
| 39 | |
| 40 | uint16_t x_u = x_bits.uintval(); |
| 41 | uint16_t x_abs = x_u & 0x7fffU; |
| 42 | |
| 43 | // If x is +-0, NaN, +/-inf, or |x| <= 2^-3. |
| 44 | if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x7c00U)) { |
| 45 | // log10p1(NaN) = NaN |
| 46 | if (x_bits.is_nan()) { |
| 47 | if (x_bits.is_signaling_nan()) { |
| 48 | fputil::raise_except_if_required(FE_INVALID); |
| 49 | return FPBits::quiet_nan().get_val(); |
| 50 | } |
| 51 | |
| 52 | return x; |
| 53 | } |
| 54 | |
| 55 | // log10p1(+/-0) = +/-0 |
| 56 | if (x_abs == 0U) |
| 57 | return x; |
| 58 | |
| 59 | // log10p1(+inf) = +inf |
| 60 | if (x_u == 0x7c00U) |
| 61 | return FPBits::inf().get_val(); |
| 62 | |
| 63 | // log10p1(-inf) = NaN |
| 64 | if (x_abs >= 0x7c00U) { |
| 65 | fputil::set_errno_if_required(EDOM); |
| 66 | fputil::raise_except_if_required(FE_INVALID); |
| 67 | return FPBits::quiet_nan().get_val(); |
| 68 | } |
| 69 | |
| 70 | // When |x| <= 2^-3, use a degree-5 minimax polynomial on x. |
| 71 | // For y = 1+x near 1, the table-based range reduction suffers from |
| 72 | // catastrophic cancellation (m*log10(2) + log10(f) nearly cancel). |
| 73 | // Computing log10(1+x) directly via polynomial avoids this. |
| 74 | // |
| 75 | // Generated by Sollya with: |
| 76 | // > display = hexadecimal; |
| 77 | // > Q = fpminimax(log10(1 + x), [|1, 2, 3, 4, 5|], [|SG...|], |
| 78 | // [-2^-3, 2^-3]); |
| 79 | // > Q; |
| 80 | if (x_abs <= 0x3000U) { |
| 81 | #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 82 | // ExceptValues for the small-|x| polynomial path (|x| <= 2^-3). |
| 83 | constexpr size_t N_LOG10P1F16_EXCEPTS = 7; |
| 84 | constexpr fputil::ExceptValues<float16, N_LOG10P1F16_EXCEPTS> |
| 85 | LOG10P1F16_EXCEPTS = {.values: { |
| 86 | // (input, RZ output, RU offset, RD offset, RN offset) |
| 87 | // x = 0x1.ep-13 |
| 88 | {.input: 0x0B80U, .rnd_towardzero_result: 0x0683U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 89 | // x = 0x1.4fp-6 |
| 90 | {.input: 0x213CU, .rnd_towardzero_result: 0x1C85U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 91 | // x = 0x1.82cp-4 |
| 92 | {.input: 0x2E0BU, .rnd_towardzero_result: 0x2904U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 93 | // x = 0x1.d24p-4 |
| 94 | {.input: 0x2EC9U, .rnd_towardzero_result: 0x299AU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 95 | // x = -0x1.b8p-13 |
| 96 | {.input: 0x89DCU, .rnd_towardzero_result: 0x8516U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U}, |
| 97 | // x = -0x1.49p-6 |
| 98 | {.input: 0x9924U, .rnd_towardzero_result: 0x9478U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U}, |
| 99 | // x = -0x1.42p-8 |
| 100 | {.input: 0x9D08U, .rnd_towardzero_result: 0x9861U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 101 | }}; |
| 102 | |
| 103 | if (auto r = LOG10P1F16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value())) |
| 104 | return r.value(); |
| 105 | #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 106 | |
| 107 | float xf = x; |
| 108 | return fputil::cast<float16>( |
| 109 | x: xf * fputil::polyeval(x: xf, a0: 0x1.bcb7b2p-2f, a: -0x1.bcb4cp-3f, |
| 110 | a: 0x1.2875bcp-3f, a: -0x1.c2946ep-4f, |
| 111 | a: 0x1.69da2p-4f)); |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | // log10p1(-1) = -inf |
| 116 | if (LIBC_UNLIKELY(x_u == 0xbc00U)) { |
| 117 | fputil::raise_except_if_required(FE_DIVBYZERO); |
| 118 | return FPBits::inf(sign: Sign::NEG).get_val(); |
| 119 | } |
| 120 | |
| 121 | // log10p1(x) = NaN for x < -1 |
| 122 | if (LIBC_UNLIKELY(x_u > 0xbc00U)) { |
| 123 | fputil::set_errno_if_required(EDOM); |
| 124 | fputil::raise_except_if_required(FE_INVALID); |
| 125 | return FPBits::quiet_nan().get_val(); |
| 126 | } |
| 127 | |
| 128 | #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 129 | // ExceptValues for the large-|x| table-based path (|x| > 2^-3). |
| 130 | constexpr size_t N_LOG10P1F16_EXCEPTS_HI = 12; |
| 131 | constexpr fputil::ExceptValues<float16, N_LOG10P1F16_EXCEPTS_HI> |
| 132 | LOG10P1F16_EXCEPTS_HI = {.values: { |
| 133 | // (input, RZ output, RU offset, RD offset, RN offset) |
| 134 | // x = 0x1.3bp-3 |
| 135 | {.input: 0x30ECU, .rnd_towardzero_result: 0x2BF3U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 136 | // x = 0x1.ba0p-3 |
| 137 | {.input: 0x32E8U, .rnd_towardzero_result: 0x2D6EU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 138 | // x = 0x1.4c4p-2 |
| 139 | {.input: 0x3531U, .rnd_towardzero_result: 0x2FCFU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 140 | // x = 0x1.744p+0 |
| 141 | {.input: 0x3DD1U, .rnd_towardzero_result: 0x363CU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 142 | // x = 0x1.7d8p+0 |
| 143 | {.input: 0x3DF6U, .rnd_towardzero_result: 0x3656U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 144 | // x = 0x1.2p+3 |
| 145 | {.input: 0x4880U, .rnd_towardzero_result: 0x3C00U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 146 | // x = 0x1.8cp+6 |
| 147 | {.input: 0x5630U, .rnd_towardzero_result: 0x4000U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 148 | // x = 0x1.f44p+6 |
| 149 | {.input: 0x57D1U, .rnd_towardzero_result: 0x4033U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 150 | // x = 0x1.f38p+9 |
| 151 | {.input: 0x63CEU, .rnd_towardzero_result: 0x4200U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 152 | // x = -0x1.558p-3 |
| 153 | {.input: 0xB156U, .rnd_towardzero_result: 0xAD12U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 154 | // x = -0x1.8d8p-2 |
| 155 | {.input: 0xB636U, .rnd_towardzero_result: 0xB2D3U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U}, |
| 156 | // x = -0x1.808p-1 |
| 157 | {.input: 0xBA02U, .rnd_towardzero_result: 0xB8D4U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U}, |
| 158 | }}; |
| 159 | |
| 160 | if (auto r = LOG10P1F16_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value())) |
| 161 | return r.value(); |
| 162 | #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 163 | |
| 164 | // For the range reduction, we compute y = 1 + x in float. For |x| > 2^-3, |
| 165 | // the addition 1.0f + xf is exact in float, and the result y is bounded away |
| 166 | // from 1.0, avoiding catastrophic cancellation in the table decomposition. |
| 167 | float xf = x; |
| 168 | float y = 1.0f + xf; |
| 169 | |
| 170 | using FPBitsFloat = fputil::FPBits<float>; |
| 171 | FPBitsFloat y_bits(y); |
| 172 | |
| 173 | // When y is subnormal or zero, which shouldn't happen since y = 1 + x and |
| 174 | // x >= -1 + eps in f16, so y >= eps > 0. But handle y = 0 just in case. |
| 175 | if (LIBC_UNLIKELY(y_bits.is_zero())) |
| 176 | return FPBits::inf(sign: Sign::NEG).get_val(); |
| 177 | |
| 178 | int m = y_bits.get_exponent(); |
| 179 | // Set y_bits to 1.mant (biased exponent = 127). |
| 180 | y_bits.set_biased_exponent(FPBitsFloat::EXP_BIAS); |
| 181 | float mant_f = y_bits.get_val(); |
| 182 | |
| 183 | // Leading 23 - 5 = 18, so top 5 mantissa bits give index f in [0, 31]. |
| 184 | int f = y_bits.get_mantissa() >> (FPBitsFloat::FRACTION_LEN - 5); |
| 185 | |
| 186 | // v = 1.mant * 1/f - 1 = d/f |
| 187 | float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F[f], z: -1.0f); |
| 188 | |
| 189 | // Degree-3 minimax polynomial generated by Sollya with the following |
| 190 | // commands: |
| 191 | // > display = hexadecimal; |
| 192 | // > P = fpminimax(log10(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]); |
| 193 | // > x * P; |
| 194 | float log10p1_d_over_f = |
| 195 | v * fputil::polyeval(x: v, a0: 0x1.bcb7bp-2f, a: -0x1.bce168p-3f, a: 0x1.28acb8p-3f); |
| 196 | // log10(1.mant) = log10(f) + log10(1 + d/f) |
| 197 | float log10_1_mant = LOG10F_F[f] + log10p1_d_over_f; |
| 198 | return fputil::cast<float16>( |
| 199 | x: fputil::multiply_add(x: static_cast<float>(m), y: LOG10F_2, z: log10_1_mant)); |
| 200 | } |
| 201 | |
| 202 | } // namespace math |
| 203 | } // namespace LIBC_NAMESPACE_DECL |
| 204 | |
| 205 | #endif // LIBC_TYPES_HAS_FLOAT16 |
| 206 | |
| 207 | #endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H |
| 208 | |