| 1 | //===-- Implementation header for log2p1f16 ---------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H |
| 10 | #define LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H |
| 11 | |
| 12 | #include "include/llvm-libc-macros/float16-macros.h" |
| 13 | |
| 14 | #ifdef LIBC_TYPES_HAS_FLOAT16 |
| 15 | |
| 16 | #include "expxf16_utils.h" |
| 17 | #include "hdr/errno_macros.h" |
| 18 | #include "hdr/fenv_macros.h" |
| 19 | #include "src/__support/FPUtil/FEnvImpl.h" |
| 20 | #include "src/__support/FPUtil/FPBits.h" |
| 21 | #include "src/__support/FPUtil/PolyEval.h" |
| 22 | #include "src/__support/FPUtil/cast.h" |
| 23 | #include "src/__support/FPUtil/except_value_utils.h" |
| 24 | #include "src/__support/FPUtil/multiply_add.h" |
| 25 | #include "src/__support/common.h" |
| 26 | #include "src/__support/macros/config.h" |
| 27 | #include "src/__support/macros/optimization.h" |
| 28 | #include "src/__support/macros/properties/cpu_features.h" |
| 29 | |
| 30 | namespace LIBC_NAMESPACE_DECL { |
| 31 | |
| 32 | namespace math { |
| 33 | |
| 34 | LIBC_INLINE float16 log2p1f16(float16 x) { |
| 35 | using namespace math::expxf16_internal; |
| 36 | using FPBits = fputil::FPBits<float16>; |
| 37 | FPBits x_bits(x); |
| 38 | |
| 39 | uint16_t x_u = x_bits.uintval(); |
| 40 | uint16_t x_abs = x_u & 0x7fffU; |
| 41 | |
| 42 | // If x is NaN, +/-inf, or |x| <= 2^-3. |
| 43 | if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x7c00U)) { |
| 44 | // log2p1(NaN) = NaN |
| 45 | if (x_bits.is_nan()) { |
| 46 | if (x_bits.is_signaling_nan()) { |
| 47 | fputil::raise_except_if_required(FE_INVALID); |
| 48 | return FPBits::quiet_nan().get_val(); |
| 49 | } |
| 50 | |
| 51 | return x; |
| 52 | } |
| 53 | |
| 54 | // When |x| <= 2^-3, use a degree-5 minimax polynomial on x. |
| 55 | // For y = 1+x near 1, the table-based range reduction suffers from |
| 56 | // catastrophic cancellation (m + log2(f) nearly cancel). Computing |
| 57 | // log2(1+x) directly via polynomial avoids this. |
| 58 | // |
| 59 | // Generated by Sollya with: |
| 60 | // > display = hexadecimal; |
| 61 | // > Q = fpminimax(log2(1 + x), [|1, 2, 3, 4, 5|], [|SG...|], |
| 62 | // [-2^-3, 2^-3]); |
| 63 | // > Q; |
| 64 | // > dirtyinfnorm((log2(1 + x) - Q) / log2(1 + x), [-2^-3, 2^-3]); |
| 65 | // 0x1.6ed1f4728dcb6f1d6a651a3c728937f7468f8eedfp-22 |
| 66 | if (x_abs <= 0x3000U) { |
| 67 | // log2p1(+/-0) = +/-0 |
| 68 | if (x_abs == 0U) |
| 69 | return x; |
| 70 | |
| 71 | #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 72 | constexpr size_t N_LOG2P1F16_EXCEPTS = 11; |
| 73 | constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS> |
| 74 | LOG2P1F16_EXCEPTS = {.values: { |
| 75 | // (input, RZ output, RU offset, RD offset, RN offset) |
| 76 | // x = 0x1.ec4p-12 |
| 77 | {.input: 0x0FB1U, .rnd_towardzero_result: 0x118BU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 78 | // x = 0x1.c04p-6 |
| 79 | {.input: 0x2701U, .rnd_towardzero_result: 0x28FBU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 80 | // x = -0x1.824p-15 |
| 81 | {.input: 0x8309U, .rnd_towardzero_result: 0x8461U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 82 | // x = -0x1.414p-10 |
| 83 | {.input: 0x9505U, .rnd_towardzero_result: 0x973EU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U}, |
| 84 | // x = -0x1.cb8p-10 |
| 85 | {.input: 0x972EU, .rnd_towardzero_result: 0x992FU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 86 | // x = -0x1.99cp-8 |
| 87 | {.input: 0x9E67U, .rnd_towardzero_result: 0xA0A2U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 88 | // x = -0x1.ce8p-7 |
| 89 | {.input: 0xA33AU, .rnd_towardzero_result: 0xA540U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 90 | // x = -0x1.73cp-6 |
| 91 | {.input: 0xA5CFU, .rnd_towardzero_result: 0xA83DU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 92 | // x = -0x1.87p-5 |
| 93 | {.input: 0xAA1CU, .rnd_towardzero_result: 0xAC84U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 94 | // x = -0x1.d48p-5 |
| 95 | {.input: 0xAB52U, .rnd_towardzero_result: 0xAD70U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 96 | // x = -0x1.da0p-4 |
| 97 | {.input: 0xAF68U, .rnd_towardzero_result: 0xB1ADU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 98 | }}; |
| 99 | |
| 100 | if (auto r = LOG2P1F16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value())) |
| 101 | return r.value(); |
| 102 | #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 103 | |
| 104 | float xf = x; |
| 105 | return fputil::cast<float16>( |
| 106 | x: xf * fputil::polyeval(x: xf, a0: 0x1.715476p+0f, a: -0x1.715204p-1f, |
| 107 | a: 0x1.ec6c5p-2f, a: -0x1.763338p-2f, |
| 108 | a: 0x1.2bcd3cp-2f)); |
| 109 | } |
| 110 | |
| 111 | // log2p1(+inf) = +inf |
| 112 | if (x_u == 0x7c00U) |
| 113 | return FPBits::inf().get_val(); |
| 114 | |
| 115 | // log2p1(-inf) = NaN |
| 116 | fputil::set_errno_if_required(EDOM); |
| 117 | fputil::raise_except_if_required(FE_INVALID); |
| 118 | return FPBits::quiet_nan().get_val(); |
| 119 | } |
| 120 | |
| 121 | // log2p1(-1) = -inf |
| 122 | if (LIBC_UNLIKELY(x_u == 0xbc00U)) { |
| 123 | fputil::raise_except_if_required(FE_DIVBYZERO); |
| 124 | return FPBits::inf(sign: Sign::NEG).get_val(); |
| 125 | } |
| 126 | |
| 127 | // log2p1(x) = NaN for x < -1 |
| 128 | if (LIBC_UNLIKELY(x_u > 0xbc00U)) { |
| 129 | fputil::set_errno_if_required(EDOM); |
| 130 | fputil::raise_except_if_required(FE_INVALID); |
| 131 | return FPBits::quiet_nan().get_val(); |
| 132 | } |
| 133 | |
| 134 | #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 135 | #ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT |
| 136 | constexpr size_t N_LOG2P1F16_EXCEPTS_HI = 2; |
| 137 | #else |
| 138 | constexpr size_t N_LOG2P1F16_EXCEPTS_HI = 6; |
| 139 | #endif |
| 140 | constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS_HI> |
| 141 | LOG2P1F16_EXCEPTS_HI = {.values: { |
| 142 | // (input, RZ output, RU offset, RD offset, RN offset) |
| 143 | #ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT |
| 144 | // x = 0x1.12p-3 |
| 145 | {.input: 0x3048U, .rnd_towardzero_result: 0x31CBU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 146 | // x = 0x1.598p-2 |
| 147 | {.input: 0x3566U, .rnd_towardzero_result: 0x36B5U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 148 | #endif |
| 149 | // x = 0x1.23cp-3 |
| 150 | {.input: 0x308FU, .rnd_towardzero_result: 0x3226U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 151 | // x = 0x1.accp+0 |
| 152 | {.input: 0x3EB3U, .rnd_towardzero_result: 0x3DADU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 153 | #ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT |
| 154 | // x = -0x1.534p-2 |
| 155 | {.input: 0xB54DU, .rnd_towardzero_result: 0xB8A5U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 156 | // x = -0x1.bb8p-2 |
| 157 | {.input: 0xB6EEU, .rnd_towardzero_result: 0xBA8DU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U}, |
| 158 | #endif |
| 159 | }}; |
| 160 | |
| 161 | if (auto r = LOG2P1F16_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value())) |
| 162 | return r.value(); |
| 163 | #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 164 | |
| 165 | // For the range reduction, we compute y = 1 + x in float. For |x| > 2^-3, |
| 166 | // the addition 1.0f + xf is exact in float, and the result y is bounded away |
| 167 | // from 1.0, avoiding catastrophic cancellation in the table decomposition. |
| 168 | float xf = x; |
| 169 | float y = 1.0f + xf; |
| 170 | |
| 171 | using FPBitsFloat = fputil::FPBits<float>; |
| 172 | FPBitsFloat y_bits(y); |
| 173 | |
| 174 | // y = 1 + x should stay positive for x > -1, but keep this guard for |
| 175 | // completeness in case of unexpected intermediate behavior. |
| 176 | if (LIBC_UNLIKELY(y_bits.is_zero())) |
| 177 | return FPBits::inf(sign: Sign::NEG).get_val(); |
| 178 | |
| 179 | int m = y_bits.get_exponent(); |
| 180 | y_bits.set_biased_exponent(FPBitsFloat::EXP_BIAS); |
| 181 | float mant_f = y_bits.get_val(); |
| 182 | |
| 183 | // Leading 23 - 5 = 18, so the top 5 mantissa bits give the index in [0, 31]. |
| 184 | int f = y_bits.get_mantissa() >> (FPBitsFloat::FRACTION_LEN - 5); |
| 185 | |
| 186 | // v = 1.mant * 1/f - 1 = d/f |
| 187 | float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F[f], z: -1.0f); |
| 188 | |
| 189 | // Degree-3 minimax polynomial generated by Sollya with the following |
| 190 | // commands: |
| 191 | // > display = hexadecimal; |
| 192 | // > P = fpminimax(log2(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]); |
| 193 | // > x * P; |
| 194 | // > dirtyinfnorm((log2(1 + x)/x - P) / (log2(1 + x)/x), [-2^-5, 2^-5]); |
| 195 | // 0x1.0087f3cad284d0a4464b1e27e83258b5e69a647f5p-19 |
| 196 | float log2p1_d_over_f = |
| 197 | v * fputil::polyeval(x: v, a0: 0x1.715476p+0f, a: -0x1.71771ap-1f, a: 0x1.ecb38ep-2f); |
| 198 | float log2_1_mant = LOG2F_F[f] + log2p1_d_over_f; |
| 199 | return fputil::cast<float16>(x: static_cast<float>(m) + log2_1_mant); |
| 200 | } |
| 201 | |
| 202 | } // namespace math |
| 203 | } // namespace LIBC_NAMESPACE_DECL |
| 204 | |
| 205 | #endif // LIBC_TYPES_HAS_FLOAT16 |
| 206 | |
| 207 | #endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H |
| 208 | |