| 1 | //===-- Implementation header for expbf16 -----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H |
| 10 | #define LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H |
| 11 | |
| 12 | #include "hdr/errno_macros.h" |
| 13 | #include "hdr/fenv_macros.h" |
| 14 | #include "src/__support/FPUtil/FEnvImpl.h" |
| 15 | #include "src/__support/FPUtil/FPBits.h" |
| 16 | #include "src/__support/FPUtil/PolyEval.h" |
| 17 | #include "src/__support/FPUtil/bfloat16.h" |
| 18 | #include "src/__support/FPUtil/cast.h" |
| 19 | #include "src/__support/FPUtil/except_value_utils.h" |
| 20 | #include "src/__support/FPUtil/multiply_add.h" |
| 21 | #include "src/__support/FPUtil/nearest_integer.h" |
| 22 | #include "src/__support/FPUtil/rounding_mode.h" |
| 23 | #include "src/__support/macros/config.h" |
| 24 | #include "src/__support/macros/optimization.h" |
| 25 | #include "src/__support/macros/properties/types.h" |
| 26 | |
| 27 | namespace LIBC_NAMESPACE_DECL { |
| 28 | |
| 29 | namespace math { |
| 30 | |
| 31 | namespace expbf16_internal { |
| 32 | |
| 33 | // Generated by Sollya with the following commands: |
| 34 | // > display = hexadecimal; |
| 35 | // > for i from -96 to 88 by 8 do print(i, round(exp(i), SG, RN) @ "f,"); |
| 36 | LIBC_INLINE_VAR constexpr float EXP_HI[24] = { |
| 37 | 0x1.6a4p-139f, 0x1.07b71p-127f, 0x1.7fd974p-116f, 0x1.175afp-104f, |
| 38 | 0x1.969d48p-93f, 0x1.27ec46p-81f, 0x1.aebabap-70f, 0x1.397924p-58f, |
| 39 | 0x1.c8465p-47f, 0x1.4c1078p-35f, 0x1.e355bcp-24f, 0x1.5fc21p-12f, |
| 40 | 0x1p0f, 0x1.749ea8p11f, 0x1.0f2ebep23f, 0x1.8ab7fcp34f, |
| 41 | 0x1.1f43fcp46f, 0x1.a220d4p57f, 0x1.304d6ap69f, 0x1.baed16p80f, |
| 42 | 0x1.425982p92f, 0x1.d531d8p103f, 0x1.55779cp115f, 0x1.f1056ep126f, |
| 43 | }; |
| 44 | |
| 45 | // Generated by Sollya with the following commands: |
| 46 | // > display = hexadecimal; |
| 47 | // > for i from 0 to 7.75 by 0.25 do print(round(exp(i), SG, RN) @ "f,"); |
| 48 | LIBC_INLINE_VAR constexpr float EXP_MID[32] = { |
| 49 | 0x1p0f, 0x1.48b5e4p0f, 0x1.a61298p0f, 0x1.0ef9dcp1f, |
| 50 | 0x1.5bf0a8p1f, 0x1.bec38ep1f, 0x1.1ed3fep2f, 0x1.704b6ap2f, |
| 51 | 0x1.d8e64cp2f, 0x1.2f9b88p3f, 0x1.85d6fep3f, 0x1.f4907p3f, |
| 52 | 0x1.415e5cp4f, 0x1.9ca53cp4f, 0x1.08ec72p5f, 0x1.542b2ep5f, |
| 53 | 0x1.b4c902p5f, 0x1.186bf2p6f, 0x1.68118ap6f, 0x1.ce564ep6f, |
| 54 | 0x1.28d38ap7f, 0x1.7d21eep7f, 0x1.e96244p7f, 0x1.3a30dp8f, |
| 55 | 0x1.936dc6p8f, 0x1.0301a4p9f, 0x1.4c9222p9f, 0x1.ab0786p9f, |
| 56 | 0x1.122886p10f, 0x1.6006b6p10f, 0x1.c402b6p10f, 0x1.223252p11f, |
| 57 | }; |
| 58 | |
| 59 | } // namespace expbf16_internal |
| 60 | |
| 61 | LIBC_INLINE bfloat16 expbf16(bfloat16 x) { |
| 62 | using FPBits = fputil::FPBits<bfloat16>; |
| 63 | FPBits x_bits(x); |
| 64 | |
| 65 | uint16_t x_u = x_bits.uintval(); |
| 66 | uint16_t x_abs = x_u & 0x7fffU; |
| 67 | |
| 68 | // 0 <= |x| <= 2^(-3), or |x| >= 89, or x is NaN |
| 69 | if (LIBC_UNLIKELY(x_abs <= 0x3e00U || x_abs >= 0x42b2U)) { |
| 70 | |
| 71 | // exp(NaN) = NaN |
| 72 | if (x_bits.is_nan()) { |
| 73 | if (x_bits.is_signaling_nan()) { |
| 74 | fputil::raise_except_if_required(FE_INVALID); |
| 75 | return FPBits::quiet_nan().get_val(); |
| 76 | } |
| 77 | |
| 78 | return x; |
| 79 | } |
| 80 | |
| 81 | // if x >= 89 |
| 82 | if (x_bits.is_pos() && x_abs >= 0x42b2U) { |
| 83 | // exp(inf) = inf |
| 84 | if (x_bits.is_inf()) |
| 85 | return x; |
| 86 | |
| 87 | #ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY |
| 88 | fputil::set_errno_if_required(ERANGE); |
| 89 | fputil::raise_except_if_required(FE_OVERFLOW); |
| 90 | return FPBits::inf().get_val(); |
| 91 | #else |
| 92 | switch (fputil::quick_get_round()) { |
| 93 | case FE_TONEAREST: |
| 94 | case FE_UPWARD: |
| 95 | fputil::set_errno_if_required(ERANGE); |
| 96 | fputil::raise_except_if_required(FE_OVERFLOW); |
| 97 | return FPBits::inf().get_val(); |
| 98 | default: |
| 99 | return FPBits::max_normal().get_val(); |
| 100 | } |
| 101 | #endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY |
| 102 | } |
| 103 | |
| 104 | // x <= -92.5 |
| 105 | if (x_u >= 0xc2b9U) { |
| 106 | // exp(-inf) = +0 |
| 107 | if (x_bits.is_inf()) |
| 108 | return FPBits::zero().get_val(); |
| 109 | |
| 110 | fputil::set_errno_if_required(ERANGE); |
| 111 | fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); |
| 112 | |
| 113 | #ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY |
| 114 | return FPBits::zero().get_val(); |
| 115 | #else |
| 116 | switch (fputil::quick_get_round()) { |
| 117 | case FE_UPWARD: |
| 118 | case FE_TONEAREST: |
| 119 | if (LIBC_UNLIKELY(x_u == 0xc2b9U)) |
| 120 | return FPBits::min_subnormal().get_val(); |
| 121 | return FPBits::zero().get_val(); |
| 122 | default: |
| 123 | return FPBits::zero().get_val(); |
| 124 | } |
| 125 | #endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY |
| 126 | } |
| 127 | |
| 128 | // exp(0) = 1 |
| 129 | if (x_bits.is_zero()) |
| 130 | return bfloat16(1.0f); |
| 131 | |
| 132 | // 0 < |x| <= 2^(-3) |
| 133 | if (x_abs <= 0x3e00U) { |
| 134 | float xf = static_cast<float>(x); |
| 135 | // Degree-3 minimax polynomial generated by Sollya with the following |
| 136 | // commands: |
| 137 | // > display = hexadecimal; |
| 138 | // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]); |
| 139 | // > 1 + x * P; |
| 140 | // 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3)) |
| 141 | return fputil::cast<bfloat16>(x: fputil::polyeval( |
| 142 | x: xf, a0: 0x1p+0f, a: 0x1p+0f, a: 0x1.00004p-1f, a: 0x1.555578p-3f)); |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 147 | constexpr fputil::ExceptValues<bfloat16, 4> EXPBF16_EXCEPTS = {.values: { |
| 148 | // (input, RZ output, RU offset, RD offset, RN offset) |
| 149 | // x = 0x40DB (6.84375) |
| 150 | // MPFR: RU=0x446B, RD=0x446A, RZ=0x446A, RN=0x446B |
| 151 | {.input: 0x40DBU, .rnd_towardzero_result: 0x446AU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 152 | // x = 0x419D, keep original |
| 153 | {.input: 0x419DU, .rnd_towardzero_result: 0x4D9FU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U}, |
| 154 | // x = 0x41F9 (31.125) |
| 155 | // MPFR: RU=0x55F0, RD=0x55EF, RZ=0x55EF, RN=0x55F0 |
| 156 | {.input: 0x41F9U, .rnd_towardzero_result: 0x55EFU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 157 | // x = 0xC19F (-19.875) |
| 158 | // MPFR: RU=0x3121, RD=0x3120, RZ=0x3120, RN=0x3121 |
| 159 | {.input: 0xC19FU, .rnd_towardzero_result: 0x3120U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U}, |
| 160 | }}; |
| 161 | |
| 162 | if (auto r = EXPBF16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value())) |
| 163 | return r.value(); |
| 164 | #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 165 | |
| 166 | // For -93 < x < 89, we do the following range reduction: |
| 167 | // x = hi + mid + lo |
| 168 | // where, |
| 169 | // hi is an integer |
| 170 | // mid * 2^2 is an integer |
| 171 | // -2^3 <= lo <= 2^3 |
| 172 | // also, hi + mid = round(4 * x) / x |
| 173 | // then, |
| 174 | // exp(x) = exp(hi + mid + lo) |
| 175 | // = exp(hi) * exp(mid) * exp(lo) |
| 176 | // we store 184/8 + 1 = 24 values for looking up exp(hi) |
| 177 | // from -96 to 88 |
| 178 | // we store 8*4 = 32 values for looking up exp(mid) since |
| 179 | // mid will always have the bit pattern |bbb.bb| where |
| 180 | // b can be either 0 or 1 |
| 181 | |
| 182 | float xf = static_cast<float>(x); |
| 183 | float kf = fputil::nearest_integer(x: xf * 4.0f); |
| 184 | int x_hi_mid = static_cast<int>(kf); |
| 185 | int x_hi = x_hi_mid >> 5; |
| 186 | int x_mid = x_hi_mid & 0b11111; |
| 187 | // lo = x - (hi + mid) = round(x * 4) / (-4) + x |
| 188 | float lo = fputil::multiply_add(x: kf, y: -0.25f, z: xf); |
| 189 | |
| 190 | float exp_hi = expbf16_internal::EXP_HI[x_hi + 12]; |
| 191 | float exp_mid = expbf16_internal::EXP_MID[x_mid]; |
| 192 | |
| 193 | // Degree-3 minimax polynomial generated by Sollya with the following |
| 194 | // commands: |
| 195 | // > display = hexadecimal; |
| 196 | // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]); |
| 197 | // > 1 + x * P; |
| 198 | // 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3)) |
| 199 | float exp_lo = |
| 200 | fputil::polyeval(x: lo, a0: 0x1p+0f, a: 0x1p+0f, a: 0x1.00004p-1f, a: 0x1.555578p-3f); |
| 201 | |
| 202 | return fputil::cast<bfloat16>(x: exp_hi * exp_mid * exp_lo); |
| 203 | } |
| 204 | |
| 205 | } // namespace math |
| 206 | } // namespace LIBC_NAMESPACE_DECL |
| 207 | |
| 208 | #endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H |
| 209 | |