| 1 | //===-- Implementation header for erfcf16 -----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ERFCF16_H |
| 10 | #define LLVM_LIBC_SRC___SUPPORT_MATH_ERFCF16_H |
| 11 | |
| 12 | #include "include/llvm-libc-macros/float16-macros.h" |
| 13 | |
| 14 | #ifdef LIBC_TYPES_HAS_FLOAT16 |
| 15 | |
| 16 | #include "src/__support/FPUtil/FEnvImpl.h" |
| 17 | #include "src/__support/FPUtil/FPBits.h" |
| 18 | #include "src/__support/FPUtil/cast.h" |
| 19 | #include "src/__support/FPUtil/except_value_utils.h" |
| 20 | #include "src/__support/FPUtil/multiply_add.h" |
| 21 | #include "src/__support/macros/config.h" |
| 22 | #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY |
| 23 | |
| 24 | namespace LIBC_NAMESPACE_DECL { |
| 25 | |
| 26 | namespace math { |
| 27 | |
| 28 | LIBC_INLINE float16 erfcf16(float16 x) { |
| 29 | // Polynomials approximating erfc(|x|) on ( k/2, (k+1)/2 ) generated by |
| 30 | // Sollya with: > P = fpminimax(erfc(x), [|0, 1, 2, 3, 4, 5, 6, 7|], [|D...|], |
| 31 | // [k/2, (k+1)/2]); |
| 32 | // |
| 33 | // for k = 0..7. |
| 34 | constexpr double COEFFS[8][8] = { |
| 35 | {0x1.00000006e5898p0, -0x1.20dd7b4435481p0, 0x1.e2ffc0264c37cp-17, |
| 36 | 0x1.80ef5566d3135p-2, 0x1.96ef459387c13p-10, -0x1.e6f847a52d3fep-4, |
| 37 | 0x1.9a59af64a5dfap-7, 0x1.f652f3b14963bp-7}, |
| 38 | {0x1.001a9f1d3c0b4p0, -0x1.221a42637ae8p0, 0x1.9c3ddc1e5d176p-6, |
| 39 | 0x1.347481936316bp-2, 0x1.1db66a2746b8bp-3, -0x1.1d7a264182166p-2, |
| 40 | 0x1.ef858095e1609p-4, -0x1.26936198d6b07p-6}, |
| 41 | {0x1.f08a38741577bp-1, -0x1.e26ae90822fp-1, -0x1.f300a88478724p-2, |
| 42 | 0x1.115a0580ba3f8p0, -0x1.1a1c2bc3ffcdap-1, 0x1.888d0c9a082b8p-4, |
| 43 | 0x1.f41c9f1877fdfp-8, -0x1.a72fb878df30bp-9}, |
| 44 | {0x1.c1cabe622be64p-1, -0x1.ee19257d1037ap-2, -0x1.7a725229a24b8p0, |
| 45 | 0x1.2089bc62b1069p1, -0x1.673f1b9fbced5p0, 0x1.da7db686b0475p-2, |
| 46 | -0x1.49a292662ca6ap-4, 0x1.7e2b298da504dp-8}, |
| 47 | {0x1.a466e958ca525p0, -0x1.8a850d50339a9p1, 0x1.29b741ccfdd3ap1, |
| 48 | -0x1.b250e97982f77p-1, 0x1.ea6896c5fa419p-4, 0x1.b332978509bf1p-7, |
| 49 | -0x1.9f1c5d9122108p-8, 0x1.2fb4d83883203p-11}, |
| 50 | {0x1.12fb16acc5644p1, -0x1.25003c37e1b24p2, 0x1.0e0dc863b2f12p2, |
| 51 | -0x1.16f179d8797a6p1, 0x1.5c8b140bf43f8p-1, -0x1.07473d994c2edp-3, |
| 52 | 0x1.bd0c1c2d2a9e3p-7, -0x1.448767255aabbp-11}, |
| 53 | {0x1.13a691333e22ap0, -0x1.0e2a642d8d318p1, 0x1.c7d88193df94bp0, |
| 54 | -0x1.acf7c7b79498fp-1, 0x1.e626f6202a127p-3, -0x1.4babcc8609859p-5, |
| 55 | 0x1.f860060a3658p-9, -0x1.49a4190580d4p-13}, |
| 56 | {0x1.cf3a84bf655afp-3, -0x1.968e6988b5cep-2, 0x1.326f1f9499739p-2, |
| 57 | -0x1.011785d112c9bp-3, 0x1.0343a0c05e285p-5, -0x1.3a3ae5e48130ep-8, |
| 58 | 0x1.a7c5af0484892p-12, -0x1.ea82a062010c3p-17}, |
| 59 | }; |
| 60 | |
| 61 | static constexpr size_t N_ERFCF16_EXCEPTS = 3; |
| 62 | static constexpr fputil::ExceptValues<float16, N_ERFCF16_EXCEPTS> |
| 63 | ERFCF16_EXCEPTS = {.values: { |
| 64 | // (input, RZ output, RU offset, RD offset, RN offset) |
| 65 | // x = 0x0.000216, erfc(x) = 0x1.000 |
| 66 | {.input: 0x0B17, .rnd_towardzero_result: 0x3BFF, .rnd_upward_offset: 1, .rnd_downward_offset: 0, .rnd_tonearest_offset: 0}, |
| 67 | // x = 0x0.00346, erfc(x) = 0x0.ff8 |
| 68 | {.input: 0x1B17, .rnd_towardzero_result: 0x3BF7, .rnd_upward_offset: 1, .rnd_downward_offset: 0, .rnd_tonearest_offset: 1}, |
| 69 | // x = -0x0.00346, erfc(x) = 0x1.00c |
| 70 | {.input: 0x9B17, .rnd_towardzero_result: 0x3C04, .rnd_upward_offset: 1, .rnd_downward_offset: 0, .rnd_tonearest_offset: 0}, |
| 71 | }}; |
| 72 | using FPBits = typename fputil::FPBits<float16>; |
| 73 | FPBits xbits(x); |
| 74 | uint16_t x_u = xbits.uintval(); |
| 75 | |
| 76 | if (auto r = ERFCF16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value())) |
| 77 | return r.value(); |
| 78 | |
| 79 | uint16_t x_abs = xbits.abs().uintval(); |
| 80 | |
| 81 | // Special cases: NaN and Inf |
| 82 | if (LIBC_UNLIKELY(x_abs >= 0x7c00U)) { |
| 83 | if (x_abs > 0x7c00U) { |
| 84 | if (xbits.is_signaling_nan()) { |
| 85 | fputil::raise_except_if_required(FE_INVALID); |
| 86 | return FPBits::quiet_nan().get_val(); |
| 87 | } |
| 88 | return x; |
| 89 | } |
| 90 | // erfc(+Inf) = 0, erfc(-Inf) = 2 |
| 91 | return xbits.is_neg() ? fputil::cast<float16>(x: 2.0) |
| 92 | : fputil::cast<float16>(x: 0.0); |
| 93 | } |
| 94 | |
| 95 | if (LIBC_UNLIKELY(x_abs == 0)) |
| 96 | return 1.0f16; |
| 97 | |
| 98 | // Asymptotic behavior: erfc(x) rounds to 0 or 2 for |x| >= 4.0. |
| 99 | if (LIBC_UNLIKELY(x_abs >= 0x4400U)) { // |x| >= 4.0 |
| 100 | if (xbits.is_neg()) { |
| 101 | // 0x1.0p-12 is ~0.25 ULP of 2.0 in float16, small enough to round |
| 102 | // to 2.0 in RN, but large enough to round down in RD/RZ. |
| 103 | float xf = fputil::cast<float>(x); |
| 104 | return fputil::cast<float16>(x: 2.0 - 0x1.0p-12 * (-4.0 / xf)); |
| 105 | } |
| 106 | fputil::set_errno_if_required(ERANGE); |
| 107 | fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); |
| 108 | #ifndef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY |
| 109 | if (fputil::fenv_is_round_up()) |
| 110 | return FPBits::min_subnormal().get_val(); |
| 111 | #endif |
| 112 | return 0.0f16; |
| 113 | } |
| 114 | |
| 115 | // Polynomial approximation: |
| 116 | // erfc(x) ~ erfc(|x|) if x >= 0 |
| 117 | // erfc(x) ~ 2 - erfc(|x|) if x < 0 |
| 118 | // erfc(|x|) is evaluated using a degree-7 polynomial on each sub-interval. |
| 119 | |
| 120 | int idx = static_cast<int>(xbits.abs().get_val() * 2.0f); |
| 121 | double xd = fputil::cast<double>(x: xbits.abs().get_val()); |
| 122 | double xsq = xd * xd; |
| 123 | double x4 = xsq * xsq; |
| 124 | |
| 125 | double p01 = fputil::multiply_add(x: xd, y: COEFFS[idx][1], z: COEFFS[idx][0]); |
| 126 | double p23 = fputil::multiply_add(x: xd, y: COEFFS[idx][3], z: COEFFS[idx][2]); |
| 127 | double p45 = fputil::multiply_add(x: xd, y: COEFFS[idx][5], z: COEFFS[idx][4]); |
| 128 | double p67 = fputil::multiply_add(x: xd, y: COEFFS[idx][7], z: COEFFS[idx][6]); |
| 129 | |
| 130 | double p03 = fputil::multiply_add(x: xsq, y: p23, z: p01); |
| 131 | double p47 = fputil::multiply_add(x: xsq, y: p67, z: p45); |
| 132 | |
| 133 | double erfc_abs = fputil::multiply_add(x: x4, y: p47, z: p03); |
| 134 | |
| 135 | if (xbits.is_neg()) |
| 136 | return fputil::cast<float16>(x: 2.0 - erfc_abs); |
| 137 | |
| 138 | return fputil::cast<float16>(x: erfc_abs); |
| 139 | } |
| 140 | |
| 141 | } // namespace math |
| 142 | |
| 143 | } // namespace LIBC_NAMESPACE_DECL |
| 144 | |
| 145 | #endif // LIBC_TYPES_HAS_FLOAT16 |
| 146 | |
| 147 | #endif // LLVM_LIBC_SRC___SUPPORT_MATH_ERFCF16_H |
| 148 | |