1//===-- Implementation header for log2p1f16 ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
11
12#include "include/llvm-libc-macros/float16-macros.h"
13
14#ifdef LIBC_TYPES_HAS_FLOAT16
15
16#include "expxf16_utils.h"
17#include "hdr/errno_macros.h"
18#include "hdr/fenv_macros.h"
19#include "src/__support/FPUtil/FEnvImpl.h"
20#include "src/__support/FPUtil/FPBits.h"
21#include "src/__support/FPUtil/PolyEval.h"
22#include "src/__support/FPUtil/cast.h"
23#include "src/__support/FPUtil/except_value_utils.h"
24#include "src/__support/FPUtil/multiply_add.h"
25#include "src/__support/common.h"
26#include "src/__support/macros/config.h"
27#include "src/__support/macros/optimization.h"
28#include "src/__support/macros/properties/cpu_features.h"
29
30namespace LIBC_NAMESPACE_DECL {
31
32namespace math {
33
34LIBC_INLINE float16 log2p1f16(float16 x) {
35 using namespace math::expxf16_internal;
36 using FPBits = fputil::FPBits<float16>;
37 FPBits x_bits(x);
38
39 uint16_t x_u = x_bits.uintval();
40 uint16_t x_abs = x_u & 0x7fffU;
41
42 // If x is NaN, +/-inf, or |x| <= 2^-3.
43 if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x7c00U)) {
44 // log2p1(NaN) = NaN
45 if (x_bits.is_nan()) {
46 if (x_bits.is_signaling_nan()) {
47 fputil::raise_except_if_required(FE_INVALID);
48 return FPBits::quiet_nan().get_val();
49 }
50
51 return x;
52 }
53
54 // When |x| <= 2^-3, use a degree-5 minimax polynomial on x.
55 // For y = 1+x near 1, the table-based range reduction suffers from
56 // catastrophic cancellation (m + log2(f) nearly cancel). Computing
57 // log2(1+x) directly via polynomial avoids this.
58 //
59 // Generated by Sollya with:
60 // > display = hexadecimal;
61 // > Q = fpminimax(log2(1 + x), [|1, 2, 3, 4, 5|], [|SG...|],
62 // [-2^-3, 2^-3]);
63 // > Q;
64 // > dirtyinfnorm((log2(1 + x) - Q) / log2(1 + x), [-2^-3, 2^-3]);
65 // 0x1.6ed1f4728dcb6f1d6a651a3c728937f7468f8eedfp-22
66 if (x_abs <= 0x3000U) {
67 // log2p1(+/-0) = +/-0
68 if (x_abs == 0U)
69 return x;
70
71#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
72 constexpr size_t N_LOG2P1F16_EXCEPTS = 11;
73 constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS>
74 LOG2P1F16_EXCEPTS = {.values: {
75 // (input, RZ output, RU offset, RD offset, RN offset)
76 // x = 0x1.ec4p-12
77 {.input: 0x0FB1U, .rnd_towardzero_result: 0x118BU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
78 // x = 0x1.c04p-6
79 {.input: 0x2701U, .rnd_towardzero_result: 0x28FBU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
80 // x = -0x1.824p-15
81 {.input: 0x8309U, .rnd_towardzero_result: 0x8461U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
82 // x = -0x1.414p-10
83 {.input: 0x9505U, .rnd_towardzero_result: 0x973EU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
84 // x = -0x1.cb8p-10
85 {.input: 0x972EU, .rnd_towardzero_result: 0x992FU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
86 // x = -0x1.99cp-8
87 {.input: 0x9E67U, .rnd_towardzero_result: 0xA0A2U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
88 // x = -0x1.ce8p-7
89 {.input: 0xA33AU, .rnd_towardzero_result: 0xA540U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
90 // x = -0x1.73cp-6
91 {.input: 0xA5CFU, .rnd_towardzero_result: 0xA83DU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
92 // x = -0x1.87p-5
93 {.input: 0xAA1CU, .rnd_towardzero_result: 0xAC84U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
94 // x = -0x1.d48p-5
95 {.input: 0xAB52U, .rnd_towardzero_result: 0xAD70U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
96 // x = -0x1.da0p-4
97 {.input: 0xAF68U, .rnd_towardzero_result: 0xB1ADU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
98 }};
99
100 if (auto r = LOG2P1F16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
101 return r.value();
102#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
103
104 float xf = x;
105 return fputil::cast<float16>(
106 x: xf * fputil::polyeval(x: xf, a0: 0x1.715476p+0f, a: -0x1.715204p-1f,
107 a: 0x1.ec6c5p-2f, a: -0x1.763338p-2f,
108 a: 0x1.2bcd3cp-2f));
109 }
110
111 // log2p1(+inf) = +inf
112 if (x_u == 0x7c00U)
113 return FPBits::inf().get_val();
114
115 // log2p1(-inf) = NaN
116 fputil::set_errno_if_required(EDOM);
117 fputil::raise_except_if_required(FE_INVALID);
118 return FPBits::quiet_nan().get_val();
119 }
120
121 // log2p1(-1) = -inf
122 if (LIBC_UNLIKELY(x_u == 0xbc00U)) {
123 fputil::raise_except_if_required(FE_DIVBYZERO);
124 return FPBits::inf(sign: Sign::NEG).get_val();
125 }
126
127 // log2p1(x) = NaN for x < -1
128 if (LIBC_UNLIKELY(x_u > 0xbc00U)) {
129 fputil::set_errno_if_required(EDOM);
130 fputil::raise_except_if_required(FE_INVALID);
131 return FPBits::quiet_nan().get_val();
132 }
133
134#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
135#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
136 constexpr size_t N_LOG2P1F16_EXCEPTS_HI = 2;
137#else
138 constexpr size_t N_LOG2P1F16_EXCEPTS_HI = 6;
139#endif
140 constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS_HI>
141 LOG2P1F16_EXCEPTS_HI = {.values: {
142 // (input, RZ output, RU offset, RD offset, RN offset)
143#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
144 // x = 0x1.12p-3
145 {.input: 0x3048U, .rnd_towardzero_result: 0x31CBU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
146 // x = 0x1.598p-2
147 {.input: 0x3566U, .rnd_towardzero_result: 0x36B5U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
148#endif
149 // x = 0x1.23cp-3
150 {.input: 0x308FU, .rnd_towardzero_result: 0x3226U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
151 // x = 0x1.accp+0
152 {.input: 0x3EB3U, .rnd_towardzero_result: 0x3DADU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
153#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
154 // x = -0x1.534p-2
155 {.input: 0xB54DU, .rnd_towardzero_result: 0xB8A5U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
156 // x = -0x1.bb8p-2
157 {.input: 0xB6EEU, .rnd_towardzero_result: 0xBA8DU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
158#endif
159 }};
160
161 if (auto r = LOG2P1F16_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
162 return r.value();
163#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
164
165 // For the range reduction, we compute y = 1 + x in float. For |x| > 2^-3,
166 // the addition 1.0f + xf is exact in float, and the result y is bounded away
167 // from 1.0, avoiding catastrophic cancellation in the table decomposition.
168 float xf = x;
169 float y = 1.0f + xf;
170
171 using FPBitsFloat = fputil::FPBits<float>;
172 FPBitsFloat y_bits(y);
173
174 // y = 1 + x should stay positive for x > -1, but keep this guard for
175 // completeness in case of unexpected intermediate behavior.
176 if (LIBC_UNLIKELY(y_bits.is_zero()))
177 return FPBits::inf(sign: Sign::NEG).get_val();
178
179 int m = y_bits.get_exponent();
180 y_bits.set_biased_exponent(FPBitsFloat::EXP_BIAS);
181 float mant_f = y_bits.get_val();
182
183 // Leading 23 - 5 = 18, so the top 5 mantissa bits give the index in [0, 31].
184 int f = y_bits.get_mantissa() >> (FPBitsFloat::FRACTION_LEN - 5);
185
186 // v = 1.mant * 1/f - 1 = d/f
187 float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F[f], z: -1.0f);
188
189 // Degree-3 minimax polynomial generated by Sollya with the following
190 // commands:
191 // > display = hexadecimal;
192 // > P = fpminimax(log2(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
193 // > x * P;
194 // > dirtyinfnorm((log2(1 + x)/x - P) / (log2(1 + x)/x), [-2^-5, 2^-5]);
195 // 0x1.0087f3cad284d0a4464b1e27e83258b5e69a647f5p-19
196 float log2p1_d_over_f =
197 v * fputil::polyeval(x: v, a0: 0x1.715476p+0f, a: -0x1.71771ap-1f, a: 0x1.ecb38ep-2f);
198 float log2_1_mant = LOG2F_F[f] + log2p1_d_over_f;
199 return fputil::cast<float16>(x: static_cast<float>(m) + log2_1_mant);
200}
201
202} // namespace math
203} // namespace LIBC_NAMESPACE_DECL
204
205#endif // LIBC_TYPES_HAS_FLOAT16
206
207#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
208