1//===-- Implementation header for log10p1f16 --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H
11
12#include "include/llvm-libc-macros/float16-macros.h"
13
14#ifdef LIBC_TYPES_HAS_FLOAT16
15
16#include "exp10_float16_constants.h"
17#include "expxf16_utils.h"
18#include "hdr/errno_macros.h"
19#include "hdr/fenv_macros.h"
20#include "src/__support/FPUtil/FEnvImpl.h"
21#include "src/__support/FPUtil/FPBits.h"
22#include "src/__support/FPUtil/PolyEval.h"
23#include "src/__support/FPUtil/cast.h"
24#include "src/__support/FPUtil/except_value_utils.h"
25#include "src/__support/FPUtil/multiply_add.h"
26#include "src/__support/common.h"
27#include "src/__support/macros/config.h"
28#include "src/__support/macros/optimization.h"
29#include "src/__support/macros/properties/cpu_features.h"
30
31namespace LIBC_NAMESPACE_DECL {
32
33namespace math {
34
35LIBC_INLINE float16 log10p1f16(float16 x) {
36 using namespace math::expxf16_internal;
37 using FPBits = fputil::FPBits<float16>;
38 FPBits x_bits(x);
39
40 uint16_t x_u = x_bits.uintval();
41 uint16_t x_abs = x_u & 0x7fffU;
42
43 // If x is +-0, NaN, +/-inf, or |x| <= 2^-3.
44 if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x7c00U)) {
45 // log10p1(NaN) = NaN
46 if (x_bits.is_nan()) {
47 if (x_bits.is_signaling_nan()) {
48 fputil::raise_except_if_required(FE_INVALID);
49 return FPBits::quiet_nan().get_val();
50 }
51
52 return x;
53 }
54
55 // log10p1(+/-0) = +/-0
56 if (x_abs == 0U)
57 return x;
58
59 // log10p1(+inf) = +inf
60 if (x_u == 0x7c00U)
61 return FPBits::inf().get_val();
62
63 // log10p1(-inf) = NaN
64 if (x_abs >= 0x7c00U) {
65 fputil::set_errno_if_required(EDOM);
66 fputil::raise_except_if_required(FE_INVALID);
67 return FPBits::quiet_nan().get_val();
68 }
69
70 // When |x| <= 2^-3, use a degree-5 minimax polynomial on x.
71 // For y = 1+x near 1, the table-based range reduction suffers from
72 // catastrophic cancellation (m*log10(2) + log10(f) nearly cancel).
73 // Computing log10(1+x) directly via polynomial avoids this.
74 //
75 // Generated by Sollya with:
76 // > display = hexadecimal;
77 // > Q = fpminimax(log10(1 + x), [|1, 2, 3, 4, 5|], [|SG...|],
78 // [-2^-3, 2^-3]);
79 // > Q;
80 if (x_abs <= 0x3000U) {
81#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
82 // ExceptValues for the small-|x| polynomial path (|x| <= 2^-3).
83 constexpr size_t N_LOG10P1F16_EXCEPTS = 7;
84 constexpr fputil::ExceptValues<float16, N_LOG10P1F16_EXCEPTS>
85 LOG10P1F16_EXCEPTS = {.values: {
86 // (input, RZ output, RU offset, RD offset, RN offset)
87 // x = 0x1.ep-13
88 {.input: 0x0B80U, .rnd_towardzero_result: 0x0683U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
89 // x = 0x1.4fp-6
90 {.input: 0x213CU, .rnd_towardzero_result: 0x1C85U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
91 // x = 0x1.82cp-4
92 {.input: 0x2E0BU, .rnd_towardzero_result: 0x2904U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
93 // x = 0x1.d24p-4
94 {.input: 0x2EC9U, .rnd_towardzero_result: 0x299AU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
95 // x = -0x1.b8p-13
96 {.input: 0x89DCU, .rnd_towardzero_result: 0x8516U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
97 // x = -0x1.49p-6
98 {.input: 0x9924U, .rnd_towardzero_result: 0x9478U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
99 // x = -0x1.42p-8
100 {.input: 0x9D08U, .rnd_towardzero_result: 0x9861U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
101 }};
102
103 if (auto r = LOG10P1F16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
104 return r.value();
105#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
106
107 float xf = x;
108 return fputil::cast<float16>(
109 x: xf * fputil::polyeval(x: xf, a0: 0x1.bcb7b2p-2f, a: -0x1.bcb4cp-3f,
110 a: 0x1.2875bcp-3f, a: -0x1.c2946ep-4f,
111 a: 0x1.69da2p-4f));
112 }
113 }
114
115 // log10p1(-1) = -inf
116 if (LIBC_UNLIKELY(x_u == 0xbc00U)) {
117 fputil::raise_except_if_required(FE_DIVBYZERO);
118 return FPBits::inf(sign: Sign::NEG).get_val();
119 }
120
121 // log10p1(x) = NaN for x < -1
122 if (LIBC_UNLIKELY(x_u > 0xbc00U)) {
123 fputil::set_errno_if_required(EDOM);
124 fputil::raise_except_if_required(FE_INVALID);
125 return FPBits::quiet_nan().get_val();
126 }
127
128#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
129 // ExceptValues for the large-|x| table-based path (|x| > 2^-3).
130 constexpr size_t N_LOG10P1F16_EXCEPTS_HI = 12;
131 constexpr fputil::ExceptValues<float16, N_LOG10P1F16_EXCEPTS_HI>
132 LOG10P1F16_EXCEPTS_HI = {.values: {
133 // (input, RZ output, RU offset, RD offset, RN offset)
134 // x = 0x1.3bp-3
135 {.input: 0x30ECU, .rnd_towardzero_result: 0x2BF3U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
136 // x = 0x1.ba0p-3
137 {.input: 0x32E8U, .rnd_towardzero_result: 0x2D6EU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
138 // x = 0x1.4c4p-2
139 {.input: 0x3531U, .rnd_towardzero_result: 0x2FCFU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
140 // x = 0x1.744p+0
141 {.input: 0x3DD1U, .rnd_towardzero_result: 0x363CU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
142 // x = 0x1.7d8p+0
143 {.input: 0x3DF6U, .rnd_towardzero_result: 0x3656U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
144 // x = 0x1.2p+3
145 {.input: 0x4880U, .rnd_towardzero_result: 0x3C00U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
146 // x = 0x1.8cp+6
147 {.input: 0x5630U, .rnd_towardzero_result: 0x4000U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
148 // x = 0x1.f44p+6
149 {.input: 0x57D1U, .rnd_towardzero_result: 0x4033U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
150 // x = 0x1.f38p+9
151 {.input: 0x63CEU, .rnd_towardzero_result: 0x4200U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
152 // x = -0x1.558p-3
153 {.input: 0xB156U, .rnd_towardzero_result: 0xAD12U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
154 // x = -0x1.8d8p-2
155 {.input: 0xB636U, .rnd_towardzero_result: 0xB2D3U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
156 // x = -0x1.808p-1
157 {.input: 0xBA02U, .rnd_towardzero_result: 0xB8D4U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
158 }};
159
160 if (auto r = LOG10P1F16_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
161 return r.value();
162#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
163
164 // For the range reduction, we compute y = 1 + x in float. For |x| > 2^-3,
165 // the addition 1.0f + xf is exact in float, and the result y is bounded away
166 // from 1.0, avoiding catastrophic cancellation in the table decomposition.
167 float xf = x;
168 float y = 1.0f + xf;
169
170 using FPBitsFloat = fputil::FPBits<float>;
171 FPBitsFloat y_bits(y);
172
173 // When y is subnormal or zero, which shouldn't happen since y = 1 + x and
174 // x >= -1 + eps in f16, so y >= eps > 0. But handle y = 0 just in case.
175 if (LIBC_UNLIKELY(y_bits.is_zero()))
176 return FPBits::inf(sign: Sign::NEG).get_val();
177
178 int m = y_bits.get_exponent();
179 // Set y_bits to 1.mant (biased exponent = 127).
180 y_bits.set_biased_exponent(FPBitsFloat::EXP_BIAS);
181 float mant_f = y_bits.get_val();
182
183 // Leading 23 - 5 = 18, so top 5 mantissa bits give index f in [0, 31].
184 int f = y_bits.get_mantissa() >> (FPBitsFloat::FRACTION_LEN - 5);
185
186 // v = 1.mant * 1/f - 1 = d/f
187 float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F[f], z: -1.0f);
188
189 // Degree-3 minimax polynomial generated by Sollya with the following
190 // commands:
191 // > display = hexadecimal;
192 // > P = fpminimax(log10(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
193 // > x * P;
194 float log10p1_d_over_f =
195 v * fputil::polyeval(x: v, a0: 0x1.bcb7bp-2f, a: -0x1.bce168p-3f, a: 0x1.28acb8p-3f);
196 // log10(1.mant) = log10(f) + log10(1 + d/f)
197 float log10_1_mant = LOG10F_F[f] + log10p1_d_over_f;
198 return fputil::cast<float16>(
199 x: fputil::multiply_add(x: static_cast<float>(m), y: LOG10F_2, z: log10_1_mant));
200}
201
202} // namespace math
203} // namespace LIBC_NAMESPACE_DECL
204
205#endif // LIBC_TYPES_HAS_FLOAT16
206
207#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H
208