1//===-- Implementation header for log10f16 ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG10F16_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_LOG10F16_H
11
12#include "include/llvm-libc-macros/float16-macros.h"
13
14#ifdef LIBC_TYPES_HAS_FLOAT16
15
16#include "expxf16_utils.h"
17#include "hdr/errno_macros.h"
18#include "hdr/fenv_macros.h"
19#include "src/__support/FPUtil/FEnvImpl.h"
20#include "src/__support/FPUtil/FPBits.h"
21#include "src/__support/FPUtil/PolyEval.h"
22#include "src/__support/FPUtil/cast.h"
23#include "src/__support/FPUtil/except_value_utils.h"
24#include "src/__support/FPUtil/multiply_add.h"
25#include "src/__support/common.h"
26#include "src/__support/macros/config.h"
27#include "src/__support/macros/optimization.h"
28#include "src/__support/macros/properties/cpu_features.h"
29
30namespace LIBC_NAMESPACE_DECL {
31
32namespace math {
33
34namespace log10f16_internal {
35#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
36#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
37LIBC_INLINE_VAR constexpr size_t N_LOG10F16_EXCEPTS = 11;
38#else
39LIBC_INLINE_VAR constexpr size_t N_LOG10F16_EXCEPTS = 17;
40#endif
41
42LIBC_INLINE_VAR constexpr fputil::ExceptValues<float16, N_LOG10F16_EXCEPTS>
43 LOG10F16_EXCEPTS = {.values: {
44 // (input, RZ output, RU offset, RD offset, RN offset)
45 // x = 0x1.e3cp-3, log10f16(x) = -0x1.40cp-1 (RZ)
46 {.input: 0x338fU, .rnd_towardzero_result: 0xb903U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
47 // x = 0x1.fep-3, log10f16(x) = -0x1.35p-1 (RZ)
48 {.input: 0x33f8U, .rnd_towardzero_result: 0xb8d4U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
49#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
50 // x = 0x1.394p-1, log10f16(x) = -0x1.b4cp-3 (RZ)
51 {.input: 0x38e5U, .rnd_towardzero_result: 0xb2d3U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
52#endif
53 // x = 0x1.ea8p-1, log10f16(x) = -0x1.31p-6 (RZ)
54 {.input: 0x3baaU, .rnd_towardzero_result: 0xa4c4U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
55 // x = 0x1.ebp-1, log10f16(x) = -0x1.29cp-6 (RZ)
56 {.input: 0x3bacU, .rnd_towardzero_result: 0xa4a7U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
57 // x = 0x1.f3p-1, log10f16(x) = -0x1.6dcp-7 (RZ)
58 {.input: 0x3bccU, .rnd_towardzero_result: 0xa1b7U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
59// x = 0x1.f38p-1, log10f16(x) = -0x1.5f8p-7 (RZ)
60#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
61 {.input: 0x3bceU, .rnd_towardzero_result: 0xa17eU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
62 // x = 0x1.fd8p-1, log10f16(x) = -0x1.168p-9 (RZ)
63 {.input: 0x3bf6U, .rnd_towardzero_result: 0x985aU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
64 // x = 0x1.ff8p-1, log10f16(x) = -0x1.bccp-12 (RZ)
65 {.input: 0x3bfeU, .rnd_towardzero_result: 0x8ef3U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
66 // x = 0x1.374p+0, log10f16(x) = 0x1.5b8p-4 (RZ)
67 {.input: 0x3cddU, .rnd_towardzero_result: 0x2d6eU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
68 // x = 0x1.3ecp+1, log10f16(x) = 0x1.958p-2 (RZ)
69 {.input: 0x40fbU, .rnd_towardzero_result: 0x3656U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
70#endif
71 // x = 0x1.4p+3, log10f16(x) = 0x1p+0 (RZ)
72 {.input: 0x4900U, .rnd_towardzero_result: 0x3c00U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
73 // x = 0x1.9p+6, log10f16(x) = 0x1p+1 (RZ)
74 {.input: 0x5640U, .rnd_towardzero_result: 0x4000U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
75 // x = 0x1.f84p+6, log10f16(x) = 0x1.0ccp+1 (RZ)
76 {.input: 0x57e1U, .rnd_towardzero_result: 0x4033U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
77 // x = 0x1.f4p+9, log10f16(x) = 0x1.8p+1 (RZ)
78 {.input: 0x63d0U, .rnd_towardzero_result: 0x4200U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
79 // x = 0x1.388p+13, log10f16(x) = 0x1p+2 (RZ)
80 {.input: 0x70e2U, .rnd_towardzero_result: 0x4400U, .rnd_upward_offset: 0U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
81 // x = 0x1.674p+13, log10f16(x) = 0x1.03cp+2 (RZ)
82 {.input: 0x719dU, .rnd_towardzero_result: 0x440fU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
83 }};
84#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
85} // namespace log10f16_internal
86
87LIBC_INLINE float16 log10f16(float16 x) {
88 using namespace math::expxf16_internal;
89 using FPBits = fputil::FPBits<float16>;
90 FPBits x_bits(x);
91
92 uint16_t x_u = x_bits.uintval();
93
94 // If x <= 0, or x is 1, or x is +inf, or x is NaN.
95 if (LIBC_UNLIKELY(x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U)) {
96 // log10(NaN) = NaN
97 if (x_bits.is_nan()) {
98 if (x_bits.is_signaling_nan()) {
99 fputil::raise_except_if_required(FE_INVALID);
100 return FPBits::quiet_nan().get_val();
101 }
102
103 return x;
104 }
105
106 // log10(+/-0) = −inf
107 if ((x_u & 0x7fffU) == 0U) {
108 fputil::raise_except_if_required(FE_DIVBYZERO);
109 return FPBits::inf(sign: Sign::NEG).get_val();
110 }
111
112 if (x_u == 0x3c00U)
113 return FPBits::zero().get_val();
114
115 // When x < 0.
116 if (x_u > 0x8000U) {
117 fputil::set_errno_if_required(EDOM);
118 fputil::raise_except_if_required(FE_INVALID);
119 return FPBits::quiet_nan().get_val();
120 }
121
122 // log10(+inf) = +inf
123 return FPBits::inf().get_val();
124 }
125
126#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
127 if (auto r = log10f16_internal::LOG10F16_EXCEPTS.lookup(x_bits: x_u);
128 LIBC_UNLIKELY(r.has_value()))
129 return r.value();
130#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
131
132 // To compute log10(x), we perform the following range reduction:
133 // x = 2^m * 1.mant,
134 // log10(x) = m * log10(2) + log10(1.mant).
135 // To compute log10(1.mant), let f be the highest 6 bits including the hidden
136 // bit, and d be the difference (1.mant - f), i.e., the remaining 5 bits of
137 // the mantissa, then:
138 // log10(1.mant) = log10(f) + log10(1.mant / f)
139 // = log10(f) + log10(1 + d/f)
140 // since d/f is sufficiently small.
141 // We store log10(f) and 1/f in the lookup tables LOG10F_F and ONE_OVER_F_F
142 // respectively.
143
144 int m = -FPBits::EXP_BIAS;
145
146 // When x is subnormal, normalize it.
147 if ((x_u & FPBits::EXP_MASK) == 0U) {
148 // Can't pass an integer to fputil::cast directly.
149 constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN;
150 x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(x: NORMALIZE_EXP));
151 x_u = x_bits.uintval();
152 m -= FPBits::FRACTION_LEN;
153 }
154
155 uint16_t mant = x_bits.get_mantissa();
156 // Leading 10 - 5 = 5 bits of the mantissa.
157 int f = mant >> 5;
158 // Unbiased exponent.
159 m += x_u >> FPBits::FRACTION_LEN;
160
161 // Set bits to 1.mant instead of 2^m * 1.mant.
162 x_bits.set_biased_exponent(FPBits::EXP_BIAS);
163 float mant_f = x_bits.get_val();
164 // v = 1.mant * 1/f - 1 = d/f
165 float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F[f], z: -1.0f);
166
167 // Degree-3 minimax polynomial generated by Sollya with the following
168 // commands:
169 // > display = hexadecimal;
170 // > P = fpminimax(log10(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
171 // > x * P;
172 float log10p1_d_over_f =
173 v * fputil::polyeval(x: v, a0: 0x1.bcb7bp-2f, a: -0x1.bce168p-3f, a: 0x1.28acb8p-3f);
174 // log10(1.mant) = log10(f) + log10(1 + d/f)
175 float log10_1_mant = LOG10F_F[f] + log10p1_d_over_f;
176 return fputil::cast<float16>(
177 x: fputil::multiply_add(x: static_cast<float>(m), y: LOG10F_2, z: log10_1_mant));
178}
179
180} // namespace math
181} // namespace LIBC_NAMESPACE_DECL
182
183#endif // LIBC_TYPES_HAS_FLOAT16
184
185#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOG10F16_H
186