1//===-- Implementation header for logf16 ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOGF16_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_LOGF16_H
11
12#include "include/llvm-libc-macros/float16-macros.h"
13
14#ifdef LIBC_TYPES_HAS_FLOAT16
15
16#include "expxf16_utils.h"
17#include "hdr/errno_macros.h"
18#include "hdr/fenv_macros.h"
19#include "src/__support/FPUtil/FEnvImpl.h"
20#include "src/__support/FPUtil/FPBits.h"
21#include "src/__support/FPUtil/PolyEval.h"
22#include "src/__support/FPUtil/cast.h"
23#include "src/__support/FPUtil/except_value_utils.h"
24#include "src/__support/FPUtil/multiply_add.h"
25#include "src/__support/common.h"
26#include "src/__support/macros/config.h"
27#include "src/__support/macros/optimization.h"
28#include "src/__support/macros/properties/cpu_features.h"
29
30namespace LIBC_NAMESPACE_DECL {
31
32namespace math {
33
34namespace logf16_internal {
35
36#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
37#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
38LIBC_INLINE_VAR constexpr size_t N_LOGF16_EXCEPTS = 5;
39#else
40LIBC_INLINE_VAR constexpr size_t N_LOGF16_EXCEPTS = 11;
41#endif
42LIBC_INLINE_VAR constexpr fputil::ExceptValues<float16, N_LOGF16_EXCEPTS>
43 LOGF16_EXCEPTS = {.values: {
44// (input, RZ output, RU offset, RD offset, RN offset)
45#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
46 // x = 0x1.61cp-13, logf16(x) = -0x1.16p+3 (RZ)
47 {.input: 0x0987U, .rnd_towardzero_result: 0xc858U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
48 // x = 0x1.f2p-12, logf16(x) = -0x1.e98p+2 (RZ)
49 {.input: 0x0fc8U, .rnd_towardzero_result: 0xc7a6U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
50#endif
51 // x = 0x1.4d4p-9, logf16(x) = -0x1.7e4p+2 (RZ)
52 {.input: 0x1935U, .rnd_towardzero_result: 0xc5f9U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
53 // x = 0x1.5ep-8, logf16(x) = -0x1.4ecp+2 (RZ)
54 {.input: 0x1d78U, .rnd_towardzero_result: 0xc53bU, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
55#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
56 // x = 0x1.fdp-1, logf16(x) = -0x1.81p-8 (RZ)
57 {.input: 0x3bf4U, .rnd_towardzero_result: 0x9e04U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
58 // x = 0x1.fep-1, logf16(x) = -0x1.008p-8 (RZ)
59 {.input: 0x3bf8U, .rnd_towardzero_result: 0x9c02U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
60#endif
61 // x = 0x1.ffp-1, logf16(x) = -0x1.004p-9 (RZ)
62 {.input: 0x3bfcU, .rnd_towardzero_result: 0x9801U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 0U},
63 // x = 0x1.ff8p-1, logf16(x) = -0x1p-10 (RZ)
64 {.input: 0x3bfeU, .rnd_towardzero_result: 0x9400U, .rnd_upward_offset: 0U, .rnd_downward_offset: 1U, .rnd_tonearest_offset: 1U},
65#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
66 // x = 0x1.4c4p+1, logf16(x) = 0x1.e84p-1 (RZ)
67 {0x4131U, 0x3ba1U, 1U, 0U, 1U},
68#else
69 // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ)
70 {.input: 0x45d4U, .rnd_towardzero_result: 0x3f0dU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
71 // x = 0x1.75p+2, logf16(x) = 0x1.c34p+0 (RZ)
72 {.input: 0x45d4U, .rnd_towardzero_result: 0x3f0dU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
73 // x = 0x1.d5p+9, logf16(x) = 0x1.b5cp+2 (RZ)
74 {.input: 0x6354U, .rnd_towardzero_result: 0x46d7U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
75#endif
76 }};
77#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
78
79} // namespace logf16_internal
80
81LIBC_INLINE float16 logf16(float16 x) {
82 using namespace math::expxf16_internal;
83 using namespace math::logf16_internal;
84 using FPBits = fputil::FPBits<float16>;
85 FPBits x_bits(x);
86
87 uint16_t x_u = x_bits.uintval();
88
89 // If x <= 0, or x is 1, or x is +inf, or x is NaN.
90 if (LIBC_UNLIKELY(x_u == 0U || x_u == 0x3c00U || x_u >= 0x7c00U)) {
91 // log(NaN) = NaN
92 if (x_bits.is_nan()) {
93 if (x_bits.is_signaling_nan()) {
94 fputil::raise_except_if_required(FE_INVALID);
95 return FPBits::quiet_nan().get_val();
96 }
97
98 return x;
99 }
100
101 // log(+/-0) = −inf
102 if ((x_u & 0x7fffU) == 0U) {
103 fputil::raise_except_if_required(FE_DIVBYZERO);
104 return FPBits::inf(sign: Sign::NEG).get_val();
105 }
106
107 if (x_u == 0x3c00U)
108 return FPBits::zero().get_val();
109
110 // When x < 0.
111 if (x_u > 0x8000U) {
112 fputil::set_errno_if_required(EDOM);
113 fputil::raise_except_if_required(FE_INVALID);
114 return FPBits::quiet_nan().get_val();
115 }
116
117 // log(+inf) = +inf
118 return FPBits::inf().get_val();
119 }
120
121#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
122 if (auto r = LOGF16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
123 return r.value();
124#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
125
126 // To compute log(x), we perform the following range reduction:
127 // x = 2^m * 1.mant,
128 // log(x) = m * log(2) + log(1.mant).
129 // To compute log(1.mant), let f be the highest 6 bits including the hidden
130 // bit, and d be the difference (1.mant - f), i.e., the remaining 5 bits of
131 // the mantissa, then:
132 // log(1.mant) = log(f) + log(1.mant / f)
133 // = log(f) + log(1 + d/f)
134 // since d/f is sufficiently small.
135 // We store log(f) and 1/f in the lookup tables LOGF_F and ONE_OVER_F_F
136 // respectively.
137
138 int m = -FPBits::EXP_BIAS;
139
140 // When x is subnormal, normalize it.
141 if ((x_u & FPBits::EXP_MASK) == 0U) {
142 // Can't pass an integer to fputil::cast directly.
143 constexpr float NORMALIZE_EXP = 1U << FPBits::FRACTION_LEN;
144 x_bits = FPBits(x_bits.get_val() * fputil::cast<float16>(x: NORMALIZE_EXP));
145 x_u = x_bits.uintval();
146 m -= FPBits::FRACTION_LEN;
147 }
148
149 uint16_t mant = x_bits.get_mantissa();
150 // Leading 10 - 5 = 5 bits of the mantissa.
151 int f = mant >> 5;
152 // Unbiased exponent.
153 m += x_u >> FPBits::FRACTION_LEN;
154
155 // Set bits to 1.mant instead of 2^m * 1.mant.
156 x_bits.set_biased_exponent(FPBits::EXP_BIAS);
157 float mant_f = x_bits.get_val();
158 // v = 1.mant * 1/f - 1 = d/f
159 float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F[f], z: -1.0f);
160
161 // Degree-3 minimax polynomial generated by Sollya with the following
162 // commands:
163 // > display = hexadecimal;
164 // > P = fpminimax(log(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
165 // > x * P;
166 float log1p_d_over_f =
167 v * fputil::polyeval(x: v, a0: 0x1p+0f, a: -0x1.001804p-1f, a: 0x1.557ef6p-2f);
168 // log(1.mant) = log(f) + log(1 + d/f)
169 float log_1_mant = LOGF_F[f] + log1p_d_over_f;
170 return fputil::cast<float16>(
171 x: fputil::multiply_add(x: static_cast<float>(m), y: LOGF_2, z: log_1_mant));
172}
173
174} // namespace math
175
176} // namespace LIBC_NAMESPACE_DECL
177
178#endif // LIBC_TYPES_HAS_FLOAT16
179
180#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOGF16_H
181