log10p1f16.h source code [llvm_projects/libc/src/__support/math/log10p1f16.h]

1	//===-- Implementation header for log10p1f16 --------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H
11
12	#include "include/llvm-libc-macros/float16-macros.h"
13
14	#ifdef LIBC_TYPES_HAS_FLOAT16
15
16	#include "exp10_float16_constants.h"
17	#include "expxf16_utils.h"
18	#include "hdr/errno_macros.h"
19	#include "hdr/fenv_macros.h"
20	#include "src/__support/FPUtil/FEnvImpl.h"
21	#include "src/__support/FPUtil/FPBits.h"
22	#include "src/__support/FPUtil/PolyEval.h"
23	#include "src/__support/FPUtil/cast.h"
24	#include "src/__support/FPUtil/except_value_utils.h"
25	#include "src/__support/FPUtil/multiply_add.h"
26	#include "src/__support/common.h"
27	#include "src/__support/macros/config.h"
28	#include "src/__support/macros/optimization.h"
29	#include "src/__support/macros/properties/cpu_features.h"
30
31	namespace LIBC_NAMESPACE_DECL {
32
33	namespace math {
34
35	LIBC_INLINE float16 log10p1f16(float16 x) {
36	using namespace math::expxf16_internal;
37	using FPBits = fputil::FPBits<float16>;
38	FPBits x_bits(x);
39
40	uint16_t x_u = x_bits.uintval();
41	uint16_t x_abs = x_u & `0x7fffU`;
42
43	// If x is +-0, NaN, +/-inf, or \|x\| <= 2^-3.
44	if (LIBC_UNLIKELY(x_abs <= `0x3000U` \|\| x_abs >= `0x7c00U`)) {
45	// log10p1(NaN) = NaN
46	if (x_bits.is_nan()) {
47	if (x_bits.is_signaling_nan()) {
48	fputil::raise_except_if_required(FE_INVALID);
49	return FPBits::quiet_nan().get_val();
50	}
51
52	return x;
53	}
54
55	// log10p1(+/-0) = +/-0
56	if (x_abs == `0U`)
57	return x;
58
59	// log10p1(+inf) = +inf
60	if (x_u == `0x7c00U`)
61	return FPBits::inf().get_val();
62
63	// log10p1(-inf) = NaN
64	if (x_abs >= `0x7c00U`) {
65	fputil::set_errno_if_required(EDOM);
66	fputil::raise_except_if_required(FE_INVALID);
67	return FPBits::quiet_nan().get_val();
68	}
69
70	// When \|x\| <= 2^-3, use a degree-5 minimax polynomial on x.
71	// For y = 1+x near 1, the table-based range reduction suffers from
72	// catastrophic cancellation (mlog10(2) + log10(f) nearly cancel).*
73	// Computing log10(1+x) directly via polynomial avoids this.
74	//
75	// Generated by Sollya with:
76	// > display = hexadecimal;
77	// > Q = fpminimax(log10(1 + x), [\|1, 2, 3, 4, 5\|], [\|SG...\|],
78	// [-2^-3, 2^-3]);
79	// > Q;
80	if (x_abs <= `0x3000U`) {
81	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
82	// ExceptValues for the small-\|x\| polynomial path (\|x\| <= 2^-3).
83	constexpr size_t N_LOG10P1F16_EXCEPTS = `7`;
84	constexpr fputil::ExceptValues<float16, N_LOG10P1F16_EXCEPTS>
85	LOG10P1F16_EXCEPTS = {.values: {
86	// (input, RZ output, RU offset, RD offset, RN offset)
87	// x = 0x1.ep-13
88	{.input: `0x0B80U`, .rnd_towardzero_result: `0x0683U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
89	// x = 0x1.4fp-6
90	{.input: `0x213CU`, .rnd_towardzero_result: `0x1C85U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
91	// x = 0x1.82cp-4
92	{.input: `0x2E0BU`, .rnd_towardzero_result: `0x2904U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
93	// x = 0x1.d24p-4
94	{.input: `0x2EC9U`, .rnd_towardzero_result: `0x299AU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
95	// x = -0x1.b8p-13
96	{.input: `0x89DCU`, .rnd_towardzero_result: `0x8516U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
97	// x = -0x1.49p-6
98	{.input: `0x9924U`, .rnd_towardzero_result: `0x9478U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
99	// x = -0x1.42p-8
100	{.input: `0x9D08U`, .rnd_towardzero_result: `0x9861U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
101	}};
102
103	if (auto r = LOG10P1F16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
104	return r.value();
105	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
106
107	float xf = x;
108	return fputil::cast<float16>(
109	x: xf * fputil::polyeval(x: xf, a0: `0x1.bcb7b2p-2f`, a: -`0x1.bcb4cp-3f`,
110	a: `0x1.2875bcp-3f`, a: -`0x1.c2946ep-4f`,
111	a: `0x1.69da2p-4f`));
112	}
113	}
114
115	// log10p1(-1) = -inf
116	if (LIBC_UNLIKELY(x_u == `0xbc00U`)) {
117	fputil::raise_except_if_required(FE_DIVBYZERO);
118	return FPBits::inf(sign: Sign::NEG).get_val();
119	}
120
121	// log10p1(x) = NaN for x < -1
122	if (LIBC_UNLIKELY(x_u > `0xbc00U`)) {
123	fputil::set_errno_if_required(EDOM);
124	fputil::raise_except_if_required(FE_INVALID);
125	return FPBits::quiet_nan().get_val();
126	}
127
128	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
129	// ExceptValues for the large-\|x\| table-based path (\|x\| > 2^-3).
130	constexpr size_t N_LOG10P1F16_EXCEPTS_HI = `12`;
131	constexpr fputil::ExceptValues<float16, N_LOG10P1F16_EXCEPTS_HI>
132	LOG10P1F16_EXCEPTS_HI = {.values: {
133	// (input, RZ output, RU offset, RD offset, RN offset)
134	// x = 0x1.3bp-3
135	{.input: `0x30ECU`, .rnd_towardzero_result: `0x2BF3U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
136	// x = 0x1.ba0p-3
137	{.input: `0x32E8U`, .rnd_towardzero_result: `0x2D6EU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
138	// x = 0x1.4c4p-2
139	{.input: `0x3531U`, .rnd_towardzero_result: `0x2FCFU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
140	// x = 0x1.744p+0
141	{.input: `0x3DD1U`, .rnd_towardzero_result: `0x363CU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
142	// x = 0x1.7d8p+0
143	{.input: `0x3DF6U`, .rnd_towardzero_result: `0x3656U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
144	// x = 0x1.2p+3
145	{.input: `0x4880U`, .rnd_towardzero_result: `0x3C00U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
146	// x = 0x1.8cp+6
147	{.input: `0x5630U`, .rnd_towardzero_result: `0x4000U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
148	// x = 0x1.f44p+6
149	{.input: `0x57D1U`, .rnd_towardzero_result: `0x4033U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
150	// x = 0x1.f38p+9
151	{.input: `0x63CEU`, .rnd_towardzero_result: `0x4200U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
152	// x = -0x1.558p-3
153	{.input: `0xB156U`, .rnd_towardzero_result: `0xAD12U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
154	// x = -0x1.8d8p-2
155	{.input: `0xB636U`, .rnd_towardzero_result: `0xB2D3U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
156	// x = -0x1.808p-1
157	{.input: `0xBA02U`, .rnd_towardzero_result: `0xB8D4U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
158	}};
159
160	if (auto r = LOG10P1F16_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
161	return r.value();
162	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
163
164	// For the range reduction, we compute y = 1 + x in float. For \|x\| > 2^-3,
165	// the addition 1.0f + xf is exact in float, and the result y is bounded away
166	// from 1.0, avoiding catastrophic cancellation in the table decomposition.
167	float xf = x;
168	float y = `1.0f` + xf;
169
170	using FPBitsFloat = fputil::FPBits<float>;
171	FPBitsFloat y_bits(y);
172
173	// When y is subnormal or zero, which shouldn't happen since y = 1 + x and
174	// x >= -1 + eps in f16, so y >= eps > 0. But handle y = 0 just in case.
175	if (LIBC_UNLIKELY(y_bits.is_zero()))
176	return FPBits::inf(sign: Sign::NEG).get_val();
177
178	int m = y_bits.get_exponent();
179	// Set y_bits to 1.mant (biased exponent = 127).
180	y_bits.set_biased_exponent(FPBitsFloat::EXP_BIAS);
181	float mant_f = y_bits.get_val();
182
183	// Leading 23 - 5 = 18, so top 5 mantissa bits give index f in [0, 31].
184	int f = y_bits.get_mantissa() >> (FPBitsFloat::FRACTION_LEN - `5`);
185
186	// v = 1.mant 1/f - 1 = d/f*
187	float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F [f], z: -`1.0f`);
188
189	// Degree-3 minimax polynomial generated by Sollya with the following
190	// commands:
191	// > display = hexadecimal;
192	// > P = fpminimax(log10(1 + x)/x, 2, [\|SG...\|], [-2^-5, 2^-5]);
193	// > x P;*
194	float log10p1_d_over_f =
195	v * fputil::polyeval(x: v, a0: `0x1.bcb7bp-2f`, a: -`0x1.bce168p-3f`, a: `0x1.28acb8p-3f`);
196	// log10(1.mant) = log10(f) + log10(1 + d/f)
197	float log10_1_mant = LOG10F_F [f] + log10p1_d_over_f;
198	return fputil::cast<float16>(
199	x: fputil::multiply_add(x: static_cast<float>(m), y: LOG10F_2, z: log10_1_mant));
200	}
201
202	} // namespace math
203	} // namespace LIBC_NAMESPACE_DECL
204
205	#endif // LIBC_TYPES_HAS_FLOAT16
206
207	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOG10P1F16_H
208

Browse the source code of llvm_projects/libc/src/__support/math/log10p1f16.h