log2p1f16.h source code [llvm_projects/libc/src/__support/math/log2p1f16.h]

1	//===-- Implementation header for log2p1f16 ---------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
11
12	#include "include/llvm-libc-macros/float16-macros.h"
13
14	#ifdef LIBC_TYPES_HAS_FLOAT16
15
16	#include "expxf16_utils.h"
17	#include "hdr/errno_macros.h"
18	#include "hdr/fenv_macros.h"
19	#include "src/__support/FPUtil/FEnvImpl.h"
20	#include "src/__support/FPUtil/FPBits.h"
21	#include "src/__support/FPUtil/PolyEval.h"
22	#include "src/__support/FPUtil/cast.h"
23	#include "src/__support/FPUtil/except_value_utils.h"
24	#include "src/__support/FPUtil/multiply_add.h"
25	#include "src/__support/common.h"
26	#include "src/__support/macros/config.h"
27	#include "src/__support/macros/optimization.h"
28	#include "src/__support/macros/properties/cpu_features.h"
29
30	namespace LIBC_NAMESPACE_DECL {
31
32	namespace math {
33
34	LIBC_INLINE float16 log2p1f16(float16 x) {
35	using namespace math::expxf16_internal;
36	using FPBits = fputil::FPBits<float16>;
37	FPBits x_bits(x);
38
39	uint16_t x_u = x_bits.uintval();
40	uint16_t x_abs = x_u & `0x7fffU`;
41
42	// If x is NaN, +/-inf, or \|x\| <= 2^-3.
43	if (LIBC_UNLIKELY(x_abs <= `0x3000U` \|\| x_abs >= `0x7c00U`)) {
44	// log2p1(NaN) = NaN
45	if (x_bits.is_nan()) {
46	if (x_bits.is_signaling_nan()) {
47	fputil::raise_except_if_required(FE_INVALID);
48	return FPBits::quiet_nan().get_val();
49	}
50
51	return x;
52	}
53
54	// When \|x\| <= 2^-3, use a degree-5 minimax polynomial on x.
55	// For y = 1+x near 1, the table-based range reduction suffers from
56	// catastrophic cancellation (m + log2(f) nearly cancel). Computing
57	// log2(1+x) directly via polynomial avoids this.
58	//
59	// Generated by Sollya with:
60	// > display = hexadecimal;
61	// > Q = fpminimax(log2(1 + x), [\|1, 2, 3, 4, 5\|], [\|SG...\|],
62	// [-2^-3, 2^-3]);
63	// > Q;
64	// > dirtyinfnorm((log2(1 + x) - Q) / log2(1 + x), [-2^-3, 2^-3]);
65	// 0x1.6ed1f4728dcb6f1d6a651a3c728937f7468f8eedfp-22
66	if (x_abs <= `0x3000U`) {
67	// log2p1(+/-0) = +/-0
68	if (x_abs == `0U`)
69	return x;
70
71	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
72	constexpr size_t N_LOG2P1F16_EXCEPTS = `11`;
73	constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS>
74	LOG2P1F16_EXCEPTS = {.values: {
75	// (input, RZ output, RU offset, RD offset, RN offset)
76	// x = 0x1.ec4p-12
77	{.input: `0x0FB1U`, .rnd_towardzero_result: `0x118BU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
78	// x = 0x1.c04p-6
79	{.input: `0x2701U`, .rnd_towardzero_result: `0x28FBU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
80	// x = -0x1.824p-15
81	{.input: `0x8309U`, .rnd_towardzero_result: `0x8461U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
82	// x = -0x1.414p-10
83	{.input: `0x9505U`, .rnd_towardzero_result: `0x973EU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `1U`},
84	// x = -0x1.cb8p-10
85	{.input: `0x972EU`, .rnd_towardzero_result: `0x992FU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
86	// x = -0x1.99cp-8
87	{.input: `0x9E67U`, .rnd_towardzero_result: `0xA0A2U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
88	// x = -0x1.ce8p-7
89	{.input: `0xA33AU`, .rnd_towardzero_result: `0xA540U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
90	// x = -0x1.73cp-6
91	{.input: `0xA5CFU`, .rnd_towardzero_result: `0xA83DU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
92	// x = -0x1.87p-5
93	{.input: `0xAA1CU`, .rnd_towardzero_result: `0xAC84U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
94	// x = -0x1.d48p-5
95	{.input: `0xAB52U`, .rnd_towardzero_result: `0xAD70U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
96	// x = -0x1.da0p-4
97	{.input: `0xAF68U`, .rnd_towardzero_result: `0xB1ADU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
98	}};
99
100	if (auto r = LOG2P1F16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
101	return r.value();
102	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
103
104	float xf = x;
105	return fputil::cast<float16>(
106	x: xf * fputil::polyeval(x: xf, a0: `0x1.715476p+0f`, a: -`0x1.715204p-1f`,
107	a: `0x1.ec6c5p-2f`, a: -`0x1.763338p-2f`,
108	a: `0x1.2bcd3cp-2f`));
109	}
110
111	// log2p1(+inf) = +inf
112	if (x_u == `0x7c00U`)
113	return FPBits::inf().get_val();
114
115	// log2p1(-inf) = NaN
116	fputil::set_errno_if_required(EDOM);
117	fputil::raise_except_if_required(FE_INVALID);
118	return FPBits::quiet_nan().get_val();
119	}
120
121	// log2p1(-1) = -inf
122	if (LIBC_UNLIKELY(x_u == `0xbc00U`)) {
123	fputil::raise_except_if_required(FE_DIVBYZERO);
124	return FPBits::inf(sign: Sign::NEG).get_val();
125	}
126
127	// log2p1(x) = NaN for x < -1
128	if (LIBC_UNLIKELY(x_u > `0xbc00U`)) {
129	fputil::set_errno_if_required(EDOM);
130	fputil::raise_except_if_required(FE_INVALID);
131	return FPBits::quiet_nan().get_val();
132	}
133
134	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
135	#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
136	constexpr size_t N_LOG2P1F16_EXCEPTS_HI = `2`;
137	#else
138	constexpr size_t N_LOG2P1F16_EXCEPTS_HI = `6`;
139	#endif
140	constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS_HI>
141	LOG2P1F16_EXCEPTS_HI = {.values: {
142	// (input, RZ output, RU offset, RD offset, RN offset)
143	#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
144	// x = 0x1.12p-3
145	{.input: `0x3048U`, .rnd_towardzero_result: `0x31CBU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
146	// x = 0x1.598p-2
147	{.input: `0x3566U`, .rnd_towardzero_result: `0x36B5U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `1U`},
148	#endif
149	// x = 0x1.23cp-3
150	{.input: `0x308FU`, .rnd_towardzero_result: `0x3226U`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
151	// x = 0x1.accp+0
152	{.input: `0x3EB3U`, .rnd_towardzero_result: `0x3DADU`, .rnd_upward_offset: `1U`, .rnd_downward_offset: `0U`, .rnd_tonearest_offset: `0U`},
153	#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
154	// x = -0x1.534p-2
155	{.input: `0xB54DU`, .rnd_towardzero_result: `0xB8A5U`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
156	// x = -0x1.bb8p-2
157	{.input: `0xB6EEU`, .rnd_towardzero_result: `0xBA8DU`, .rnd_upward_offset: `0U`, .rnd_downward_offset: `1U`, .rnd_tonearest_offset: `0U`},
158	#endif
159	}};
160
161	if (auto r = LOG2P1F16_EXCEPTS_HI.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
162	return r.value();
163	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
164
165	// For the range reduction, we compute y = 1 + x in float. For \|x\| > 2^-3,
166	// the addition 1.0f + xf is exact in float, and the result y is bounded away
167	// from 1.0, avoiding catastrophic cancellation in the table decomposition.
168	float xf = x;
169	float y = `1.0f` + xf;
170
171	using FPBitsFloat = fputil::FPBits<float>;
172	FPBitsFloat y_bits(y);
173
174	// y = 1 + x should stay positive for x > -1, but keep this guard for
175	// completeness in case of unexpected intermediate behavior.
176	if (LIBC_UNLIKELY(y_bits.is_zero()))
177	return FPBits::inf(sign: Sign::NEG).get_val();
178
179	int m = y_bits.get_exponent();
180	y_bits.set_biased_exponent(FPBitsFloat::EXP_BIAS);
181	float mant_f = y_bits.get_val();
182
183	// Leading 23 - 5 = 18, so the top 5 mantissa bits give the index in [0, 31].
184	int f = y_bits.get_mantissa() >> (FPBitsFloat::FRACTION_LEN - `5`);
185
186	// v = 1.mant 1/f - 1 = d/f*
187	float v = fputil::multiply_add(x: mant_f, y: ONE_OVER_F_F [f], z: -`1.0f`);
188
189	// Degree-3 minimax polynomial generated by Sollya with the following
190	// commands:
191	// > display = hexadecimal;
192	// > P = fpminimax(log2(1 + x)/x, 2, [\|SG...\|], [-2^-5, 2^-5]);
193	// > x P;*
194	// > dirtyinfnorm((log2(1 + x)/x - P) / (log2(1 + x)/x), [-2^-5, 2^-5]);
195	// 0x1.0087f3cad284d0a4464b1e27e83258b5e69a647f5p-19
196	float log2p1_d_over_f =
197	v * fputil::polyeval(x: v, a0: `0x1.715476p+0f`, a: -`0x1.71771ap-1f`, a: `0x1.ecb38ep-2f`);
198	float log2_1_mant = LOG2F_F [f] + log2p1_d_over_f;
199	return fputil::cast<float16>(x: static_cast<float>(m) + log2_1_mant);
200	}
201
202	} // namespace math
203	} // namespace LIBC_NAMESPACE_DECL
204
205	#endif // LIBC_TYPES_HAS_FLOAT16
206
207	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
208

Browse the source code of llvm_projects/libc/src/__support/math/log2p1f16.h