1//===-- Common utilities for half-precision exponential functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPXF16_UTILS_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPXF16_UTILS_H
11
12#include "include/llvm-libc-macros/float16-macros.h"
13
14#ifdef LIBC_TYPES_HAS_FLOAT16
15
16#include "hdr/stdint_proxy.h"
17#include "src/__support/FPUtil/FPBits.h"
18#include "src/__support/FPUtil/cast.h"
19#include "src/__support/FPUtil/multiply_add.h"
20#include "src/__support/FPUtil/nearest_integer.h"
21#include "src/__support/macros/attributes.h"
22#include "src/__support/macros/config.h"
23#include "src/__support/math/exp10_float16_constants.h"
24#include "src/__support/math/expf16_utils.h"
25
26namespace LIBC_NAMESPACE_DECL {
27
28namespace math {
29
30namespace expxf16_internal {
31
32LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) {
33 // For -25 < x < 16, to compute 2^x, we perform the following range reduction:
34 // find hi, mid, lo, such that:
35 // x = hi + mid + lo, in which
36 // hi is an integer,
37 // mid * 2^3 is an integer,
38 // -2^(-4) <= lo < 2^(-4).
39 // In particular,
40 // hi + mid = round(x * 2^3) * 2^(-3).
41 // Then,
42 // 2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
43 // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid
44 // by adding hi to the exponent field of 2^mid. 2^lo is computed using a
45 // degree-3 minimax polynomial generated by Sollya.
46
47 float xf = x;
48 float kf = fputil::nearest_integer(x: xf * 0x1.0p+3f);
49 int x_hi_mid = static_cast<int>(kf);
50 unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3;
51 unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7;
52 // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x
53 float lo = fputil::multiply_add(x: kf, y: -0x1.0p-3f, z: xf);
54
55 uint32_t exp2_hi_mid_bits =
56 EXP2_MID_BITS[x_mid] +
57 static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
58 float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
59 // Degree-3 minimax polynomial generated by Sollya with the following
60 // commands:
61 // > display = hexadecimal;
62 // > P = fpminimax((2^x - 1)/x, 2, [|SG...|], [-2^-4, 2^-4]);
63 // > 1 + x * P;
64 float exp2_lo = fputil::polyeval(x: lo, a0: 0x1p+0f, a: 0x1.62e43p-1f, a: 0x1.ec0aa6p-3f,
65 a: 0x1.c6b4a6p-5f);
66 return {.exp_hi_mid: exp2_hi_mid, .exp_lo: exp2_lo};
67}
68
69// Generated by Sollya with the following commands:
70// > display = hexadecimal;
71// > round(log2(exp(1)), SG, RN);
72LIBC_INLINE_VAR constexpr float LOG2F_E = 0x1.715476p+0f;
73
74// Generated by Sollya with the following commands:
75// > display = hexadecimal;
76// > round(log(2), SG, RN);
77LIBC_INLINE_VAR constexpr float LOGF_2 = 0x1.62e43p-1f;
78
79// Generated by Sollya with the following commands:
80// > display = hexadecimal;
81// > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN));
82LIBC_INLINE_VAR constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = {
83 0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U,
84 0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU,
85 0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU,
86 0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU,
87 0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU,
88 0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU,
89 0x3ff5'257dU, 0x3ffa'83b3U,
90};
91
92// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
93// and exp(-x) simultaneously.
94// To compute e^x, we perform the following range reduction:
95// find hi, mid, lo such that:
96// x = (hi + mid) * log(2) + lo, in which
97// hi is an integer,
98// 0 <= mid * 2^5 < 32 is an integer
99// -2^(-5) <= lo * log2(e) <= 2^-5.
100// In particular,
101// hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
102// Then,
103// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
104// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid
105// by adding hi to the exponent field of 2^mid.
106// e^lo is computed using a degree-3 minimax polynomial generated by Sollya:
107// e^lo ~ P(lo)
108// = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
109// = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
110// = P_even + lo * P_odd
111// To compute e^(-x), notice that:
112// e^(-x) = 2^(-(hi + mid)) * e^(-lo)
113// ~ 2^(-(hi + mid)) * P(-lo)
114// = 2^(-(hi + mid)) * (P_even - lo * P_odd)
115// So:
116// sinh(x) = (e^x - e^(-x)) / 2
117// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
118// 2^(-(hi + mid)) * (P_even - lo * P_odd))
119// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
120// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
121// And similarly:
122// cosh(x) = (e^x + e^(-x)) / 2
123// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
124// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
125// The main point of these formulas is that the expensive part of calculating
126// the polynomials approximating lower parts of e^x and e^(-x) is shared and
127// only done once.
128template <bool IsSinh>
129LIBC_INLINE constexpr float16 eval_sinh_or_cosh(float16 x) {
130 float xf = x;
131 float kf = fputil::nearest_integer(x: xf * (LOG2F_E * 0x1.0p+5f));
132 int x_hi_mid_p = static_cast<int>(kf);
133 int x_hi_mid_m = -x_hi_mid_p;
134
135 unsigned x_hi_p = static_cast<unsigned>(x_hi_mid_p) >> 5;
136 unsigned x_hi_m = static_cast<unsigned>(x_hi_mid_m) >> 5;
137 unsigned x_mid_p = static_cast<unsigned>(x_hi_mid_p) & 0x1f;
138 unsigned x_mid_m = static_cast<unsigned>(x_hi_mid_m) & 0x1f;
139
140 uint32_t exp2_hi_mid_bits_p =
141 EXP2_MID_5_BITS[x_mid_p] +
142 static_cast<uint32_t>(x_hi_p << fputil::FPBits<float>::FRACTION_LEN);
143 uint32_t exp2_hi_mid_bits_m =
144 EXP2_MID_5_BITS[x_mid_m] +
145 static_cast<uint32_t>(x_hi_m << fputil::FPBits<float>::FRACTION_LEN);
146 // exp2_hi_mid_p = 2^(hi + mid)
147 float exp2_hi_mid_p = fputil::FPBits<float>(exp2_hi_mid_bits_p).get_val();
148 // exp2_hi_mid_m = 2^(-(hi + mid))
149 float exp2_hi_mid_m = fputil::FPBits<float>(exp2_hi_mid_bits_m).get_val();
150
151 // exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid))
152 float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m;
153 // exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid))
154 float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m;
155
156 // lo = x - (hi + mid) = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x
157 float lo = fputil::multiply_add(x: kf, y: LOGF_2 * -0x1.0p-5f, z: xf);
158 float lo_sq = lo * lo;
159
160 // Degree-3 minimax polynomial generated by Sollya with the following
161 // commands:
162 // > display = hexadecimal;
163 // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
164 // > 1 + x * P;
165 constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f,
166 0x1.555778p-3f};
167 float half_p_odd =
168 fputil::polyeval(x: lo_sq, a0: COEFFS[1] * 0.5f, a: COEFFS[3] * 0.5f);
169 float half_p_even =
170 fputil::polyeval(x: lo_sq, a0: COEFFS[0] * 0.5f, a: COEFFS[2] * 0.5f);
171
172 // sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) +
173 // (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid))))
174 if constexpr (IsSinh)
175 return fputil::cast<float16>(x: fputil::multiply_add(
176 x: lo, y: half_p_odd * exp2_hi_mid_sum, z: half_p_even * exp2_hi_mid_diff));
177 // cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) +
178 // (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid))))
179 return fputil::cast<float16>(x: fputil::multiply_add(
180 x: lo, y: half_p_odd * exp2_hi_mid_diff, z: half_p_even * exp2_hi_mid_sum));
181}
182
183// Generated by Sollya with the following commands:
184// > display = hexadecimal;
185// > for i from 0 to 31 do print(round(log(1 + i * 2^-5), SG, RN));
186LIBC_INLINE_VAR constexpr cpp::array<float, 32> LOGF_F = {
187 0x0p+0f, 0x1.f829bp-6f, 0x1.f0a30cp-5f, 0x1.6f0d28p-4f,
188 0x1.e27076p-4f, 0x1.29553p-3f, 0x1.5ff308p-3f, 0x1.9525aap-3f,
189 0x1.c8ff7cp-3f, 0x1.fb9186p-3f, 0x1.1675cap-2f, 0x1.2e8e2cp-2f,
190 0x1.4618bcp-2f, 0x1.5d1bdcp-2f, 0x1.739d8p-2f, 0x1.89a338p-2f,
191 0x1.9f323ep-2f, 0x1.b44f78p-2f, 0x1.c8ff7cp-2f, 0x1.dd46ap-2f,
192 0x1.f128f6p-2f, 0x1.02552ap-1f, 0x1.0be72ep-1f, 0x1.154c3ep-1f,
193 0x1.1e85f6p-1f, 0x1.2795e2p-1f, 0x1.307d74p-1f, 0x1.393e0ep-1f,
194 0x1.41d8fep-1f, 0x1.4a4f86p-1f, 0x1.52a2d2p-1f, 0x1.5ad404p-1f,
195};
196
197// Generated by Sollya with the following commands:
198// > display = hexadecimal;
199// > for i from 0 to 31 do print(round(log2(1 + i * 2^-5), SG, RN));
200LIBC_INLINE_VAR constexpr cpp::array<float, 32> LOG2F_F = {
201 0x0p+0f, 0x1.6bad38p-5f, 0x1.663f7p-4f, 0x1.08c588p-3f,
202 0x1.5c01a4p-3f, 0x1.acf5e2p-3f, 0x1.fbc16cp-3f, 0x1.24407ap-2f,
203 0x1.49a784p-2f, 0x1.6e221cp-2f, 0x1.91bba8p-2f, 0x1.b47ecp-2f,
204 0x1.d6753ep-2f, 0x1.f7a856p-2f, 0x1.0c105p-1f, 0x1.1bf312p-1f,
205 0x1.2b8034p-1f, 0x1.3abb4p-1f, 0x1.49a784p-1f, 0x1.584822p-1f,
206 0x1.66a008p-1f, 0x1.74b1fep-1f, 0x1.82809ep-1f, 0x1.900e62p-1f,
207 0x1.9d5dap-1f, 0x1.aa709p-1f, 0x1.b74948p-1f, 0x1.c3e9cap-1f,
208 0x1.d053f6p-1f, 0x1.dc899ap-1f, 0x1.e88c6cp-1f, 0x1.f45e08p-1f,
209};
210
211// Generated by Sollya with the following commands:
212// > display = hexadecimal;
213// > for i from 0 to 31 do print(round(log10(1 + i * 2^-5), SG, RN));
214LIBC_INLINE_VAR constexpr cpp::array<float, 32> LOG10F_F = {
215 0x0p+0f, 0x1.b5e908p-7f, 0x1.af5f92p-6f, 0x1.3ed11ap-5f,
216 0x1.a30a9ep-5f, 0x1.02428cp-4f, 0x1.31b306p-4f, 0x1.5fe804p-4f,
217 0x1.8cf184p-4f, 0x1.b8de4ep-4f, 0x1.e3bc1ap-4f, 0x1.06cbd6p-3f,
218 0x1.1b3e72p-3f, 0x1.2f3b6ap-3f, 0x1.42c7e8p-3f, 0x1.55e8c6p-3f,
219 0x1.68a288p-3f, 0x1.7af974p-3f, 0x1.8cf184p-3f, 0x1.9e8e7cp-3f,
220 0x1.afd3e4p-3f, 0x1.c0c514p-3f, 0x1.d1653p-3f, 0x1.e1b734p-3f,
221 0x1.f1bdeep-3f, 0x1.00be06p-2f, 0x1.087a08p-2f, 0x1.101432p-2f,
222 0x1.178da6p-2f, 0x1.1ee778p-2f, 0x1.2622bp-2f, 0x1.2d404cp-2f,
223};
224
225// Generated by Sollya with the following commands:
226// > display = hexadecimal;
227// > for i from 0 to 31 do print(round(1 / (1 + i * 2^-5), SG, RN));
228LIBC_INLINE_VAR constexpr cpp::array<float, 32> ONE_OVER_F_F = {
229 0x1p+0f, 0x1.f07c2p-1f, 0x1.e1e1e2p-1f, 0x1.d41d42p-1f,
230 0x1.c71c72p-1f, 0x1.bacf92p-1f, 0x1.af286cp-1f, 0x1.a41a42p-1f,
231 0x1.99999ap-1f, 0x1.8f9c18p-1f, 0x1.861862p-1f, 0x1.7d05f4p-1f,
232 0x1.745d18p-1f, 0x1.6c16c2p-1f, 0x1.642c86p-1f, 0x1.5c9882p-1f,
233 0x1.555556p-1f, 0x1.4e5e0ap-1f, 0x1.47ae14p-1f, 0x1.414142p-1f,
234 0x1.3b13b2p-1f, 0x1.3521dp-1f, 0x1.2f684cp-1f, 0x1.29e412p-1f,
235 0x1.24924ap-1f, 0x1.1f7048p-1f, 0x1.1a7b96p-1f, 0x1.15b1e6p-1f,
236 0x1.111112p-1f, 0x1.0c9714p-1f, 0x1.08421p-1f, 0x1.041042p-1f,
237};
238
239} // namespace expxf16_internal
240
241} // namespace math
242
243} // namespace LIBC_NAMESPACE_DECL
244
245#endif // LIBC_TYPES_HAS_FLOAT16
246
247#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPXF16_UTILS_H
248