1//===-- Implementation header for expbf16 -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H
11
12#include "hdr/errno_macros.h"
13#include "hdr/fenv_macros.h"
14#include "src/__support/FPUtil/FEnvImpl.h"
15#include "src/__support/FPUtil/FPBits.h"
16#include "src/__support/FPUtil/PolyEval.h"
17#include "src/__support/FPUtil/bfloat16.h"
18#include "src/__support/FPUtil/cast.h"
19#include "src/__support/FPUtil/except_value_utils.h"
20#include "src/__support/FPUtil/multiply_add.h"
21#include "src/__support/FPUtil/nearest_integer.h"
22#include "src/__support/FPUtil/rounding_mode.h"
23#include "src/__support/macros/config.h"
24#include "src/__support/macros/optimization.h"
25#include "src/__support/macros/properties/types.h"
26
27namespace LIBC_NAMESPACE_DECL {
28
29namespace math {
30
31namespace expbf16_internal {
32
33// Generated by Sollya with the following commands:
34// > display = hexadecimal;
35// > for i from -96 to 88 by 8 do print(i, round(exp(i), SG, RN) @ "f,");
36LIBC_INLINE_VAR constexpr float EXP_HI[24] = {
37 0x1.6a4p-139f, 0x1.07b71p-127f, 0x1.7fd974p-116f, 0x1.175afp-104f,
38 0x1.969d48p-93f, 0x1.27ec46p-81f, 0x1.aebabap-70f, 0x1.397924p-58f,
39 0x1.c8465p-47f, 0x1.4c1078p-35f, 0x1.e355bcp-24f, 0x1.5fc21p-12f,
40 0x1p0f, 0x1.749ea8p11f, 0x1.0f2ebep23f, 0x1.8ab7fcp34f,
41 0x1.1f43fcp46f, 0x1.a220d4p57f, 0x1.304d6ap69f, 0x1.baed16p80f,
42 0x1.425982p92f, 0x1.d531d8p103f, 0x1.55779cp115f, 0x1.f1056ep126f,
43};
44
45// Generated by Sollya with the following commands:
46// > display = hexadecimal;
47// > for i from 0 to 7.75 by 0.25 do print(round(exp(i), SG, RN) @ "f,");
48LIBC_INLINE_VAR constexpr float EXP_MID[32] = {
49 0x1p0f, 0x1.48b5e4p0f, 0x1.a61298p0f, 0x1.0ef9dcp1f,
50 0x1.5bf0a8p1f, 0x1.bec38ep1f, 0x1.1ed3fep2f, 0x1.704b6ap2f,
51 0x1.d8e64cp2f, 0x1.2f9b88p3f, 0x1.85d6fep3f, 0x1.f4907p3f,
52 0x1.415e5cp4f, 0x1.9ca53cp4f, 0x1.08ec72p5f, 0x1.542b2ep5f,
53 0x1.b4c902p5f, 0x1.186bf2p6f, 0x1.68118ap6f, 0x1.ce564ep6f,
54 0x1.28d38ap7f, 0x1.7d21eep7f, 0x1.e96244p7f, 0x1.3a30dp8f,
55 0x1.936dc6p8f, 0x1.0301a4p9f, 0x1.4c9222p9f, 0x1.ab0786p9f,
56 0x1.122886p10f, 0x1.6006b6p10f, 0x1.c402b6p10f, 0x1.223252p11f,
57};
58
59} // namespace expbf16_internal
60
61LIBC_INLINE bfloat16 expbf16(bfloat16 x) {
62 using FPBits = fputil::FPBits<bfloat16>;
63 FPBits x_bits(x);
64
65 uint16_t x_u = x_bits.uintval();
66 uint16_t x_abs = x_u & 0x7fffU;
67
68 // 0 <= |x| <= 2^(-3), or |x| >= 89, or x is NaN
69 if (LIBC_UNLIKELY(x_abs <= 0x3e00U || x_abs >= 0x42b2U)) {
70
71 // exp(NaN) = NaN
72 if (x_bits.is_nan()) {
73 if (x_bits.is_signaling_nan()) {
74 fputil::raise_except_if_required(FE_INVALID);
75 return FPBits::quiet_nan().get_val();
76 }
77
78 return x;
79 }
80
81 // if x >= 89
82 if (x_bits.is_pos() && x_abs >= 0x42b2U) {
83 // exp(inf) = inf
84 if (x_bits.is_inf())
85 return x;
86
87#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
88 fputil::set_errno_if_required(ERANGE);
89 fputil::raise_except_if_required(FE_OVERFLOW);
90 return FPBits::inf().get_val();
91#else
92 switch (fputil::quick_get_round()) {
93 case FE_TONEAREST:
94 case FE_UPWARD:
95 fputil::set_errno_if_required(ERANGE);
96 fputil::raise_except_if_required(FE_OVERFLOW);
97 return FPBits::inf().get_val();
98 default:
99 return FPBits::max_normal().get_val();
100 }
101#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
102 }
103
104 // x <= -92.5
105 if (x_u >= 0xc2b9U) {
106 // exp(-inf) = +0
107 if (x_bits.is_inf())
108 return FPBits::zero().get_val();
109
110 fputil::set_errno_if_required(ERANGE);
111 fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
112
113#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
114 return FPBits::zero().get_val();
115#else
116 switch (fputil::quick_get_round()) {
117 case FE_UPWARD:
118 case FE_TONEAREST:
119 if (LIBC_UNLIKELY(x_u == 0xc2b9U))
120 return FPBits::min_subnormal().get_val();
121 return FPBits::zero().get_val();
122 default:
123 return FPBits::zero().get_val();
124 }
125#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
126 }
127
128 // exp(0) = 1
129 if (x_bits.is_zero())
130 return bfloat16(1.0f);
131
132 // 0 < |x| <= 2^(-3)
133 if (x_abs <= 0x3e00U) {
134 float xf = static_cast<float>(x);
135 // Degree-3 minimax polynomial generated by Sollya with the following
136 // commands:
137 // > display = hexadecimal;
138 // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]);
139 // > 1 + x * P;
140 // 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))
141 return fputil::cast<bfloat16>(x: fputil::polyeval(
142 x: xf, a0: 0x1p+0f, a: 0x1p+0f, a: 0x1.00004p-1f, a: 0x1.555578p-3f));
143 }
144 }
145
146#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
147 constexpr fputil::ExceptValues<bfloat16, 4> EXPBF16_EXCEPTS = {.values: {
148 // (input, RZ output, RU offset, RD offset, RN offset)
149 // x = 0x40DB (6.84375)
150 // MPFR: RU=0x446B, RD=0x446A, RZ=0x446A, RN=0x446B
151 {.input: 0x40DBU, .rnd_towardzero_result: 0x446AU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
152 // x = 0x419D, keep original
153 {.input: 0x419DU, .rnd_towardzero_result: 0x4D9FU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 0U},
154 // x = 0x41F9 (31.125)
155 // MPFR: RU=0x55F0, RD=0x55EF, RZ=0x55EF, RN=0x55F0
156 {.input: 0x41F9U, .rnd_towardzero_result: 0x55EFU, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
157 // x = 0xC19F (-19.875)
158 // MPFR: RU=0x3121, RD=0x3120, RZ=0x3120, RN=0x3121
159 {.input: 0xC19FU, .rnd_towardzero_result: 0x3120U, .rnd_upward_offset: 1U, .rnd_downward_offset: 0U, .rnd_tonearest_offset: 1U},
160 }};
161
162 if (auto r = EXPBF16_EXCEPTS.lookup(x_bits: x_u); LIBC_UNLIKELY(r.has_value()))
163 return r.value();
164#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
165
166 // For -93 < x < 89, we do the following range reduction:
167 // x = hi + mid + lo
168 // where,
169 // hi is an integer
170 // mid * 2^2 is an integer
171 // -2^3 <= lo <= 2^3
172 // also, hi + mid = round(4 * x) / x
173 // then,
174 // exp(x) = exp(hi + mid + lo)
175 // = exp(hi) * exp(mid) * exp(lo)
176 // we store 184/8 + 1 = 24 values for looking up exp(hi)
177 // from -96 to 88
178 // we store 8*4 = 32 values for looking up exp(mid) since
179 // mid will always have the bit pattern |bbb.bb| where
180 // b can be either 0 or 1
181
182 float xf = static_cast<float>(x);
183 float kf = fputil::nearest_integer(x: xf * 4.0f);
184 int x_hi_mid = static_cast<int>(kf);
185 int x_hi = x_hi_mid >> 5;
186 int x_mid = x_hi_mid & 0b11111;
187 // lo = x - (hi + mid) = round(x * 4) / (-4) + x
188 float lo = fputil::multiply_add(x: kf, y: -0.25f, z: xf);
189
190 float exp_hi = expbf16_internal::EXP_HI[x_hi + 12];
191 float exp_mid = expbf16_internal::EXP_MID[x_mid];
192
193 // Degree-3 minimax polynomial generated by Sollya with the following
194 // commands:
195 // > display = hexadecimal;
196 // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-7, 2^-7]);
197 // > 1 + x * P;
198 // 0x1p0 + x * (0x1p0 + x * (0x1.00004p-1 + x * 0x1.555578p-3))
199 float exp_lo =
200 fputil::polyeval(x: lo, a0: 0x1p+0f, a: 0x1p+0f, a: 0x1.00004p-1f, a: 0x1.555578p-3f);
201
202 return fputil::cast<bfloat16>(x: exp_hi * exp_mid * exp_lo);
203}
204
205} // namespace math
206} // namespace LIBC_NAMESPACE_DECL
207
208#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPBF16_H
209