1//===-- Implementation for atanbf16(x) function ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANBF16_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANBF16_H
11
12#include "src/__support/FPUtil/FEnvImpl.h"
13#include "src/__support/FPUtil/FPBits.h"
14#include "src/__support/FPUtil/PolyEval.h"
15#include "src/__support/FPUtil/bfloat16.h"
16#include "src/__support/FPUtil/cast.h"
17#include "src/__support/FPUtil/multiply_add.h"
18#include "src/__support/FPUtil/sqrt.h"
19#include "src/__support/macros/optimization.h"
20
21namespace LIBC_NAMESPACE_DECL {
22
23namespace math {
24
25LIBC_INLINE bfloat16 atanbf16(bfloat16 x) {
26 // Generated by Sollya using the following command:
27 // > display = hexadecimal;
28 // > round(pi/2, SG, RN);
29 constexpr float PI_2 = 0x1.921fb6p0f;
30
31 using FPBits = fputil::FPBits<bfloat16>;
32 FPBits xbits(x);
33
34 uint16_t x_u = xbits.uintval();
35 uint16_t x_abs = x_u & 0x7fff;
36 bool x_sign = x_u >> 15;
37 float sign = (x_sign ? -1.0f : 1.0f);
38
39 // Taylor series -> [x - x^3/3 + x^5/5 - x^7/7 ...]
40 // x * [1 - x^2/3 + x^4/5 - x^6/7...] -> x * P(x)
41 // atan(x) = x * poly(x^2)
42 // atan(x)/x = poly(x^2)
43
44 // Degree 14 polynomial of atan(x) generated using Sollya with command :
45 // > display = hexadecimal ;
46 // > P = fpminimax(atan(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|1, SG,
47 // SG..SG|], [0, 1]);
48 //
49 // relative error for the polynomial given by:
50 // > dirtyinfnorm(atan(x)/x - P(x), [0, 1]);
51 // gives error ~ 0x1.ee44001p-24
52 // worst case error for it being ~ 0x1.dcf750p-23
53 // satisfying -> error < worst_case
54 auto atan_eval = [](float x0) {
55 return fputil::polyeval(x: x0, a0: -0x1.5552c4p-2f, a: 0x1.990f2p-3f, a: -0x1.1f7dccp-3f,
56 a: 0x1.97e49p-4f, a: -0x1.ebff34p-5f, a: 0x1.938e46p-6f,
57 a: -0x1.3a28bcp-8f);
58 };
59
60 float xf = x;
61 float x_sq = xf * xf;
62
63 // |x| <= 1
64 if (x_abs <= 0x3f80) {
65 // atanbf16(+/-0) = +/-0
66 if (LIBC_UNLIKELY(x_abs == 0))
67 return x;
68
69 // For smaller x
70 if (LIBC_UNLIKELY(x_abs <= 0x3db8))
71 return fputil::cast<bfloat16>(x: fputil::multiply_add(x: xf, y: -0x1p-9f, z: xf));
72
73 float result = atan_eval(x_sq);
74 return fputil::cast<bfloat16>(x: fputil::multiply_add(x: xf * x_sq, y: result, z: xf));
75 }
76
77 // |x| is +/-inf or NaN
78 if (LIBC_UNLIKELY(x_abs >= 0x7F80)) {
79 // NaN
80 if (xbits.is_nan()) {
81 if (xbits.is_signaling_nan()) {
82 fputil::raise_except_if_required(FE_INVALID);
83 return FPBits::quiet_nan().get_val();
84 }
85 return x;
86 }
87 // atanbf16(+/-inf) = +/-pi/2
88 return fputil::cast<bfloat16>(x: sign * PI_2);
89 }
90
91 // If |x| > 1:
92 // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
93 // Since 1/|x| < 1, we can use the same polynomial.
94 float x_inv_sq = 1.0f / x_sq;
95 float x_inv = fputil::sqrt<float>(x: x_inv_sq);
96
97 float result = atan_eval(x_inv_sq);
98 float atan_inv = fputil::multiply_add(x: x_inv * x_inv_sq, y: result, z: x_inv);
99 return fputil::cast<bfloat16>(x: sign * (PI_2 - atan_inv));
100}
101
102} // namespace math
103} // namespace LIBC_NAMESPACE_DECL
104
105#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANBF16_H
106