1//===-- Implementation header for atan --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
11
12#include "atan_utils.h"
13#include "src/__support/FPUtil/FEnvImpl.h"
14#include "src/__support/FPUtil/FPBits.h"
15#include "src/__support/FPUtil/double_double.h"
16#include "src/__support/FPUtil/multiply_add.h"
17#include "src/__support/FPUtil/nearest_integer.h"
18#include "src/__support/macros/config.h"
19#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
20
21namespace LIBC_NAMESPACE_DECL {
22
23namespace math {
24
25// To compute atan(x), we divided it into the following cases:
26// * |x| < 2^-26:
27// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
28// return atan(x) = x - sign(x) * epsilon.
29// * 2^-26 <= |x| < 1:
30// We perform range reduction mod 2^-6 = 1/64 as follow:
31// Let k = 2^(-6) * round(|x| * 2^6), then
32// atan(x) = sign(x) * atan(|x|)
33// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
34// We store atan(k) in a look up table, and perform intermediate steps in
35// double-double.
36// * 1 < |x| < 2^53:
37// First we perform the transformation y = 1/|x|:
38// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
39// = sign(x) * (pi/2 - atan(y)).
40// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
41// previous case:
42// Let k = 2^(-6) * round(y * 2^6), then
43// atan(y) = atan(k) + atan((y - k) / (1 + y*k))
44// = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
45// = atan(k) + atan((1 - k*|x|) / (|x| + k)).
46// * |x| >= 2^53:
47// Using the reciprocal transformation:
48// atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
49// We have that:
50// atan(1/|x|) <= 1/|x| <= 2^-53,
51// which is smaller than ulp(pi/2) / 2.
52// So we can return:
53// atan(x) = sign(x) * (pi/2 - epsilon)
54
55LIBC_INLINE double atan(double x) {
56
57 using namespace atan_internal;
58 using FPBits = fputil::FPBits<double>;
59
60 constexpr double IS_NEG[2] = {1.0, -1.0};
61 constexpr DoubleDouble PI_OVER_2 = {.lo: 0x1.1a62633145c07p-54,
62 .hi: 0x1.921fb54442d18p0};
63 constexpr DoubleDouble MPI_OVER_2 = {.lo: -0x1.1a62633145c07p-54,
64 .hi: -0x1.921fb54442d18p0};
65
66 FPBits xbits(x);
67 bool x_sign = xbits.is_neg();
68 xbits = xbits.abs();
69 uint64_t x_abs = xbits.uintval();
70 int x_exp =
71 static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
72
73 // |x| < 1.
74 if (x_exp < 0) {
75 if (LIBC_UNLIKELY(x_exp < -26)) {
76#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
77 return x;
78#else
79 if (x == 0.0)
80 return x;
81 // |x| < 2^-26
82 return fputil::multiply_add(x: -0x1.0p-54, y: x, z: x);
83#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
84 }
85
86 double x_d = xbits.get_val();
87 // k = 2^-6 * round(2^6 * |x|)
88 double k = fputil::nearest_integer(x: 0x1.0p6 * x_d);
89 unsigned idx = static_cast<unsigned>(k);
90 k *= 0x1.0p-6;
91
92 // numerator = |x| - k
93 DoubleDouble num, den;
94 num.lo = 0.0;
95 num.hi = x_d - k;
96
97 // denominator = 1 - k * |x|
98 den.hi = fputil::multiply_add(x: x_d, y: k, z: 1.0);
99 DoubleDouble prod = fputil::exact_mult(a: x_d, b: k);
100 // Using Dekker's 2SUM algorithm to compute the lower part.
101 den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;
102
103 // x_r = (|x| - k) / (1 + k * |x|)
104 DoubleDouble x_r = fputil::div(a: num, b: den);
105
106 // Approximating atan(x_r) using Taylor polynomial.
107 DoubleDouble p = atan_eval(x: x_r);
108
109 // atan(x) = sign(x) * (atan(k) + atan(x_r))
110 // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
111#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
112 return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
113#else
114
115 DoubleDouble c0 = fputil::exact_add(a: ATAN_I[idx].hi, b: p.hi);
116 double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
117 double r = IS_NEG[x_sign] * (c0.hi + c1);
118
119 return r;
120#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
121 }
122
123 // |x| >= 2^53 or x is NaN.
124 if (LIBC_UNLIKELY(x_exp >= 53)) {
125 // x is nan
126 if (xbits.is_nan()) {
127 if (xbits.is_signaling_nan()) {
128 fputil::raise_except_if_required(FE_INVALID);
129 return FPBits::quiet_nan().get_val();
130 }
131 return x;
132 }
133 // |x| >= 2^53
134 // atan(x) ~ sign(x) * pi/2.
135 if (x_exp >= 53)
136#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
137 return IS_NEG[x_sign] * PI_OVER_2.hi;
138#else
139 return fputil::multiply_add(x: IS_NEG[x_sign], y: PI_OVER_2.hi,
140 z: IS_NEG[x_sign] * PI_OVER_2.lo);
141#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
142 }
143
144 double x_d = xbits.get_val();
145 double y = 1.0 / x_d;
146
147 // k = 2^-6 * round(2^6 / |x|)
148 double k = fputil::nearest_integer(x: 0x1.0p6 * y);
149 unsigned idx = static_cast<unsigned>(k);
150 k *= 0x1.0p-6;
151
152 // denominator = |x| + k
153 DoubleDouble den = fputil::exact_add(a: x_d, b: k);
154 // numerator = 1 - k * |x|
155 DoubleDouble num;
156 num.hi = fputil::multiply_add(x: -x_d, y: k, z: 1.0);
157 DoubleDouble prod = fputil::exact_mult(a: x_d, b: k);
158 // Using Dekker's 2SUM algorithm to compute the lower part.
159 num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;
160
161 // x_r = (1/|x| - k) / (1 - k/|x|)
162 // = (1 - k * |x|) / (|x| - k)
163 DoubleDouble x_r = fputil::div(a: num, b: den);
164
165 // Approximating atan(x_r) using Taylor polynomial.
166 DoubleDouble p = atan_eval(x: x_r);
167
168 // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
169 // = sign(x) * (pi/2 - atan(k) - atan(x_r))
170 // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
171#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
172 double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
173 return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
174#else
175 DoubleDouble c0 = fputil::exact_add(a: MPI_OVER_2.hi, b: ATAN_I[idx].hi);
176 DoubleDouble c1 = fputil::exact_add(a: c0.hi, b: p.hi);
177 double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
178
179 double r = IS_NEG[!x_sign] * (c1.hi + c2);
180
181 return r;
182#endif
183}
184
185} // namespace math
186
187} // namespace LIBC_NAMESPACE_DECL
188
189#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
190