1//===-- Implementation header for asinpi ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINPI_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINPI_H
11
12#include "asin_utils.h"
13#include "src/__support/FPUtil/FEnvImpl.h"
14#include "src/__support/FPUtil/FPBits.h"
15#include "src/__support/FPUtil/double_double.h"
16#include "src/__support/FPUtil/dyadic_float.h"
17#include "src/__support/FPUtil/multiply_add.h"
18#include "src/__support/FPUtil/sqrt.h"
19#include "src/__support/macros/config.h"
20#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
21#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
22#include "src/__support/math/asin_utils.h"
23
24namespace LIBC_NAMESPACE_DECL {
25
26namespace math {
27
28LIBC_INLINE double asinpi(double x) {
29 using namespace asin_internal;
30 using FPBits = fputil::FPBits<double>;
31
32 FPBits xbits(x);
33 int x_exp = xbits.get_biased_exponent();
34
35 // |x| < 0.5.
36 if (x_exp < FPBits::EXP_BIAS - 1) {
37 // |x| < 2^-26.
38 if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 26)) {
39 // asinpi(+-0) = +-0.
40 if (LIBC_UNLIKELY(xbits.abs().uintval() == 0))
41 return x;
42 // When |x| < 2^-26, asinpi(x) ~ x/pi.
43 // The relative error of x/pi is:
44 // |asinpi(x) - x/pi| / |asinpi(x)| < x^2/6 < 2^-54.
45#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
46 return x * ASINPI_COEFFS[0];
47#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
48 }
49
50#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
51 return x * asinpi_eval(x * x);
52#else
53 using DFloat128 = fputil::DyadicFloat<128>;
54 using DoubleDouble = fputil::DoubleDouble;
55
56 // For |x| < 2^-511, x^2 would underflow to subnormal, raising a
57 // spurious underflow exception. Since asinpi(x) = x/pi with correction
58 // x^2/(6*pi) < 2^-1024 relative (negligible), compute x/pi directly
59 // in DFloat128.
60 if (LIBC_UNLIKELY(x_exp < 512)) {
61 DFloat128 x_f128(x);
62 DFloat128 r = fputil::quick_mul(a: x_f128, b: ONE_OVER_PI_F128);
63 double result = static_cast<double>(r);
64
65 // IEEE 754 "after rounding" tininess: the 53-bit unlimited-exponent
66 // result is strictly between +-2^-1022. DyadicFloat's conversion
67 // checks the *IEEE subnormal* result (52-bit at the boundary), not
68 // the 53-bit unlimited-exponent result, so we detect it here.
69 int exp_hi = r.exponent + 127 + FPBits::EXP_BIAS;
70 if (LIBC_UNLIKELY(exp_hi <= 0) && !r.mantissa.is_zero()) {
71 bool raise_underflow = true;
72 // When exp_hi == 0, a carry in 53-bit rounding can push the
73 // result to exactly 2^-1022 (not tiny). Check for this.
74 if (exp_hi == 0) {
75 constexpr unsigned SHIFT_53 = 128 - FPBits::SIG_LEN - 1;
76 using MantT = typename DFloat128::MantissaType;
77 MantT m53 = r.mantissa >> SHIFT_53;
78 constexpr MantT ALL_ONES_53 = (MantT(1) << (FPBits::SIG_LEN + 1)) - 1;
79 if (m53 == ALL_ONES_53) {
80 // All 53 bits set. carry happens if rounding rounds away
81 // from zero at this precision.
82 bool round_bit =
83 static_cast<bool>((r.mantissa >> (SHIFT_53 - 1)) & 1);
84 MantT sticky_mask = (MantT(1) << (SHIFT_53 - 1)) - 1;
85 bool sticky = (r.mantissa & sticky_mask) != 0;
86 bool lsb = static_cast<bool>(m53 & 1);
87#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
88 // Carry if round_bit && (lsb || sticky) (round half to even).
89 raise_underflow = !(round_bit && (lsb || sticky));
90#else
91 switch (fputil::quick_get_round()) {
92 case FE_TONEAREST:
93 // Carry if round_bit && (lsb || sticky) (round half to even).
94 raise_underflow = !(round_bit && (lsb || sticky));
95 break;
96 case FE_UPWARD:
97 raise_underflow = xbits.is_neg() || !(round_bit || sticky);
98 break;
99 case FE_DOWNWARD:
100 raise_underflow = !xbits.is_neg() || !(round_bit || sticky);
101 break;
102 case FE_TOWARDZERO:
103 default:
104 raise_underflow = true; // truncation never carries
105 break;
106 }
107#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
108 }
109 }
110 if (raise_underflow)
111 fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
112 }
113 return result;
114 }
115
116 unsigned idx = 0;
117 DoubleDouble x_sq = fputil::exact_mult(a: x, b: x);
118 double err = xbits.abs().get_val() * 0x1.0p-51;
119 // Polynomial approximation:
120 // p ~ asin(x)/(pi*x)
121
122 DoubleDouble p = asinpi_eval(u: x_sq, idx, err);
123 // asinpi(x) ~ x * p
124 DoubleDouble r0 = fputil::exact_mult(a: x, b: p.hi);
125 double r_lo = fputil::multiply_add(x, y: p.lo, z: r0.lo);
126
127 // Ziv's accuracy test.
128 double r_upper = r0.hi + (r_lo + err);
129 double r_lower = r0.hi + (r_lo - err);
130
131 if (LIBC_LIKELY(r_upper == r_lower))
132 return r_upper;
133
134 // Ziv's accuracy test failed, perform 128-bit calculation.
135
136 // Recalculate mod 1/64.
137 idx = static_cast<unsigned>(fputil::nearest_integer(x: x_sq.hi * 0x1.0p6));
138
139 DFloat128 x_f128(x);
140
141#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
142 DFloat128 u_hi(
143 fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi));
144 DFloat128 u = fputil::quick_add(u_hi, DFloat128(x_sq.lo));
145#else
146 DFloat128 x_sq_f128 = fputil::quick_mul(a: x_f128, b: x_f128);
147 DFloat128 u = fputil::quick_add(
148 a: x_sq_f128, b: DFloat128(static_cast<double>(idx) * (-0x1.0p-6)));
149#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
150
151 DFloat128 p_f128 = asinpi_eval(u, idx);
152 DFloat128 r = fputil::quick_mul(a: x_f128, b: p_f128);
153
154 return static_cast<double>(r);
155#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
156 }
157 // |x| >= 0.5
158
159 double x_abs = xbits.abs().get_val();
160
161 // Maintaining the sign:
162 constexpr double SIGN[2] = {1.0, -1.0};
163 double x_sign = SIGN[xbits.is_neg()];
164
165 // |x| >= 1
166 if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) {
167 // x = +-1, asinpi(x) = +- 0.5
168 if (x_abs == 1.0) {
169 return x_sign * 0.5;
170 }
171 // |x| > 1, return NaN.
172 if (xbits.is_quiet_nan())
173 return x;
174
175 // Set domain error for non-NaN input.
176 if (!xbits.is_nan())
177 fputil::set_errno_if_required(EDOM);
178
179 fputil::raise_except_if_required(FE_INVALID);
180 return FPBits::quiet_nan().get_val();
181 }
182
183 // When |x| >= 0.5, we perform range reduction as follow:
184 //
185 // Assume further that 0.5 <= x < 1, and let:
186 // y = asin(x)
187 // Using the identity:
188 // asin(x) = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
189 // We get:
190 // asinpi(x) = asin(x)/pi = 0.5 - 2 * asin(sqrt(u)) / pi
191 // = 0.5 - 2 * sqrt(u) * [asin(sqrt(u)) / (pi * sqrt(u))]
192 // = 0.5 - 2 * sqrt(u) * asinpi_eval(u)
193 // where u = (1 - |x|) / 2.
194
195 // u = (1 - |x|)/2
196 double u = fputil::multiply_add(x: x_abs, y: -0.5, z: 0.5);
197 // v_hi ~ sqrt(u).
198 double v_hi = fputil::sqrt<double>(x: u);
199
200#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
201 double p = asinpi_eval(u);
202 double r = x_sign * fputil::multiply_add(-2.0 * v_hi, p, 0.5);
203 return r;
204#else
205 using DFloat128 = fputil::DyadicFloat<128>;
206 using DoubleDouble = fputil::DoubleDouble;
207
208#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
209 double h = fputil::multiply_add(v_hi, -v_hi, u);
210#else
211 DoubleDouble v_hi_sq = fputil::exact_mult(a: v_hi, b: v_hi);
212 double h = (u - v_hi_sq.hi) - v_hi_sq.lo;
213#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
214
215 // Scale v_lo and v_hi by 2 from the formula:
216 // vh = v_hi * 2
217 // vl = 2*v_lo = h / v_hi.
218 double vh = v_hi * 2.0;
219 double vl = h / v_hi;
220
221 // Polynomial approximation:
222 // p ~ asin(sqrt(u))/(pi*sqrt(u))
223 unsigned idx = 0;
224 double err = vh * 0x1.0p-51;
225
226 DoubleDouble p = asinpi_eval(u: DoubleDouble{.lo: 0.0, .hi: u}, idx, err);
227
228 // Perform computations in double-double arithmetic:
229 // asinpi(x) = 0.5 - (vh + vl) * p
230 DoubleDouble r0 = fputil::quick_mult(a: DoubleDouble{.lo: vl, .hi: vh}, b: p);
231 DoubleDouble r = fputil::exact_add(a: 0.5, b: -r0.hi);
232
233 double r_lo = -r0.lo + r.lo;
234
235 // Ziv's accuracy test.
236
237#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
238 double r_upper = fputil::multiply_add(
239 r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, err));
240 double r_lower = fputil::multiply_add(
241 r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, -err));
242#else
243 r_lo *= x_sign;
244 r.hi *= x_sign;
245 double r_upper = r.hi + (r_lo + err);
246 double r_lower = r.hi + (r_lo - err);
247#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
248
249 if (LIBC_LIKELY(r_upper == r_lower))
250 return r_upper;
251
252 // Ziv's accuracy test failed, we redo the computations in DFloat128.
253 // Recalculate mod 1/64.
254 idx = static_cast<unsigned>(fputil::nearest_integer(x: u * 0x1.0p6));
255
256 // After the first step of Newton-Raphson approximating v = sqrt(u):
257 // sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
258 // v_lo = h / (2 * v_hi)
259 // Add second-order correction:
260 // v_ll = -v_lo * (h / (4u))
261
262 // Get the rounding error of vl = 2 * v_lo ~ h / vh
263#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
264 double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi;
265#else
266 DoubleDouble vh_vl = fputil::exact_mult(a: v_hi, b: vl);
267 double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
268#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
269 // vll = 2*v_ll = -vl * (h / (4u)).
270 double t = h * (-0.25) / u;
271 double vll = fputil::multiply_add(x: vl, y: t, z: vl_lo);
272 // m_v = -(v_hi + v_lo + v_ll).
273 DFloat128 m_v = fputil::quick_add(
274 a: DFloat128(vh), b: fputil::quick_add(a: DFloat128(vl), b: DFloat128(vll)));
275 m_v.sign = Sign::NEG;
276
277 // Perform computations in DFloat128:
278 // asinpi(x) = 0.5 - (v_hi + v_lo + vll) * P_pi(u).
279 DFloat128 y_f128(
280 fputil::multiply_add(x: static_cast<double>(idx), y: -0x1.0p-6, z: u));
281
282 DFloat128 p_f128 = asinpi_eval(u: y_f128, idx);
283 DFloat128 r0_f128 = fputil::quick_mul(a: m_v, b: p_f128);
284 DFloat128 r_f128 = fputil::quick_add(a: HALF_F128, b: r0_f128);
285
286 if (xbits.is_neg())
287 r_f128.sign = Sign::NEG;
288
289 return static_cast<double>(r_f128);
290#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
291}
292
293} // namespace math
294
295} // namespace LIBC_NAMESPACE_DECL
296
297#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINPI_H
298